Ticket #2687: parsemergedata.R

File parsemergedata.R, 4.3 KB (added by tomb, 8 years ago)

Partial merge data parser

Line 
## A happy script to parse mergedata from task 2672
##
## usage: R -f parsemergedata.R
4library("stringr")
5
## Parse the mergedata file "50kb.mergedata" (looked up in the current working
## directory) into a list of "mergedata" objects.
##
## Every input line is split on single spaces/tabs into KEY=VALUE tokens. Known
## keys are stored, with an explicit type per key, in the matching slot of a
## fresh "mergedata" object; unknown keys, empty tokens and tokens without "="
## are ignored.
##
## Args:
##   ARGV: currently unused. It now has a default so that the argument-less
##         call at the bottom of the script does not rely on lazy evaluation.
##
## Returns:
##   Invisibly, a list holding one "mergedata" object per input line (an empty
##   list for an empty file).
##
## Side effects: registers the S4 classes "extradataFile" and "mergedata",
## reads the input file and writes debug/progress text to stdout.
parsemergedataMain <- function(ARGV = NULL) {
  ## structures
  setClass("extradataFile",
           representation(
             filename = "character",
             guardLabel = "character",
             filesizeLabel = "character",
             filesize = "numeric"
           ))

  ## TODO: ensure all possible lvals are represented in this class
  setClass("mergedata",
           representation(
             buildtimes = "numeric",
             circ_id = "numeric",
             launch = "numeric",
             path = "character",
             quantile = "numeric",
             resolvefailed = "character",
             connectsec = "numeric",
             connectusec = "numeric",
             datacompletesec = "numeric",
             datacompleteusec = "numeric",
             datarequestsec = "numeric",
             datarequestusec = "numeric",
             negotiatesec = "numeric",
             negotiateusec = "numeric",
             readbytes = "numeric",
             fail_reasons = "character"
           ))

  ## globals
  kDebug <- TRUE # set TRUE for debugging output
  kProgname <- "parsemergedata.R"
  kVersion <- 0.1

  ## Explicit KEY -> slot tables, one per target type. Keys are handled
  ## explicitly (rather than generically) to keep the slots type safe.
  ## Comma-separated list of numbers:
  kNumericListFields <- c(BUILDTIMES = "buildtimes")
  ## Single numbers. The DATAREQUEST* keys fill slots that the class declares
  ## but that were previously never populated.
  kNumericFields <- c(
    CIRC_ID = "circ_id",
    LAUNCH = "launch",
    QUANTILE = "quantile",
    CONNECTSEC = "connectsec",
    CONNECTUSEC = "connectusec",
    DATACOMPLETESEC = "datacompletesec",
    DATACOMPLETEUSEC = "datacompleteusec",
    DATAREQUESTSEC = "datarequestsec",
    DATAREQUESTUSEC = "datarequestusec",
    NEGOTIATESEC = "negotiatesec",
    NEGOTIATEUSEC = "negotiateusec",
    READBYTES = "readbytes"
  )
  ## Strings stored verbatim:
  kCharacterFields <- c(
    PATH = "path",
    RESOLVEFAILED = "resolvefailed",
    FAIL_REASONS = "fail_reasons"
  )

  ## helper functions

  ## Print the concatenated arguments plus a newline, but only when kDebug is
  ## set. Callers pass the pieces directly; wrapping them in cat() would print
  ## them regardless of kDebug.
  log_debug <- function(...) {
    if (kDebug) {
      cat(..., "\n", sep = "")
    }
    invisible(NULL)
  }

  ## Turn one input line into a "mergedata" object.
  parse_row <- function(row) {
    record <- new("mergedata")
    log_debug("read line: ", row)
    ## Note: assumes delimiter is space or tab
    tokens <- strsplit(row, "[ \t]")[[1]]

    log_debug("cols: ")
    ## Blank lines and repeated delimiters yield empty tokens; drop them.
    for (token in tokens[nzchar(tokens)]) {
      ## Split at the FIRST "=" so values that contain "=" stay intact.
      eq_pos <- regexpr("=", token, fixed = TRUE)
      if (eq_pos < 1L) {
        next
      }
      lval <- substr(token, 1L, eq_pos - 1L)
      rval <- substr(token, eq_pos + 1L, nchar(token))
      log_debug(lval, " = ", rval)

      ## add data to object
      ## NOTE: ignores unknown lvals
      if (lval %in% names(kNumericListFields)) {
        rvals <- strsplit(rval, ",", fixed = TRUE)[[1]]
        slot(record, kNumericListFields[[lval]]) <- as.numeric(rvals)
      } else if (lval %in% names(kNumericFields)) {
        slot(record, kNumericFields[[lval]]) <- as.numeric(rval)
      } else if (lval %in% names(kCharacterFields)) {
        slot(record, kCharacterFields[[lval]]) <- as.character(rval)
      }
    }
    record
  }

  log_debug(kProgname, " version ", kVersion)

  ## stuff dealing with input files
  ## TODO: tomb, genericize arg handleing with code from filter.R
  input_file <- new("extradataFile", filename = "50kb.mergedata")
  input_file@guardLabel <- "fooGuard"
  input_file@filesizeLabel <- "fooSizeLabel"
  input_file@filesize <- 1

  log_debug("i will read file: ", input_file@filename, " ",
            input_file@guardLabel, " ",
            input_file@filesizeLabel, " ",
            input_file@filesize)

  if (!file.exists(input_file@filename)) {
    stop("input file not found: ", input_file@filename, call. = FALSE)
  }

  ## One record per line; lapply avoids re-copying the result on every row.
  rows <- readLines(input_file@filename)
  mergedata_vector <- lapply(rows, parse_row)

  cat("\n\n")
  log_debug("data vector length: ", length(mergedata_vector))
  ## write output into a csv
  ## write.csv(filtered, "filtered.csv", quote = FALSE, row.names = FALSE)
  invisible(mergedata_vector)
}
125
## Script entry point: run the parser when this file is executed
## (see the usage note at the top: R -f parsemergedata.R).
parsemergedataMain()