Ticket #1919: plot.R

File plot.R, 3.5 KB (added by karsten, 9 years ago)

R code to make an ECDF graph of 3x3 torperf output files

Line 
1# Usage: Put 9 torperf output files {slow|normal|fast}-{50kb|1mb|5mb}.data
2# in the working directory and run "R --slave < plot.R". Also read through
3# the comments below, or this script might do something you don't expect!
4
5# Load ggplot library without printing out stupid warnings
6options(warn = -1)
7suppressPackageStartupMessages(library("ggplot2"))
8
# Read one torperf output file and turn it into the points of an ECDF of
# request completion times.
#
# Arguments:
#   datafile: path of a whitespace-separated torperf output file without a
#             header line. Columns V1/V2 are used as the seconds and
#             microseconds of the start time, V17/V18 as the seconds and
#             microseconds of the completion time, and V20 as the number
#             of bytes read.
#   expbytes: runs whose V20 is not greater than this value are treated
#             as unfinished downloads and dropped.
#   source:   label copied into the "source" column of the result.
#   filesize: label copied into the "filesize" column of the result.
#
# Returns a data frame with the columns x (completion time in seconds, in
# ascending order), y (cumulative fraction of runs, ending at 1), source
# and filesize. The data frame has zero rows if no run survives the
# filtering steps.
transform <- function(datafile, expbytes, source, filesize) {

  # Read in the data
  data <- read.table(datafile, header = FALSE)

  # Remove unfinished downloads
  data <- data[data$V20 > expbytes, ]

  # Request completion time in seconds: completion timestamp minus start
  # timestamp. Seconds and microseconds are subtracted separately so that
  # the start microseconds are subtracted rather than added, and so that
  # no precision is lost to very large intermediate values.
  complete <- (data$V17 - data$V1) + (data$V18 - data$V2) / 1e6

  # Remove the slowest 1 % of all runs, as they'll make the CDF graph much
  # harder to read without adding much information
  complete <- complete[complete < quantile(complete, 0.99)]

  # Order completion times ascending
  complete <- complete[order(complete)]

  # Build the data frame that has ordered completion times on the x axis
  # and the CDF value on the y axis, plus source and filesize for
  # distinguishing these values from the other data frames we're going to
  # bind together. seq_len() and rep(..., length.out = n) keep all columns
  # the same length even when no runs are left.
  n <- length(complete)
  data.frame(
    x = complete,
    y = seq_len(n) / n,
    source = rep(source, length.out = n),
    filesize = rep(filesize, length.out = n))
}
46
# Run each of the nine input files through transform() and stack the
# results into a single data frame. The four argument vectors are walked
# in parallel, so the rows appear in the order slow, normal, fast, each
# with 50 KiB, 1 MiB and 5 MiB. unname() keeps the file names from ending
# up in the row names of the combined data frame.
data <- do.call(rbind, unname(Map(
  transform,
  c("slow-50kb.data", "slow-1mb.data", "slow-5mb.data",
    "normal-50kb.data", "normal-1mb.data", "normal-5mb.data",
    "fast-50kb.data", "fast-1mb.data", "fast-5mb.data"),
  rep(c(50 * 2^10, 2^20, 5 * 2^20), times = 3),
  rep(c("slow", "normal", "fast"), each = 3),
  rep(c("50 KiB", "1 MiB", "5 MiB"), times = 3))))
58
# Start plotting the data with x and y values on the x and y axis and the
# source being encoded as color
ggplot(data, aes(x = x, y = y, colour = source)) +

# Draw this graph as a line plot (recent ggplot2 releases prefer the
# argument name linewidth for lines; size is kept so that older releases
# still work)
geom_line(size = 1) +

# Make three graphs for the three file sizes while rescaling the x axis as
# needed
facet_grid(. ~ filesize, scales = "free_x") +

# Change the x axis label to make it more useful
scale_x_continuous(name = "\nRequest completion time (in seconds)") +

# Also change the y axis label and set its limit to 0..1
scale_y_continuous(name = "ECDF", limits = c(0, 1)) +

# Remove the legend title and give the three data sources more useful
# names
scale_colour_hue(name = "", breaks = c("slow", "normal", "fast"),
  labels = c("Slow guards", "Default guards", "Fast guards")) +

# Set a graph title; ggtitle() and theme() are used because opts() is no
# longer available in ggplot2
ggtitle("Influence of guard selection on user-perceived Tor performance") +

# Move the legend to the top of the graph
theme(legend.position = "top")

# Save the graph as .PNG file in a resolution that fits nicely into the
# Tor blog; change the factors 0.9 to 1.3 or something higher to make the
# graph more readable
ggsave("torperf-guards-ecdf.png", width = 0.9 * 8, height = 0.9 * 5,
  dpi = 72)
90