discovery.R (3642B)
# Load node discovery timings from benchmark logs and summarise them.
#
# Searches `dir` recursively for files whose name contains
# 'bsc.10.1.0.1.log'. The path of every log relative to `dir` is expected to
# start with d<daemons>/n<nodes>/a<attempt>/. For every log the function
#   - takes all 'time since epoch <N>ms' events relative to the earliest one,
#   - drops the events that follow the first gap longer than 4000 ms
#     (the termination point; no trimming is done when daemons == 1),
#   - uses the latest remaining event as the elapsed time and subtracts the
#     artificial timeout, nodes*daemons*timeout, from it.
# Logs with an unexpected path layout or without any timestamps are reported
# on stderr and skipped. Logs in which the number of 'add subordinate' lines
# differs from nodes*daemons-1 are reported on stderr but still used.
#
# The per-attempt table is printed to stdout.
#
# Arguments:
#   dir  directory that contains the benchmark output.
#
# Returns a data frame with one row per (nodes, daemons) pair and the columns
# nodes, daemons, t_avg, t_min, t_max (seconds). The data frame has no rows
# when no usable logs are found.
bscheduler.load_node_discovery_data <- function (
  dir=file.path('build', 'bscheduler-benchmarks', 'output', 'm12')
) {
  # gap between adjacent events (ms) that marks the termination point
  termination_gap <- 4000
  all_files <- list.files(
    dir,
    # dots are escaped so that each of them matches only a literal '.'
    pattern='bsc\\.10\\.1\\.0\\.1\\.log',
    recursive=TRUE
  )
  # extract one numeric path component, NA when the path does not match
  path_field <- function (pattern, path) {
    if (!grepl(pattern, path, perl=TRUE)) {
      return(NA_real_)
    }
    as.numeric(sub(pattern, '\\1', path, perl=TRUE))
  }
  # turn one log into a one-row data frame, NULL when the log is unusable
  load_one <- function (file) {
    daemons <- path_field('^d([0-9]+)/.*$', file)
    nodes <- path_field('^d[0-9]+/n([0-9]+)/.*$', file)
    attempt <- path_field('^d[0-9]+/n[0-9]+/a([0-9]+)/.*$', file)
    f <- file.path(dir, file)
    if (is.na(daemons) || is.na(nodes)) {
      write(paste('# Unexpected log path:', f), stderr())
      return(NULL)
    }
    lines <- readLines(f)
    nsubordinates <- sum(grepl('add subordinate', lines))
    if (nsubordinates != nodes*daemons-1) {
      write(paste('# Bad no. of subordinates:', f, nsubordinates), stderr())
      write(paste('rm -rf', dirname(f)), stderr())
    }
    stamps <- lines[grepl('time since epoch [0-9]+ms', lines, perl=TRUE)]
    stamps <- as.numeric(
      sub('^.*time since epoch ([0-9]+)ms.*$', '\\1', stamps, perl=TRUE)
    )
    if (length(stamps) == 0) {
      # without this check the elapsed time would silently become NA
      write(paste('# No timestamps:', f), stderr())
      return(NULL)
    }
    stamps <- stamps - min(stamps)
    # find termination time point: first large gap between adjacent events
    gaps <- which(diff(stamps) > termination_gap)
    if (length(gaps) > 0 && daemons != 1) {
      last <- gaps[[1]]
    } else {
      last <- length(stamps)
    }
    # remove all events after termination
    elapsed <- max(stamps[seq_len(last)])
    data.frame(
      daemons=daemons,
      nodes=nodes,
      attempt=attempt,
      timeout=if (daemons == 1) 190 else 100,
      t=elapsed
    )
  }
  rows <- Filter(Negate(is.null), lapply(all_files, load_one))
  if (length(rows) == 0) {
    all_data <- data.frame(
      daemons=numeric(0),
      nodes=numeric(0),
      attempt=numeric(0),
      timeout=numeric(0),
      t=numeric(0)
    )
  } else {
    all_data <- do.call(rbind, rows)
  }
  # subtract artificial timeout
  all_data$t <- all_data$t - (all_data$nodes*all_data$daemons)*all_data$timeout
  all_data$timeout <- NULL
  write('All data:', stdout())
  print(all_data[order(all_data$daemons, all_data$nodes, all_data$attempt), ])
  if (nrow(all_data) == 0) {
    # aggregate() refuses to work on zero rows, return an empty summary
    return(data.frame(
      nodes=numeric(0),
      daemons=numeric(0),
      t_avg=numeric(0),
      t_min=numeric(0),
      t_max=numeric(0)
    ))
  }
  groups <- list(nodes=all_data$nodes, daemons=all_data$daemons)
  result <- aggregate(all_data$t, by=groups, FUN=mean)
  names(result)[names(result) == 'x'] <- 't_avg'
  result$t_min <- aggregate(all_data$t, by=groups, FUN=min)$x
  result$t_max <- aggregate(all_data$t, by=groups, FUN=max)$x
  # convert milliseconds to seconds
  result$t_avg <- result$t_avg / 1000
  result$t_min <- result$t_min / 1000
  result$t_max <- result$t_max / 1000
  result
}

# Plot node discovery time against the number of nodes.
#
# Calls bscheduler.load_node_discovery_data(), keeps the rows with at least
# 5 nodes and draws one solid line (average) with points and two dashed lines
# (minimum and maximum) for each of the daemon counts 1, 8, 32 and 64.
#
# Arguments:
#   xlabel    label of the horizontal axis.
#   ylabel    label of the vertical axis.
#   toplabel  prefix of every legend entry; the daemon count is appended
#             after ': '.
#
# Stops with an error when there is nothing to plot.
bscheduler.plot_discovery <- function (xlabel='No. of physical nodes',
                                       ylabel='Time, s',
                                       toplabel='Processes per node') {
  result <- bscheduler.load_node_discovery_data()
  # consider only large number of nodes
  result <- result[result$nodes>=5,]
  if (nrow(result) == 0) {
    # plot.window() would otherwise fail on a non-finite xlim
    stop('no discovery data for 5 or more nodes', call.=FALSE)
  }
  params <- list(list(n=1, col='black'),
                 list(n=8, col='#707070'),
                 list(n=32, col='blue'),
                 list(n=64, col='#c04040'))

  ltext <- vapply(
    params,
    function (p) paste(toplabel, p$n, sep=': '),
    character(1)
  )
  lcolors <- vapply(params, function (p) p$col, character(1))

  plot.new()
  plot.window(
    xlim=range(result$nodes),
    # the vertical range always includes the 0.5..2.0 axis ticks
    ylim=range(result$t_min, result$t_max, 0.5, 2.0)
  )
  for (p in params) {
    res <- result[result$daemons==p$n,]
    x <- res$nodes
    lines(x, res$t_avg, col=p$col, lwd=2)
    points(x, res$t_avg, col=p$col)
    lines(x, res$t_min, lty='dashed', col=p$col)
    lines(x, res$t_max, lty='dashed', col=p$col)
  }
  axis(1, at=seq_len(max(result$nodes)))
  axis(2, at=seq(0.5,2.0,0.5))
  title(xlab=xlabel, ylab=ylabel)
  legend(
    'topright',
    legend=ltext,
    col=lcolors,
    lty='solid',
    lwd=2
  )
  box()
}