commit 511d33ca409b340799fd7e3715daf5f61face8ff
parent c5d51fa828aabfc8110422258c11933e811ffcbc
Author: Ivan Gankevich <igankevich@ya.ru>
Date: Mon, 14 Aug 2017 19:22:29 +0300
Add event plot for XFS, GFS, NFS benchmark.
Diffstat:
 R/benchmarks.R  | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 arma-thesis.org | 36 +++++++++++++++++++++++++++---------
 2 files changed, 97 insertions(+), 9 deletions(-)
diff --git a/R/benchmarks.R b/R/benchmarks.R
@@ -1,4 +1,5 @@
source(file.path("build", "arma-benchmarks", "R", "arma.load.R"))
+source(file.path("build", "arma-benchmarks", "R", "arma.load_events.R"))
arma.load_benchmark_data <- function(attempt, framework, models, tags) {
data <- data.frame()
@@ -108,3 +109,72 @@ arma.print_sync_vs_async_io <- function(suffix_names, row_names, top_names) {
print(ascii(c("", "", "", top_names[[1]], "", "", top_names[[2]])))
print(ascii(big_table, include.rownames=FALSE))
}
+
+# Draw a per-thread event timeline for each file system: computation events
+# ("generate_surface") and I/O events ("write_surface") are shown as horizontal
+# segments on a common time axis. `fsnames` maps file system codes to
+# human-readable panel titles.
+arma.plot_io_events <- function (fsnames) {
+    filesystems <- c("xfs", "nfs", "gfs")
+    conf <- list(
+        a=list(
+            color='#000000',
+            lty="solid",
+            lwd=3,
+            name="generate_surface"
+        ),
+        b=list(
+            color='#9F2D20',
+            lty="solid",
+            lwd=3,
+            name="write_surface"
+        )
+    )
+    for (fs in filesystems) {
+        attempt <- paste("a5", fs, "events", sep="-")
+        data <- arma.load_events(
+            file.path("build", "arma-benchmarks",
+                "output", "gpulab1", attempt, 10000, "openmp", "ar"),
+            c("write_surface", "generate_surface", "programme")
+        )
+        ev_prog <- data[data$event == "programme",]  # loaded but not plotted
+        ev_gen <- data[data$event == "generate_surface",]
+        ev_write <- data[data$event == "write_surface",]
+        # plot all I/O events on a separate line above the computation threads
+        ev_write$thread_no <- max(ev_gen$thread_no) + 1
+        threads <- 0:max(ev_write$thread_no)
+        # timestamps are in microseconds, convert them to seconds
+        max_x <- max(ev_write$t1, ev_gen$t1)/1000/1000
+        plot.new()
+        plot.window(xlim=c(0,max_x), ylim=range(threads))
+        conf$a$table <- ev_gen
+        conf$b$table <- ev_write
+        for (cnf in conf) {
+            tbl <- cnf$table
+            # draw each event as a horizontal segment from t0 to t1
+            # at the height of the thread that recorded it
+            for (row in seq_len(nrow(tbl))) {
+                ev <- tbl[row,]
+                ys <- rep(ev$thread_no, 2)
+                xs <- c(ev$t0, ev$t1)/1000/1000
+                lines(xs, ys, lwd=cnf$lwd, lty=cnf$lty, col=cnf$color)
+            }
+        }
+        axis(1, at=pretty(c(0,max_x)))
+        axis(
+            2,
+            at=threads,
+            labels=c(sapply(
+                threads[1:(length(threads)-1)],
+                function (t) paste("omp", t, sep="-")
+            ), "io-0"),
+            las=2
+        )
+        mtext("Time, s", side=1, line=3)
+        mtext("Thread", side=2, line=4)
+        title(fsnames[[fs]])
+    }
+    # a single legend below the last panel
+    legend(
+        "bottom",
+        inset=c(0,0.2),
+        legend=sapply(conf, function (c) c$name),
+        col=sapply(conf, function (c) c$color),
+        lty=sapply(conf, function (c) c$lty),
+        lwd=sapply(conf, function (c) c$lwd),
+        xpd=TRUE
+    )
+}
diff --git a/arma-thesis.org b/arma-thesis.org
@@ -1205,7 +1205,7 @@ fi
cd $dir
git checkout master
git pull
-git checkout 19c0abb1cc442a5ec0134c5028f77cb9f3fb9816
+git checkout 908739d3a21354b86b5397619deca26c84355fb6
#+end_src
#+RESULTS:
@@ -3678,13 +3678,14 @@ of the running time, the use of network-mounted file systems may slow down this
stage. To optimise it, wavy surface parts were written to file as soon as a full
time slice was available: all completed parts were grouped by the time slices
they belong to and subsequently written to file as soon as the whole time slice was
-finished. That way a separate thread starts writing to files as soon as the
-first time slice is available and finishes it after the main thread group
-finishes the computation. The total time needed to perform I/O is slightly
-increased, but the I/O is done in parallel to computation so the total running
-time is decreased. Using this approach with local file system has the same
-effect, but the total reduction in execution time is small, because local file
-system is more performant.
+finished (fig.\nbsp{}[[fig-arma-io-events]]). That way a separate thread starts
+writing to files as soon as the first time slice is available and finishes
+writing after the main thread group completes the computation. The total time
+needed to perform I/O slightly increases, but, since the I/O is done in
+parallel with the computation, the total running time decreases
+(table\nbsp{}[[tab-arma-io-performance]]). Using this approach with a local
+file system has the same effect, but the total reduction in execution time is
+small, because the local file system is more performant.
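+
+As an illustration of this scheme, below is a minimal sketch (not the actual
+ARMA code): a single producer loop stands in for the main thread group, and a
+dedicated writer thread drains a queue of completed time slices and writes
+them to a file. All names (~TimeSlice~, ~slice_queue~, ~zeta.bin~) are
+illustrative only.
+#+begin_src cpp
+// Sketch of overlapping computation with output: the main thread produces
+// time slices, a background thread writes each completed slice to disk.
+#include <condition_variable>
+#include <fstream>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+struct TimeSlice {
+    int index;
+    std::vector<double> values; // wavy surface points of one time slice
+};
+
+int main() {
+    std::queue<TimeSlice> slice_queue;
+    std::mutex mtx;
+    std::condition_variable cv;
+    bool done = false;
+    // writer thread: starts writing as soon as the first slice arrives and
+    // stops only after the producer has finished and the queue is empty
+    std::thread writer([&] {
+        std::ofstream out("zeta.bin", std::ios::binary);
+        for (;;) {
+            std::unique_lock<std::mutex> lock(mtx);
+            cv.wait(lock, [&] { return !slice_queue.empty() || done; });
+            if (slice_queue.empty() && done) { break; }
+            TimeSlice slice = std::move(slice_queue.front());
+            slice_queue.pop();
+            lock.unlock();
+            out.write(reinterpret_cast<const char*>(slice.values.data()),
+                      slice.values.size()*sizeof(double));
+        }
+    });
+    // producer: generate slices and hand each completed one to the writer
+    const int nslices = 100, npoints = 1024;
+    for (int t=0; t<nslices; ++t) {
+        TimeSlice slice{t, std::vector<double>(npoints, double(t))};
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            slice_queue.push(std::move(slice));
+        }
+        cv.notify_one();
+    }
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        done = true;
+    }
+    cv.notify_one();
+    writer.join();
+    return 0;
+}
+#+end_src
+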
#+name: tab-arma-io-performance
#+begin_src R
@@ -3716,6 +3717,24 @@ arma.print_sync_vs_async_io(suffix_names, row_names, top_names)
| Generate wavy surface | 1.26 | 1.26 | 1.33 | 1.33 | 3.30 | 11.06 |
| Write output to files | 0.28 | 2.34 | 10.95 | 0.00 | 0.00 | 0.00 |
+#+name: fig-arma-io-events
+#+header: :width 6 :height 9 :results output graphics
+#+begin_src R :file build/arma-io-events.pdf
+source(file.path("R", "benchmarks.R"))
+fsnames <- list(
+ xfs="XFS",
+ nfs="NFS",
+ gfs="GlusterFS"
+)
+par(mfrow=c(3,1), family="serif")
+arma.plot_io_events(fsnames)
+#+end_src
+
+#+name: fig-arma-io-events
+#+caption: Event plot for XFS, NFS and GlusterFS showing the time intervals spent on I/O and computation by different threads.
+#+RESULTS: fig-arma-io-events
+[[file:build/arma-io-events.pdf]]
+
**** Parallel velocity potential field computation.
The benchmarks for AR, MA and LH models showed that velocity potential field
computation consumes only a fraction of the total programme execution time, however,
@@ -4705,7 +4724,6 @@ Plugging it in the boundary condition yields
Here \(\epsilon\) is white noise and \(C_t\) includes \(dk\). Substituting the
integral with an infinite sum yields the two-dimensional form of
eq.\nbsp{}[[eq-longuet-higgins]].
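+
+For reference, the resulting series is, up to notation, the classic
+Longuet-Higgins expansion with random phases \(\epsilon_n\) (the symbols here
+are assumed and may differ from those used in eq.\nbsp{}[[eq-longuet-higgins]]):
+\begin{equation*}
+\zeta(x, y, t) = \sum_n c_n \cos(u_n x + v_n y - \omega_n t + \epsilon_n).
+\end{equation*}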
-
** Derivative in the direction of the surface normal
:PROPERTIES:
:CUSTOM_ID: directional-derivative