commit c5d51fa828aabfc8110422258c11933e811ffcbc
parent 9df8c96f51d0fb62e4cd382a4ae3a903a99e9669
Author: Ivan Gankevich <igankevich@ya.ru>
Date: Mon, 14 Aug 2017 18:31:31 +0300
Add table with XFS, NFS, GFS benchmark results.
Diffstat:
2 files changed, 85 insertions(+), 6 deletions(-)
diff --git a/R/benchmarks.R b/R/benchmarks.R
@@ -1,6 +1,6 @@
source(file.path("build", "arma-benchmarks", "R", "arma.load.R"))
-arma.load_benchmark_data <- function(framework, models, tags) {
+arma.load_benchmark_data <- function(attempt, framework, models, tags) {
data <- data.frame()
for (m in models) {
if (!(m %in% colnames(data))) {
@@ -11,7 +11,7 @@ arma.load_benchmark_data <- function(framework, models, tags) {
values <- arma.load(
file.path("build", "arma-benchmarks", "output"),
"gpulab1",
- "a4",
+ attempt,
10000,
framework,
m,
@@ -42,7 +42,7 @@ arma.print_openmp_vs_opencl <- function(model_names, row_names) {
)
all_data <- list()
for (framework in frameworks) {
- all_data[[framework]] <- arma.load_benchmark_data(framework, models, tags)
+ all_data[[framework]] <- arma.load_benchmark_data("a4", framework, models, tags)
}
# translate and pretty print in org-mode format
saved_row_names <- rownames(all_data$openmp)
@@ -59,3 +59,52 @@ arma.print_openmp_vs_opencl <- function(model_names, row_names) {
print(ascii(c("", "", "", "OpenMP", "", "OpenCL")))
print(ascii(big_table, include.rownames=FALSE))
}
+
+# Build a table of mean run times: one column per file system, one row per tag.
+#
+# Arguments:
+#   attempt      benchmark attempt label; it is joined with the file system
+#                name and the suffix by "-" (e.g. "a5-xfs-seq") before being
+#                passed to arma.load
+#   filesystems  file system names; each one becomes a column of the result
+#   suffix       run variant appended to the attempt label
+#   tags         tags to load; a non-empty element name is used as the row
+#                name, otherwise the tag itself is used
+#
+# Returns a data frame holding mean(values/1000/1000) for every tag and file
+# system pair (presumably microseconds converted to seconds, because the
+# pattern captures the number in front of "us" -- confirm against arma.load).
+arma.load_io_benchmark_data <- function(attempt, filesystems, suffix, tags) {
+  output_dir <- file.path("build", "arma-benchmarks", "output")
+  tag_labels <- names(tags)
+  result <- data.frame()
+  for (fs in filesystems) {
+    # make sure the column exists before single cells are assigned
+    if (!(fs %in% colnames(result))) {
+      result[,fs] <- rep(NA, nrow(result))
+    }
+    for (i in seq_along(tags)) {
+      tag <- tags[[i]]
+      values <- arma.load(
+        output_dir,
+        "gpulab1",
+        paste(attempt, fs, suffix, sep="-"),
+        10000,
+        "openmp",
+        "ar",
+        tag,
+        ".*\\s+([0-9]+)us.*"
+      )
+      # unnamed (or empty-named) tags label their own row
+      label <- tag_labels[i]
+      use_tag <- length(label) == 0 || nchar(label) == 0
+      row_key <- if (use_tag) tag else label
+      result[row_key,fs] <- mean(values/1000/1000)
+    }
+  }
+  result
+}
+
+# Print an org-mode table comparing the sequential ("seq") and parallel
+# ("par") I/O benchmark runs of attempt "a5" on the xfs, nfs and gfs file
+# systems.
+#
+# Arguments:
+#   suffix_names  named list mapping the merged column names ("Row.names",
+#                 "xfs.x", ..., "gfs.y") to the titles to print; ".x" columns
+#                 hold the sequential runs and ".y" columns the parallel ones
+#   row_names     named list mapping tag names to the row titles to print
+#   top_names     two titles for the heading row; they are placed in its
+#                 4th and 7th cells
+#
+# Called for its printed output (the heading row followed by the table).
+arma.print_sync_vs_async_io <- function(suffix_names, row_names, top_names) {
+  library(ascii)
+  # emit org-mode markup, but put the caller's setting back afterwards
+  old_options <- options(asciiType="org")
+  on.exit(options(old_options), add=TRUE)
+  # look up the title of every key, failing loudly on an unknown key
+  translate <- function(keys, titles) {
+    unknown <- setdiff(keys, names(titles))
+    if (length(unknown) > 0) {
+      stop("no title for: ", paste(unknown, collapse=", "), call.=FALSE)
+    }
+    vapply(
+      keys,
+      function(key) as.character(titles[[key]]),
+      character(1),
+      USE.NAMES=FALSE
+    )
+  }
+  tags <- list("generate_surface", "write_all")
+  filesystems <- c("xfs", "nfs", "gfs")
+  all_data <- list()
+  for (suffix in c("seq", "par")) {
+    all_data[[suffix]] <- arma.load_io_benchmark_data("a5", filesystems, suffix, tags)
+  }
+  # translate and pretty print in org-mode format
+  saved_row_names <- rownames(all_data$seq)
+  for (suffix in names(all_data)) {
+    # seq_len() also copes with an empty table, unlike 1:nrow()
+    rownames(all_data[[suffix]]) <- seq_len(nrow(all_data[[suffix]]))
+  }
+  big_table <- merge(all_data$seq, all_data$par, by="row.names")
+  # merge() sorts its key as text ("1", "10", "2", ...); restore the numeric
+  # order so that the saved row names are attached to the right rows
+  row_order <- order(as.integer(as.character(big_table[,"Row.names"])))
+  big_table <- big_table[row_order,]
+  rownames(big_table) <- saved_row_names
+  big_table[,"Row.names"] <- translate(saved_row_names, row_names)
+  colnames(big_table) <- translate(colnames(big_table), suffix_names)
+  print(ascii(c("", "", "", top_names[[1]], "", "", top_names[[2]])))
+  print(ascii(big_table, include.rownames=FALSE))
+}
diff --git a/arma-thesis.org b/arma-thesis.org
@@ -1205,7 +1205,7 @@ fi
cd $dir
git checkout master
git pull
-git checkout f5d212bbd9da472f3b8e08ae5ddc44a20e3d7966
+git checkout 19c0abb1cc442a5ec0134c5028f77cb9f3fb9816
#+end_src
#+RESULTS:
@@ -3498,7 +3498,7 @@ surface parts, whereas MA algorithm requires padding part with noughts to be
able to compute them in parallel. In contrast to these models, LH model has no
dependencies between parts computed in parallel, but requires more computational
power (floating point operations per second).
-**** TODO Performance of OpenMP and OpenCL implementations.
+**** Performance of OpenMP and OpenCL implementations.
:PROPERTIES:
:header-args:R: :results output raw :exports results
:END:
@@ -3638,7 +3638,7 @@ arma.print_openmp_vs_opencl(model_names, row_names)
| Validate model | 0.08 | 0.10 | | 0.08 | |
| Generate wavy surface | 1.26 | 5.57 | 350.98 | 769.38 | 0.02 |
| NIT | 7.11 | 7.43 | | 0.02 | |
-| Copy data from GPU to host | | | | 5.22 | 25.06 |
+| Copy data from GPU | | | | 5.22 | 25.06 |
| Compute velocity potentials | 0.05 | 0.05 | 0.06 | 0.03 | 0.03 |
| Write output to files | 0.27 | 0.27 | 0.27 | 0.28 | 0.27 |
@@ -3686,6 +3686,36 @@ time is decreased. Using this approach with local file system has the same
effect, but the total reduction in execution time is small, because local file
system is more performant.
+#+name: tab-arma-io-performance
+#+begin_src R
+source(file.path("R", "benchmarks.R"))  # defines arma.print_sync_vs_async_io
+suffix_names <- list(  # printed title for each column of the merged table
+xfs.x="XFS",  # ".x" columns come from the sequential ("seq") runs
+xfs.y="XFS",  # ".y" columns come from the parallel ("par") runs
+nfs.x="NFS",
+nfs.y="NFS",
+gfs.x="GlusterFS",
+gfs.y="GlusterFS",
+Row.names="\\orgcmidrule{2-4}{5-7}Subroutine"  # title of the row-label column
+)
+top_names <- c("Sequential", "Parallel")  # heading row printed above the column titles
+row_names <- list(  # printed title for each benchmark tag
+generate_surface="Generate wavy surface",
+write_all="Write output to files"
+)
+arma.print_sync_vs_async_io(suffix_names, row_names, top_names)  # prints the heading row and the table
+#+end_src
+
+#+name: tab-arma-io-performance
+#+caption: Running time (s.) for XFS, NFS and GlusterFS with sequential and parallel I/O programme versions.
+#+attr_latex: :booktabs t
+#+RESULTS: tab-arma-io-performance
+| | | | Sequential | | | Parallel |
+| \orgcmidrule{2-4}{5-7}Subroutine | XFS | NFS | GlusterFS | XFS | NFS | GlusterFS |
+|----------------------------------+------+------+------------+------+------+-----------|
+| Generate wavy surface | 1.26 | 1.26 | 1.33 | 1.33 | 3.30 | 11.06 |
+| Write output to files | 0.28 | 2.34 | 10.95 | 0.00 | 0.00 | 0.00 |
+
**** Parallel velocity potential field computation.
The benchmarks for AR, MA and LH models showed that velocity potential field
computation consumes only a fraction of the total programme execution time; however,