arma-thesis

git clone https://git.igankevich.com/arma-thesis.git
Log | Files | Refs | LICENSE

commit d7d4971ff5c8c92168016bcd92567707ffdfb2b1
parent d4e20ae9eb7b056f15847acf05d44aa4b4241f8f
Author: Ivan Gankevich <igankevich@ya.ru>
Date:   Thu,  3 Aug 2017 18:42:34 +0300

Add results for the new benchmarks.

Diffstat:
Makefile | 2++
R/benchmarks.R | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
arma-thesis-ru.org | 38++++++++++++++++++++++++++++++++++++++
arma-thesis.org | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
preamble.tex | 3+++
5 files changed, 156 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -13,9 +13,11 @@ all: build/$(PHD_RU).pdf build/$(PHD_EN).pdf build/$(PHD_RU).pdf: $(PHD_RU).tex preamble.tex bib/* latexmk $(FLAGS) -f $(PHD_RU).tex + true build/$(PHD_EN).pdf: $(PHD_EN).tex preamble.tex bib/* latexmk $(FLAGS) -f $(PHD_EN).tex + true clean: rm -f build/$(PHD_EN)* diff --git a/R/benchmarks.R b/R/benchmarks.R @@ -0,0 +1,57 @@ +source(file.path("build", "arma-benchmarks", "R", "arma.load.R")) + +arma.load_benchmark_data <- function(framework, models, tags) { + data <- data.frame() + for (m in models) { + if (!(m %in% colnames(data))) { + data[,m] <- rep(NA, nrow(data)) + } + idx <- 1 + for (t in tags) { + values <- arma.load( + file.path("build", "arma-benchmarks", "output"), + "gpulab1", + 10000, + framework, + m, + t, + ".*\\s+([0-9]+)us.*" + ) + name <- names(tags)[idx] + data[if (nchar(name) == 0) t else name,m] <- mean(values/1000/1000) + idx <- idx + 1 + } + } + data +} + +arma.print_openmp_vs_opencl <- function(model_names, row_names) { + library(ascii) + options(asciiType="org") + models <- c("ar", "ma", "lh"); + frameworks <- c("openmp", "opencl") + tags <- list( + "deteremine_coefficients", + "validate", + "generate_surface", + velocity=c("window_function", "second_function", "fft", "dev_to_host_copy") + ) + all_data <- list() + for (framework in frameworks) { + all_data[[framework]] <- arma.load_benchmark_data(framework, models, tags) + } + # translate and pretty print in org-mode format + saved_row_names <- rownames(all_data$openmp) + for (framework in names(all_data)) { + rownames(all_data[[framework]]) <- 1:nrow(all_data[[framework]]) + } + big_table <- merge(all_data$openmp, all_data$opencl, by="row.names") + rownames(big_table) <- saved_row_names + big_table[,"Row.names"] <- saved_row_names + big_table <- big_table[,!(names(big_table) == "ma.y")] + big_table[,"Row.names"] <- sapply(big_table[,"Row.names"], function (c) get(c, row_names)) + colnames(big_table) <- sapply(colnames(big_table), function (c) get(c, model_names)) +# rownames(big_table) <- sapply(rownames(big_table), function (c) paste("~", c, "~", sep="")) + print(ascii(c("", "", "", "OpenMP", "", "OpenCL"))) + print(ascii(big_table, include.rownames=FALSE)) +} diff --git a/arma-thesis-ru.org b/arma-thesis-ru.org @@ -2551,6 +2551,9 @@ downstream-объектов метод ~react~ их родителя вызыв реализации модели АРСС. **** Производительность реализаций на MPI, OpenMP и OpenCL. +:PROPERTIES: +:header-args:R: :results output org +:END: Программная реализация состояла в создании и отладке прототипа программы и в последующем написании компоненты виртуального полигона на языке более низкого уровня. При этом тесты показали, что одной высокопроизводительной @@ -2613,6 +2616,41 @@ Mathematica\nbsp{}cite:mathematica10, а на втором этапе логик | 760000 | 1.56 | 76.86 | 61.41 | 3.47 | 0.156 | 0.155 | | 800000 | 1.64 | 81.03 | 66.42 | 3.25 | 0.166 | 0.174 | +#+name: tab-arma-performance +#+begin_src R :results output org :exports results +source(file.path("R", "benchmarks.R")) +model_names <- list( + ar.x="АР", + ma.x="СС", + lh.x="ЛХ", + ar.y="АР", + ma.y="СС", + lh.y="ЛХ", + Row.names="\\orgcmidrule{2-4}{5-6}Подпрограмма" +) +row_names <- list( + deteremine_coefficients="Определение коэффициентов", + validate="Проверка модели", + generate_surface="Генерация поверхности", + velocity="Выч. потенциалов скорости" +) +arma.print_openmp_vs_opencl(model_names, row_names) +#+end_src + +#+name: tab-arma-performance +#+caption: Время работы (с.) реализации OpenMP и OpenCL для моделей АР, СС и ЛХ. +#+attr_latex: :booktabs t +#+RESULTS: tab-arma-performance +#+BEGIN_SRC org +| | | | OpenMP | | OpenCL | +| \orgcmidrule{2-4}{5-6}Подпрограмма | АР | СС | ЛХ | АР | ЛХ | +|------------------------------------+------+-------+--------+--------+--------| +| Определение коэффициентов | 0.02 | 0.26 | 0.18 | 0.01 | 1.19 | +| Проверка модели | 0.09 | 19.19 | | 0.08 | | +| Генерация поверхности | 1.29 | 10.12 | 351.67 | 769.42 | 25.09 | +| Выч. потенциалов скорости | 0.02 | 0.02 | 0.02 | 0.01 | 0.01 | +#+END_SRC + Кроме выбора стандарта параллельных вычислений на время работы программы влияет выбор библиотек типовых вычислительных методов, и эффективность этих библиотек была показана тестированием их разработчиками. В качестве библиотеки для diff --git a/arma-thesis.org b/arma-thesis.org @@ -1194,6 +1194,24 @@ Partition size = (21,10,10) Zeta size = (192,32,32) 'zeta.csv' -> 'zeta-skewnormal.csv' #+end_example +** Download arma-benchmarks data from repository +#+begin_src sh :exports none :results verbatim +set -e +dir=build/arma-benchmarks +mkdir -p $dir +if ! test -d "$dir/.git" +then + git clone https://github.com/igankevich/arma-benchmarks $dir +fi +cd $dir +git checkout master +git pull +git checkout c230b968c19b29c05a1c37691c3c039b74298871 +#+end_src + +#+RESULTS: +: Ваша ветка обновлена в соответствии с «origin/master». +: Already up-to-date. * Introduction **** Topic relevance. @@ -3417,6 +3435,9 @@ Non-homogeneous kernels may be handled by predicting their execution time, but such kernels are not present in ARMA model implementation. **** Performance of MPI, OpenMP, OpenCL implementations. +:PROPERTIES: +:header-args:R: :results output org +:END: ARMA model does not require highly optimised software implementation to be efficient, its performance is high even without use of co-processors; there are two main causes of that. First, ARMA model itself does not use transcendental @@ -3451,6 +3472,41 @@ OpenCL) to find the most efficient one. | GL, GLUT\nbsp{}cite:kilgard1996opengl | three-dimensional visualisation | | CGAL\nbsp{}cite:fabri2009cgal | wave numbers triangulation | +#+name: tab-arma-performance +#+begin_src R :results output org :exports results +source(file.path("R", "benchmarks.R")) +model_names <- list( + ar.x="AR", + ma.x="MA", + lh.x="LH", + ar.y="AR", + ma.y="MA", + lh.y="LH", + Row.names="\\orgcmidrule{2-4}{5-6}Subroutine" +) +row_names <- list( + deteremine_coefficients="Determine coefficients", + validate="Validate model", + generate_surface="Generate wavy surface", + velocity="Compute velocity potentials" +) +arma.print_openmp_vs_opencl(model_names, row_names) +#+end_src + +#+name: tab-arma-performance +#+caption: Running time (s.) for OpenMP and OpenCL implementations of AR, MA and LH models. +#+attr_latex: :booktabs t +#+RESULTS: tab-arma-performance +#+BEGIN_SRC org +| | | | OpenMP | | OpenCL | +| \orgcmidrule{2-4}{5-6}Subroutine | AR | MA | LH | AR | LH | +|----------------------------------+------+-------+--------+--------+--------| +| Determine coefficients | 0.02 | 0.26 | 0.18 | 0.01 | 1.19 | +| Validate model | 0.09 | 19.19 | | 0.08 | | +| Generate wavy surface | 1.29 | 10.12 | 351.67 | 769.42 | 25.09 | +| Compute velocity potentials | 0.02 | 0.02 | 0.02 | 0.01 | 0.01 | +#+END_SRC + **** Performance of load balancing algorithm. Software implementation of wavy surface generation is balanced in terms of the load on processor cores, however, as shown by tests, has high load on storage diff --git a/preamble.tex b/preamble.tex @@ -63,3 +63,6 @@ % wave vector \newcommand{\Kvec}{\vec{k}} \newcommand{\Kveclen}{\lvert\smash[b]{\Kvec}\rvert} + +% org-mode friendly commands +\newcommand{\orgcmidrule}[2]{\cmidrule(lr){#1}\cmidrule(lr){#2}}