iccsa-21-guile

git clone https://git.igankevich.com/iccsa-21-guile.git

commit 789b3fe86068bcb325ee1598a056bd4680ebee06
parent e7356e59fe198f916c18176b3cc6f576d039a796
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date:   Mon, 19 Apr 2021 16:31:00 +0300

Last section in methods.

Diffstat:
main.bib | 76+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
main.tex | 107++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 168 insertions(+), 15 deletions(-)

diff --git a/main.bib b/main.bib
@@ -205,17 +205,71 @@
 month = {July}
 }
-@inproceedings{gankevich2017subord,
- author = {I. Gankevich and Y. Tipikin and V. Korkhov},
- booktitle = {Proceedings of International Conference on High Performance
+@InProceedings{ gankevich2017subord,
+  author = {I. Gankevich and Y. Tipikin and V. Korkhov},
+  booktitle = {Proceedings of International Conference on High Performance
 Computing Simulation (HPCS'17)},
- title = {Subordination: Providing Resilience to Simultaneous Failure
+  title = {Subordination: Providing Resilience to Simultaneous Failure
 of Multiple Cluster Nodes},
- year = {2017},
- pages = {832--838},
- publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
- address = {NJ, USA},
- doi = {10.1109/HPCS.2017.126},
- isbn = {978-1-5386-3250-5},
- month = {July},
+  year = {2017},
+  pages = {832--838},
+  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+  address = {NJ, USA},
+  doi = {10.1109/HPCS.2017.126},
+  isbn = {978-1-5386-3250-5},
+  month = {July}
+}
+
+@Article{ galassi2002guile,
+  title = {Guile Reference Manual},
+  author = {Galassi, Mark and Blandy, Jim and Houston, Gary and Pierce,
+            Tim and Jerram, Neil and Grabmueller, Martin},
+  year = {2002},
+  publisher = {Citeseer}
+}
+
+@Article{ sussman1998,
+  author = {Sussman, Gerald Jay and Steele, Guy L.},
+  year = {1998},
+  month = {12},
+  title = {The First Report on Scheme Revisited},
+  journal = {Higher-Order and Symbolic Computation},
+  pages = {399--404},
+  volume = {11},
+  number = {4},
+  doi = {10.1023/A:1010079421970}
+}
+
+@Article{ mccarthy1960,
+  author = {McCarthy, John},
+  title = {Recursive Functions of Symbolic Expressions and Their
+           Computation by Machine, Part I},
+  year = {1960},
+  issue_date = {April 1960},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  volume = {3},
+  number = {4},
+  issn = {0001-0782},
+  doi = {10.1145/367177.367199},
+  journal = {Commun. ACM},
+  month = apr,
+  pages = {184--195},
+  numpages = {12}
+}
+
+@Misc{ ndbc-web-data-guide,
+  title = {{NDBC} Web Data Guide},
+  url = {https://www.ndbc.noaa.gov/docs/ndbc_web_data_guide.pdf},
+  year = {2015},
+  month = {October}
+}
+
+@TechReport{ ndbc-techreport,
+  title = {Nondirectional and Directional Wave Data Analysis Procedures},
+  author = {Earle, Marshall D.},
+  type = {NDBC Technical Document},
+  institution = {NDBC},
+  year = {1996}
+}
diff --git a/main.tex b/main.tex
@@ -6,6 +6,8 @@
 \usepackage{cite}
 \usepackage{graphicx}
 \usepackage{url}
+\usepackage{listings}
+\usepackage{tikz}
 
 \begin{document}
@@ -313,7 +315,7 @@ There are several responsibilities of the cluster scheduler:
 \item maintain a list of available cluster nodes.
 \end{itemize}
 In order to implement them we created a kernel queue and a thread pool for each
-concern that the scheduler has to deal with: we have
+concern that the scheduler has to deal with (see~figure~\ref{fig-local-routing}): we have
 \begin{itemize}
 \item timer queue for scheduled and periodic tasks,
 \item network queue for sending kernels to and receiving them from other cluster nodes,
@@ -328,6 +330,37 @@ as separate threads are used to send and receive kernels, but benchmarks showed
 this is not a big problem as most of the time these threads wait for the
 operating system kernel to transfer the data.
 
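+Each queue is polled by its own pool of worker threads. The following
+minimal sketch shows one possible shape of such a queue: the workers pop
+kernels from a shared queue and call their \texttt{act} method. The class
+and member names are illustrative and do not correspond to the actual
+scheduler code.
+
+\begin{lstlisting}[language=C++,%
+caption={A sketch of a kernel queue with its thread pool (illustrative names).},%
+captionpos=b,
+label={lst-queue-sketch}]
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+struct kernel { virtual ~kernel() = default; virtual void act() = 0; };
+
+class kernel_queue {
+public:
+    explicit kernel_queue(unsigned nthreads) {
+        for (unsigned i=0; i<nthreads; ++i) {
+            this->_threads.emplace_back([this] () { this->loop(); });
+        }
+    }
+    ~kernel_queue() { // stop the workers, wait for them to finish
+        { std::lock_guard<std::mutex> lock(this->_mutex); this->_stopped = true; }
+        this->_semaphore.notify_all();
+        for (auto& t : this->_threads) { t.join(); }
+    }
+    void push(kernel* k) {
+        { std::lock_guard<std::mutex> lock(this->_mutex); this->_kernels.push(k); }
+        this->_semaphore.notify_one();
+    }
+private:
+    void loop() {
+        for (;;) {
+            std::unique_lock<std::mutex> lock(this->_mutex);
+            this->_semaphore.wait(lock,
+                [this] () { return this->_stopped || !this->_kernels.empty(); });
+            if (this->_kernels.empty()) { return; } // stopped and drained
+            kernel* k = this->_kernels.front();
+            this->_kernels.pop();
+            lock.unlock();
+            k->act(); // process the kernel outside of the lock
+        }
+    }
+    std::queue<kernel*> _kernels;
+    std::mutex _mutex;
+    std::condition_variable _semaphore;
+    std::vector<std::thread> _threads;
+    bool _stopped = false;
+};
+\end{lstlisting}
+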
+\begin{figure}
+  \centering
+  \tikzset{Rect/.style={text width=1.30cm,draw,align=center,thick,rounded corners}}
+  \begin{tikzpicture}[x=1.75cm,y=-1.25cm]
+    \node[Rect] (parallel) at (0,0) {Processor queue\strut};
+    \node[Rect] (timer) at (1,0) {Timer queue\strut};
+    \node[Rect] (disk) at (2,0) {Disk queue\strut};
+    \node[Rect] (nic) at (3,0) {Network queue\strut};
+    \node[Rect] (process) at (4,0) {Process queue\strut};
+    \node[Rect] (cpu0) at (0,-1) {CPU 0\strut};
+    \node[Rect] (cpu1) at (0,1) {CPU 1\strut};
+    \node[Rect] (disk0) at (2,-1) {Disk 0\strut};
+    \node[Rect] (disk1) at (2,1) {Disk 1\strut};
+    \node[Rect] (timer0) at (1,-1) {Timer 0\strut};
+    \node[Rect] (nic0) at (3,-1) {NIC 0\strut};
+    \node[Rect] (nic1) at (3,1) {NIC 1\strut};
+    \node[Rect] (parent) at (4,-1) {Parent\strut};
+    \node[Rect] (child) at (4,1) {Child\strut};
+    \path[draw,thick] (parallel) -- (cpu0);
+    \path[draw,thick] (parallel) -- (cpu1);
+    \path[draw,thick] (timer) -- (timer0);
+    \path[draw,thick] (disk) -- (disk0);
+    \path[draw,thick] (disk) -- (disk1);
+    \path[draw,thick] (nic) -- (nic0);
+    \path[draw,thick] (nic) -- (nic1);
+    \path[draw,thick] (process) -- (parent);
+    \path[draw,thick] (process) -- (child);
+  \end{tikzpicture}
+  \caption{Default kernel queues for each concern.\label{fig-local-routing}}
+\end{figure}
+
 The cluster scheduler runs applications in child processes; this approach is
 natural for UNIX-like operating systems as the parent process has full
 control of its children: the amount of resources can be limited (the number of
@@ -429,9 +462,75 @@ embedded applications the application can be linked directly to the scheduler
 to be able to run in the same daemon process; that way the overhead of the
 scheduler is minimal.
 
-\subsection{Kernels as intermediate representation for Guile language}
-
-TODO
+\subsection{Parallel and distributed evaluation of Guile expressions using kernels}
+
+The low-level interface of kernels and the cluster scheduler itself are
+written in C++. From the authors' perspective C is too low-level and Java
+has too much overhead for cluster computing, whereas C++ is a middle-ground
+choice. The implementation is a direct mapping of the ideas discussed in
+the previous sections onto C++ abstractions: a kernel is a base class
+(see~listing~\ref{lst-kernel-api}) for all control flow objects with common
+fields (\texttt{parent}, \texttt{target} and all the others) and with
+\texttt{act}, \texttt{react}, \texttt{read} and \texttt{write} virtual
+functions that are overridden in subclasses. This direct mapping is natural
+for a mixed-paradigm language like C++, but functional languages may
+benefit from implementing the same ideas in the compiler or interpreter.
+
+\begin{lstlisting}[language=C++,%
+caption={Public interface of the kernel and the queue classes in C++ (simplified for clarity).},%
+captionpos=b,
+label={lst-kernel-api}]
+enum class states {upstream, downstream};
+
+class kernel {
+public:
+    virtual void act();
+    virtual void react(kernel* child);
+    virtual void write(buffer& out) const;
+    virtual void read(buffer& in);
+    kernel* parent = nullptr;
+    kernel* target = nullptr;
+    states state = states::upstream;
+};
+
+class queue {
+public:
+    void push(kernel* k);
+};
+\end{lstlisting}
+
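+To give the reader a feel for the programming model, the following hedged
+sketch shows how a subclass could sum the elements of an array using two
+child kernels. It assumes that the scheduler routes a kernel in the
+downstream state to the \texttt{react} method of its parent and that
+\texttt{processor\_queue} is an instance of the queue class from
+listing~\ref{lst-kernel-api}; the names and the splitting strategy are
+illustrative.
+
+\begin{lstlisting}[language=C++,%
+caption={A hypothetical kernel subclass that sums an array in parallel.},%
+captionpos=b,
+label={lst-kernel-example}]
+extern queue processor_queue; // illustrative global queue instance
+
+class sum_part: public kernel { // sums one half of the array
+public:
+    sum_part(const double* first, const double* last):
+        _first(first), _last(last) {}
+    void act() override {
+        for (const double* p = _first; p != _last; ++p) { _sum += *p; }
+        state = states::downstream; // return to the parent
+        processor_queue.push(this);
+    }
+    double sum() const { return _sum; }
+private:
+    const double* _first;
+    const double* _last;
+    double _sum = 0;
+};
+
+class sum_all: public kernel { // splits the work between two children
+public:
+    sum_all(const double* first, const double* last):
+        _first(first), _last(last) {}
+    void act() override {
+        const double* middle = _first + (_last - _first)/2;
+        auto* a = new sum_part(_first, middle);
+        auto* b = new sum_part(middle, _last);
+        a->parent = this;
+        b->parent = this;
+        processor_queue.push(a);
+        processor_queue.push(b);
+    }
+    void react(kernel* child) override {
+        _sum += static_cast<sum_part*>(child)->sum();
+        delete child;
+        if (++_num_completed == 2) {
+            state = states::downstream; // the total sum is ready
+            processor_queue.push(this);
+        }
+    }
+    double sum() const { return _sum; }
+private:
+    const double* _first;
+    const double* _last;
+    double _sum = 0;
+    int _num_completed = 0;
+};
+\end{lstlisting}
+
+Because a child kernel carries all of its state in its fields, it can be
+serialised with \texttt{read} and \texttt{write} and executed on another
+cluster node without any changes to the code.
+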
+We made a reference implementation of kernels for the Guile
+language~\cite{galassi2002guile}. Guile is a dialect of
+Scheme~\cite{sussman1998}, which in turn is a dialect of
+LISP~\cite{mccarthy1960}. The distinct feature of LISP-like languages is
+homoiconicity, i.e.~the code and the data are represented by the same
+tree-like structure (lists that may contain other lists as elements). This
+feature makes it possible to express parallelism directly in the language:
+every list element can be computed independently and can be sent to other
+cluster nodes for parallel computation. To implement parallelism we created
+a Guile interpreter that evaluates every list element in parallel using
+kernels. In practice this means that every argument of a function call (a
+function call is also a list, with the first element being the function
+name) is computed in parallel. This interpreter is able to run any existing
+Guile programme (provided that it does not use threads, locks and
+semaphores explicitly) and produces the same output as the original
+interpreter; at the same time the programme automatically uses cluster
+nodes for parallel computations, and fault tolerance is automatically
+provided by our cluster scheduler. From the authors' perspective this
+approach is the most transparent and safe way of writing parallel and
+distributed programmes with a clear separation of concerns: the programmer
+takes care of the application logic, and the cluster scheduler takes care
+of the parallelism, load balancing and fault tolerance.
+
+In order to test the performance of our interpreter we used a programme
+that processes frequency-directional spectra of ocean waves from the NDBC
+dataset~\cite{ndbc-web-data-guide,ndbc-techreport}. Each spectrum consists
+of five variables, each of which is stored in a separate file in the form
+of time series. First, we find the five files that correspond to the same
+station where the data was collected and to the same year. Then we merge
+the corresponding records from these files into a single vector-valued time
+series. Incomplete groups of files and incomplete records are removed.
+After that we write the resulting groups to disk. We wrote this programme
+in C++ with kernels, in Guile with kernels and in Guile without kernels.
+
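+To make the evaluation strategy concrete, the following sketch shows,
+again with hypothetical names, how an interpreter can map a function call
+onto kernels: one child kernel is created per list element, and the parent
+applies the function when all the children have returned. Here
+\texttt{object}, \texttt{expression}, \texttt{evaluate} and \texttt{apply}
+are stand-ins for the real interpreter data structures and routines.
+
+\begin{lstlisting}[language=C++,%
+caption={A sketch of parallel evaluation of a function call using kernels (illustrative names).},%
+captionpos=b,
+label={lst-eval-sketch}]
+#include <cstddef>
+#include <vector>
+
+struct object {};      // a Scheme value
+struct expression {};  // a Scheme expression, i.e. a list
+object evaluate(const expression& expr);  // defined elsewhere
+object apply(const object& function, const std::vector<object>& args);
+extern queue processor_queue;             // as in the previous sketch
+
+class element_kernel: public kernel { // evaluates one list element
+public:
+    element_kernel(std::size_t index, expression expr):
+        _index(index), _expr(expr) {}
+    void act() override {
+        _result = evaluate(_expr); // may spawn more kernels recursively
+        state = states::downstream;
+        processor_queue.push(this);
+    }
+    std::size_t index() const { return _index; }
+    const object& result() const { return _result; }
+private:
+    std::size_t _index;
+    expression _expr;
+    object _result;
+};
+
+class call_kernel: public kernel { // evaluates (f arg1 ... argN)
+public:
+    explicit call_kernel(std::vector<expression> elements):
+        _elements(std::move(elements)), _results(_elements.size()) {}
+    void act() override {
+        // one child kernel per list element; each of them may be
+        // executed by another thread or sent to another cluster node
+        for (std::size_t i = 0; i < _elements.size(); ++i) {
+            auto* child = new element_kernel(i, _elements[i]);
+            child->parent = this;
+            processor_queue.push(child);
+        }
+    }
+    void react(kernel* k) override {
+        auto* child = static_cast<element_kernel*>(k);
+        _results[child->index()] = child->result();
+        delete child;
+        if (++_num_completed == _elements.size()) {
+            // the first element is the function, the rest are arguments
+            _value = apply(_results.front(),
+                std::vector<object>(_results.begin()+1, _results.end()));
+            state = states::downstream;
+            processor_queue.push(this);
+        }
+    }
+private:
+    std::vector<expression> _elements;
+    std::vector<object> _results;
+    object _value;
+    std::size_t _num_completed = 0;
+};
+\end{lstlisting}
+
 \section{Results}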