commit 789b3fe86068bcb325ee1598a056bd4680ebee06
parent e7356e59fe198f916c18176b3cc6f576d039a796
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date: Mon, 19 Apr 2021 16:31:00 +0300
Last section in methods.
Diffstat:
main.bib |  76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
main.tex | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 168 insertions(+), 15 deletions(-)
diff --git a/main.bib b/main.bib
@@ -205,17 +205,71 @@
month = {July}
}
-@inproceedings{gankevich2017subord,
- author = {I. Gankevich and Y. Tipikin and V. Korkhov},
- booktitle = {Proceedings of International Conference on High Performance
+@InProceedings{ gankevich2017subord,
+ author = {I. Gankevich and Y. Tipikin and V. Korkhov},
+ booktitle = {Proceedings of International Conference on High Performance
Computing Simulation (HPCS'17)},
- title = {Subordination: Providing Resilience to Simultaneous Failure
+ title = {Subordination: Providing Resilience to Simultaneous Failure
of Multiple Cluster Nodes},
- year = {2017},
- pages = {832--838},
- publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
- address = {NJ, USA},
- doi = {10.1109/HPCS.2017.126},
- isbn = {978-1-5386-3250-5},
- month = {July},
+ year = {2017},
+ pages = {832--838},
+ publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+ address = {NJ, USA},
+ doi = {10.1109/HPCS.2017.126},
+ isbn = {978-1-5386-3250-5},
+ month = {July}
+}
+
+@Article{ galassi2002guile,
+ title = {Guile Reference Manual},
+ author = {Galassi, Mark and Blandy, Jim and Houston, Gary and Pierce,
+ Tim and Jerram, Neil and Grabmueller, Martin},
+ year = {2002},
+ publisher = {Citeseer}
+}
+
+@Article{ sussman1998,
+ author = {Sussman, Gerald Jay and Steele, Guy L.},
+ year = {1998},
+ month = {12},
+ title = {The First Report on Scheme Revisited},
+ journal = {Higher-Order and Symbolic Computation},
+ pages = {399--404},
+ volume = {11},
+ number = {4},
+ doi = {10.1023/A:1010079421970}
+}
+
+@Article{ mccarthy1960,
+ author = {McCarthy, John},
+ title = {Recursive Functions of Symbolic Expressions and Their
+ Computation by Machine, Part I},
+ year = {1960},
+ issue_date = {April 1960},
+ publisher = {Association for Computing Machinery},
+ address = {New York, NY, USA},
+ volume = {3},
+ number = {4},
+ issn = {0001-0782},
+ doi = {10.1145/367177.367199},
+ journal = {Commun. ACM},
+ month = apr,
+ pages = {184--195},
+ numpages = {12}
+}
+
+@Misc{ ndbc-web-data-guide,
+ title = {{NDBC} Web Data Guide},
+ url = {https://www.ndbc.noaa.gov/docs/ndbc_web_data_guide.pdf},
+ year = {2015},
+ month = {October}
+}
+
+@TechReport{ ndbc-techreport,
+ title = {Nondirectional and directional wave data analysis
+ procedures},
+ author = {Earle, Marshall D},
+ type = {NDBC Technical Document},
+ institution = {NDBC},
+ year = {1996}
}
diff --git a/main.tex b/main.tex
@@ -6,6 +6,8 @@
\usepackage{cite}
\usepackage{graphicx}
\usepackage{url}
+\usepackage{listings}
+\usepackage{tikz}
\begin{document}
@@ -313,7 +315,7 @@ There several responsibilities of cluster scheduler:
\item maintain a list of available cluster nodes.
\end{itemize}
In order to implement them we created a kernel queue and a thread pool for each
-concern that the scheduler has to deal with: we have
+concern that the scheduler has to deal with (see~figure~\ref{fig-local-routing}): we have
\begin{itemize}
\item timer queue for scheduled and periodic tasks,
\item network queue for sending to and receiving kernels from other cluster nodes,
@@ -328,6 +330,37 @@ as separate threads are used to send and receive kernels, but benchmarks showed
this is not a big problem as most of the time these threads wait for the operating
system kernel to transfer the data.
+\begin{figure}
+ \centering
+ \tikzset{Rect/.style={text width=1.30cm,draw,align=center,thick,rounded corners}}
+ \begin{tikzpicture}[x=1.75cm,y=-1.25cm]
+ \node[Rect] (parallel) at (0,0) {Processor queue\strut};
+ \node[Rect] (timer) at (1,0) {Timer queue\strut};
+ \node[Rect] (disk) at (2,0) {Disk queue\strut};
+ \node[Rect] (nic) at (3,0) {Network queue\strut};
+ \node[Rect] (process) at (4,0) {Process queue\strut};
+ \node[Rect] (cpu0) at (0,-1) {CPU 0\strut};
+ \node[Rect] (cpu1) at (0,1) {CPU 1\strut};
+ \node[Rect] (disk0) at (2,-1) {Disk 0\strut};
+ \node[Rect] (disk1) at (2,1) {Disk 1\strut};
+ \node[Rect] (timer0) at (1,-1) {Timer 0\strut};
+ \node[Rect] (nic0) at (3,-1) {NIC 0\strut};
+ \node[Rect] (nic1) at (3,1) {NIC 1\strut};
+ \node[Rect] (parent) at (4,-1) {Parent\strut};
+ \node[Rect] (child) at (4,1) {Child\strut};
+ \path[draw,thick] (parallel) -- (cpu0);
+ \path[draw,thick] (parallel) -- (cpu1);
+ \path[draw,thick] (timer) -- (timer0);
+ \path[draw,thick] (disk) -- (disk0);
+ \path[draw,thick] (disk) -- (disk1);
+ \path[draw,thick] (nic) -- (nic0);
+ \path[draw,thick] (nic) -- (nic1);
+ \path[draw,thick] (process) -- (parent);
+ \path[draw,thick] (process) -- (child);
+ \end{tikzpicture}
+ \caption{Default kernel queues for each concern.\label{fig-local-routing}}
+\end{figure}
+
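+The following listing sketches how these queues and thread pools might be
+organised in code. This is an illustration only: the type and field names are
+ours and do not match the actual source code.
+
+\begin{lstlisting}[language=C++,%
+caption={A sketch of per-concern kernel queues and thread pools (illustration only).},%
+captionpos=b,
+label={lst-queues-sketch}]
+// Placeholder for a thread-safe queue of kernel pointers.
+struct kernel_queue { /* ... */ };
+
+// Each concern has its own queue of kernels and its own pool of
+// threads that pop kernels from the queue and process them.
+// (Requires <thread> and <vector>.)
+struct concern {
+    kernel_queue kernels;
+    std::vector<std::thread> threads;  // one thread per CPU, disk, NIC etc.
+};
+
+struct local_scheduler {
+    concern processor;  // kernels that run on local processor cores
+    concern timer;      // scheduled and periodic kernels
+    concern disk;       // kernels that do local disk input/output
+    concern network;    // kernels sent to and received from other nodes
+    concern process;    // kernels exchanged with child processes
+};
+\end{lstlisting}
+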
Cluster scheduler runs applications in child processes; this approach is
natural for UNIX-like operating systems as the parent process has full control
of its children: the amount of resources can be limited (the number of
@@ -429,9 +462,75 @@ embedded applications the application can be linked directly to the scheduler
to be able to run in the same daemon process, that way the overhead of the
scheduler is minimal.
-\subsection{Kernels as intermediate representation for Guile language}
-
-TODO
+\subsection{Parallel and distributed evaluation of Guile expressions using kernels}
+
+The low-level kernel interface and the cluster scheduler are written in C++.
+From the authors' perspective C is too low-level and Java has too much overhead
+for cluster computing, whereas C++ is the middle ground. The
+implementation is a direct mapping of the ideas discussed in the previous
+sections onto C++ abstractions: a kernel is a base class
+(see~listing~\ref{lst-kernel-api}) for all control flow
+objects with common fields (\texttt{parent}, \texttt{target} and all the others)
+and \texttt{act}, \texttt{react}, \texttt{read}, \texttt{write} virtual
+functions that are overridden in subclasses. This direct mapping is natural for
+a mixed-paradigm language like C++, but functional languages may benefit from
+implementing the same ideas in the compiler or interpreter.
+
+\begin{lstlisting}[language=C++,%
+caption={Public interface of the kernel and the queue classes in C++ (simplified for clarity).},%
+captionpos=b,
+label={lst-kernel-api}]
+// upstream: the kernel is sent away for processing;
+// downstream: the kernel carries the result back to its parent.
+enum class states {upstream, downstream};
+
+class kernel {
+public:
+ virtual void act();                    // called when the kernel is processed
+ virtual void react(kernel* child);     // called when a child kernel returns
+ virtual void write(buffer& out) const; // serialise the kernel into a buffer
+ virtual void read(buffer& in);         // deserialise the kernel from a buffer
+ kernel* parent = nullptr;
+ kernel* target = nullptr;
+ states state = states::upstream;
+};
+
+class queue {
+public:
+ void push(kernel* k);
+};
+\end{lstlisting}
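+
+To illustrate how this interface is used in practice, the following listing
+sketches a hypothetical pair of kernels: a child that performs a computation
+in \texttt{act} and a parent that collects the result in \texttt{react}. The
+class names are chosen for illustration and do not appear in the actual
+source code.
+
+\begin{lstlisting}[language=C++,%
+caption={A hypothetical kernel subclass (illustration only, not part of the scheduler source code).},%
+captionpos=b,
+label={lst-kernel-example}]
+// Child kernel: sums an array of numbers.
+class sum_kernel: public kernel {
+public:
+    std::vector<double> numbers;
+    double result = 0;
+    void act() override {
+        // act() is called when the kernel is popped from a queue
+        for (double x : numbers) { result += x; }
+        state = states::downstream;  // route the kernel back to its parent
+    }
+    // write() and read() would serialise numbers and result
+    // if the kernel is sent to another cluster node.
+};
+
+// Parent kernel: creates the child and collects its result.
+class main_kernel: public kernel {
+public:
+    void act() override {
+        auto* child = new sum_kernel;
+        child->parent = this;
+        // ... fill child->numbers and push the child to a queue ...
+    }
+    void react(kernel* child) override {
+        // react() is called when the child comes back downstream
+        double sum = static_cast<sum_kernel*>(child)->result;
+        // ... use the sum ...
+    }
+};
+\end{lstlisting}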
+
+We made a reference implementation of kernels for the Guile
+language~\cite{galassi2002guile}. Guile is a dialect of
+Scheme~\cite{sussman1998}, which in turn is a dialect of
+LISP~\cite{mccarthy1960}. The distinctive feature of LISP-like languages is
+homoiconicity, i.e.~the code and the data are represented by the same tree-like
+structure (lists that may contain other lists as elements). This feature makes
+it possible to express parallelism directly in the language: every list element
+can be computed independently and can be sent to other cluster nodes for
+parallel computation. To implement parallelism we created a Guile interpreter
+that evaluates every list element in parallel using kernels. In practice this
+means that every argument of a function call (a function call is also a list
+with the first element being the function name) is computed in parallel. This
+interpreter is able to run any existing Guile programme (provided that it does
+not use threads, locks and semaphores explicitly) and produce the same output
+as the original interpreter; at the same time the programme automatically uses
+cluster nodes for parallel computations, and fault tolerance is provided
+automatically by our cluster scheduler. From the authors' perspective
+this approach is the most transparent and safe way of writing parallel and
+distributed programmes with a clear separation of concerns: the programmer takes
+care of the application logic, and the cluster scheduler takes care of the
+parallelism, load balancing and fault tolerance.
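+
+The following listing sketches how such an interpreter can map the evaluation
+of a single function call onto kernels. It is a simplified illustration of the
+idea rather than the actual interpreter source code: \texttt{expression} and
+the helper functions are placeholders for the interpreter's real data
+structures and operations.
+
+\begin{lstlisting}[language=C++,%
+caption={A sketch of parallel evaluation of function call arguments with kernels (illustration only).},%
+captionpos=b,
+label={lst-eval-sketch}]
+struct expression;  // placeholder for the interpreter's AST node
+std::vector<expression*> arguments_of(expression* call);
+expression* function_of(expression* call);
+expression* apply(expression* fn, const std::vector<expression*>& args);
+
+// Evaluates a function call (fn arg1 arg2 ...): every argument is
+// evaluated by its own child kernel, possibly on another cluster node.
+class eval_kernel: public kernel {
+public:
+    expression* expr = nullptr;
+    expression* result = nullptr;
+    std::vector<expression*> results;
+    void act() override {
+        for (expression* arg : arguments_of(expr)) {
+            auto* child = new eval_kernel;
+            child->expr = arg;
+            child->parent = this;
+            // ... push the child to the processor or network queue ...
+        }
+    }
+    void react(kernel* child) override {
+        // called once per child, when its result comes back downstream
+        // (a real interpreter also matches results to argument positions)
+        results.push_back(static_cast<eval_kernel*>(child)->result);
+        if (results.size() == arguments_of(expr).size()) {
+            result = apply(function_of(expr), results);
+            state = states::downstream;  // return the result to the parent
+        }
+    }
+};
+\end{lstlisting}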
+
+In order to test the performance of our interpreter we used a programme that
+processes frequency-directional spectra of ocean waves from the NDBC
+dataset~\cite{ndbc-web-data-guide,ndbc-techreport}. Each spectrum consists of
+five variables, each of which is stored in a separate file in the form of a
+time series. First, we find the five files that correspond to the same station
+(where the data was collected) and the same year. Then we merge the
+corresponding records from these files into a single vector-valued time
+series. Incomplete groups of files and incomplete records are removed. After
+that we write the resulting groups to disk. We wrote this programme in C++
+with kernels, in Guile with kernels and in Guile without kernels.
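+
+The following listing sketches the merging step of this programme. It is a
+simplified illustration: the real programme reads the actual NDBC file
+format, and \texttt{read\_time\_series} and \texttt{write\_groups} are
+placeholders.
+
+\begin{lstlisting}[language=C++,%
+caption={A sketch of merging per-variable time series into vector-valued records (illustration only).},%
+captionpos=b,
+label={lst-merge-sketch}]
+// Merge five per-variable time series into one vector-valued series.
+// Records are matched by timestamp; incomplete records are dropped.
+// (Requires <array>, <ctime>, <iterator>, <map>, <string>, <utility>, <vector>.)
+std::vector<std::pair<std::time_t,double>> read_time_series(const std::string& path);
+void write_groups(const std::map<std::time_t,std::array<double,5>>& groups);
+
+void merge(const std::array<std::string,5>& files) {
+    std::map<std::time_t,std::array<double,5>> groups;
+    std::map<std::time_t,int> count;
+    for (int v = 0; v < 5; ++v) {
+        for (const auto& [timestamp, value] : read_time_series(files[v])) {
+            groups[timestamp][v] = value;
+            ++count[timestamp];
+        }
+    }
+    // remove incomplete records (fewer than five variables present)
+    for (auto it = groups.begin(); it != groups.end(); ) {
+        it = (count[it->first] == 5) ? std::next(it) : groups.erase(it);
+    }
+    write_groups(groups);
+}
+\end{lstlisting}
+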
\section{Results}