iccsa-21-guile

Functional Programming Interface for Parallel and Distributed Computing
git clone https://git.igankevich.com/iccsa-21-guile.git
Log | Files | Refs

commit 4be4b695cc57e273aaf4b223818b6b891edbb78e
parent 8e4bb0f0596ce1f5c49b38500cd8c48d2ec2bd01
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date:   Sun, 12 Sep 2021 23:58:22 +0300

add slides

Diffstat:
slides.tex | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 289 insertions(+), 4 deletions(-)

diff --git a/slides.tex b/slides.tex @@ -1,7 +1,7 @@ \documentclass[aspectratio=169,xcolor=table]{beamer} \usepackage{polyglossia} \setdefaultlanguage{english} -\usetheme{SaintPetersburg} +\usetheme[numbers]{SaintPetersburg} \usepackage{textcomp} \usepackage{booktabs} @@ -24,25 +24,310 @@ pdfmetalang={en}, pdflicenseurl={http://creativecommons.org/licenses/by-sa/4.0/}, pdfcopyright={Copyright \textcopyright{} 2021 Ivan Petriakov\xmpcomma{} Ivan Gankevich\xmpcomma{}}, - pdfsubject={TODO}, + pdfsubject={Functional programming interface for parallel and distributed computing}, } -\title{TODO} +\title{Functional programming interface\\for parallel and distributed computing} \author{% I.\:Petriakov \and I.\:Gankevich } \institute{Saint Petersburg State University} -\date{July 2021} +\date{September 2021} + +% https://github.com/stuhlmueller/scheme-listings/blob/master/lstlang0.sty +\lstdefinelanguage{scheme}{ + morekeywords=[1]{define, define-syntax, define-macro, lambda, define-stream, stream-lambda, + define*,if,cons,car,cdr}, + morekeywords=[2]{begin, call-with-current-continuation, call/cc, + call-with-input-file, call-with-output-file, case, cond, + do, else, for-each, if, + let*, let, let-syntax, letrec, letrec-syntax, + let-values, let*-values, + and, or, not, delay, force, + quasiquote, quote, unquote, unquote-splicing, + syntax, syntax-rules, eval, environment, query, + car, cdr, cons}, + morekeywords=[3]{import, export}, + alsodigit=!\$\%&*+-./:<=>?@^_~, + sensitive=true, + morecomment=[l]{;}, + morecomment=[s]{\#|}{|\#}, + morestring=[b]", + basicstyle=\small\ttfamily, + keywordstyle={\bf\ttfamily\color[HTML]{4081ec}}, + commentstyle=\color[rgb]{0.33,0.33,0.33}, + stringstyle={\color[HTML]{00a000}}, + upquote=true, + breaklines=true, + breakatwhitespace=true, + literate=*{`}{{`}}{1}, + showstringspaces=false +} + +\lstdefinelanguage{cpp}{ + morekeywords=[1]{class,struct,enum,public,private,protected,virtual,override,const, + void,int,new,delete,nullptr}, + morecomment=[l]{//}, + basicstyle=\small\ttfamily, + keywordstyle={\bf\ttfamily\color[HTML]{4081ec}}, + commentstyle=\color[rgb]{0.33,0.33,0.33}, + stringstyle={\color[HTML]{00a000}}, + escapeinside={LATEX}{END}, +} \begin{document} \frame{\maketitle} \begin{frame}{Motivation} + \begin{itemize} + \item There is no universal low-level representation of distributed computations. + \item There is no high-level interface for distributed computing in functional languages. + \item Existing solutions do not provide automatic fault tolerance for both slave and master nodes. + \end{itemize} + \vfill + + \textit{Parallel} --- several processor cores of single cluster node. + + \textit{Distributed} --- several cluster nodes. +\end{frame} + +\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)} + Kernel = data + code + result of the computation. + \begin{columns}[T] + \begin{column}{0.36\textwidth} +\begin{lstlisting}[language=cpp] +int nested(int a) { + return 123 + a; +} +\end{lstlisting} + \end{column} + \begin{column}{0.63\textwidth} +\begin{lstlisting}[language=cpp] +struct Nested: public Kernel { + int result; + int a; + Nested(int a): a(a) {} + void act() override { + result = a + 123; + LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END(); + } +}; +\end{lstlisting} + \end{column} + \end{columns} + \vfill\small + \begin{tabular}{ll} + \color[HTML]{ac4040}\bf\texttt{async\_call} & push child kernel to the queue \\ + \color[HTML]{ac4040}\bf\texttt{async\_return} & push current kernel to the queue \\ + \color[HTML]{ac4040}\bf\texttt{async\_message} & send a kernel to another one via the queue \\ + \end{tabular} +\end{frame} + +\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)} + \begin{columns}[T] + \begin{column}{0.36\textwidth} +\begin{lstlisting}[language=cpp] +void main() { + // code before + int result = nested(); + // code after + print(result); +} +\end{lstlisting} + \end{column} + \begin{column}{0.63\textwidth} +\begin{lstlisting}[language=cpp] +struct Main: public Kernel { + void act() override { + // code before + LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Nested); + } + void react(Kernel* child) override { + int result = ((Nested*)child)->result; + // code after + print(result); + LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END(); + } +}; + +void main() { + LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Main); + wait(); +} +\end{lstlisting} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[fragile]{Cluster scheduler architecture} + \begin{columns}[T] + \begin{column}{0.40\textwidth} + \tikzset{Rect/.style={text width=1.70cm,draw,align=center,thick,rounded corners}} + \small + \textbf{Daemon process:}\strut{} + \begin{tikzpicture}[x=2.25cm,y=-1.10cm] + \node[Rect] (parallel) at (2,0) {Processor queue\strut}; + %\node[Rect] (timer) at (1,0) {Timer queue\strut}; + %\node[Rect] (disk) at (2,0) {Disk queue\strut}; + \node[Rect] (nic) at (4,0) {Network queue\strut}; + \node[Rect] (process) at (3,0) {Process queue\strut}; + \node[Rect] (cpu0) at (2,-1) {CPU 0\strut}; + \node[Rect] (cpu1) at (2,1) {CPU 1\strut}; + %\node[Rect] (disk0) at (2,-1) {Disk 0\strut}; + %\node[Rect] (disk1) at (2,1) {Disk 1\strut}; + %\node[Rect] (timer0) at (1,-1) {Timer 0\strut}; + \node[Rect] (nic0) at (4,-1) {NIC 0\strut}; + \node[Rect] (nic1) at (4,1) {NIC 1\strut}; + \node[Rect] (child) at (3,1) {Child\strut}; + \path[draw,thick] (parallel) -- (cpu0); + \path[draw,thick] (parallel) -- (cpu1); + %\path[draw,thick] (timer) -- (timer0); + %\path[draw,thick] (disk) -- (disk0); + %\path[draw,thick] (disk) -- (disk1); + \path[draw,thick] (nic) -- (nic0); + \path[draw,thick] (nic) -- (nic1); + \path[draw,thick] (process) -- (child); + \end{tikzpicture} + \vskip0.5\baselineskip\textbf{Application process:}\strut{} + \begin{tikzpicture}[x=2.25cm,y=-1.10cm] + \node[Rect] (parallel) at (2,0) {Processor queue\strut}; + \node[Rect] (process) at (3,0) {Process queue\strut}; + \node[Rect] (cpu0) at (2,-1) {CPU 0\strut}; + \node[Rect] (cpu1) at (2,1) {CPU 1\strut}; + \node[Rect] (parent) at (3,-1) {Parent\strut}; + \path[draw,thick] (parallel) -- (cpu0); + \path[draw,thick] (parallel) -- (cpu1); + \path[draw,thick] (process) -- (parent); + \end{tikzpicture} + \end{column} + \begin{column}{0.50\textwidth} + \vskip0.5\baselineskip + \begin{itemize} + \item Run applications in child processes. + \item Route kernels between application processes running on + different cluster nodes. + \item Maintain a list of available cluster nodes. + \end{itemize} + \vskip3\baselineskip\small\hspace{-2.0cm} + \begin{tabular}{lp{5.5cm}} + \texttt{async\_call} & push child kernel to the queue \\ + \texttt{async\_return} & push current kernel to the queue \\ + \texttt{async\_message} & send a kernel to another one via the queue \\ + \end{tabular} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[fragile]{Fault tolerance} + \begin{itemize} + \item Assumption: \textit{main} kernel has only one child kernel at a time. + \item Every \textit{step} kernel (a child of \textit{main}) has a copy of the \textit{main}. + \item Scheduler ensures that \textit{main} and \textit{step} are on different cluster nodes. + \item Every \textit{step} is also appended to the local log file. + \end{itemize} + \vfill + \begin{columns}[T] + \begin{column}{0.25\textwidth} + \definecolor{mydark}{HTML}{E31A1C} + \definecolor{mylight}{HTML}{FB9A99} + \tikzset{Rect/.style={text width=1.50cm,rounded corners,draw,align=center,thick}} + \tikzset{Arrow/.style={draw,-latex}} + \tikzset{DotNoArrow/.style={draw,mydark,thick}} + \tikzset{Dot/.style={DotNoArrow,-latex}} + \begin{tikzpicture}[x=2.50cm,y=-1.5cm] + \node[Rect] (m0) at (0,-1) {Main kernel}; + \node[Rect] (m1) at (0,0) {Step kernel}; + \node[Rect] (m2) at (-0.5,1) {Child kernel 1}; + \node[Rect] (m3) at (0.5,1) {Child kernel 2}; + % solid arrows + \path[Arrow] (m1) -- (m0); + \path[Arrow] (m2) -- (m1); + \path[Arrow] (m3) -- (m1); + \end{tikzpicture} + \end{column} + \begin{column}{0.70\textwidth} + \begin{tabular}{ll} + Failure & Resolution \\ + \midrule + Child 1 & resend Child 1 to the remaining nodes \\ + Child 2 & resend Child 2 to the remaining nodes \\ + Step & resend Step to the remaining nodes \\ + Main & restore Main from the copy \\ + Main and Step & restore Main and Step from the log \\ + \end{tabular} + \vfill\tiny{} + \begin{itemize} + \item I. Gankevich, Yu. Tipikin, V. Korkhov + \href{http://dx.doi.org/10.1109/HPCS.2017.126}{Subordination: Providing resilience to simultaneous failure of multiple cluster nodes}, HPCS'17, 2017.\\ + \item I. Gankevich, Yu. Tipikin, V. Korkhov, V. Gaiduchok, A. Degtyarev, A. Bogdanov + \href{http://dx.doi.org/10.1504/IJBIDM.2017.10007764}{Master node fault tolerance in distributed big data processing clusters}, International Journal of Business Intelligence and Data Mining, 2017. + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[fragile]{Kernel and queue definition} +\begin{lstlisting}[language=cpp] +enum class states {upstream, downstream, point_to_point}; + +class kernel { +public: + virtual void act(); + virtual void react(kernel* child); + virtual void write(buffer& out) const; + virtual void read(buffer& in); + kernel* parent = nullptr; + kernel* target = nullptr; + states state = states::upstream; +}; + +class queue { +public: + void push(kernel* k); +}; +\end{lstlisting} +\end{frame} + +\begin{frame}[fragile]{Automatic parallelism} + The idea: evaluate arguments in parallel (one kernel for each argument). +\begin{lstlisting}[language=Scheme] +(define (map proc lst) "Parallel map." + (if (null? lst) lst + (cons (proc (car lst)) (map proc (cdr lst))))) +(define (fold proc init lst) "Sequential fold." + (if (null? lst) init + (fold proc (proc (car lst) init) (cdr lst)))) +(define (do-fold-pairwise proc lst) + (if (null? lst) '() + (if (null? (cdr lst)) lst + (do-fold-pairwise proc + (cons (proc (car lst) (car (cdr lst))) + (do-fold-pairwise proc (cdr (cdr lst)))))))) +(define (fold-pairwise proc lst) "Parallel pairwise fold." + (car (do-fold-pairwise proc lst))) +\end{lstlisting} +\end{frame} + +\begin{frame}{Guile with automatic parallelism (synthetic benchmark)} + \centering + \includegraphics[width=\textwidth]{build/gnuplot/results.eps} \end{frame} \begin{frame}{Conclusion and future work} + Kernels provide + \begin{itemize} + \item standard way of expressing parallel and distributed programme parts, + \item automatic fault tolerance for master and worker nodes and + \item automatic load balancing via cluster scheduler. + \end{itemize} + \vfill + Arguments-based parallelism provide + \begin{itemize} + \item high-level programming interface for clusters and single nodes, + \item conveniently hides the shortcomings of parallel and distributed computations. + \end{itemize} \end{frame} \begin{frame}