commit 4be4b695cc57e273aaf4b223818b6b891edbb78e
parent 8e4bb0f0596ce1f5c49b38500cd8c48d2ec2bd01
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date: Sun, 12 Sep 2021 23:58:22 +0300
add slides
Diffstat:
slides.tex | | | 293 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 289 insertions(+), 4 deletions(-)
diff --git a/slides.tex b/slides.tex
@@ -1,7 +1,7 @@
\documentclass[aspectratio=169,xcolor=table]{beamer}
\usepackage{polyglossia}
\setdefaultlanguage{english}
-\usetheme{SaintPetersburg}
+\usetheme[numbers]{SaintPetersburg}
\usepackage{textcomp}
\usepackage{booktabs}
@@ -24,25 +24,310 @@
pdfmetalang={en},
pdflicenseurl={http://creativecommons.org/licenses/by-sa/4.0/},
pdfcopyright={Copyright \textcopyright{} 2021 Ivan Petriakov\xmpcomma{} Ivan Gankevich\xmpcomma{}},
- pdfsubject={TODO},
+ pdfsubject={Functional programming interface for parallel and distributed computing},
}
-\title{TODO}
+\title{Functional programming interface\\for parallel and distributed computing}
\author{%
I.\:Petriakov \and
I.\:Gankevich
}
\institute{Saint Petersburg State University}
-\date{July 2021}
+\date{September 2021}
+
+% https://github.com/stuhlmueller/scheme-listings/blob/master/lstlang0.sty
+\lstdefinelanguage{scheme}{
+ morekeywords=[1]{define, define-syntax, define-macro, lambda, define-stream, stream-lambda,
+ define*,if,cons,car,cdr},
+ morekeywords=[2]{begin, call-with-current-continuation, call/cc,
+ call-with-input-file, call-with-output-file, case, cond,
+ do, else, for-each, if,
+ let*, let, let-syntax, letrec, letrec-syntax,
+ let-values, let*-values,
+ and, or, not, delay, force,
+ quasiquote, quote, unquote, unquote-splicing,
+ syntax, syntax-rules, eval, environment, query,
+ car, cdr, cons},
+ morekeywords=[3]{import, export},
+ alsodigit=!\$\%&*+-./:<=>?@^_~,
+ sensitive=true,
+ morecomment=[l]{;},
+ morecomment=[s]{\#|}{|\#},
+ morestring=[b]",
+ basicstyle=\small\ttfamily,
+ keywordstyle={\bf\ttfamily\color[HTML]{4081ec}},
+ commentstyle=\color[rgb]{0.33,0.33,0.33},
+ stringstyle={\color[HTML]{00a000}},
+ upquote=true,
+ breaklines=true,
+ breakatwhitespace=true,
+ literate=*{`}{{`}}{1},
+ showstringspaces=false
+}
+
+\lstdefinelanguage{cpp}{
+ morekeywords=[1]{class,struct,enum,public,private,protected,virtual,override,const,
+ void,int,new,delete,nullptr},
+ morecomment=[l]{//},
+ basicstyle=\small\ttfamily,
+ keywordstyle={\bf\ttfamily\color[HTML]{4081ec}},
+ commentstyle=\color[rgb]{0.33,0.33,0.33},
+ stringstyle={\color[HTML]{00a000}},
+ escapeinside={LATEX}{END},
+}
\begin{document}
\frame{\maketitle}
\begin{frame}{Motivation}
+ \begin{itemize}
+ \item There is no universal low-level representation of distributed computations.
+ \item There is no high-level interface for distributed computing in functional languages.
+ \item Existing solutions do not provide automatic fault tolerance for both slave and master nodes.
+ \end{itemize}
+ \vfill
+
+ \textit{Parallel} --- several processor cores of single cluster node.
+
+ \textit{Distributed} --- several cluster nodes.
+\end{frame}
+
+\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)}
+ Kernel = data + code + result of the computation.
+ \begin{columns}[T]
+ \begin{column}{0.36\textwidth}
+\begin{lstlisting}[language=cpp]
+int nested(int a) {
+ return 123 + a;
+}
+\end{lstlisting}
+ \end{column}
+ \begin{column}{0.63\textwidth}
+\begin{lstlisting}[language=cpp]
+struct Nested: public Kernel {
+ int result;
+ int a;
+ Nested(int a): a(a) {}
+ void act() override {
+ result = a + 123;
+ LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END();
+ }
+};
+\end{lstlisting}
+ \end{column}
+ \end{columns}
+ \vfill\small
+ \begin{tabular}{ll}
+ \color[HTML]{ac4040}\bf\texttt{async\_call} & push child kernel to the queue \\
+ \color[HTML]{ac4040}\bf\texttt{async\_return} & push current kernel to the queue \\
+ \color[HTML]{ac4040}\bf\texttt{async\_message} & send a kernel to another one via the queue \\
+ \end{tabular}
+\end{frame}
+
+\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)}
+ \begin{columns}[T]
+ \begin{column}{0.36\textwidth}
+\begin{lstlisting}[language=cpp]
+void main() {
+ // code before
+ int result = nested();
+ // code after
+ print(result);
+}
+\end{lstlisting}
+ \end{column}
+ \begin{column}{0.63\textwidth}
+\begin{lstlisting}[language=cpp]
+struct Main: public Kernel {
+ void act() override {
+ // code before
+ LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Nested);
+ }
+ void react(Kernel* child) override {
+ int result = ((Nested*)child)->result;
+ // code after
+ print(result);
+ LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END();
+ }
+};
+
+void main() {
+ LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Main);
+ wait();
+}
+\end{lstlisting}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Cluster scheduler architecture}
+ \begin{columns}[T]
+ \begin{column}{0.40\textwidth}
+ \tikzset{Rect/.style={text width=1.70cm,draw,align=center,thick,rounded corners}}
+ \small
+ \textbf{Daemon process:}\strut{}
+ \begin{tikzpicture}[x=2.25cm,y=-1.10cm]
+ \node[Rect] (parallel) at (2,0) {Processor queue\strut};
+ %\node[Rect] (timer) at (1,0) {Timer queue\strut};
+ %\node[Rect] (disk) at (2,0) {Disk queue\strut};
+ \node[Rect] (nic) at (4,0) {Network queue\strut};
+ \node[Rect] (process) at (3,0) {Process queue\strut};
+ \node[Rect] (cpu0) at (2,-1) {CPU 0\strut};
+ \node[Rect] (cpu1) at (2,1) {CPU 1\strut};
+ %\node[Rect] (disk0) at (2,-1) {Disk 0\strut};
+ %\node[Rect] (disk1) at (2,1) {Disk 1\strut};
+ %\node[Rect] (timer0) at (1,-1) {Timer 0\strut};
+ \node[Rect] (nic0) at (4,-1) {NIC 0\strut};
+ \node[Rect] (nic1) at (4,1) {NIC 1\strut};
+ \node[Rect] (child) at (3,1) {Child\strut};
+ \path[draw,thick] (parallel) -- (cpu0);
+ \path[draw,thick] (parallel) -- (cpu1);
+ %\path[draw,thick] (timer) -- (timer0);
+ %\path[draw,thick] (disk) -- (disk0);
+ %\path[draw,thick] (disk) -- (disk1);
+ \path[draw,thick] (nic) -- (nic0);
+ \path[draw,thick] (nic) -- (nic1);
+ \path[draw,thick] (process) -- (child);
+ \end{tikzpicture}
+ \vskip0.5\baselineskip\textbf{Application process:}\strut{}
+ \begin{tikzpicture}[x=2.25cm,y=-1.10cm]
+ \node[Rect] (parallel) at (2,0) {Processor queue\strut};
+ \node[Rect] (process) at (3,0) {Process queue\strut};
+ \node[Rect] (cpu0) at (2,-1) {CPU 0\strut};
+ \node[Rect] (cpu1) at (2,1) {CPU 1\strut};
+ \node[Rect] (parent) at (3,-1) {Parent\strut};
+ \path[draw,thick] (parallel) -- (cpu0);
+ \path[draw,thick] (parallel) -- (cpu1);
+ \path[draw,thick] (process) -- (parent);
+ \end{tikzpicture}
+ \end{column}
+ \begin{column}{0.50\textwidth}
+ \vskip0.5\baselineskip
+ \begin{itemize}
+ \item Run applications in child processes.
+ \item Route kernels between application processes running on
+ different cluster nodes.
+ \item Maintain a list of available cluster nodes.
+ \end{itemize}
+ \vskip3\baselineskip\small\hspace{-2.0cm}
+ \begin{tabular}{lp{5.5cm}}
+ \texttt{async\_call} & push child kernel to the queue \\
+ \texttt{async\_return} & push current kernel to the queue \\
+ \texttt{async\_message} & send a kernel to another one via the queue \\
+ \end{tabular}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Fault tolerance}
+ \begin{itemize}
+ \item Assumption: \textit{main} kernel has only one child kernel at a time.
+ \item Every \textit{step} kernel (a child of \textit{main}) has a copy of the \textit{main}.
+ \item Scheduler ensures that \textit{main} and \textit{step} are on different cluster nodes.
+ \item Every \textit{step} is also appended to the local log file.
+ \end{itemize}
+ \vfill
+ \begin{columns}[T]
+ \begin{column}{0.25\textwidth}
+ \definecolor{mydark}{HTML}{E31A1C}
+ \definecolor{mylight}{HTML}{FB9A99}
+ \tikzset{Rect/.style={text width=1.50cm,rounded corners,draw,align=center,thick}}
+ \tikzset{Arrow/.style={draw,-latex}}
+ \tikzset{DotNoArrow/.style={draw,mydark,thick}}
+ \tikzset{Dot/.style={DotNoArrow,-latex}}
+ \begin{tikzpicture}[x=2.50cm,y=-1.5cm]
+ \node[Rect] (m0) at (0,-1) {Main kernel};
+ \node[Rect] (m1) at (0,0) {Step kernel};
+ \node[Rect] (m2) at (-0.5,1) {Child kernel 1};
+ \node[Rect] (m3) at (0.5,1) {Child kernel 2};
+ % solid arrows
+ \path[Arrow] (m1) -- (m0);
+ \path[Arrow] (m2) -- (m1);
+ \path[Arrow] (m3) -- (m1);
+ \end{tikzpicture}
+ \end{column}
+ \begin{column}{0.70\textwidth}
+ \begin{tabular}{ll}
+ Failure & Resolution \\
+ \midrule
+ Child 1 & resend Child 1 to the remaining nodes \\
+ Child 2 & resend Child 2 to the remaining nodes \\
+ Step & resend Step to the remaining nodes \\
+ Main & restore Main from the copy \\
+ Main and Step & restore Main and Step from the log \\
+ \end{tabular}
+ \vfill\tiny{}
+ \begin{itemize}
+ \item I. Gankevich, Yu. Tipikin, V. Korkhov
+ \href{http://dx.doi.org/10.1109/HPCS.2017.126}{Subordination: Providing resilience to simultaneous failure of multiple cluster nodes}, HPCS'17, 2017.\\
+ \item I. Gankevich, Yu. Tipikin, V. Korkhov, V. Gaiduchok, A. Degtyarev, A. Bogdanov
+ \href{http://dx.doi.org/10.1504/IJBIDM.2017.10007764}{Master node fault tolerance in distributed big data processing clusters}, International Journal of Business Intelligence and Data Mining, 2017.
+ \end{itemize}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Kernel and queue definition}
+\begin{lstlisting}[language=cpp]
+enum class states {upstream, downstream, point_to_point};
+
+class kernel {
+public:
+ virtual void act();
+ virtual void react(kernel* child);
+ virtual void write(buffer& out) const;
+ virtual void read(buffer& in);
+ kernel* parent = nullptr;
+ kernel* target = nullptr;
+ states state = states::upstream;
+};
+
+class queue {
+public:
+ void push(kernel* k);
+};
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Automatic parallelism}
+ The idea: evaluate arguments in parallel (one kernel for each argument).
+\begin{lstlisting}[language=Scheme]
+(define (map proc lst) "Parallel map."
+ (if (null? lst) lst
+ (cons (proc (car lst)) (map proc (cdr lst)))))
+(define (fold proc init lst) "Sequential fold."
+ (if (null? lst) init
+ (fold proc (proc (car lst) init) (cdr lst))))
+(define (do-fold-pairwise proc lst)
+ (if (null? lst) '()
+ (if (null? (cdr lst)) lst
+ (do-fold-pairwise proc
+ (cons (proc (car lst) (car (cdr lst)))
+ (do-fold-pairwise proc (cdr (cdr lst))))))))
+(define (fold-pairwise proc lst) "Parallel pairwise fold."
+ (car (do-fold-pairwise proc lst)))
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Guile with automatic parallelism (synthetic benchmark)}
+ \centering
+ \includegraphics[width=\textwidth]{build/gnuplot/results.eps}
\end{frame}
\begin{frame}{Conclusion and future work}
+ Kernels provide
+ \begin{itemize}
+ \item standard way of expressing parallel and distributed programme parts,
+ \item automatic fault tolerance for master and worker nodes and
+ \item automatic load balancing via cluster scheduler.
+ \end{itemize}
+ \vfill
+ Arguments-based parallelism provide
+ \begin{itemize}
+ \item high-level programming interface for clusters and single nodes,
+ \item conveniently hides the shortcomings of parallel and distributed computations.
+ \end{itemize}
\end{frame}
\begin{frame}