add slides - iccsa-21-guile - Functional Programming Interface for Parallel and Distributed Computing

commit 4be4b695cc57e273aaf4b223818b6b891edbb78e
parent 8e4bb0f0596ce1f5c49b38500cd8c48d2ec2bd01
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date:   Sun, 12 Sep 2021 23:58:22 +0300

add slides

Diffstat:
slides.tex  | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--

1 file changed, 289 insertions(+), 4 deletions(-)
diff --git a/slides.tex b/slides.tex
@@ -1,7 +1,7 @@
 \documentclass[aspectratio=169,xcolor=table]{beamer}
 \usepackage{polyglossia}
 \setdefaultlanguage{english}
-\usetheme{SaintPetersburg}
+\usetheme[numbers]{SaintPetersburg}
 
 \usepackage{textcomp}
 \usepackage{booktabs}
@@ -24,25 +24,310 @@
 	pdfmetalang={en},
 	pdflicenseurl={http://creativecommons.org/licenses/by-sa/4.0/},
 	pdfcopyright={Copyright \textcopyright{} 2021 Ivan Petriakov\xmpcomma{} Ivan Gankevich\xmpcomma{}},
-	pdfsubject={TODO},
+	pdfsubject={Functional programming interface for parallel and distributed computing},
 }
 
-\title{TODO}
+\title{Functional programming interface\\for parallel and distributed computing}
 \author{%
     I.\:Petriakov \and
     I.\:Gankevich
 }
 \institute{Saint Petersburg State University}
-\date{July 2021}
+\date{September 2021}
+
+% https://github.com/stuhlmueller/scheme-listings/blob/master/lstlang0.sty
+\lstdefinelanguage{scheme}{
+  morekeywords=[1]{define, define-syntax, define-macro, lambda, define-stream, stream-lambda,
+  define*,if,cons,car,cdr},
+  morekeywords=[2]{begin, call-with-current-continuation, call/cc,
+    call-with-input-file, call-with-output-file, case, cond,
+    do, else, for-each, if,
+    let*, let, let-syntax, letrec, letrec-syntax,
+    let-values, let*-values,
+    and, or, not, delay, force,
+    quasiquote, quote, unquote, unquote-splicing,
+    syntax, syntax-rules, eval, environment, query,
+    car, cdr, cons},
+  morekeywords=[3]{import, export},
+  alsodigit=!\$\%&*+-./:<=>?@^_~,
+  sensitive=true,
+  morecomment=[l]{;},
+  morecomment=[s]{\#|}{|\#},
+  morestring=[b]",
+  basicstyle=\small\ttfamily,
+  keywordstyle={\bf\ttfamily\color[HTML]{4081ec}},
+  commentstyle=\color[rgb]{0.33,0.33,0.33},
+  stringstyle={\color[HTML]{00a000}},
+  upquote=true,
+  breaklines=true,
+  breakatwhitespace=true,
+  literate=*{`}{{`}}{1},
+  showstringspaces=false
+}
+
+\lstdefinelanguage{cpp}{
+  morekeywords=[1]{class,struct,enum,public,private,protected,virtual,override,const,
+                   void,int,new,delete,nullptr},
+  morecomment=[l]{//},
+  basicstyle=\small\ttfamily,
+  keywordstyle={\bf\ttfamily\color[HTML]{4081ec}},
+  commentstyle=\color[rgb]{0.33,0.33,0.33},
+  stringstyle={\color[HTML]{00a000}},
+  escapeinside={LATEX}{END},
+}
 
 \begin{document}
 
 \frame{\maketitle}
 
 \begin{frame}{Motivation}
+    \begin{itemize}
+        \item There is no universal low-level representation of distributed computations.
+        \item There is no high-level interface for distributed computing in functional languages.
+        \item Existing solutions do not provide automatic fault tolerance for both slave and master nodes.
+    \end{itemize}
+    \vfill
+
+    \textit{Parallel} --- several processor cores of single cluster node.
+
+    \textit{Distributed} --- several cluster nodes.
+\end{frame}
+
+\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)}
+    Kernel = data + code + result of the computation.
+    \begin{columns}[T]
+        \begin{column}{0.36\textwidth}
+\begin{lstlisting}[language=cpp]
+int nested(int a) {
+  return 123 + a;
+}
+\end{lstlisting}
+        \end{column}
+        \begin{column}{0.63\textwidth}
+\begin{lstlisting}[language=cpp]
+struct Nested: public Kernel {
+  int result;
+  int a;
+  Nested(int a): a(a) {}
+  void act() override {
+    result = a + 123;
+    LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END();
+  }
+};
+\end{lstlisting}
+        \end{column}
+    \end{columns}
+    \vfill\small
+    \begin{tabular}{ll}
+        \color[HTML]{ac4040}\bf\texttt{async\_call} & push child kernel to the queue \\
+        \color[HTML]{ac4040}\bf\texttt{async\_return} & push current kernel to the queue \\
+        \color[HTML]{ac4040}\bf\texttt{async\_message} & send a kernel to another one via the queue \\
+    \end{tabular}
+\end{frame}
+
+\begin{frame}[fragile]{From sync. call stack to async. call stack (kernels)}
+    \begin{columns}[T]
+        \begin{column}{0.36\textwidth}
+\begin{lstlisting}[language=cpp]
+void main() {
+  // code before
+  int result = nested();
+  // code after
+  print(result);
+}
+\end{lstlisting}
+        \end{column}
+        \begin{column}{0.63\textwidth}
+\begin{lstlisting}[language=cpp]
+struct Main: public Kernel {
+  void act() override {
+    // code before
+    LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Nested);
+  }
+  void react(Kernel* child) override {
+    int result = ((Nested*)child)->result;
+    // code after
+    print(result);
+    LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_return}END();
+  }
+};
+
+void main() {
+  LATEX{\color[HTML]{ac4040}\bf{}\ttfamily{}async\_call}END(new Main);
+  wait();
+}
+\end{lstlisting}
+        \end{column}
+    \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Cluster scheduler architecture}
+    \begin{columns}[T]
+        \begin{column}{0.40\textwidth}
+            \tikzset{Rect/.style={text width=1.70cm,draw,align=center,thick,rounded corners}}
+            \small
+            \textbf{Daemon process:}\strut{}
+            \begin{tikzpicture}[x=2.25cm,y=-1.10cm]
+                \node[Rect] (parallel) at (2,0) {Processor queue\strut};
+                %\node[Rect] (timer) at (1,0) {Timer queue\strut};
+                %\node[Rect] (disk) at (2,0) {Disk queue\strut};
+                \node[Rect] (nic) at (4,0) {Network queue\strut};
+                \node[Rect] (process) at (3,0) {Process queue\strut};
+                \node[Rect] (cpu0) at (2,-1) {CPU 0\strut};
+                \node[Rect] (cpu1) at (2,1) {CPU 1\strut};
+                %\node[Rect] (disk0) at (2,-1) {Disk 0\strut};
+                %\node[Rect] (disk1) at (2,1) {Disk 1\strut};
+                %\node[Rect] (timer0) at (1,-1) {Timer 0\strut};
+                \node[Rect] (nic0) at (4,-1) {NIC 0\strut};
+                \node[Rect] (nic1) at (4,1) {NIC 1\strut};
+                \node[Rect] (child) at (3,1) {Child\strut};
+                \path[draw,thick] (parallel) -- (cpu0);
+                \path[draw,thick] (parallel) -- (cpu1);
+                %\path[draw,thick] (timer) -- (timer0);
+                %\path[draw,thick] (disk) -- (disk0);
+                %\path[draw,thick] (disk) -- (disk1);
+                \path[draw,thick] (nic) -- (nic0);
+                \path[draw,thick] (nic) -- (nic1);
+                \path[draw,thick] (process) -- (child);
+            \end{tikzpicture}
+            \vskip0.5\baselineskip\textbf{Application process:}\strut{}
+            \begin{tikzpicture}[x=2.25cm,y=-1.10cm]
+                \node[Rect] (parallel) at (2,0) {Processor queue\strut};
+                \node[Rect] (process) at (3,0) {Process queue\strut};
+                \node[Rect] (cpu0) at (2,-1) {CPU 0\strut};
+                \node[Rect] (cpu1) at (2,1) {CPU 1\strut};
+                \node[Rect] (parent) at (3,-1) {Parent\strut};
+                \path[draw,thick] (parallel) -- (cpu0);
+                \path[draw,thick] (parallel) -- (cpu1);
+                \path[draw,thick] (process) -- (parent);
+            \end{tikzpicture}
+        \end{column}
+        \begin{column}{0.50\textwidth}
+            \vskip0.5\baselineskip
+            \begin{itemize}
+                \item Run applications in child processes.
+                \item Route kernels between application processes running on
+                    different cluster nodes.
+                \item Maintain a list of available cluster nodes.
+            \end{itemize}
+            \vskip3\baselineskip\small\hspace{-2.0cm}
+            \begin{tabular}{lp{5.5cm}}
+                \texttt{async\_call} & push child kernel to the queue \\
+                \texttt{async\_return} & push current kernel to the queue \\
+                \texttt{async\_message} & send a kernel to another one via the queue \\
+            \end{tabular}
+        \end{column}
+    \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Fault tolerance}
+    \begin{itemize}
+        \item Assumption: \textit{main} kernel has only one child kernel at a time.
+        \item Every \textit{step} kernel (a child of \textit{main}) has a copy of the \textit{main}.
+        \item Scheduler ensures that \textit{main} and \textit{step} are on different cluster nodes.
+        \item Every \textit{step} is also appended to the local log file.
+    \end{itemize}
+    \vfill
+    \begin{columns}[T]
+        \begin{column}{0.25\textwidth}
+            \definecolor{mydark}{HTML}{E31A1C} 
+            \definecolor{mylight}{HTML}{FB9A99} 
+            \tikzset{Rect/.style={text width=1.50cm,rounded corners,draw,align=center,thick}}
+            \tikzset{Arrow/.style={draw,-latex}}
+            \tikzset{DotNoArrow/.style={draw,mydark,thick}}
+            \tikzset{Dot/.style={DotNoArrow,-latex}}
+            \begin{tikzpicture}[x=2.50cm,y=-1.5cm]
+                \node[Rect] (m0) at (0,-1) {Main kernel};
+                \node[Rect] (m1) at (0,0) {Step kernel};
+                \node[Rect] (m2) at (-0.5,1) {Child kernel 1};
+                \node[Rect] (m3) at (0.5,1) {Child kernel 2};
+                % solid arrows
+                \path[Arrow] (m1) -- (m0);
+                \path[Arrow] (m2) -- (m1);
+                \path[Arrow] (m3) -- (m1);
+            \end{tikzpicture}
+        \end{column}
+        \begin{column}{0.70\textwidth}
+            \begin{tabular}{ll}
+                Failure & Resolution \\
+                \midrule
+                Child 1 & resend Child 1 to the remaining nodes \\
+                Child 2 & resend Child 2 to the remaining nodes \\
+                Step & resend Step to the remaining nodes \\
+                Main & restore Main from the copy \\
+                Main and Step & restore Main and Step from the log \\
+            \end{tabular}
+            \vfill\tiny{}
+            \begin{itemize}
+            \item I. Gankevich, Yu. Tipikin, V. Korkhov
+            \href{http://dx.doi.org/10.1109/HPCS.2017.126}{Subordination: Providing resilience to simultaneous failure of multiple cluster nodes}, HPCS'17, 2017.\\
+        \item I. Gankevich, Yu. Tipikin, V. Korkhov, V. Gaiduchok, A. Degtyarev, A. Bogdanov
+            \href{http://dx.doi.org/10.1504/IJBIDM.2017.10007764}{Master node fault tolerance in distributed big data processing clusters}, International Journal of Business Intelligence and Data Mining, 2017.
+            \end{itemize}
+        \end{column}
+    \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]{Kernel and queue definition}
+\begin{lstlisting}[language=cpp]
+enum class states {upstream, downstream, point_to_point};
+
+class kernel {
+public:
+  virtual void act();
+  virtual void react(kernel* child);
+  virtual void write(buffer& out) const;
+  virtual void read(buffer& in);
+  kernel* parent = nullptr;
+  kernel* target = nullptr;
+  states state = states::upstream;
+};
+
+class queue {
+public:
+  void push(kernel* k);
+};
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Automatic parallelism}
+    The idea: evaluate arguments in parallel (one kernel for each argument).
+\begin{lstlisting}[language=Scheme]
+(define (map proc lst) "Parallel map."
+  (if (null? lst) lst
+    (cons (proc (car lst)) (map proc (cdr lst)))))
+(define (fold proc init lst) "Sequential fold."
+  (if (null? lst) init
+    (fold proc (proc (car lst) init) (cdr lst))))
+(define (do-fold-pairwise proc lst)
+  (if (null? lst) '()
+    (if (null? (cdr lst)) lst
+      (do-fold-pairwise proc
+        (cons (proc (car lst) (car (cdr lst)))
+              (do-fold-pairwise proc (cdr (cdr lst))))))))
+(define (fold-pairwise proc lst) "Parallel pairwise fold."
+  (car (do-fold-pairwise proc lst)))
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Guile with automatic parallelism (synthetic benchmark)}
+    \centering
+    \includegraphics[width=\textwidth]{build/gnuplot/results.eps}
 \end{frame}
 
 \begin{frame}{Conclusion and future work}
+    Kernels provide 
+    \begin{itemize}
+        \item standard way of expressing parallel and distributed programme parts,
+        \item automatic fault tolerance for master and worker nodes and
+        \item automatic load balancing via cluster scheduler.
+    \end{itemize}
+    \vfill
+    Arguments-based parallelism provide
+    \begin{itemize}
+        \item high-level programming interface for clusters and single nodes,
+        \item conveniently hides the shortcomings of parallel and distributed computations.
+    \end{itemize}
 \end{frame}
 
 \begin{frame}

	iccsa-21-guile Functional Programming Interface for Parallel and Distributed Computing
	git clone https://git.igankevich.com/iccsa-21-guile.git
	Log \| Files \| Refs