commit c8e5ff81f7b75d9659213db32ece3d42baa6bcf4
parent 97d3e5cd7e0b522572fdcbec75bfeabbe80826f0
Author: Ivan Gankevich <igankevich@ya.ru>
Date: Mon, 13 Feb 2017 20:17:00 +0300
Draw a picture where CFO hierarchy is mapped onto the node hierarchy.
Diffstat:
11 files changed, 218 insertions(+), 1 deletion(-)
diff --git a/phd-diss-ru.org b/phd-diss-ru.org
@@ -2821,6 +2821,31 @@ cite:armstrong2003thesis. Для того что реализовать этот
кластера за один шаг вычислений или произвольного количества подчинненых узлов в
любой момент работы программы.
+TODO translate
+
+#+name: fig:fail-over-example
+#+header: :headers '("\\input{preamble}\\setdefaultlanguage{russian}")
+#+begin_src latex :file build/fail-over-example-ru.pdf :exports results :results raw
+\input{tex/preamble} % colour and TikZ style definitions used by every frame below
+\newcommand*{\spbuInsertFigure}[1]{% argument 1: path of the TeX file holding one picture
+%\flushright%
+\vspace{2\baselineskip}% vertical gap inserted before each picture
+\begin{minipage}{0.5\textwidth}% each picture gets a box half of \textwidth wide
+ \Large% enlarge the text inside the picture
+ \input{#1}%
+\end{minipage}%
+}%
+\noindent%
+\spbuInsertFigure{tex/cluster-0}~\spbuInsertFigure{tex/frame-0} % row 1: bare cluster, then cluster with daemon hierarchy
+\spbuInsertFigure{tex/frame-3}~\spbuInsertFigure{tex/frame-4} % row 2: master, backup and T1, then the same plus S1..S3
+\spbuInsertFigure{tex/legend-ru} % last row: legend with Russian captions
+#+end_src
+
+#+caption: Пример работы алгоритма восстановления после сбоев.
+#+RESULTS: fig:fail-over-example
+[[file:build/fail-over-example-ru.pdf]]
+
+
**** Результаты тестирования.
Методы отказоустойчивости были протестированы на физическом кластере
(см. [[tab:cluster]]) на примере программы, генерирующей взволнованную
diff --git a/phd-diss.org b/phd-diss.org
@@ -2386,7 +2386,7 @@ where \(f\) maps a node to its rank and operator \(<\) defines strict total orde
this number unique.
The simpliest function \(f\) maps each node to its Internet address position in
-network IP address range. Without conversion to a tree (when only \emph{one}
+network IP address range. Without conversion to a tree (when only /one/
leader is allowed in the network) a node with the lowest position in this range
becomes the principal. If IP-address of a node occupies the first position in
the range, then there is no principal for it, and it continues to be at the top
@@ -2713,6 +2713,58 @@ This simple approach allows tolerating at most one failure of /any/ cluster node
per computational step or arbitrary number of subordinate nodes at any time
during programme execution.
+An example of the fail-over algorithm follows.
+1. Initial state. Initially, a computer cluster does not need to be configured
+ apart from setting up the local network. The algorithm assumes full
+ connectivity of cluster nodes, and works best with tree topologies in which
+ several network switches connect all cluster nodes.
+2. Node hierarchy. When the cluster is bootstrapped, each node starts a
+ /daemon/ process, whose responsibility is to establish a hierarchy of such
+ processes superimposed on the topology of cluster nodes. Hierarchical links
+ are defined solely by the position of a node's IP address in the local
+ network IP address range, eliminating the need for a complex distributed
+ consensus algorithm. A node may act as a subordinate and a master
+ simultaneously, thus multiple hierarchy layers may be created. The hierarchy
+ changes only when a node joins or leaves the cluster, and is reused by every
+ application running on top of it. In the event of a node failure its role is
+ reassigned to another node, and tasks that were executing on this node are
+ restarted on healthy ones.
+3. Launch master kernel. An HPC application is decomposed into computational
+ kernels with hierarchical dependence. The first, or /master/, kernel is
+ started on leaf node \(A\). The master kernel may have only one subordinate
+ at a time, and a /backup/ copy of the master kernel is sent along with the
+ subordinate kernel \(T_1\) to the root node \(B\). \(T_1\) represents one
+ sequential step of a programme (a superstep in the Bulk Synchronous Parallel
+ model). There can be any number of sequential steps in a programme, and when
+ node \(B\) fails, the current step is restarted from the beginning.
+4. Launch subordinate kernels. Kernels \(S_1\), \(S_2\), \(S_3\) are launched on
+ the leaf nodes. When node \(B\), \(C\) or \(D\) fails, the corresponding
+ parent kernel restarts the failed subordinates (\(T_1\) restarts \(S_1\), the
+ master kernel restarts \(T_1\), etc.). When node \(A\) fails, the master
+ kernel is recovered from the backup.
+
+#+name: fig:fail-over-example
+#+header: :headers '("\\input{preamble}")
+#+begin_src latex :file build/fail-over-example.pdf :exports results :results raw
+\input{tex/preamble} % colour and TikZ style definitions used by every frame below
+\newcommand*{\spbuInsertFigure}[1]{% argument 1: path of the TeX file holding one picture
+%\flushright%
+\vspace{2\baselineskip}% vertical gap inserted before each picture
+\begin{minipage}{0.5\textwidth}% each picture gets a box half of \textwidth wide
+ \Large% enlarge the text inside the picture
+ \input{#1}%
+\end{minipage}%
+}%
+\noindent%
+\spbuInsertFigure{tex/cluster-0}~\spbuInsertFigure{tex/frame-0} % row 1: bare cluster, then cluster with daemon hierarchy
+\spbuInsertFigure{tex/frame-3}~\spbuInsertFigure{tex/frame-4} % row 2: master, backup and T1, then the same plus S1..S3
+\spbuInsertFigure{tex/legend} % last row: legend with English captions
+#+end_src
+
+#+caption: An example of the fail-over algorithm in action.
+#+RESULTS: fig:fail-over-example
+[[file:build/fail-over-example.pdf]]
+
**** Evaluation.
Factory framework is evaluated on physical cluster (Table [[tab:cluster]]) on
the example of hydrodynamics HPC application which was developed
diff --git a/tex/cluster-0.tex b/tex/cluster-0.tex
@@ -0,0 +1,21 @@
+%\centering%
+\begin{tikzpicture}[remember picture,x=3.5cm,y=-3.5cm]% negative y unit: larger y coordinates are drawn lower
+ \node[Node] (A1) at (0,0) {A}; % top-left corner
+ \node[Node] (B1) at (1,0) {B}; % top-right corner
+ \node[Node] (C1) at (0,1) {C}; % bottom-left corner
+ \node[Node] (D1) at (1,1) {D}; % bottom-right corner
+
+ \node[Switch] (S1) at (.5,.5) {}; % switch in the centre of the square
+
+ \path[PhysicalLink] (A1) edge (S1); % every node is wired to the switch
+ \path[PhysicalLink] (B1) edge (S1);
+ \path[PhysicalLink] (C1) edge (S1);
+ \path[PhysicalLink] (D1) edge (S1);
+
+ \path[NetworkLink] (A1) edge (B1); % all six node pairs are linked: full connectivity
+ \path[NetworkLink] (A1) edge (C1);
+ \path[NetworkLink] (A1) edge (D1);
+ \path[NetworkLink] (B1) edge (C1);
+ \path[NetworkLink] (B1) edge (D1);
+ \path[NetworkLink] (C1) edge (D1);
+\end{tikzpicture}%
diff --git a/tex/frame-0.tex b/tex/frame-0.tex
@@ -0,0 +1,15 @@
+\input{tex/cluster-0} % draw the physical cluster first; its nodes A1..D1 are referenced below
+\begin{tikzpicture}[remember picture,overlay] % second picture positioned via the remembered nodes
+
+ \node[Daemon] (Ax) at (A1) {\phantom{A}}; % one daemon ellipse on top of each cluster node
+ \node[Daemon] (Bx) at (B1) {\phantom{B}};
+ \node[Daemon] (Cx) at (C1) {\phantom{C}};
+ \node[Daemon] (Dx) at (D1) {\phantom{D}};
+
+ \path[DaemonLink] (Ax) edge (Bx); % hierarchy links: B is connected to A, C and D
+ %\path[thick] (A1) edge (C1);
+ %\path[thick] (A1) edge (D1);
+ \path[DaemonLink] (Bx) edge (Cx);
+ \path[DaemonLink] (Bx) edge (Dx);
+ %\path[thick] (C1) edge (D1);
+\end{tikzpicture}
diff --git a/tex/frame-1.tex b/tex/frame-1.tex
@@ -0,0 +1,4 @@
+\input{tex/frame-0} % cluster plus daemon hierarchy
+\begin{tikzpicture}[remember picture,overlay]
+ \node[Task,label={[TaskLabel]90:\textbf{Master\vphantom{p}}}] (Master) at (A1.center) {\phantom{A}}; % master kernel on node A, label at 90 degrees; \vphantom{p} presumably matches the depth of the "Backup" label -- confirm
+\end{tikzpicture}%
diff --git a/tex/frame-2.tex b/tex/frame-2.tex
@@ -0,0 +1,5 @@
+\input{tex/frame-1} % everything up to and including the master kernel
+\begin{tikzpicture}[remember picture,overlay]
+ \node[Task,label={[TaskLabel]90:\textbf{Backup}}] (MasterCopy) at (B1.center) {\phantom{A}}; % backup copy of the master kernel, placed on node B
+ \path[TaskEdge] (Master) edge (MasterCopy); % arrow from the master on A to its backup on B
+\end{tikzpicture}%
diff --git a/tex/frame-3.tex b/tex/frame-3.tex
@@ -0,0 +1,4 @@
+\input{tex/frame-2} % everything up to and including the backup copy
+\begin{tikzpicture}[remember picture,overlay]
+ \node[Task,label={[TaskLabel]0:\textbf{T\textsubscript{\color{spbuGreen}1}}}] (Task1) at (B1.center) {\phantom{A}}; % kernel T1 on node B (same position as the backup), label at 0 degrees
+\end{tikzpicture}%
diff --git a/tex/frame-4.tex b/tex/frame-4.tex
@@ -0,0 +1,10 @@
+\input{tex/frame-3} % everything up to and including kernel T1
+\begin{tikzpicture}[remember picture,overlay]
+ \node[Task,label={[TaskLabel]180:\textbf{S\textsubscript{\color{spbuGreen}1}}}] (Sub1) at (A1.center) {\phantom{A}}; % S1 on node A, label at 180 degrees
+ \node[Task,label={[TaskLabel]180:\textbf{S\textsubscript{\color{spbuGreen}2}}}] (Sub2) at (C1.center) {\phantom{A}}; % S2 on node C, label at 180 degrees
+ \node[Task,label={[TaskLabel]0:\textbf{S\textsubscript{\color{spbuGreen}3}}}] (Sub3) at (D1.center) {\phantom{A}}; % S3 on node D, label at 0 degrees
+ %\node[Task,label={[TaskLabel]0:Task1}] (Task1) at (B1.center) {\phantom{A}};
+ \path[TaskEdge] (Task1) edge[bend left] (Sub1); % arrows from T1 to each subordinate; only this one is bent
+ \path[TaskEdge] (Task1) edge (Sub2);
+ \path[TaskEdge] (Task1) edge (Sub3);
+\end{tikzpicture}%
diff --git a/tex/legend-ru.tex b/tex/legend-ru.tex
@@ -0,0 +1,21 @@
+\begin{tikzpicture}[x=2.3cm,y=-1.0cm,framed] % NOTE(review): framed is defined by the TikZ backgrounds library - confirm it is loaded
+
+ \node[Node] at (0,0) {\phantom{A}}; % row 0, left: sample cluster node
+ \node[anchor=west] (X2) at (0.4,0) {\strut узел кластера}; % caption "cluster node"; the name X2 is reused below and never referenced in this picture
+ \node[Switch] at (4,0) {}; % row 0, right: sample switch
+ \node[anchor=west] (X2) at (4.4,0) {\strut сетевой коммутатор}; % caption "network switch"
+ \draw[PhysicalLink] (3.8,1) -- (4.2,1); % row 1: sample physical link
+ \node[anchor=west] (X2) at (4.4,1) {\strut физическое соединение}; % caption "physical link"
+ \draw[NetworkLink] (3.8,2) -- (4.2,2); % row 2: sample network link
+ \node[anchor=west] (X2) at (4.4,2) {\strut сетевое соединение}; % caption "network link"
+
+ \node[Daemon,scale=0.6] at (0,3) {\phantom{A}}; % row 3, left: sample daemon ellipse
+ \node[anchor=west] (X2) at (0.4,3) {\strut процесс-сервис}; % caption "daemon process"
+ \draw[DaemonLink] (3.8,3) -- (4.2,3); % row 3, right: sample hierarchy link
+ \node[anchor=west] (X2) at (4.4,3) {\strut связь узлов иерархии}; % caption "node hierarchy link"
+
+ \node[Process,scale=0.6] (X3) at (0,4) {\phantom{A}}; % row 4, left: sample kernel ellipse
+ \node[baseline=(X3.base),anchor=west] at (0.4,4) {\strut управляющий объект}; % caption "kernel"
+ \draw[ProcessEdge] (3.8,4) -- (4.2,4); % row 4, right: sample kernel link
+ \node[anchor=west] (X2) at (4.4,4) {\strut связь управляющих объектов}; % caption "kernel hierarchy link"
+\end{tikzpicture}
diff --git a/tex/legend.tex b/tex/legend.tex
@@ -0,0 +1,21 @@
+\begin{tikzpicture}[x=2.3cm,y=-1.0cm,framed] % NOTE(review): framed is defined by the TikZ backgrounds library - confirm it is loaded
+
+ \node[Node] at (0,0) {\phantom{A}}; % row 0, left: sample cluster node
+ \node[anchor=west] (X2) at (0.4,0) {\strut cluster node}; % the name X2 is reused below and never referenced in this picture
+ \node[Switch] at (4,0) {}; % row 0, right: sample switch
+ \node[anchor=west] (X2) at (4.4,0) {\strut network switch};
+ \draw[PhysicalLink] (3.8,1) -- (4.2,1); % row 1: sample physical link
+ \node[anchor=west] (X2) at (4.4,1) {\strut physical link};
+ \draw[NetworkLink] (3.8,2) -- (4.2,2); % row 2: sample network link
+ \node[anchor=west] (X2) at (4.4,2) {\strut network link};
+
+ \node[Daemon,scale=0.6] at (0,3) {\phantom{A}}; % row 3, left: sample daemon ellipse
+ \node[anchor=west] (X2) at (0.4,3) {\strut daemon process};
+ \draw[DaemonLink] (3.8,3) -- (4.2,3); % row 3, right: sample hierarchy link
+ \node[anchor=west] (X2) at (4.4,3) {\strut node hierarchy link};
+
+ \node[Process,scale=0.6] (X3) at (0,4) {\phantom{A}}; % row 4, left: sample kernel ellipse
+ \node[baseline=(X3.base),anchor=west] at (0.4,4) {\strut kernel};
+ \draw[ProcessEdge] (3.8,4) -- (4.2,4); % row 4, right: sample kernel link
+ \node[anchor=west] (X2) at (4.4,4) {\strut kernel hierarchy link};
+\end{tikzpicture}
diff --git a/tex/preamble.tex b/tex/preamble.tex
@@ -0,0 +1,39 @@
+%\usepackage{tikz} % not loaded here: the including document must load TikZ before this file
+\usetikzlibrary{shapes,backgrounds}% backgrounds defines the framed option used by the legend pictures
+
+% corporate colors (CMYK definitions)
+\definecolor{spbuTerracotta}{cmyk}{.08,.91,.92,.33}
+\definecolor{spbuGray}{cmyk}{.21,.11,.09,.22}
+
+\definecolor{spbuWhite1}{RGB}{245,246,245} % near-white grays, lightest to darkest
+\definecolor{spbuWhite2}{RGB}{230,231,230}
+\definecolor{spbuWhite3}{RGB}{217,218,217}
+
+\definecolor{spbuWhiteRed2}{RGB}{255,231,230} % reds, lightest to darkest
+\definecolor{spbuWhiteRed3}{RGB}{255,160,160}
+\definecolor{spbuRed}{RGB}{200,40,40}
+
+\definecolor{spbuDarkGray}{HTML}{404040} % outline colour of cluster nodes
+\definecolor{spbuDarkGray2}{HTML}{5F7177}
+
+\definecolor{spbuGreen}{RGB}{40,160,40} % kernels and kernel links
+\definecolor{spbuBlue}{RGB}{40,40,160} % daemons and daemon links
+
+% physical layer: cluster nodes, the switch, cabling and network links
+\tikzset{Node/.style={rectangle,draw=spbuDarkGray,line width=3pt}}
+\tikzset{Switch/.style={circle,draw=gray,fill=gray,line width=6pt,solid}}
+\tikzset{PhysicalLink/.style={draw=gray,line width=6pt,solid}}
+\tikzset{NetworkLink/.style={draw=black,line width=3pt,dashed}}
+
+% logical layer: daemon processes and the hierarchy links between them
+\tikzset{Daemon/.style={ellipse,draw=spbuBlue,line width=3pt,solid,scale=0.65}}
+\tikzset{DaemonLink/.style={draw=spbuBlue,line width=3pt,solid}}
+
+% application layer: kernels and the links between them
+\tikzset{Process/.style={ellipse,draw=spbuGreen,line width=3pt,solid}}
+\tikzset{ProcessEdge/.style={draw=spbuGreen,line width=3pt,solid}}
+
+\tikzset{Task/.style={Process,anchor=center,draw=spbuGreen}} % Process centred on the given coordinate
+\tikzset{TaskEdge/.style={ProcessEdge,draw=spbuGreen,solid,->}} % ProcessEdge with an arrow head
+\tikzset{Label/.style={label distance=0.1cm,text=spbuTerracotta}}
+\tikzset{TaskLabel/.style={label distance=0.1cm,text=spbuGreen}}