commit 5fa5a12c1a5a10b39bb508d998732d50e68be6c5
parent 16308bcb1fb776d31e823d57f7458a5f4746baa3
Author: Ivan Gankevich <igankevich@ya.ru>
Date: Mon, 15 May 2017 15:38:35 +0300
Add more references.
Diffstat:
2 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/bib/refs.bib b/bib/refs.bib
@@ -73,3 +73,39 @@
year={2007},
organization={IOP Publishing}
}
+
+@inproceedings{robertson2000linux,
+ title={{Linux-HA} Heartbeat System Design},
+ author={Robertson, Alan},
+ booktitle={Proc. of 4\textsuperscript{th} Annual Linux Showcase \&
+ Conference},
+ year={2000},
+ pages={305--316},
+ address={Atlanta, Georgia},
+ organization={USENIX},
+ url={http://static.usenix.org/publications/library/proceedings/als00/2000papers/papers/full_papers/robertson/robertson_html/}
+}
+
+@article{haddad2003ha,
+ author={Haddad, Ibrahim and Leangsuksun, Chokchai and Scott, Stephen L},
+ title={{HA-OSCAR}: the birth of highly available {OSCAR}},
+ journal={Linux Journal},
+ publisher={Belltown Media},
+ year={2003},
+ volume={2003},
+ number={115},
+ pages={1}
+}
+
+@article{leangsuksun2005achieving,
+ title={Achieving high availability and performance computing with an {HA-OSCAR}
+ cluster},
+ author={Leangsuksun, Chokchai Box and Shen, Lixin and Liu, Tong and Scott,
+ Stephen L},
+ journal={Future Generation Computer Systems},
+ volume={21},
+ number={4},
+ pages={597--606},
+ year={2005},
+ publisher={Elsevier}
+}
diff --git a/src/tail.tex b/src/tail.tex
@@ -49,6 +49,22 @@ kernels (their execution order) and, consequently, defines for each kernel a
principal kernel, responsibility of which is to re-execute failed subordinate
kernels upon a failure.
+With respect to various high-availability cluster
+projects~\cite{robertson2000linux,haddad2003ha,leangsuksun2005achieving}, our
+approach has the following advantages. First, it scales to a large number of
+nodes, as only point-to-point communication between slave and master nodes is
+used instead of broadcast messages (which has been shown in the previous
+work~\cite{gankevich2015subordination}); hence, the use of several switches and
+routers is possible within a single cluster. Second, our approach does not
+require the use of standby servers to provide high availability of a master
+node: we provide fault tolerance on the kernel layer instead. As the computation
+progresses, kernels copy themselves to nodes that are directly connected to the
+current one, and these can be any nodes from the cluster. Finally,
+high-availability cluster projects do not deal with parallel programme
+failures: they aim to provide high availability for services running on the
+master node (NFS, SMB, DHCP, etc.), whereas our approach is specifically
+targeted at providing continuous execution of parallel applications.
+
\section{Conclusion}
In the paper we propose a system architecture consisting of two tree