commit 24b076991cca5be8a2da5f949176f525ae94fed5
parent d72dcd4efbcb2a5198bfa15155768c06f85f36fc
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date: Wed, 14 Apr 2021 11:02:26 +0300
Cite references.
Diffstat:
main.bib | | | 44 | +++++++++++++++++++++++++++++++++++++------- |
main.tex | | | 54 | ++++++++++++++++++++++++++++-------------------------- |
2 files changed, 65 insertions(+), 33 deletions(-)
diff --git a/main.bib b/main.bib
@@ -1,5 +1,5 @@
-*** BibTool WARNING: (line 6 in <stdin>): 237 non-space characters ignored.
+*** BibTool WARNING: (line 4 in <stdin>): 64 non-space characters ignored.
@InProceedings{ llvm,
author = {Lattner, Chris and Adve, Vikram},
@@ -26,13 +26,10 @@
isbn = {9781450302036},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
- url = {https://doi.org/10.1145/1869459.1869527},
doi = {10.1145/1869459.1869527},
booktitle = {Proceedings of the ACM International Conference on Object
Oriented Programming Systems Languages and Applications},
pages = {835–847},
- keywords = {domain specific languages, dynamic optimizations, parallel
- programming},
location = {Reno/Tahoe, Nevada, USA},
series = {OOPSLA'10},
comment = {"While one can hope that a few parallel programming experts
@@ -44,8 +41,8 @@
@Article{ fetterly2009dryadlinq,
title = {{DryadLINQ}: A system for general-purpose distributed
data-parallel computing using a high-level language},
- author = {Fetterly, Yuan Yu Michael Isard Dennis and Budiu, Mihai and
- Erlingsson, {\'U}lfar and Currey, Pradeep Kumar Gunda Jon},
+ author = {Yuan Yu and Michael Isard and Dennis Fetterly and Mihai Budiu and
+ Erlingsson, {\'U}lfar and Gunda, Pradeep Kumar and Currey, Jon},
journal = {Proc. LSDS-IR},
volume = {8},
year = {2009},
@@ -82,7 +79,40 @@
software architectures (selected papers from FOCLASA11)},
issn = {0167-6423},
doi = {10.1016/j.scico.2013.03.014},
- url = {https://www.sciencedirect.com/science/article/pii/S0167642313000750},
author = {Eduardo Gurgel Pinho and Francisco Heron {de Carvalho}},
comment = {Main object, good references}
}
+
+@Article{ spark2016,
+ author = {Zaharia, Matei and Xin, Reynold S. and Wendell, Patrick and
+ Das, Tathagata and Armbrust, Michael and Dave, Ankur and Meng,
+ Xiangrui and Rosen, Josh and Venkataraman, Shivaram and
+ Franklin, Michael J. and Ghodsi, Ali and Gonzalez, Joseph and
+ Shenker, Scott and Stoica, Ion},
+ title = {{Apache Spark}: A Unified Engine for Big Data Processing},
+ year = {2016},
+ publisher = {Association for Computing Machinery},
+ address = {New York, NY, USA},
+ volume = {59},
+ number = {11},
+ issn = {0001-0782},
+ url = {https://doi.org/10.1145/2934664},
+ doi = {10.1145/2934664},
+ journal = {Commun. ACM},
+ month = oct,
+ pages = {56–65}
+}
+
+@Article{ fault-tolerant-distributed-haskell,
+ title = {Transparent fault tolerance for scalable functional
+ computation},
+ volume = {26},
+ doi = {10.1017/S095679681600006X},
+ journal = {Journal of Functional Programming},
+ publisher = {Cambridge University Press},
+ author = {Stewart, Robert and Maier, Patrick and Trinder, Phil},
+ year = {2016},
+ pages = {e5},
+ comment = {"Computations in HdpH-RS are always as reliable as the root
+ node".}
+}
diff --git a/main.tex b/main.tex
@@ -39,21 +39,24 @@
\section{Introduction}
-There are many programming frameworks for parallel and distributed computing
-(TODO cite) which are successful both in industry and academia, however, all
-these frameworks are isolated and self-contained. We believe that the main
-reason that there is no common denominator between these frameworks is that
-there is no protocol or low-level language for distributed computations. For
-sequential computations we have bytecode (e.g.~LLVM~\cite{llvm}, Java bytecode,
-Guile bytecode) that is used as an intermediate, portable and universal
+There are many programming frameworks and languages for parallel and
+distributed
+computing~\cite{spark2016,fault-tolerant-distributed-haskell,wilde2011swift,fetterly2009dryadlinq,pinho2014oopp}
+which are successful both in industry and academia, however, all of them are
+isolated and self-contained. We believe that the main reason that there is no
+common denominator between these frameworks and languages is that there is no
+protocol or low-level language for distributed computations. For sequential
+computations we have bytecode (e.g.~LLVM~\cite{llvm}, Java bytecode, Guile
+bytecode) that is used as an intermediate, portable and universal
representation of a programme written in any language; also we have assembler
-which is non-portable and non-universal, but still popular intermediate
-representation. One important feature, that bytecode and assembler lack, is an
-ability to communicate between parallel processes. This communication is the
-common denominator on top of which all the frameworks are built, and there is
-no universal low-level protocol or language that describes communication.
-
-Why such low-level language exists for sequential computations, but does not
+which is non-portable, but still popular intermediate representation. One
+important feature, that bytecode and assembler lack, is an ability to
+communicate between parallel processes. This communication is the common
+denominator on top of which all the frameworks and languages for parallel and
+distributed computations can be built, and there is no universal low-level
+protocol or language that describes communication.
+
+A common low-level language exists for sequential computations, but why does it not
exist for parallel and distributed ones? One of the reasons, which applies to
both distributed and parallel computations, is the fact that people still think
about programmes as sequences of steps~--- the same way as people themselves
@@ -64,20 +67,21 @@ compositions of functions with no implied order of computation. Another reason
which applies to distributed computations is the fact that these computations
are inherently unreliable and there is no universal approach for handling
cluster node failures. While imperative languages produce more efficient
-programmes, they do not provide safety from deadlocks and fault-tolerance
+programmes, they do not provide safety from deadlocks and fault tolerance
guarantees. Also, they are much more difficult to write, as a human has to
work with mutable state (local and global variables, objects etc.) and it is
difficult to keep this state in mind while writing the code. Functional
languages minimise the usage of mutable state, provide partial safety from
-deadlocks (under the assumption that a programmer does not use low-level
-facilities) and can be modified to provide fault tolerance. People understand
-the potential of functional languages, but have not yet realised this potential
-to get all their advantages; people realised the full potential of imperative
-languages, but do not know how to get rid of their disadvantages.
+deadlocks (under the assumption that a programmer does not use locks manually)
+and can be modified to provide fault tolerance. From the authors' perspective
+people understand the potential of functional languages, but have not yet
+realised this potential to get all their advantages; people realised the full
+potential of imperative languages, but do not know how to get rid of their
+disadvantages.
In this paper we describe low-level language and protocol called \emph{kernels}
which is suitable for distributed and parallel computations. Kernels provide
-automatic fault-tolerance and can be used to exchange the data between
+automatic fault tolerance and can be used to exchange the data between
programmes written in different languages. We implement kernels in C++ and
build a reference cluster scheduler that uses kernels as the protocol to run
applications that span multiple cluster nodes. Then we use kernels as an
@@ -85,12 +89,10 @@ intermediate representation for Guile programming language, run benchmarks
using the scheduler and compare the performance of different implementations of
the same programme.
-TODO Spark, Distributed Haskell
-
\cite{lang-virt}
-\cite{fetterly2009dryadlinq}
-\cite{wilde2011swift}
-\cite{pinho2014oopp}
+%\cite{fetterly2009dryadlinq}
+%\cite{wilde2011swift}
+%\cite{pinho2014oopp}
\section{Methods}