hpcs-17-subord

git clone https://git.igankevich.com/hpcs-17-subord.git
Log | Files | Refs

commit ca3ff453c8990331b678b93bdd876118bfc034e7
parent 7d1b914aa319406c8513d365d5976201dbb8cf4d
Author: Ivan Gankevich <igankevich@ya.ru>
Date:   Fri, 24 Mar 2017 15:05:46 +0300

Add references discussed previously with Yuri.

Diffstat:
bib/refs.bib | 49 +++++++++++++++++++++++++++++++++++++++----------
src/body.tex | 14 +++++++-------
src/head.tex | 4 ++++
3 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/bib/refs.bib b/bib/refs.bib @@ -1,12 +1,41 @@ -electronic with a howpublished information field -From the August 2001 issue of "IEEE/ACM Transactions on Networking", -page 391, reference #7. -@electronic{IEEEexample:electronhowinfo, - author = "V. Jacobson", - title = "Modified {TCP} Congestion Avoidance Algorithm", - howpublished = "end2end-interest mailing list", - url = "ftp://ftp.isi.edu/end2end/end2end-interest-1990.mail", - month = apr, - year = "1990" +@book{alexandrescu2001modern, + title={Modern C++ design: generic programming and design patterns applied}, + author={Alexandrescu, Andrei}, + year={2001}, + publisher={Addison-Wesley} } +@article{stroustrup2012software, + title={Software development for infrastructure}, + author={Stroustrup, Bjarne}, + journal={IEEE Computer}, + volume={45}, + number={1}, + pages={47--58}, + year={2012} +} + +@inproceedings{zuckerman2011using, + title={Using a codelet program execution model for exascale machines: + position paper}, + author={Zuckerman, St{\'e}phane and Suetterlein, Joshua and Knauerhase, Rob + and Gao, Guang R}, + booktitle={Proceedings of the 1st International Workshop on Adaptive + Self-Tuning Computing Systems for the Exaflop Era}, + pages={64--69}, + year={2011}, + organization={ACM} +} + +@article{meneses2015using, + title={Using migratable objects to enhance fault tolerance schemes in + supercomputers}, + author={Meneses, Esteban and Ni, Xiang and Zheng, Gengbin and Mendes, Celso L + and Kale, Laxmikant V}, + journal={IEEE transactions on parallel and distributed systems}, + volume={26}, + number={7}, + pages={2061--2074}, + year={2015}, + publisher={IEEE} +} diff --git a/src/body.tex b/src/body.tex @@ -336,13 +336,13 @@ state to each subordinate node, and minimal number of copies may be configured in the programme instead. 
In this case using maps and sets over arrays may incur more overhead as they require certain amount of elements to make searching for an element more efficient than in -arrays~\cite{lemiere-or-straustrup-TODO}. There is no such thing as minimal -number of object copies that ensures fault-tolerance in HPC, but for parallel -file systems there is a number of replicas. This number is typically set to 2 -or 3 depending on the particular site. We believe that there is no need to set -number of object copies more than that, as it allows to tolerate simultaneous -failure of 2 and 3 nodes respectively: it should be more than enough to -tolerate node failures which are common at large +arrays~\cite{alexandrescu2001modern,stroustrup2012software}. There is no such +thing as minimal number of object copies that ensures fault-tolerance in HPC, +but for parallel file systems there is a number of replicas. This number is +typically set to 2 or 3 depending on the particular site. We believe that there +is no need to set number of object copies more than that, as it allows to +tolerate simultaneous failure of 2 and 3 nodes respectively: it should be more +than enough to tolerate node failures which are common at large scale~\cite{mean-time-between-failures-darpa-TODO}. So, using arrays with linear search complexity is more efficient than maps and sets, because the number of elements in them is small, and linear search takes less time than diff --git a/src/head.tex b/src/head.tex @@ -12,3 +12,7 @@ hierarchy interactions framework can provide continuous computations in case of hardware errors or electricity outages. \section{Related work} + +\cite{zuckerman2011using} + +\cite{meneses2015using}