hpcs-16-factory

Factory: Non-stop batch jobs without checkpointing
git clone https://git.igankevich.com/hpcs-16-factory.git
Log | Files | Refs

references.bib (10003B)


      1 @book{wilde2011rest,
      2   author    = {Wilde, Erik and Pautasso, Cesare},
      3   title     = {{REST}: from research to practice},
      4   publisher = {Springer Science \& Business Media},
      5   year      = {2011},
      6 }
      7 
      8 @incollection{kochman2012batched,
      9   author    = {Kochman, Sebastian and Wojciechowski, Pawe{\l} T and Kmieciak, Mi{\l}osz},
     10   title     = {Batched transactions for {RESTful} web services},
     11   booktitle = {Current Trends in Web Engineering},
     12   pages     = {86--98},
     13   publisher = {Springer},
     14   year      = {2012},
     15 }
     16 
     17 @phdthesis{armstrong2003thesis,
     18   author = {Armstrong, Joe},
     19   title  = {Making reliable distributed systems in the presence of software errors},
     20   school = {The Royal Institute of Technology Stockholm, Sweden},
     21   year   = {2003},
     22 }
     23 
     24 
     25 % second part
     26 
     27 @book{andrianov2007,
     28   author    = {Andrianov, S. and Degtyarev, A.},
     29   title     = {Parallel and distributed computations (in Russian)},
     30   publisher = {Saint Petersburg State University},
     31   year      = {2007},
     32   isbn      = {9785983400733},
     33 }
     34 
     35 @article{soshmina2007,
     36   author  = {Soshmina, I. and Bogdanov, A.},
     37   title   = {Using {GRID} technologies for computations (in Russian)},
     38   journal = {Saint Petersburg State University Bulletin (Physics and Chemistry)},
     39   series  = {4},
     40   volume  = {3},
     41   pages   = {130--137},
     42   year    = {2007},
     43 }
     44 
     45 @incollection{deg2003,
     46   author    = {Degtyarev, A.},
     47   title     = {High Performance Computer Technologies in Shipbuilding},
     48   editor    = {Birk, L. and Harries, S.},
     49   booktitle = {OPTIMISTIC --- optimization in marine design, Mensch \& Buch Verlag, Berlin},
     50   year      = {2003},
     51 }
     52 
     53 @inproceedings{lifflander2014scalable,
     54   author       = {Lifflander, Jonathan and Meneses, Esteban and Menon, Harshitha and Miller, Phil and Krishnamoorthy, Sriram and Kal{\'e}, Laxmikant V},
     55   title        = {Scalable replay with partial-order dependencies for message-logging fault tolerance},
     56   booktitle    = {IEEE International Conference on Cluster Computing (CLUSTER)},
     57   pages        = {19--28},
     58   organization = {IEEE},
     59   year         = {2014},
     60 }
     61 
     62 @book{tel2000introduction,
     63   author    = {Tel, Gerard},
     64   title     = {Introduction to distributed algorithms},
     65   publisher = {Cambridge University Press},
     66   year      = {2000},
     67 }
     68 
     69 @inproceedings{lantz2010network,
     70   author       = {Lantz, Bob and Heller, Brandon and McKeown, Nick},
     71   title        = {A network in a laptop: rapid prototyping for software-defined networks},
     72   booktitle    = {Proceedings of the 9\textsuperscript{th} ACM SIGCOMM Workshop on Hot Topics in Networks},
     73   pages        = {19},
     74   organization = {ACM},
     75   year         = {2010},
     76 }
     77 
     78 @inproceedings{handigol2012reproducible,
     79   author       = {Handigol, Nikhil and Heller, Brandon and Jeyakumar, Vimalkumar and Lantz, Bob and McKeown, Nick},
     80   title        = {Reproducible network experiments using container-based emulation},
     81   booktitle    = {Proceedings of the 8\textsuperscript{th} international conference on Emerging networking experiments and technologies},
     82   pages        = {253--264},
     83   organization = {ACM},
     84   year         = {2012},
     85 }
     86 
     87 @phdthesis{heller2013reproducible,
     88   author = {Heller, Brandon},
     89   title  = {Reproducible Network Research with High-fidelity Emulation},
     90   school = {Stanford University},
     91   year   = {2013},
     92 }
     93 
     94 
     95 % Leader election
     96 
     97 @article{brunekreef1996design,
     98   author    = {Brunekreef, Jacob and Katoen, Joost-Pieter and Koymans, Ron and Mauw, Sjouke},
     99   title     = {Design and analysis of dynamic leader election protocols in broadcast networks},
    100   journal   = {Distributed Computing},
    101   volume    = {9},
    102   number    = {4},
    103   pages     = {157--171},
    104   publisher = {Springer},
    105   year      = {1996},
    106 }
    107 
    108 @incollection{aguilera2001stable,
    109   author    = {Aguilera, Marcos K and Delporte-Gallet, Carole and Fauconnier, Hugues and Toueg, Sam},
    110   title     = {Stable leader election},
    111   booktitle = {Distributed Computing},
    112   pages     = {108--122},
    113   publisher = {Springer},
    114   year      = {2001},
    115 }
    116 
    117 @article{romano2014design,
    118   author    = {Romano, Paolo and Quaglia, Francesco},
    119   title     = {Design and evaluation of a parallel invocation protocol for transactional applications over the web},
    120   journal   = {IEEE Transactions on Computers},
    121   volume    = {63},
    122   number    = {2},
    123   pages     = {317--334},
    124   publisher = {IEEE},
    125   year      = {2014},
    126 }
    127 
    128 
    129 % new refs
    130 
    131 @article{egwutuoha2013survey,
    132   author    = {Egwutuoha, Ifeanyi P and Levy, David and Selic, Bran and Chen, Shiping},
    133   title     = {A survey of fault tolerance mechanisms and checkpoint/restart implementations for high performance computing systems},
    134   journal   = {The Journal of Supercomputing},
    135   volume    = {65},
    136   number    = {3},
    137   pages     = {1302--1326},
    138   publisher = {Springer},
    139   year      = {2013},
    140 }
    141 
    142 @incollection{bhandarkar2001adaptive,
    143   author    = {Bhandarkar, Milind and Kal{\'e}, Laxmikant V and de Sturler, Eric and Hoeflinger, Jay},
    144   title     = {Adaptive load balancing for {MPI} programs},
    145   booktitle = {Computational Science --- {ICCS} 2001},
    146   pages     = {108--117},
    147   publisher = {Springer},
    148   year      = {2001},
    149 }
    150 
    151 @article{lusk2010more,
    152   author  = {Lusk, Ewing L and Pieper, Steven C and Butler, Ralph M and others},
    153   title   = {More scalability, less pain: A simple programming model and its implementation for extreme computing},
    154   journal = {SciDAC Review},
    155   volume  = {17},
    156   number  = {1},
    157   pages   = {30--37},
    158   year    = {2010},
    159 }
    160 
    161 @article{bala2012fault,
    162   author  = {Bala, Anju and Chana, Inderveer},
    163   title   = {Fault tolerance-challenges, techniques and implementation in cloud computing},
    164   journal = {IJCSI International Journal of Computer Science Issues},
    165   volume  = {9},
    166   number  = {1},
    167   year    = {2012},
    168   issn    = {1694-0814},
    169 }
    170 
    171 
    172 @inproceedings{gankevich2015subordination,
    173   author       = {Gankevich, Ivan and Tipikin, Yuri and Gaiduchok, Vladimir},
    174   title        = {Subordination: Cluster management without distributed consensus},
    175   booktitle    = {International Conference on High Performance Computing \& Simulation (HPCS)},
    176   pages        = {639--642},
    177   organization = {IEEE},
    178   year         = {2015},
    179 }
    180 
    181 @book{anderson2010couchdb,
    182   author    = {Anderson, J Chris and Lehnardt, Jan and Slater, Noah},
    183   title     = {{CouchDB}: The definitive guide},
    184   publisher = {O'Reilly Media, Inc.},
    185   year      = {2010},
    186 }
    187 
    188 @article{lakshman2010cassandra,
    189   author    = {Lakshman, Avinash and Malik, Prashant},
    190   title     = {Cassandra: A decentralized structured storage system},
    191   journal   = {ACM SIGOPS Operating Systems Review},
    192   volume    = {44},
    193   number    = {2},
    194   pages     = {35--40},
    195   publisher = {ACM},
    196   year      = {2010},
    197 }
    198 
    199 @article{dean2008mapreduce,
    200   author    = {Dean, Jeffrey and Ghemawat, Sanjay},
    201   title     = {{MapReduce}: Simplified data processing on large clusters},
    202   journal   = {Communications of the ACM},
    203   volume    = {51},
    204   number    = {1},
    205   pages     = {107--113},
    206   publisher = {ACM},
    207   year      = {2008},
    208 }
    209 
    210 @inproceedings{vavilapalli2013yarn,
    211   author       = {Vavilapalli, Vinod Kumar and Murthy, Arun C and Douglas, Chris and Agarwal, Sharad and Konar, Mahadev and Evans, Robert and Graves, Thomas and Lowe, Jason and Shah, Hitesh and Seth, Siddharth and others},
    212   title        = {{Apache} {Hadoop} {YARN}: Yet Another Resource Negotiator},
    213   booktitle    = {Proceedings of the 4th annual Symposium on Cloud Computing},
    214   pages        = {5},
    215   organization = {ACM},
    216   year         = {2013},
    217 }
    218 
    219 % autoreg
    220 
    221 @inproceedings{autoreg-stab,
    222   author    = {A. Degtyarev and I. Gankevich},
    223   title     = {Evaluation of hydrodynamic pressures for autoregression model of irregular waves},
    224   booktitle = {Proceedings of 11\textsuperscript{th} International Conference ``Stability of Ships and Ocean Vehicles'', Athens},
    225   pages     = {841--852},
    226   year      = {2012},
    227 }
    228 
    229 @inproceedings{autoreg1,
    230   author    = {Degtyarev, A. B. and Reed, A. M.},
    231   title     = {Modelling of Incident Waves Near the Ship's Hull (Application of autoregressive approach in problems of simulation of rough seas)},
    232   booktitle = {Proceedings of the 12\textsuperscript{th} International Ship Stability Workshop},
    233   year      = {2011},
    234 }
    235 
    236 @inproceedings{autoreg2,
    237   author    = {Degtyarev, A. B. and Reed, A. M.},
    238   title     = {Synoptic and Short-Term Modeling
    239                of Ocean Waves},
    240   booktitle = {Proceedings of 29\textsuperscript{th} Symposium on Naval Hydrodynamics},
    241   year      = {2012},
    242 }
    243 
    244 @inproceedings{autoreg2011csit,
    245   author    = {A. Degtyarev and I. Gankevich},
    246   title     = {Wave Surface Generation Using {OpenCL}, {OpenMP} and {MPI}},
    247   booktitle = {Proceedings of 8\textsuperscript{th} International Conference ``Computer Science \& Information Technologies''},
    248   pages     = {248--251},
    249   year      = {2011},
    250 }
    251 
    252 @inproceedings{meyer2012radic,
    253   author       = {Meyer, Hugo and Rexachs, Dolores and Luque, Emilio},
    254   title        = {{RADIC}: A Fault Tolerant Middleware with Automatic Management of Spare Nodes},
    255   booktitle    = {Proceedings of the International Conference on Parallel and Distributed Processing Techniques and Applications (PDPTA)},
    256   pages        = {1},
    257   organization = {The Steering Committee of The World Congress in Computer Science, Computer Engineering and Applied Computing (WorldComp)},
    258   year         = {2012},
    259 }
    260 
    261 @inproceedings{guermouche2011uncoordinated,
    262   author       = {Guermouche, Amina and Ropars, Thomas and Brunet, Elisabeth and Snir, Marc and Cappello, Franck},
    263   title        = {Uncoordinated checkpointing without domino effect for send-deterministic {MPI} applications},
    264   booktitle    = {Parallel \& Distributed Processing Symposium (IPDPS), 2011 IEEE International},
    265   pages        = {989--1000},
    266   organization = {IEEE},
    267   year         = {2011},
    268 }
    269 
    270 @inproceedings{Chiang2004,
    271   author    = {Chiang, Chia-Chu},
    272   title     = {Low-level Language Constructs Considered Harmful for Distributed Parallel Programming},
    273   booktitle = {Proceedings of the 42nd Annual Southeast Regional Conference},
    274   series    = {ACM-SE 42},
    275   year      = {2004},
    276   isbn      = {1-58113-870-9},
    277   location  = {Huntsville, Alabama},
    278   pages     = {279--284},
    279   numpages  = {6},
    280   url       = {http://doi.acm.org/10.1145/986537.986603},
    281   doi       = {10.1145/986537.986603},
    282   acmid     = {986603},
    283   publisher = {ACM},
    284   address   = {New York, NY, USA},
    285   keywords  = {CORBA, coordination, middleware, parallelism},
    286 }
    287 
    288 @article{fischer1985impossibility,
    289   author    = {Fischer, Michael J and Lynch, Nancy A and Paterson, Michael S},
    290   title     = {Impossibility of distributed consensus with one faulty process},
    291   journal   = {Journal of the ACM (JACM)},
    292   volume    = {32},
    293   number    = {2},
    294   pages     = {374--382},
    295   publisher = {ACM},
    296   year      = {1985},
    297 }
    298 
    299 @article{fekete1993impossibility,
    300   author    = {Fekete, Alan and Lynch, Nancy and Mansour, Yishay and Spinelli, John},
    301   title     = {The impossibility of implementing reliable communication in the face of crashes},
    302   journal   = {Journal of the ACM (JACM)},
    303   volume    = {40},
    304   number    = {5},
    305   pages     = {1087--1107},
    306   publisher = {ACM},
    307   year      = {1993},
    308 }