refs.bib (3443B)
% Conventions applied to the six entries directly below:
%  - names are written "Last, First" and middle initials carry a period,
%    so styles that print full names render them correctly;
%  - braces inside a title protect capitals that sentence-casing
%    bibliography styles would otherwise lowercase (e.g. {C++});
%  - journal names are spelled out with their proper capitalisation,
%    because styles print that field verbatim.

@book{alexandrescu2001modern,
  author    = {Alexandrescu, Andrei},
  title     = {Modern {C++} Design: Generic Programming and Design Patterns Applied},
  publisher = {Addison-Wesley},
  year      = {2001},
}

@article{stroustrup2012software,
  author  = {Stroustrup, Bjarne},
  title   = {Software development for infrastructure},
  journal = {IEEE Computer},
  volume  = {45},
  number  = {1},
  pages   = {47--58},
  year    = {2012},
}

@inproceedings{zuckerman2011using,
  author       = {Zuckerman, St{\'e}phane and Suetterlein, Joshua and Knauerhase, Rob and Gao, Guang R.},
  title        = {Using a codelet program execution model for exascale machines: position paper},
  booktitle    = {Proceedings of the 1st International Workshop on Adaptive Self-Tuning Computing Systems for the Exaflop Era},
  pages        = {64--69},
  year         = {2011},
  organization = {ACM},
}

@article{meneses2015using,
  author    = {Meneses, Esteban and Ni, Xiang and Zheng, Gengbin and Mendes, Celso L. and Kale, Laxmikant V.},
  title     = {Using migratable objects to enhance fault tolerance schemes in supercomputers},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  volume    = {26},
  number    = {7},
  pages     = {2061--2074},
  year      = {2015},
  publisher = {IEEE},
}

@inproceedings{gankevich2015subordination,
  author       = {Gankevich, Ivan and Tipikin, Yuri and Gaiduchok, Vladimir},
  title        = {Subordination: Cluster management without distributed consensus},
  booktitle    = {High Performance Computing \& Simulation (HPCS), 2015 International Conference on},
  pages        = {639--642},
  year         = {2015},
  organization = {IEEE},
}

@inproceedings{gankevich2016factory,
  author       = {Gankevich, Ivan and Tipikin, Yuri and Korkhov, Vladimir and Gaiduchok, Vladimir},
  title        = {Factory: Non-stop batch jobs without checkpointing},
  booktitle    = {High Performance Computing \& Simulation (HPCS), 2016 International Conference on},
  pages        = {979--984},
  year         = {2016},
  organization = {IEEE},
}

% Conventions applied to the five entries directly below:
%  - names are written "Last, First" and middle initials carry a period;
%  - braces inside a title protect capitals that sentence-casing
%    bibliography styles would otherwise lowercase (e.g. {HA-OSCAR});
%  - titles carry no trailing period and no hard-coded formatting,
%    since the bibliography style supplies punctuation and typography.

% This paper appeared in a journal series and has both a volume and an
% issue number, so it is stored as an article; standard styles warn when a
% proceedings-paper entry sets both of those fields.
% NOTE(review): verify the page range against the publisher record (the
% series may identify papers by article number instead) -- TODO confirm.
@article{schroeder2007understanding,
  author    = {Schroeder, Bianca and Gibson, Garth A.},
  title     = {Understanding failures in petascale computers},
  journal   = {Journal of Physics: Conference Series},
  volume    = {78},
  number    = {1},
  pages     = {12--22},
  year      = {2007},
  publisher = {IOP Publishing},
}

@inproceedings{robertson2000linux,
  author       = {Robertson, Alan},
  title        = {{Linux-HA} Heartbeat System Design},
  booktitle    = {Proceedings of the 4th Annual Linux Showcase \& Conference},
  pages        = {305--316},
  year         = {2000},
  address      = {Atlanta, Georgia},
  organization = {USENIX},
  url          = {http://static.usenix.org/publications/library/proceedings/als00/2000papers/papers/full_papers/robertson/robertson_html/},
}

@article{haddad2003ha,
  author    = {Haddad, Ibrahim and Leangsuksun, Chokchai and Scott, Stephen L.},
  title     = {{HA-OSCAR}: the birth of highly available {OSCAR}},
  journal   = {Linux Journal},
  volume    = {2003},
  number    = {115},
  pages     = {1},
  year      = {2003},
  publisher = {Belltown Media},
}

@article{leangsuksun2005achieving,
  author    = {Leangsuksun, Chokchai Box and Shen, Lixin and Liu, Tong and Scott, Stephen L.},
  title     = {Achieving high availability and performance computing with an {HA-OSCAR} cluster},
  journal   = {Future Generation Computer Systems},
  volume    = {21},
  number    = {4},
  pages     = {597--606},
  year      = {2005},
  publisher = {Elsevier},
}

% Project web page; the source gives no publication year for it.
@misc{factoryGithub,
  author       = {Gankevich, Ivan and Tipikin, Yuri},
  title        = {Factory: A framework for distributed computing},
  howpublished = {\url{https://igankevich.github.io/factory/index.html}},
}