Bibliography for William Gropp

%
% Here is a start on building a BibTeX database for all of my publications.
% Some of these I've extracted from bibliography collections. After some
% consideration, I've decided to limit the data (eliminating price, bibdate,
% most annotations, abstract, and related material). I have left in
% keywords when not too lengthy.
%
% In addition to the usual BibTeX fields, I've added the following to help
% organize my own publications:
% area = research area (list below)
% areaseq = sequence number used to sort entries by area
% preprintof = BibTeX key of the paper that this is a preprint of
%
% The research areas are drawn from this list (to keep the number of
% areas to a small number)
% A - Algorithm
% App - Application
% Coll,Datatype,RMA ; PMI - MPI features
% D - Domain Decomposition
% formal - Formal methods
% GPU - GPUs
% I - I/O (particularly parallel I/O)
% M - MPI
% Misc - miscellaneous
% NS - Numerical Software
% P - PETSc
% Par - Parallel, not MPI
% PV - Performance Visualization
% Perf- Performance Modeling
% R - Mesh Refinement
% S - Software
% Thread - Threads
%
%
% Better is a hierarchical, multi-category representation, e.g.,
% primary:secondary:tertiary ; primary:secondary
% For example,
% Par:MPI:datatype
%
% Also, the special field
% preprintfile="papers/xxx/foo.pdf"
% is used to provide a link to a local copy, and the field
% officialurl="http://....//"
% is used to provide a link to the official copy (or other information, such
% as a book or meeting's web site).

%
% Section: Books
%
@Book{Gropp:1994:UMP,
  author    = {William Gropp and Ewing Lusk and Anthony Skjellum},
  title     = {Using {MPI}: Portable Parallel Programming with the Message-Passing Interface},
  pages     = {xx + 307},
  year      = 1994,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  isbn      = {0-262-57104-8},
  lccn      = {QA76.642 G76 1994},
  OPTseries = {Scientific and engineering computation},
  keywords  = {Computer interfaces.; Parallel computers --- Programming.; Parallel programming; Parallel programming (Computer science)},
  area      = {M},
  areaseq   = {0},
}

@Book{BFSmith_PEBjorstad_WDGropp_1996a,
  author    = {B. F. Smith and P. E. Bj{\o}rstad and W. D. Gropp},
  title     = {Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial Differential Equations},
  year      = 1996,
  publisher = {Cambridge University Press},
  address   = {New York},
  area      = {D},
  areaseq   = {0},
}

% MPI-2 volume of "The Complete Reference"; the title uses an em-dash (---)
% rather than a spaced hyphen.
@Book{Gropp:1998:MPI2Book,
  author    = {William Gropp and Steven Huss-Lederman and Andrew Lumsdaine and Ewing Lusk and Bill Nitzberg and William Saphir and Marc Snir},
  title     = {{MPI}---The Complete Reference: Volume 2, The {MPI}-2 Extensions},
  year      = 1998,
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  OPTseries = {Scientific and engineering computation},
  area      = {M},
  areaseq   = {0},
}

%alias usingmpi
@Book{gropp-lusk-skjellum:using-mpi2nd,
  author    = {William Gropp and Ewing Lusk and Anthony Skjellum},
  title     = {Using {MPI}: Portable Parallel Programming with the Message-Passing Interface},
  edition   = {Second},
  year      = 1999,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  area      = {M},
  areaseq   = {0},
}

@Book{gropp-lusk-thakur:usingmpi2,
  author    = "William Gropp and Ewing Lusk and Rajeev Thakur",
  title     = "Using {MPI-2}: Advanced Features of the Message-Passing Interface",
  year      = 1999,
  publisher = "MIT Press",
  address   = "Cambridge, MA",
  area      = {M},
  areaseq   = {0},
}

@Book{crpchandbook,
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  title     = "Sourcebook of Parallel Computing",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@Book{beowulflinux2nd,
  editor    = "William Gropp and Ewing Lusk and Thomas Sterling",
  title     = "Beowulf Cluster Computing with {Linux}",
  edition   = "2nd",
  year      = 2003,
  publisher = "MIT Press",
}

% IWOMP 2011 proceedings (LNCS 6665). The bare DOI is stored in `doi`;
% `ee` keeps the DBLP-exported resolver URL for the same DOI.
@Proceedings{DBLP:conf/iwomp/2011,
  editor      = {Barbara M. Chapman and William D. Gropp and Kalyan Kumaran and Matthias S. M{\"u}ller},
  title       = {OpenMP in the Petascale Era -- 7th International Workshop on OpenMP, IWOMP 2011, Chicago, IL, USA, June 13-15, 2011. Proceedings},
  booktitle   = {IWOMP},
  series      = {Lecture Notes in Computer Science},
  volume      = {6665},
  year        = {2011},
  publisher   = {Springer},
  isbn        = {978-3-642-21486-8},
  doi         = {10.1007/978-3-642-21487-5},
  ee          = {http://dx.doi.org/10.1007/978-3-642-21487-5},
  officialurl = "http://www.springerlink.com/content/978-3-642-21486-8/#section=903292&page=1",
  bibsource   = {DBLP, http://dblp.uni-trier.de}
}

% "Using Advanced MPI" (MIT Press, 2014). The month uses the standard
% macro, and the publisher city is in `address` like the other @Book entries.
@Book{UsingAdvancedMPI,
  author    = {William Gropp and Torsten Hoefler and Rajeev Thakur and Ewing Lusk},
  title     = {Using Advanced {MPI}: {M}odern Features of the {M}essage-{P}assing {I}nterface},
  month     = nov,
  year      = {2014},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  isbn      = {978-0262527637},
  source    = {http://www.unixer.de/~htor/publications/},
}

% Third edition of "Using MPI". The edition is recorded in `edition`
% instead of being typeset into the title; the month uses the standard macro.
@Book{UsingMPI3rd,
  author    = {William Gropp and Ewing Lusk and Anthony Skjellum},
  title     = {Using {MPI}: {P}ortable Parallel Programming with the {M}essage-{P}assing {I}nterface},
  edition   = {Third},
  month     = nov,
  year      = {2014},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  isbn      = {9780262527392},
}

%
% Section: Book Chapters
%
% 2015

@InCollection{pmodels-mpi:15,
  author    = "William Gropp and Rajeev Thakur",
  title     = "{MPI}",
  booktitle = "Programming Models for Parallel Computing",
  editor    = "Pavan Balaji",
  year      = 2015,
  publisher = "MIT Press",
}

% 2013

@InCollection{bw-in-vetter13,
  author       = "Brett Bode and Michelle Butler and Thom Dunning and Torsten Hoefler and William Kramer and William Gropp and Wen{-mei} Hwu",
  title        = "The {B}lue {W}aters Super-System for Super-Science",
  booktitle    = "Contemporary High Performance Computing: From Petascale Toward Exascale",
  editor       = "Vetter, Jeffrey S.",
  series       = "CRC Computational Science Series",
  volume       = 1,
  edition      = 1,
  pages        = "339--366",
  year         = 2013,
  publisher    = "Taylor and Francis",
  organization = "Taylor and Francis",
  address      = "Boca Raton",
  url          = "http://j.mp/RrBdPZ"
}

% 2003
@InCollection{gro03:sourcebook:,
  author    = "William Gropp",
  title     = "Parallel Computer Architectures",
  booktitle = "Sourcebook of Parallel Computing",
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  pages     = "15--42",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@InCollection{of03:sourcebook:pgmmodels,
  author    = "Ian Foster and William Gropp and Carl Kesselman",
  title     = "Message Passing and Threads",
  booktitle = "Sourcebook of Parallel Computing",
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  pages     = "313--329",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@InCollection{thak03:sourcebook:mpiio,
  author    = "Rajeev Thakur and William Gropp",
  title     = "Parallel {I/O}",
  booktitle = "Sourcebook of Parallel Computing",
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  pages     = "331--355",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@InCollection{gro03:sourcebook:poisson,
  author    = "William Gropp",
  title     = "The 2-D {P}oisson Problem",
  booktitle = "Sourcebook of Parallel Computing",
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  pages     = "469--480",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@InCollection{bala03:sourcebook:pdesoft,
  author    = "Satish Balay and William Gropp and Lois Curfman McInnes and Barry F. Smith",
  title     = "Software for the Scalable Solution of Partial Differential Equations",
  booktitle = "Sourcebook of Parallel Computing",
  editor    = "Jack Dongarra and Ian Foster and Geoffrey Fox and William Gropp and Ken Kennedy and Linda Torczon and Andy White",
  pages     = "621--647",
  year      = 2003,
  publisher = "Morgan Kaufmann",
}

@InCollection{gro03:beowulf:use,
  author    = "William Gropp",
  title     = "So You Want to Use a Cluster",
  booktitle = "Beowulf Cluster Computing with {Linux}",
  editor    = "William Gropp and Ewing Lusk and Thomas Sterling",
  pages     = "1--17",
  year      = 2003,
  publisher = "MIT Press",
}

@InCollection{lusk03:beowulf:pgmming,
  author    = "Ewing Lusk and William Gropp and Ralph Butler",
  title     = "An Introduction to Writing Parallel Programs",
  booktitle = "Beowulf Cluster Computing with {Linux}",
  editor    = "William Gropp and Ewing Lusk and Thomas Sterling",
  pages     = "171--206",
  year      = 2003,
  publisher = "MIT Press",
}

@InCollection{gro03:beowulf:mpi1,
  author    = "William Gropp and Ewing Lusk",
  title     = "Parallel Programming with {MPI}",
  booktitle = "Beowulf Cluster Computing with {Linux}",
  editor    = "William Gropp and Ewing Lusk and Thomas Sterling",
  pages     = "207--243",
  year      = 2003,
  publisher = "MIT Press",
}

@InCollection{gro03:beowulf:mpi2,
  author    = "William Gropp and Ewing Lusk",
  title     = "Advanced Topics in {MPI} Programming",
  booktitle = "Beowulf Cluster Computing with {Linux}",
  editor    = "William Gropp and Ewing Lusk and Thomas Sterling",
  pages     = "245--278",
  year      = 2003,
  publisher = "MIT Press",
}

% 2004

@InCollection{ree04:mpi-io,
  author    = "Rajeev Thakur and William Gropp and Ewing Lusk",
  title     = "{ADIO}: A Framework for High-Performance, Portable Parallel {I/O}",
  booktitle = "Scalable Input/Output",
  editor    = "Daniel A. Reed",
  pages     = "111--134",
  year      = 2004,
  publisher = "MIT Press"
}

% 2005

@InCollection{gro04-bk:par-issues,
  author    = "William D. Gropp",
  title     = "Issues in Accurate and Reliable Use of Parallel Computing in Numerical Programs",
  booktitle = "Accuracy and Reliability in Scientific Computing",
  editor    = "Bo Einarsson",
  year      = 2005,
  publisher = "SIAM",
  OPTpages  = ""
}

% 2006

@InCollection{kend06:pde,
  author    = "Ricky A. Kendall and Masha Sosonkina and William D. Gropp and Robert W. Numrich and Thomas Sterling",
  title     = "Parallel Programming Models Applicable to Cluster Computing and Beyond",
  booktitle = "Numerical Solution of Partial Differential Equations on Parallel Computers",
  editor    = "Are Magnus Bruaset and Aslak Tveito",
  series    = "Lecture Notes in Computational Science and Engineering",
  number    = 51,
  pages     = "3--54",
  year      = 2006,
  publisher = "Springer"
}

@InCollection{gropp06:ppsurvey,
  author    = "William D. Gropp and Andrew Lumsdaine",
  title     = "Parallel Tools and Environments: A Survey",
  booktitle = "Parallel Processing for Scientific Computing",
  editor    = "Michael A. Heroux and Padma Raghavan and Horst D. Simon",
  pages     = "223--232",
  year      = 2006,
  publisher = "SIAM",
}

%% 2007

@InCollection{Dagstuhl-book:2007,
  author    = "Boyana Norris and Albert Hartono and William Gropp",
  title     = "Annotations for Productivity and Performance Portability",
  booktitle = "Petascale Computing: {A}lgorithms and Applications",
  series    = "Computational Science",
  year      = 2007,
  publisher = "Chapman \& Hall / CRC Press, Taylor and Francis Group",
  note      = "Preprint ANL/MCS-P1392-0107"
}

%
% Section: Reports
%
% Intended for semi-formal reports, such as the SCaLeS report and
% other, major, conference reports. Does not include technical reports
% or less-formal conference reports.

% 2003

% Really editors, not authors.
@Booklet{scalesv1-03,
  author = "David Keyes and Philip Colella and Thom H. Dunning and William D. Gropp",
  title  = "A Science-Based Case For Large-Scale Simulation, Volume 1",
  month  = jul,
  year   = 2003,
  note   = "Office of Science, U.S. Department of Energy"
}

% 2004

@Booklet{scalesv2-04,
  author = "David Keyes and Philip Colella and Thom H. Dunning and William D. Gropp",
  title  = "A Science-Based Case For Large-Scale Simulation, Volume 2",
  month  = sep,
  year   = 2004,
  note   = "DRAFT, Office of Science, U.S. Department of Energy"
}

@Misc{zima:hppl04,
  author = "Hans P. Zima",
  title  = "Workshop on High-Productivity Programming Languages and Models",
  year   = 2004,
  note   = "Report of the workshop",
  annote = "Section: Problem-Solving Environments and Domain-Specific Languages"
}

@Misc{jms04:grid,
  author = "Jennifer M. Schopf",
  title  = "Grid Performance Workshop 2004 Report",
  year   = 2004
}

@Misc{mat04:report,
  title = "International Workshop on Advanced Computational Materials Science: Application to Fusion and Generation-{IV} Fission Reactors",
  year  = 2004,
  note  = "Also ORNL/TM-2004/132"
}

% 2005

% Report of the 2005 International Grid Performance Workshop; see `annote`
% for why the steering committee is listed as the author.
@Misc{GPW05-Report,
  author = {Jennifer M. Schopf and William Gropp and Stephen Jarvis and Mark Leese and Brian Tierney},
  title  = {Report of the International Grid Performance Workshop 2005},
  month  = jul,
  year   = 2005,
  annote = {Report of the Workshop, supported by the National e-Science Center (Edinburgh), the US NSF, and JISC. Author list is the steering committee for the workshop; Schopf is listed first as she was the steering committee chair. Also ANL/MCS-TM-288},
  url    = {https://www.dcs.warwick.ac.uk/~saj/papers/IGPW2005.pdf}
}

% 2006

% Workshop report; the `note` records which section Gropp co-authored.
% The month uses the standard macro so styles can format or localise it.
@Misc{nes06,
  author = {Phillip Finck and David Keyes and Rick Stevens},
  title  = {Workshop on Simulation and Modeling for Advanced Nuclear Energy Systems},
  month  = aug,
  year   = 2006,
  note   = {CoAuthored Section 3.4, Software Tools and Environments, with Robert Armstrong. Available as \url{www.mcs.anl.gov/anes/SMANES/gnep06-final.pdf}.},
}

% 2007

% 2008

% DOE applied mathematics report. The title is fully braced to keep its
% capitalisation; the month uses the standard macro.
@Booklet{applmath08,
  author = {David Brown and John Bell and Donald Estep and William Gropp and Bruce Hendrickson and Sallie Keller-McNulty and David Keyes and J. Tinsley Oden and Linda Petzold and Margaret Wright},
  title  = {{Applied Mathematics at the U.S. Department of Energy: Past, Present and a View to the Future}},
  month  = may,
  year   = 2008,
  note   = {Ed. by David Brown}
}
% 2010

@Booklet{nsf-soft10,
  author = "David Keyes and Valerie Taylor",
  title  = "{NSF-ACCI} Task Force on Software for Science and Engineering",
  month  = dec,
  year   = 2010
}
% 2011 % 2012 % 2014

@Book{NAP18972,
  author    = {{National Research Council}},
  title     = {Future Directions for {NSF} Advanced Computing Infrastructure to Support {U.S.} Science and Engineering in 2017--2020: {I}nterim Report},
  year      = 2014,
  publisher = {The National Academies Press},
  address   = {Washington, DC},
  isbn      = {978-0-309-31379-7},
  url       = {http://www.nap.edu/catalog/18972/future-directions-for-nsf-advanced-computing-infrastructure-to-support-us-science-and-engineering-in-2017-2020},
  abstract  = {Advanced computing capabilities are used to tackle a rapidly growing range of challenging science and engineering problems, many of which are compute- and data-intensive as well. Demand for advanced computing has been growing for all types and capabilities of systems, from large numbers of single commodity nodes to jobs requiring thousands of cores; for systems with fast interconnects; for systems with excellent data handling and management; and for an increasingly diverse set of applications that includes data analytics as well as modeling and simulation. Since the advent of its supercomputing centers, the National Science Foundation (NSF) has provided its researchers with state-of-the-art computing systems. The growth of new models of computing, including cloud computing and publically available by privately held data repositories, opens up new possibilities for NSF. In order to better understand the expanding and diverse requirements of the science and engineering community and the importance of a new broader range of advanced computing infrastructure, the NSF requested that the National Research Council carry out a study examining anticipated priorities and associated tradeoffs for advanced computing. This interim report identifies key issues and discusses potential options. Future Directions for NSF Advanced Computing Infrastructure to Support U.S. Science and Engineering in 2017-2020 examines priorities and associated tradeoffs for advanced computing in support of NSF-sponsored science and engineering research. This report is an initial compilation of issues to be considered as future NSF strategy, budgets, and programs for advanced computing are developed. Included in the report are questions on which the authoring committee invites comment. We invite your feedback on this report, and more generally, your comments on the future of advanced computing at NSF.},
}

% 2015

%2016
% Final report (the interim report is NAP18972). "U.S." is brace-protected
% in the title as in NAP18972, and the abstract is plain running text.
@Book{NAP21886,
  author    = "{National Academies of Sciences, Engineering, and Medicine}",
  title     = "Future Directions for {NSF} Advanced Computing Infrastructure to Support {U.S.} Science and Engineering in 2017--2020",
  isbn      = "978-0-309-38961-7",
  doi       = "10.17226/21886",
  abstract  = "Advanced computing capabilities are used to tackle a rapidly growing range of challenging science and engineering problems, many of which are compute- and data-intensive as well. Demand for advanced computing has been growing for all types and capabilities of systems, from large numbers of single commodity nodes to jobs requiring thousands of cores; for systems with fast interconnects; for systems with excellent data handling and management; and for an increasingly diverse set of applications that includes data analytics as well as modeling and simulation. Since the advent of its supercomputing centers, the National Science Foundation (NSF) has provided its researchers with state-of-the-art computing systems. The growth of new models of computing, including cloud computing and publically available by privately held data repositories, opens up new possibilities for NSF. In order to better understand the expanding and diverse requirements of the science and engineering community and the importance of a new broader range of advanced computing infrastructure, the NSF requested that the National Research Council carry out a study examining anticipated priorities and associated tradeoffs for advanced computing. Future Directions for NSF Advanced Computing Infrastructure to Support U.S. Science and Engineering in 2017-2020 provides a framework for future decision-making about NSF's advanced computing strategy and programs. It offers recommendations aimed at achieving four broad goals: (1) position the U.S. for continued leadership in science and engineering, (2) ensure that resources meet community needs, (3) aid the scientific community in keeping up with the revolution in computing, and (4) sustain the infrastructure for advanced computing.",
  url       = "http://www.nap.edu/catalog/21886/future-directions-for-nsf-advanced-computing-infrastructure-to-support-us-science-and-engineering-in-2017-2020",
  year      = 2016,
  publisher = "The National Academies Press",
  address   = "Washington, DC"
}

% 2018
@Book{NAP25199,
  author    = {{National Academies of Sciences, Engineering, and Medicine}},
  title     = {Opportunities from the Integration of Simulation Science and Data Science: Proceedings of a Workshop},
  year      = 2018,
  publisher = {The National Academies Press},
  address   = {Washington, DC},
  doi       = {10.17226/25199},
  url       = {https://www.nap.edu/catalog/25199/opportunities-from-the-integration-of-simulation-science-and-data-science},
  abstract  = {Convergence has been a key topic of discussion about the future of cyberinfrastructure for science and engineering research. Convergence refers both to the combined use of simulation and data-centric techniques in science and engineering research and the possibilities for a single type of cyberinfrastructure to support both techniques. The National Academies of Science, Engineering, and Medicine convened a Workshop on Converging Simulation and Data-Driven Science on May 10, 2018, in Washington, D.C. The workshop featured speakers from universities, national laboratories, technology companies, and federal agencies who addressed the potential benefits and limitations of convergence as they relate to scientific needs, technological capabilities, funding structures, and system design requirements. This publication summarizes the presentations and discussions from the workshop.}
}

% National Academies report (2021). The abstract is plain running text,
% with no escaped-newline artefacts from the catalogue export.
@Book{NAP26042,
  author    = "{National Academies of Sciences, Engineering, and Medicine}",
  title     = "Next Generation Earth Systems Science at the National Science Foundation",
  isbn      = "978-0-309-26323-8",
  doi       = "10.17226/26042",
  abstract  = "The National Science Foundation (NSF) has played a key role over the past several decades in advancing understanding of Earth's systems by funding research on atmospheric, ocean, hydrologic, geologic, polar, ecosystem, social, and engineering-related processes. Today, however, those systems are being driven like never before by human technologies and activities. Our understanding has struggled to keep pace with the rapidity and magnitude of human-driven changes, their impacts on human and ecosystem sustainability and resilience, and the effectiveness of different pathways to address those challenges. Given the urgency of understanding human-driven changes, NSF will need to sustain and expand its efforts to achieve greater impact. The time is ripe to create a next-generation Earth systems science initiative that emphasizes research on complex interconnections and feedbacks between natural and social processes. This will require NSF to place an increased emphasis on research inspired by real-world problems while maintaining their strong legacy of curiosity driven research across many disciplines --- as well as enhance the participation of social, engineering, and data scientists, and strengthen efforts to include diverse perspectives in research.",
  url       = "https://www.nap.edu/catalog/26042/next-generation-earth-systems-science-at-the-national-science-foundation",
  year      = 2021,
  publisher = "The National Academies Press",
  address   = "Washington, DC"
}
% 2020
@Misc{gropp2020infrastructure,
  author        = "William Gropp and Sujata Banerjee and Ian Foster",
  title         = "Infrastructure for Artificial Intelligence, Quantum and High Performance Computing",
  year          = 2020,
  archivePrefix = "arXiv",
  eprint        = "2012.09303",
  primaryClass  = "cs.CY"
}

% 2021
@Misc{conte2021advancing,
  author        = "Thomas M. Conte and Ian T. Foster and William Gropp and Mark D. Hill",
  title         = "Advancing Computing's Foundation of {US} Industry \& Society",
  year          = 2021,
  archivePrefix = "arXiv",
  eprint        = "2101.01284",
  primaryClass  = "cs.CY"
}

@Misc{foster2021national,
  author        = "Ian Foster and Daniel Lopresti and Bill Gropp and Mark D. Hill and Katie Schuman",
  title         = "A National Discovery Cloud: Preparing the {US} for Global Competitiveness in the New Era of 21st Century Digital Transformation",
  year          = 2021,
  archivePrefix = "arXiv",
  eprint        = "2104.06953",
  primaryClass  = "cs.CY"
}

@Misc{bradley2021pandemic,
  author        = "Elizabeth Bradley and Madhav Marathe and Melanie Moses and William D. Gropp and Daniel Lopresti",
  title         = "Pandemic Informatics: Preparation, Robustness, and Resilience; Vaccine Distribution, Logistics, and Prioritization; and Variants of Concern",
  year          = 2021,
  archivePrefix = "arXiv",
  eprint        = "2012.09300",
  primaryClass  = "cs.CY",
  note          = "Updated to include additional topics."
}

% CFD Vision 2030 roadmap update. `doi` holds the bare DOI (no resolver or
% publisher URL prefix) so DOI-aware styles can build the link themselves.
@Misc{cfd2030-July2021,
  author = {Andrew Cary and John Chawner and Earl Duque and William Gropp and William Kleb and Ray Kolanay and Eric Nielsen and Brian Smith},
  title  = {{CFD} Vision 2030 - Roadmap Updates},
  year   = 2021,
  url    = "http://www.cfd2030.com/report",
  doi    = "10.2514/6.2021-2726"
}
%
% Section: Articles
%
%1980
@Article{Gropp:1980:TMM,
  author  = {William D. Gropp},
  title   = {A test of moving mesh refinement for $2$-{D} scalar hyperbolic problems},
  journal = {SIAM Journal on Scientific and Statistical Computing},
  volume  = 1,
  number  = 2,
  pages   = {191--197},
  month   = jun,
  year    = 1980,
  coden   = {SIJCD4},
  issn    = {0196-5204},
  area    = {R},
  areaseq = {0},
}
%1981
%1982
%1983
%1984
%1985
%1986

%1987
% Journal version of a paper from the 1986 Loen conference (see `note`).
% The double issue is written as a range with an en-dash (1--2).
@Article{Gropp:1987:SPL,
  author         = "William D. Gropp",
  title          = "Solving {PDE}s on loosely-coupled parallel processors",
  journal        = "Parallel Computing",
  volume         = "5",
  number         = "1--2",
  pages          = "165--173",
  month          = jul,
  year           = "1987",
  coden          = "PACOEJ",
  ISSN           = "0167-8191",
  note           = "Proceedings of the international conference on vector and parallel computing---issues in applied research and development (Loen, 1986)",
  classification = "B0290F (Interpolation and function approximation); B0290P (Differential equations); C4130 (Interpolation and function approximation); C4170 (Differential equations); C4240 (Programming and algorithm theory); C5440 (Multiprocessor systems and techniques)",
  conflocation   = "Loen, Norway; 2-6 June 1986",
  conftitle      = "International Conference on Vector and Parallel Computing --- Issues in Applied Research and Development",
  corpsource     = "Dept. of Comput. Studies, Yale Univ., New Haven, CT, USA",
  keywords       = "communication requirements; computational complexity; data; highly-ordered structure; iterative methods; loosely-coupled parallel; memory access; message; parallel; partial differential equations; passing interprocessor communication mechanism; processing; processors",
  sponsororg     = "IBM Norway; R. Norwegian Council Sci. and Ind. Res.; AMDAHL; FPS; CRAY",
  treatment      = "P Practical; T Theoretical or Mathematical",
  area           = "Par",
  areaseq        = "0",
}

@Article{Keyes:1987:CDD,
  author         = {David E. Keyes and William D. Gropp},
  title          = {A comparison of domain decomposition techniques for elliptic partial differential equations and their parallel implementation},
  journal        = {SIAM Journal on Scientific and Statistical Computing},
  volume         = 8,
  number         = 2,
  pages          = {S166--S202},
  month          = mar,
  year           = 1987,
  coden          = {SIJCD4},
  issn           = {0196-5204},
  note           = {Reprinted in Selected Papers from the Second Conference on Parallel Processing for Scientific Computing (C. W. Gear \& R. G. Voigt, eds., SIAM, 1987)},
  conftitle      = {Second Conference on Parallel Processing for Scientific Computing},
  conflocation   = {Norfolk, VA, USA; 18-21 Nov. 1985},
  sponsororg     = {SIAM; NSF; Air Force Office Sci. Res},
  corpsource     = {Res. Center for Sci. Comput., Yale Univ., New Haven, CT, USA},
  classification = {C4170 (Differential equations)},
  keywords       = {2D self-adjoint elliptic PDEs; domain decomposition techniques; elliptic partial differential equations; fast Poisson solvers; Intel Hypercube; interfacial equations; iterative techniques; parallel algorithms; partial differential equations; preconditioned conjugate gradient},
  treatment      = {T Theoretical or Mathematical},
  area           = {Par:D},
}

@Article{Gropp:1987:LUM,
  author         = {William D. Gropp},
  title          = {Local uniform mesh refinement with moving grids},
  journal        = {SIAM Journal on Scientific and Statistical Computing},
  volume         = 8,
  number         = 3,
  pages          = {292--304},
  month          = may,
  year           = 1987,
  coden          = {SIJCD4},
  issn           = {0196-5204},
  corpsource     = {Dept. of Comput. Sci., Yale Univ., New Haven, CT, USA},
  classification = {B0290P (Differential equations); C4170 (Differential equations)},
  keywords       = {2-D scalar problems; hyperbolic partial differential equations; local uniform mesh refinement; moving grids; numerical dispersion; partial differential equations},
  treatment      = {T Theoretical or Mathematical},
  area           = {D},
  areaseq        = {0},
}

%1988
% Journal also known as Computers and Mathematics (short title)
% The journal is given by its full name, matching the spelling used by
% greengardgropp90 for the same journal.
@Article{Gropp88c,
  author  = "W. Gropp",
  title   = "Local Uniform Mesh Refinement on Loosely-Coupled Parallel Processors",
  journal = "Computers and Mathematics with Applications",
  volume  = "15",
  pages   = "375--389",
  year    = "1988",
  area    = "R:Par",
  areaseq = "0",
}

@Article{Gropp:1988:CPI,
  author         = {William D. Gropp and David E. Keyes},
  title          = {Complexity of parallel implementation of domain decomposition techniques for elliptic partial differential equations},
  journal        = {SIAM Journal on Scientific and Statistical Computing},
  volume         = 9,
  number         = 2,
  pages          = {312--326},
  month          = mar,
  year           = 1988,
  coden          = {SIJCD4},
  issn           = {0196-5204},
  corpsource     = {Res. Center for Sci. Comput., Yale Univ., New Haven, CT, USA},
  classification = {C4170 (Differential equations); C4240 (Programming and algorithm theory); C5220 (Computer architecture)},
  keywords       = {communication startups; complexity; computational complexity; domain decomposition; elliptic partial differential equations; message-passing parallel computers; parallel algorithms; parallel architectures; partial differential equations; preconditioned conjugate gradient},
  treatment      = {T Theoretical or Mathematical},
  area           = {D:Par},
  areaseq        = {0},
}

%1989
@Article{Gropp:1989:RMR,
  author  = {William D. Gropp and I. C. F. Ipsen},
  title   = {Recursive Mesh Refinement on Hypercubes},
  journal = {Nordisk Tidskr. Informationsbehandling (BIT)},
  volume  = 29,
  pages   = {186--211},
  year    = 1989,
  key     = {GroppIpsen88b},
  area    = {R:Par},
  areaseq = {0},
}

@Article{Gropp:1989:DDP,
  author  = {William D. Gropp and David E. Keyes},
  title   = {Domain Decomposition on Parallel Computers},
  journal = {Impact Comput. Sci. Eng.},
  volume  = 1,
  pages   = {421--439},
  year    = 1989,
  area    = {D:Par},
  areaseq = {0},
}

%1990
@Article{Keyes:1990:DDT,
  author  = {David E. Keyes and William D. Gropp},
  title   = {Domain decomposition techniques for the parallel solution of nonsymmetric systems of elliptic boundary value problems},
  journal = {Applied Numerical Mathematics: Transactions of IMACS},
  volume  = 6,
  number  = 4,
  pages   = {281--301},
  month   = may,
  year    = 1990,
  coden   = {ANMAEL},
  issn    = {0168-9274},
  area    = {Par:D},
  areaseq = {0},
}

@Article{Berryman:1990:KMP,
  author         = {H. Berryman and J. Saltz and W. Gropp and R. Mirchandaney},
  title          = {{Krylov} Methods Preconditioned with Incompletely Factored Matrices on the {CM-2}},
  journal        = {Journal of Parallel and Distributed Computing},
  volume         = 8,
  number         = 2,
  pages          = {186--190},
  month          = feb,
  year           = 1990,
  coden          = {JPDCER},
  issn           = {0743-7315},
  corpsource     = {Dept. of Comput. Sci., Yale Univ., New Haven, CT, USA},
  classification = {C4130 (Interpolation and function approximation); C7310 (Mathematics)},
  keywords       = {CM-2; incompletely factored matrices; iterative methods; Krylov methods; mathematics computing; parallel; performance; preconditioned Krylov space iterative linear; processing; sparse triangular solver; system solver; work load generator},
  treatment      = {A Application; P Practical},
  area           = {Par},
  areaseq        = {0},
}

@Article{greengardgropp90,
  author  = "Leslie Greengard and William D. Gropp",
  title   = "A Parallel Version of the Fast Multipole Method",
  journal = "Computers and Mathematics with Applications",
  volume  = 20,
  pages   = "63--71",
  year    = 1990,
  area    = {A:Par:FMM},
  areaseq = {0},
}

@Article{groppsmith90,
  author  = "William D. Gropp and Edward Smith",
  title   = "Computational Fluid Dynamics on Parallel Processors",
  journal = "Computers and Fluids",
  volume  = 18,
  pages   = "289--304",
  year    = 1990,
  area    = {Par},
  areaseq = {0},
}

%1991
%1992
@Article{Cai:1992:CRE,
  author         = {Xiao-Chuan Cai and William D. Gropp and David E. Keyes},
  title          = {Convergence rate estimate for a domain decomposition method},
  journal        = {Numerische Mathematik},
  volume         = 61,
  number         = 2,
  pages          = {153--169},
  year           = 1992,
  coden          = {NUMMA7},
  issn           = {0029-599X},
  corpsource     = {Dept. of Math., Kentucky Univ., Lexington, KY, USA},
  classification = {B0290T (Finite element analysis); C4185 (Finite element analysis)},
  keywords       = {algebraic equations; convergence of numerical methods; convergence rate analysis; convergence rate estimate; Dirichlet boundary conditions; domain decomposition method; finite element analysis; finite element discretization; indefinite elliptic problems; mesh size; preconditioned GMRES method; substructures},
  treatment      = {T Theoretical or Mathematical},
  area           = {D},
  areaseq        = {0},
}

% The journal is given by its full name, as in the other SIAM Journal on
% Scientific and Statistical Computing entries in this file.
@Article{WDGropp_DEKeyes_1992a,
  author  = "W. D. Gropp and D. E. Keyes",
  title   = "Domain decomposition with local mesh refinement",
  journal = "SIAM Journal on Scientific and Statistical Computing",
  volume  = "13",
  pages   = "967--993",
  year    = "1992",
  area    = "D:R",
  areaseq = "0",
}

@Article{siamssc-92/128:gwd,
  author  = {W. D. Gropp and D. E. Keyes},
  title   = {Parallel Performance of Domain-Decomposed Preconditioned {Krylov} Methods for {PDEs} with Locally Uniform Refinement},
  journal = {{SIAM} Journal on Scientific and Statistical Computing},
  volume  = 13,
  pages   = {128--145},
  year    = 1992,
  area    = {D:Par},
  areaseq = {0},
}

@Article{WDGropp_DEKeyes_1992c,
  author  = {William D. Gropp and David E. Keyes},
  title   = {Domain decomposition methods in computational fluid dynamics},
  journal = {International Journal for Numerical Methods in Fluids},
  volume  = {14},
  pages   = {147--165},
  year    = {1992},
  area    = {D},
  areaseq = {0},
}

@Article{FGS,
  author  = {I. Foster and W. Gropp and R. Stevens},
  title   = {The Parallel Scalability of the Spectral Transform Method},
  journal = {Monthly Weather Review},
  volume  = {120},
  number  = {5},
  pages   = {835--850},
  year    = {1992},
  area    = {A:Par:Perf},
  areaseq = {0},
}

%1993

@InProceedings{gropp93:parallel,
  author       = "William Gropp",
  title        = "Parallel Programming Tools for Distributed Memory Computers",
  booktitle    = "High Performance Computing: Grand Challenges in Computer Simulation",
  editor       = "Adrian Tentner",
  organization = "The Society for Computer Simulation",
  pages        = "166--169",
  year         = "1993",
  area         = "Par:S",
}

%1994
@Article{Cai:1994:CSD,
  author  = {Xiao-Chuan Cai and William D. Gropp and David E. Keyes},
  title   = {A Comparison of Some Domain Decomposition and {ILU} Preconditioned Iterative Methods for Nonsymmetric Elliptic Problems},
  journal = {Numerical Linear Algebra with Applications},
  volume  = {1},
  number  = {5},
  pages   = {477--504},
  year    = {1994},
  coden   = {NLAAEM},
  issn    = {1070-5325},
  area    = {D},
  areaseq = {0},
}

@Article{mpi-1-standard,
  author  = {{Message Passing Interface Forum}},
  title   = {{MPI}: A Message Passing Interface Standard},
  journal = {International Journal of Supercomputer Applications},
  volume  = {8},
  number  = {3/4},
  pages   = {159--416},
  year    = {1994},
  area    = {Par:M},
  areaseq = {0},
}

%1995
@Article{forsman95,
  author  = "K. Forsman and W. Gropp and L. Kettunen and D. Levine and J. Salonen",
  title   = "Solution of dense systems of linear equations arising from integral equation formulations",
  journal = "IEEE Antennas and Propagation Magazine",
  pages   = "96--100",
  month   = dec,
  year    = "1995",
  area    = "App:Par:P",
  areaseq = "0",
}

@Article{Gropp:1995:EIS,
  author         = {W. D. Gropp and E. Lusk},
  title          = {Experiences with the {IBM SP1}},
  journal        = {IBM Systems Journal},
  volume         = {34},
  number         = {2},
  pages          = {249--262},
  year           = {1995},
  coden          = {IBMSA7},
  issn           = {0018-8670},
  affiliation    = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  classification = {C5440 (Multiprocessing systems); C5220P (Parallel architecture); C6110P (Parallel programming); C6150N (Distributed systems software)},
  keywords       = {High performance; IBM parallel processing computers; IBM SP1; Message passing interface standard; Parallel supercomputer; Parallelism; Portability; Software technology; SP1 processors; SP2 communication adapters; SP2 software environment; Supercomputer system designers; System architecture},
  thesaurus      = {IBM computers; Message passing; Parallel architectures; Parallel machines; Parallel programming},
  area           = {P},
  areaseq        = {0},
}

@Article{Skjellum:1995:EAM,
  author         = {Anthony Skjellum and Ewing Lusk and William Gropp},
  title          = {Early applications in the {Message-Passing Interface} ({MPI})},
  journal        = {International Journal of Supercomputer Applications and High Performance Computing},
  journalabr     = {Int J Supercomput Appl High Perform Comput},
  volume         = {9},
  number         = {2},
  pages          = {79--94},
  month          = {Summer},
  year           = {1995},
  coden          = {IJSCFG},
  issn           = {1078-3482},
  classification = {722.2; 722.3; 722.4; 902.2; C6150N (Distributed systems software)},
  corpsource     = {Dept. of Comput. Sci., Mississippi State Univ., MS, USA},
  keywords       = {Application developers; application developers; Computer hardware; Data communication systems; message passing; Message passing interface (MPI); Message-Passing Interface; MPI standard; Network protocols; software engineering; software standards; Standards; Survey; survey; User interfaces},
  thesaurus      = {Message passing; Software engineering; Software standards},
  treatment      = {P Practical},
  officialurl    = {http://hpc.sagepub.com/content/9/2/79.abstract},
  area           = {Par:M},
  areaseq        = {0},
}

%1996
@Article{groppkaper96,
  author  = {William D. Gropp and Hans Kaper and G. Leaf and D. Levine and V. Vinokur and M. Palumbo},
  title   = {Numerical Simulation of Vortex Dynamics in High-{$T_c$} Superconductors},
  journal = {Journal of Computational Physics},
  volume  = {123},
  pages   = {254--266},
  year    = {1996},
  area    = {App:Par},
  areaseq = {0},
}

@Article{Gropp:1996:HPI,
  author         = {W. Gropp and E. Lusk and N. Doss and A. Skjellum},
  title          = {A high-performance, portable implementation of the {MPI} message passing interface standard},
  journal        = {Parallel Computing},
  volume         = {22},
  number         = {6},
  pages          = {789--828},
  month          = sep,
  year           = {1996},
  coden          = {PACOEJ},
  issn           = {0167-8191},
  classification = {C6150N (Distributed systems software); C6110B (Software engineering techniques); C6115 (Programming support); C6110P (Parallel programming)},
  corpsource     = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  keywords       = {applications; design goal; distribution; environments; free; future developments; high-performance portable implementation; library writers; message passing; MPI message; MPI-2; MPICH; parallel computer vendors; parallel programming; passing interface standard; portable parallel programming environment; programming; project management; software libraries; software performance evaluation; software portability; software standards; software tools; specialists; specification; standard library},
  treatment      = {P Practical},
  area           = {Par:M},
  areaseq        = {0},
}

%% ??? should this be inproceedings?
%% pages = "24--??",
@Article{Thakur:1996:EEP,
  author  = {R. Thakur and W. Gropp and E. Lusk},
  title   = {An Experimental Evaluation of the Parallel {I/O} Systems of the {IBM SP} and {Intel Paragon} Using a Production Application},
  journal = {Lecture Notes in Computer Science},
  volume  = {1127},
  year    = {1996},
  coden   = {LNCSD9},
  issn    = {0302-9743},
  area    = {Par:I},
  areaseq = {0},
}

@Article{smithgropp96,
  author  = "Barry Smith and William Gropp",
  title   = "The Design of Data-Structure-Neutral Libraries for the Iterative Solution of Sparse Linear Systems",
  journal = "Scientific Programming",
  volume  = "5",
  pages   = "329--336",
  year    = "1996",
  area    = "NS:P",
  areaseq = "0",
}

%1997
@Article{Gropp:1997:HPM,
  author         = {W. Gropp and E. Lusk},
  title          = {A high-performance {MPI} implementation on a shared-memory vector supercomputer},
  journal        = {Parallel Computing},
  volume         = {22},
  number         = {11},
  pages          = {1513--1526},
  month          = jan,
  year           = {1997},
  coden          = {PACOEJ},
  issn           = {0167-8191},
  classification = {C6150N (Distributed systems software); C5220P (Parallel architecture); C5610N (Network interfaces)},
  corpsource     = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  keywords       = {message passing; Message-Passing Interface; MPI implementation; MPIC; NEC SX-4; network interfaces; parallel; shared memory systems; shared-memory programming; shared-memory vector supercomputer; standards; supercomputer},
  treatment      = {P Practical},
  area           = {Par:M},
  areaseq        = {0},
}

@Article{Gropp:1997:SMC,
  author      = {W. Gropp and E. Lusk},
  title       = {Sowing {MPICH}: {A} Case Study in the Dissemination of a Portable Environment for Parallel Scientific Computing},
  journal     = {The International Journal of Supercomputer Applications and High Performance Computing},
  volume      = {11},
  number      = {2},
  pages       = {103--114},
  month       = {Summer},
  year        = {1997},
  coden       = {IJSCFG},
  issn        = {1078-3482},
  officialurl = {http://hpc.sagepub.com/content/11/2/103.abstract},
  area        = {Par:M:S},
  areaseq     = {0},
}

%1998
@Article{mpi-2-standard,
  author  = "{Message Passing Interface Forum}",
  title   = "{MPI2}: A Message Passing Interface Standard",
  journal = "High Performance Computing Applications",
  volume  = "12",
  number  = "1--2",
  pages   = "1--299",
  year    = "1998",
  area    = "Par:M",
  areaseq = "0",
}

@Article{thakurluskgropp98,
  author      = "Rajeev Thakur and Ewing Lusk and William Gropp",
  title       = "{I/O} in Parallel Applications: The Weakest Link",
  journal     = "The International Journal of High Performance Computer Applications",
  volume      = "12",
  number      = "4, part 2",
  pages       = "389--395",
  year        = "1998",
  officialurl = "http://hpc.sagepub.com/content/12/4/389.abstract",
  area        = "Par:I",
  areaseq     = "0",
}

@Article{caigropp97,
  author  = {Xiao-Chuan Cai and William D. Gropp and David E. Keyes and R. G. Melvin and D. P. Young},
  title   = {Parallel {N}ewton-{K}rylov-{S}chwarz Algorithms for the Transonic Full Potential Equation},
  journal = {{SIAM} Journal on Scientific Computing},
  volume  = {19},
  pages   = {246--265},
  month   = jan,
  year    = {1998},
  note    = {Also ICASE report TR 96-39.},
  area    = {Par:D},
  areaseq = {0},
}

% @Comment This article never appeared as far as I can tell.
% @Comment Article{gropplusk-pvmmpi98,
% @Comment author = {William Gropp and Ewing Lusk},
% @Comment title = {{PVM} and {MPI} are completely different},
% @Comment journal = {Future Generation Computer Systems},
% @Comment year = {1999},
% @Comment note = {Submitted as part of a PVMMPI special issue},
% @Comment area = "M;Par",
% @Comment areaseq = "0",
% @Comment }

@Article{mpi-nexus-pc,
  author  = {I. Foster and J. Geisler and W. Gropp and N. Karonis and E. Lusk and G. Thiruvathukal and S. Tuecke},
  title   = {{A} Wide-Area Implementation of the {M}essage {P}assing {I}nterface},
  journal = {Parallel Computing},
  volume  = {24},
  number  = {12--13},
  pages   = {1735--1749},
  month   = nov,
  year    = {1998},
  area    = {Par:M},
  areaseq = {0},
}
%
%1999

@Article{LevGroForKet99:petsc-coral,
  author  = {David Levine and William Gropp and Kimmo Forsman and Lauri Kettunen},
  title   = {Parallel Computation of Three-dimensional Nonlinear Magnetostatic Problems},
  journal = {Concurrency: Practice and Experience},
  volume  = {11},
  number  = {2},
  pages   = {109--120},
  month   = feb,
  year    = {1999},
  annote  = {PETSc},
  area    = {App:Par:P},
  areaseq = {0},
}

@Article{zaki-lusk-gropp-swider99,
  author      = {Omer Zaki and Ewing Lusk and William Gropp and Deborah Swider},
  title       = {Toward Scalable Performance Visualization with {Jumpshot}},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {13},
  number      = {3},
  pages       = {277--288},
  month       = {Fall},
  year        = {1999},
  officialurl = {http://hpc.sagepub.com/content/13/3/277.abstract},
  area        = {P:PV},
  areaseq     = {0},
}

%2000

@Article{gkmt-nks-98,
  author      = {William Gropp and David E. Keyes and Lois C. McInnes and M. D. Tidriri},
  title       = {Globalized {N}ewton-{K}rylov-{S}chwarz Algorithms and Software for Parallel Implicit {CFD}},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {14},
  number      = {2},
  pages       = {102--136},
  year        = {2000},
  officialurl = {http://hpc.sagepub.com/content/14/2/102.abstract},
  area        = {A:Par:P},
  areaseq     = {0},
}

%2001

% Also referenced as gkks:cfd-hiperf,
@Article{gkks:cfd-hiperf-art,
  author  = "W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith",
  title   = "High Performance Parallel Implicit {CFD}",
  journal = {Parallel Computing},
  volume  = {27},
  number  = {4},
  pages   = {337--362},
  year    = {2001},
  area    = {Par:NS:P},
  areaseq = {0},
}

@Article{bgl00:mpd,
  author  = "Ralph Butler and William Gropp and Ewing Lusk",
  title   = "Components and Interfaces of a Process Management System for Parallel Programs",
  journal = "Parallel Computing",
  volume  = "27",
  number  = "11",
  pages   = "1417--1429",
  month   = oct,
  year    = "2001",
  area    = "Par:M:PMI",
  areaseq = "0",
}

% 2002

@Article{tgl02:mpiio,
  author  = "Rajeev Thakur and William Gropp and Ewing Lusk",
  title   = "Optimizing Noncontiguous Accesses in {MPI-IO}",
  journal = "Parallel Computing",
  volume  = "28",
  number  = "1",
  pages   = "83--105",
  month   = jan,
  year    = "2002",
  area    = "Par:M:I",
  areaseq = "0",
}

@Article{bak03:cluster01,
  author  = "Mark Baker and Daniel Katz and William Gropp and Thomas Sterling",
  title   = "Special Issue: Cluster 2001",
  journal = "Concurrency and Computation: Practice and Experience",
  volume  = "15",
  number  = "7--8",
  pages   = "623--624",
  year    = "2003",
  area    = "Par",
}

% 2003
% 2004

@Article{gro04:mpi,
  author      = {William D. Gropp and Ewing Lusk},
  title       = {Fault Tolerance in {MPI} Programs},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {18},
  number      = {3},
  pages       = {363--372},
  year        = {2004},
  officialurl = {http://hpc.sagepub.com/content/18/3/363.abstract},
  area        = {Par:M},
}

@Article{DBLP:journals/ijhpcn/ChingCLRG04,
  author    = "Avery Ching and Alok N. Choudhary and Wei{-}keng Liao and Robert B. Ross and William Gropp",
  title     = "Evaluating structured {I/O} methods for parallel file systems",
  journal   = "International Journal of High Performance Computing and Networking",
  volume    = "2",
  number    = "2/3/4",
  pages     = "133--145",
  year      = "2004",
  doi       = "10.1504/IJHPCN.2004.008898",
  url       = "http://dx.doi.org/10.1504/IJHPCN.2004.008898",
  timestamp = "Mon, 18 Feb 2008 22:18:59 +0100",
  biburl    = "http://dblp.uni-trier.de/rec/bib/journals/ijhpcn/ChingCLRG04",
  bibsource = "dblp computer science bibliography, http://dblp.org",
}

% 2005
@Article{thak05:mpi-impl:coll,
  author      = {Rajeev Thakur and Rolf Rabenseifner and William Gropp},
  title       = {Optimization of Collective Communication Operations in {MPICH}},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {19},
  number      = {1},
  pages       = {49--66},
  year        = {2005},
  officialurl = {http://hpc.sagepub.com/content/19/1/49.abstract},
  area        = {Par:M:Coll},
}

@Article{alma05:mpi-impl:bgl,
  author  = {George Alm{\'a}si and Charles Archer and Jose G. Casta{\~n}os and J. A. Gunnels and C. Chris Erway and Philip Heidelberger and Xavier Martorell and Jose E. Moreira and Kurt Pinnow and Joe Ratterman and Burkhard Steinmacher-Burow and William Gropp and Brian Toonen},
  title   = {Design and Implementation of Message-Passing Services for the {Blue Gene/L} Supercomputer},
  journal = {{IBM} Journal of Research and Development},
  volume  = {49},
  number  = {2/3},
  pages   = {393--406},
  month   = mar # "/" # may,
  year    = {2005},
  note    = {Available at \url{http://www.research.ibm.com/journal/rd49-23.html}},
  area    = {Par:M},
}

@Article{thak05:mpi-impl:rma,
  author      = {Rajeev Thakur and William Gropp and Brian Toonen},
  title       = {Optimizing the Synchronization Operations in {MPI} One-Sided Communication},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {19},
  number      = {2},
  pages       = {119--128},
  year        = {2005},
  officialurl = {http://hpc.sagepub.com/content/19/2/119.abstract},
  area        = {Par:M:RMA},
}

% 2007

@Article{falz07:mpi-debug,
  author      = "Christopher Falzone and Anthony Chan and Ewing Lusk and William Gropp",
  title       = "A Portable Method for Finding User Errors in the Usage of {MPI} Collective Operations",
  journal     = "International Journal of High Performance Computing Applications",
  volume      = "21",
  number      = "2",
  pages       = "155--165",
  year        = "2007",
  officialurl = "http://hpc.sagepub.com/content/21/2/155.abstract",
  area        = "Par:M",
}

@Article{shen:accel,
  author  = "Baifei Shen and Yuelin Li and Karoly Nemeth and Hairong Shang and Yong-chul Chae and Robert Soliday and Robert Crowell and Edward Frank and William Gropp and John Cary",
  title   = "Electron Injection by a Nanowire in the Bubble Regime",
  journal = "Physics of Plasmas",
  volume  = "14",
  year    = "2007",
  area    = "App:A",
}

%% 2008
@Article{chan08:slog2,
  author  = "Anthony Chan and William Gropp and Ewing Lusk",
  title   = "An Efficient Format for Nearly Constant-Time Access to Arbitrary Time Intervals in Large Trace Files",
  journal = "Scientific Programming",
  volume  = "16",
  number  = "2",
  pages   = "155--165",
  year    = "2008",
  area    = "Par:PV",
}

%% 2009
@Article{thakur09:MPIthreads,
  author  = "Rajeev Thakur and William Gropp",
  title   = "Test Suite for Evaluating Performance of multithreaded {MPI} communication",
  journal = "Parallel Computing",
  volume  = "35",
  pages   = "608--617",
  year    = "2009",
  area    = "Par:M:Thread",
}

@Article{FranckCappello11012009,
  author      = {Cappello, Franck and Geist, Al and Gropp, Bill and Kale, Laxmikant and Kramer, Bill and Snir, Marc},
  title       = {Toward Exascale Resilience},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {23},
  number      = {4},
  pages       = {374--388},
  year        = {2009},
  doi         = {10.1177/1094342009347767},
  url         = {http://hpc.sagepub.com/cgi/content/abstract/23/4/374},
  eprint      = {http://hpc.sagepub.com/cgi/reprint/23/4/374.pdf},
  officialurl = {http://hpc.sagepub.com/content/23/4/374.abstract},
  abstract    = {Over the past few years resilience has became a major issue for high-performance computing (HPC) systems, in particular in the perspective of large petascale systems and future exascale systems. These systems will typically gather from half a million to several millions of central processing unit (CPU) cores running up to a billion threads. From the current knowledge and observations of existing large systems, it is anticipated that exascale systems will experience various kind of faults many times per day. It is also anticipated that the current approach for resilience, which relies on automatic or application level checkpoint/ restart, will not work because the time for checkpointing and restarting will exceed the mean time to failure of a full system. This set of projections leaves the community of fault tolerance for HPC systems with a difficult challenge: finding new approaches, which are possibly radically disruptive, to run applications until their normal termination, despite the essentially unstable nature of exascale systems. Yet, the community has only five to six years to solve the problem. This white paper synthesizes the motivations, observations and research issues considered as determinant of several complimentary experts of HPC in applications, programming models, distributed systems and system management. },
  area        = {Par},
}

@Article{WilliamGropp11012009,
  author      = {Gropp, William and Snir, Marc},
  title       = {On the Need for a Consortium of Capability Centers},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {23},
  number      = {4},
  pages       = {413--420},
  year        = {2009},
  doi         = {10.1177/1094342009347706},
  url         = {http://hpc.sagepub.com/cgi/content/abstract/23/4/413},
  eprint      = {http://hpc.sagepub.com/cgi/reprint/23/4/413.pdf},
  officialurl = {http://hpc.sagepub.com/content/23/4/413.abstract},
  abstract    = {Users of high-performance computing systems face many challenges, particularly as they design and develop their software to run at multiple facilities. This can lead to a "greatest common denominator" strategy that slows innovation and the adoption of newer techniques. In addition, these systems typically push the limits -- leading to problems with reliability and functionality of the system and software. We propose a consortium of HPC centers to collaborate on raising both the capability and the quality of the software for HPC systems. },
  area        = {Par},
}

@Article{DBLP:journals/ife/BalajiCTGL09,
  author    = {Pavan Balaji and Anthony Chan and Rajeev Thakur and William Gropp and Ewing L. Lusk},
  title     = {Toward message passing for a million processes: characterizing {MPI} on a massive scale {Blue Gene/P}},
  journal   = {Computer Science - R{\&}D},
  volume    = {24},
  number    = {1--2},
  pages     = {11--19},
  year      = {2009},
  ee        = {http://dx.doi.org/10.1007/s00450-009-0095-3},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  area      = {Par:M},
}

@Article{1608633,
  author    = {Gropp, William D.},
  title     = {Software for Petascale Computing Systems},
  journal   = {Computing in Science and Engineering},
  volume    = {11},
  number    = {5},
  pages     = {17--21},
  year      = {2009},
  issn      = {1521-9615},
  doi       = {10.1109/MCSE.2009.148},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  area      = {Par},
}

@Article{traff2010,
  author  = {Jesper Larsson Tr{\"a}ff and William D. Gropp and Rajeev Thakur},
  title   = {Self-Consistent {MPI} Performance Guidelines},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {21},
  number  = {5},
  pages   = {698--709},
  year    = {2010},
  area    = {Par:M},
}

%% 2010
@Article{gopal10,
  author  = "Salman Pervez and Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev Thakur and William Gropp",
  title   = "Formal methods applied to high-performance computing software design: a case study of {MPI} one-sided communication-based locking",
  journal = "Software Practice and Experience",
  volume  = "40",
  number  = "1",
  pages   = "23--42",
  year    = "2010",
  url     = {http://onlinelibrary.wiley.com/doi/10.1002/spe.946/abstract},
  area    = {Par:M:formal},
}

@Article{PavanBalaji02012010,
  author      = {Balaji, Pavan and Buntinas, Darius and Goodell, David and Gropp, William and Thakur, Rajeev},
  title       = {Fine-Grained Multithreading Support for Hybrid Threaded {MPI} Programming},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {24},
  number      = {1},
  pages       = {49--57},
  year        = {2010},
  doi         = {10.1177/1094342009360206},
  url         = {http://hpc.sagepub.com/cgi/content/abstract/24/1/49},
  eprint      = {http://hpc.sagepub.com/cgi/reprint/24/1/49.pdf},
  officialurl = {http://hpc.sagepub.com/content/24/1/49.abstract},
  abstract    = {As high-end computing systems continue to grow in scale, recent advances in multi- and many-core architectures have pushed such growth toward more dense architectures, that is, more processing elements per physical node, rather than more physical nodes themselves. Although a large number of scientific applications have relied so far on an MPI-everywhere model for programming high-end parallel systems; this model may not be sufficient for future machines, given their physical constraints such as decreasing amounts of memory per processing element and shared caches. As a result, application and computer scientists are exploring alternative programming models that involve using MPI between address spaces and some other threaded model, such as OpenMP, Pthreads, or Intel TBB, within an address space. Such hybrid models require efficient support from an MPI implementation for MPI messages sent from multiple threads simultaneously. In this paper, we explore the issues involved in designing such an implementation. We present four approaches to building a fully thread-safe MPI implementation, with decreasing levels of critical-section granularity (from coarse-grain locks to fine-grain locks to lock-free operations) and correspondingly increasing levels of complexity. We present performance results that demonstrate the performance implications of the different approaches. },
  area        = {Par:M:Thread},
}

@Article{mellor2010teaching,
  author    = "Mellor-Crummey, J. and Gropp, W. and Herlihy, M.",
  title     = "Teaching parallel programming: a roundtable discussion",
  journal   = "XRDS: Crossroads, The ACM Magazine for Students",
  volume    = "17",
  number    = "1",
  pages     = "28--30",
  year      = "2010",
  issn      = "1528-4972",
  publisher = "ACM",
  area      = "Par",
}

@Article{balaji-mpidata-10,
  author      = {Balaji, Pavan and Chan, Anthony and Gropp, William and Thakur, Rajeev and Lusk, Ewing},
  title       = {The Importance of Non-Data-Communication Overheads in {MPI}},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {24},
  number      = {1},
  pages       = {5--15},
  year        = {2010},
  doi         = {10.1177/1094342009359258},
  url         = {http://hpc.sagepub.com/cgi/content/abstract/24/1/5},
  eprint      = {http://hpc.sagepub.com/cgi/reprint/24/1/5.pdf},
  officialurl = {http://hpc.sagepub.com/content/24/1/5.abstract},
  abstract    = {With processor speeds no longer doubling every 18--24 months owing to the exponential increase in power consumption and heat dissipation, modern high-end computing systems tend to rely less on the performance of single processing units and instead rely on achieving high performance by using the parallelism of a massive number of low-frequency/low-power processing cores. Using such low-frequency cores, however, puts a premium on end-host pre- and post-communication processing required within communication stacks, such as the Message Passing Interface (MPI) implementation. Similarly, small amounts of serialization within the communication stack that were acceptable on small/medium systems can be brutal on massively parallel systems. Thus, in this paper, we study the different non-data-communication overheads within the MPI implementation on the IBM Blue Gene/P system. Specifically, we analyze various aspects of MPI, including the MPI stack overhead itself, overhead of allocating and queueing requests, queue searches within the MPI stack, multi-request operations, and various others. Our experiments, that scale up to 131,072 cores of the largest Blue Gene/P system in the world (80\% of the total system size), reveal several insights into overheads in the MPI stack, which were not previously considered significant, but can have a substantial impact on such massive systems.},
  area        = {Par:M},
}

@Article{JesperLarssonTraff02012010,
  author      = {Tr{\"a}ff, Jesper Larsson and Ripke, Andreas and Siebert, Christian and Balaji, Pavan and Thakur, Rajeev and Gropp, William},
  title       = {A Pipelined Algorithm for Large, Irregular All-Gather Problems},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {24},
  number      = {1},
  pages       = {58--68},
  year        = {2010},
  doi         = {10.1177/1094342009359013},
  url         = {http://hpc.sagepub.com/cgi/content/abstract/24/1/58},
  eprint      = {http://hpc.sagepub.com/cgi/reprint/24/1/58.pdf},
  officialurl = {http://hpc.sagepub.com/content/24/1/58.abstract},
  abstract    = {We describe and evaluate a new pipelined algorithm for large, irregular all-gather problems. In the irregular allgather problem each process in a set of processes contributes individual data of possibly different size, and all processes have to collect all data from all processes. The pipelined algorithm is useful for the implementation of the MPI_Allgatherv collective operation of the Message-Passing Interface (MPI) for large problems. By conception, the new algorithm is well suited to implementation on clustered multiprocessors, such as symmetric multiprocessing (SMP) clusters. The new algorithm has been implemented within different MPI libraries. Benchmark results on NEC SX-8, Linux clusters with InfiniBand and Gigabit Ethernet, IBM Blue Gene/P, and SiCortex systems show huge performance gains in accordance with the expected behavior. },
  area        = {Par:M:Coll},
}

%% 2011

@Article{GuoGropp10,
  author      = "Dahai Guo and William Gropp",
  title       = "Optimizing Sparse Data Structures for Matrix-vector Multiply",
  journal     = "International Journal of High Performance Computing Applications",
  volume      = "25",
  number      = "1",
  pages       = "115--131",
  year        = "2011",
  officialurl = "http://hpc.sagepub.com/content/25/1/115.abstract",
  area        = "NS:A",
}

@Article{Dongarra01022011,
  author      = {Dongarra, Jack and Beckman, Pete and Moore, Terry and Aerts, Patrick and Aloisio, Giovanni and Andre, Jean-Claude and Barkai, David and Berthou, Jean-Yves and Boku, Taisuke and Braunschweig, Bertrand and Cappello, Franck and Chapman, Barbara and Chi, Xuebin and Choudhary, Alok and Dosanjh, Sudip and Dunning, Thom and Fiore, Sandro and Geist, Al and Gropp, William and Harrison, Robert and Hereld, Mark and Heroux, Michael and Hoisie, Adolfy and Hotta, Koh and Jin, Zhong and Ishikawa, Yutaka and Johnson, Fred and Kale, Sanjay and Kenway, Richard and Keyes, David and Kramer, Bill and Labarta, Jesus and Lichnewsky, Alain and Lippert, Thomas and Lucas, Bob and Maccabe, Barney and Matsuoka, Satoshi and Messina, Paul and Michielse, Peter and Mohr, Bernd and Mueller, Matthias S. and Nagel, Wolfgang E. and Nakashima, Hiroshi and Papka, Michael E and Reed, Dan and Sato, Mitsuhisa and Seidel, Ed and Shalf, John and Skinner, David and Snir, Marc and Sterling, Thomas and Stevens, Rick and Streitz, Fred and Sugar, Bob and Sumimoto, Shinji and Tang, William and Taylor, John and Thakur, Rajeev and Trefethen, Anne and Valero, Mateo and van der Steen, Aad and Vetter, Jeffrey and Williams, Peg and Wisniewski, Robert and Yelick, Kathy},
  title       = {The International Exascale Software Project Roadmap},
  journal     = {International Journal of High Performance Computing Applications},
  volume      = {25},
  number      = {1},
  pages       = {3--60},
  year        = {2011},
  doi         = {10.1177/1094342010391989},
  url         = {http://hpc.sagepub.com/content/25/1/3.abstract},
  eprint      = {http://hpc.sagepub.com/content/25/1/3.full.pdf+html},
  officialurl = {http://hpc.sagepub.com/content/25/1/3.abstract},
  abstract    = {Over the last 20 years, the open-source community has provided more and more software on which the world's high-performance computing systems depend for performance and productivity. The community has invested millions of dollars and years of effort to build key components. However, although the investments in these separate software elements have been tremendously valuable, a great deal of productivity has also been lost because of the lack of planning, coordination, and key integration of technologies necessary to make them work together smoothly and efficiently, both within individual petascale systems and between different systems. It seems clear that this completely uncoordinated development model will not provide the software needed to support the unprecedented parallelism required for peta/ exascale computation on millions of cores, or the flexibility required to exploit new hardware models and features, such as transactional memory, speculative execution, and graphics processing units. This report describes the work of the community to prepare for the challenges of exascale computing, ultimately combing their efforts in a coordinated International Exascale Software Project.},
  area        = {Par},
}

@Article{5725240,
  author   = {Showerman, M. and Enos, J. and Steffen, C. and Treichler, S. and Gropp, W. and Hwu, W.-m. W.},
  title    = {{EcoG}: A Power-Efficient {GPU} Cluster Architecture for Scientific Computing},
  journal  = {Computing in Science and Engineering},
  volume   = {13},
  number   = {2},
  pages    = {83--87},
  month    = mar # "--" # apr,
  year     = {2011},
  keywords = {EcoG;power efficient GPU cluster architecture;scientific computing;computer graphic equipment;coprocessors;pattern clustering;power aware computing;},
  doi      = {10.1109/MCSE.2011.30},
  issn     = {1521-9615},
  area     = {Par:GPU},
}

@Article{balaji-mpi-mill-11,
  author  = {Pavan Balaji and Darius Buntinas and David Goodell and William Gropp and Torsten Hoefler and Sameer Kumar and Ewing Lusk and Rajeev Thakur and Jesper Larsson Tr{\"a}ff},
  title   = {{MPI} on Millions of Cores},
  journal = {Parallel Processing Letters},
  volume  = {21},
  number  = {1},
  pages   = {45--60},
  year    = {2011},
  url     = {http://www.worldscinet.com/ppl/21/2101/S0129626411000060.html},
  area    = {Par:M},
}

@Article{DBLP:journals/cacm/GopalakrishnanKSTGLSSB11,
  author    = "Ganesh Gopalakrishnan and Robert M. Kirby and Stephen F. Siegel and Rajeev Thakur and William Gropp and Ewing L. Lusk and Bronis R. de Supinski and Martin Schulz and Greg Bronevetsky",
  title     = "Formal analysis of {MPI}-based parallel programs",
  journal   = "Commun. ACM",
  volume    = "54",
  number    = "12",
  pages     = "82--91",
  year      = "2011",
  ee        = "http://doi.acm.org/10.1145/2043174.2043194",
  bibsource = "DBLP, http://dblp.uni-trier.de",
}

%% 2012

@Article{Gropp:2012:BAB:2160718.2160739,
  author     = {Gropp, William},
  title      = {Best algorithms + best computers = powerful match},
  journal    = {Commun. ACM},
  issue_date = {May 2012},
  volume     = {55},
  number     = {5},
  pages      = {100},
  numpages   = {1},
  month      = may,
  year       = {2012},
  issn       = {0001-0782},
  doi        = {10.1145/2160718.2160739},
  url        = {http://doi.acm.org/10.1145/2160718.2160739},
  acmid      = {2160739},
  publisher  = {ACM},
  address    = {New York, NY, USA},
}

@Article{guo2013applications,
  author    = "Guo, D. and Gropp, W.",
  title     = "Applications of the streamed storage format for sparse matrix operations",
  journal   = "International Journal of High Performance Computing Applications",
  year      = "2013",
  publisher = "SAGE Publications",
}

@Article{Keyes01022013,

author	=	{Keyes, David E and McInnes, Lois C and Woodward, Carol and Gropp, William and Myra, Eric and Pernice, Michael and Bell, John and Brown, Jed and Clo, Alain and Connors, Jeffrey and Constantinescu, Emil and Estep, Don and Evans, Kate and Farhat, Charbel and Hakim, Ammar and Hammond, Glenn and Hansen, Glen and Hill, Judith and Isaac, Tobin and Jiao, Xiangmin and Jordan, Kirk and Kaushik, Dinesh and Kaxiras, Efthimios and Koniges, Alice and Lee, Kihwan and Lott, Aaron and Lu, Qiming and Magerlein, John and Maxwell, Reed and McCourt, Michael and Mehl, Miriam and Pawlowski, Roger and Randles, Amanda P and Reynolds, Daniel and Rivi{\`e}re, Beatrice and R{\"u}de, Ulrich and Scheibe, Tim and Shadid, John and Sheehan, Brendan and Shephard, Mark and Siegel, Andrew and Smith, Barry and Tang, Xianzhu and Wilson, Cian and Wohlmuth, Barbara},
title	=	{Multiphysics simulations: Challenges and opportunities},
journal	=	{International Journal of High Performance Computing Applications},
year	=	{2013},
volume	=	{27},
number	=	{1},
pages	=	{4--83},
doi	=	{10.1177/1094342012468181},
url	=	{http://hpc.sagepub.com/content/27/1/4.abstract},
eprint	=	{http://hpc.sagepub.com/content/27/1/4.full.pdf+html},
abstract	=	{We consider multiphysics applications from algorithmic and architectural perspectives, where ``algorithmic'' includes both mathematical analysis and computational complexity, and ``architectural'' includes both software and hardware environments. Many diverse multiphysics applications can be reduced, en route to their computational simulation, to a common algebraic coupling paradigm. Mathematical analysis of multiphysics coupling in this form is not always practical for realistic applications, but model problems representative of applications discussed herein can provide insight. A variety of software frameworks for multiphysics applications have been constructed and refined within disciplinary communities and executed on leading-edge computer systems. We examine several of these, expose some commonalities among them, and attempt to extrapolate best practices to future systems. From our study, we summarize challenges and forecast opportunities.},

}

% NOTE(review): the DOI prefix s00607 presumably belongs to Springer's
% journal "Computing"; confirm the journal name recorded below.
@Article{mpi-mpi-hybrid-programming,

author	=	{T. Hoefler and J. Dinan and D. Buntinas and P. Balaji and B. Barrett and R. Brightwell and W. Gropp and V. Kale and R. Thakur},
title	=	{{MPI} + {MPI}: a new hybrid approach to parallel programming with {MPI} plus shared memory},
journal	=	{Journal of Computing},
year	=	{2013},
month	=	may,
publisher	=	{Springer},
doi	=	{10.1007/s00607-013-0324-2},

}

% NOTE(review): volume "PP" and number "99" look like early-access
% placeholders; confirm against the published issue.
@Article{6636318,

author	=	{Gropp, W. and Snir, M.},
title	=	{Programming for Exascale Computers},
journal	=	{Computing in Science and Engineering},
year	=	2013,
volume	=	{PP},
number	=	99,
doi	=	{10.1109/MCSE.2013.96},
issn	=	{1521-9615},
keywords	=	{Computational modeling;Data models;Electronics packaging;Message systems;Object oriented modeling;Programming;Synchronization},

}
% 2014

@Article{Guo01022014,

author	=	{Guo, Dahai and Gropp, William},
title	=	{Applications of the streamed storage format for sparse matrix operations},
journal	=	{International Journal of High Performance Computing Applications},
year	=	2014,
volume	=	28,
number	=	1,
pages	=	{3--12},
doi	=	{10.1177/1094342012470469},
url	=	{http://hpc.sagepub.com/content/28/1/3.abstract},
eprint	=	{http://hpc.sagepub.com/content/28/1/3.full.pdf+html},
abstract	=	{The streamed storage format for sparse matrices showed good performance improvement for sparse matrix and vector multiply (SpMV) compared with compressed sparse row (CSR) and block CSR (BCSR) formats, particularly on IBM Power processors. We extend the format to exploit single instruction multiple data (SIMD) instructions in order to utilize the vector unit, and discuss how the streamed formats perform on the Power7 processor, which is the first eight-core chip from IBM. The streamed format is then applied to two more operations of sparse matrices, successive over-relaxation (SOR) iteration sweeps and incomplete lower and upper (ILU) triangular solvers. Basic solvers are developed for them in the high-performance computing (HPC) package PETSc. Test results on the IBM Power7 processor show that the SIMD instructions improve the performance of the streamed storage format on SpMV. The format also accelerates SOR iteration sweeps and ILU matrix solvers, compared with the traditional BCSR format used in PETSc.},

}

@Article{sc13-specialissue,

author	=	{William Gropp and Satoshi Matsuoka},
title	=	{Special issue: {SC13 - The International Conference for High Performance Computing, Networking, Storage and Analysis}},
journal	=	{Scientific Programming},
pages	=	{57--58},
year	=	{2014},
doi	=	{10.3233/SPR-140388},
url	=	{http://iospress.metapress.com/content/1X71364254G23115},

}

@Article{cappello14-resilience,

author	=	{Franck Cappello and Al Geist and William Gropp and Sanjay Kale and Bill Kramer and Marc Snir},
title	=	{Toward Exascale Resilience: 2014 update},
journal	=	{Supercomputing frontiers and innovations},
volume	=	{1},
number	=	{1},
year	=	{2014},
note	=	{Open Access, \url{http://superfri.org/superfri/article/view/14}},

}

@Article{slotnick2014enabling,

author	=	{Slotnick, Jeffrey P and Khodadoust, Abdollah and Alonso, Juan J and Darmofal, David L and Gropp, William D and Lurie, Elizabeth A and Mavriplis, Dimitri J and Venkatakrishnan, Venkat},
title	=	{Enabling the environmentally clean air transportation of the future: a vision of computational fluid dynamics in 2030},
journal	=	{Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences},
year	=	2014,
volume	=	372,
number	=	2022,
doi	=	{10.1098/rsta.2013.0317},
publisher	=	{The Royal Society},

}

%% 2015

@Article{Sack:2015:CAM:2737841.2686882,

author	=	{Sack, Paul and Gropp, William},
title	=	{Collective Algorithms for Multiported Torus Networks},
journal	=	{ACM Trans. Parallel Comput.},
year	=	2015,
month	=	feb,
issue_date	=	{January 2015},
volume	=	1,
number	=	2,
pages	=	{12:1--12:33},
articleno	=	12,
numpages	=	33,
issn	=	{2329-4949},
doi	=	{10.1145/2686882},
url	=	{http://doi.acm.org/10.1145/2686882},
acmid	=	2686882,
publisher	=	{ACM},
address	=	{New York, NY, USA},
keywords	=	{Message-passing, collective algorithms},

}

@Article{journals/topc/HoeflerDTBBGU15,

author	=	{Torsten Hoefler and James Dinan and Rajeev Thakur and Brian Barrett and Pavan Balaji and William Gropp and Keith D. Underwood},
title	=	{Remote Memory Access Programming in {MPI}-3},
journal	=	{ACM Trans. Parallel Comput.},
year	=	{2015},
volume	=	{2},
number	=	{2},
pages	=	{9:1--9:26},
articleno	=	{9},
doi	=	{10.1145/2780584},
url	=	{http://doi.acm.org/10.1145/2780584},
bibdate	=	{2015-08-11},
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/journals/topc/topc2.html#HoeflerDTBBGU15",

}

@Article{Guo14072015,

author	=	{Guo, Dahai and Gropp, William and Olson, Luke N.},
title	=	{A hybrid format for better performance of sparse matrix-vector multiplication on a {GPU}},
journal	=	{International Journal of High Performance Computing Applications},
year	=	2016,
volume	=	30,
number	=	1,
pages	=	{103--120},
doi	=	{10.1177/1094342015593156},
url	=	{http://hpc.sagepub.com/content/early/2015/07/13/1094342015593156.abstract},
eprint	=	{http://hpc.sagepub.com/content/early/2015/07/13/1094342015593156.full.pdf+html},
abstract	=	{In this paper, we present a new sparse matrix data format that leads to improved memory coalescing and more efficient sparse matrix-vector multiplication for a wide range of problems on high-throughput architectures such as a GPU. The sparse matrix structure is constructed by sorting the rows based on the row length (defined as the number of non-zero elements in a matrix row) followed by a partition into two ranges, short rows and long rows. Based on this partition, the matrix entries are then transformed into ELLPACK or vectorized compressed sparse row format. In addition, the number of threads are adaptively selected by their row length, in order to balance the workload for each graphics processing unit thread. Several computational experiments are presented to support this approach and the results suggest a notable improvement over a wide range of matrix structures.},

}

%% 2016

@Article{doi:10.1137/15M1026341,

author	=	{Amanda Bienz and Robert D. Falgout and William Gropp and Luke N. Olson and Jacob B. Schroder},
title	=	{Reducing Parallel Communication in Algebraic Multigrid through Sparsification},
journal	=	{SIAM Journal on Scientific Computing},
year	=	{2016},
volume	=	{38},
number	=	{5},
pages	=	{S332--S357},
doi	=	{10.1137/15M1026341},
url	=	{http://dx.doi.org/10.1137/15M1026341},
eprint	=	{http://dx.doi.org/10.1137/15M1026341},

}

@Article{CPE:CPE3758,

author	=	{Dinan, James and Balaji, Pavan and Buntinas, Darius and Goodell, David and Gropp, William and Thakur, Rajeev},
title	=	{An implementation and evaluation of the {MPI} 3.0 one-sided communication interface},
journal	=	{Concurrency and Computation: Practice and Experience},
year	=	2016,
volume	=	28,
number	=	17,
pages	=	{4385--4404},
issn	=	{1532-0634},
doi	=	{10.1002/cpe.3758},
url	=	{http://dx.doi.org/10.1002/cpe.3758},
keywords	=	{Message Passing Interface (MPI), one-sided communication, remote memory access (RMA), MPICH},
note	=	{cpe.3758},

}

% 2017

@article{doi:10.1177/1094342016677084,

author	=	{Anthony Kougkas and Hassan Eslami and Xian-He Sun and Rajeev Thakur and William Gropp},
title	=	{Rethinking key-value store for parallel {I/O} optimization},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	{2017},
volume	=	{31},
number	=	{4},
pages	=	{335--356},
doi	=	{10.1177/1094342016677084},
url	=	{http://dx.doi.org/10.1177/1094342016677084},
eprint	=	{http://dx.doi.org/10.1177/1094342016677084},
abstract	=	{Key–value stores are being widely used as the storage system for large-scale internet services and cloud storage systems. However, they are rarely used in HPC systems, where parallel file systems are the dominant storage solution. In this study, we examine the architecture differences and performance characteristics of parallel file systems and key–value stores. We propose using key–value stores to optimize overall Input/Output (I/O) performance, especially for workloads that parallel file systems cannot handle well, such as the cases with intense data synchronization or heavy metadata operations. We conducted experiments with several synthetic benchmarks, an I/O benchmark, and a real application. We modeled the performance of these two systems using collected data from our experiments, and we provide a predictive method to identify which system offers better I/O performance given a specific workload. The results show that we can optimize the I/O performance in HPC systems by utilizing key–value stores. },

}

@Article{journals/pc/DangSG17,

author	=	{Hoang-Vu Dang and Marc Snir and William Gropp},
title	=	{Eliminating contention bottlenecks in multithreaded {MPI}},
journal	=	{Parallel Computing},
year	=	{2017},
volume	=	{69},
pages	=	{1--23},
doi	=	{10.1016/j.parco.2017.08.003},
bibdate	=	{2017-10-26},
bibsource	=	"DBLP, http://dblp.uni-trier.de/https://doi.org/10.1016/j.parco.2017.08.003; DBLP, http://dblp.uni-trier.de/db/journals/pc/pc69.html#DangSG17",

}

%% 2018

@Article{doi:10.1177/1094342018778123,

author	=	{M Asch and T Moore and R Badia and M Beck and P Beckman and T Bidot and F Bodin and F Cappello and A Choudhary and B de Supinski and E Deelman and J Dongarra and A Dubey and G Fox and H Fu and S Girona and W Gropp and M Heroux and Y Ishikawa and K Keahey and D Keyes and W Kramer and J-F Lavignon and Y Lu and S Matsuoka and B Mohr and D Reed and S Requena and J Saltz and T Schulthess and R Stevens and M Swany and A Szalay and W Tang and G Varoquaux and J-P Vilotte and R Wisniewski and Z Xu and I Zacharov},
title	=	{Big data and extreme-scale computing: Pathways to Convergence-Toward a shaping strategy for a future software and data ecosystem for scientific inquiry},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	{2018},
volume	=	{32},
number	=	{4},
pages	=	{435--479},
doi	=	{10.1177/1094342018778123},
url	=	{https://doi.org/10.1177/1094342018778123},
eprint	=	{https://doi.org/10.1177/1094342018778123},
abstract	=	{Over the past four years, the Big Data and Exascale Computing (BDEC) project organized a series of five international workshops that aimed to explore the ways in which the new forms of data-centric discovery introduced by the ongoing revolution in high-end data analysis (HDA) might be integrated with the established, simulation-centric paradigm of the high-performance computing (HPC) community. Based on those meetings, we argue that the rapid proliferation of digital data generators, the unprecedented growth in the volume and diversity of the data they generate, and the intense evolution of the methods for analyzing and using that data are radically reshaping the landscape of scientific computing. The most critical problems involve the logistics of wide-area, multistage workflows that will move back and forth across the computing continuum, between the multitude of distributed sensors, instruments and other devices at the networks edge, and the centralized resources of commercial clouds and HPC centers. We suggest that the prospects for the future integration of technological infrastructures and research ecosystems need to be considered at three different levels. First, we discuss the convergence of research applications and workflows that establish a research paradigm that combines both HPC and HDA, where ongoing progress is already motivating efforts at the other two levels. Second, we offer an account of some of the problems involved with creating a converged infrastructure for peripheral environments, that is, a shared infrastructure that can be deployed throughout the network in a scalable manner to meet the highly diverse requirements for processing, communication, and buffering/storage of massive data workflows of many different scientific domains. 
Third, we focus on some opportunities for software ecosystem convergence in big, logically centralized facilities that execute large-scale simulations and models and/or perform large-scale data analytics. We close by offering some conclusions and recommendations for future investment and policy review.},

}

@article{doi:10.1177/1094342017695444,

author	=	{Tarun Prabhu and William Gropp},
title	=	{{DAME}: Runtime-compilation for data movement},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	{2018},
volume	=	{32},
number	=	{5},
pages	=	{760--774},
doi	=	{10.1177/1094342017695444},
url	=	{https://doi.org/10.1177/1094342017695444},
eprint	=	{https://doi.org/10.1177/1094342017695444},
abstract	=	{Modern machines consist of multiple compute devices and complex memory hierarchies. For many applications, it is imperative that any data movement between and within the various compute devices be done as efficiently as possible in order to obtain maximum performance. However, hand-optimizing code for one architecture will likely sacrifice both performance portability and software maintainability. In addition, some optimization decisions are best made at runtime. This suggests that the problem ought to be tackled on two fronts. First, provide the programmer with a declarative language to describe data layouts and data motion. This would allow the runtime system to be tuned for each architecture by a specialist and free the programmer to concentrate on the application itself. Second, exploit the execution time information to optimize the data movement code further. MPI derived datatypes accomplish the former task and Just In Time (JIT) compilation can be used for the latter. In this paper, we present DAME—a language and interpreter designed to be used as the backend for MPI derived datatypes. We also present DAME-L and DAME-X, two JIT-enabled implementations of DAME, all of which have been integrated into MPICH. We evaluate their performance on DDTBench and two mini-applications written with MPI derived datatypes and obtain communication speedups of up to 20× and mini-application speedups of up to 3×.},

}

%% 2019

@Article{doi:10.1177/1094342018762036,

author	=	{Jon Calhoun and Franck Cappello and Luke N. Olson and Marc Snir and William D. Gropp},
title	=	{Exploring the feasibility of lossy compression for {PDE} simulations},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	2019,
volume	=	33,
number	=	2,
pages	=	{397--410},
doi	=	{10.1177/1094342018762036},
url	=	{https://doi.org/10.1177/1094342018762036},
eprint	=	{https://doi.org/10.1177/1094342018762036},
abstract	=	{Checkpoint restart plays an important role in high-performance computing (HPC) applications, allowing simulation runtime to extend beyond a single job allocation and facilitating recovery from hardware failure. Yet, as machines grow in size and in complexity, traditional approaches to checkpoint restart are becoming prohibitive. Current methods store a subset of the application’s state and exploit the memory hierarchy in the machine. However, as the energy cost of data movement continues to dominate, further reductions in checkpoint size are needed. Lossy compression, which can significantly reduce checkpoint sizes, offers a potential to reduce computational cost in checkpoint restart. This article investigates the use of numerical properties of partial differential equation (PDE) simulations, such as bounds on the truncation error, to evaluate the feasibility of using lossy compression in checkpointing PDE simulations. Restart from a checkpoint with lossy compression is considered for a fail-stop error in two time-dependent HPC application codes: PlasComCM and Nek5000. Results show that error in application variables due to a restart from a lossy compressed checkpoint can be masked by the numerical error in the discretization, leading to increased efficiency in checkpoint restart without influencing overall accuracy in the simulation. },

}

@Article{GROPP2019-EuroMPI17,

author	=	{William Gropp and Rajeev Thakur},
title	=	{Guest Editor's Introduction: Special Issue on Best Papers from {EuroMPI/USA} 2017},
journal	=	{Parallel Computing},
year	=	{2019},
issn	=	{0167-8191},
doi	=	{10.1016/j.parco.2019.03.001},
url	=	{http://www.sciencedirect.com/science/article/pii/S0167819119300560},

}

@Article{BIENZ2019166,

author	=	{Amanda Bienz and William D. Gropp and Luke N. Olson},
title	=	{Node aware sparse matrix--vector multiplication},
journal	=	{Journal of Parallel and Distributed Computing},
year	=	{2019},
volume	=	{130},
pages	=	{166--178},
issn	=	{0743-7315},
doi	=	{10.1016/j.jpdc.2019.03.016},
url	=	{http://www.sciencedirect.com/science/article/pii/S0743731519302321},
keywords	=	{Sparse, Matrix–vector multiplication, SpMV, Parallel communication, Node aware},
abstract	=	{The sparse matrix–vector multiply (SpMV) operation is a key computational kernel in many simulations and linear solvers. The large communication requirements associated with a reference implementation of a parallel SpMV result in poor parallel scalability. The cost of communication depends on the physical locations of the send and receive processes: messages injected into the network are more costly than messages sent between processes on the same node. In this paper, a node aware parallel SpMV (NAPSpMV) is introduced to exploit knowledge of the system topology, specifically the node-processor layout, to reduce costs associated with communication. The values of the input vector are redistributed to minimize both the number and the size of messages that are injected into the network during a SpMV, leading to a reduction in communication costs. A variety of computational experiments that highlight the efficiency of this approach are presented.},

}

@article{GROPP201998,

author	=	{William D. Gropp},
title	=	{Using node and socket information to implement {MPI} {C}artesian topologies},
journal	=	{Parallel Computing},
year	=	{2019},
volume	=	{85},
pages	=	{98--108},
issn	=	{0167-8191},
doi	=	{10.1016/j.parco.2019.01.001},
url	=	{http://www.sciencedirect.com/science/article/pii/S0167819118303156},
keywords	=	{Message passing, MPI, Process topology, Cartesian process topology},
abstract	=	{The MPI API provides support for Cartesian process topologies, including the option to reorder the processes to achieve better communication performance. But MPI implementations rarely provide anything useful for the reorder option, typically ignoring it. One argument made is that modern interconnects are fast enough that applications are less sensitive to the exact layout of processes onto the system. However, intranode communication performance is much greater than internode communication performance. In this paper, we show a simple approach that takes into account only information about which MPI processes are on the same node to provide a fast and effective implementation of the MPI Cartesian topology routine. While not optimal, this approach provides a significant improvement over all tested MPI implementations and provides an implementation that may be used as the default in any MPI implementation of MPI_Cart_create. We also explore the impact of taking into account the mapping of processes to processor chips or sockets, and show that this is both relatively easy to accomplish but provides only a small improvement in performance.},

}

% 2019

@article{doi:10.1177/1094342019865606,

author	=	{Thiago SFX Teixeira and William Gropp and David Padua},
title	=	{Managing code transformations for better performance portability},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	2019,
volume	=	33,
number	=	6,
pages	=	{1290--1306},
doi	=	{10.1177/1094342019865606},
url	=	{https://doi.org/10.1177/1094342019865606},
eprint	=	{https://doi.org/10.1177/1094342019865606},
abstract	=	{ Code optimization is an intricate task that is getting more complex as computing systems evolve. Managing the program optimization process, including the implementation and evaluation of code variants, is tedious, inefficient, and errors are likely to be introduced in the process. Moreover, because each platform typically requires a different sequence of transformations to fully harness its computing power, the optimization process complexity grows as new platforms are adopted. To address these issues, systems and frameworks have been proposed to automate the code optimization process. They, however, have not been widely adopted and are primarily used by experts with deep knowledge about underlying architecture and compiler intricacies. This article describes the requirements that we believe necessary for making automatic performance tuning more broadly used, especially in complex, long-lived high-performance computing applications. Besides discussing limitations of current systems and strategies to overcome these, we describe the design of a system that is able to semi-automatically generate efficient platform-specific code. In the proposed system, the code optimization is programmer-guided, separately from application code, on an external file in what we call optimization programming. The language to program the optimization process is able to represent complex collections of transformations and, as a result, generate efficient platform-specific code. A database manages different optimized versions of code regions, providing a pragmatic approach to performance portability, and the framework itself has separate components, allowing the optimized code to be used on systems without installing all of the modules required for the code generation. We present experiments on two different platforms to illustrate the generation of efficient platform-specific code that performs comparable to hand-optimized, vendor-provided code. },

}

@Article{natureMMA19,

author	=	{Huerta, E. A. and Allen, Gabrielle and Andreoni, Igor and Antelis, Javier M. and Bachelet, Etienne and Berriman, G. Bruce and Bianco, Federica B. and Biswas, Rahul and Carrasco Kind, Matias and Chard, Kyle and Cho, Minsik and Cowperthwaite, Philip S. and Etienne, Zachariah B. and Fishbach, Maya and Forster, Francisco and George, Daniel and Gibbs, Tom and Graham, Matthew and Gropp, William and Gruendl, Robert and Gupta, Anushri and Haas, Roland and Habib, Sarah and Jennings, Elise and Johnson, Margaret W. G. and Katsavounidis, Erik and Katz, Daniel S. and Khan, Asad and Kindratenko, Volodymyr and Kramer, William T. C. and Liu, Xin and Mahabal, Ashish and Marka, Zsuzsa and McHenry, Kenton and Miller, J. M. and Moreno, Claudia and Neubauer, M. S. and Oberlin, Steve and Olivas, Alexander R. and Petravick, Donald and Rebei, Adam and Rosofsky, Shawn and Ruiz, Milton and Saxton, Aaron and Schutz, Bernard F. and Schwing, Alex and Seidel, Ed and Shapiro, Stuart L. and Shen, Hongyu and Shen, Yue and Singer, Leo P. and Sipocz, Brigitta M. and Sun, Lunan and Towns, John and Tsokaros, Antonios and Wei, Wei and Wells, Jack and Williams, Timothy J. and Xiong, Jinjun and Zhao, Zhizhen},
title	=	{Enabling real-time multi-messenger astrophysics discoveries with deep learning},
journal	=	{Nature Reviews Physics},
year	=	{2019},
month	=	oct,
volume	=	{1},
OPTnumber	=	{},
pages	=	{600--608},
doi	=	{10.1038/s42254-019-0097-4},
url	=	{https://doi.org/10.1038/s42254-019-0097-4},
abstract	=	{Multi-messenger astrophysics is a fast-growing, interdisciplinary field that combines data, which vary in volume and speed of data processing, from many different instruments that probe the Universe using different cosmic messengers: electromagnetic waves, cosmic rays, gravitational waves and neutrinos. In this Expert Recommendation, we review the key challenges of real-time observations of gravitational wave sources and their electromagnetic and astroparticle counterparts, and make a number of recommendations to maximize their potential for scientific discovery. These recommendations refer to the design of scalable and computationally efficient machine learning algorithms; the cyber-infrastructure to numerically simulate astrophysical sources, and to process and interpret multi-messenger astrophysics data; the management of gravitational wave detections to trigger real-time alerts for electromagnetic and astroparticle follow-ups; a vision to harness future developments of machine learning and cyber-infrastructure resources to cope with the big-data requirements; and the need to build a community of experts to realize the goals of multi-messenger astrophysics.},

}

%% 2020

@article{IBEID202063,

author	=	{Huda Ibeid and Luke Olson and William Gropp},
title	=	{{FFT}, {FMM}, and multigrid on the road to exascale: Performance challenges and opportunities},
journal	=	{Journal of Parallel and Distributed Computing},
year	=	{2020},
volume	=	{136},
pages	=	{63--74},
issn	=	{0743-7315},
doi	=	{10.1016/j.jpdc.2019.09.014},
url	=	{http://www.sciencedirect.com/science/article/pii/S0743731518305513},
keywords	=	{Fast Fourier transform, Fast multipole method, Multigrid, Exascale, Performance modeling},
abstract	=	{FFT, FMM, and multigrid methods are widely used fast and highly scalable solvers for elliptic PDEs. However, emerging large-scale computing systems are introducing challenges in comparison to current petascale computers. Recent efforts (Dongarra et al. 2011) have identified several constraints in the design of exascale software that include massive concurrency, resilience management, exploiting the high performance of heterogeneous systems, energy efficiency, and utilizing the deeper and more complex memory hierarchy expected at exascale. In this paper, we perform a model-based comparison of the FFT, FMM, and multigrid methods in the context of these projected constraints. In addition we use performance models to offer predictions about the expected performance on upcoming exascale system configurations based on current technology trends.},

}

@article{GROPP2020101203,

author	=	{William Gropp and Rajeev Thakur and Pavan Balaji},
title	=	{Translational research in the {MPICH} project},
journal	=	{Journal of Computational Science},
year	=	{2020},
pages	=	{101203},
issn	=	{1877-7503},
doi	=	{10.1016/j.jocs.2020.101203},
url	=	{http://www.sciencedirect.com/science/article/pii/S1877750320305044},
keywords	=	{MPI, MPICH, Translational computer science, Message passing libraries},
abstract	=	{The MPICH project is an example of translational research in computer science before that term was well known or even coined. The project began in 1992 as an effort to develop a portable, high-performance implementation of the emerging Message-Passing Interface (MPI) Standard. It has enabled the widespread adoption of MPI as a way to write scalable parallel applications on systems of all sizes including upcoming exascale supercomputers. In this paper, we describe how the translational research process was used in MPICH, how that led to its success, the challenges encountered and lessons learned, and how the process could be applied to other similar projects.},

}

@Article{eliu-bigdata-20,

author	=	{E. A. Huerta and Asad Khan and Edward Davis and Colleen Bushell and William D. Gropp and Daniel S. Katz and Volodymyr Kindratenko and Seid Koric and William T. C. Kramer and Brendan McGinty and Kenton McHenry and Aaron Saxton},
title	=	{Convergence of artificial intelligence and high performance computing on {NSF}-supported cyberinfrastructure},
journal	=	{Journal of Big Data},
volume	=	{7},
number	=	{88},
year	=	{2020},
doi	=	{10.1186/s40537-020-00361-2},
doiurl	=	{https://doi.org/10.1186/s40537-020-00361-2},

}

@Article{doi:10.1177/1094342020925535,

author	=	{Amanda Bienz and William D. Gropp and Luke N. Olson},
title	=	{Reducing communication in algebraic multigrid with multi-step node aware communication},
journal	=	{The International Journal of High Performance Computing Applications},
year	=	2020,
volume	=	34,
number	=	5,
pages	=	{547--561},
doi	=	{10.1177/1094342020925535},
url	=	{https://doi.org/10.1177/1094342020925535},
eprint	=	{https://doi.org/10.1177/1094342020925535},
abstract	=	{Algebraic multigrid (AMG) is often viewed as a scalable O(n) solver for sparse linear systems. Yet, AMG lacks parallel scalability due to increasingly large costs associated with communication, both in the initial construction of a multigrid hierarchy and in the iterative solve phase. This work introduces a parallel implementation of AMG that reduces the cost of communication, yielding improved parallel scalability. It is common in Message Passing Interface (MPI), particularly in the MPI-everywhere approach, to arrange inter-process communication, so that communication is transported regardless of the location of the send and receive processes. Performance tests show notable differences in the cost of intra- and internode communication, motivating a restructuring of communication. In this case, the communication schedule takes advantage of the less costly intra-node communication, reducing both the number and the size of internode messages. Node-centric communication extends to the range of components in both the setup and solve phase of AMG, yielding an increase in the weak and strong scaling of the entire method. },

}

% 2021
@Article{cise-21-intro,

author	=	{Douglas Doerfler and Steven Gottlieb and William Gropp and Barry I. Schneider and Alan Sussman},
title	=	{Performance Portability for Advanced Architectures},
journal	=	{Computing in Science and Engineering},
year	=	{2021},
volume	=	{23},
number	=	{5},
pages	=	{7--9},
doi	=	{10.1109/MCSE.2021.3104083},

}
% 2022

@Article{9690642,

author	=	{W. Gropp and F. Shull},
journal	=	{Computer},
title	=	{Succeeding Together},
year	=	{2022},
volume	=	{55},
number	=	{1},
issn	=	{1558-0814},
pages	=	{12--17},
doi	=	{10.1109/MC.2021.3127998},
publisher	=	{IEEE Computer Society},
address	=	{Los Alamitos, CA, USA},
month	=	jan

}

@Article{9734767,

author	=	{Cary, Andrew and Chawner, John and Duque, Earl and Gropp, William and Kleb, Bil and Kolonay, Ray and Nielsen, Eric and Smith, Brian},
journal	=	{Computing in Science \& Engineering},
title	=	{Realizing the Vision of {CFD} in 2030},
year	=	{2022},
volume	=	{24},
number	=	{1},
pages	=	{64--70},
doi	=	{10.1109/MCSE.2021.3133677}

}

% NOTE(review): the note still says "Just Accepted" although volume and number
% are assigned; confirm the final publication data (article number, month)
% against the ACM Digital Library and drop the note if the paper has appeared.
@article{10.1145/3523698,

author	=	{Lawson, Margaret and Gropp, William and Lofstead, Jay},
title	=	{{EMPRESS}: {A}ccelerating Scientific Discovery Through Descriptive Metadata Management},
year	=	{2022},
publisher	=	{Association for Computing Machinery},
address	=	{New York, NY, USA},
volume	=	{18},
number	=	{4},
issn	=	{1553-3077},
url	=	{https://doi.org/10.1145/3523698},
doi	=	{10.1145/3523698},
abstract	=	{High performance computing scientists are producing unprecedented volumes of data that take a long time to load for analysis. However, many analyses only require loading in the data containing particular features of interest and scientists have many approaches for identifying these features. Therefore, if scientists store information (descriptive metadata) about these identified features, for subsequent analyses they can use this information to only read in the data containing these features. This can greatly reduce the amount of data that scientists have to read in, thereby accelerating analysis. Despite the potential benefits of descriptive metadata management, no prior work has created a descriptive metadata system that can help scientists working with a wide range of applications and analyses to restrict their reads to data containing features of interest. In this paper, we present EMPRESS, the first such solution. EMPRESS offers all of the features needed to help accelerate discovery: it can accelerate analysis by up to 300 \texttimes{}, supports a wide range of applications and analyses, is high performing, is highly scalable, and requires minimal storage space. In addition, EMPRESS offers features required for a production-oriented system: scalable metadata consistency techniques, flexible system configurations, fault tolerance as a service, and portability.},
note	=	{Just Accepted},
journal	=	{ACM Transactions on Storage},
month	=	sep,
keywords	=	{Decaf, descriptive metadata, HDF5, data tagging, ATDM, high-level indexing, accelerating scientific discovery, EMPRESS}

}

% 2023
% The doi field holds the bare identifier; the resolver prefix is added by the style.
@article{LOCKHART2023103021,

title	=	{Characterizing the performance of node-aware strategies for irregular point-to-point communication on heterogeneous architectures},
journal	=	{Parallel Computing},
pages	=	{103021},
year	=	{2023},
issn	=	{0167-8191},
doi	=	{10.1016/j.parco.2023.103021},
url	=	{https://www.sciencedirect.com/science/article/pii/S0167819123000273},
author	=	{Shelby Lockhart and Amanda Bienz and William D. Gropp and Luke N. Olson},
keywords	=	{Performance modeling, GPU, Data movement, CUDA-aware, GPUDirect, MPI, Parallel, Communication, Sparse matrix},
abstract	=	{Supercomputer architectures are trending toward higher computational throughput due to the inclusion of heterogeneous compute nodes. These multi-GPU nodes increase on-node computational efficiency, while also increasing the amount of data to be communicated and the number of potential data flow paths. In this work, we characterize the performance of irregular point-to-point communication with MPI on heterogeneous compute environments through performance modeling, demonstrating the limitations of standard communication strategies for both device-aware and staging-through-host communication techniques. Presented models suggest staging communicated data through host processes then using node-aware communication strategies for high inter-node message counts. Notably, the models also predict that node-aware communication utilizing all available CPU cores to communicate inter-node data leads to the most performant strategy when communicating with a high number of nodes. Model validation is provided via a case study of irregular point-to-point communication patterns in distributed sparse matrix--vector products. Importantly, we include a discussion on the implications model predictions have on communication strategy design for emerging supercomputer architectures.}

}

@article{10.1145/3580003,

author	=	{Lockhart, Shelby and Bienz, Amanda and Gropp, William and Olson, Luke},
title	=	{Performance Analysis and Optimal Node-Aware Communication for Enlarged Conjugate Gradient Methods},
year	=	{2023},
issue_date	=	{March 2023},
publisher	=	{Association for Computing Machinery},
address	=	{New York, NY, USA},
volume	=	{10},
number	=	{1},
issn	=	{2329-4949},
url	=	{https://doi.org/10.1145/3580003},
doi	=	{10.1145/3580003},
abstract	=	{Krylov methods are a key way of solving large sparse linear systems of equations, but suffer from poor strong scalability on distributed memory machines. This is due to high synchronization costs from large numbers of collective communication calls alongside a low computational workload. Enlarged Krylov methods address this issue by decreasing the total iterations to convergence, an artifact of splitting the initial residual and resulting in operations on block vectors. In this paper, we present a performance study of an Enlarged Krylov Method, Enlarged Conjugate Gradients (ECG), noting the impact of block vectors on parallel performance at scale. Most notably, we observe the increased overhead of point-to-point communication as a result of denser messages in the sparse matrix-block vector multiplication kernel. Additionally, we present models to analyze expected performance of ECG, as well as, motivate design decisions. Most importantly, we introduce a new point-to-point communication approach based on node-aware communication techniques that increases efficiency of the method at scale.},
journal	=	{ACM Transactions on Parallel Computing},
month	=	jan,
keywords	=	{collectives, sparse matrix, communication, node-aware, parallel}

}

%
% Section: Proceedings
%
%1981
@InProceedings{boleygropp81,

author	=	{D.~L.~Boley and William~D.~Gropp and M.~M.~Theimer},
title	=	{A Method for Constructing Preprocessors},
booktitle	=	{Conference on the Computing Environment for Mathematical Software},
organization	=	{JPL and ACM-SIGNUM},
year	=	{1981},
month	=	jul,
note	=	{JPL Publication 81-67},
area	=	{S},
areaseq	=	{0},

}

%1985
@InProceedings{GROPP85,

author	=	{W. D. Gropp},
key	=	{Gropp},
title	=	{Numerical Linear Algebra on Workstations},
booktitle	=	{Proc. Army Research Office Workshop on Microcomputers in Scientific Computing},
year	=	{1985},
area	=	{NS},
areaseq	=	{0},

}

%1986

%1987
@InProceedings{gropp-nla87,

author	=	{William D. Gropp},
title	=	{A System for Numerical Linear Algebra},
editor	=	{A. Wouk},
booktitle	=	{New Computing Environments: Microcomputers in Large-Scale Computing},
publisher	=	{SIAM},
address	=	{Philadelphia},
year	=	{1987},
pages	=	{26--38},
area	=	{NS},
areaseq	=	{0},

}

@InProceedings{groppLUMR87,

author	=	{William D. Gropp},
title	=	{Local Uniform Mesh Refinement on Parallel Processors},
booktitle	=	{Large Scale Scientific Computing},
editor	=	{P.~Deuflhard and B.~Engquist},
year	=	1987,
publisher	=	{Birkh{\"a}user},
address	=	{Boston},
area	=	"R:Par",
areaseq	=	"0",

}

%1988
@InProceedings{groppadapt88,

author	=	{William D. Gropp},
title	=	{Adaptive Methods for Hyperbolic Problems on Local Memory Parallel Processors},
editor	=	{M. H. Schultz},
booktitle	=	{Numerical Algorithms for Modern Computer Architectures},
publisher	=	{Springer-Verlag},
address	=	{New York},
year	=	{1988},
pages	=	{77--84},
area	=	{R:Par},
areaseq	=	{0},

}

@InProceedings{gro88:par-cfd,

author	=	{William Gropp and Edward Smith},
title	=	{Computational Fluid Dynamics on Parallel Processors},
booktitle	=	{1st National Fluid Dynamics Congress, Part 1},
organization	=	{AIAA/ASME/SIAM/APS},
publisher	=	{American Institute of Aeronautics and Astronautics},
year	=	{1988},
month	=	jul,
pages	=	{612--619},
area	=	{Par},

}

%1989
@InProceedings{groppschultz89,

author	=	{William D. Gropp and Martin Schultz},
title	=	{A Highly Parallel Method for an Underwater Acoustics Problem},
booktitle	=	{Proceedings of the Fourth International Conference on Supercomputing, Santa Clara, California},
year	=	{1989},
confdate	=	{April 30--May 5, 1989},
area	=	{A:Par},
areaseq	=	{0},

}

% The citation key keeps its historical spelling so existing cites still resolve.
@InProceedings{barrymangroppsaltz89,

author	=	{H.~S.~Berryman and William D.~Gropp and J.~Saltz},
title	=	{Krylov Methods and the {CM/2}},
booktitle	=	{Proceedings of the Fourth International Conference on Supercomputing, Santa Clara, California},
confdate	=	{April 30--May 5, 1989},
year	=	1989,
area	=	"Par",
areaseq	=	"0",

}

@InProceedings{groppfoulser89,

author	=	{William D. Gropp and David Foulser},
title	=	{{CLAM}: A Programming Language for Interactive Supercomputing and Visualization},
booktitle	=	{Proceedings of the Fourth International Conference on Supercomputing, Santa Clara, California},
year	=	{1989},
confdate	=	{April 30--May 5, 1989},
area	=	{NS},
areaseq	=	{0},

}

@InProceedings{gropp-dyngrid89,

author	=	{William D. Gropp},
title	=	{Dynamic Grid Manipulation for {PDE}s on Hypercube Parallel Processors},
editor	=	{A. Wouk},
booktitle	=	{Parallel Processing and Medium-Scale Multiprocessors},
publisher	=	{SIAM},
address	=	{Philadelphia},
year	=	{1989},
pages	=	{192--203},
area	=	{R:Par},
areaseq	=	{0},

}

@InProceedings{ppsc87*213,

author	=	"Leslie Greengard and William D. Gropp",
title	=	"A Parallel Version of the Fast Multipole Method",
pages	=	"213--222",
ISBN	=	"0-89871-228-9",
editor	=	"Garry Rodrigue",
booktitle	=	"Proceedings of the 3rd Conference on Parallel Processing for Scientific Computing",
month	=	dec,
publisher	=	"SIAM Publishers",
address	=	"Philadelphia, PA, USA",
year	=	"1989",
area	=	"A:Par:FMM",
areaseq	=	"0",

}

% The conference met in December 1987 (see booktitle); year is the
% publication year of the proceedings volume.
@InProceedings{Gropp:1989:GCS,

author	=	"W. D. Gropp and I. C. F. Ipsen",
key	=	"GroppIpsen88a",
title	=	"A {Gray} Code Scheme for Local Uniform Mesh Refinement on Hypercubes",
editor	=	"Garry Rodrigue",
booktitle	=	"Parallel Processing for Scientific Computing: Proceedings of the Third {SIAM} Conference on Parallel Processing for Scientific Computing, Los Angeles, California, December 1--4, 1987",
publisher	=	"SIAM Publ.",
address	=	"Philadelphia",
ISBN	=	"0-89871-228-9",
pages	=	"202--206",
year	=	"1989",
area	=	"A:R:Par",
areaseq	=	"0",

}

@InProceedings{ppsc89*295,

author	=	{William D. Gropp and David E. Keyes},
title	=	{Parallel Domain Decomposition with Local Mesh Refinement},
editor	=	{Danny C. Sorensen and Jack Dongarra and Paul Messina and Robert G. Voigt},
booktitle	=	{Proceedings of the 4th Conference on Parallel Processing for Scientific Computing},
publisher	=	{SIAM Publishers},
address	=	{Philadelphia, PA, USA},
year	=	{1989},
month	=	dec,
pages	=	{295--296},
ISBN	=	{0-89871-262-9},
area	=	{R:D:Par},
areaseq	=	{0},

}

@InCollection{WDGropp_DEKeyes_1989b,

author	=	{W. D. Gropp and D. E. Keyes},
title	=	{Domain decomposition on parallel computers},
editor	=	{T. F. Chan and R. Glowinski and J. P\'eriaux and O. B. Widlund},
booktitle	=	{Domain Decomposition Methods},
publisher	=	{SIAM},
address	=	{Philadelphia},
pages	=	{260--288},
year	=	{1989},
area	=	{D:Par},
areaseq	=	{0},

}

@InProceedings{DEKeyes_WDGropp_AEcder_1989a,

author	=	"D. E. Keyes and W. D. Gropp and A. Ecder",
title	=	"Domain decomposition techniques for large sparse nonsymmetric systems arising from elliptic problems with first-order terms",
booktitle	=	"Proceedings of a Symposium on the Solution of Super Large Problems in Computational Mechanics",
editor	=	"J. H. Kane and A. D. Carlson",
publisher	=	"Plenum",
address	=	"New York",
year	=	"1989",
area	=	"D",
areaseq	=	"0",

}

@InCollection{DEKeyes_WDGropp_1989a,

author	=	{D. E. Keyes and W. D. Gropp},
title	=	{Domain decomposition techniques for nonsymmetric systems of equations: examples from computational fluid dynamics},
editor	=	{T. F. Chan and R. Glowinski and J. P\'eriaux and O. B. Widlund},
booktitle	=	{Domain Decomposition Methods},
publisher	=	{SIAM},
address	=	{Philadelphia},
pages	=	{321--339},
year	=	{1989},
area	=	{D},
areaseq	=	{0},

}

@InProceedings{ppsc89*386,

author	=	{Z. George Mou and David E. Keyes and William D. Gropp},
title	=	{Balanced Divide-and-Conquer Algorithms for the Fine-Grained Parallel Direct Solution of Dense and Banded Triangular Linear Systems and their Connection Machine Implementation},
editor	=	{Danny C. Sorensen and Jack Dongarra and Paul Messina and Robert G. Voigt},
booktitle	=	{Proceedings of the 4th Conference on Parallel Processing for Scientific Computing},
publisher	=	{SIAM Publishers},
address	=	{Philadelphia, PA, USA},
year	=	{1989},
month	=	dec,
pages	=	{386--387},
ISBN	=	{0-89871-262-9},
area	=	{Par},
areaseq	=	{0},

}

%1990
@InProceedings{icpp90-3*35,

author	=	{D. E. Foulser and W. D. Gropp},
title	=	{{CLAM} and {CLAMShell}: An Interactive Front-End for Parallel Computing and Visualization},
editor	=	{Pen-Chung Yew},
booktitle	=	{Proceedings of the 1990 International Conference on Parallel Processing. Volume 3: Algorithms and Architectures},
publisher	=	{Pennsylvania State University Press},
address	=	{Urbana-Champaign, IL},
year	=	{1990},
month	=	aug,
pages	=	{35--43},
ISBN	=	{0-271-00728-1},
area	=	{NS},
areaseq	=	{0},

}

@InProceedings{WDGropp_DEKeyes_1990a,

author	=	{W. D. Gropp and D. E. Keyes},
title	=	{A domain decomposition method with locally uniform mesh refinement},
editor	=	{T. F. Chan and R. Glowinski and J. P\'eriaux and O. B. Widlund},
booktitle	=	{Third International Symposium on Domain Decomposition Methods for Partial Differential Equations},
publisher	=	{SIAM},
address	=	{Philadelphia},
pages	=	{115--129},
year	=	{1990},
area	=	{D:R},
areaseq	=	{0},

}

@InCollection{groppschultz90,

author	=	{William D. Gropp and Martin H. Schultz},
title	=	{High Performance Parabolic Equation Solvers},
editor	=	{D.~Lee and A.~Cakmak and R.~Vichnevetsky},
booktitle	=	{Computational Acoustics},
volume	=	{1},
publisher	=	{Elsevier Science Pub.},
year	=	{1990},
area	=	{A:Par},
areaseq	=	{0},

}

@InProceedings{gro90:par-comp,

author	=	{William Gropp},
title	=	{Parallel Computing and the Solution of Partial Differential Equations (abstract)},
editor	=	{Irene O. Macke},
booktitle	=	{Transactions of the American Nuclear Society},
volume	=	{62},
organization	=	{American Nuclear Society},
year	=	{1990},
month	=	nov,
pages	=	{269},
note	=	{Invited Paper},
annote	=	{1990 Winter Meeting, Washington, D.C.},
area	=	{Par},

}

%1991

@InProceedings{gropp91:visual-artifacts,

author	=	{William Gropp},
title	=	{Visual Artifacts in Boundary Conditions},
editor	=	{A. Louise Perkins and Jeffrey S. Scroggs},
booktitle	=	{Proceedings for the {ICASE} Workshop on Heterogeneous Boundary Conditions},
number	=	{NASA Contractor Report 187630},
organization	=	{ICASE},
year	=	{1991},
month	=	aug,
pages	=	{1--3},
area	=	{Misc},

}

@InProceedings{cgk91:dd-transport,

author	=	{Tony F. Chan and William Gropp and David E. Keyes},
title	=	{Domain Decomposed Preconditionings for Transport Operators},
editor	=	{A. Louise Perkins and Jeffrey S. Scroggs},
booktitle	=	{Proceedings for the {ICASE} Workshop on Heterogeneous Boundary Conditions},
number	=	{NASA Contractor Report 187630},
organization	=	{ICASE},
year	=	{1991},
month	=	aug,
pages	=	{12--30},
area	=	{D:A},

}

@InProceedings{ppsc91*307,

author	=	{I. Foster and W. Gropp and R. Stevens},
title	=	{Parallel Scalability of the Spectral Transform Method},
editor	=	{Jack Dongarra and Ken Kennedy and Paul Messina and Danny C. Sorensen and Robert G. Voigt},
booktitle	=	{Proceedings of the 5th {SIAM} Conference on Parallel Processing for Scientific Computing},
publisher	=	{SIAM},
address	=	{Houston, TX},
year	=	{1991},
month	=	mar,
pages	=	{307--314},
ISBN	=	{0-89871-303-X},
area	=	{A:Par:Perf},
areaseq	=	{0},

}

@InProceedings{WDGropp_DEKeyes_1991a,

author	=	{W. D. Gropp and D. E. Keyes},
title	=	{Parallel domain decomposition and the solution of nonlinear systems of equations},
editor	=	{R. Glowinski and Yu. A. Kuznetsov and G. A. Meurant and J. P\'eriaux and O. B. Widlund},
booktitle	=	{Fourth International Symposium on Domain Decomposition Methods for Partial Differential Equations},
publisher	=	{SIAM},
address	=	{Philadelphia},
pages	=	{373--381},
year	=	{1991},
area	=	{D:Par},
areaseq	=	{0},

}

@InProceedings{DEKeyes_WDGropp_1991a,

author	=	"D. E. Keyes and W. D. Gropp",
title	=	"Domain-decomposable preconditioners for second-order upwind discretizations of multicomponent systems",
booktitle	=	"Fourth International Symposium on Domain Decomposition Methods for Partial Differential Equations",
editor	=	"R. Glowinski and Yu. A. Kuznetsov and G. A. Meurant and J. P\'eriaux and O. B. Widlund",
publisher	=	"SIAM",
address	=	"Philadelphia",
year	=	"1991",
pages	=	"129--139",
area	=	"D",
areaseq	=	"0",

}

%1992
%% pages???
%% invited presentation
@InProceedings{Cai:1992:CSD,

author	=	{Xiao-Chuan Cai and William D. Gropp and David E. Keyes},
title	=	{A Comparison of Some Domain Decomposition Algorithms for Nonsymmetric Elliptic Problems},
editor	=	{Tony F. Chan and David E. Keyes and G\'erard A. Meurant and Jeffrey S. Scroggs and Robert G. Voigt},
booktitle	=	{Fifth International Symposium on Domain Decomposition Methods for Partial Differential Equations},
publisher	=	{SIAM},
address	=	{Philadelphia, PA, USA},
year	=	{1992},
bibdate	=	{Wed May 4 18:52:09 MDT 1994},
area	=	{D:Perf},
areaseq	=	{0},

}

@InCollection{groppkeyes-asymp92,

author	=	{William D. Gropp and David E. Keyes},
title	=	{Domain Decomposition as a Mechanism for Using Asymptotic Methods},
editor	=	{H. G. Kaper and M. Garbey},
booktitle	=	{Asymptotic and Numerical Methods for Partial Differential Equations with Critical Parameters},
publisher	=	{Kluwer},
address	=	{Dordrecht},
year	=	{1992},
pages	=	{93--106},
area	=	{D:A},
areaseq	=	{0},

}

@InProceedings{Gropp:1992:PCD,

author	=	{William D. Gropp},
title	=	{Parallel Computing and Domain Decomposition},
editor	=	{Tony F. Chan and David E. Keyes and G\'erard A. Meurant and Jeffrey S. Scroggs and Robert G. Voigt},
booktitle	=	{Fifth International Symposium on Domain Decomposition Methods for Partial Differential Equations},
publisher	=	{SIAM},
address	=	{Philadelphia, PA, USA},
year	=	{1992},
bibdate	=	{Wed May 4 18:52:09 MDT 1994},
area	=	{D:Par},
areaseq	=	{0},

}

% "and others" lets the bibliography style render the et-al marker itself.
@InCollection{groppkeyes92,

author	=	{William D. Gropp and David E. Keyes},
title	=	{Semi-structured Refinement and Parallel Domain Decomposition Methods},
booktitle	=	{Unstructured Scientific Computation on Multiprocessors},
editor	=	{P. Mehrotra and others},
year	=	1992,
publisher	=	{MIT Press},
pages	=	{187--203},
area	=	"R:D:Par",
areaseq	=	"0",

}

%1993
@InProceedings{butlergropplusk93,

author	=	{Ralph Butler and William D. Gropp and Ewing Lusk},
title	=	{Developing Applications for a Heterogeneous Computing Environment},
booktitle	=	{Proc. Workshop on Heterogeneous Processing},
publisher	=	{IEEE},
address	=	{Los Alamitos, California},
year	=	{1993},
pages	=	{77--83},
area	=	{M:Par},
areaseq	=	{0},

}

@InProceedings{groppscs93,

author	=	{William Gropp},
title	=	{Parallel Programming Tools for Distributed-Memory Computers},
booktitle	=	{Proc. of the 1993 SCS Simulation Multiconference},
year	=	1993,
month	=	mar,
area	=	"S:Par",
areaseq	=	"0",

}

@InProceedings{ppsc93*160,

author	=	"N. Galbreath and W. Gropp and D. Gunter and D. Leaf and D. Levine",
title	=	"Parallel Solution of the Three-Dimensional, Time-Dependent {G}inzburg-{L}andau Equation",
pages	=	"160--164",
ISBN	=	"0-89871-315-3",
editor	=	"Linda R. Petzold and Richard F. Sincovec and David E. Keyes and Michael R. Leuze and Daniel A. Reed",
booktitle	=	"Proceedings of the 6th {SIAM} Conference on Parallel Processing for Scientific Computing",
address	=	"Norfolk, VA",
month	=	mar,
year	=	"1993",
publisher	=	"SIAM Press",
area	=	"Par:I",
areaseq	=	"0",

}

% This is a refereed proceedings
@InProceedings{galbreath:applio,

author	=	{N. Galbreath and W. Gropp and D. Levine},
title	=	{Applications-Driven Parallel {I/O}},
booktitle	=	{Proceedings of Supercomputing '93},
publisher	=	{IEEE Computer Society Press},
year	=	{1993},
pages	=	{462--471},
conflocation	=	{Portland, OR},
keywords	=	{parallel I/O, pario-bib},
note	=	{Reprinted in the book ``High Performance Storage and Parallel I/O'' (\url{http://www.buyya.com/superstorage/}, 2001, pages 539--547)},
area	=	{Par:I},
areaseq	=	{0},

}

@InProceedings{kettunenforsman93,

author	=	{L. Kettunen and K. Forsman and D. Levine and W. Gropp},
title	=	{Solutions of {TEAM} Problem \#13 Using Integral Equations in a Sequential and Parallel Computing Environment},
booktitle	=	{Proceedings of the Miami {TEAM} Workshop},
year	=	1993,
organization	=	{Florida International University, Department of Electrical Engineering and Computing Science},
confdate	=	"November, 1993",
month	=	dec,
area	=	"App:Par:P",
areaseq	=	"0",

}

%1994
@InProceedings{XCCai_WDGropp_DEKeyes_MDTidriri_1994a,

author	=	"X.-C. Cai and W. D. Gropp and D. E. Keyes and M. D. Tidriri",
title	=	"Parallel implicit methods for aerodynamics",
booktitle	=	"Domain Decomposition Methods in Scientific and Engineering Computing: Proceedings of the Seventh International Conference on Domain Decomposition",
editor	=	"D. E. Keyes and J. Xu",
series	=	"Contemporary Mathematics",
volume	=	"180",
publisher	=	"American Mathematical Society",
address	=	"Providence, Rhode Island",
year	=	"1994",
pages	=	"465--470",
area	=	"Par:D",
areaseq	=	"0",

}

@InProceedings{caigroppkeyestidriri94,

author	=	{X.-C. Cai and William D. Gropp and David E. Keyes and M. D. Tidriri},
title	=	{{N}ewton-{K}rylov-{S}chwarz Methods in {CFD}},
editor	=	{F. Hebeker and R. Rannacher},
booktitle	=	{Proceedings of the International Workshop on Numerical Methods for the Navier-Stokes Equations},
series	=	{Notes in Numerical Fluid Mechanics},
publisher	=	{Vieweg Verlag},
address	=	{Braunschweig},
year	=	{1994},
pages	=	{17--30},
area	=	{A:D},
areaseq	=	{0},

}

% This proceedings is refereed
@InProceedings{WDGropp_DEKeyes_JSMounts_1994a,

author	=	"W. D. Gropp and D. E. Keyes and J. S. Mounts",
title	=	"Implicit domain decomposition algorithms for steady, compressible aerodynamics",
booktitle	=	"Domain Decomposition Methods in Science and Engineering: The Sixth International Conference on Domain Decomposition",
editor	=	"A. Quarteroni and J. P\'eriaux and Yu. A. Kuznetsov and O. B. Widlund",
series	=	"Contemporary Mathematics",
volume	=	"157",
publisher	=	"American Mathematical Society",
address	=	"Providence, Rhode Island",
year	=	"1994",
pages	=	"203--213",
area	=	"A:D",
areaseq	=	"0",

}

% Also InProceedings{GS93,
@InProceedings{Gropp:1994:SEP,

author	=	{W. Gropp and B. Smith},
title	=	{Scalable, extensible, and portable numerical libraries},
booktitle	=	{Proceedings of the Scalable Parallel Libraries Conference, October 6--8, 1993, Mississippi State, Mississippi},
publisher	=	{IEEE Computer Society Press},
address	=	{1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
pages	=	{87--93},
year	=	{1994},
ISBN	=	{0-8186-4980-1},
affiliation	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
classification	=	{C6110B (Software engineering techniques); C6120 (File organisation); C6180 (User interfaces)},
keywords	=	{Aggressive data-structure-neutral implementation; Data structures; Implementation language; Meta-communication layer; Parallel communication technology; PETSc library; Portable Extensible Tools for Scientific computing; Portable numerical libraries; Software portability; Software technology; User interfaces; User-interface language},
thesaurus	=	{Data structures; Software portability; User interfaces},
area	=	{NS:P},
areaseq	=	{0},

}

% also InProceedings{GroppLusk93,
@InProceedings{Gropp:1994:MCL,

author	=	{W. Gropp and E. Lusk},
title	=	{The {MPI} communication library: its design and a portable implementation},
booktitle	=	{Proceedings of the Scalable Parallel Libraries Conference, October 6--8, 1993, Mississippi State, Mississippi},
publisher	=	{IEEE Computer Society Press},
address	=	{1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
pages	=	{160--165},
year	=	{1994},
ISBN	=	{0-8186-4980-1},
affiliation	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
classification	=	{C5440 (Multiprocessing systems); C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C6150N (Distributed systems)},
conftitle	=	{Proceedings of Scalable Parallel Libraries Conference},
corpsource	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
keywords	=	{advanced features; Advanced features; implementation strategy; Implementation strategy; message passing; MPI communication library; MPI standard; parallel programming; portable implementation; Portable implementation; software portability; standard message-passing interface; Standard message-passing interface; standards},
sponsororg	=	{Mississippi State Univ.; Nat. Sci. Found},
thesaurus	=	{Message passing; Parallel programming; Software portability; Standards},
treatment	=	{P Practical},
area	=	{Par:M},
areaseq	=	{0},

}

% This proceedings is refereed
@InProceedings{Gropp:1994:SUT,

author	=	{W. Gropp and E. Lusk},
title	=	{Scalable {Unix} Tools on Parallel Processors},
booktitle	=	{{Proceedings of the Scalable High-Performance Computing Conference, May 23--25, 1994, Knoxville, Tennessee}},
publisher	=	{IEEE Computer Society Press},
address	=	{1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
year	=	{1994},
pages	=	{56--62},
ISBN	=	{0-8186-5680-8, 0-8186-5681-6},
area	=	{Par},
areaseq	=	{0},

}

% also InProceedings{Gropp:1992:EDD,
@InProceedings{WDGropp_BFSmith_1994a,

author	=	"W. D. Gropp and B. F. Smith",
title	=	"Experiences with domain decomposition in three dimensions: overlapping {S}chwarz methods",
booktitle	=	"Domain Decomposition Methods in Science and Engineering: The Sixth International Conference on Domain Decomposition",
editor	=	"A. Quarteroni and J. P\'eriaux and Yu. A. Kuznetsov and O. B. Widlund",
series	=	"Contemporary Mathematics",
volume	=	"157",
publisher	=	"American Mathematical Society",
address	=	"Providence, Rhode Island",
year	=	"1994",
pages	=	"323--333",
area	=	"A:D",
areaseq	=	"0",

}

@InProceedings{kettunenforsmanlevinegropp94,

author	=	{L. Kettunen and K. Forsman and D. Levine and William D. Gropp},
title	=	{Solutions of {TEAM} Problems 13 and 20 Using a Volume Integral Formulation},
booktitle	=	{Proceedings of Aix-les-Bains TEAM workshop},
year	=	{1994},
area	=	{App:Par:P},
areaseq	=	{0},

}

%1995
% Editors are listed one name per "and" so BibTeX parses each person separately.
% NOTE(review): conference ordinal set to 7th, following the 5th (1991) and
% 6th (1993) entries in this file; confirm against the proceedings title page.
@InProceedings{ppsc95*225,

author	=	"Kimmo Forsman and William Gropp and Lauri Kettunen and David Levine",
title	=	"Computational Electromagnetics and Parallel Dense Matrix Computations",
pages	=	"225--230",
ISBN	=	"0-89871-344-7",
editor	=	"Bailey, David H. and Bj{\o}rstad, Petter E. and Gilbert, John E. and Mascagni, Michael V. and Schreiber, Robert S. and Simon, Horst D. and Torczon, Virginia J. and Watson, Layne T.",
booktitle	=	"Proceedings of the 7th Conference on Parallel Processing for Scientific Computing",
month	=	feb # "~15--17",
publisher	=	"SIAM Press",
address	=	"Philadelphia, PA, USA",
year	=	"1995",
area	=	"App:Par:P",
areaseq	=	"0",

}

@InCollection{WDGropp_DEKeyes_MDTidriri_1995a,

author	=	"W. D. Gropp and D. E. Keyes and M. D. Tidriri",
title	=	"Parallel implicit solvers for steady, compressible aerodynamics",
booktitle	=	"Parallel Computational Fluid Dynamics",
editor	=	"A. Ecer and J. Hauser and P. Leca and J. P\'eriaux",
publisher	=	"Elsevier Science Publishers B.V. (North-Holland)",
address	=	"Amsterdam",
year	=	"1995",
pages	=	"391--399",
area	=	"Par",
areaseq	=	"0",

}

% This proceedings is refereed
@InProceedings{Gropp:1995:DPM,

author	=	{W. Gropp and E. Lusk},
title	=	{Dynamic process management in an {MPI} setting},
booktitle	=	{Proceedings / Seventh {IEEE} Symposium on Parallel and Distributed Processing, October 25--28, 1995, San Antonio, Texas},
publisher	=	{IEEE Computer Society Press},
address	=	{1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
pages	=	{530--534},
year	=	{1995},
ISBN	=	{0-8186-7195-5},
ISSN	=	{1063-6374},
coden	=	{PSPDF8},
note	=	{IEEE catalog number 95TB8131.},
affiliation	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab.},
affiliationaddress	=	{Argonne, IL, USA},
classification	=	{722.2; 722.3; 722.4; 723.1; 902.2; C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6150N (Distributed systems software)},
conference	=	{Proceedings of the 1995 7th IEEE Symposium on Parallel and Distributed Processing},
conftitle	=	{Proceedings of Seventh IEEE Symposium on Parallel and Distributed Processing},
corpsource	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
journalabr	=	{IEEE Symp Parallel Distrib Process Proc},
keywords	=	{Client/server applications; Computer architecture; Computer networks; Computer software; Computer systems programming; Computer workstations; Data communication systems; dynamic process management; Dynamic process management; Interfaces (computer); message passing; Message-passing interface; MPI setting; parallel applications; Parallel applications; parallel processing; Parallel processing systems; Parallel programs; Process control; process management; Process management; Real time systems; Resource allocation; runtime environment; Runtime environment; Runtime environments; Scheduling; Standards},
meetingaddress	=	{San Antonio, TX, USA},
meetingdate	=	{Oct 25--28 1995},
meetingdate2	=	{10/25--28/95},
sponsor	=	{IEEE},
sponsororg	=	{IEEE Comput Soc. Tech. Committee on Comput. Architecture; IEEE Comput. Soc. Tech. Committee on Distributed Process.; IEEE Comput. Soc. Dallas Chapter},
thesaurus	=	{Message passing; Parallel processing},
treatment	=	{P Practical},
area	=	{Par:M},
areaseq	=	{0},

}

@InProceedings{Gropp:1995:IMM,

author	=	{W. Gropp and E. Lusk},
title	=	{Implementing {MPI}: the 1994 {MPI Implementors' Workshop}},
booktitle	=	{Proceedings of the 1994 Scalable Parallel Libraries Conference: October 12--14, 1994, Mississippi State University, Mississippi},
publisher	=	{IEEE Computer Society Press},
address	=	{1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
pages	=	{55--59},
year	=	{1995},
ISBN	=	{0-8186-6895-4},
url	=	{http://www.mcs.anl.gov/mpi-impl/paper.ps},
bibdate	=	{Sat Apr 19 16:34:54 MDT 1997},
acknowledgement	=	ack-nhfb,
affiliation	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
classification	=	{C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)},
conftitle	=	{Proceedings Scalable Parallel Libraries Conference},
corpsource	=	{Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
keywords	=	{implementation process; Implementation process; message passing; MPI implementation effort; parallel computing; Parallel computing; parallel library; Parallel library; parallel programming; software libraries; software standards; standard message-passing library interface; Standard message-passing library interface; subroutines},
sponsororg	=	{Mississippi State Univ.; NSF},
thesaurus	=	{Message passing; Parallel programming; Software libraries; Software standards; Subroutines},
treatment	=	{P Practical},
area	=	{Par:M},
areaseq	=	{0},

}

% MPE graphics (X11 graphics in MPI); Scalable Parallel Libraries Conference proceedings, pp. 49--54.
@inproceedings{Gropp:1995:MGX,
  author         = {W. Gropp and E. Karrels and E. Lusk},
  title          = {{MPE} graphics: scalable {X11} graphics in {MPI}},
  booktitle      = {Proceedings of the 1994 Scalable Parallel Libraries Conference: October 12--14, 1994, Mississippi State University, Mississippi},
  publisher      = {IEEE Computer Society Press},
  address        = {1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
  year           = {1995},
  pages          = {49--54},
  ISBN           = {0-8186-6895-4},
  affiliation    = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  corpsource     = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  conftitle      = {Proceedings Scalable Parallel Libraries Conference},
  sponsororg     = {Mississippi State Univ.; NSF},
  classification = {C6110P (Parallel programming); C6130B (Graphics techniques); C6150N (Distributed systems software)},
  keywords       = {communication patterns; Communication patterns; communication traffic; Communication traffic; computer graphics; library based message passing; Library based message passing; message passing; MPE graphics; MPI; MPI implementation; MPI message passing standard; MPI Standard; parallel graphics library; Parallel graphics library; parallel graphics operations; Parallel graphics operations; parallel graphics routines; Parallel graphics routines; parallel programming; parallel programs; Parallel programs; parallel semantics; Parallel semantics; programming libraries; Programming libraries; scalable X11 graphics; Scalable X11 graphics; semantics; Semantics; software standards; subroutines; user control; User control; X-based parallel graphics library},
  thesaurus      = {Computer graphics; Message passing; Parallel programming; Software standards; Subroutines},
  treatment      = {P Practical},
  area           = {M:Par:PV},
  areaseq        = {0},
}

% This proceedings was refereed
% Taxonomy of SMP and SMP-cluster programming models; October 1995, pp. 2--7.
% The second editor's surname is written with its umlaut as a braced BibTeX special character.
@InProceedings{groppluskmppm95,

author	=	{William D. Gropp and Ewing L. Lusk},
title	=	{A Taxonomy of Programming Models for Symmetric Multiprocessors and {SMP} Clusters},
booktitle	=	{Programming Models for Massively Parallel Computers},
editor	=	{W. K. Giloi and S. J{\"a}hnichen and B. D. Shriver},
publisher	=	{IEEE Computer Society Press},
year	=	1995,
month	=	oct,
pages	=	{2--7},
area	=	"Par",
areaseq	=	"0",

}

% Scalable nonlinear-equation and unconstrained-minimization libraries; Scalable Parallel Libraries Conference proceedings, pp. 60--67.
@inproceedings{GroppMcInnesSmith95,
  author      = {William D. Gropp and Lois Curfman McInnes and Barry Smith},
  title       = {Scalable Libraries for Solving Systems of Nonlinear Equations and Unconstrained Minimization Problems},
  booktitle   = {Proceedings of the 1994 Scalable Parallel Libraries Conference: October 12--14, 1994, Mississippi State University, Mississippi},
  publisher   = {IEEE Computer Society Press},
  address     = {1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA},
  year        = {1995},
  pages       = {60--67},
  ISBN        = {0-8186-6895-4},
  affiliation = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  corpsource  = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA},
  conftitle   = {Proceedings Scalable Parallel Libraries Conference},
  sponsororg  = {Mississippi State Univ.; NSF},
  area        = {Par:NS},
}

% MPI standard overview and status; Advances in Parallel Computing vol. 10, pp. 265--270.
% The editor list uses the BibTeX "and others" token so that "et al" is not parsed as a surname.
@InProceedings{Gropp:1995:MMI,

author	=	"W. Gropp and E. Lusk",
title	=	"The {MPI} Message-Passing Interface Standard: Overview and Status",
editor	=	"Grandinetti and others",
booktitle	=	"High performance computing: technology, methods, and applications (Advanced workshop, June 1994, Cetraro, Italy)",
volume	=	"10",
publisher	=	"Elsevier",
address	=	"Amsterdam, The Netherlands",
year	=	"1995",
ISBN	=	"0-444-82163-5",
ISSN	=	"0927-5452",
series	=	"Advances in Parallel Computing",
pages	=	"265--270",
area	=	"Par:M",
areaseq	=	"0",

}

% This book is refereed
% Book chapter on parallel domain decomposition software (SIAM, 1995).
@incollection{groppsmith95,
  author    = {William D. Gropp and Barry Smith},
  title     = {Parallel Domain Decomposition Software},
  booktitle = {Domain-Based Parallelism and Problem Decomposition Methods in Computational Science and Engineering},
  editor    = {D. E. Keyes and Youcef Saad and Donald G. Truhlar},
  publisher = {SIAM},
  address   = {Philadelphia},
  year      = {1995},
  area      = {D:NS:Par},
  areaseq   = {0},
}

%1996
% Abstract-device interface for portable parallel I/O; Frontiers '96, pp. 180--187.
@inproceedings{ThakurGroLus96,
  author       = {Rajeev Thakur and William Gropp and Ewing Lusk},
  title        = {An Abstract-Device Interface for Implementing Portable Parallel-{I/O} Interfaces},
  booktitle    = {Proceedings of Frontiers '96: The Sixth Symposium on the Frontiers of Massively Parallel Computation},
  organization = {IEEE Computer Society},
  address      = {Annapolis, Maryland},
  year         = {1996},
  month        = oct # " 27--31,",
  pages        = {180--187},
  url          = {http://www.mcs.anl.gov/home/thakur/adio.ps},
  area         = {Par:I:M},
  areaseq      = {0},
}

% This proceedings is refereed
% MPI-2 extensions overview; Euro-Par '96 proceedings (LNCS), pp. 128--135.
% The first editor's surname is written with its acute accent as a braced BibTeX special character.
@InProceedings{Geist:1996:MEM,

author	=	"A. Geist and W. Gropp and S. Huss-Lederman and A. Lumsdaine and E. Lusk and W. Saphir and T. Skjellum and M. Snir",
title	=	"{MPI-2}: extending the {Message-Passing Interface}",
editor	=	"Luc Boug{\'e} and P. Fraigniaud and A. Mignotte and Y. Robert",
booktitle	=	"{Euro-Par} '96 parallel processing: second International {Euro-Par} Conference, Lyon, France, August 26--29, 1996: proceedings",
volume	=	"1123--1124",
publisher	=	"Springer-Verlag",
address	=	"Berlin, Germany~/ Heidelberg, Germany~/ London, UK~/ etc.",
year	=	"1996",
ISBN	=	"3-540-61626-8 (vol. 1), 3-540-61627-6 (vol. 2)",
ISSN	=	"0302-9743",
series	=	"Lecture notes in computer science",
pages	=	"128--135",
bibdate	=	"Sat Apr 19 16:34:54 MDT 1997",
acknowledgement	=	ack-nhfb,
classification	=	"C5220P (Parallel architecture); C5610 (Computer interfaces)",
conftitle	=	"Proceedings of European Conference on Parallel Processing EURO-PAR '96",
corpsource	=	"Oak Ridge Nat. Lab., TN, USA",
keywords	=	"collective operations; computer interfaces; dynamic process management; extensions; external interfaces; language binding; message passing; Message Passing Interface; MPI; MPI-2; MPI-2 document; one-sided operations; real-time computing; standards",
treatment	=	"P Practical",
area	=	"Par:M",
areaseq	=	"0",

}

% IFIP working conference paper (Oxford, July 1996; published 1997) on numerical libraries and PETSc.
@inproceedings{gropppetsc97,
  author    = {William D. Gropp},
  title     = {Why we couldn't use numerical libraries for {PETSc}},
  booktitle = {Proceedings of the IFIP TC2/WG2.5 Working Conference on the Quality of Numerical Software, Assessment and Enhancement},
  editor    = {Ronald F. Boisvert},
  publisher = {Chapman \& Hall},
  year      = {1997},
  pages     = {249--254},
  confloc   = {Oxford, United Kingdom},
  confdate  = {July 8--12, 1996},
  area      = {NS},
  areaseq   = {0},
}

% This is refereed
% Parallel I/O evaluation on the IBM SP and Intel Paragon; ACPC 1996, LNCS 1127, pp. 24--35.
@inproceedings{thakur:evaluation,
  author    = {Rajeev Thakur and William Gropp and Ewing Lusk},
  title     = {An Experimental Evaluation of the Parallel {I/O} Systems of the {IBM~SP} and {Intel Paragon} Using a Production Application},
  booktitle = {Proceedings of the Third International Conference of the Austrian Center for Parallel Computation (ACPC)},
  series    = {Lecture Notes in Computer Science},
  volume    = {1127},
  publisher = {Springer-Verlag},
  year      = {1996},
  month     = sep,
  pages     = {24--35},
  url       = {http://www.mcs.anl.gov/home/thakur/io-eval.ps},
  earlier   = {thakur:evaluation-tr},
  keywords  = {parallel I/O, multiprocessor file system, workload characterization, pario-bib},
  area      = {Par:M:I},
  areaseq   = {0},
}

%1997
% Book chapter on managing parallelism in object-oriented numerical libraries, pp. 163--202.
% The publisher name is written with its umlaut.
@InCollection{Balay97,

author	=	{S. Balay and W. D. Gropp and L. C. McInnes and B. F. Smith},
title	=	{Efficient Management of Parallelism in Object-Oriented Numerical Software Libraries},
booktitle	=	{Modern Software Tools in Scientific Computing},
publisher	=	{Birkh{\"a}user Press},
year	=	1997,
pages	=	{163--202},
editor	=	{E. Arge and A. M. Bruaset and H. P. Langtangen},
area	=	"Par:NS:P",
areaseq	=	"0",

}

%
% This was also a preprint several years before
% Book chapter introducing performance debugging for parallel computers, pp. 369--382.
@incollection{groppdebug97,
  author    = {William D. Gropp},
  title     = {An Introduction to Performance Debugging For Parallel Computers},
  booktitle = {Parallel Numerical Algorithms},
  editor    = {D. Keyes and A. Sameh and V. Venkatakrishnan},
  publisher = {Kluwer Academic Publishers},
  year      = {1997},
  pages     = {369--382},
  area      = {Par:S},
  areaseq   = {0},
}

% Optimization environments and the NEOS Server; Powell tribute volume, pp. 167--182.
% The second author's surname takes an acute accent, written as a braced BibTeX special character.
@InProceedings{GroppMore97,

author	=	{W. Gropp and Jorge Mor{\'e}},
title	=	{Optimization environments and the {NEOS} Server},
booktitle	=	{Approximation Theory and Optimization: Tributes to M. J. D. Powell},
editor	=	{M. D. Buhmann and A. Iserles},
year	=	1997,
publisher	=	{Cambridge University Press},
pages	=	{167--182},
area	=	"NS",
areaseq	=	"0",

}

% Parallel implicit PDE algorithms and software; Parallel CFD'97, pp. 333--344.
% Initials are space-separated so BibTeX treats each as its own name token.
@InProceedings{groppkeyesmcinnestidriri97,

author	=	{William D. Gropp and D. E. Keyes and L. C. McInnes and M. D. Tidriri},
title	=	{Parallel Implicit {PDE} Computations: Algorithms and Software},
booktitle	=	{Proceedings of Parallel CFD'97},
year	=	1997,
pages	=	{333--344},
publisher	=	{Elsevier},
area	=	"Par:A:NS",
areaseq	=	"0",

}

% Refereed
% PVM versus MPI comparison; 4th European PVM/MPI Users' Group Meeting, LNCS 1332, pp. 3--10.
% The accented letter in the third editor's surname is braced as a BibTeX special character.
@InProceedings{gropplusk_pvmmpi97,

author	=	{William D. Gropp and Ewing Lusk},
title	=	{Why are {PVM} and {MPI} so Different?},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
editor	=	{Marian Bubak and Jack Dongarra and Jerzy Wa{\'s}niewski},
volume	=	1332,
series	=	{Lecture Notes in Computer Science},
year	=	1997,
publisher	=	{Springer Verlag},
pages	=	{3--10},
note	=	{4th European PVM/MPI Users' Group Meeting, Cracow, Poland, November 1997},
area	=	"M:Par",
areaseq	=	"0",

}

% Performance-driven programming models; MPPM-97 (London), pp. 61--67.
% The date range in the note uses an en dash (double hyphen).
@InProceedings{gropp-mppm97,

author	=	{William D. Gropp},
title	=	{Performance Driven Programming Models},
booktitle	=	{Massively Parallel Programming Models (MPPM-97)},
year	=	1997,
publisher	=	{IEEE Computer Society Press},
pages	=	{61--67},
note	=	{November 12--14, 1997; London; Third working conference},
area	=	"Par:Perf",
areaseq	=	"0",

}

% 1998

% One-page abstract on architecture versus algorithm (IEEE Computer Society, 1998).
@inproceedings{groppmaui97,
  author    = {William D. Gropp},
  title     = {Which comes first: The Architecture or the Algorithm? (abstract)},
  booktitle = {Innovative Architectures for Future Generation High-Performance Processors and Systems},
  editor    = {A. Veidenbaum and K. Joe},
  publisher = {IEEE Computer Society},
  year      = {1998},
  pages     = {13},
  area      = {A},
  areaseq   = {0},
}

% This proceedings was refereed

% Data sieving and collective I/O in ROMIO; Frontiers symposium, February 1999, pp. 182--189.
% The month uses the standard macro so the bibliography style controls its rendering.
@InProceedings{thakurfrontiers99,

author	=	{Rajeev Thakur and William Gropp and Ewing Lusk},
title	=	{Data Sieving and Collective {I/O} in {ROMIO}},
booktitle	=	{Proceedings of the 7th Symposium on the Frontiers of Massively Parallel Computation},
pages	=	{182--189},
year	=	{1999},
month	=	feb,
publisher	=	{IEEE Computer Society Press},
area	=	"Par:M:I",
areaseq	=	"0",

}

% SIAM object-oriented methods workshop (October 1998) paper on reusing existing software in libraries, pp. 21--29.
@inproceedings{gropp-siamoo-98,
  author       = {William Gropp},
  title        = {Exploiting Existing Software in Libraries: Successes, Failures, and Reasons Why},
  booktitle    = {Object Oriented Methods for Interoperable Scientific and Engineering Computing},
  editor       = {Michael Henderson and Christopher Anderson and Stephen L. Lyons},
  organization = {SIAM},
  publisher    = {SIAM},
  year         = {1999},
  pages        = {21--29},
  annote       = {Proceedings of a workshop, held October 21--23, 1998, in Yorktown Heights, NY},
  area         = {NS:P},
  areaseq      = {0},
}

% SIAM object-oriented methods workshop (October 1998) paper on a microkernel design, pp. 60--69; also ANL/MCS-P727-0998.
@inproceedings{alice-siamoo-98,
  author       = {Satish Balay and William Gropp and Lois Curfman McInnes and Barry Smith},
  title        = {A Microkernel Design for Component-based Numerical Software Systems},
  booktitle    = {Object Oriented Methods for Interoperable Scientific and Engineering Computing},
  editor       = {Michael Henderson and Christopher Anderson and Stephen L. Lyons},
  organization = {SIAM},
  publisher    = {SIAM},
  year         = {1998},
  pages        = {60--69},
  note         = {Also ANL/MCS-P727-0998},
  annote       = {Proceedings of a workshop, held October 21--23, 1998, in Yorktown Heights, NY},
  annoteps     = {/home/bsmith/petsc/docs/tex/talks/siamoo98/paper/paper.ps},
  area         = {NS},
  areaseq      = {0},
}

% SC98 paper on MPI derived datatypes for I/O performance (November 1998).
@inproceedings{thakurluskgropp-datatype98:sc98,
  author    = {Rajeev Thakur and Ewing Lusk and William Gropp},
  title     = {A Case for Using {MPI}'s Derived Datatypes to Improve {I/O} performance},
  booktitle = {Proceedings of SC98: High Performance Networking and Computing},
  year      = {1998},
  month     = nov,
  area      = {Par:I:M},
  areaseq   = {0},
}

%1999

% MPI derived-datatype performance; Third MPI Developer's and User's Conference, pp. 25--30.
@inproceedings{gropp-swider-lusk99,
  author    = {William Gropp and Ewing Lusk and Debbie Swider},
  title     = {Improving the performance of {MPI} Derived Datatypes},
  booktitle = {Proceedings of the Third MPI Developer's and User's Conference},
  editor    = {Anthony Skjellum and Purushotham V. Bangalore and Yoginder S. Dandass},
  publisher = {MPI Software Technology Press},
  address   = {Starkville, MS},
  year      = {1999},
  pages     = {25--30},
  area      = {Par:M:Datatype},
  areaseq   = {0},
}

% Portable high-performance MPI-IO implementation; 6th IOPADS workshop, May 1999, pp. 23--32.
% The month uses the standard macro so the bibliography style controls its rendering.
@InProceedings{thak99b,

author	=	{Rajeev Thakur and William Gropp and Ewing Lusk},
title	=	{On Implementing {MPI-IO} Portably and with High Performance},
booktitle	=	{Proceedings of the 6th Workshop on I/O in Parallel and Distributed Systems},
pages	=	{23--32},
year	=	{1999},
month	=	may,
pubaddress	=	"New York, NY",
publisher	=	{ACM Press},
area	=	"Par:I:M",
areaseq	=	"0",

}

% Performance bounds for implicit CFD codes; Parallel CFD'99, pp. 241--248.
@inproceedings{gkks99:perf-bounds,
  author    = {W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title     = {Towards Realistic Performance Bounds for Implicit {CFD} Codes},
  booktitle = {Proceedings of Parallel CFD'99},
  year      = {1999},
  pages     = {241--248},
  area      = {Par:Perf},
  areaseq   = {0},
}

% Reproducible MPI performance measurement; 6th European PVM/MPI Users' Group Meeting, LNCS 1697, pp. 11--18.
% The accented letter in the third editor's given name is braced as a BibTeX special character.
@InProceedings{pvmmpi99-mpptest,

author	=	{William D. Gropp and Ewing Lusk},
title	=	{Reproducible Measurements of {MPI} Performance Characteristics},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
editor	=	{Jack Dongarra and Emilio Luque and Tom{\`a}s Margalef},
volume	=	1697,
series	=	{Lecture Notes in Computer Science},
year	=	1999,
publisher	=	{Springer Verlag},
pages	=	{11--18},
note	=	{6th European PVM/MPI Users' Group Meeting, Barcelona, Spain, September 1999},
area	=	"Par:M",
areaseq	=	"0",

}

% Debugger interface to MPI message queues; 6th European PVM/MPI Users' Group Meeting, LNCS 1697, pp. 51--58.
% The accented letter in the third editor's given name is braced as a BibTeX special character.
@InProceedings{pvmmpi99-totalview,

author	=	{James Cownie and William Gropp},
title	=	{A Standard Interface for Debugger Access to Message Queue Information in {MPI}},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
editor	=	{Jack Dongarra and Emilio Luque and Tom{\`a}s Margalef},
volume	=	1697,
series	=	{Lecture Notes in Computer Science},
year	=	1999,
publisher	=	{Springer Verlag},
pages	=	{51--58},
note	=	{6th European PVM/MPI Users' Group Meeting, Barcelona, Spain, September 1999},
area	=	"Par:M",
areaseq	=	"0",

}

% SC99 paper on sustained performance for an unstructured-mesh CFD application (CDROM proceedings).
@inproceedings{agkks-sc99-fun3d,
  author    = {W. K. Anderson and William D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title     = {Achieving High Sustained Performance in an Unstructured Mesh {CFD} Application},
  booktitle = {Proceedings of the ACM/IEEE SC99 Conference on High Performance Networking and Computing},
  publisher = {IEEE Computer Society},
  year      = {1999},
  note      = {CDROM. Also at \url{http://portal.acm.org} and ICASE Report No. 2000-2},
  confdate  = {Nov 13--19, 1999},
  area      = {Par:Perf:P},
  areaseq   = {0},
}

% Infrastructure and interfaces for large-scale numerical software; PDPTA 1999, pp. 2657--2664.
@inproceedings{frei99:num-soft,
  author    = {Lori A. Freitag and William Gropp and Paul D. Hovland and Lois C. McInnes and Barry F. Smith},
  title     = {Infrastructure and Interfaces for Large-Scale Numerical Software},
  booktitle = {Proceedings of PDPTA 1999},
  year      = {1999},
  pages     = {2657--2664},
  area      = {NS},
}

%2000

% Hierarchy-aware collective operations; 14th IPDPS (Cancun), May 2000, pp. 377--384.
@inproceedings{kdSFGLB00:mpi-ngi,
  author    = {Nicholas T. Karonis and Bronis R. de Supinski and Ian Foster and William Gropp and Ewing Lusk and John Bresnahan},
  title     = {Exploiting Hierarchy in Parallel Computer Networks to Optimize Collective Operation Performance},
  booktitle = {Fourteenth International Parallel and Distributed Processing Symposium},
  year      = {2000},
  month     = may,
  pages     = {377--384},
  annote    = {Cancun, Mexico},
  area      = {Par:M:Coll},
  areaseq   = {0},
}

% Scalable process-management environment for parallel programs; PVM/MPI proceedings, September 2000, pp. 168--175.
% The booktitle spelling is "Virtual" (was misspelled "Virutal").
@InProceedings{bgl00:mpd-short,

author	=	{R. Butler and W. Gropp and E. Lusk},
title	=	{A Scalable Process-Management Environment for Parallel Programs},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{168--175},
year	=	2000,
editor	=	{Jack Dongarra and Peter Kacsuk and Norbert Podhorszki},
number	=	1908,
series	=	{Springer Lecture Notes in Computer Science},
month	=	SEP,
area	=	"Par:M:PMI",
areaseq	=	"0",

}

% Runtime checking of MPI datatype signatures; 7th European PVM/MPI Users' Group Meeting, September 2000, pp. 160--167.
% The booktitle spelling is "Virtual" (was misspelled "Virutal").
@InProceedings{gro:mpi-datatypes:pvmmpi00,

author	=	{William D. Gropp},
title	=	{Runtime Checking of Datatype Signatures in {MPI}},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{160--167},
year	=	2000,
editor	=	{Jack Dongarra and Peter Kacsuk and Norbert Podhorszki},
number	=	1908,
series	=	{Springer Lecture Notes in Computer Science},
month	=	SEP,
note	=	{7th European PVM/MPI Users' Group Meeting},
area	=	"Par:M:Datatype",
areaseq	=	"0",

}

% Parallel scalability analysis of an implicit unstructured-mesh CFD code; HiPC 2000 (Bangalore), pp. 395--404.
% The title spelling is "Scalability" (was misspelled "Scalablity").
@InProceedings{GKSK00,

author	=	{W. D. Gropp and D. K. Kaushik and B. F. Smith and D. E. Keyes},
title	=	{Analyzing the Parallel Scalability of an Implicit Unstructured Mesh {CFD} Code},
booktitle	=	{High Performance Computing -- HiPC2000},
pages	=	{395--404},
year	=	2000,
editor	=	{Mateo Valero and Viktor K. Prasanna and Sriram Vajapeyam},
number	=	1970,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
annote	=	{7th International Conference, Bangalore, India},
area	=	"Par:P:Perf",
areaseq	=	"0",

}

% Abstract on solving CFD problems with open-source parallel libraries; PARA2000 proceedings, p. 52.
% The first editor's surname is written with its slashed o as a braced BibTeX special character.
@InProceedings{grop00:petsc-lessons,

author	=	{William Gropp},
title	=	{Solving {CFD} Problems with Open Source Parallel Libraries},
booktitle	=	{Applied Parallel Computing: New Paradigms for HPC in Industry and Academia},
pages	=	52,
year	=	2000,
editor	=	{Tor S{\o}revik and Fredrik Manne and Randi Moe and Assefaw Hadish Gebremedhin},
number	=	1947,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{(Abstract)},
annote	=	{Proceedings of PARA2000},
area	=	"NS:Par",
areaseq	=	"0",

}

% SC2000 paper describing a trace-analysis framework from trace generation through Jumpshot visualization.
@inproceedings{SC00-CD-ROM*50,
  author    = {C. Eric Wu and Anthony Bolmarcich and Marc Snir and David Wootton and Farid Parpia and Anthony Chan and Ewing L. Lusk and William Gropp},
  title     = {From Trace Generation to Visualization: {A} Performance Framework for Distributed Parallel Systems},
  booktitle = {Proceedings of SC2000},
  year      = {2000},
  abstract  = {In this paper we describe a trace analysis framework, from trace generation to visualization. It includes a unified tracing facility on IBM SP systems, a self-defining interval file format, an API for framework extensions, utilities for merging and statistics generation, and a visualization tool with preview and multiple time-space diagrams. The trace environment is extremely scalable, and combines MPI events with system activities in the same set of trace files, one for each SMP node. Since the amount of trace data may be very large, utilities are developed to convert and merge individual trace files into a self-defining interval trace file with multiple frame directories. The interval format allows the development of multiple time-space diagrams, such as thread-activity view, processor-activity view, etc., from the same interval file. A visualization tool, Jumpshot, is modified to visualize these views. A statistics utility is developed using the API, along with its graphics viewer.},
  area      = {Par:PV},
  areaseq   = {0},
}

% Lessons from interfacing PETSc and Overture; The Architecture of Scientific Software 2000, pp. 57--68.
@inproceedings{bush00:petsc,
  author    = {Kristopher R. Buschelman and William Gropp and Lois C. McInnes and Barry F. Smith},
  title     = {{PETSc} and {Overture}: {L}essons Learned Developing an Interface between Components},
  booktitle = {The Architecture of Scientific Software 2000},
  year      = {2000},
  pages     = {57--68},
  area      = {NS:P},
}

% SC2000 paper on MPICH-GQ, quality of service for message-passing programs.
@inproceedings{rfgkst00:mpichg-qos-sc,
  author    = {Alain Roy and Ian Foster and William Gropp and Nicholas Karonis and Volker Sander and Brian Toonen},
  title     = {{MPICH-GQ}: Quality of Service for Message Passing Programs},
  booktitle = {Proceedings of SC2000},
  year      = {2000},
  area      = {Par:M},
  areaseq   = {0},
}

% SC2000 paper on performance modeling and tuning of an unstructured-mesh CFD application.
% The URL carries an explicit scheme so that link-generating styles do not treat it as a relative path.
@InProceedings{gropp00performance,

author	=	"William D. Gropp and Dinesh K. Kaushik and David E. Keyes and Barry F. Smith",
title	=	"Performance Modeling and Tuning of an Unstructured Mesh {CFD} Application",
booktitle	=	{Proceedings of SC2000},
year	=	"2000",
url	=	"http://citeseer.ist.psu.edu/gropp00performance.html",
area	=	"Par:Perf",

}

% PDPTA 2001 paper on efficient Internet communication for wide-area MPI.
@inproceedings{vin01:mpi-impl,
  author    = {Rajkumar Vinkat and Philip M. Dickens and William Gropp},
  title     = {Efficient Communication Across the {I}nternet in Wide-Area {MPI}},
  booktitle = {Proceedings of Parallel and Distributed Processing Techniques and Applications},
  year      = {2001},
  area      = {Par:M},
  areaseq   = {0},
}

% Interfacing parallel jobs to process managers; 10th IEEE HPDC symposium, August 2001, pp. 431--432.
@inproceedings{toas01:bnr-design,
  author    = {Brian Toonen and David Ashton and Ewing Lusk and Ian Foster and William Gropp and Edgar Gabriel and Ralph Butler and Nicholas Karonis},
  title     = {Interfacing Parallel Jobs to Process Managers},
  booktitle = {Proceedings of the 10th IEEE International Symposium on High Performance Distributed Computing},
  publisher = {IEEE Computer Society Press},
  year      = {2001},
  month     = aug,
  pages     = {431--432},
  area      = {Par:M:PMI},
  areaseq   = {0},
}

% PETSc-based corrosion-pit simulation on a Linux cluster; Linux Clusters: the HPC Revolution (Urbana, IL), 2001.
@inproceedings{wagg01:linux-petsc,
  author    = {Eric Webb and Jay Alameda and William Gropp and Joshua Gray and Richard Alkire},
  title     = {Performance of Tightly Coupled {L}inux Cluster Simulation using {PETSc} of Reaction and Transport Processes During Corrosion Pit Initiation},
  booktitle = {Proceedings of {Linux} Clusters: the {HPC} Revolution},
  year      = {2001},
  note      = {Urbana, IL},
  area      = {App;P:Par},
  areaseq   = {0},
}

% Tutorial on advanced cluster programming with MPI; IEEE CLUSTER 2001, p. 453 (entry sourced from DBLP).
% The date range in the booktitle uses an en dash (double hyphen).
@InProceedings{DBLP:conf/cluster/Gropp01,

author	=	{William Gropp},
title	=	{Advanced Cluster Programming with {MPI}},
booktitle	=	{2001 IEEE International Conference on Cluster Computing (CLUSTER 2001), 8--11 October 2001, Newport Beach, CA, USA},
publisher	=	{IEEE Computer Society},
year	=	{2001},
isbn	=	{0-7695-1116-3},
pages	=	{453},
ee	=	{http://csdl.computer.org/comp/proceedings/cluster/2001/1116/00/11160453.pdf},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
annote	=	"Tutorial",
area	=	"Par:M",

}

% Scalable Unix commands for parallel processors; 8th European PVM/MPI Users' Group Meeting, LNCS 2131, pp. 410--418.
@inproceedings{ong-lusk-gropp:SUT,
  author    = {Emil Ong and Ewing Lusk and William Gropp},
  title     = {Scalable {U}nix Commands for Parallel Processors: A High-Performance Implementation},
  booktitle = {Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
  editor    = {Y. Cotronis and J. Dongarra},
  series    = {{Lecture Notes in Computer Science}},
  volume    = {2131},
  publisher = {{Springer-Verlag}},
  year      = {2001},
  month     = sep,
  pages     = {410--418},
  note      = {8th European PVM/MPI Users' Group Meeting},
  area      = {Par:M},
  areaseq   = {0},
}

% One-page contribution on achieving the potential of MPI; PVM/MPI proceedings, LNCS 2131, p. 7 (entry sourced from DBLP).
@inproceedings{DBLP:conf/pvm/Gropp01,
  author    = {William Gropp},
  title     = {Challenges and Successes in Achieving the Potential of {MPI}},
  booktitle = {Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
  editor    = {Y. Cotronis and J. Dongarra},
  series    = {{Lecture Notes in Computer Science}},
  volume    = {2131},
  year      = {2001},
  month     = sep,
  pages     = {7},
  ee        = {http://link.springer.de/link/service/series/0558/bibs/2131/21310007.htm},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  area      = {Par:M},
}

% Latency, bandwidth, and issue limits in high-performance CFD; First MIT Conference (Cambridge, MA), June 2001.
@inproceedings{gkks:cfd-perf-proc,
  author    = {W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title     = {Latency, Bandwidth, and Concurrent Issue Limitations in High-Performance {CFD}},
  booktitle = {Proceedings of the First {MIT} Conference on Computational Fluid and Solid Mechanics},
  year      = {2001},
  month     = jun,
  location  = {Cambridge, MA},
  url       = {http://www.mcs.anl.gov/petsc-fun3d/Papers/mit01.pdf},
  area      = {Par:Perf},
  areaseq   = {0},
}

% Poster abstract (April 2001) on single-precision incomplete LU factorization in PETSc.
@misc{bus01:petsc-perf,
  author  = {Kristopher R. Buschelman and William Gropp and Barry F. Smith},
  title   = {Single Precision Incomplete {LU} Factorization for Incompressible Fluid Flow Applications on {P}entium {III} Processors in {PETSc}},
  year    = {2001},
  month   = apr,
  note    = {Abstract for poster presented at the 2001 International Conference On Preconditioning Techniques For Large Sparse Matrix Problems In Industrial Applications},
  area    = {A:P},
  areaseq = {0},
}

% Invited keynote on lessons from the success of MPI; HiPC 2001, December 2001, pp. 81--92.
@inproceedings{gr01:mpi-lessons,
  author    = {William D. Gropp},
  title     = {Learning from the Success of {MPI}},
  booktitle = {High Performance Computing -- HiPC 2001},
  editor    = {Burkhard Monien and Viktor K. Prasanna and Sriram Vajapeyam},
  series    = {{Lecture Notes in Computer Science}},
  number    = {2228},
  publisher = {Springer},
  year      = {2001},
  month     = dec,
  pages     = {81--92},
  note      = {8th International Conference},
  annote    = {Invited keynote presentation},
  area      = {Par:M},
  areaseq   = {0},
}

%% 2002
% Evaluation of a user-level data transfer mechanism for high-performance networks; HPDC'02, pp. 255--264.
@inproceedings{dg02:wan-ftp,
  author    = {Philip M. Dickens and William D. Gropp},
  title     = {An Evaluation of a User-Level Data Transfer Mechanism for High-Performance Networks},
  booktitle = {Proceedings of 11th IEEE International Symposium on High Performance Distributed Computing (HPDC'02)},
  year      = {2002},
  pages     = {255--264},
  doi       = {10.1109/HPDC.2002.1029925},
  area      = {Par},
  areaseq   = {0},
}

% IPDPS 2002 paper on high-performance wide-area data transfers.
@inproceedings{dgw02:wan-ftp,
  author    = {Philip M. Dickens and William Gropp and Paul R. Woodward},
  title     = {High Performance Wide Area Data Transfers over High Performance Networks},
  booktitle = {Proceedings of IPDPS 2002},
  year      = {2002},
  doi       = {10.1109/IPDPS.2002.1016675},
  area      = {Par},
}

% Design goals of PVM and MPI compared; IEEE Cluster 2002, pp. 257--265.
@inproceedings{grop02:mpi-pvm,
  author    = {William D. Gropp and Ewing Lusk},
  title     = {Goals Guiding Design: {PVM} and {MPI}},
  booktitle = {Proceedings of IEEE Cluster},
  editor    = {William Gropp and Rob Pennington and Dan Reed and Mark Baker and Maxine Brown and Rajkumar Buyya},
  publisher = {IEEE Computer Society},
  year      = {2002},
  pages     = {257--265},
  area      = {M:Par},
  areaseq   = {0},
}

% AM3 prototype for Myrinet mapping and monitoring; HSLN workshop (with IEEE LCN), 2002, pp. 703--707.
@inproceedings{baik02:cluster-middleware,
  author    = {Seongbok Baik and Cynthia S. Hood and William D. Gropp},
  title     = {Prototype of {AM3}: Active Mapper and Monitoring Module for {M}yrinet Environment},
  booktitle = {HSLN (High-Speed Local Networks) workshop},
  year      = {2002},
  pages     = {703--707},
  annote    = {Workshop held in conjunction with IEEE LCN (Local Computer Networks)},
  area      = {Par},
  areaseq   = {0},
}

% Noncontiguous I/O through PVFS; IEEE Cluster 2002, pp. 405--414.
@inproceedings{ching-io-02,
  author    = {A. Ching and A. Choudhary and W.-K. Liao and R. Ross and W. Gropp},
  title     = {Noncontiguous {I/O} through {PVFS}},
  booktitle = {Proceedings of IEEE Cluster},
  editor    = {William Gropp and Rob Pennington and Dan Reed and Mark Baker and Maxine Brown and Rajkumar Buyya},
  publisher = {IEEE Computer Society},
  year      = {2002},
  pages     = {405--414},
  area      = {I:Par},
}

% Library components usable with any MPI implementation; 9th European PVM/MPI Users' Group Meeting (Linz), pp. 280--287.
% The LNCS number is stored as a plain `volume`, matching the 1997, 1999 and 2001 PVM/MPI entries in this file.
@InProceedings{grop02:mpi-impl:generic,

author	=	{William D. Gropp},
title	=	{Building Library Components that Can Use Any {MPI} Implementation},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{280--287},
year	=	2002,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra and Jens Volkert},
volume	=	2474,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{9th European PVM/MPI Users' Group Meeting, Linz, Austria},
area	=	"S:Par:M",
areaseq	=	"0",

}

% One-page contribution on MPI on the Grid; PVM/MPI 2002 proceedings, p. 12 (entry sourced from DBLP).
% The LNCS number is stored as a plain `volume`, matching the 1997, 1999 and 2001 PVM/MPI entries in this file.
@InProceedings{DBLP:conf/pvm/GroppL02,

author	=	{William Gropp and Ewing L. Lusk},
title	=	{{MPI} on the Grid},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2002},
pages	=	{12},
ee	=	{http://link.springer.de/link/service/series/0558/bibs/2474/24740012.htm},
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra and Jens Volkert},
volume	=	2474,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M",

}

% One-page contribution introducing MPICH2; PVM/MPI 2002 proceedings, p. 7 (entry sourced from DBLP).
% The LNCS number is stored as a plain `volume`, matching the 1997, 1999 and 2001 PVM/MPI entries in this file.
@InProceedings{DBLP:conf/pvm/Gropp02,

author	=	{William Gropp},
title	=	{{MPICH2}: A New Start for {MPI} Implementations},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2002},
pages	=	{7},
ee	=	{http://link.springer.de/link/service/series/0558/bibs/2474/24740007.htm},
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra and Jens Volkert},
volume	=	2474,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M",

}

% 2003
% Soft faults in high-performance cluster networks; 8th IFIP/IEEE IM symposium, March 2003, pp. 117--120.
% The month uses the standard macro so the bibliography style controls its rendering.
@InProceedings{EVA03.soft,

author	=	{Jeffrey J. Evans and Seongbok Baik and Cynthia S. Hood and William Gropp},
title	=	{Toward Understanding Soft Faults in High Performance Cluster Networks},
booktitle	=	{Proceedings of the 8th IFIP/IEEE International Symposium on Integrated Network Management},
pages	=	{117--120},
year	=	2003,
month	=	mar,
area	=	"Par",

}

% Abstract and slides on HPC trends; RIKEN BNL Research Center workshop volume 50, February 2003, pp. 91--97.
% The month uses the standard macro so the bibliography style controls its rendering.
@InProceedings{qcdoc03:trends,

author	=	{William Gropp},
title	=	{Trends in High Performance Computing},
booktitle	=	{High Performance Computing with {QCDOC} and {BlueGene}},
pages	=	{91--97},
year	=	2003,
volume	=	50,
month	=	feb,
organization	=	{RIKEN BNL Research Center},
note	=	{Abstract and six major slides from the presentation},
annote	=	{Report number BNL-71147-2003},
Organizers	=	{N. Christ, J. Davenport, Y. Deng, A. Gara, J. Glimm, R. Mawhinney, E. McFadden, A. Peskin, and W. Pulleyblank},
area	=	"Par",

}

% Noncontiguous I/O accesses through MPI-IO; CCGrid2003 (Tokyo), May 2003, pp. 104--111.
@inproceedings{chin03a:mpi-io,
  author    = {A. Ching and A. Choudhary and K. Coloma and W.-K. Liao and R. Ross and W. Gropp},
  title     = {Noncontiguous {I/O} Accesses Through {MPI-IO}},
  booktitle = {Proceedings of the 3rd IEEE/ACM International Symposium on Cluster Computing and the Grid (CCGrid2003)},
  year      = {2003},
  month     = may,
  pages     = {104--111},
  annote    = {Meeting held in Tokyo},
  area      = {Par:I:M},
}

% One-page contribution on future developments in MPI; 10th European PVM/MPI Users' Group Meeting (Venice), p. 15.
% The LNCS number is stored as a plain `volume`, and the single page is given once instead of as a 15--15 range.
@InProceedings{gro03:mpitrends,

author	=	{William Gropp},
title	=	{Future Developments in {MPI}},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{15},
year	=	2003,
editor	=	{Jack Dongarra and Domenico Laforenza and Salvatore Orlando},
volume	=	2840,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{10th European PVM/MPI Users' Group Meeting, Venice, Italy},
area	=	"Par:M",
areaseq	=	0,

}

% Collective-operation performance in MPICH; 10th European PVM/MPI Users' Group Meeting (Venice), pp. 257--267.
% The LNCS number is stored as a plain `volume`, matching the earlier PVM/MPI entries in this file.
@InProceedings{tha03:mpicollective,

author	=	{Rajeev Thakur and William Gropp},
title	=	{Improving the Performance of Collective Operations in {MPICH}},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{257--267},
year	=	2003,
editor	=	{Jack Dongarra and Domenico Laforenza and Salvatore Orlando},
volume	=	2840,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{10th European PVM/MPI Users' Group Meeting, Venice, Italy},
area	=	"Par:M:Coll",
areaseq	=	0,

}

% MPI design for BlueGene/L; 10th European PVM/MPI Users' Group Meeting (Venice), pp. 352--361.
% Accented letters in author names are braced as BibTeX special characters; the LNCS number is stored as `volume`.
@InProceedings{alm03:mpibgl,

author	=	{G. Alm{\'a}si and C. Archer and J. G. Casta{\~n}os and M. Gupta and X. Martorell and J. E. Moreira and W. D. Gropp and S. Rus and B. Toonen},
title	=	{{MPI} on {BlueGene/L}: Designing an Efficient General Purpose Messaging Solution for a Large Cellular System},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{352--361},
year	=	2003,
editor	=	{Jack Dongarra and Domenico Laforenza and Salvatore Orlando},
volume	=	2840,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{10th European PVM/MPI Users' Group Meeting, Venice, Italy},
area	=	"Par:M",
areaseq	=	0,

}

% Fast, reusable MPI datatype processing; 10th European PVM/MPI Users' Group Meeting (Venice), pp. 404--413.
% The LNCS number is stored as a plain `volume`, matching the earlier PVM/MPI entries in this file.
@InProceedings{ros03:mpidatatype,

author	=	{R. Ross and N. Miller and W. D. Gropp},
title	=	{Implementing Fast and Reusable Datatype Processing},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{404--413},
year	=	2003,
editor	=	{Jack Dongarra and Domenico Laforenza and Salvatore Orlando},
volume	=	2840,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
note	=	{10th European PVM/MPI Users' Group Meeting, Venice, Italy},
area	=	"Par:M:Datatype",
areaseq	=	0,

}

% One-page contribution on high-level programming in MPI; PVM/MPI 2003 proceedings, p. 27 (entry sourced from DBLP).
% The LNCS number is stored as a plain `volume`, matching the earlier PVM/MPI entries in this file.
@InProceedings{DBLP:conf/pvm/GroppL03,

author	=	{William Gropp and Ewing L. Lusk},
title	=	{High-Level Programming in {MPI}},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2003},
pages	=	{27},
ee	=	{http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=2840{\&}spage=27},
editor	=	{Jack Dongarra and Domenico Laforenza and Salvatore Orlando},
volume	=	2840,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer Verlag},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:S",

}

% Evans, Hood, and Gropp: application run-time variability versus network
% performance (HSLN workshop at IEEE LCN, October 2003).
@InProceedings{evans03:network,
title	=	{Exploring the Relationship Between Parallel Application Run-Time Variability and Network Performance},
author	=	{Jeffrey Evans and Cynthia Hood and William Gropp},
booktitle	=	{Workshop on High-Speed Local Networks (HSLN), IEEE Conference on Local Computer Networks (LCN)},
month	=	OCT,
year	=	2003,
pages	=	{538--547},
area	=	"Par:App",
}

% Parallel netCDF paper from SC2003; the annote records the meeting location.
@InProceedings{li03:pnetcdf,

author	=	{J. Li and W. Liao and A. Choudhary and R. Ross and R. Thakur and W. Gropp and R. Latham and A. Siegel and B. Gallagher and M. Zingale},
title	=	{Parallel {netCDF}: A High-Performance Scientific {I/O} Interface},
booktitle	=	{Proceedings of SC2003},
year	=	2003,
month	=	NOV,
annote	=	{Held in Phoenix, Arizona},
area	=	"Par:I",

}

% Structured data access in parallel file systems (IEEE Cluster, November 2003).
@InProceedings{ching-io-03,
title	=	{Efficient Structured Data Access in Parallel File Systems},
author	=	{A. Ching and A. Choudhary and W.-K. Liao and R. Ross and W. Gropp},
booktitle	=	{Proceedings of IEEE Cluster},
publisher	=	{IEEE Computer Society},
month	=	NOV,
year	=	2003,
annote	=	{Meeting held in Hong Kong},
area	=	"I:Par",
}

%% 2004

% MPI implementation on BlueGene/L (EuroPar 2004); the note records the award.
@InProceedings{alma04:mpi-impl:bgl,

author	=	{George Almasi and Charles Archer and Jose G. Castanos and C. Chris Erway and Philip Heidelberger and Xavier Martorell and Jose E. Moreira and Kurt Pinnow and Joe Ratterman and Nils Smeds and Burkhard Steinmacher-Burow and William Gropp and Brian Toonen},
title	=	{Implementing {MPI} on the {BlueGene/L} Supercomputer},
booktitle	=	{Proceedings of EuroPar2004},
pages	=	{833--845},
year	=	2004,
note	=	{Selected as distinguished paper},
area	=	"Par:M",

}

% One-page contribution, 11th EuroPVM/MPI (LNCS volume 3241).
% The editor's umlaut is braced as a BibTeX special character.
@InProceedings{gro04:mpi-pgming,

author	=	{William Gropp},
title	=	{{MPI} and High Productivity Programming},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{7},
year	=	2004,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra},
volume	=	{3241},
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{11th European PVM/MPI User's Group Meeting, Budapest, Hungary},
area	=	"Par:S:M",
areaseq	=	0,

}

% Synchronization overhead in MPI one-sided communication,
% 11th EuroPVM/MPI (LNCS volume 3241).
@InProceedings{tha04:mpi-impl,

author	=	{Rajeev Thakur and William Gropp and Brian Toonen},
title	=	{Minimizing Synchronization Overhead in the Implementation of {MPI} One-Sided Communication},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{57--67},
year	=	2004,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra},
volume	=	{3241},
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{11th European PVM/MPI User's Group Meeting, Budapest, Hungary},
area	=	"Par:M:RMA",
areaseq	=	0,

}

% MPI-2 passive one-sided communication on InfiniBand,
% 11th EuroPVM/MPI (LNCS volume 3241).
@InProceedings{jia04:mpi-impl,

author	=	{W. Jiang and J. Liu and H.-W. Jin and D. K. Panda and D. Buntinas and Rajeev Thakur and William Gropp},
title	=	{Efficient Implementation of {MPI-2} Passive One-Sided Communication on {InfiniBand} Clusters},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{68--76},
year	=	2004,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra},
volume	=	{3241},
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{11th European PVM/MPI User's Group Meeting, Budapest, Hungary},
area	=	"Par:M:RMA",
areaseq	=	0,

}

% MPICH2 over InfiniBand (IPDPS 2004). The product name is braced as a whole
% word so its internal capital B survives sentence-casing styles.
@InProceedings{liu03:mpich2-infiniband-ipdps,

author	=	{Jiuxing Liu and Weihang Jiang and Pete Wyckoff and Dhabaleswar K. Panda and David Ashton and Darius Buntinas and William Gropp and Brian Toonen},
title	=	{Design and Implementation of {MPICH2} over {InfiniBand} with {RDMA} Support},
booktitle	=	{Proceedings of IPDPS 2004},
year	=	2004,
area	=	"Par:M:RMA",

}

% I/O redundancy in MPI environments, 11th EuroPVM/MPI (LNCS volume 3241).
@InProceedings{gro04a:pario,

author	=	{William Gropp and Robert Ross and Neill Miller},
title	=	{Providing Efficient {I/O} Redundancy in {MPI} Environments},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{77--86},
year	=	2004,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra},
volume	=	{3241},
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{11th European PVM/MPI User's Group Meeting, Budapest, Hungary},
area	=	"Par:I:M",
areaseq	=	0,

}

% DBLP record for the 2004 PVM/MPI contribution (LNCS volume 3241).
% NOTE(review): title, page and proceedings match entry gro04:mpi-pgming;
% this looks like a second record of the same paper kept under its DBLP key.
% Confirm before citing both.
@InProceedings{DBLP:conf/pvm/Gropp04,

author	=	{William D. Gropp},
title	=	{{MPI} and High Productivity Programming},
pages	=	{7},
ee	=	{http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3241{\&}spage=7},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	2004,
editor	=	{Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra},
volume	=	{3241},
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{11th European PVM/MPI User's Group Meeting, Budapest, Hungary},
area	=	"Par:M:S",

}

%% 2005
% Collective error detection for MPI, 12th EuroPVM/MPI (LNCS volume 3666).
% Editor name spelled Kranzlm{\"u}ller, as in the 2004 entries of this file.
@InProceedings{falz05:mpi-impl,

author	=	{Chris Falzone and Anthony Chan and Ewing Lusk and William Gropp},
title	=	{Collective Error Detection for {MPI} Collective Operations},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{138--147},
year	=	2005,
editor	=	{Beniamino Di Martino and Dieter Kranzlm{\"u}ller and Jack Dongarra},
volume	=	{3666},
month	=	SEP,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{12th European PVM/MPI User's Group Meeting, Sorrento, Italy},
area	=	"Par:M:S",
areaseq	=	0

}

% Common communication subsystem design, 12th EuroPVM/MPI (LNCS volume 3666).
% Editor name spelled Kranzlm{\"u}ller, as in the 2004 entries of this file.
@InProceedings{bunt05:mpi-impl,

author	=	{Darius Buntinas and William Gropp},
title	=	{Designing a Common Communication Subsystem},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{156--166},
year	=	2005,
editor	=	{Beniamino Di Martino and Dieter Kranzlm{\"u}ller and Jack Dongarra},
volume	=	{3666},
month	=	SEP,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{12th European PVM/MPI User's Group Meeting, Sorrento, Italy},
area	=	"Par:M",
areaseq	=	0

}

% Implementation options for MPI one-sided communication,
% 12th EuroPVM/MPI (LNCS volume 3666).
% Editor name spelled Kranzlm{\"u}ller, as in the 2004 entries of this file.
@InProceedings{gro05:mpi-rma-impl,

author	=	{William Gropp and Rajeev Thakur},
title	=	{An Evaluation of Implementation Options for {MPI} One-Sided Communication},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	{415--424},
year	=	2005,
editor	=	{Beniamino Di Martino and Dieter Kranzlm{\"u}ller and Jack Dongarra},
volume	=	{3666},
month	=	SEP,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{12th European PVM/MPI User's Group Meeting, Sorrento, Italy},
area	=	"Par:M:RMA",
areaseq	=	0

}

% Abstract in the 12th EuroPVM/MPI proceedings (LNCS volume 3666).
% Editor name spelled Kranzlm{\"u}ller, as in the 2004 entries of this file.
@InProceedings{grop05:progmodels,

author	=	{William Gropp},
title	=	{Towards a Productive {MPI} Environment (abstract)},
booktitle	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface},
pages	=	4,
year	=	2005,
editor	=	{Beniamino Di Martino and Dieter Kranzlm{\"u}ller and Jack Dongarra},
volume	=	{3666},
month	=	SEP,
series	=	{Lecture Notes in Computer Science},
publisher	=	{Springer-Verlag},
note	=	{12th European PVM/MPI User's Group Meeting, Sorrento, Italy},
area	=	"Par:M:S",
areaseq	=	0

}

%% 2006

% File I/O on Blue Gene/L (HPCA 2006), record imported from IEEE Xplore.
% Initials are separated by spaces so each one is parsed as its own name
% token, empty volume/number fields are dropped, and the month uses the
% standard macro.
% NOTE(review): this entry has no area field, unlike its neighbours.
@InProceedings{1598125,

author	=	{Yu, H. and Sahoo, R. K. and Howson, C. and Almasi, G. and Castanos, J. G. and Gupta, M. and Moreira, J. E. and Parker, J. J. and Engelsiepen, T. E. and Ross, R. B. and Thakur, R. and Latham, R. and Gropp, W. D.},
title	=	{High performance file {I/O} for the {Blue Gene/L} supercomputer},
booktitle	=	{The Twelfth International Symposium on High-Performance Computer Architecture, 2006},
year	=	{2006},
month	=	FEB,
pages	=	{187--196},
keywords	=	{Blue Gene/L supercomputer; General Parallel File System; MPI; data-intensive application; functional partitioning design; hierarchical partitioning; high performance file I/O; parallel HDF5; parallel I/O benchmark; parallel NetCDF; parallel file I/O architecture; application program interfaces; benchmark testing; file organisation; message passing; parallel architectures; parallel machines},
doi	=	{10.1109/HPCA.2006.1598125},
ISSN	=	{1530-0897},

}

% Collective communication over multiple links (PPoPP 2006).
% The month uses the standard macro; the meeting days are concatenated
% with an en-dash range so styles can still localise the month name.
@InProceedings{PPoPP2006,

title	=	{Collective Communication on Architectures that Support Simultaneous Communication over Multiple Links},
author	=	{Ernie Chan and William Gropp and Rajeev Thakur and Robert van de Geijn},
booktitle	=	{Proceedings of the 2006 ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
address	=	{New York, New York, USA},
publisher	=	{ACM},
pages	=	{2--11},
month	=	MAR # "~29--31",
year	=	{2006},
area	=	"Par:M:Coll",

}

% Nemesis communication subsystem: design and evaluation (CCGrid 2006, May).
@InProceedings{buntinas06:nemesis,
title	=	{Design and Evaluation of {N}emesis, a scalable, low-latency, message-passing communication subsystem},
author	=	{Darius Buntinas and Guillaume Mercier and William Gropp},
editor	=	{Stephen John Turner and Bu Sung Lee and Wentong Cai},
booktitle	=	{Proceedings of the 6th IEEE International Symposium on Cluster Computing and the Grid (CCGrid2006)},
month	=	MAY,
year	=	2006,
pages	=	{521--530},
area	=	"Par:M",
}

% Thread-safe MPI implementation issues, EuroPVM/MPI 2006 (LNCS volume 4192).
% The area code uses "Thread", the spelling in the file's area list and in
% the other thread-related entries.
@InProceedings{grop06:mpi:threads,

author	=	{William D. Gropp and Rajeev Thakur},
title	=	{Issues in Developing a Thread-Safe {MPI} Implementation},
pages	=	{12--21},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
year	=	2006,
editor	=	{Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra},
volume	=	{4192},
series	=	{Lecture Notes in Computer Science},
month	=	SEP,
publisher	=	{Springer},
area	=	"Par:M:Thread",
note	=	{Outstanding Paper Award (1 of 3)},

}

% Formal verification of MPI one-sided programs,
% EuroPVM/MPI 2006 (LNCS volume 4192).
@InProceedings{pervez06:formal:mpi,

author	=	{Salman Pervez and Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev Thakur and William D. Gropp},
title	=	{Formal Verification of Programs that Use {MPI} One-Sided Communication},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
pages	=	{30--39},
year	=	2006,
editor	=	{Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra},
volume	=	{4192},
series	=	{Lecture Notes in Computer Science},
month	=	SEP,
publisher	=	{Springer},
area	=	"Par:M:formal",
note	=	{Outstanding Paper Award (1 of 3)},

}

% MPICH2 over Nemesis, shared-memory evaluation,
% EuroPVM/MPI 2006 (LNCS volume 4192).
@InProceedings{buntinas06:nemesis:shm,

author	=	{Darius Buntinas and Guillaume Mercier and William D. Gropp},
title	=	{Implementation and Shared-Memory Evaluation of {MPICH2} over the {N}emesis Communication Subsystem},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
pages	=	{86--95},
year	=	2006,
editor	=	{Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra},
volume	=	{4192},
series	=	{Lecture Notes in Computer Science},
month	=	SEP,
publisher	=	{Springer},
area	=	"Par:M",

}

% Debugger interface for dynamic MPI-2 processes,
% EuroPVM/MPI 2006 (LNCS volume 4192).
% NOTE(review): the straight quotes around "Rusty" are kept as entered;
% other entries in this file list the same author without the nickname.
@InProceedings{gottbrath06:mpi:debugging,

author	=	{Christopher Gottbrath and Brian Barrett and William D. Gropp and Ewing "Rusty" Lusk and Jeff Squyres},
title	=	{An Interface to Support the Identification of Dynamic {MPI} 2 Processes for Scalable Parallel Debugging},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
pages	=	{115--122},
year	=	2006,
editor	=	{Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra},
volume	=	{4192},
series	=	{Lecture Notes in Computer Science},
month	=	SEP,
publisher	=	{Springer},
area	=	"Par:M",

}

% Memory optimization for MPI derived datatypes,
% EuroPVM/MPI 2006 (LNCS volume 4192).
@InProceedings{byna06:mpi:datatypes,

author	=	{Surendra Byna and Xian-He Sun and Rajeev Thakur and William D. Gropp},
title	=	{Automatic Memory Optimization for Improving {MPI} Derived Datatype Performance},
booktitle	=	{Recent Advances in {P}arallel {V}irtual {M}achine and {M}essage {P}assing {I}nterface},
pages	=	{238--246},
year	=	2006,
editor	=	{Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra},
volume	=	{4192},
series	=	{Lecture Notes in Computer Science},
month	=	SEP,
publisher	=	{Springer},
area	=	"Par:M:Datatype",

}

% Radiation transport paper in Domain Decomposition Methods XVI
% (Lecture Notes in Computational Science and Engineering, volume 55).
@InProceedings{gropp06:radtransport,
author	=	"William D. Gropp and Dinesh K. Kaushik and David E. Keyes and Barry F. Smith",
title	=	"Parallel Implicit Solution of Diffusion-limited Radiation Transport",
booktitle	=	"Domain Decomposition Methods in Science and Engineering XVI",
editor	=	"Olof B. Widlund and David E. Keyes",
series	=	"Lecture Notes in Computational Science and Engineering",
volume	=	55,
pages	=	"579--586",
publisher	=	"Springer-Verlag",
year	=	2006,
area	=	"Par:A:NS:P",
}

%% 2007

% Grid-based image registration chapter (Springer, 2007).
% The IFIP series and volume are stored in their own fields rather than in
% a free-form note, so styles can format them.
% NOTE(review): third author's given name corrected from "Stefen" to
% "Stefan" - confirm against the published paper.
@InProceedings{Grop07Grid,

author	=	{William Gropp and Eldad Haber and Stefan Heldmann and David Keyes and Neill Miller and Jennifer Schopf and Tianzhi Yang},
title	=	{Grid-based Image Registration},
booktitle	=	{Grid-Based Problem Solving Environments},
pages	=	{435--448},
year	=	2007,
editor	=	{Patrick W. Gaffney and James C. T. Pool},
series	=	{IFIP International Federation for Information Processing},
volume	=	{239},
publisher	=	{Springer},
area	=	"Par:NS",

}

% Workshop observations in the same 2007 Springer volume as Grop07Grid.
% The IFIP series and volume are stored in their own fields rather than in
% a free-form note, so styles can format them.
@InProceedings{Grop07GridSummary,

author	=	{William Gropp},
title	=	{Observations on {WoCo9}},
booktitle	=	{Grid-Based Problem Solving Environments},
pages	=	{451--453},
year	=	2007,
editor	=	{Patrick W. Gaffney and James C. T. Pool},
series	=	{IFIP International Federation for Information Processing},
volume	=	{239},
publisher	=	{Springer},
area	=	"Par:NS",

}

% PVM/MPI 2007 paper; proceedings data comes from the crossref parent
% DBLP:conf/pvm/2007. The umlaut is braced as a BibTeX special character.
@InProceedings{DBLP:conf/pvm/TraffGT07,

author	=	{Jesper Larsson Tr{\"a}ff and William Gropp and Rajeev Thakur},
title	=	{Self-consistent {MPI} Performance Requirements},
booktitle	=	{PVM/MPI},
year	=	{2007},
pages	=	{36--45},
ee	=	{http://dx.doi.org/10.1007/978-3-540-75416-9_12},
crossref	=	{DBLP:conf/pvm/2007},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:Perf",
note	=	{Outstanding paper (1 of 4)},

}

% PVM/MPI 2007 paper on a multithreaded-MPI performance test suite;
% proceedings data comes from the crossref parent DBLP:conf/pvm/2007.
@InProceedings{DBLP:conf/pvm/ThakurG07,
title	=	{Test Suite for Evaluating Performance of {MPI} Implementations That Support {MPI\_THREAD\_MULTIPLE}},
author	=	{Rajeev Thakur and William Gropp},
pages	=	{46--55},
year	=	{2007},
note	=	{Outstanding paper (1 of 4)},
crossref	=	{DBLP:conf/pvm/2007},
ee	=	{http://dx.doi.org/10.1007/978-3-540-75416-9_13},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:Thread",
}

% PVM/MPI 2007 paper on the generalized request interface;
% proceedings data comes from the crossref parent DBLP:conf/pvm/2007.
@InProceedings{DBLP:conf/pvm/LathamGRT07,
title	=	{Extending the {MPI-2} Generalized Request Interface},
author	=	{Robert Latham and William Gropp and Robert Ross and Rajeev Thakur},
pages	=	{223--232},
year	=	{2007},
crossref	=	{DBLP:conf/pvm/2007},
ee	=	{http://dx.doi.org/10.1007/978-3-540-75416-9_33},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M",
}

% PVM/MPI 2007 paper on MPI RMA performance;
% proceedings data comes from the crossref parent DBLP:conf/pvm/2007.
@InProceedings{DBLP:conf/pvm/GroppT07,
title	=	{Revealing the Performance of {MPI} {RMA} Implementations},
author	=	{William D. Gropp and Rajeev Thakur},
pages	=	{272--280},
year	=	{2007},
crossref	=	{DBLP:conf/pvm/2007},
ee	=	{http://dx.doi.org/10.1007/978-3-540-75416-9_38},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:RMA",
}

% PVM/MPI 2007 paper on model checking of MPI programs;
% proceedings data comes from the crossref parent DBLP:conf/pvm/2007.
@InProceedings{DBLP:conf/pvm/PervezGKPTG07,
title	=	{Practical Model-Checking Method for Verifying Correctness of {MPI} Programs},
author	=	{Salman Pervez and Ganesh Gopalakrishnan and Robert M. Kirby and Robert Palmer and Rajeev Thakur and William Gropp},
pages	=	{344--353},
year	=	{2007},
crossref	=	{DBLP:conf/pvm/2007},
ee	=	{http://dx.doi.org/10.1007/978-3-540-75416-9_46},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:formal",
}

% IPDPS 2007 case study with PETSc and MPI; proceedings data comes from the
% crossref parent conf/ipps/2007.
@InProceedings{conf/ipps/BalajiBBSTG07,
author	=	"Pavan Balaji and Darius Buntinas and S. Balay and B. Smith and Rajeev Thakur and William Gropp",
title	=	"Nonuniformly Communicating Noncontiguous Data: {A} Case Study with {PETS}c and {MPI}",
crossref	=	"conf/ipps/2007",
pages	=	"1--10",
publisher	=	"IEEE",
year	=	"2007",
URL	=	"http://dx.doi.org/10.1109/IPDPS.2007.370223",
bibdate	=	"2007-10-04",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/ipps/ipdps2007.html#BalajiBBSTG07",
area	=	"Par:M:P",
}

% ICPP 2007 paper; proceedings data comes from the crossref parent
% conf/icpp/2007. "InfiniBand" is braced as a whole word: bracing only the
% initial let sentence-casing styles lowercase the internal capital B.
@InProceedings{conf/icpp/BalajiBPTG07,

title	=	"Advanced Flow-control Mechanisms for the Sockets Direct Protocol over {InfiniBand}",
author	=	"Pavan Balaji and S. Bhagvat and Dhabaleswar K. Panda and Rajeev Thakur and William Gropp",
publisher	=	"IEEE Computer Society",
year	=	"2007",
bibdate	=	"2007-10-04",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/icpp/icpp2007.html#BalajiBPTG07",
crossref	=	"conf/icpp/2007",
pages	=	"73",
URL	=	"http://doi.ieeecomputersociety.org/10.1109/ICPP.2007.14",
area	=	"Par:M",

}

% ACSAC 2007 paper (LNCS volume 4697), record imported from DBLP.
@InProceedings{conf/aPcsac/ThakurG07,
author	=	"Rajeev Thakur and William Gropp",
title	=	"Open Issues in {MPI} Implementation",
booktitle	=	"Advances in Computer Systems Architecture, 12th Asia-Pacific Conference, {ACSAC} 2007, Seoul, Korea, August 23-25, 2007, Proceedings",
editor	=	"Lynn Choi and Yunheung Paek and Sangyeun Cho",
series	=	"Lecture Notes in Computer Science",
volume	=	"4697",
pages	=	"327--338",
publisher	=	"Springer",
year	=	"2007",
ISBN	=	"978-3-540-74308-8",
URL	=	"http://dx.doi.org/10.1007/978-3-540-74309-5_31",
bibdate	=	"2007-08-23",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/aPcsac/aPcsac2007.html#ThakurG07",
area	=	"Par:M",
}

% Mini-symposium summary in the 2007 Parallel Computing proceedings
% (NIC series, volume 38). Accented letters in author and editor names are
% braced as BibTeX special characters.
@InProceedings{Grop:BGMS:07,

author	=	{William D. Gropp and Wolfgang Frings and Marc-Andr{\'e} Hermanns and Ed Jedlicka and Kirk E. Jordan and Fred Mintzer and Boris Orth},
title	=	{Scaling Science Applications on {Blue Gene}},
booktitle	=	{Parallel Computing: Architectures, Algorithms, and Applications},
pages	=	{583--584},
year	=	2007,
editor	=	{Christian Bischof and Martin B{\"u}cker and Paul Gibbon and Gerhard Joubert and Thomas Lippert and Bernd Mohr and Frans Peters},
volume	=	38,
series	=	{NIC},
publisher	=	{NIC-Directors},
note	=	{Summary of the Mini-Symposium},
area	=	"Par:App:NS",

}

%
% Note: BibTeX requires that a cross-referenced (parent) entry appear
% after every entry that refers to it through a crossref field.
% Crossref parent for the PVM/MPI 2007 papers (LNCS volume 4757); it must
% stay after the entries that reference it.
@proceedings{DBLP:conf/pvm/2007,
title	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface, 14th European PVM/MPI User's Group Meeting, Paris, France, September 30 - October 3, 2007, Proceedings},
booktitle	=	{PVM/MPI},
editor	=	{Franck Cappello and Thomas H{\'e}rault and Jack Dongarra},
series	=	{Lecture Notes in Computer Science},
volume	=	{4757},
publisher	=	{Springer},
year	=	{2007},
isbn	=	{978-3-540-75415-2},
bibsource	=	{DBLP, http://dblp.uni-trier.de}
}

% Crossref parent for the IPDPS 2007 paper; it must stay after the entry
% that references it.
@proceedings{conf/ipps/2007,

title	=	{21st International Parallel and Distributed Processing Symposium (IPDPS 2007), Proceedings, 26-30 March 2007, Long Beach, California, USA},
booktitle	=	{IPDPS},
publisher	=	{IEEE},
year	=	{2007},
bibsource	=	{DBLP, http://dblp.uni-trier.de}

}

% Crossref parent for the ICPP 2007 paper; it must stay after the entry
% that references it.
@proceedings{conf/icpp/2007,
booktitle	=	{ICPP},
title	=	{2007 International Conference on Parallel Processing (ICPP 2007), September 10-14, 2007, Xi-An, China},
year	=	{2007},
publisher	=	{IEEE Computer Society},
bibsource	=	{DBLP, http://dblp.uni-trier.de}
}

%% 2008

% SC08 contribution on parallel I/O prefetching; the note records "Best Poster".
@InProceedings{byna08:_paral_i_o_prefet_using,
title	=	{Parallel {I/O} Prefetching Using {MPI} File Caching and {I/O} Signatures},
author	=	{Suren Byna and Yong Chen and W. D. Gropp and Xian-He Sun and Rajeev Thakur},
booktitle	=	{Proceedings of SC08},
publisher	=	{IEEE and ACM},
year	=	2008,
note	=	"Best Poster",
area	=	"Par:M:I",
}

% SC08 paper on pre-execution prefetching; the note records its finalist status.
@InProceedings{byna08:_hidin_i_o_laten_with,
title	=	{Hiding {I/O} Latency with Pre-execution Prefetching for Parallel Applications},
author	=	{Suren Byna and Yong Chen and W. D. Gropp and Xian-He Sun and Rajeev Thakur},
booktitle	=	{Proceedings of SC08},
publisher	=	{IEEE and ACM},
year	=	2008,
area	=	"Par:I",
note	=	{Finalist for Best Paper and Best Student Paper.},
}

% 3D FFT communication analysis on Blue Gene (IEEE HiPC, 2008).
@InProceedings{chan08-bg-fft,
title	=	{Communication Analysis of Parallel {3D FFT} for Flat {C}artesian Meshes on Large {Blue Gene} Systems},
author	=	{Anthony Chan and Pavan Balaji and William Gropp and Rajeev Thakur},
booktitle	=	{15th IEEE International Conference on High Performance Computing},
pages	=	{422--429},
year	=	2008,
area	=	"App:Par:M:Coll",
}

% Tensor matrix-vector multiplication performance (IEEE HiPC, 2008).
@InProceedings{kaushik08-tensor,
title	=	{Improving the Performance of Tensor Matrix Vector Multiplication in Cumulative Reaction Probability Based Quantum Chemistry Codes},
author	=	{Dinesh Kaushik and William Gropp and Michael Minkoff and Barry Smith},
booktitle	=	{15th IEEE International Conference on High Performance Computing},
pages	=	{120--130},
year	=	2008,
area	=	"NS:Perf",
}

% PVM/MPI 2008 contribution; proceedings data comes from the crossref
% parent DBLP:conf/pvm/2008.
@InProceedings{DBLP:conf/pvm/Gropp08,
title	=	{{MPI} and Hybrid Programming Models for Petascale Computing},
author	=	{William D. Gropp},
booktitle	=	{PVM/MPI},
pages	=	{6--7},
year	=	{2008},
crossref	=	{DBLP:conf/pvm/2008},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_5},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:MPI",
}

% PVM/MPI 2008 paper on non-data-communication overheads; proceedings data
% comes from the crossref parent DBLP:conf/pvm/2008.
@InProceedings{DBLP:conf/pvm/BalajiCGTL08,
title	=	{Non-data-communication Overheads in {MPI}: Analysis on {Blue Gene/P}},
author	=	{Pavan Balaji and Anthony Chan and William Gropp and Rajeev Thakur and Ewing L. Lusk},
booktitle	=	{PVM/MPI},
pages	=	{13--22},
year	=	{2008},
crossref	=	{DBLP:conf/pvm/2008},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_9},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M",
}

% PVM/MPI 2008 paper on irregular all-gather; proceedings data comes from
% the crossref parent DBLP:conf/pvm/2008. The umlaut is braced as a BibTeX
% special character.
@InProceedings{DBLP:conf/pvm/TraffRSBTG08,

author	=	{Jesper Larsson Tr{\"a}ff and Andreas Ripke and Christian Siebert and Pavan Balaji and Rajeev Thakur and William Gropp},
title	=	{A Simple, Pipelined Algorithm for Large, Irregular All-gather Problems},
booktitle	=	{PVM/MPI},
year	=	{2008},
pages	=	{84--93},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_16},
crossref	=	{DBLP:conf/pvm/2008},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:Coll",

}

% PVM/MPI 2008 paper on multithreaded MPI communication; proceedings data
% comes from the crossref parent DBLP:conf/pvm/2008.
@InProceedings{DBLP:conf/pvm/BalajiBGGT08,
title	=	{Toward Efficient Support for Multithreaded {MPI} Communication},
author	=	{Pavan Balaji and Darius Buntinas and David Goodell and William Gropp and Rajeev Thakur},
booktitle	=	{PVM/MPI},
pages	=	{120--129},
year	=	{2008},
crossref	=	{DBLP:conf/pvm/2008},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_20},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:Thread",
}

% PVM/MPI 2008 paper on MPI-IO performance requirements; proceedings data
% comes from the crossref parent DBLP:conf/pvm/2008. The umlaut is braced
% as a BibTeX special character.
@InProceedings{DBLP:conf/pvm/GroppKRTT08,

author	=	{William D. Gropp and Dries Kimpe and Robert Ross and Rajeev Thakur and Jesper Larsson Tr{\"a}ff},
title	=	{Self-consistent {MPI-IO} Performance Requirements and Expectations},
booktitle	=	{PVM/MPI},
year	=	{2008},
pages	=	{167--176},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_25},
crossref	=	{DBLP:conf/pvm/2008},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:I:Perf",

}

% PVM/MPI 2008 paper on dynamic formal verification; proceedings data comes
% from the crossref parent DBLP:conf/pvm/2008.
@InProceedings{DBLP:conf/pvm/VakkalankaDGKTG08,
title	=	{Implementing Efficient Dynamic Formal Verification Methods for {MPI} Programs},
author	=	{Sarvani S. Vakkalanka and Michael Delisi and Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev Thakur and William Gropp},
booktitle	=	{PVM/MPI},
pages	=	{248--256},
year	=	{2008},
crossref	=	{DBLP:conf/pvm/2008},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_34},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:formal",
}

% PVM/MPI 2008 paper on functionally irrelevant barriers; proceedings data
% comes from the crossref parent DBLP:conf/pvm/2008.
@InProceedings{DBLP:conf/pvm/SharmaVGKTG08,
title	=	{A Formal Approach to Detect Functionally Irrelevant Barriers in {MPI} Programs},
author	=	{Subodh Sharma and Sarvani S. Vakkalanka and Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev Thakur and William Gropp},
booktitle	=	{PVM/MPI},
pages	=	{265--273},
year	=	{2008},
crossref	=	{DBLP:conf/pvm/2008},
ee	=	{http://dx.doi.org/10.1007/978-3-540-87475-1_36},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
area	=	"Par:M:formal",
}

% Crossref parent for the PVM/MPI 2008 papers (LNCS volume 5205); it must
% stay after the entries that reference it.
@proceedings{DBLP:conf/pvm/2008,
title	=	{Recent Advances in Parallel Virtual Machine and Message Passing Interface, 15th European {PVM/MPI} Users' Group Meeting, Dublin, Ireland, September 7-10, 2008. Proceedings},
booktitle	=	{PVM/MPI},
editor	=	{Alexey L. Lastovetsky and Tahar Kechadi and Jack Dongarra},
series	=	{Lecture Notes in Computer Science},
volume	=	{5205},
publisher	=	{Springer},
year	=	{2008},
isbn	=	{978-3-540-87474-4},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
}

%% 2009

% 16th EuroPVM/MPI paper (Espoo, 2009). The doi field holds the bare DOI,
% without a resolver prefix, matching the other doi fields in this file;
% the umlaut is braced as a BibTeX special character.
@InProceedings{1612220,

author	=	{Balaji, Pavan and Buntinas, Darius and Goodell, David and Gropp, William and Kumar, Sameer and Lusk, Ewing and Thakur, Rajeev and Tr{\"a}ff, Jesper Larsson},
title	=	{{MPI} on a Million Processors},
booktitle	=	{Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2009},
isbn	=	{978-3-642-03769-6},
pages	=	{20--30},
location	=	{Espoo, Finland},
doi	=	{10.1007/978-3-642-03770-2_9},
publisher	=	{Springer-Verlag},
address	=	{Berlin, Heidelberg},
area	=	"Par:M",

}

% 16th EuroPVM/MPI contribution (Espoo, 2009). The doi field holds the bare
% DOI, without a resolver prefix, matching the other doi fields in this file.
@InProceedings{1612212,

author	=	{Gropp, William},
title	=	{{MPI} at {E}xascale: {C}hallenges for Data Structures and Algorithms},
booktitle	=	{Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2009},
isbn	=	{978-3-642-03769-6},
pages	=	{3},
location	=	{Espoo, Finland},
doi	=	{10.1007/978-3-642-03770-2_3},
publisher	=	{Springer-Verlag},
address	=	{Berlin, Heidelberg},
area	=	"Par:M",

}

% 16th EuroPVM/MPI paper (Espoo, 2009). The doi field holds the bare DOI,
% without a resolver prefix, matching the other doi fields in this file.
@InProceedings{1612222,

author	=	{Ross, Robert and Latham, Robert and Gropp, William and Lusk, Ewing and Thakur, Rajeev},
title	=	{Processing {MPI} Datatypes Outside {MPI}},
booktitle	=	{Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2009},
isbn	=	{978-3-642-03769-6},
pages	=	{42--53},
location	=	{Espoo, Finland},
doi	=	{10.1007/978-3-642-03770-2_11},
publisher	=	{Springer-Verlag},
address	=	{Berlin, Heidelberg},
area	=	"Par:M:Datatype",

}

% 16th EuroPVM/MPI paper (Espoo, 2009); the student field names the student
% author. The doi field holds the bare DOI, without a resolver prefix,
% matching the other doi fields in this file.
@InProceedings{1612262,

author	=	{Zhu, Hao and Goodell, David and Gropp, William and Thakur, Rajeev},
title	=	{Hierarchical Collectives in {MPICH2}},
booktitle	=	{Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
year	=	{2009},
isbn	=	{978-3-642-03769-6},
pages	=	{325--326},
location	=	{Espoo, Finland},
doi	=	{10.1007/978-3-642-03770-2_41},
publisher	=	{Springer-Verlag},
address	=	{Berlin, Heidelberg},
student	=	"Hao Zhu",
area	=	"Par:M:Coll",

}

% CCGRID 2009 paper. The doi field holds the bare DOI, and "InfiniBand" is
% braced as a whole word so sentence-casing styles keep its internal capital.
@inproceedings{1577927,

author	=	{Santhanaraman, G. and Balaji, P. and Gopalakrishnan, K. and Thakur, R. and Gropp, W. and Panda, D. K.},
title	=	{Natively Supporting True One-Sided Communication in {MPI} on Multi-core Systems with {InfiniBand}},
booktitle	=	{CCGRID '09: Proceedings of the 2009 9th IEEE/ACM International Symposium on Cluster Computing and the Grid},
year	=	{2009},
isbn	=	{978-0-7695-3622-4},
pages	=	{380--387},
doi	=	{10.1109/CCGRID.2009.85},
publisher	=	{IEEE Computer Society},
address	=	{Washington, DC, USA},
area	=	"Par:M:RMA",

}

% ICPP 2009 paper. The doi field holds the bare DOI, and the umlaut is
% braced as a BibTeX special character.
@inproceedings{1679706,

author	=	{Tipparaju, Vinod and Gropp, William and Ritzdorf, Hubert and Thakur, Rajeev and Tr{\"a}ff, Jesper L.},
title	=	{Investigating High Performance {RMA} Interfaces for the {MPI-3} Standard},
booktitle	=	{ICPP '09: Proceedings of the 2009 International Conference on Parallel Processing},
year	=	{2009},
isbn	=	{978-0-7695-3802-0},
pages	=	{293--300},
doi	=	{10.1109/ICPP.2009.54},
publisher	=	{IEEE Computer Society},
address	=	{Washington, DC, USA},
area	=	"Par:M:RMA",

}

%% 2010
% GPU performance-modeling paper (PPoPP 2010); the student field names the
% student author.
@InProceedings{bagh10,
title	=	{An Adaptive Performance Modeling Tool for {GPU} Architectures},
author	=	{Sara S. Baghsorkhi and Matthieu Delahaye and Sanjay J. Patel and William D. Gropp and Wen{-mei} W. Hwu},
booktitle	=	{Proceedings of the 15th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPOPP 2010, Bangalore, India, January 9-14, 2010},
editor	=	{R. Govindarajan and David A. Padua and Mary W. Hall},
publisher	=	{ACM},
pages	=	{105--114},
year	=	2010,
student	=	"Sara Baghsorkhi",
area	=	"GPU:Perf",
}

% Exascale feasibility study (IPDPS 2010), record imported from IEEE Xplore.
% The booktitle is restored from the inverted catalogue form, which had
% dropped the ampersand between "Parallel" and "Distributed".
@InProceedings{gahvari10,

author	=	{Gahvari, H. and Gropp, W.},
booktitle	=	{2010 IEEE International Symposium on Parallel \& Distributed Processing (IPDPS)},
title	=	{An introductory exascale feasibility study for {FFTs} and multigrid},
year	=	{2010},
pages	=	{1--9},
abstract	=	{The coming decade is going to see a push towards exascale computing. Assuming gigahertz cores, this means exascale systems will have between 100 million and 1 billion of them to achieve this level of performance. At this scale, some important questions need to be answered on the applications end. What applications are feasible at this scale? What needs to be done to make them scalable? How does the hardware have to adapt to meet application needs? In this paper, we introduce a new feasibility-based approach to answering these questions. Our approach involves finding upper and lower bounds on problem size and machine parameters to determine a feasibility region for the application in question. As the underlying architecture of a future exascale machine is currently unknown, we use LogP-based performance models and vary machine parameters to give architecture-independent hardware constraints. We consider both strong-scaling and weak-scaling scenarios, and present results for two applications, the Fast Fourier Transform and basic geometric multigrid. The results show substantial constraints that need to be satisfied to enable exascale performance.},
keywords	=	{FFT;LogP-based performance models;architecture-independent hardware constraints;exascale computing;exascale feasibility study;fast Fourier transform;geometric multigrid;parallel computation;algorithm theory;fast Fourier transforms;grid computing;},
doi	=	{10.1109/IPDPS.2010.5470417},
ISSN	=	{1530-2075},
area	=	"Par:Perf:App",

}

% EuroMPI 2010 paper (LNCS volume 6305). The empty isbn field is removed
% because BibTeX treats an empty field as missing and validators warn on it.
@inproceedings{sack-exascale-10,

author	=	{Sack, Paul and Gropp, William},
affiliation	=	{University of Illinois at Urbana-Champaign},
title	=	{A Scalable {MPI\_Comm\_split} Algorithm for Exascale Computing},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Keller, Rainer and Gabriel, Edgar and Resch, Michael and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
pages	=	{1--10},
volume	=	{6305},
url	=	{http://dx.doi.org/10.1007/978-3-642-15646-5_1},
year	=	{2010},
area	=	"Par:M:Coll",

}

% EuroMPI 2010 paper (LNCS volume 6305). The umlaut is braced as a BibTeX
% special character.
@inproceedings{hoefler-model-10,

author	=	{Hoefler, Torsten and Gropp, William and Thakur, Rajeev and Tr{\"a}ff, Jesper},
affiliation	=	{University of Illinois at Urbana-Champaign, IL USA},
title	=	{Toward Performance Models of {MPI} Implementations for Understanding Application Scaling Issues},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Keller, Rainer and Gabriel, Edgar and Resch, Michael and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
pages	=	{21--30},
volume	=	{6305},
url	=	{http://dx.doi.org/10.1007/978-3-642-15646-5_3},
year	=	{2010},
area	=	"Par:M:Perf",

}

% EuroMPI 2010 paper (LNCS volume 6305). The empty isbn field is removed,
% and the first author's accents are braced as BibTeX special characters.
@inproceedings{dozsa-threads-10,

author	=	{D{\'o}zsa, G{\'a}bor and Kumar, Sameer and Balaji, Pavan and Buntinas, Darius and Goodell, David and Gropp, William and Ratterman, Joe and Thakur, Rajeev},
affiliation	=	{IBM T. J. Watson Research Center, Yorktown Heights, NY 10598},
title	=	{Enabling Concurrent Multithreaded {MPI} Communication on Multicore Petascale Systems},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Keller, Rainer and Gabriel, Edgar and Resch, Michael and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
pages	=	{11--20},
volume	=	{6305},
url	=	{http://dx.doi.org/10.1007/978-3-642-15646-5_2},
year	=	{2010},
area	=	"Par:M:Thread",

}

@inproceedings{balaji-pmi-10,

author	=	{Balaji, Pavan and Buntinas, Darius and Goodell, David and Gropp, William and Krishna, Jayesh and Lusk, Ewing and Thakur, Rajeev},
affiliation	=	{Argonne National Laboratory, Argonne, IL 60439, USA},
title	=	{{PMI}: A Scalable Parallel Process-Management Interface for Extreme-Scale Systems},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Keller, Rainer and Gabriel, Edgar and Resch, Michael and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
isbn	=	{978-3-642-15645-8},
pages	=	{31--41},
volume	=	{6305},
url	=	{http://dx.doi.org/10.1007/978-3-642-15646-5_4},
year	=	{2010},
area	=	"Par:M:PMI",

}

@inproceedings{kale-mpi-10,

title	=	{Load Balancing for Regular Meshes on {SMP}s with {MPI}},
author	=	{Vivek Kale and William Gropp},
affiliation	=	{University of Illinois at Urbana-Champaign, IL USA},
editor	=	{Rainer Keller and Edgar Gabriel and Michael Resch and Jack Dongarra},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
volume	=	{6305},
pages	=	{229--238},
publisher	=	{Springer Berlin / Heidelberg},
year	=	{2010},
url	=	{http://dx.doi.org/10.1007/978-3-642-15646-5_24},
area	=	"Par:M:Perf",

}

@InProceedings{10.1109/CLUSTER.2010.11,

author	=	{David Goodell and Pavan Balaji and Darius Buntinas and Gabor Dozsa and William Gropp and Sameer Kumar and Bronis R. de Supinski and Rajeev Thakur},
title	=	{Minimizing {MPI} Resource Contention in Multithreaded Multicore Environments},
booktitle	=	{IEEE International Conference on Cluster Computing},
isbn	=	{978-0-7695-4220-1},
year	=	{2010},
pages	=	{1--8},
doi	=	{10.1109/CLUSTER.2010.11},
publisher	=	{IEEE Computer Society},
address	=	{Los Alamitos, CA, USA},
area	=	"Par:M:Thread",

}

%% 2011
@InProceedings{conf/ics/Gropp11,

author	=	{Gropp, William D.},
title	=	{Performance modeling as the key to extreme scale computing},
booktitle	=	{Proceedings of the 25th International Conference on Supercomputing, 2011, Tucson, {AZ}, {USA}, May 31 - June 04, 2011},
editor	=	{Lowenthal, David K. and de Supinski, Bronis R. and McKee, Sally A.},
publisher	=	{ACM},
year	=	{2011},
pages	=	{213},
isbn	=	{978-1-4503-0102-2},
url	=	{http://doi.acm.org/10.1145/1995896.1995930},
bibdate	=	{2011-06-09},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ics/ics2011.html#Gropp11},
area	=	"Par:Perf",

}

@InProceedings{conf/pvm/GoodellGZT11,

author	=	{Goodell, David and Gropp, William and Zhao, Xin and Thakur, Rajeev},
title	=	{Scalable Memory Use in {MPI}: {A} Case Study with {MPICH2}},
booktitle	=	{Recent Advances in the Message Passing Interface - 18th European {MPI} Users' Group Meeting, Euro{MPI} 2011, Santorini, Greece, September 18-21, 2011. Proceedings},
editor	=	{Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios S. and Dongarra, Jack},
series	=	{Lecture Notes in Computer Science},
volume	=	{6960},
pages	=	{140--149},
publisher	=	{Springer},
year	=	{2011},
isbn	=	{978-3-642-24448-3},
url	=	{http://dx.doi.org/10.1007/978-3-642-24449-0},
bibdate	=	{2011-09-13},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2011.html#GoodellGZT11},
area	=	"Par:M",

}

@InProceedings{conf/pvm/RashtiGBAG11,

author	=	{Rashti, Mohammad J. and Green, Jonathan and Balaji, Pavan and Afsahi, Ahmad and Gropp, William},
title	=	{Multi-core and Network Aware {MPI} Topology Functions},
booktitle	=	{Recent Advances in the Message Passing Interface - 18th European {MPI} Users' Group Meeting, Euro{MPI} 2011, Santorini, Greece, September 18-21, 2011. Proceedings},
editor	=	{Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios S. and Dongarra, Jack},
series	=	{Lecture Notes in Computer Science},
volume	=	{6960},
pages	=	{50--60},
publisher	=	{Springer},
year	=	{2011},
isbn	=	{978-3-642-24448-3},
url	=	{http://dx.doi.org/10.1007/978-3-642-24449-0},
bibdate	=	{2011-09-13},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2011.html#RashtiGBAG11},
area	=	"Par:M",

}

@InProceedings{conf/pvm/GroppHTT11,

author	=	{Gropp, William and Hoefler, Torsten and Thakur, Rajeev and Tr{\"a}ff, Jesper Larsson},
title	=	{Performance Expectations and Guidelines for {MPI} Derived Datatypes},
booktitle	=	{Recent Advances in the Message Passing Interface - 18th European {MPI} Users' Group Meeting, Euro{MPI} 2011, Santorini, Greece, September 18-21, 2011. Proceedings},
editor	=	{Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios S. and Dongarra, Jack},
series	=	{Lecture Notes in Computer Science},
volume	=	{6960},
pages	=	{150--159},
publisher	=	{Springer},
year	=	{2011},
isbn	=	{978-3-642-24448-3},
url	=	{http://dx.doi.org/10.1007/978-3-642-24449-0},
bibdate	=	{2011-09-13},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2011.html#GroppHTT11},
area	=	"Par:M:Perf:Datatype",

}

@InProceedings{conf/ipps/ChenSTRG11,

author	=	{Chen, Yong and Sun, Xian-He and Thakur, Rajeev and Roth, Philip C. and Gropp, William D.},
title	=	{{LACIO}: {A} New Collective {I}/{O} Strategy for Parallel {I}/{O} Systems},
booktitle	=	{IPDPS},
pages	=	{794--804},
publisher	=	{IEEE},
year	=	{2011},
isbn	=	{978-1-61284-372-8},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6011824},
bibdate	=	{2011-09-09},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ipps/ipdps2011.html#ChenSTRG11},
area	=	"Par:I",

}

@InProceedings{conf/ipps/BhateleJGWGK11,

author	=	{Bhatele, Abhinav and Jetley, Pritish and Gahvari, Hormozd and Wesolowski, Lukasz and Gropp, William D. and Kal{\'e}, Laxmikant V.},
title	=	{Architectural Constraints to Attain 1 Exaflop/s for Three Scientific Application Classes},
booktitle	=	{IPDPS},
pages	=	{80--91},
publisher	=	{IEEE},
year	=	{2011},
isbn	=	{978-1-61284-372-8},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6011824},
bibdate	=	{2011-09-09},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ipps/ipdps2011.html#BhateleJGWGK11},
area	=	"Par:Perf",

}

@InProceedings{conf/ics/GahvariBSYJG11,

author	=	{Gahvari, Hormozd and Baker, Allison H. and Schulz, Martin and Yang, Ulrike Meier and Jordan, Kirk E. and Gropp, William},
title	=	{Modeling the performance of an algebraic multigrid cycle on {HPC} platforms},
booktitle	=	{Proceedings of the 25th International Conference on Supercomputing, 2011, Tucson, {AZ}, {USA}, May 31 - June 04, 2011},
editor	=	{Lowenthal, David K. and de Supinski, Bronis R. and McKee, Sally A.},
publisher	=	{ACM},
year	=	{2011},
pages	=	{172--181},
isbn	=	{978-1-4503-0102-2},
url	=	{http://doi.acm.org/10.1145/1995896.1995924},
bibdate	=	{2011-06-09},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ics/ics2011.html#GahvariBSYJG11},
area	=	"Par:Perf",

}

@inproceedings{DBLP:conf/sc/BhateleJGK11,

author	=	{Abhinav Bhatele and Nikhil Jain and William D. Gropp and Laxmikant V. Kal{\'e}},
title	=	{Avoiding hot-spots on two-level direct networks},
booktitle	=	{Conference on High Performance Computing Networking, Storage and Analysis, SC 2011, Seattle, WA, USA, November 12-18, 2011},
year	=	{2011},
pages	=	{76},
url	=	{http://doi.acm.org/10.1145/2063384.2063486},
doi	=	{10.1145/2063384.2063486},
editor	=	{Scott Lathrop and Jim Costa and William Kramer},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
isbn	=	{978-1-4503-0771-0},
publisher	=	{ACM},

}

@inproceedings{Hoefler:2011:PMS:2063348.2063356,

title	=	{Performance Modeling for Systematic Performance Tuning},
author	=	{Torsten Hoefler and William Gropp and William Kramer and Marc Snir},
booktitle	=	{State of the Practice Reports},
series	=	{SC '11},
location	=	{Seattle, Washington},
year	=	{2011},
pages	=	{6:1--6:12},
articleno	=	{6},
numpages	=	{12},
publisher	=	{ACM},
address	=	{New York, NY, USA},
isbn	=	{978-1-4503-1139-7},
doi	=	{10.1145/2063348.2063356},
url	=	{http://doi.acm.org/10.1145/2063348.2063356},
acmid	=	{2063356},

}

@inproceedings{kale2011weighted,

title	=	{Weighted locality-sensitive scheduling for mitigating noise on multi-core clusters},
author	=	{Kale, Vivek and Bhatele, Abhinav and Gropp, William D.},
booktitle	=	{High Performance Computing (HiPC), 2011 18th International Conference on},
pages	=	{1--10},
year	=	{2011},
organization	=	{IEEE}

}

%% 2012
@inproceedings{Sack:2012:FTC:2145816.2145823,

title	=	{Faster topology-aware collective algorithms through non-minimal communication},
author	=	{Paul Sack and William Gropp},
booktitle	=	{Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming},
series	=	{PPoPP '12},
location	=	{New Orleans, Louisiana, USA},
year	=	{2012},
pages	=	{45--54},
numpages	=	{10},
publisher	=	{ACM},
address	=	{New York, NY, USA},
isbn	=	{978-1-4503-1160-1},
doi	=	{10.1145/2145816.2145823},
url	=	{http://doi.acm.org/10.1145/2145816.2145823},
acmid	=	{2145823},
keywords	=	{collective-communication algorithms},
note	=	{Best Paper}

}

@inproceedings{Donfack:2012:HSS:2357496.2358627,

title	=	{Hybrid Static/dynamic Scheduling for Already Optimized Dense Matrix Factorization},
author	=	{Simplice Donfack and Laura Grigori and William D. Gropp and Vivek Kale},
booktitle	=	{Proceedings of the 2012 IEEE 26th International Parallel and Distributed Processing Symposium},
series	=	{IPDPS '12},
year	=	{2012},
pages	=	{496--507},
numpages	=	{12},
publisher	=	{IEEE Computer Society},
address	=	{Washington, DC, USA},
isbn	=	{978-0-7695-4675-9},
doi	=	{10.1109/IPDPS.2012.53},
url	=	{http://dx.doi.org/10.1109/IPDPS.2012.53},
acmid	=	{2358627},
keywords	=	{dynamic scheduling, communication-avoiding, LU factorization, numerical linear algebra},

}

@inproceedings{mpi-sharedmem-12,

author	=	{Hoefler, Torsten and Dinan, James and Buntinas, Darius and Balaji, Pavan and Barrett, Brian and Brightwell, Ron and Gropp, William and Kale, Vivek and Thakur, Rajeev},
affiliation	=	{University of Illinois, Urbana, IL, USA},
title	=	{Leveraging {MPI}'s One-Sided Communication Interface for Shared-Memory Programming},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Tr{\"a}ff, Jesper Larsson and Benkner, Siegfried and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
isbn	=	{978-3-642-33517-4},
keywords	=	{Computer Science},
pages	=	{132--141},
volume	=	{7490},
url	=	{http://dx.doi.org/10.1007/978-3-642-33518-1_18},
year	=	{2012}

}

@inproceedings{mpi-success-12,

author	=	{Gropp, William},
affiliation	=	{University of Illinois at Urbana-Champaign, USA},
title	=	{{MPI} 3 and Beyond: Why {MPI} Is Successful and What Challenges It Faces},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Tr{\"a}ff, Jesper Larsson and Benkner, Siegfried and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
isbn	=	{978-3-642-33517-4},
keywords	=	{Computer Science},
pages	=	{1--9},
volume	=	{7490},
url	=	{http://dx.doi.org/10.1007/978-3-642-33518-1_1},
year	=	{2012}

}

@inproceedings{contextid-12,

author	=	{Dinan, James and Goodell, David and Gropp, William and Thakur, Rajeev and Balaji, Pavan},
affiliation	=	{Argonne National Laboratory, USA},
title	=	{Efficient Multithreaded Context {ID} Allocation in {MPI}},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Tr{\"a}ff, Jesper Larsson and Benkner, Siegfried and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
isbn	=	{978-3-642-33517-4},
keywords	=	{Computer Science},
pages	=	{57--66},
volume	=	{7490},
url	=	{http://dx.doi.org/10.1007/978-3-642-33518-1_11},
year	=	{2012}

}

@inproceedings{adaptive-rma-12,

author	=	{Zhao, Xin and Santhanaraman, Gopalakrishnan and Gropp, William},
affiliation	=	{University of Illinois at Urbana-Champaign, Urbana, IL 61801, USA},
title	=	{Adaptive Strategy for One-Sided Communication in {MPICH2}},
booktitle	=	{Recent Advances in the Message Passing Interface},
series	=	{Lecture Notes in Computer Science},
editor	=	{Tr{\"a}ff, Jesper Larsson and Benkner, Siegfried and Dongarra, Jack},
publisher	=	{Springer Berlin / Heidelberg},
isbn	=	{978-3-642-33517-4},
keywords	=	{Computer Science},
pages	=	{16--26},
volume	=	{7490},
url	=	{http://dx.doi.org/10.1007/978-3-642-33518-1_7},
year	=	{2012}

}

@inproceedings{DBLP:conf/icpp/GahvariGJSY12,

author	=	{Hormozd Gahvari and William Gropp and Kirk E. Jordan and Martin Schulz and Ulrike Meier Yang},
title	=	{Modeling the Performance of an Algebraic Multigrid Cycle Using Hybrid {MPI/OpenMP}},
booktitle	=	{ICPP},
year	=	{2012},
pages	=	{128--137},
url	=	{http://doi.ieeecomputersociety.org/10.1109/ICPP.2012.41},
doi	=	{10.1109/ICPP.2012.41},
crossref	=	{DBLP:conf/icpp/2012},
bibsource	=	{DBLP, http://dblp.uni-trier.de}

}

% Crossref parent for the ICPP 2012 paper(s); classic BibTeX requires a
% parent entry to appear after every entry that crossrefs it.
@proceedings{DBLP:conf/icpp/2012,

booktitle	=	{ICPP},
title	=	{41st International Conference on Parallel Processing, ICPP 2012, Pittsburgh, PA, USA, September 10-13, 2012},
year	=	{2012},
publisher	=	{IEEE Computer Society},
isbn	=	{978-1-4673-2508-0},
bibsource	=	{DBLP, http://dblp.uni-trier.de},
ee	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6336540}

}

@inproceedings{chen2012decoupled,

title	=	{A Decoupled Execution Paradigm for Data-Intensive High-End Computing},
author	=	{Chen, Yong and Chen, Chao and Sun, Xian-He and Gropp, William D. and Thakur, Rajeev},
booktitle	=	{Cluster Computing (CLUSTER), 2012 IEEE International Conference on},
pages	=	{200--208},
year	=	{2012},
organization	=	{IEEE}

}

@inproceedings{10.1109/SC.Companion.2012.19,

author	=	{Philip Carns and Kevin Harms and Dries Kimpe and Robert Ross and Justin Wozniak and Lee Ward and Matthew Curry and Ruth Klundt and Geoff Danielson and Cengiz Karakoyunlu and John Chandy and Bradley Settlemyer and William Gropp},
title	=	{A Case for Optimistic Coordination in {HPC} Storage Systems},
booktitle	=	{2012 {SC} Companion: High Performance Computing, Networking Storage and Analysis},
isbn	=	{978-1-4673-3049-7},
year	=	{2012},
pages	=	{48--53},
doi	=	{10.1109/SC.Companion.2012.19},
publisher	=	{IEEE Computer Society},
address	=	{Los Alamitos, CA, USA},

}

%% 2013
@InProceedings{zhao13-am-mpi,

author	=	{Zhao, Xin and Buntinas, Darius and Zounmevo, Judicael A. and Dinan, James and Goodell, David and Balaji, Pavan and Thakur, Rajeev and Afsahi, Ahmad and Gropp, William},
booktitle	=	{Cluster, Cloud and Grid Computing (CCGrid), 2013 13th IEEE/ACM International Symposium on},
title	=	{Toward Asynchronous and {MPI}-Interoperable Active Messages},
year	=	{2013},
pages	=	{87--94},
keywords	=	{application program interfaces;message passing;open systems;parallel processing;shared memory systems;MPI-interoperable active messages;asynchronous active message;data-intensive application;irregular communication patterns;parallelization;shared memory;Active messages;Asynchronous progress;Data-intensive applications;Interoperable;MPI},
doi	=	{10.1109/CCGrid.2013.84},

}

@InProceedings{6844416,

author	=	{Zhao, Xin and Balaji, Pavan and Gropp, William and Thakur, Rajeev},
booktitle	=	{Dependable, Autonomic and Secure Computing (DASC), 2013 IEEE 11th International Conference on},
title	=	{Optimization Strategies for {MPI}-Interoperable Active Messages},
year	=	{2013},
month	=	dec,
pages	=	{508--515},
keywords	=	{Active messages;Data-intensive applications;MPI;Multicore;RMA},
doi	=	{10.1109/DASC.2013.116},

}

@InProceedings{6808175,

author	=	{Zhao, Xin and Balaji, Pavan and Gropp, William and Thakur, Rajeev},
booktitle	=	{Parallel and Distributed Systems (ICPADS), 2013 International Conference on},
title	=	{{MPI}-Interoperable Generalized Active Messages},
year	=	{2013},
month	=	dec,
pages	=	{200--207},
keywords	=	{application program interfaces;message passing;open systems;AM model;API;MPI ACCUMULATE operations;MPI GET ACCUMULATE operations;MPI-interoperable generalized active messages;data movement approach;scientific computing;Computational modeling;Concurrent computing;Data models;Layout;Libraries;Message systems;Semantics},
doi	=	{10.1109/ICPADS.2013.38},
ISSN	=	{1521-9097},

}

@InProceedings{deflatedgmress13,

year	=	{2013},
isbn	=	{978-3-642-35274-4},
booktitle	=	{Domain Decomposition Methods in Science and Engineering XX},
volume	=	{91},
series	=	{Lecture Notes in Computational Science and Engineering},
editor	=	{Bank, Randolph and Holst, Michael and Widlund, Olof and Xu, Jinchao},
doi	=	{10.1007/978-3-642-35275-1_75},
title	=	{Parallel Adaptive Deflated {GMRES}},
url	=	{http://dx.doi.org/10.1007/978-3-642-35275-1_75},
publisher	=	{Springer Berlin Heidelberg},
author	=	{Nuentsa Wakam, D{\'e}sir{\'e} and Erhel, Jocelyne and Gropp, William D.},
pages	=	{631--638},
language	=	{English}

}

@InProceedings{6702642,

title	=	{Runtime system design of decoupled execution paradigm for data-intensive high-end computing},
author	=	{Kun Feng and Yanlong Yin and Chao Chen and Hassan Eslami and Xian-He Sun and Yong Chen and Rajeev Thakur and William Gropp},
booktitle	=	{Cluster Computing (CLUSTER), 2013 IEEE International Conference on},
pages	=	{1},
year	=	{2013},
doi	=	{10.1109/CLUSTER.2013.6702642},

}

@InProceedings{conf/pvm/PenaCDBTG13,

title	=	"Analysis of topology-dependent {MPI} performance on {G}emini networks",
author	=	"Antonio J. Pe{\~n}a and Ralf G. Correa Carvalho and James Dinan and Pavan Balaji and Rajeev Thakur and William Gropp",
bibdate	=	"2013-09-09",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2013.html#PenaCDBTG13",
booktitle	=	"20th European {MPI} Users' Group Meeting, Euro{MPI} '13, Madrid, Spain - September 15 - 18, 2013",
publisher	=	"ACM",
year	=	"2013",
editor	=	"Jack Dongarra and Garc{\'i}a Blas, Javier and Jes{\'u}s Carretero",
ISBN	=	"978-1-4503-1903-4",
pages	=	"61--66",
URL	=	"http://dl.acm.org/citation.cfm?id=2488551",

}

@InProceedings{conf/ipps/RandlesKHGK13,

author	=	{Randles, Amanda Peters and Kale, Vivek and Hammond, Jeff and Gropp, William and Kaxiras, Efthimios},
title	=	{Performance Analysis of the Lattice {B}oltzmann Model Beyond {N}avier-{S}tokes},
booktitle	=	{IPDPS},
crossref	=	{conf/ipps/2013},
pages	=	{1063--1074},
publisher	=	{IEEE Computer Society},
year	=	{2013},
isbn	=	{978-1-4673-6066-1},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6569024},
bibdate	=	{2013-08-23},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ipps/ipdps2013.html#RandlesKHGK13},

}

@InProceedings{conf/ipps/GahvariGJSY13,

author	=	{Gahvari, Hormozd and Gropp, William and Jordan, Kirk E. and Schulz, Martin and Yang, Ulrike Meier},
title	=	{Systematic Reduction of Data Movement in Algebraic Multigrid Solvers},
booktitle	=	{IPDPS Workshops},
crossref	=	{conf/ipps/2013w},
pages	=	{1675--1682},
publisher	=	{IEEE},
year	=	{2013},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6603430},
bibdate	=	{2013-11-13},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ipps/ipdps2013w.html#GahvariGJSY13},

}

%% 2014
@InProceedings{conf/pvm/KaleRG14,

title	=	"Locality-Optimized Mixed Static/Dynamic Scheduling for Improving Load Balancing on {SMP}s",
author	=	"Vivek Kale and Amanda Peters Randles and William D. Gropp",
bibdate	=	"2014-09-18",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2014.html#KaleRG14",
booktitle	=	"21st European {MPI} Users' Group Meeting, Euro{MPI}/{ASIA} '14, Kyoto, Japan - September 09 - 12, 2014",
publisher	=	"ACM",
year	=	"2014",
editor	=	"Jack Dongarra and Yutaka Ishikawa and Atsushi Hori",
ISBN	=	"978-1-4503-2875-3",
pages	=	"115",
URL	=	"http://dl.acm.org/citation.cfm?id=2642769",

}

@inproceedings{Yin:2014:RKS:2689684.2689691,

title	=	{Rethinking Key-value Store for Parallel {I/O} Optimization},
author	=	{Yanlong Yin and Antonios Kougkas and Kun Feng and Hassan Eslami and Yin Lu and Xian-He Sun and Rajeev Thakur and William Gropp},
booktitle	=	{Proceedings of the 2014 International Workshop on Data Intensive Scalable Computing Systems},
series	=	{DISCS '14},
location	=	{New Orleans, Louisiana},
year	=	{2014},
pages	=	{33--40},
numpages	=	{8},
publisher	=	{IEEE Press},
address	=	{Piscataway, NJ, USA},
isbn	=	{978-1-4799-7038-4},
doi	=	{10.1109/DISCS.2014.11},
url	=	{http://dx.doi.org/10.1109/DISCS.2014.11},
acmid	=	{2689691},

}

@inproceedings{Zounmevo:2014:NEM:2683593.2683646,

author	=	{Zounmevo, Judicael A. and Zhao, Xin and Balaji, Pavan and Gropp, William and Afsahi, Ahmad},
title	=	{Nonblocking Epochs in {MPI} One-sided Communication},
booktitle	=	{Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
series	=	{SC '14},
year	=	{2014},
isbn	=	{978-1-4799-5500-8},
location	=	{New Orleans, Louisiana},
pages	=	{475--486},
numpages	=	{12},
url	=	{http://dx.doi.org/10.1109/SC.2014.44},
doi	=	{10.1109/SC.2014.44},
acmid	=	{2683646},
publisher	=	{IEEE Press},
address	=	{Piscataway, NJ, USA},
keywords	=	{MPI, RMA, latency propagation, nonblocking synchronizations, one-sided},
note	=	{Best paper finalist},

}

%% 2015

@InProceedings{Gahvari15-AMG-Dragonfly,

author	=	{Hormozd Gahvari and William Gropp and Kirk E. Jordan and Martin Schulz and Ulrike Meier Yang},
title	=	{Algebraic Multigrid on a {D}ragonfly Network: First Experiences on a {Cray XC30}},
booktitle	=	{High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation},
editor	=	{Stephen A. Jarvis and Steven A. Wright and Simon D. Hammond},
series	=	{Lecture Notes in Computer Science},
volume	=	{8966},
pages	=	{3--23},
publisher	=	{Springer International Publishing},
year	=	{2015},
isbn	=	{978-3-319-17247-7},
doi	=	{10.1007/978-3-319-17248-4_1},
url	=	{http://dx.doi.org/10.1007/978-3-319-17248-4_1},
language	=	{English}

}

@inproceedings{CalhounOlsonSnirGropp:2015:FR_AMG,

title	=	{Towards a More Fault Resilient Multigrid Solver},
author	=	{Jon Calhoun and Luke Olson and Marc Snir and William D. Gropp},
booktitle	=	{Proceedings of the High Performance Computing Symposium},
series	=	{HPC '15},
location	=	{Alexandria, VA, USA},
year	=	{2015},
publisher	=	{Society for Computer Simulation International},
address	=	{San Diego, CA, USA},
keywords	=	{Algebraic Multigrid, Silent Errors, Fault Tolerance, Resilience}

}

@InProceedings{conf/ccgrid/ZhaoBG15,

author	=	{Zhao, Xin and Balaji, Pavan and Gropp, William},
title	=	{Runtime Support for Irregular Computation in {MPI}-Based Applications},
booktitle	=	{CCGrid},
crossref	=	{conf/ccgrid/2015},
pages	=	{701--704},
publisher	=	{IEEE},
year	=	{2015},
isbn	=	{978-1-4799-8006-2},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7152188},
bibdate	=	{2015-07-14},
bibsource	=	{DBLP, http://dblp.uni-trier.de/db/conf/ccgrid/ccgrid2015.html#ZhaoBG15},

}

@InProceedings{conf/hpdc/LuuWGRCHPBY15,

title	=	"A Multiplatform Study of {I}/{O} Behavior on Petascale Supercomputers",
author	=	"Huong Luu and Marianne Winslett and William Gropp and Robert B. Ross and Philip H. Carns and Kevin Harms and Prabhat and Surendra Byna and Yushu Yao",
bibdate	=	"2015-06-10",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/hpdc/hpdc2015.html#LuuWGRCHPBY15",
booktitle	=	"Proceedings of the 24th International Symposium on High-Performance Parallel and Distributed Computing, {HPDC} 2015, Portland, {OR}, {USA}, June 15-19, 2015",
publisher	=	"ACM",
year	=	"2015",
editor	=	"Thilo Kielmann and Dean Hildebrand and Michela Taufer",
ISBN	=	"978-1-4503-3550-8",
pages	=	"33--44",
URL	=	"http://dl.acm.org/citation.cfm?id=2749246",

}

@InProceedings{conf/pvm/PrabhuG15,

title	=	"{DAME}: {A} Runtime-Compiled Engine for Derived Datatypes",
author	=	"Tarun Prabhu and William Gropp",
bibdate	=	"2015-12-11",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/pvm/eurompi2015.html#PrabhuG15",
booktitle	=	"Proceedings of the 22nd European {MPI} Users' Group Meeting, Euro{MPI} 2015, Bordeaux, France, September 21-23, 2015",
publisher	=	"ACM",
year	=	"2015",
editor	=	"Jack J. Dongarra and Alexandre Denis and Brice Goglin and Emmanuel Jeannot and Guillaume Mercier",
ISBN	=	"978-1-4503-3795-3",
pages	=	"4:1--4:10",
URL	=	"http://dl.acm.org/citation.cfm?id=2802658",
note	=	"Best paper",

}

@InProceedings{conf/iwomp/KaleG15,

title	=	"Composing Low-Overhead Scheduling Strategies for Improving Performance of Scientific Applications",
author	=	"Vivek Kale and William D. Gropp",
bibdate	=	"2015-10-01",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/iwomp/iwomp2015.html#KaleG15",
booktitle	=	"Open{MP}: Heterogenous Execution and Data Movements - 11th International Workshop on Open{MP}, {IWOMP} 2015, Aachen, Germany, October 1-2, 2015, Proceedings",
publisher	=	"Springer",
year	=	"2015",
volume	=	"9342",
editor	=	"Christian Terboven and Bronis R. de Supinski and Pablo Reble and Barbara M. Chapman and Matthias S. M{\"u}ller",
ISBN	=	"978-3-319-24594-2",
pages	=	"18--29",
series	=	"Lecture Notes in Computer Science",
URL	=	"http://dx.doi.org/10.1007/978-3-319-24595-9",

}

@inproceedings{Eslami:2015:EDS:2831244.2831249,

title	=	{Efficient Disk-to-disk Sorting: A Case Study in the Decoupled Execution Paradigm},
author	=	{Hassan Eslami and Anthony Kougkas and Maria Kotsifakou and Theodoros Kasampalis and Kun Feng and Yin Lu and William Gropp and Xian-He Sun and Yong Chen and Rajeev Thakur},
booktitle	=	{Proceedings of the 2015 International Workshop on Data-Intensive Scalable Computing Systems},
series	=	{DISCS '15},
location	=	{Austin, Texas},
year	=	{2015},
pages	=	{2:1--2:8},
articleno	=	{2},
numpages	=	{8},
publisher	=	{ACM},
address	=	{New York, NY, USA},
isbn	=	{978-1-4503-3993-3},
doi	=	{10.1145/2831244.2831249},
url	=	{http://doi.acm.org/10.1145/2831244.2831249},
acmid	=	{2831249},
keywords	=	{decoupled execution paradigm, disk-to-disk sorting, parallel IO, parallel file system, performance optimization},

}

@InProceedings{petsc-cse15,

title	=	{An Overview of {PETSc}},
author	=	{Balay, Satish and Brown, Jed and Gropp, William and Knepley, Matthew and McInnes, Lois Curfman and Smith, Barry F. and Zhang, Hong},
booktitle	=	{2015 SIAM Conference on Computational Science and Engineering},
organization	=	{SIAM},
address	=	{Salt Lake City, Utah},
month	=	mar,
year	=	{2015},
pages	=	{274},
note	=	{Poster in Minisymposterium 103: Frameworks, Algorithms, and Scalable Technologies for Mathematics (FASTMath)}

}

@InProceedings{xpacc-cse15,

author	=	{William Gropp},
title	=	{Building Performance Transportable Codes for Extreme Scale},
booktitle	=	{2015 SIAM Conference on Computational Science and Engineering},
year	=	2015,
pages	=	287,
month	=	MAR,
address	=	{Salt Lake City, Utah},
organization	=	{SIAM},
note	=	{Poster in Minisymposterium 204: CSE Software},
annote	=	{\emph{Session won Best Minisymposterium}}

}

%% 2016

@inproceedings{Dang:2016:TMC:2966884.2966914,

title	=	{Towards Millions of Communicating Threads},
author	=	{Hoang-Vu Dang and Marc Snir and William Gropp},
booktitle	=	{Proceedings of the 23rd European MPI Users' Group Meeting},
series	=	{EuroMPI 2016},
location	=	{Edinburgh, United Kingdom},
year	=	{2016},
pages	=	{1--14},
numpages	=	{14},
publisher	=	{ACM},
address	=	{New York, NY, USA},
isbn	=	{978-1-4503-4234-6},
doi	=	{10.1145/2966884.2966914},
url	=	{http://doi.acm.org/10.1145/2966884.2966914},
acmid	=	{2966914},
keywords	=	{MPI, Message Passing Interface, communication, concurrent execution, multi-threading, runtime system},

}

@inproceedings{Gropp:2016:MMC:2966884.2966919,

title	=	{Modeling {MPI} Communication Performance on {SMP} Nodes: Is It Time to Retire the Ping Pong Test},
author	=	{William Gropp and Luke N. Olson and Philipp Samfass},
booktitle	=	{Proceedings of the 23rd European MPI Users' Group Meeting},
series	=	{EuroMPI 2016},
location	=	{Edinburgh, United Kingdom},
year	=	{2016},
pages	=	{41--50},
numpages	=	{10},
publisher	=	{ACM},
address	=	{New York, NY, USA},
isbn	=	{978-1-4503-4234-6},
doi	=	{10.1145/2966884.2966919},
url	=	{http://doi.acm.org/10.1145/2966884.2966919},
acmid	=	{2966919},
keywords	=	{bandwidth saturation, benchmark, communication, multi-core, parallel computing, performance model, ping pong, symmetric multiprocessor cluster},

}

@inproceedings{Eller:2016:SNP:3014904.3014928,

title	=	{Scalable Non-blocking Preconditioned Conjugate Gradient Methods},
author	=	{Paul R. Eller and William Gropp},
booktitle	=	{Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
series	=	{SC '16},
location	=	{Salt Lake City, Utah},
year	=	{2016},
pages	=	{18:1--18:12},
articleno	=	{18},
numpages	=	{12},
publisher	=	{IEEE Press},
address	=	{Piscataway, NJ, USA},
isbn	=	{978-1-4673-8815-3},
url	=	{http://dl.acm.org/citation.cfm?id=3014904.3014928},
acmid	=	{3014928},

}

@InProceedings{conf/ispdc/ZhaoBG16,

title	=	"Scalability Challenges in Current {MPI} One-Sided Implementations",
author	=	"Xin Zhao and Pavan Balaji and William Gropp",
bibdate	=	"2017-05-17",
bibsource	=	"DBLP, http://dblp.uni-trier.de/https://doi.org/10.1109/ISPDC.2016.14; DBLP, http://dblp.uni-trier.de/db/conf/ispdc/ispdc2016.html#ZhaoBG16",
booktitle	=	"15th International Symposium on Parallel and Distributed Computing, ISPDC 2016, Fuzhou, China, July 8-10, 2016",
publisher	=	"IEEE Computer Society",
year	=	"2016",
editor	=	"Riqing Chen and Chunming Rong and Dan Grigoras",
ISBN	=	"978-1-5090-4152-7",
pages	=	"38--47",
doi	=	"10.1109/ISPDC.2016.14",
URL	=	"http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7901691",

}

@InProceedings{conf/trustcom/ChenCYSTG16,

author	=	{Chen, Yong and Chen, Chao and Yin, Yanlong and Sun, Xian-He and Thakur, Rajeev and Gropp, William},
title	=	{Rethinking High Performance Computing System Architecture for Scientific Big Data Applications},
booktitle	=	{Trustcom/BigDataSE/ISPA},
crossref	=	{conf/trustcom/2016},
pages	=	{1605--1612},
publisher	=	{IEEE},
year	=	{2016},
isbn	=	{978-1-5090-3205-1},
url	=	{http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7845250},
bibdate	=	{2017-05-23},
bibsource	=	{DBLP, http://dblp.uni-trier.de/https://doi.org/10.1109/TrustCom.2016.0248; DBLP, http://dblp.uni-trier.de/db/conf/trustcom/trustcom2016.html#ChenCYSTG16},

}

%% 2017
@InProceedings{conf/hpdc/CalhounSOG17,

title	=	"Towards a More Complete Understanding of {SDC} Propagation",
author	=	"Jon Calhoun and Marc Snir and Luke N. Olson and William D. Gropp",
bibdate	=	"2017-06-25",
bibsource	=	"DBLP, http://dblp.uni-trier.de/db/conf/hpdc/hpdc2017.html#CalhounSOG17",
booktitle	=	"Proceedings of the 26th International Symposium on High-Performance Parallel and Distributed Computing, HPDC 2017, Washington, DC, USA, June 26-30, 2017",
publisher	=	"ACM",
year	=	"2017",
editor	=	"H. Howie Huang and Jon B. Weissman and Adriana Iamnitchi and Alexandru Iosup",
ISBN	=	"978-1-4503-4699-3",
pages	=	"131--142",
URL	=	"http://doi.acm.org/10.1145/3078597",

}

@inproceedings{DBLP:conf/IEEEpact/TeixeiraPG17,

title	=	{A {DSL} for Performance Orchestration},
author	=	{Teixeira, Thiago Santos Faria Xavier and Padua, David and Gropp, William},
booktitle	=	{26th International Conference on Parallel Architectures and Compilation Techniques, {PACT} 2017, Portland, OR, USA, September 9-13, 2017},
crossref	=	{DBLP:conf/IEEEpact/2017},
year	=	{2017},
pages	=	{372},
doi	=	{10.1109/PACT.2017.50},
url	=	{https://doi.org/10.1109/PACT.2017.50},
biburl	=	{http://dblp.org/rec/bib/conf/IEEEpact/TeixeiraPG17},
bibsource	=	{dblp computer science bibliography, http://dblp.org},
timestamp	=	{Thu, 09 Nov 2017 14:41:15 +0100}

}

@InProceedings{gropp-dubey_et_al:DR:2018:8664,

title	=	{Performance, Portability, and Dreams},
author	=	{Gropp, William D.},
booktitle	=	{{Performance Portability in Extreme Scale Computing (Dagstuhl Seminar 17431)}},
editor	=	{Dubey, Anshu and Kelly, Paul H. J. and Mohr, Bernd and Vetter, Jeffrey S.},
journal	=	{Dagstuhl Reports},
volume	=	{7},
number	=	{10},
pages	=	{96--97},
year	=	{2018},
publisher	=	{Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
address	=	{Dagstuhl, Germany},
issn	=	{2192-5283},
doi	=	{10.4230/DagRep.7.10.84},
url	=	{http://drops.dagstuhl.de/opus/volltexte/2018/8664},
urn	=	{urn:nbn:de:0030-drops-86642},
annote	=	{Keywords: Parallel programming, performance portability, productivity, scientific computing}

}

%% 2018
@InProceedings{Gropp:2018:UNI:3236367.3236377,
  author    = {Gropp, William D.},
  title     = {Using Node Information to Implement {MPI} {C}artesian Topologies},
  booktitle = {Proceedings of the 25th European MPI Users' Group Meeting},
  series    = {EuroMPI'18},
  location  = {Barcelona, Spain},
  pages     = {18:1--18:9},
  articleno = 18,
  numpages  = 9,
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2018,
  isbn      = {978-1-4503-6492-8},
  doi       = {10.1145/3236367.3236377},
  url       = {http://doi.acm.org/10.1145/3236367.3236377},
  acmid     = 3236377,
  keywords  = {Cartesian process topology, MPI, Message passing, Process topology},
}

@InProceedings{Bienz:2018:IPM:3236367.3236368,
  author    = {Bienz, Amanda and Gropp, William D. and Olson, Luke N.},
  title     = {Improving Performance Models for Irregular Point-to-Point Communication},
  booktitle = {Proceedings of the 25th European MPI Users' Group Meeting},
  series    = {EuroMPI'18},
  location  = {Barcelona, Spain},
  pages     = {7:1--7:8},
  articleno = 7,
  numpages  = 8,
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2018,
  isbn      = {978-1-4503-6492-8},
  doi       = {10.1145/3236367.3236368},
  url       = {http://doi.acm.org/10.1145/3236367.3236368},
  acmid     = 3236368,
  keywords  = {MPI, network contention, performance modeling, point-to-point communication, queue search},
}

%% 2019
% IEEE Xplore export cleaned up: first author restored to given-name/surname
% order, page range uses a double hyphen, null and empty fields dropped.
@inproceedings{8661203,
  author    = {Thiago S. F. X. Teixeira and C. Ancourt and D. Padua and W. Gropp},
  title     = {Locus: A System and a Language for Program Optimization},
  booktitle = {2019 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)},
  pages     = {217--228},
  month     = feb,
  year      = {2019},
  doi       = {10.1109/CGO.2019.8661203},
  keywords  = {optimisation;optimising compilers;Locus;program optimization;program transformation sequences;application code;conventional compiler optimizations;Optimization;Space exploration;Computer architecture;Software;Tools;C++ languages;code generation;optimization;compilers;domain-specific language},
}

@inproceedings{8778229,
  author    = {H. Ibeid and S. Meng and O. Dobon and L. Olson and W. Gropp},
  title     = {Learning with Analytical Models},
  booktitle = {2019 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)},
  pages     = {778--786},
  month     = may,
  year      = {2019},
  doi       = {10.1109/IPDPSW.2019.00128},
  keywords  = {learning (artificial intelligence);hybrid model;analytical models;performance modeling;machine learning approaches;Analytical models;Predictive models;Computational modeling;Machine learning;Kernel;Mathematical model;Optimization;performance prediction, analytical modeling, machine learning, hybrid modeling},
}

@inproceedings{8955452,
  author    = {A. Bienz and L. Olson and W. Gropp},
  title     = {Node-Aware Improvements to Allreduce},
  booktitle = {2019 IEEE/ACM Workshop on Exascale MPI (ExaMPI)},
  pages     = {19--28},
  month     = nov,
  year      = {2019},
  doi       = {10.1109/ExaMPI49596.2019.00008},
  keywords  = {application program interfaces;message passing;optimisation;radio networks;node-aware improvements;MPI_Allreduce collective operation;core kernel;parallel codebases;recursive-doubling algorithm;lower bound message count;node-agnostic performance models;node-aware optimizations;MPICH;single master process;inactive processes;inter-node messages;duplicate message removal},
}

@inproceedings{10.1145/3330345.3330358,
  author    = {Eller, Paul R. and Hoefler, Torsten and Gropp, William},
  title     = {Using Performance Models to Understand Scalable {K}rylov Solver Performance at Scale for Structured Grid Problems},
  booktitle = {Proceedings of the ACM International Conference on Supercomputing},
  series    = {ICS '19},
  location  = {Phoenix, Arizona},
  pages     = {138--149},
  numpages  = {12},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2019},
  isbn      = {9781450360791},
  doi       = {10.1145/3330345.3330358},
  url       = {https://doi.org/10.1145/3330345.3330358},
  keywords  = {performance analysis, krylov solvers, performance modeling},
}

@InProceedings{10.1007/978-3-030-17872-7_4,
  author    = {Prabhu, Tarun and Gropp, William},
  editor    = {Bhatele, Abhinav and Boehme, David and Levine, Joshua A. and Malony, Allen D. and Schulz, Martin},
  title     = {{Moya}---A {JIT} Compiler for {HPC}},
  booktitle = {Programming and Performance Visualization Tools},
  pages     = {56--73},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  isbn      = {978-3-030-17872-7},
  abstract  = {We describe Moya, an annotation-driven JIT compiler for compiled languages such as Fortran, C and C++. We show that a combination of a small number of easy-to-use annotations coupled with aggressive static analysis that enables dynamic optimization can be used to improve the performance of computationally intensive, long-running numerical applications. We obtain speedups of upto 1.5 on JIT'ed functions and overcome the overheads of the JIT compilation within 25 timesteps in a combustion-simulation application.},
}

%% 2020
@InProceedings{10.1145/3311790.3396649,
  author    = {Kindratenko, Volodymyr and Mu, Dawei and Zhan, Yan and Maloney, John and Hashemi, Sayed Hadi and Rabe, Benjamin and Xu, Ke and Campbell, Roy and Peng, Jian and Gropp, William},
  title     = {{HAL}: Computer System for Scalable Deep Learning},
  booktitle = {Practice and Experience in Advanced Research Computing},
  series    = {PEARC '20},
  location  = {Portland, OR, USA},
  pages     = {41--48},
  numpages  = 8,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = 2020,
  isbn      = {9781450366892},
  doi       = {10.1145/3311790.3396649},
  url       = {https://doi.org/10.1145/3311790.3396649},
  keywords  = {deep learning, cluster architecture, high-performance computing},
  abstract  = {We describe the design, deployment and operation of a computer system built to efficiently run deep learning frameworks. The system consists of 16 IBM POWER9 servers with 4 NVIDIA V100 GPUs each, interconnected with Mellanox EDR InfiniBand fabric, and a DDN all-flash storage array. The system is tailored towards efficient execution of the IBM Watson Machine Learning enterprise software stack that combines popular open-source deep learning frameworks. We build a custom management software stack to enable an efficient use of the system by a diverse community of users and provide guides and recipes for running deep learning workloads at scale utilizing all available GPUs. We demonstrate scaling of a PyTorch and TensorFlow based deep neural networks to produce state-of-the-art performance results.},
}

@misc{20.500.11850/462530,
  author    = {Hoefler, Torsten and Barak, Amnon and Drezner, Zvi and Shiloh, Amnon and Snir, Marc and Gropp, William and Besta, Maciej and Di Girolamo, Salvatore and Taranov, Konstantin and Kwasniewski, Grzegorz and De Sensi, Daniele and Schneider, Timo},
  title     = {High-performance distributed memory systems -- from supercomputers to data centers},
  publisher = {ETH Zurich, Scalable Parallel Computing Laboratory},
  address   = {Zurich},
  year      = {2020},
  note      = {34th International Symposium on Distributed Computing (DISC 2020); Conference Location: online; Conference Date: October 12--16, 2020; Keynote talk held on October 14, 2020. Due to the Coronavirus (COVID-19) the conference was conducted virtually.},
  doi       = {10.3929/ethz-b-000462530},
  size      = {53 p.},
  language  = {en},
  copyright = {In Copyright - Non-Commercial Use Permitted},
  abstract  = {We will cover distributed memory programming of high-performance supercomputers and datacenter computers. Starting from the Message Passing Interface, we observe abstractions for distributed computations that we carry through optimizations such as topology mapping and collective communication optimization. We then discuss efficient correction protocols to enable fault tolerance in such high-performance distributed systems. Armed with these insights, we observe that supercomputers are likely to migrate into megadatacenter installations leading to a general convergence of such architectures. The first step, converging the network interfaces, is well underway towards a general acceptance of Remote Direct Memory Access (RDMA) networking. RDMA moves the distributed system closer to shared memory, with a weakly consistent memory model. We discuss several algorithmic and systems approaches to accelerate distributed replicated state machines, databases, and locking systems by orders of magnitude using RDMA. Finally, if time allows, we will outline parametric program graphs -- a sound abstraction for analyzing and optimizing applications. Each topic will identify open problems and provide ideas for further work to deepen our understanding of high-performance distributed memory systems.},
}

% 2021
@inproceedings{9622742,
  author    = {Bienz, Amanda and Olson, Luke N. and Gropp, William D. and Lockhart, Shelby},
  title     = {Modeling Data Movement Performance on Heterogeneous Architectures},
  booktitle = {2021 IEEE High Performance Extreme Computing Conference (HPEC)},
  volume    = {},
  number    = {},
  pages     = {1--7},
  year      = 2021,
  doi       = {10.1109/HPEC49654.2021.9622742},
}

@InProceedings{doi:10.2514/6.2021-2726,
  author    = {Andrew W. Cary and John Chawner and Earl P. Duque and William Gropp and William L. Kleb and Raymond M. Kolonay and Eric Nielsen and Brian Smith},
  title     = {{CFD} {V}ision 2030 {R}oad {M}ap: {P}rogress and Perspectives},
  booktitle = {AIAA AVIATION 2021 FORUM},
  year      = 2021,
  doi       = {10.2514/6.2021-2726},
  url       = {https://arc.aiaa.org/doi/abs/10.2514/6.2021-2726},
  eprint    = {https://arc.aiaa.org/doi/pdf/10.2514/6.2021-2726},
  abstract  = {In 2014, the CFD Vision 2030 Report proposed a desired status for aerospace CFD by 2030 that included several challenge problems and a Roadmap describing how to reach this status. Since then, the Report has been used to identify research topics and support funded activities. Further, the AIAA CFD Vision 2030 Integration Committee has been established to further progress toward this vision. A detailed review of the progress made on the milestones identified on the Roadmap was completed in 2020 by this committee and was used to assess the technology readiness level (TRL) of the different milestones. An interpretation of the TRL scale based on publications and level of technology usage was developed and used for assessment consistency. With this information, the Roadmap has been adjusted to reflect the present status of key CFD technology items. Emerging and additional key technologies have been incorporated into the Roadmap. With these modifications, an updated Roadmap is provided that reflects present status of technology development and steps necessary to attain the CFD Vision 2030.},
}

% 2022
% "Pinks, II, Victor" uses the Last, Jr, First form so that the generational
% suffix is not parsed as the surname.
@inproceedings{jay_alameda_2022_7089487,
  author    = {Jay Alameda and Claire Stirm and Gregory Bauer and Timothy Boerner and Brett Bode and Maytal Dahan and William Gropp and Marlon Pierce and Cynthia Yewdall Grigorescu and Michael Zentner and Meghna Babbar-Sebens and Michael Barton and Daniele Bianchi and Michael Bell and Michel Boufadel and Michael Cianfrocco and Sean Cleveland and Cosan Daskiran and Kjiersten Fagnan and Geoffrey Fox and Eleftherios Garyfallidis and Jerome Hajjar and Gerhard Klimeck and Mark Miller and Mark Perri and Pinks, II, Victor and Mohan Ramamurthy and Michel Regenwetter and Amy Roberts and Aldo Romero and Carol Song and Alejandro Strachan and Ellad Tadmor and Greg Tucker},
  title     = {The {D}elta Gateway: Exploring Community Use of {GPU} Resources through a Science Gateway},
  booktitle = {Gateways 2022 Proceedings},
  publisher = {Zenodo},
  month     = sep,
  year      = 2022,
  doi       = {10.5281/zenodo.7089487},
  url       = {https://doi.org/10.5281/zenodo.7089487},
}

@inproceedings{9826073,
  author    = {Lawson, Margaret and Gropp, William and Lofstead, Jay},
  title     = {Exploring Spatial Indexing for Accelerated Feature Retrieval in {HPC}},
  booktitle = {2022 22nd IEEE International Symposium on Cluster, Cloud and Internet Computing (CCGrid)},
  volume    = {},
  number    = {},
  pages     = {605--614},
  year      = 2022,
  doi       = {10.1109/CCGrid54584.2022.00070},
}

%% For cross reference only
@Proceedings{DBLP:conf/sc/2014pmbs,
  title     = {High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation - 5th International Workshop, {PMBS} 2014, New Orleans, LA, USA, November 16, 2014. Revised Selected Papers},
  editor    = {Stephen A. Jarvis and Steven A. Wright and Simon D. Hammond},
  series    = {Lecture Notes in Computer Science},
  volume    = 8966,
  publisher = {Springer},
  year      = 2015,
  isbn      = {978-3-319-17247-7},
  doi       = {10.1007/978-3-319-17248-4},
  url       = {http://dx.doi.org/10.1007/978-3-319-17248-4},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/sc/2014pmbs},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Tue, 21 Apr 2015 12:36:12 +0200},
}

%
% Section: Technical Reports
%

%1979
@techreport{Bolstad:1979:NAP,
  author      = {J. H. Bolstad and T. F. Chan and W. M. {Coughran, Jr.} and W. D. Gropp and E. H. Grosse and M. T. Heath and R. J. LeVeque and F. T. Luk and S. G. Nash and L. N. Trefethen},
  title       = {Numerical Analysis Program Library User's Guide {(NAPLUG)}},
  institution = {SLAC Computing Services},
  type        = {User Note},
  number      = {82},
  year        = 1979,
  note        = {First issued in 1976 by Chan, Coughran, Heath, and Luk},
  area        = {S;NS},
  areaseq     = {0},
}

%1980
%1981

@techreport{gropp-thesis,
  author      = "William D. Gropp",
  title       = "Numerical Solution of Transport Equations",
  institution = "Stanford University",
  number      = "STAN-CS-81-888",
  month       = dec,
  year        = "1981",
  note        = "Ph.D. Thesis",
  area        = "A",
  areaseq     = "0",
}

%1982
%1983

@techreport{gropp83,
  author      = "William D. Gropp",
  title       = "Local Uniform Mesh Refinement for Elliptic Partial Differential Equations",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-278",
  month       = jul,
  year        = "1983",
  area        = "R",
  areaseq     = "0",
}

@techreport{gropp-hedstrom83,
  author      = "G. W. Hedstrom and William D. Gropp",
  title       = "The Computer as an Aid in the Asymptotic Estimation of Integrals",
  institution = "Lawrence Livermore National Laboratory",
  number      = "UCRL-87297",
  month       = aug,
  year        = "1983",
  area        = "A",
  areaseq     = "0",
}

%1984

@techreport{gropp-odonnell84,
  author      = "W.~D.~Gropp and J.~J.~O'Donnell and S.~T.~O'Donnell and M.~H.~Schultz and B.~Weston",
  title       = "A High Performance Bulk Memory System",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-311",
  month       = mar,
  year        = "1984",
  area        = "Par",
  areaseq     = "0",
}

@techreport{GROPP84,
  key         = {Gropp},
  author      = {W. D. Gropp},
  title       = {Local Uniform Mesh Refinement with Moving Grids},
  institution = {Yale University},
  number      = {YALEU/DCS/RR-313},
  month       = apr,
  year        = 1984,
  area        = {R},
  areaseq     = {0},
}

@techreport{GROPP84A,
  key         = {Gropp},
  author      = {W. D. Gropp},
  title       = {Local Uniform Mesh Refinement on Loosely-Coupled Parallel Processors},
  institution = {Yale University},
  number      = {YALEU/DCS/RR-352},
  month       = dec,
  year        = 1984,
  area        = {R;Par},
  areaseq     = {0},
}

%1985
@TechReport{KEYES85,
  key         = {Keyes \& Gropp},
  author      = {D. E. Keyes and W. D. Gropp},
  title       = {A Comparison of Domain Decomposition Techniques for Elliptic Partial Differential Equations and Their Parallel Implementation},
  institution = {Comput. Sci. Dept., Yale Univ.},
  type        = {Technical Report},
  number      = {YALEU/DCS/RR-448},
  month       = dec,
  year        = {1985},
  area        = {D},
  areaseq     = {0},
}

%1986
@techreport{Gropp86a,
  author      = {W. Gropp},
  title       = {Dynamic Grid Manipulation for {PDE's} on Hypercube Parallel Processors},
  institution = {Department of Computer Science, Yale University},
  number      = {YALEU/DCS/RR-458},
  month       = mar,
  year        = 1986,
  area        = {R;Par},
  areaseq     = {0},
}

%1987
@techreport{Gropp87b,
  author      = {W. D. Gropp and E. B. Smith},
  title       = {Computational Fluid Dynamics on Parallel Processors},
  institution = {Department of Computer Science, Yale University},
  number      = {YALEU/DCS/RR-570},
  month       = dec,
  year        = 1987,
  area        = {Par},
  areaseq     = {0},
}

@techreport{herbin87,
  author      = "R.~H.~Herbin and W.~D.~Gropp and D.~E.~Keyes and V.~Sonnad",
  title       = "A Domain Decomposition Technique on a Loosely Coupled Array of Processors",
  institution = "IBM Kingston",
  number      = "KGN-124",
  year        = "1987",
  area        = "D;Par",
  areaseq     = "0",
}

%1988
@techreport{Gropp88a,
  author      = {W. Gropp and I. Ipsen},
  title       = {Recursive Mesh Refinement on Hypercubes},
  institution = {Department of Computer Science, Yale University},
  number      = {YALE/DCS/RR-616},
  month       = mar,
  year        = 1988,
  area        = {R;Par},
  areaseq     = {0},
}

@techreport{Greengard88,
  author      = "L.~Greengard and W.~Gropp",
  title       = "A Parallel Version of the Fast Multipole Method",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-640",
  month       = aug,
  year        = "1988",
  area        = "A;Par",
  areaseq     = "0",
}

%1989

@techreport{besa89,
  author      = {H. Berryman and J. Saltz and W. Gropp and R. Mirchandaney},
  title       = {Krylov methods preconditioned with incompletely factored matrices on the {CM}-2},
  institution = {NASA Langley Research Center, ICASE},
  number      = {89-54},
  address     = {Hampton, VA},
  month       = dec,
  year        = 1989,
  note        = {Also Yale University YALE/DCS/RR-685, March 1989},
  area        = {Par},
  areaseq     = {0},
}

@techreport{groppkeyes89,
  author      = "William D. Gropp and David Keyes",
  title       = "Domain Decomposition on Parallel Computers",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-723",
  month       = aug,
  year        = "1989",
  area        = "D;Par",
  areaseq     = "0",
}

@TechReport{Keyes:1989:DDL,
  author      = {David E. Keyes and William D. Gropp},
  title       = {Domain Decomposition with Local Mesh Refinement},
  institution = {Yale University},
  number      = {YALEU/DCS/RR-726},
  month       = aug,
  year        = {1989},
  area        = {D;R},
  areaseq     = {0},
}

%1990
@techreport{groppkeyes90,
  author      = "William D. Gropp and David Keyes",
  title       = "Parallel Performance of Domain-decomposed Preconditioned {K}rylov Methods for {PDE}s with Adaptive Refinement",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-773",
  month       = apr,
  year        = "1990",
  note        = "Also ANL Preprint MCS-P147-0490, May 1990",
  area        = "D;Par",
  areaseq     = "0",
}

@TechReport{groppkeyes90b,
  author      = {William D. Gropp and David E. Keyes},
  title       = {Parallel Domain Decomposition and the Solution of Nonlinear Systems of Equations},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {MCS-P186-1090},
  month       = nov,
  year        = 1990,
  area        = {D;Par},
  areaseq     = {0},
}

% Report number normalized to the ANL preprint pattern MCS-Pnnn-mmyy used by
% the other entries; verify against the printed report if in doubt.
@TechReport{keyesgropp90,
  author      = {David E. Keyes and William D. Gropp},
  title       = {Domain-decomposable Preconditioners for Second-order Upwind Discretizations of Multicomponent Systems},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {MCS-P187-1090},
  month       = nov,
  year        = 1990,
  area        = {D},
  areaseq     = {0},
}

%1991
% also TechReport{Cai:1990:CRE,
@techreport{caigroppkeyes91,
  author      = "X.-C.~Cai and William D. Gropp and David E. Keyes",
  title       = "Convergence Rate Estimate for a Domain Decomposition Method",
  institution = "Yale University, Department of Computer Science",
  number      = "YALE/DCS/RR-827",
  month       = jan,
  year        = "1991",
  note        = "also ANL Preprint MCS-P202-1290, January 1991",
  area        = "D",
  areaseq     = "0",
}

@techreport{groppkeyes91,
  author      = "William D. Gropp and David E. Keyes",
  title       = "Domain Decomposition with Local Mesh Refinement",
  institution = "ICASE",
  number      = "91-19",
  month       = feb,
  year        = "1991",
  area        = "D;R",
  areaseq     = "0",
}

@techreport{groppkeyes91a,
  author      = "William D. Gropp and David E. Keyes",
  title       = "Domain Decomposition Methods in Computational Fluid Dynamics",
  institution = "ICASE",
  number      = "91-20",
  month       = feb,
  year        = "1991",
  note        = "Also ANL Preprint MCS-P210-0191, April 1991",
  area        = "D",
  areaseq     = "0",
}

@techreport{gropp91,
  author      = "William D. Gropp",
  title       = "Parallel Computing and Domain Decomposition",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "MCS-P257-0891",
  month       = sep,
  year        = "1991",
  area        = "D;Par",
  areaseq     = "0",
}

%1992
@techreport{GroppWilli92a,
  author      = {William Gropp and Ewing Lusk},
  title       = {A Test Implementation of the {MPI} Draft Message-Passing Standard},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL-92/47},
  month       = dec,
  year        = 1992,
  url         = {ftp://info.mcs.anl.gov/pub/tech_reports/reports/ANL9247.dvi.Z},
  scope       = {pvmmpi},
  area        = {M},
  areaseq     = {0},
}

@techreport{Gropp:1992:EDDa,
  author      = {William D. Gropp and Barry F. Smith},
  title       = {Experiences with Domain Decomposition in Three Dimensions: Overlapping {S}chwarz Methods},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  year        = 1992,
  note        = {Appeared in the Proceedings of the Sixth International Symposium on Domain Decomposition Methods},
  area        = {D},
  areaseq     = {0},
}

@techreport{keyesgropp92,
  author      = "David E. Keyes and William D. Gropp",
  title       = "Domain Decomposition as a Mechanism for Using Asymptotic Methods",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "MCS-P322-0892",
  month       = sep,
  year        = "1992",
  area        = "D;A",
  areaseq     = "0",
}

%1993
% The editor marker is kept inside a braced surname so the name parses as
% Last = "Gropp (ed.)", First = "William" and still prints "William Gropp (ed.)".
@TechReport{gropp93,
  author      = {{Gropp (ed.)}, William},
  title       = {Early Experiences with the {IBM SP-1}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL-MCS-TM-177},
  month       = may,
  year        = 1993,
  area        = {Par},
  areaseq     = {0},
}

@techreport{GroppWilli93a,
  author      = {William Gropp},
  title       = {Early Experiences with the {IBM} {SP}1 and the High-Performance Switch},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL-93/41},
  month       = nov,
  year        = 1993,
  url         = {ftp://info.mcs.anl.gov/pub/tech_reports/reports/ANL9341.ps.Z},
  area        = {Par},
  areaseq     = {0},
}

@techreport{Gropp:1993:DDS,
  author      = {William D. Gropp and Barry F. Smith},
  title       = {The design of data-structure-neutral libraries for the iterative solution of sparse linear systems},
  institution = {Argonne National Laboratory},
  number      = {MCS-P356-0393},
  address     = {Argonne, IL, USA},
  month       = mar,
  year        = 1993,
  keywords    = {Krylov space methods, software libraries, sparse linear systems, numerical libraries, parallel computing},
  area        = {NS},
  areaseq     = {0},
}

%1994
% The math in the title is brace-protected so sentence-casing styles do not
% lowercase the T of the critical temperature symbol.
@TechReport{groppkaper94,
  author      = {William D.~Gropp and Hans Kaper and G. Leaf and D. Levine and V. Vinokur and M. Palumbo},
  title       = {Numerical Simulation of Vortex Dynamics in High-{$T_c$} Superconductors},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {MCS-P476-1094},
  month       = nov,
  year        = 1994,
  area        = {App;Par;P},
  areaseq     = {0},
}

@TechReport{kettunen94,
  author      = {L. Kettunen and K. Forsman and D. Levine and W. Gropp},
  title       = {Integral Equations in Nonlinear {3D} Magnetostatics},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {MCS-P460-0894},
  month       = aug,
  year        = 1994,
  area        = {App;Par;P},
  areaseq     = {0},
}

@techreport{SkjellumAn1994a,
  author      = {Anthony Skjellum and Ewing Lusk and William Gropp},
  title       = {Early applications in the Message Passing Interface ({MPI})},
  institution = {Department of Computer Science, Mississippi State University},
  month       = jun,
  year        = 1994,
  url         = {ftp://aurora.cs.msstate.edu/pub/reports/Applications/early_apps_mpi.ps.Z},
  scope       = {pvmmpi},
  area        = {M},
  areaseq     = {0},
}

%1995
@techreport{GroppWilli1995b,
  author       = {William Gropp},
  title        = {An Introduction to Performance Debugging for Parallel Computers},
  institution  = {Argonne National Lab},
  number       = {MCS-P500-0295},
  month        = apr,
  year         = 1995,
  url          = {ftp://info.mcs.anl.gov/pub/tech_reports/reports/P500.ps.Z},
  abstract-url = {http://www.mcs.anl.gov/division/publications/abstracts95.html},
  scope        = {debug},
  area         = {Par},
  areaseq      = {0},
}

@techreport{GroppWilli1995a,
  author         = {William D. Gropp and Lois Curfman McInnes and Barry F. Smith},
  title          = {Using the Scalable Nonlinear Equations Solvers Package},
  institution    = {Argonne National Lab},
  type           = {Technical Memorandum},
  number         = {ANL/MCS-TM-193},
  month          = feb,
  year           = 1995,
  url            = {ftp://info.mcs.anl.gov/pub/tech_reports/reports/TM193.ps.Z},
  parallelissues = {defined},
  scope          = {nlinal},
  area           = {NS},
  areaseq        = {0},
}

@techreport{forsman95rpt,
  author      = "K. Forsman and W. Gropp and L. Kettunen and D. Levine and J. Salonen",
  title       = "Solution of Dense Systems of Linear Equations Arising from Integral Equation Formulations",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "MCS-P538-0895",
  month       = oct,
  year        = "1995",
  area        = "App;Par;P",
  areaseq     = "0",
}

@techreport{thakur:astrophysics,
  author      = {Rajeev Thakur and Ewing Lusk and William Gropp},
  title       = {{I/O} Characterization of a Portable Astrophysics Application on the {IBM SP} and {Intel Paragon}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {MCS-P534-0895},
  month       = aug,
  year        = 1995,
  note        = {Revised October 1995},
  url         = {http://www.mcs.anl.gov/home/thakur/astro.ps},
  keywords    = {file access pattern, workload characterization, parallel I/O, pario-bib},
  area        = {I},
  areaseq     = {0},
}

%1996
@TechReport{thakur:evaluation-tr,
  author      = {Rajeev Thakur and William Gropp and Ewing Lusk},
  title       = {An Experimental Evaluation of the Parallel {I/O} Systems of the {IBM~SP} and {Intel Paragon} Using a Production Application},
  institution = {Argonne National Laboratory},
  number      = {MCS-P569-0296},
  month       = feb,
  year        = {1996},
  later       = {thakur:evaluation},
  keywords    = {parallel I/O, multiprocessor file system, pario-bib},
  area        = {I},
  areaseq     = {0},
}

@techreport{thakur:abstract-tr,
  author      = {Rajeev Thakur and William Gropp and Ewing Lusk},
  title       = {An Abstract-Device Interface for Implementing Portable Parallel-{I/O} Interfaces},
  institution = {Argonne National Laboratory, Mathematics and Computer Science Division},
  number      = {MCS-P592-0596},
  month       = may,
  year        = 1996,
  url         = {http://www.mcs.anl.gov/home/thakur/adio.ps},
  later       = {thakur:abstract},
  keywords    = {multiprocessor file system interface, parallel I/O, pario-bib},
  area        = {I},
  areaseq     = {0},
}

%1997

% The accent is written as a braced BibTeX special character so the accented
% letter counts as a single letter when sorting and building labels.
@TechReport{groppmore97rpt,
  author      = {William D. Gropp and Jorge Mor{\'e}},
  title       = {Optimization Environments and the {NEOS} Server},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P654-0397},
  month       = mar,
  year        = 1997,
  note        = {Also CRPC-TR97708 and available at \url{http://www.crpc.rice.edu/softlib/TRs_online.html}},
  area        = {NS},
  areaseq     = {0},
}

@techreport{groppluskpvmmpi97,
  author      = "William D. Gropp and Ewing Lusk",
  title       = "Why are {PVM} and {MPI} so different?",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P667-0697",
  month       = jun,
  year        = "1997",
  area        = "M;Par",
  areaseq     = "0",
}

% 1998

@techreport{thakurluskgropp-io97,
  author      = "Rajeev Thakur and Ewing Lusk and William Gropp",
  title       = "{I/O} in Parallel Applications: The Weakest Link",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P700-1197",
  month       = nov,
  year        = "1997",
  note        = "Appeared in IJSA",
  area        = "I",
  areaseq     = "0",
}

@techreport{thakurluskgropp-datatype98,
  author      = "Rajeev Thakur and Ewing Lusk and William Gropp",
  title       = "A Case for Using {MPI}'s Derived Datatypes to Improve {I/O} performance",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P717-0598",
  month       = may,
  year        = "1998",
  note        = "Appeared at Supercomputing'98.",
  annote      = "Extended Abstract.",
  location    = "/home/SIO/doc/sc98.ps",
  area        = "I;M",
  areaseq     = "0",
}

@techreport{thakurgropplusk-datasieving98,
  author      = "Rajeev Thakur and William Gropp and Ewing Lusk",
  title       = "Data Sieving and Collective {I/O} in {ROMIO}",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P723-0898",
  month       = aug,
  year        = "1998",
  note        = "Submitted to Frontiers'99.",
  area        = "I",
  areaseq     = "0",
}

@techreport{alice-siamoo-98-preprint,
  author      = "Satish Balay and William Gropp and Lois Curfman McInnes and Barry Smith",
  title       = "A Microkernel Design for Component-based Numerical Software Systems",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P727-0998",
  month       = sep,
  year        = "1998",
  preprintof  = "alice-siamoo-98",
  area        = "NS",
  areaseq     = "0",
}

@techreport{gkmt-nks-98-preprint,
  author      = "William Gropp and David E. Keyes and Lois C. McInnes and M. D. Tidriri",
  title       = "Globalized {N}ewton-{K}rylov-{S}chwarz Algorithms and Software for Parallel Implicit {CFD}",
  institution = "ICASE",
  number      = "98-24",
  month       = aug,
  year        = "1998",
  note        = "Also NASA/CR-1998-208435",
  area        = "A;Par",
  areaseq     = "0",
}

% 1999
@techreport{thakur-gropp-lusk-mpiio,
  author      = "Rajeev Thakur and William Gropp and Ewing Lusk",
  title       = "Achieving High Performance with {MPI-IO}",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P742-0299",
  month       = sep,
  year        = "1999",
  area        = "I;M",
  areaseq     = "0",
}

@techreport{alice-infrastructure,
  author      = "Lori Freitag and William Gropp and Paul Hovland and Lois Curfman McInnes and Barry Smith",
  title       = "Infrastructure and Interfaces for Large-Scale Numerical Software",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P751-0599",
  month       = may,
  year        = "1999",
  annote      = "For PDPTA'99",
  area        = "NS",
  areaseq     = "0",
}

@techreport{pvmmpi99-mpptest-tr,
  author      = "William Gropp and Ewing Lusk",
  title       = "Reproducible Measurements of {MPI} Performance Characteristics",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P755-0699",
  month       = jun,
  year        = "1999",
  annote      = "Appeared in PVMMPI'99",
  area        = "M",
  areaseq     = "0",
}

@techreport{pvmmpi99-totalview-tr,
  author      = "James Cownie and William Gropp",
  title       = "A Standard Interface for Debugger Access to Message Queue Information in {MPI}",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P754-0699",
  month       = jun,
  year        = "1999",
  annote      = "Appeared in PVMMPI'99",
  area        = "M",
  areaseq     = "0",
}

@techreport{zaki-lusk-gropp-swider99-techrpt,
  author      = "Omer Zaki and Ewing Lusk and William Gropp and Deborah Swider",
  title       = "Toward Scalable Performance Visualization with {Jumpshot}",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P763-0699",
  month       = jun,
  year        = "1999",
  annote      = "Pre SLOG",
  area        = "PV",
  areaseq     = "0",
}

@techreport{agkks-bell-prize-sc99,
  author      = "W. Kyle Anderson and William D. Gropp and Dinesh Kaushik and David E. Keyes and Barry F. Smith",
  title       = "Achieving High Sustained Performance in an Unstructured Mesh {CFD} Application",
  institution = "Mathematics and Computer Science Division, Argonne National Laboratory",
  number      = "ANL/MCS-P776-0899",
  month       = aug,
  year        = "1999",
  note        = "Appeared in Proceedings of SC99",
  area        = "P",
  areaseq     = "0",
}

% 2000
@TechReport{gkmt-nks00,
  author      = {William Gropp and David Keyes and Lois McInnes and M. D. Tidriri},
  title       = {Globalized {N}ewton-{K}rylov-{S}chwarz Algorithms and Software for Parallel Implicit {CFD}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P788-0100},
  month       = jan,
  year        = 2000,
  note        = {Appeared in High Performance Computing Applications},
  area        = {A},
  areaseq     = {0},
}

@TechReport{ksfglb00:mpi-collective,
  author      = {N. T. Karonis and B. R. de Supinski and I. Foster and W. Gropp and E. Lusk and J. Bresnahan},
  title       = {Exploiting Hierarchy in Parallel Computer Networks to Optimize Collective Operation Performance},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P788-0200},
  month       = feb,
  year        = {2000},
  OPTnote     = {},
  area        = {M},
  areaseq     = {0},
}

@TechReport{gro00:mpi-impl,
  author      = {William D. Gropp},
  title       = {Runtime Checking of Datatype Signatures in {MPI}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P826-0500},
  month       = may,
  year        = {2000},
  annote      = {Appeared in PVMMPI00},
  area        = {M},
  areaseq     = {0},
}

@TechReport{bgl00:mpi-mpd-tr,
  author      = {Ralph Butler and William Gropp and Ewing Lusk},
  title       = {A Scalable Process-Management Environment for Parallel Programs},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P812-0400},
  month       = apr,
  year        = {2000},
  annote      = {Appeared in PVMMPI00},
  area        = {M},
  areaseq     = {0},
}

@TechReport{gkks00:fun3d,
  author      = {William D. Gropp and Dinesh K. Kaushik and David E. Keyes and Barry F. Smith},
  title       = {Performance Modeling and Tuning of an Unstructured Mesh {CFD} Application},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P833-0700},
  month       = jul,
  year        = {2000},
  annote      = {Appeared in Supercomputing 2000},
  area        = {Par;P},
  areaseq     = {0},
}

@TechReport{bgms00:petsc-chapt,
  author      = {Satish Balay and William Gropp and Lois Curfman McInnes and Barry F. Smith},
  title       = {Software for the Scalable Solution of {PDE}s},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P834-0700},
  month       = jul,
  year        = {2000},
  annote      = {Chapter for the CRPC Handbook of Parallel Computation},
  area        = {P},
  areaseq     = {0},
}

@TechReport{tg00:io-chapt,
  author      = {Rajeev Thakur and William Gropp},
  title       = {Parallel {I/O}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P837-0700},
  month       = jul,
  year        = {2000},
  annote      = {Chapter for the CRPC Handbook of Parallel Computation},
  area        = {I},
  areaseq     = {0},
}

@TechReport{rfgkst00:mpichg-qos,
  author      = {Alain Roy and Ian Foster and William Gropp and Nicholas Karonis and Volker Sander and Brian Toonen},
  title       = {{MPICH-GQ}: Quality of Service for Message Passing Programs},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P838-0700},
  month       = jul,
  year        = {2000},
  area        = {M},
  areaseq     = {0},
}

@TechReport{gkks:cfd-scal-perf00,
  author      = {W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title       = {Understanding the Parallel Scalability of an Implicit Unstructured Mesh {CFD} Code},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P845-0900},
  month       = sep,
  year        = {2000},
  area        = {Par},
  areaseq     = {0},
}

@TechReport{gkks:cfd-perf,
  author      = {W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title       = {Latency, Bandwidth, and Concurrent Issue Limitations in High-Performance {CFD}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P850-1000},
  month       = oct,
  year        = {2000},
  area        = {Par},
  areaseq     = {0},
}

@TechReport{gkks:cfd-hiperf-tr,
  author      = {W. D. Gropp and D. K. Kaushik and D. E. Keyes and B. F. Smith},
  title       = {High Performance Parallel Implicit {CFD}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P863-1200},
  month       = dec,
  year        = {2000},
  area        = {Par},
  areaseq     = {0},
}

% 2001

@TechReport{bgl00:mpd-tr,
  author      = {Ralph Butler and William Gropp and Ewing Lusk},
  title       = {Components and Interfaces of a Process Management System for Parallel Programs},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P872-0201},
  year        = {2001},
  area        = {M},
  areaseq     = {0},
}

@TechReport{ong-lusk-gropp:SUT-tr,
  author      = {Emil Ong and Ewing Lusk and William Gropp},
  title       = {Scalable {U}nix Commands for Parallel Processors: A High-Performance Implementation},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P885-0601},
  year        = {2001},
  area        = {Par},
  areaseq     = {0},
}

@TechReport{gropp01:mpi-misc,
  author      = {William Gropp},
  title       = {Learning from the Success of {MPI}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P903-0801},
  year        = {2001},
  area        = {M},
  areaseq     = {0},
}

% 2002

@TechReport{gropp02:mpi-generic,
  author      = {William Gropp},
  title       = {Building Library Components That Can Use Any {MPI} Implementation},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P956-0502},
  year        = {2002},
  area        = {M},
  areaseq     = {0},
}

@TechReport{chan02:scalable-log,
  author      = {Anthony Chan and William Gropp and Ewing Lusk},
  title       = {Scalable Log Files for Parallel Program Trace Data},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-TM-256},
  year        = {2002},
  area        = {PV},
  areaseq     = {0},
}

@TechReport{kar02:mpi-impl,
  author      = {Nicholas T. Karonis and Bronis de Supinski and Ian Foster and William Gropp and Ewing Lusk and Sebastien Lacour},
  title       = {A Multilevel Approach to Topology-Aware Collective Operations in Computational Grids},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P948-0402},
  month       = apr,
  year        = {2002},
  area        = {M},
  areaseq     = {0},
}

@TechReport{byna03:mpi-impl,
  author      = {Surendra Byna and William Gropp and Xian-He Sun and Rajeev Thakur},
  title       = {Improving the Performance of {MPI} Derived Datatypes by Optimizing Memory-Access Cost},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1045-0403},
  year        = {2003},
  area        = {M},
  areaseq     = {0},
}

% Names with a suffix use the "Last, Jr., First" form so that BibTeX does not
% take "Jr." or "III" as the given name.
@TechReport{bla03:cray-eval,
  author      = {A. S. Bland and J. J. Dongarra and J. B. Drake and Dunigan, Jr., T. H. and Dunning, Jr., T. H. and A. Geist and B. Gorda and W. D. Gropp and R. J. Harrison and R. Kendall and D. Keyes and J. A. Nichols and L. Oliker and H. Simon and R. Stevens and White, III, J. B. and P. H. Worley and T. Zacharia},
  title       = {Cray {X1} Evaluation},
  institution = {Oak Ridge National Laboratory},
  number      = {ORNL/TM-2003/67},
  month       = mar,
  year        = {2003},
}

@TechReport{thakur03:mpi-coll,
  author      = {Rajeev Thakur and William Gropp},
  title       = {Improving the Performance of Collective Operations in {MPICH}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1038-0403},
  year        = {2003},
  note        = {Appeared in Euro PVMMPI'03},
}

@TechReport{ross04:mpi-impl:tr,
  author      = {R. Ross and N. Miller and W. D. Gropp},
  title       = {Implementing Fast and Reusable Datatype Processing},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1068-0703},
  month       = jul,
  year        = {2003},
  note        = {Appeared in Euro PVMMPI'03},
}

@TechReport{liu03:mpich2-infiniband,
  author      = {Jiuxing Liu and Weihang Jiang and Pete Wyckoff and Dhabaleswar K. Panda and David Ashton and Darius Buntinas and William Gropp and Brian Toonen},
  title       = {Design and Implementation of {MPICH2} over {InfiniBand} with {RDMA} Support},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1103-1003},
  year        = {2003},
}

% 2004

@TechReport{jiang04:mpi-impl,
  author      = {Weihang Jiang and Jiuxing Liu and Hyun-Wook Jin and Dhabaleswar K. Panda and William Gropp and Rajeev Thakur},
  title       = {High Performance {MPI-2} One-Sided Communication over {InfiniBand}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1119-0104},
  year        = {2004},
}

@TechReport{ching04:paralle-io,
  author      = {Avery Ching and Alok Choudhary and Wei-keng Liao and Robert Ross and William Gropp},
  title       = {Evaluating Structured {I/O} Methods for Parallel File Systems},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1125-0204},
  year        = {2004},
  note        = {To appear in IJHPCN},
}

@TechReport{thak04:mpi-impl:coll,
  author      = {Rajeev Thakur and Rolf Rabenseifner and William Gropp},
  title       = {Optimization of Collective Communication Operations in {MPICH}},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1140-0304},
  month       = mar,
  year        = {2004},
}

@TechReport{gropp04:mpi-fault,
  author      = {William Gropp and Ewing Lusk},
  title       = {Fault Tolerance in {MPI} Programs},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1154-0404},
  year        = {2004},
  annote      = {Appeared in IJHPCA},
}

@TechReport{thak04:mpi-impl;rma,
  author      = {Rajeev Thakur and William Gropp and Brian Toonen},
  title       = {Minimizing Synchronization Overhead in the Implementation of {MPI} One-Sided Communication},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1158-0504},
  month       = may,
  year        = {2004},
}

@TechReport{jia04:mpi-impl;ib,
  author      = {Weihang Jiang and Jiuxing Liu and Hyun-Wook Jin and Dhabaleswar K. Panda and Darius Buntinas and Rajeev Thakur and William Gropp},
  title       = {Efficient Implementation of {MPI-2} Passive One-Sided Communication on {InfiniBand} Clusters},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1164-0504},
  month       = may,
  year        = {2004},
}

@TechReport{gro04:par-io;tr,
  author      = {William Gropp and Robert Ross and Neill Miller},
  title       = {Providing Efficient {I/O} Redundancy in {MPI} Environments},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1178-0604},
  month       = jun,
  year        = {2004},
}

% Accented letters are written as brace groups ({\'a}, {\'e}, {\~n}) so that
% BibTeX treats each as a single letter when sorting and building labels.
@TechReport{ala04:mpi;bgl,
  author      = {George Alm{\'a}si and Charles Archer and Jos{\'e} G. Casta{\~n}os and John Gunnels and Chris Erway and Philip Heidelberger and Xavier Martorell and Jos{\'e} E. Moreira and Kurt Pinnow and Joe Ratterman and Burkhard Steinmacher-Burow and William Gropp and Brian Toonen},
  title       = {The Design and Implementation of Message Passing Services for the {BlueGene/L} Supercomputer},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1183-0604},
  month       = jun,
  year        = {2004},
}

@TechReport{gro04:par-issues,
  author      = {William D. Gropp},
  title       = {Issues in Accurate and Reliable Use of Parallel Computing in Numerical Programs},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1193-0804},
  month       = aug,
  year        = {2004},
  annote      = {Appeared in ``Accuracy and Reliability in Scientific Computing,'' published by SIAM.},
}

@TechReport{thak05:mpi-impl:rma:preprint,
  author      = {Rajeev Thakur and William Gropp and Brian Toonen},
  title       = {Optimizing the Synchronization Operations in {MPI} One-Sided Communication},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1232-0205},
  month       = feb,
  year        = {2005},
  annote      = {Published in HPCA},
}

@TechReport{ross:mpi-io:atomic,
  author      = {Robert Ross and Robert Latham and William Gropp and Rajeev Thakur and Brian Toonen},
  title       = {Implementing {MPI-IO} Atomic Mode without File System Support},
  institution = {Mathematics and Computer Science Division, Argonne National Laboratory},
  number      = {ANL/MCS-P1235-0305},
  month       = mar,
  year        = {2005},
}

@TechReport{buntinas05:common_comm_subsys,
  author      = {Darius Buntinas and William Gropp},
  title       = {Understanding the Requirements Imposed by Programming Model Middleware on a Common Communication Subsystem},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-TM-284},
  year        = {2005},
}

@TechReport{nemesis-design-tr,
  author      = {Darius Buntinas and Guillaume Mercier and William Gropp},
  title       = {The Design and Evaluation of {N}emesis, a Scalable Low-Latency Message-Passing Communication Subsystem},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-TM-292},
  year        = {2005},
}

@TechReport{data_transfer2006,
  author      = {Darius Buntinas and Guillaume Mercier and William Gropp},
  title       = {Data Transfers Between Processes in an {SMP} System: {P}erformance Study and Application to {MPI}},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-P1306-1105},
  year        = {2005},
  note        = {Submitted to International Conference on Parallel Processing (ICPP) 2006.},
}

@TechReport{gropp06:_paral_tools_envir,
  author      = {William Gropp and Andrew Lumsdaine},
  title       = {Parallel Tools and Environments: A Survey},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-P1342-0406},
  year        = {2006},
  note        = {To appear in a SIAM volume of work presented at the SIAM Parallel Processing Conference in 2004},
}

@TechReport{Dagstuhl:2007,
  author      = {Boyana Norris and Albert Hartono and William Gropp},
  title       = {Annotations for Productivity and Performance Portability},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-P1392-0107},
  month       = feb,
  year        = {2007},
  url         = {ftp://info.mcs.anl.gov/pub/tech_reports/reports/P1392.pdf},
}

% 2008
@TechReport{petsc-user-ref-3-0,
  author      = {Satish Balay and Kris Buschelman and Victor Eijkhout and William D. Gropp and Dinesh Kaushik and Matthew G. Knepley and Lois Curfman McInnes and Barry F. Smith and Hong Zhang},
  title       = {{PETS}c Users Manual},
  institution = {Argonne National Laboratory},
  number      = {ANL-95/11 - Revision 3.0.0},
  year        = {2008},
}
%% 2012

@TechReport{KeyesMcInnesWoodwardEtAl12,
  author      = {David E. Keyes and Lois Curfman McInnes and Carol Woodward and William D. Gropp and Eric Myra and Michael Pernice and John Bell and Jed Brown and Alain Clo and Jeffrey Connors and Emil Constantinescu and Don Estep and Kate Evans and Charbel Farhat and Ammar Hakim and Glenn Hammond and Glen Hansen and Judith Hill and Tobin Isaac and Xiangmin Jiao and Kirk Jordan and Dinesh Kaushik and Efthimios Kaxiras and Alice Koniges and Kihwan Lee and Aaron Lott and Qiming Lu and John Magerlein and Reed Maxwell and Michael McCourt and Miriam Mehl and Roger Pawlowski and Amanda Peters and Daniel Reynolds and Beatrice Riviere and Ulrich R{\"u}de and Tim Scheibe and John Shadid and Brendan Sheehan and Mark Shephard and Andrew Siegel and Barry Smith and Xianzhu Tang and Cian Wilson and Barbara Wohlmuth},
  title       = {Multiphysics Simulations: {C}hallenges and Opportunities},
  institution = {Argonne National Laboratory},
  number      = {ANL/MCS-TM-321},
  month       = jan,
  year        = {2012},
  note        = {Workshop Report, Park City, Utah, July 30 - August 6, 2011, sponsored by the Institute for Computing in Science (ICiS)},
}

@TechReport{carns2012case,
  author      = {Carns, P. and Harms, K. and Kimpe, D. and Wozniak, J. M. and Ross, R. and Ward, L. and Curry, M. and Klundt, R. and Danielson, G. and Karakoyunlu, C. and Chandy, J. and Settlemyer, B. and Gropp, W.},
  title       = {A Case for Optimistic Coordination in {HPC} Storage Systems},
  institution = {Oak Ridge National Laboratory (ORNL)},
  year        = {2012},
}

%% 2013
%% 2014

@TechReport{cfd2030tr,
  author      = {Jeffrey Slotnick and Abdollah Khodadoust and Juan Alonso and David Darmofal and William Gropp and Elizabeth Lurie and Dimitri Mavriplis},
  title       = {{CFD V}ision 2030 Study: A Path to Revolutionary Computational Aerosciences},
  institution = {NASA},
  number      = {NASA/CR-2014-218178},
  month       = mar,
  year        = {2014},
  url         = {http://ntrs.nasa.gov/search.jsp?R=20140003093},
}
%% 2015
@TechReport{petsc-user-ref,
  author      = {Satish Balay and Shrirang Abhyankar and Mark~F. Adams and Jed Brown and Peter Brune and Kris Buschelman and Lisandro Dalcin and Victor Eijkhout and William~D. Gropp and Dinesh Kaushik and Matthew~G. Knepley and Lois Curfman McInnes and Karl Rupp and Barry~F. Smith and Stefano Zampini and Hong Zhang},
  title       = {{PETS}c Users Manual},
  institution = {Argonne National Laboratory},
  number      = {ANL-95/11 - Revision 3.6},
  year        = {2015},
  url         = {http://www.mcs.anl.gov/petsc},
}

%% 2016

%% 2017

@TechReport{bdec-report,
  key         = {BDEC},
  editor      = {Terry Moore and Mark Asch},
  title       = {{BDEC} Pathways to Convergence: Toward a Shaping Strategy for a Future Software and Data Ecosystem for Scientific Inquiry},
  journal     = {Innovative Computing Laboratory Technical Report},
  institution = {University of Tennessee},
  publisher   = {University of Tennessee},
  number      = {ICL-UT-17-08},
  month       = nov,
  year        = {2017},
  url         = {http://www.exascale.org/bdec/report},
}
%% Above report has these authors:
%% Authors = "J.-C. Andre and G. Antoniu and M. Asch and R. Badia Sala and
%% M. Beck and P. Beckman and T. Bidot and F. Bodin and F. Cappello and
%% A. Choudhary and B. de Supinski and E. Deelman and J. Dongarra and
%% A. Dubey and G. Fox and H. Fu and S. Girona and W. Gropp and M. Heroux and
%% Y. Ishikawa and K. Keahey and D. Keyes and W. Kramer and J.-F. Lavignon and
%% Y. Lu and S. Matsuoka and B. Mohr and T. Moore and D. Reed and
%% S. Requena and J. Saltz and T. Schulthess and R. Stevens and M. Swany and
%% A. Szalay and W. Tang and G. Varoquaux and J.-P. Vilotte and
%% R. Wisniewski and Z. Xu and I. Zacharov"

%% 2018

% 2020
@Misc{bienz2020modeling,
  author        = {Amanda Bienz and Luke N. Olson and William D. Gropp and Shelby Lockhart},
  title         = {Modeling Data Movement Performance on Heterogeneous Architectures},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2010.10378},
  primaryClass  = {cs.DC},
}

%
% Section: Manuals
%
% No number used for manuals. Put TR number into note
@Manual{SLES-manual,
  author       = {William D. Gropp and Barry Smith},
  title        = {Simplified Linear Equation Solvers Users' Manual},
  organization = {Argonne National Laboratory},
  address      = {Argonne, IL},
  type         = {Mathematics and Computer Science Division Report},
  month        = feb,
  year         = {1993},
  note         = {ANL/MCS-93/8},
  url          = {ftp://info.mcs.anl.gov/pub/pdetools/sles.tar.Z},
  area         = {NS},
  areaseq      = {0},
}

@Manual{Chameleon-manual,
  author       = {William D. Gropp and Barry Smith},
  title        = {Users Manual for the Chameleon Parallel Programming Tools},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = jun,
  year         = {1993},
  note         = {ANL-93/23},
  area         = {NS},
  areaseq      = {0},
}

@Manual{KSP-manual,
  author       = {William D. Gropp and Barry Smith},
  title        = {Users Manual for {KSP}: Data-Structure-Neutral Codes Implementing {K}rylov Space Methods},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = aug,
  year         = {1993},
  note         = {ANL-93/30},
  area         = {NS},
  areaseq      = {0},
}

@Manual{groppluskpieper94,
  author       = {William D. Gropp and Ewing Lusk and Steven Pieper},
  title        = {Users Guide for the {ANL} {IBM} {SP1}},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = oct,
  year         = {1994},
  note         = {ANL/MCS-TM-198},
  area         = {Par},
  areaseq      = {0},
}

@Manual{gropplusk94,
  author       = {William D. Gropp and Ewing Lusk},
  title        = {Users Guide for the {ANL} {IBM} {SPx}},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = dec,
  year         = {1994},
  note         = {ANL/MCS-TM-199},
  area         = {Par},
  areaseq      = {0},
}

%1995
@Manual{doctext-manual,
  author       = {William D. Gropp},
  title        = {Users Manual for doctext: Producing Documentation from {C} Source Code},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = mar,
  year         = {1995},
  note         = {ANL/MCS-TM 206},
  area         = {S},
  areaseq      = {0},
}

@Manual{tohtml-manual,
  author       = {William D. Gropp},
  title        = {Users Manual for tohtml: Producing True Hypertext Documents from {LaTeX}},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = mar,
  year         = {1995},
  note         = {ANL/MCS-TM 207},
  area         = {S},
  areaseq      = {0},
}

@Manual{bfort-manual,
  author       = {William D. Gropp},
  title        = {Users Manual for bfort: Producing {F}ortran Interfaces to {C} Source Code},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = mar,
  year         = {1995},
  note         = {ANL/MCS-TM 208},
  area         = {S},
  areaseq      = {0},
}

%1996
@Manual{mpich-install,
  author       = {William D. Gropp and Ewing Lusk},
  title        = {Installation Guide for \texttt{mpich}, a Portable Implementation of {MPI}},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  year         = {1996},
  note         = {ANL-96/5},
  area         = {M},
  areaseq      = {0},
}

@Manual{mpich-user,
  author       = {William D. Gropp and Ewing Lusk},
  title        = {User's Guide for \texttt{mpich}, a Portable Implementation of {MPI}},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  year         = {1996},
  note         = {ANL-96/6},
  area         = {M},
  areaseq      = {0},
}

%1997
@Manual{PETScUsers,
  author       = {Satish Balay and William Gropp and Lois Curfman McInnes and Barry Smith},
  title        = {{PETSc} 2.0 Users Manual},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  year         = {1997},
  note         = {ANL-95/11},
  url          = {http://www.mcs.anl.gov/petsc/manual.html},
  area         = {P},
  areaseq      = {0},
}

@Manual{ROMIOUsers,
  author       = {Rajeev Thakur and William Gropp and Ewing Lusk},
  title        = {Users Guide for {ROMIO}: A High-Performance, Portable {MPI-IO} Implementation},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  month        = oct,
  year         = {1997},
  note         = {ANL/MCS-TM-234},
  area         = {I;M},
  areaseq      = {0},
}

%
% Section: Miscellaneous
%
@Misc{fpmpi,
  author   = {William Gropp and David Gunter and Valerie Taylor},
  title    = {{FPMPI}: {A} Fine-tuning Performance Profiling Library For {MPI} },
  note     = {Poster presented at SC2001},
  month    = nov,
  year     = {2001},
  abstract = {FPMPI is a wrapper library for the standard set of MPI functions which has been instrumented to gather performance information about the execution behavior of MPI programs. Its purpose is to aid systems managers and applications developers alike in identifying performance bottlenecks and to provide clues to optimizing an application or hardware configuration. It is simple, requiring only a relinking of existing MPI code for basic data gathering capability. The level of detail is controllable by directives specified at compile time. A companion visualization tool, FPMPIview is in development at NCSA to analyze the data produced by the profiling library.},
  area     = {Par;M;PV},
  areaseq  = {0},
}

@Misc{grop04:par-soft,
  author       = {William Gropp},
  title        = {Commodity Software?},
  howpublished = {ClusterWorld Magazine},
  year         = {2004},
  note         = {``Head Node'' article},
  OPTmonth     = {},
  OPTannote    = {},
}

%% 2011

@Misc{ghs-pm-siamcse11,
  author       = {William D. Gropp and Torsten Hoefler and Marc Snir},
  title        = {Performance Modeling for Systematic Performance Tuning},
  howpublished = {Program of the SIAM Conference on Computational Science and Engineering, Reno, Nevada},
  year         = {2011},
  note         = {Abstract only.},
  annote       = {Presentation at Minisymposium 31.},
}

%% 2022

@Misc{delta-interviews-22,
  author   = {Alameda, Jay and Stirm, Claire and Bauer, Gregory and Boerner, Timothy and Bode, Brett and Dahan, Maytal and Gropp, William and Pierce, Marlon and Yewdall, Cynthia and Zentner, Michael and Bianchi, Daniele and Babbar-Sebens, Meghna and Barton, Michael and Bell, Michael and Boufadel, Michel and Cianfrocco, Michael and Cleveland, Sean and Daskiran, Cosan and Fagnan, Kjiersten and Fox, Geoffrey and Garyfallidis, Eleftherios and Hajjar, Jerome F. and Klimeck, Gerhard and Miller, Mark A. and Perri, Mark and Roberts, Amy and Romero, Aldo H. and Song, Carol and Strachan, Alejandro and Tadmor, Ellad B. and Tucker, Greg},
  title    = {Informing Design: Exploring Community Use of {GPU} Resources for {NCSA}'s {Delta} System: Interview Highlights},
  year     = {2022},
  url      = {http://hdl.handle.net/2142/113473},
  abstract = {The Delta team conducted 44 interviews with research code leads and science gateway community leads. Thirty-four observations and requirements discovered through these conversations are highlighted in this paper.},
}

%% 2023

@misc{karrels2023finegrained,
  author        = {Ed Karrels and Lei Huang and Yuhong Kan and Ishank Arora and Yinzhi Wang and Daniel S. Katz and William D. Gropp and Zhao Zhang},
  title         = {Fine-grained Policy-driven {I/O} Sharing for Burst Buffers},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2306.11615},
  primaryClass  = {cs.DC},
}

@Misc{delta-sys-23,
  author   = {Gropp, William and Boerner, Tim and Bode, Brett and Bauer, Greg},
  title    = {Delta: Balancing {GPU} Performance with Advanced System Interfaces},
  year     = {2023},
  url      = {https://hdl.handle.net/2142/117179},
  abstract = {Advancing the productivity of computational researchers happens in a variety of ways from hardware advances to software developments. Delta---the NSF-funded advanced computing resource awarded to the University of Illinois at Urbana-Champaign's National Center for Supercomputing Applications---seeks few compromises between its GPU-focused computing power, high-performance file system, and the interfaces that will make it easier to use by a broader set of research communities than many of its predecessors. This paper surveys the system hardware and approach to usability and accessibility that will make Delta a major player in democratizing access to the kind of computing power necessary to meet the growing demands of artificial intelligence and machine learning researchers as well as traditional simulation science.},
}

% @Comment Misc{chenxx,
% @Comment author = {Surendra Byna and Yong Chen and Xian-He Sun and
% @Comment Rajeev Thakur and William Gropp},
% @Comment title = {{I/O} Access Classification and Characterization of
% @Comment Parallel {I/O} Benchmarks and Applications},
% @Comment year = {Before 2008}
% @Comment }
Generated on Thu Oct 26 08:52:26 2023 with bib2html