{"created":"2023-09-20T08:10:27.811697+00:00","id":6401,"links":{},"metadata":{"_buckets":{"deposit":"4475dc19-1cca-4b4e-b90a-e1d7e6841b2d"},"_deposit":{"created_by":7,"id":"6401","owners":[7],"pid":{"revision_id":0,"type":"depid","value":"6401"},"status":"published"},"_oai":{"id":"oai:nied-repo.bosai.go.jp:00006401","sets":[]},"author_link":[],"item_10001_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2018","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"23","bibliographicPageStart":"12","bibliographic_titles":[{"bibliographic_title":"PROCEEDINGS OF THE INTERNATIONAL CONFERENCE ON HIGH PERFORMANCE COMPUTING IN ASIA-PACIFIC REGION (HPC ASIA 2018)","bibliographic_titleLang":"en"}]}]},"item_10001_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"MPI-IO has been used in an internal I/O interface layer of HDF5 or PnetCDF, where collective MPI-IO plays a big role in parallel I/O to manage a huge scale of scientific data. However, existing collective MPI-IO optimization named two-phase I/O has not been tuned enough for recent supercomputers consisting of mesh/torus interconnects and a huge scale of parallel file systems due to lack of topology-awareness in data transfers and optimization for parallel file systems. In this paper, we propose I/O throttling and topology-aware stepwise data aggregation in two-phase I/O of ROMIO, which is a representative MPI-IO library, in order to improve collective MPI-IO performance even if we have multiple processes per compute node. Throttling I/O requests going to a target file system mitigates I/O request contention, and consequently I/O performance improvements are achieved in file access phase of two-phase I/O. Topology-aware aggregator layout with paying attention to multiple aggregators per compute node alleviates contention in data aggregation phase of two-phase I/O. In addition, stepwise data aggregation improves data aggregation performance. 
HPIO benchmark results on the K computer indicate that the proposed optimization has achieved up to about 73% and 39% improvements in write performance compared with the original implementation using 12,288 and 24,576 processes on 3,072 and 6,144 compute nodes, respectively.","subitem_description_language":"en","subitem_description_type":"Other"}]},"item_10001_publisher_8":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"ASSOC COMPUTING MACHINERY","subitem_publisher_language":"en"}]},"item_10001_relation_14":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type_id":{"subitem_relation_type_id_text":"10.1145/3149457.3149464"}}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuichi Tsujita","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Atsushi Hori","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Toyohisa Kameyama","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Atsuya Uno","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Fumiyoshi Shoji","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Yutaka Ishikawa","creatorNameLang":"en"}]}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_title":"Improving Collective MPI-IO Using Topology-Aware Stepwise Data Aggregation with I/O Throttling","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Improving Collective MPI-IO Using Topology-Aware Stepwise Data Aggregation with I/O Throttling","subitem_title_language":"en"}]},"item_type_id":"40001","owner":"7","path":["1670839190650"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2023-09-20"},"publish_date":"2023-09-20","publish_status":"0","recid":"6401","relation_version_is_last":true,"title":["Improving Collective MPI-IO Using Topology-Aware Stepwise Data Aggregation with I/O Throttling"],"weko_creator_id":"7","weko_shared_id":-1},"updated":"2023-09-20T08:10:30.023098+00:00"}
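Editorial note, not part of the deposited record: the abstract above concerns ROMIO's two-phase (collective buffering) I/O path, in which designated aggregator processes gather data before issuing file-system requests. The sketch below is a minimal, hedged C example of how an application exercises that path: a collective write steered by standard ROMIO hints (cb_nodes, romio_cb_write). The paper's I/O throttling and topology-aware stepwise aggregation are internal modifications to ROMIO and are not shown here; the file path and hint values are hypothetical placeholders.

/* Minimal collective MPI-IO write exercising ROMIO's two-phase I/O.
 * Sketch only: the throttling/stepwise-aggregation logic from the paper
 * lives inside the (modified) ROMIO library, not in application code. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, nprocs;
    const int count = 1 << 20;                 /* 1 Mi doubles per process */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    double *buf = malloc((size_t)count * sizeof(double));
    for (int i = 0; i < count; i++)
        buf[i] = (double)rank;

    /* Standard ROMIO hints that control two-phase (collective buffering) I/O:
     * cb_nodes sets the number of aggregator processes,
     * romio_cb_write forces the collective-buffering write path. */
    MPI_Info info;
    MPI_Info_create(&info);
    MPI_Info_set(info, "romio_cb_write", "enable");
    MPI_Info_set(info, "cb_nodes", "64");      /* example value, not from the paper */

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "/scratch/hpio_test.dat",   /* hypothetical path */
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &fh);

    /* Each rank writes one contiguous block; aggregation into large,
     * file-system-friendly requests happens inside ROMIO's two-phase I/O. */
    MPI_Offset offset = (MPI_Offset)rank * count * (MPI_Offset)sizeof(double);
    MPI_File_write_at_all(fh, offset, buf, count, MPI_DOUBLE, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
    MPI_Info_free(&info);
    free(buf);
    MPI_Finalize();
    return 0;
}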