From 91a2b5173ec3f11695f46f0e4efb191f73813cce Mon Sep 17 00:00:00 2001 From: Sergio Date: Mon, 19 Apr 2021 16:10:39 +0200 Subject: [PATCH] Add better support for IME (#10) * Introduces a class to determine the best path for IME and MPI-IO settings. * Unifies the HAVE_MPI flag definition. --- .gitignore | 3 + CMakeLists.txt | 9 +- src/CMakeLists.txt | 1 + src/io/hdf5_writer.cpp | 10 +- src/library/implementation_interface.hpp | 38 +++++--- src/library/sonatareport.cpp | 2 +- src/library/sonatareport.h | 6 +- src/utils/imeutil.cpp | 112 +++++++++++++++++++++++ src/utils/imeutil.h | 48 ++++++++++ tests/integration/integration_test.cpp | 6 +- tests/integration/integration_test.sh.in | 4 +- tests/unit/test_sonatadata.cpp | 4 +- tests/unit/tests.cpp | 6 +- 13 files changed, 216 insertions(+), 33 deletions(-) create mode 100644 src/utils/imeutil.cpp create mode 100644 src/utils/imeutil.h diff --git a/.gitignore b/.gitignore index 0096326..f8a8c31 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ build libsonata.cpython-*.so python/__pycache__/ *.pyc +spack-* +.vscode/ +venv-clang-format/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3679f75..481d376 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,10 @@ else() set(SONATA_REPORT_COMPILE_OPTIONS -Wall -Wextra -pedantic) endif() +if(CMAKE_SYSTEM_NAME MATCHES "Linux") + add_definitions(-DSONATA_REPORT_CHECK_IME) +endif() + # ============================================================================= # Dependencies # ============================================================================= @@ -81,9 +85,8 @@ if(SONATA_REPORT_ENABLE_MPI) if (MPI_FOUND) if (HDF5_FOUND) if (HDF5_IS_PARALLEL) - set(HAVE_MPI TRUE) - # When MPI and HDF5 parallel are found, enable mpi in the report library - add_definitions(-DHAVE_MPI) + set(SONATA_REPORT_HAVE_MPI TRUE) # For integration tests + add_definitions(-DSONATA_REPORT_HAVE_MPI) message(STATUS "Both MPI and HDF5 parallel found, using reporting parallel 
implementation") else() message(STATUS "MPI and HDF5 found, but no parallel IO support for HDF5, using reporting serial implementation") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d85bfc0..2acc247 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ set(sonatareport_SOURCES "data/sonata_data.cpp" "io/hdf5_writer.cpp" "utils/logger.cpp" + "utils/imeutil.cpp" ) # Shared library diff --git a/src/io/hdf5_writer.cpp b/src/io/hdf5_writer.cpp index 3ba18e1..f07c6d5 100644 --- a/src/io/hdf5_writer.cpp +++ b/src/io/hdf5_writer.cpp @@ -19,11 +19,11 @@ template void HDF5Writer::write(const std::string& dataset_name, HDF5Writer::HDF5Writer(const std::string& report_name) : report_name_(report_name) { - hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); - std::tie(collective_list_, independent_list_) = Implementation::prepare_write(report_name, - plist_id); - // Create hdf5 file named after the report_name - const std::string file_name = report_name + ".h5"; + hid_t plist_id; + std::string file_name; + std::tie(plist_id, collective_list_, independent_list_, file_name) = + Implementation::prepare_write(report_name); + file_ = H5Fcreate(file_name.data(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); // Create enum type for the ordering of the spikes diff --git a/src/library/implementation_interface.hpp b/src/library/implementation_interface.hpp index a18ff90..33ad8b7 100644 --- a/src/library/implementation_interface.hpp +++ b/src/library/implementation_interface.hpp @@ -7,10 +7,11 @@ #include #include +#include "../utils/imeutil.h" #include "../utils/logger.h" #include "sonatareport.h" -#if defined(HAVE_MPI) +#ifdef SONATA_REPORT_HAVE_MPI #include #endif @@ -19,6 +20,8 @@ namespace bbp { namespace sonata { namespace detail { +#define FILE_EXTENSION ".h5" + template struct Implementation { static int init(const std::vector& report_names) { @@ -27,8 +30,9 @@ struct Implementation { static void close() { TImpl::close(); } - static std::tuple 
prepare_write(const std::string& report_name, hid_t plist_id) { - return TImpl::prepare_write(report_name, plist_id); + static std::tuple prepare_write( + const std::string& report_name) { + return TImpl::prepare_write(report_name); } static hsize_t get_offset(const std::string& report_name, hsize_t value) { return TImpl::get_offset(report_name, value); @@ -85,7 +89,7 @@ static void local_spikevec_sort(std::vector& isvect, std::transform(perm.begin(), perm.end(), osvecg.begin(), [&](uint64_t i) { return isvecg[i]; }); } -#if defined(HAVE_MPI) +#ifdef SONATA_REPORT_HAVE_MPI static MPI_Comm get_Comm(const std::string& report_name) { if (SonataReport::communicators_.find(report_name) != SonataReport::communicators_.end()) { @@ -162,9 +166,18 @@ struct ParallelImplementation { }; static void close(){}; - static std::tuple prepare_write(const std::string& report_name, hid_t plist_id) { - // Enable MPI access + static std::tuple prepare_write( + const std::string& report_name) { + const auto& path_info = IMEUtil::getPathInfo(report_name + FILE_EXTENSION); MPI_Info info = MPI_INFO_NULL; + + // Set proper MPI-IO hints for better IME support + if (path_info.first == FSTYPE_IME) { + IMEUtil::setMPIHints(info); + } + + // Set the MPI Info object with the hints + hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); H5Pset_fapl_mpio(plist_id, get_Comm(report_name), info); // Initialize independent/collective lists @@ -173,7 +186,7 @@ struct ParallelImplementation { H5Pset_dxpl_mpio(collective_list, H5FD_MPIO_COLLECTIVE); H5Pset_dxpl_mpio(independent_list, H5FD_MPIO_INDEPENDENT); - return std::make_tuple(collective_list, independent_list); + return std::make_tuple(plist_id, collective_list, independent_list, path_info.second); }; static hsize_t get_offset(const std::string& report_name, hsize_t value) { @@ -286,9 +299,12 @@ struct SerialImplementation { return 0; }; static void close(){}; - static std::tuple prepare_write(const std::string& /*report_name*/, - hid_t /*plist_id*/) { - 
return std::make_tuple(H5Pcreate(H5P_DATASET_XFER), H5Pcreate(H5P_DATASET_XFER)); + static std::tuple prepare_write( + const std::string& report_name) { + return std::make_tuple(H5Pcreate(H5P_FILE_ACCESS), + H5Pcreate(H5P_DATASET_XFER), + H5Pcreate(H5P_DATASET_XFER), + report_name + FILE_EXTENSION); } static hsize_t get_offset(const std::string& /*report_name*/, hsize_t /*value*/) { return 0; @@ -318,7 +334,7 @@ struct SerialImplementation { } // namespace bbp using Implementation = bbp::sonata::detail::Implementation< -#if defined(HAVE_MPI) +#ifdef SONATA_REPORT_HAVE_MPI bbp::sonata::detail::ParallelImplementation #else bbp::sonata::detail::SerialImplementation diff --git a/src/library/sonatareport.cpp b/src/library/sonatareport.cpp index cd6a369..978c20c 100644 --- a/src/library/sonatareport.cpp +++ b/src/library/sonatareport.cpp @@ -13,7 +13,7 @@ double SonataReport::atomic_step_ = 1e-8; double SonataReport::min_steps_to_record_ = 0.0; bool SonataReport::first_report = true; int SonataReport::rank_ = 0; -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI MPI_Comm SonataReport::has_nodes_ = MPI_COMM_WORLD; SonataReport::communicators_t SonataReport::communicators_; #endif diff --git a/src/library/sonatareport.h b/src/library/sonatareport.h index 8d80e12..3f25678 100644 --- a/src/library/sonatareport.h +++ b/src/library/sonatareport.h @@ -4,7 +4,7 @@ #include #include -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI #include #endif @@ -19,13 +19,13 @@ namespace sonata { class SonataReport { using reports_t = std::unordered_map>; -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI using communicators_t = std::unordered_map; #endif public: static double atomic_step_; static double min_steps_to_record_; -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI static MPI_Comm has_nodes_; static communicators_t communicators_; #endif diff --git a/src/utils/imeutil.cpp b/src/utils/imeutil.cpp new file mode 100644 index 0000000..5afb46f --- /dev/null +++ b/src/utils/imeutil.cpp @@ -0,0 
#include <cstdlib>
#include <fstream>
#include <istream>
#include <string>
#include <utility>

#define IME_PREFIX "ime://"
#define IME_CONF_ENV "IM_CLIENT_CFG_FILE"
#define IME_CONF_PATH "/etc/ddn/ime/ime.conf"
#define IME_CONF_FUSE_PATH "/etc/ddn/ime/ime-fuse.conf"
#define FUSE_SUPER_MAGIC 0x65735546  // https://man7.org/linux/man-pages/man2/fstatfs.2.html

/**
 * Extracts a mount point from an IME configuration stream.
 *
 * The stream is read line by line. The first line that contains `keyword`
 * without a '#' in front of it, and that has a '/' somewhere after the
 * keyword, provides the result: the text from that '/' up to (not including)
 * the next `sep`, or up to the end of the line when `sep` is absent.
 *
 * \param input   Configuration stream; a stream that failed to open yields the
 *                error marker because nothing can be read from it.
 * \param keyword Setting name to look for (e.g. "mount_point").
 * \param sep     Character that terminates the path on the matching line.
 * \return The mount point, or the marker ":Error:-1:" when no usable line exists.
 */
static std::string parseMountPoint(std::istream& input,
                                   const std::string& keyword,
                                   const char sep) {
    std::string line;
    while (std::getline(input, line)) {
        const auto key_pos = line.find(keyword);
        // Ignore lines without the keyword, and lines where it is commented out
        if (key_pos == std::string::npos || line.find('#') <= key_pos) {
            continue;
        }

        // A keyword that is not followed by an absolute path cannot provide a
        // mount point; keep scanning instead of calling substr() with npos,
        // which would throw std::out_of_range
        const auto path_pos = line.find('/', key_pos);
        if (path_pos == std::string::npos) {
            continue;
        }

        const auto path_end = line.find(sep, path_pos);
        const auto length = (path_end == std::string::npos) ? std::string::npos
                                                            : path_end - path_pos;
        return line.substr(path_pos, length);
    }
    return ":Error:-1:";
}

/**
 * Parses the IME config. files to determine the BFS and FUSE mount points.
 *
 * The BFS mount point is read from the "mount_point" setting (terminated by
 * ';') of the file named by the IM_CLIENT_CFG_FILE environment variable, or of
 * /etc/ddn/ime/ime.conf when that variable is unset or empty. The FUSE mount
 * point is read from the "MNTDIR" setting (terminated by a single quote) of
 * /etc/ddn/ime/ime-fuse.conf.
 *
 * \return Pair of (BFS mount point, FUSE mount point). An element is set to
 *         ":Error:-1:" when its file cannot be read or holds no usable setting.
 */
std::pair<std::string, std::string> getIMEMountPoints() {
    const char* env = std::getenv(IME_CONF_ENV);
    // An empty variable cannot name a file, so treat it like an unset one
    const char* conf_path = (env != nullptr && env[0] != '\0') ? env : IME_CONF_PATH;

    std::ifstream bfs_conf(conf_path);
    std::ifstream fuse_conf(IME_CONF_FUSE_PATH);

    return {parseMountPoint(bfs_conf, "mount_point", ';'),
            parseMountPoint(fuse_conf, "MNTDIR", '\'')};
}
#include <string>
#include <utility>
#ifdef SONATA_REPORT_HAVE_MPI
#include <mpi.h>
#endif
#ifdef SONATA_REPORT_CHECK_IME
#include <climits>       // PATH_MAX
#include <cstdlib>       // realpath
#include <sys/statfs.h>  // statfs
#endif

namespace bbp {
namespace sonata {

/**
 * Enum that defines the type of backend filesystem from a path.
 */
typedef enum {
    FSTYPE_DEFAULT = 0x6f510ca1,  // No IME route applies; traditional file system assumed
    FSTYPE_IME = 0x13e00000,      // Returned together with a path that carries the "ime:" prefix
    FSTYPE_UNKNOWN = 0xffffffff,  // Error (e.g., the folder of a relative path cannot be resolved)
} fstype_t;

/**
 * Helper class that defines utilities to make better use of IME.
 * It only groups static functions and cannot be instantiated.
 */
class IMEUtil
{
  public:
    /**
     * Determines the type of backend filesystem from a given path, and
     * provides the optimal IME path to use with libraries (e.g., MPI-IO).
     * \param path Path to the file or folder.
     * \return Filesystem type and optimal path for IME.
     */
    static std::pair<fstype_t, std::string> getPathInfo(std::string path);

#ifdef SONATA_REPORT_HAVE_MPI
    /**
     * Defines the MPI Hints necessary to use IME efficiently with MPI-IO.
     * \param info MPI Info object to be created / updated.
     * \return MPI_SUCCESS if successful, or an MPI-based error.
     */
    static int setMPIHints(MPI_Info& info);
#endif
  private:
    IMEUtil() = delete;
    ~IMEUtil() = delete;
};

}  // namespace sonata
}  // namespace bbp

#ifdef SONATA_REPORT_CHECK_IME
/**
 * Verifies that a given path is under an active FUSE mount point.
 * \return true when statfs() succeeds on the path and reports the FUSE
 *         filesystem magic number; false otherwise.
 */
bool isFUSEMountPoint(const std::string& path) {
    struct statfs st;
    return statfs(path.c_str(), &st) == 0 && st.f_type == FUSE_SUPER_MAGIC;
}

/**
 * Tells whether `path` is the mount point itself or lies below it. The match
 * must end on a path separator, so "/ime/bfs2/x" is not under "/ime/bfs".
 */
static bool isUnderMountPoint(const std::string& path, const std::string& mount) {
    if (mount.empty() || path.compare(0, mount.size(), mount) != 0) {
        return false;
    }
    return path.size() == mount.size() || mount.back() == '/' || path[mount.size()] == '/';
}
#endif

/**
 * Classifies a path and rewrites it for native IME access when possible.
 *
 * With SONATA_REPORT_CHECK_IME defined:
 *  - a path starting with "ime:" is returned unchanged as FSTYPE_IME;
 *  - a relative path is made absolute by resolving its folder with realpath();
 *    if that fails, (FSTYPE_UNKNOWN, original path) is returned;
 *  - a path containing "/ime/" that lies under the BFS mount point is returned
 *    as FSTYPE_IME with the "ime://" prefix added;
 *  - a path containing "/ime/" that lies under the active FUSE mount point is
 *    returned as FSTYPE_IME, with the FUSE mount point replaced by
 *    "ime://" followed by the BFS mount point.
 * In every other case, and always when SONATA_REPORT_CHECK_IME is not
 * defined, the result is FSTYPE_DEFAULT with the (possibly resolved) path.
 */
std::pair<bbp::sonata::fstype_t, std::string> bbp::sonata::IMEUtil::getPathInfo(
    std::string path) {
#ifdef SONATA_REPORT_CHECK_IME
    // If the path begins with "ime:", assume native access
    if (path.compare(0, 4, "ime:") == 0) {
        return {FSTYPE_IME, path};
    }

    // Resolve the full path and return an error if not possible (e.g., file,
    // original parent folder, or both do not exist yet)
    if (path.empty() || path.front() != '/') {
        const auto limit = path.find_last_of('/');
        const bool has_folder = (limit != std::string::npos);
        const std::string folder = has_folder ? path.substr(0, limit) : ".";
        char full_path[PATH_MAX];
        if (realpath(folder.c_str(), full_path) == nullptr) {
            return {FSTYPE_UNKNOWN, path};
        }
        // Skip the separator found above so that it is not emitted twice
        const std::string file_name = path.substr(has_folder ? limit + 1 : 0);
        std::string resolved(full_path);
        if (resolved.empty() || resolved.back() != '/') {
            resolved += '/';
        }
        path = resolved + file_name;
    }

    // Check if the path contains the IME keyword
    if (path.find("/ime/") != std::string::npos) {
        // Parse config. files and verify FUSE mount point only once for performance
        static const auto mnt_paths = getIMEMountPoints();
        static const bool ime_fuse_active = isFUSEMountPoint(mnt_paths.second);

        // Check if the path is under the BFS mount point
        if (isUnderMountPoint(path, mnt_paths.first)) {
            return {FSTYPE_IME, IME_PREFIX + path};
        }

        // Lastly, evaluate if the path is under a FUSE mount point
        if (ime_fuse_active && isUnderMountPoint(path, mnt_paths.second)) {
            const std::string::size_type offset = mnt_paths.second.size();
            return {FSTYPE_IME, IME_PREFIX + mnt_paths.first + path.substr(offset)};
        }
    }
#endif
    // At this point, assume a traditional file system
    return {FSTYPE_DEFAULT, std::move(path)};
}

#ifdef SONATA_REPORT_HAVE_MPI
/**
 * Creates the MPI Info object when `info` is MPI_INFO_NULL, then sets the
 * "romio_cb_write" and "romio_ds_write" hints to "disable" (two-phase I/O and
 * data sieving). Both hints are always attempted once the object exists.
 * \return MPI_SUCCESS, or the code of the first MPI call that failed.
 */
int bbp::sonata::IMEUtil::setMPIHints(MPI_Info& info) {
    // Create the MPI Info object, if needed
    if (info == MPI_INFO_NULL) {
        const int create_status = MPI_Info_create(&info);
        if (create_status != MPI_SUCCESS) {
            return create_status;
        }
    }

    // Set the hints to disable two-phase I/O and data sieving
    const int cb_status = MPI_Info_set(info, "romio_cb_write", "disable");
    const int ds_status = MPI_Info_set(info, "romio_ds_write", "disable");

    // Report one real error code rather than a bitwise mix of two codes
    return (cb_status != MPI_SUCCESS) ? cb_status : ds_status;
}
#endif
@@ #include #include +#include #include #include -#include -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI #include #endif diff --git a/tests/unit/tests.cpp b/tests/unit/tests.cpp index 2e768b2..569260c 100644 --- a/tests/unit/tests.cpp +++ b/tests/unit/tests.cpp @@ -1,16 +1,16 @@ #define CATCH_CONFIG_RUNNER #include -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI #include #endif int main(int argc, char* argv[]) { -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI MPI_Init(nullptr, nullptr); #endif int result = Catch::Session().run(argc, argv); -#ifdef HAVE_MPI +#ifdef SONATA_REPORT_HAVE_MPI MPI_Finalize(); #endif return result;