From 487410f83689c50b5f128c748556bb1c362733c6 Mon Sep 17 00:00:00 2001
From: Felix Schlepper
Date: Sat, 7 Dec 2024 12:42:39 +0100
Subject: [PATCH] AOD: File Inspector

---
 Framework/AODMerger/CMakeLists.txt       |  15 +-
 Framework/AODMerger/src/aodInspector.cxx | 406 +++++++++++++++++++++++
 2 files changed, 418 insertions(+), 3 deletions(-)
 create mode 100644 Framework/AODMerger/src/aodInspector.cxx

diff --git a/Framework/AODMerger/CMakeLists.txt b/Framework/AODMerger/CMakeLists.txt
index da74261cf64c8..e17ba86182fed 100644
--- a/Framework/AODMerger/CMakeLists.txt
+++ b/Framework/AODMerger/CMakeLists.txt
@@ -12,14 +12,23 @@
 o2_add_executable(merger
                   COMPONENT_NAME aod
                   SOURCES src/aodMerger.cxx
-                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
+                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
 
 o2_add_executable(thinner
                   COMPONENT_NAME aod
                   SOURCES src/aodThinner.cxx
-                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
+                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
 
 o2_add_executable(strainer
                   COMPONENT_NAME aod
                   SOURCES src/aodStrainer.cxx
-                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
+                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net)
+
+o2_add_executable(inspector
+                  COMPONENT_NAME aod
+                  SOURCES src/aodInspector.cxx
+                  PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net
+                                        Boost::program_options
+                                        O2::Version
+                                        O2::CommonUtils
+                                        O2::FrameworkLogger)
diff --git a/Framework/AODMerger/src/aodInspector.cxx b/Framework/AODMerger/src/aodInspector.cxx
new file mode 100644
index 0000000000000..a34423a34f272
--- /dev/null
+++ b/Framework/AODMerger/src/aodInspector.cxx
@@ -0,0 +1,406 @@
+// AO2D inspector: runs a configurable list of commands (basic file info, meta
+// data dump, per-table size summary) on a single AO2D file or a list of files.
+
+#include <boost/program_options.hpp>
+
+#include "O2Version.h"
+#include "Framework/Logger.h"
+#include "CommonUtils/StringUtils.h"
+
+#include "TFile.h"
+#include "TBranch.h"
+#include "TDirectory.h"
+#include "TKey.h"
+#include "TTree.h"
+#include "TGrid.h"
+#include "TMap.h"
+#include "TObjString.h"
+#include "TStopwatch.h"
+
+#include <algorithm>
+#include <array>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace bpo = boost::program_options;
+namespace fs = std::filesystem;
+
+using FilePtr = std::shared_ptr<TFile>;
+using CommandMap = std::unordered_map<std::string, std::string>;
+constexpr double kb = 1024.;
+constexpr double mb = kb * kb;
+constexpr double b2mb = 1. / mb;
+
+// Description of possible exit codes
+enum ExitCodes : int {
+  Success = 0,         // there is only one success code
+  Fail = 1,            // non-descriptive general fail
+  ArgParse = 2,        // failed cli arg parse
+  NonExistantFile = 3, // the input file does not exist
+  InputList = 4,       // input file list not opened
+};
+
+// Collected info for a specific table
+struct TableInfo {
+  Long64_t uncBytes{0};  // sum of TTree::GetTotBytes()
+  Long64_t compBytes{0}; // sum of TTree::GetZipBytes()
+  Long64_t seen{0};      // number of trees accumulated
+};
+using TableInfoMap = std::unordered_map<std::string, TableInfo>;
+void printTableInfoMap(const TableInfoMap& info);
+void printMetaData(const TMap* meta);
+bool isAlienPath(const std::string& fileName);
+bool checkFileExists(const std::string& fileName);
+FilePtr openFile(const std::string& fileName);
+
+// Commands
+class Command
+{
+ public:
+  Command(int precedence, const std::string& config) : mPrecedence(precedence), mConfig(config) {}
+  virtual ~Command() = default;
+  virtual void init() = 0;
+  virtual void run(FilePtr file) = 0;
+
+  // commands with a lower value are run first
+  int getPrecedence() const noexcept { return mPrecedence; }
+
+ private:
+  const int mPrecedence{0};
+  const std::string mConfig; // stored by value so it cannot dangle
+};
+std::unique_ptr<Command> createCommand(const std::string& name, const std::string& config);
+
+// Declares the options, parses argv into vm and fills cmds from the comma
+// separated '--commands' value, where every entry is 'name' or 'name=config'.
+// Returns false if the command line could not be parsed.
+bool initOptionsAndParse(bpo::options_description& options, int argc, char* argv[], bpo::variables_map& vm, CommandMap& cmds)
+{
+  // Define and parse options
+  options.add_options()(
+    "help,h", "Program description")(
+    "input,i", bpo::value<std::string>()->default_value("AO2D.root"), "Single AO2D file or text file with path to files per line")(
+    "commands,c", bpo::value<std::string>()->default_value("info"), "Comma separated list of commands to execute");
+  try {
+    bpo::store(bpo::parse_command_line(argc, argv, options), vm);
+    bpo::notify(vm);
+  } catch (const bpo::error& e) {
+    LOGP(error, "Exception during command-line argument parsing: '{}'", e.what());
+    LOG(info) << options;
+    return false;
+  }
+
+  // Build command map
+  const auto& str = vm["commands"].as<std::string>();
+  std::stringstream ss(str);
+  std::string cmd;
+  while (std::getline(ss, cmd, ',')) {
+    if (cmd.empty()) { // tolerate stray commas, e.g. 'info,,size'
+      continue;
+    }
+    auto eq = cmd.find('=');
+    if (eq != std::string::npos) {
+      cmds[cmd.substr(0, eq)] = cmd.substr(eq + 1);
+    } else {
+      cmds[cmd] = "";
+    }
+  }
+
+  return true;
+}
+
+int main(int argc, char* argv[])
+{
+  LOGP(info, "AO2D Inspector (Version '{}'; git '{}'; Info '{}')", o2::fullVersion(), o2::gitRevision(), o2::getBuildInfo());
+  bpo::options_description options("Allowed options");
+  bpo::variables_map vm;
+  CommandMap cmds;
+  if (!initOptionsAndParse(options, argc, argv, vm, cmds)) {
+    return ExitCodes::ArgParse;
+  }
+
+  if (vm.count("help")) { // first thing check if help requested
+    LOG(info) << options;
+    return ExitCodes::Success;
+  }
+
+  auto inputFile = vm["input"].as<std::string>();
+  // alien:// inputs are not looked up on the local file system
+  if (!isAlienPath(inputFile) && !checkFileExists(inputFile)) {
+    return ExitCodes::NonExistantFile;
+  }
+
+  TStopwatch watch;
+  watch.Start();
+
+  // build list of files to open eventually
+  std::vector<std::string> fileNames;
+  if (inputFile.ends_with(".root")) {
+    fileNames.emplace_back(inputFile);
+  } else { // treat as input list
+    std::ifstream file(inputFile);
+    if (!file.is_open()) {
+      LOGP(error, "Cannot open input list '{}'", inputFile);
+      return ExitCodes::InputList;
+    }
+    std::string line;
+    while (std::getline(file, line)) {
+      o2::utils::Str::trim(line);
+      if (line.empty()) {
+        continue;
+      }
+      fileNames.emplace_back(line);
+    }
+  }
+  LOGP(info, "Prepared {} files for parsing ({:.2f} s)", fileNames.size(), watch.RealTime());
+
+  std::vector<std::unique_ptr<Command>> commands;
+  for (const auto& [cmd, config] : cmds) {
+    auto command = createCommand(cmd, config);
+    if (command == nullptr) { // createCommand has already logged the unknown name
+      return ExitCodes::Fail;
+    }
+    command->init();
+    commands.emplace_back(std::move(command));
+  }
+  std::sort(commands.begin(), commands.end(), [](const auto& a, const auto& b) {
+    return a->getPrecedence() < b->getPrecedence();
+  });
+
+  watch.Start(true);
+  for (const auto& file : fileNames) {
+    LOGP(info, "{:*^30}", file);
+    auto f = openFile(file);
+    if (f == nullptr) {
+      continue;
+    }
+    for (auto& cmd : commands) {
+      cmd->run(f);
+    }
+  }
+  LOGP(info, "Testing all files finished ({:.2f} s)", watch.RealTime());
+
+  return ExitCodes::Success;
+}
+
+// Returns true if fileName is an 'alien://' URL.
+bool isAlienPath(const std::string& fileName)
+{
+  return fileName.starts_with("alien://");
+}
+
+// Logs an error and returns false if fileName is not an existing path.
+bool checkFileExists(const std::string& fileName)
+{
+  auto b = o2::utils::Str::pathExists(fileName);
+  if (!b) {
+    LOGP(error, "File '{}' does not exist!", fileName);
+  }
+  return b;
+}
+
+// Opens fileName read-only, connecting to the grid first for 'alien://' URLs.
+// Returns nullptr (after logging the reason) if the file cannot be opened.
+FilePtr openFile(const std::string& fileName)
+{
+  std::string realFile;
+  if (isAlienPath(fileName)) {
+    // no local existence check here, TFile::Open below reports failures
+    if (!gGrid && !TGrid::Connect("alien://")) {
+      LOGP(error, "Failed to initialize alien for {}", fileName);
+      return nullptr;
+    }
+    realFile = fileName;
+  } else {
+    if (!checkFileExists(fileName)) {
+      return nullptr;
+    }
+    realFile = o2::utils::Str::getFullPath(fileName);
+  }
+
+  FilePtr file(TFile::Open(realFile.c_str(), "READ"));
+  if (file == nullptr || !file->IsOpen() || file->IsZombie()) {
+    LOGP(error, "Failed to open file '{}' (from '{}')", realFile, fileName);
+    return nullptr;
+  }
+  return file;
+}
+
+// Prints one row per table, sorted by descending compressed size, plus totals.
+// Sizes are printed in MB; tables without compressed bytes only enter the totals.
+void printTableInfoMap(const TableInfoMap& tableInfoMap)
+{
+  constexpr int width = 20;
+
+  const std::array<std::string, 4> header{"Name", "UncBytes", "CompBytes", "Seen"};
+  std::string sep;
+  for (std::size_t i{0}; i < header.size(); ++i) {
+    sep += "+" + std::string(width + 2, '-');
+  }
+  sep += "+";
+
+  LOGP(info, "{}", sep);
+  LOGP(info, "| {:<{}} | {:>{}} | {:>{}} | {:>{}} |", header[0], width, header[1], width, header[2], width, header[3], width);
+  LOGP(info, "{}", sep);
+
+  Long64_t totUncBytes{0}, totCompBytes{0}, totSeen{0};
+
+  std::vector<std::pair<std::string, TableInfo>> sortedInfoMap(tableInfoMap.begin(), tableInfoMap.end());
+  std::sort(sortedInfoMap.begin(), sortedInfoMap.end(), [](const auto& a, const auto& b) { return a.second.compBytes > b.second.compBytes; });
+
+  for (const auto& [name, table] : sortedInfoMap) {
+    totUncBytes += table.uncBytes;
+    totCompBytes += table.compBytes;
+    totSeen += table.seen;
+    if (table.compBytes == 0) {
+      continue;
+    }
+
+    LOGP(info, "| {:<{}} | {:>{}.3f} | {:>{}.3f} | {:>{}} |", name, width, table.uncBytes * b2mb, width, table.compBytes * b2mb, width, table.seen, width);
+  }
+  LOGP(info, "{}", sep);
+  LOGP(info, "| {:<{}} | {:>{}.3f} | {:>{}.3f} | {:>{}} |", "Total", width, totUncBytes * b2mb, width, totCompBytes * b2mb, width, totSeen, width);
+  LOGP(info, "{}", sep);
+}
+
+// Prints every key/value pair of meta by the entries' TObject names.
+void printMetaData(const TMap* meta)
+{
+  if (meta == nullptr) {
+    LOGP(warn, "File does not contain meta data!");
+    return;
+  }
+  LOGP(info, "Found MetaData with:");
+  std::unique_ptr<TIterator> iter(meta->MakeIterator()); // the caller owns the iterator
+  TObject* key{nullptr};
+  while ((key = iter->Next())) {
+    const auto value = meta->GetValue(key);
+    // GetName() is virtual, so no cast to TObjString is needed and a key of
+    // another type is never dereferenced through a failed cast
+    LOGP(info, " - {}: {}", key->GetName(), (value ? value->GetName() : "null"));
+  }
+}
+
+/// Basic file information
+class CmdInfo final : public Command
+{
+ public:
+  CmdInfo(int pres, const std::string& config) : Command(pres, config) {}
+  void init() final {}
+  void run(FilePtr file) final
+  {
+    constexpr int width = 20;
+    LOGP(info, " - {:>{}}: {:>{}.3f}", "Size (mb)", width, file->GetSize() * b2mb, width);
+    LOGP(info, " - {:>{}}: {:>{}}", "Compression", width, file->GetCompressionSettings(), width);
+  }
+};
+
+/// Meta Data
+class CmdMeta final : public Command
+{
+ public:
+  CmdMeta(int pres, const std::string& config) : Command(pres, config) {}
+  void init() final {}
+  void run(FilePtr file) final
+  {
+    // GetListOfKeys() holds TKey objects, so the map has to be read with Get();
+    // the returned TMap belongs to the caller
+    std::unique_ptr<TMap> meta(file->Get<TMap>("metaData"));
+    printMetaData(meta.get());
+  }
+};
+
+/// Parse the file
+class CmdSize final : public Command
+{
+ public:
+  CmdSize(int pres, const std::string& config) : Command(pres, config) {}
+  void init() final {}
+  void run(FilePtr file) final
+  {
+    auto keys = file->GetListOfKeys();
+    keys->Sort();
+    for (auto okey : *keys) {
+      auto key = dynamic_cast<TKey*>(okey);
+      if (key == nullptr) {
+        continue;
+      }
+      TString name = key->GetName();
+      TString keyClassName = key->GetClassName();
+      if (keyClassName != "TDirectoryFile") {
+        continue;
+      }
+
+      if (!name.BeginsWith("DF_")) {
+        continue;
+      }
+
+      auto df = file->Get<TDirectory>(name);
+      if (df != nullptr) {
+        checkDataFrame(df);
+      }
+    }
+
+    // mInfo is never reset, so the table also contains all previously run files
+    printTableInfoMap(mInfo);
+  }
+
+ private:
+  TableInfoMap mInfo;
+
+  // Accumulates all 'O2*' trees of one data frame directory into mInfo.
+  void checkDataFrame(TDirectory* dir)
+  {
+    auto keys = dir->GetListOfKeys();
+    keys->Sort();
+    for (auto okey : *keys) {
+      auto key = dynamic_cast<TKey*>(okey);
+      if (key == nullptr) {
+        continue;
+      }
+
+      if (std::string(key->GetClassName()) != "TTree") {
+        continue;
+      }
+
+      const auto name = key->GetName();
+      if (name[0] != 'O' || name[1] != '2') {
+        continue;
+      }
+      TableInfo& table = mInfo[name]; // default-constructs the entry on first use
+      auto tree = dir->Get<TTree>(name);
+      if (tree != nullptr) {
+        checkTree(tree, table);
+      }
+    }
+  }
+
+  // Adds the sizes of one tree to its table entry.
+  void checkTree(TTree* tree, TableInfo& table)
+  {
+    table.seen++;
+    table.compBytes += tree->GetZipBytes();
+    table.uncBytes += tree->GetTotBytes();
+  }
+};
+
+// Creates the command registered under name; the integer is its run precedence.
+std::unique_ptr<Command> createCommand(const std::string& name, const std::string& config)
+{
+  if (name == "info") {
+    return std::make_unique<CmdInfo>(0, config);
+  } else if (name == "meta") {
+    return std::make_unique<CmdMeta>(1, config);
+  } else if (name == "size") {
+    return std::make_unique<CmdSize>(2, config);
+  } else {
+    LOGP(fatal, "Unknown command '{}'!", name);
+  }
+  return nullptr; // unreachable
+}