From cac1e1224ac09d20d75d226d5c3ac599c023219b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Tue, 17 Oct 2023 10:58:30 +0200 Subject: [PATCH] adding hadd file and more name changes --- .pre-commit-config.yaml | 212 ++++++++++------ README.md | 2 +- pyproject.toml | 8 +- src/odapt/__init__.py | 2 +- src/odapt/operations/__init__.py | 1 + src/odapt/operations/hadd.py | 418 +++++++++++++++++++++++++++++++ 6 files changed, 564 insertions(+), 79 deletions(-) create mode 100644 src/odapt/operations/__init__.py create mode 100644 src/odapt/operations/hadd.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 474b5d2..6b2bbf4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,77 +2,143 @@ ci: autoupdate_commit_msg: "chore: update pre-commit hooks" autofix_commit_msg: "style: pre-commit fixes" + repos: - - repo: https://github.com/psf/black-pre-commit-mirror - rev: "23.7.0" - hooks: - - id: black-jupyter - - - repo: https://github.com/adamchainz/blacken-docs - rev: "1.16.0" - hooks: - - id: blacken-docs - additional_dependencies: [black==23.7.0] - - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v4.4.0" - hooks: - - id: check-added-large-files - - id: check-case-conflict - - id: check-merge-conflict - - id: check-symlinks - - id: check-yaml - - id: debug-statements - - id: end-of-file-fixer - - id: mixed-line-ending - - id: name-tests-test - args: ["--pytest-test-first"] - - id: requirements-txt-fixer - - id: trailing-whitespace - - - repo: https://github.com/pre-commit/pygrep-hooks - rev: "v1.10.0" - hooks: - - id: rst-backticks - - id: rst-directive-colons - - id: rst-inline-touching-normal - - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.3" - hooks: - - id: prettier - types_or: [yaml, markdown, html, css, scss, javascript, json] - args: [--prose-wrap=always] - - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.0.287" - hooks: - - id: ruff - args: ["--fix", "--show-fixes"] - - - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.5.1" - hooks: - - id: mypy - files: src|tests - args: [] - additional_dependencies: - - pytest - - - repo: https://github.com/codespell-project/codespell - rev: "v2.2.5" - hooks: - - id: codespell - - - repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.9.0.5" - hooks: - - id: shellcheck - - - repo: local - hooks: - - id: disallow-caps - name: Disallow improper capitalization - language: pygrep - entry: PyBind|Numpy|Cmake|CCache|Github|PyTest - exclude: .pre-commit-config.yaml +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + exclude_types: [svg] + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: trailing-whitespace + - id: name-tests-test + args: ["--pytest-test-first"] + +- repo: https://github.com/asottile/setup-cfg-fmt + rev: v2.4.0 + hooks: + - id: setup-cfg-fmt + args: [--include-version-classifiers, --max-py-version=3.11] + +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.9.1 + hooks: + - id: black + +- repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + additional_dependencies: [pyyaml] + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.291 + hooks: + - id: ruff + args: ["--fix", "--show-fixes"] + +- repo: https://github.com/codespell-project/codespell + rev: v2.2.5 + hooks: + - id: codespell + args: ["-L", "ue,subjet,parms,fo,numer,thre"] + +- repo: local + hooks: + - id: disallow-caps + name: disallow improper capitalization + language: pygrep + entry: PyBind|Cmake|CCache|Github|PyTest + exclude: .pre-commit-config.yaml + +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.9.0.5" + hooks: + - id: shellcheck + +- repo: https://github.com/asottile/pyupgrade + rev: v3.13.0 + hooks: + - id: pyupgrade + +# repos: +# - repo: https://github.com/psf/black-pre-commit-mirror +# rev: 23.9.1 +# hooks: +# - id: black + +# - repo: https://github.com/adamchainz/blacken-docs +# rev: "1.16.0" +# hooks: +# - id: blacken-docs +# additional_dependencies: [black==23.7.0] + +# - repo: https://github.com/pre-commit/pre-commit-hooks +# rev: "v4.4.0" +# hooks: +# - id: check-added-large-files +# - id: check-case-conflict +# - id: check-merge-conflict +# - id: check-symlinks +# - id: check-yaml +# - id: debug-statements +# - id: end-of-file-fixer +# - id: mixed-line-ending +# - id: name-tests-test +# args: ["--pytest-test-first"] +# - id: requirements-txt-fixer +# - id: trailing-whitespace + +# - repo: https://github.com/pre-commit/pygrep-hooks +# rev: "v1.10.0" +# hooks: +# - id: rst-backticks +# - id: rst-directive-colons +# - id: rst-inline-touching-normal + +# - repo: https://github.com/pre-commit/mirrors-prettier +# rev: "v3.0.3" +# hooks: +# - id: prettier +# types_or: [yaml, markdown, html, css, scss, javascript, json] +# args: [--prose-wrap=always] + +# - repo: https://github.com/astral-sh/ruff-pre-commit +# rev: "v0.0.287" +# hooks: +# - id: ruff +# args: ["--fix", "--show-fixes"] + +# - repo: https://github.com/pre-commit/mirrors-mypy +# rev: "v1.5.1" +# hooks: +# - id: mypy +# files: src|tests +# args: [] +# additional_dependencies: +# - pytest + +# - repo: https://github.com/codespell-project/codespell +# rev: "v2.2.5" +# hooks: +# - id: codespell + +# - repo: https://github.com/shellcheck-py/shellcheck-py +# rev: "v0.9.0.5" +# hooks: +# - id: shellcheck + +# - repo: local +# hooks: +# - id: disallow-caps +# name: Disallow improper capitalization +# language: pygrep +# entry: PyBind|Numpy|Cmake|CCache|Github|PyTest +# exclude: .pre-commit-config.yaml diff --git a/README.md b/README.md index 7d7f3b2..427a85f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Odapt +# odapt [![Actions Status][actions-badge]][actions-link] [![Documentation Status][rtd-badge]][rtd-link] diff --git a/pyproject.toml b/pyproject.toml index 5edf88b..8e711e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,10 +49,10 @@ docs = [ ] [project.urls] -Homepage = "https://github.com/zbilodea/Odapt" -"Bug Tracker" = "https://github.com/zbilodea/Odapt/issues" -Discussions = "https://github.com/zbilodea/Odapt/discussions" -Changelog = "https://github.com/zbilodea/Odapt/releases" +Homepage = "https://github.com/zbilodea/odapt" +"Bug Tracker" = "https://github.com/zbilodea/odapt/issues" +Discussions = "https://github.com/zbilodea/odapt/discussions" +Changelog = "https://github.com/zbilodea/odapt/releases" [tool.hatch] diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py index 872d7dc..6c2b2cb 100644 --- a/src/odapt/__init__.py +++ b/src/odapt/__init__.py @@ -1,7 +1,7 @@ """ Copyright (c) 2023 Zoƫ Bilodeau. All rights reserved. -Odapt: File conversion package. +odapt: File conversion package. """ from __future__ import annotations diff --git a/src/odapt/operations/__init__.py b/src/odapt/operations/__init__.py new file mode 100644 index 0000000..9d48db4 --- /dev/null +++ b/src/odapt/operations/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/odapt/operations/hadd.py b/src/odapt/operations/hadd.py new file mode 100644 index 0000000..f0e5a13 --- /dev/null +++ b/src/odapt/operations/hadd.py @@ -0,0 +1,418 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +import numpy as np +import uproot + + +def hadd_1D(destination, file, key, first): + outfile = uproot.open(destination) + try: + hist = file[key] + except ValueError: + msg = "Key missing from {file}" + raise ValueError(msg) from None + if first: + member_data = np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + ] + ) + return uproot.writing.identify.to_TH1x( + hist.member("fName"), + hist.member("fTitle"), + hist.values(flow=True), + *member_data, + hist.variances(flow=True), + hist.member("fXaxis"), + ) + if hist.member("fN") == outfile[key].member("fN"): + member_data = np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + ] + ) + h_sum = uproot.writing.identify.to_TH1x( + hist.member("fName"), + hist.member("fTitle"), + outfile[key].values(flow=True) + hist.values(flow=True), + *np.add( + np.array( + [ + outfile[key].member("fEntries"), + outfile[key].member("fTsumw"), + outfile[key].member("fTsumw2"), + outfile[key].member("fTsumwx"), + outfile[key].member("fTsumwx2"), + ] + ), + member_data, + ), + outfile[key].variances(flow=True) + hist.variances(flow=True), + hist.member("fXaxis"), + ) + file.close() + return h_sum + + msg = "Bins must be the same for histograms to be added, not " + raise ValueError( + msg, + hist.member("fN"), + " and ", + outfile[key].member("fN"), + ) from None + + +def hadd_2D(destination, file, key, first): + outfile = uproot.open(destination) + try: + hist = file[key] + except ValueError: + msg = "Key missing from {file}" + raise ValueError(msg) from None + if first: + member_data = np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + hist.member("fTsumwy"), + hist.member("fTsumwy2"), + hist.member("fTsumwxy"), + ] + ) + return uproot.writing.identify.to_TH2x( + hist.member("fName"), + hist.member("fTitle"), + np.ravel(hist.values(flow=True), order="C"), + *member_data, + np.ravel(hist.variances(flow=True), order="C"), + hist.member("fXaxis"), + hist.member("fYaxis"), + ) + if hist.member("fN") == outfile[key].member("fN"): + member_data = np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + hist.member("fTsumwy"), + hist.member("fTsumwy2"), + hist.member("fTsumwxy"), + ] + ) + + h_sum = uproot.writing.identify.to_TH2x( + hist.member("fName"), + hist.member("fTitle"), + np.ravel( + outfile[key].values(flow=True) + hist.values(flow=True), order="C" + ), + *np.add( + np.array( + [ + outfile[key].member("fEntries"), + outfile[key].member("fTsumw"), + outfile[key].member("fTsumw2"), + outfile[key].member("fTsumwx"), + outfile[key].member("fTsumwx2"), + outfile[key].member("fTsumwy"), + outfile[key].member("fTsumwy2"), + outfile[key].member("fTsumwxy"), + ] + ), + member_data, + ), + np.ravel( + outfile[key].variances(flow=True) + hist.variances(flow=True), order="C" + ), + hist.member("fXaxis"), + hist.member("fYaxis"), + ) + file.close() + return h_sum + + msg = "Bins must be the same for histograms to be added, not " + raise ValueError( + msg, + hist.member("fN"), + " and ", + outfile[key].member("fN"), + ) from None + + +def hadd_3D(destination, file, key, first): + outfile = uproot.open(destination) + try: + hist = file[key] + except ValueError: + msg = "Key missing from {file}" + raise ValueError(msg) from None + if first: + member_data = np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + hist.member("fTsumwy"), + hist.member("fTsumwy2"), + hist.member("fTsumwxy"), + hist.member("fTsumwz"), + hist.member("fTsumwz2"), + hist.member("fTsumwxz"), + hist.member("fTsumwyz"), + ] + ) + return uproot.writing.identify.to_TH3x( + hist.member("fName"), + hist.member("fTitle"), + np.ravel(hist.values(flow=True), order="C"), + *member_data, + np.ravel(hist.variances(flow=True), order="C"), + hist.member("fXaxis"), + hist.member("fYaxis"), + hist.member("fZaxis"), + ) + if hist.member("fN") == outfile[key].member("fN"): + member_data = np.add( + np.array( + [ + hist.member("fEntries"), + hist.member("fTsumw"), + hist.member("fTsumw2"), + hist.member("fTsumwx"), + hist.member("fTsumwx2"), + hist.member("fTsumwy"), + hist.member("fTsumwy2"), + hist.member("fTsumwxy"), + hist.member("fTsumwz"), + hist.member("fTsumwz2"), + hist.member("fTsumwxz"), + hist.member("fTsumwyz"), + ] + ), + np.array( + hist.member("fEntries"), + outfile[key].member("fTsumw"), + outfile[key].member("fTsumw2"), + outfile[key].member("fTsumwx"), + outfile[key].member("fTsumwx2"), + outfile[key].member("fTsumwy"), + outfile[key].member("fTsumwy2"), + outfile[key].member("fTsumwxy"), + outfile[key].member("fTsumwz"), + outfile[key].member("fTsumwz2"), + outfile[key].member("fTsumwxz"), + outfile[key].member("fTsumwyz"), + ), + ) + h_sum = uproot.writing.identify.to_TH3x( + hist.member("fName"), + hist.member("fTitle"), + np.ravel( + outfile[key].values(flow=True) + hist.values(flow=True), order="C" + ), + *member_data, + np.ravel( + (outfile[key].variances(flow=True) + hist.variances(flow=True)), + order="C", + ), + hist.member("fXaxis"), + hist.member("fYaxis"), + hist.member("fZaxis"), + ) + file.close() + return h_sum + + msg = "Bins must be the same for histograms to be added, not " + raise ValueError( + msg, + hist.member("fN"), + " and ", + outfile[key].member("fN"), + ) from None + + +def hadd( + destination, + files, + *, + force=True, + append=False, + compression="lz4", + compression_level=1, + skip_bad_files=False, + union=True, +): + """ + Args: + destination (path-like): Name of the output file or file path. + files (Str or list of str): List of local ROOT files to read histograms from. + May contain glob patterns. + force (bool): If True, overwrites destination file if it exists. Force and append + cannot both be True. + append (bool): If True, appends histograms to an existing file. Force and append + cannot both be True. + compression (str): Sets compression level for root file to write to. Can be one of + "ZLIB", "LZMA", "LZ4", or "ZSTD". By default the compression algorithm is "LZ4". + compression_level (int): Use a compression level particular to the chosen compressor. + By default the compression level is 1. + skip_bad_files (bool): If True, skips corrupt or non-existent files without exiting. + max_opened_files (int): Limits the number of files to be open at the same time. If 0, + this gets set to system limit. + union (bool): If True, adds the histograms that have the same name and copies all others + to the new file. + + Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to + a new or existing ROOT file. + + >>> odapt.add_histograms("destination.root", ["file1_to_hadd.root", "file2_to_hadd.root"]) + + """ + if compression == "ZLIB" or compression == "zlib": + compression_code = uproot.const.kZLIB + elif compression == "LZMA" or compression == "lzma": + compression_code = uproot.const.kLZMA + elif compression == "LZ4" or compression == "lz4": + compression_code = uproot.const.kLZ4 + elif compression == "ZSTD" or compression == "zstd": + compression_code = uproot.const.kZSTD + else: + msg = f"unrecognized compression algorithm: {compression}. Only ZLIB, LZMA, LZ4, and ZSTD are accepted." + raise ValueError(msg) + + if Path.isfile(destination): + if not force and not append: + raise FileExistsError + if force and append: + msg = "Cannot append to a new file. Either force or append can be true." + raise ValueError(msg) + else: + if append: + raise FileNotFoundError( + "File %s" + destination + " not found. File must exist to append." + ) + file_out = uproot.recreate( + destination, + compression=uproot.compression.from_code_pair( + compression_code, compression_level + ), + ) + if isinstance(files, list): + files = sorted(Path(files).glob(f"/**/*{'.root'}")) + + with uproot.open(files[0]) as file: + keys = file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False) + if union: + for i, _value in enumerate(files[1:]): + with uproot.open(files[i]) as file: + keys = np.union1d( + keys, + file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False), + ) + if files[i + 1] == files[-1]: + keys_axes = dict(zip(keys, (len(file[j].axes) for j in keys))) + else: + for i, _value in enumerate(files[1:]): + with uproot.open(files[i]) as file: + keys = np.intersect1d( + keys, + file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False), + ) + if files[i + 1] == files[-1]: + keys_axes = dict(zip(keys, (len(file[j].axes) for j in keys))) + + first = True + for input_file in files: + if Path.isfile(input_file): + file_out = uproot.update(destination) + else: + file_out = uproot.recreate(destination) + file_out.compression.from_code_pair(compression_code, compression_level) + try: + file = uproot.open(input_file) + except FileNotFoundError: + if skip_bad_files: + continue + msg = "File: {input_file} does not exist or is corrupt." + raise FileNotFoundError(msg) from None + + for key in keys: + try: + file[key] + except Warning: + Warning("Histogram {key} missing from file {input_file}.") + if keys_axes[key] == 1: + h_sum = hadd_1D(destination, file, key, first) + + elif keys_axes[key] == 2: + h_sum = hadd_2D(destination, file, key, first) + + else: + h_sum = hadd_3D(destination, file, key, first) + + if h_sum is not None: + file_out[key] = h_sum + + first = False + file.close() + + +def args(): + argparser = argparse.ArgumentParser(description="Hadd ROOT histograms with Uproot") + argparser.add_argument("destination", type=str, help="path of output file") + argparser.add_argument( + "input_files", + type=str, + nargs="+", + help="list or directory (glob syntax accepted) of input files", + ) + argparser.add_argument( + "-f", action="force", default=True, help="force overwrite of output file" + ) + argparser.add_argument( + "-a", action="append", default=False, help="append to existing file" + ) + argparser.add_argument( + "-c", action="compression", default=1, help="set compression level between 1-9" + ) + argparser.add_argument( + "-k", + action="skip_bad_files", + default=False, + help="corrupt or non-existent input files are ignored", + ) + argparser.add_argument( + "-u", + action="union", + default=True, + help="all histograms get copied to new file, only those with same name get added", + ) + argparser.add_argument( + "-n", + action="max_opened_files", + default=0, + help="maximum number of files to be open at one time. 0 is system max", + ) + argparser.add_argument( + "-ff", + action="input_compression", + default=True, + help="compression level for the new file the same as the compression of the input files", + )