feat: implement logging and some standards for ruff config (#134)
* feat(logging): add structlog integration with custom processors and example logger

* chore: remove star imports

* feat(lint): update ruff configuration to include new linting rules and extend exclusions

* refactor: update pixi.lock and pixi.toml for ruff command structure and remove unused betapipeline entry

* refactor(logging): standardize string quotes and improve logger configuration handling

* feat(logging): integrate structured logging and enhance debug information in AutoPipeline and StructureSet

* fix: #134 (comment)

* fix: #134 (comment)

* fix: #134 (comment)

* refactor(logging): streamline logging configuration and ensure log directory creation

* chore: add log files to .gitignore to prevent tracking of generated logs

* fix: Update src/imgtools/logging/__init__.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* fix: #134 (comment)

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* chore: update .gitignore to include log files in imgtools directory

* feat(datagraph): integrate logging for edge table processing and visualization

* feat(ops): add timing for graph formation and update DataGraph initialization

* chore: rename workflow from Test to CI-CD and restrict push triggers to main branch

* feat(crawl): integrate logging for folder crawling and data saving processes

* fix(structureset): enhance logging for ROI point retrieval errors

* fix(autopipeline): update logger level to use environment variable for flexibility

* fix(logging): streamline error handling for log directory creation

* fix: #134 (comment)

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* refactor(logging): enhance logging configuration and streamline JSON setup

* chore(pixi.lock): add license_family field for clarity

* refactor(logging): enhance LoggingManager with valid log levels and improve JSON logging setup

* refactor(logging): enhance documentation and improve LoggingManager configuration options

* refactor(logging): validate log level assignment before setting self.level

* feat(logging): add mypy and type-checking support; refactor logging manager and improve error handling

* refactor(logging): remove unused Optional import from typing

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
jjjermiah and coderabbitai[bot] authored Nov 19, 2024
1 parent 1549be9 commit 7c31019
Showing 17 changed files with 1,212 additions and 278 deletions.
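For orientation before the diffs: the commit wires a structlog-backed get_logger() into imgtools.logging. The sketch below is an editorial illustration, not the committed implementation; get_logger(level=...) and the IMGTOOLS_LOG_LEVEL variable appear in the diffs, but the processor chain and defaults here are assumptions.

    import logging
    import os

    import structlog


    def get_logger(level: str = "INFO") -> structlog.typing.FilteringBoundLogger:
        """Return a structlog logger filtered at `level` (sketch only)."""
        structlog.configure(
            processors=[
                structlog.processors.add_log_level,           # attach "level" to each event
                structlog.processors.TimeStamper(fmt="iso"),  # ISO-8601 timestamps
                structlog.dev.ConsoleRenderer(),              # human-readable console output
            ],
            wrapper_class=structlog.make_filtering_bound_logger(
                getattr(logging, level.upper(), logging.INFO)
            ),
        )
        return structlog.get_logger()


    logger = get_logger(os.environ.get("IMGTOOLS_LOG_LEVEL", "INFO"))
    logger.info("crawl started", directory="/data/RADCURE")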
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
@@ -1,9 +1,9 @@
-name: Test
+name: CI-CD
 
 # only run on pushes to main or pull requests
 on:
   push:
-    branches: ["*"]
+    branches: ["main"]
   pull_request:
     branches: ["*"]
 
2 changes: 2 additions & 0 deletions .gitignore
@@ -219,3 +219,5 @@ data/
 
 # pixi environments
 .pixi
+
+.imgtools/**/*.log
2 changes: 2 additions & 0 deletions config/mypy.ini
@@ -0,0 +1,2 @@
+[mypy]
+files = src/imgtools/logging/**/*.py
109 changes: 106 additions & 3 deletions config/ruff.toml
@@ -1,6 +1,109 @@
-[ lint ]
-# select = ["E4", "E7", "E9", "F", "B"]
+# NOTE:
+# The idea is that all future files should be included in the linting process.
+# To save the headache, we are excluding everything before, and hopefully we can
+# slowly fix everything
 
-ignore = [ "E722", "F405", "F403" ]
+include = [
+    "src/imgtools/logging/**/*.py",
+]
+
+extend-exclude = [
+    ".pixi/**/*",
+    "tests/**/*.py",
+    "src/imgtools/ops/**/*.py",
+    "src/imgtools/io/**/*.py",
+    "src/imgtools/utils/**/*.py",
+    "src/imgtools/modules/**/*.py",
+    "src/imgtools/transforms/**/*.py",
+    "src/imgtools/autopipeline.py",
+    "src/imgtools/pipeline.py",
+    "src/imgtools/image.py",
+]
+
+
+line-length = 100
+
+[lint]
+
+select = [
+    ###########################################################################
+    # TYPE ANNOTATIONS
+    # Ensure all functions have type annotations
+    # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann
+    "ANN",
+    # Use type hinting consistently
+    # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch
+    "TCH",
+
+    ###########################################################################
+    # IMPORTS
+    # Sort imports naturally
+    # https://docs.astral.sh/ruff/rules/#isort-i
+    "I",
+    # Follow import conventions
+    # https://docs.astral.sh/ruff/rules/#flake8-import-conventions-icn
+    "ICN",
+    # Clean up and organize imports
+    # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
+    "TID",
+
+    ###########################################################################
+    # CODE QUALITY
+    # Detect possible bugs, like unused variables or exception handling issues
+    # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
+    "B",
+    # Avoid using Python builtins incorrectly
+    # https://docs.astral.sh/ruff/rules/#flake8-builtins-a
+    "A",
+    # Enforce correct usage of commas in lists, tuples, etc.
+    # https://docs.astral.sh/ruff/rules/#flake8-commas-com
+    "COM",
+    # Prevent use of debugging code, like breakpoints
+    # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10
+    "T10",
+    # Disallow print statements
+    # https://docs.astral.sh/ruff/rules/#flake8-print-t20
+    "T20",
+    # Provide clear and explanatory error messages
+    # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
+    "EM",
+
+    ###########################################################################
+    # STANDARDS & STYLE
+    # Prefer pathlib for path manipulation
+    # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
+    "PTH",
+    # Adhere to Pylint conventions
+    # https://docs.astral.sh/ruff/rules/#pylint-pl
+    "PL",
+    # Simplify code to reduce complexity
+    # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
+    "SIM",
+    # Errors like undefined names and unused imports, without enforcing style rules
+    # https://docs.astral.sh/ruff/rules/#pyflakes-f
+    "F",
+    # PEP 8 naming conventions
+    # https://docs.astral.sh/ruff/rules/#pep8-naming-n
+    "N",
+    # Pydocstyle
+    # https://docs.astral.sh/ruff/rules/#pydocstyle-d
+    # "D",
+]
+
+ignore = [
+    # allow self to not need type annotations
+    "ANN101",
+    # Allow too many arguments for functions
+    "PLR0913",
+    # Public Module Docstrings
+    "D100",
+    # Ignored because https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
+    "COM812", # https://docs.astral.sh/ruff/rules/missing-trailing-comma/#missing-trailing-comma-com812
+
+]
+
+[format]
+
+quote-style = "single"
+indent-style = "tab"
+docstring-code-format = true
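As a rough illustration of what the newly selected families enforce (not part of the commit; the helper below is hypothetical): ANN wants annotated parameters and returns, PTH prefers pathlib over os.path, EM wants exception messages assigned to a variable first, and T20 bans print statements.

    from pathlib import Path


    def count_log_lines(log_dir: Path) -> int:  # ANN: annotate params and return
        """Count lines across all .log files under ``log_dir``."""
        if not log_dir.is_dir():  # PTH: pathlib call instead of os.path.isdir
            msg = f"{log_dir} is not a directory"  # EM: assign the message first,
            raise NotADirectoryError(msg)  # so tracebacks don't echo a raw f-string
        return sum(len(p.read_text().splitlines()) for p in log_dir.glob("*.log"))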
541 changes: 350 additions & 191 deletions pixi.lock

Large diffs are not rendered by default.

42 changes: 26 additions & 16 deletions pixi.toml
@@ -11,7 +11,7 @@ med-imagetools = { path = ".", editable = true }
 [environments]
 default = { features = [
     "test",
-    "style",
+    "quality",
     "build",
     "dev",
 ], solve-group = "default" }
@@ -26,6 +26,7 @@ ipython = "*"
 ipykernel = "*"
 jupyterlab = "*"
 
+
 ############################################## PYTHON ###############################################
 
 [feature.py310.dependencies]
@@ -46,8 +47,8 @@ pytest-xdist = "*"
 cmd = [
     "pytest",
     # "--numprocesses=auto",
-    "-s",
-    "--verbose",
+    # "-s",
+    # "--verbose",
     "--cov=imgtools",
     "--cov-report=xml:coverage-report/coverage.xml",
     "--cov-config=config/coverage.toml",
@@ -62,22 +63,31 @@ inputs = ["coverage-report/coverage.xml", "config/coverage.toml"]
 depends-on = ["test"]
 description = "Run pytest and generate coverage report"
 
-############################################## STYLE ###############################################
+############################################## QUALITY ###############################################
 # See config/ruff.toml for the configuration
-[feature.style.dependencies]
+[feature.quality.dependencies]
 ruff = ">=0.4.4"
 pre-commit = ">=3.7.1,<3.8"
-
-[feature.style.tasks]
-lint.cmd = [
-    "ruff",
-    "--config",
-    "config/ruff.toml",
-    "check",
-    "src",
-]
-lint.inputs = ["config/ruff.toml", "src"]
-lint.description = "Run ruff check"
+mypy = ">=1.13.0,<2"
+types-pytz = ">=2024.2.0.20241003,<2025"
+types-tqdm = ">=4.66.0.20240417,<5"
+
+[feature.quality.tasks]
+ruff-check.cmd = ["ruff", "--config", "config/ruff.toml", "check", "src"]
+ruff-check.inputs = ["config/ruff.toml", "src"]
+ruff-check.description = "Run ruff check"
+
+ruff-format.cmd = ["ruff", "--config", "config/ruff.toml", "format", "src"]
+ruff-format.inputs = ["config/ruff.toml", "src"]
+ruff-format.depends_on = ["ruff-check"]
+ruff-format.description = "Run ruff format, run check first"
+
+lint.depends_on = ["ruff-format", "ruff-check"]
+lint.description = "Run ruff check and format"
+
+type-check.cmd = ["mypy", "--config-file", "config/mypy.ini"]
+type-check.inputs = ["config/mypy.ini", "src"]
+type-check.description = "Run mypy type check."
 
 #################################### RELEASE & BUILD ###############################################
 
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -33,6 +33,8 @@ dependencies = [
     "pyyaml>=6.0.1,<7",
     "dill>=0.3.8,<1",
     "attrs>=23.2.0",
+    "structlog>=24.0,<25",
+    "click>=8.1,<9",
 ]
 
 classifiers = [
@@ -53,7 +55,6 @@ debug = ["pyvis"]
 # Entry points for CLI commands
 [project.scripts]
 autopipeline = "imgtools.autopipeline:main"
-betapipeline = "imgtools.autopipeline_refactored:main"
 
 [build-system]
 build-backend = "hatchling.build"
4 changes: 0 additions & 4 deletions src/imgtools/__init__.py
@@ -1,5 +1 @@
-from . import io, ops, utils, pipeline
-
-__all__ = ["io", "ops", "utils", "pipeline"]
-
 __version__ = "1.6.0"
29 changes: 16 additions & 13 deletions src/imgtools/autopipeline.py
@@ -15,6 +15,8 @@
 from imgtools.pipeline import Pipeline
 from imgtools.utils.nnunet import generate_dataset_json, markdown_report_images
 from imgtools.utils.args import parser
+from imgtools.logging import get_logger
+
 from joblib import Parallel, delayed
 from imgtools.modules import Segmentation
 from sklearn.model_selection import train_test_split
@@ -26,6 +28,9 @@
 ###############################################################
 
 
+logger = get_logger(level=os.environ.get('IMGTOOLS_LOG_LEVEL', "INFO"))
+
+
 class AutoPipeline(Pipeline):
     """Example processing pipeline for the RADCURE dataset.
     This pipeline loads the CT images and structure sets, re-samples the images,
@@ -164,9 +169,7 @@ def __init__(self,
         if not os.path.exists(self.output_directory):
             os.makedirs(self.output_directory)
         all_nnunet_folders = glob.glob(pathlib.Path(self.output_directory, "*", " ").as_posix())
-        # print(all_nnunet_folders)
         numbers = [int(os.path.split(os.path.split(folder)[0])[1][4:7]) for folder in all_nnunet_folders if os.path.split(os.path.split(folder)[0])[1].startswith("Task")]
-        # print(numbers, continue_processing)
         if (len(numbers) == 0 and continue_processing) or not continue_processing or not os.path.exists(pathlib.Path(self.output_directory, f"Task{max(numbers)}_{study_name}", ".temp").as_posix()):
             available_numbers = list(range(500, 1000))
             for folder in all_nnunet_folders:
@@ -612,7 +615,6 @@ def save_data(self):
         if self.is_nnunet:  # dataset.json for nnunet and .sh file to run to process it
             imagests_path = pathlib.Path(self.output_directory, "imagesTs").as_posix()
             images_test_location = imagests_path if os.path.exists(imagests_path) else None
-            # print(self.existing_roi_indices)
             generate_dataset_json(pathlib.Path(self.output_directory, "dataset.json").as_posix(),
                                   pathlib.Path(self.output_directory, "imagesTr").as_posix(),
                                   images_test_location,
@@ -694,6 +696,7 @@ def run(self):
         # not supported yet, since they cannot be pickled
         if os.path.exists(self.output_df_path) and not self.overwrite:
             print("Dataset already processed...")
+            logger.info("Dataset already processed...")
             shutil.rmtree(pathlib.Path(self.output_directory, ".temp").as_posix())
         else:
             Parallel(n_jobs=self.n_jobs, verbose=verbose, require='sharedmem')(
@@ -720,31 +723,31 @@ def main():
             with open(pathlib.Path(args.output_directory, ".temp", "init_parameters.pkl").as_posix(), "rb") as f:
                 args_dict = dill.load(f)
         except:
-            print("Could not resume processing. Starting processing from the beginning.")
-
-    print('initializing AutoPipeline...')
+            logger.info("Could not resume processing. Starting processing from the beginning.")
+    logger.debug("Starting main:", args=args_dict)
+    logger.info('Initializing AutoPipeline...')
     pipeline = AutoPipeline(**args_dict)
 
     if not args.dry_run:
-        print('starting AutoPipeline...')
+        logger.info('Starting AutoPipeline...')
         pipeline.run()
-        print('finished AutoPipeline!')
+        logger.info('Finished AutoPipeline!')
     else:
-        print('dry run complete, no processing done')
+        logger.info('Dry run complete, no processing done')
 
     """Print general summary info"""
 
     """Print nnU-Net specific info here:
     * dataset.json can be found at /path/to/dataset/json
     * You can train nnU-Net by cloning /path/to/nnunet/repo and run `nnUNet_plan_and_preprocess -t taskID` to let the nnU-Net package prepare
     """
-    print(f"Outputted data to {args.output_directory}")
+    logger.info(f"Outputted data to {args.output_directory}")
     csv_path = pathlib.Path(args.output_directory, "dataset.csv").as_posix()
-    print(f"Dataset info found at {csv_path}")
+    logger.info(f"Dataset info found at {csv_path}")
     if args.nnunet:
         json_path = pathlib.Path(args.output_directory, "dataset.json").as_posix()
-        print(f"dataset.json for nnU-net can be found at {json_path}")
-        print("You can train nnU-net by cloning https://github.com/MIC-DKFZ/nnUNet/ and run `nnUNet_plan_and_preprocess -t taskID` to let the nnU-Net package prepare")
+        logger.info(f"dataset.json for nnU-net can be found at {json_path}")
+        logger.info("You can train nnU-net by cloning https://github.com/MIC-DKFZ/nnUNet/ and run `nnUNet_plan_and_preprocess -t taskID` to let the nnU-Net package prepare")
 
 
 if __name__ == "__main__":
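A final note on the call style above: structlog loggers accept key-value context alongside the message, which the configured processors render as structured fields. A minimal sketch, assuming get_logger accepts a level keyword as the diff shows (field names are illustrative):

    from imgtools.logging import get_logger

    logger = get_logger(level="DEBUG")

    # Key-value pairs travel with the event and are rendered by the configured
    # processors, e.g. as key=value pairs on the console or as JSON fields.
    logger.info("processing subject", subject_id="RADCURE-0001", modality="CT")
    logger.debug("starting main", output_directory="/tmp/out", n_jobs=4)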
Diffs for the remaining files are not rendered.