From 78d47498f65de7f76e0fe8f28ba528ef026b3eee Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Thu, 15 Feb 2024 14:25:17 -0500
Subject: [PATCH] removed extraneous files from resources

---
 hippunfold/config/snakebids.yml               |   2 +-
 hippunfold/resources/Training_Instructions.md |  36 ------
 .../resources/desc-flipLR_type-itk_xfm.txt    |   5 -
 .../resources/{ => etc}/identity_xfm.txt      |   0
 hippunfold/resources/example_sbatch.sh        |  13 --
 hippunfold/resources/fineTune_UNet.sh         |  46 -------
 hippunfold/resources/write_config_NiftyNet.py | 112 ------------------
 7 files changed, 1 insertion(+), 213 deletions(-)
 delete mode 100644 hippunfold/resources/Training_Instructions.md
 delete mode 100644 hippunfold/resources/desc-flipLR_type-itk_xfm.txt
 rename hippunfold/resources/{ => etc}/identity_xfm.txt (100%)
 delete mode 100755 hippunfold/resources/example_sbatch.sh
 delete mode 100755 hippunfold/resources/fineTune_UNet.sh
 delete mode 100755 hippunfold/resources/write_config_NiftyNet.py

diff --git a/hippunfold/config/snakebids.yml b/hippunfold/config/snakebids.yml
index 8f7c1067..5f6b2402 100644
--- a/hippunfold/config/snakebids.yml
+++ b/hippunfold/config/snakebids.yml
@@ -337,7 +337,7 @@ cifti_metric_types:
 singularity:
   autotop: 'docker://khanlab/hippunfold_deps:v0.5.0'
 
-xfm_identity: resources/identity_xfm.txt
+xfm_identity: resources/etc/identity_xfm.txt
 
 template_files:
   CITI168:
diff --git a/hippunfold/resources/Training_Instructions.md b/hippunfold/resources/Training_Instructions.md
deleted file mode 100644
index 76de956a..00000000
--- a/hippunfold/resources/Training_Instructions.md
+++ /dev/null
@@ -1,36 +0,0 @@
-This document contains step-by-step instructions for retraining or finetuning UNet for hippocampal segmentation. 
-
-## 1) Run existing data through a previous model
-There's a chance that one of the existing models will perform well on your new dataset, if the data is similar enough. If performance is very good then no further fine-tuning is needed. If performance is poor on some samples, they can be manually corrected or else fully manually segmented. In either case, running the full pipeline end-to-end should produce images that are CorObl, which is the space that segmentations for training should be in.
-
-see `hippunfold -h`
-
-## 2) Collect training images and segmentations
-All training data should be manually inspected, and once the quality is good the CorObl image (eg. `outputdirectory/subjectID/hemi-L/img.nii`) and corresponding segmentation image (eg. `outputdirectory/subjectID/hemi-L/niftynet_lbl.nii` or a manually generated segmentation image) can be copied into a new clean directory (eg. `mynewdataset/training/`). Each new subject in the training directory should have a unique subjectID as a prefix, and either `_img` or `_lbl` for images and segmentations, repsectively.
-
-For example `ls mynewdataset/training` should produce something like this:
-```
-sub-001_img.nii.gz
-sub-001_lbl.nii.gz
-sub-002_img.nii.gz
-sub-002_lbl.nii.gz
-```
-It is also possible to fine-tune on only a subset of subjects (for example, only those that produced good performance on the first pass).
-
-## 3) Fine-tune an existing model, or train one from scratch
-Once you have populated your training data directory, you may train your model or fine-tune an existing model using `fineTune_UNet.sh`. This is a compute-intensive process. This can be run on a CPU, but it is recommended that you run on GPU with sufficient GPU memory (current models were trained on 8xV100 GPU nodes). By default, `fineTune_UNet.sh` will run 100k iterations which should take <\24h with these parameters. 
-
-For example:
-`singularity exec --nv hippocampal_autotop_latest.sif bash /src/resources/fineTune_UNet.sh mynewdataset/training mynewdataset/newCNNmodel`
-(omit `--nv` if no GPU is available)
-
-This will perform 100k training iterations with data augmentation and using the same parameters as previous work. Training and validation progress can be viewed using tensorboard (eg. `tensorboard --logdir mynewdataset/newCNNmodel/models`). Once training is complete, inference will be performed on the remaining test data, which can then be inspected for quality. Further training iterations can be run using the same command as above (specifying the same output directory), or a new model can be trained using the same data by specifying a different directory. 
-
-If you know what you are doing, you can open `mynewdataset/newCNNmodel/config.ini` and modify parameters before running additional training. 
-
-## 4) Incremental learning
-If your dataset is very large, you may fine-tune on only a subset of new samples. In that case, you can re-run steps 1-3 which should now produce more good quality segmentations for use in further training. 
-
-## 5) Share trained models and/or data
-Please consider sharing your data and/or trained models to improve generalizability to future studies.
-
diff --git a/hippunfold/resources/desc-flipLR_type-itk_xfm.txt b/hippunfold/resources/desc-flipLR_type-itk_xfm.txt
deleted file mode 100644
index 335082b6..00000000
--- a/hippunfold/resources/desc-flipLR_type-itk_xfm.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-#Insight Transform File V1.0
-#Transform 0
-Transform: MatrixOffsetTransformBase_double_3_3
-Parameters: -1 0 0 0 1 0 0 0 1 0 0 0
-FixedParameters: 0 0 0
diff --git a/hippunfold/resources/identity_xfm.txt b/hippunfold/resources/etc/identity_xfm.txt
similarity index 100%
rename from hippunfold/resources/identity_xfm.txt
rename to hippunfold/resources/etc/identity_xfm.txt
diff --git a/hippunfold/resources/example_sbatch.sh b/hippunfold/resources/example_sbatch.sh
deleted file mode 100755
index e07a84aa..00000000
--- a/hippunfold/resources/example_sbatch.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-#SBATCH --account=rrg-lpalaniy
-#SBATCH --ntasks=1
-#SBATCH --gres=gpu:v100:8
-#SBATCH --exclusive
-#SBATCH --cpus-per-task=28
-#SBATCH --mem=86000M
-#SBATCH --time=24:00:00
-
-module load arch/avx512 StdEnv/2018.3
-nvidia-smi
-
-singularity exec --nv hippocampal_autotop_latest.sif bash /src/resources/fineTune_UNet.sh mynewdataset/training mynewdataset/newCNNmodel
diff --git a/hippunfold/resources/fineTune_UNet.sh b/hippunfold/resources/fineTune_UNet.sh
deleted file mode 100755
index d0d4bffc..00000000
--- a/hippunfold/resources/fineTune_UNet.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-trainingdir=$1 #'../training_data_b1000/'
-newmodeldir=$2 #'testmodel'
-
-if [ "$#" -lt 2 ]
-then
-	 echo "This script can be used to incrementally train UNet. If this is the first time running this script for a new model, a new config file will be generated. Otherwise training will resume from the last iteration."
-	 echo ""
-	 echo "Usage: $0 <directory_of_training_data> <output_directory> [optional arguments]"
-	 echo ""
-	 echo " -b bootstrap existing model"
-	 echo " -i number of new iterations (default 100k)"
-	 echo ""
-
-	 exit 1
- fi
-
-shift 2
-iterations=100000
-
-while getopts "b:" options; do
- case $options in
-  b ) echo "bootstrapping model from $OPTARG"
-	  bootstrapmodel=$OPTARG;;
-  i ) echo "number of final iterations (after bootstrapping if included) $OPTARG"
-	  iterations=$OPTARG;;
-    * ) usage
-	exit 1;;
- esac
-done
-
-if [ -f "$newmodeldir/config.ini" ]
-then
-python write_config_NiftyNet.py $trainingdir $newmodeldir $iterations $bootstrapmodel
-else
-mv $newmodeldir/dataset_split_training.csv $newmodeldir/dataset_split.csv # resume past dataset_split.csv
-fi
-
-# TO BE RUN IN BASH
-# requires niftynet
-net_segment -c $newmodeldir/config.ini train
-net_segment -c $newmodeldir/config.ini inference
-net_segment -c $newmodeldir/config.ini evaluation
-
-# need to rename this file before AutoTops_transformAndRollOut.m
-mv $newmodeldir/dataset_split.csv $newmodeldir/dataset_split_training.csv
-
diff --git a/hippunfold/resources/write_config_NiftyNet.py b/hippunfold/resources/write_config_NiftyNet.py
deleted file mode 100755
index 6f567f61..00000000
--- a/hippunfold/resources/write_config_NiftyNet.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# write config file for NiftyNet highres3dnet network training using these default parameters. First argument is the training directory, second argument is the output directory, third argument specifies number of iterations, and fourht (optional) an existing model to bootstrap.
-
-import sys
-import configparser
-import os
-import shutil
-import glob
-
-# input arguments
-
-trainingdir = sys.argv[1]  #'../training_data_b1000/'
-newmodeldir = sys.argv[2]  #'testmodel'
-iterations = sys.argv[3]  #'testmodel'
-try:
-    os.mkdir(newmodeldir)
-except:
-    print("output directory already exists")
-
-# copy over bootstrapped CNN model
-if len(sys.argv) == 5:
-    bootstrapmodel = sys.argv[4]
-    shutil.copytree(bootstrapmodel + "/models/", newmodeldir + "/models/")
-    start_iter = "-1"
-    # add existing iterations to max iterations
-    fn = glob.glob(newmodeldir + "/models/*.index")[0]
-    i = fn.find("ckpt-")[0]
-    fn = fn[i + 5 : -6]
-    iterations = int(iterations) + int(fn)
-elif len(sys.argv) == 4:
-    bootstrapmodel = (
-        ""  # Optional. To resume training a timed out model, specify it here.
-    )
-    start_iter = "0"
-else:
-    print("Error wrong number of input arguments")
-
-# write config file with default values
-config = configparser.ConfigParser()
-
-config["IMG"] = {
-    "path_to_search": trainingdir,
-    "filename_contains": "img",
-    "spatial_window_size": "(64, 64, 64)",
-    "interp_order": "1",
-    "pixdim": "(0.3, 0.3, 0.3)",
-    "axcodes": "(R, A, S)",
-}
-config["LBL"] = {
-    "path_to_search": trainingdir,
-    "filename_contains": "lbl",
-    "spatial_window_size": "(64, 64, 64)",
-    "interp_order": "1",
-    "pixdim": "(0.3, 0.3, 0.3)",
-    "axcodes": "(R, A, S)",
-}
-
-config["SYSTEM"] = {"cuda_devices": '""', "model_dir": newmodeldir}
-
-config["NETWORK"] = {
-    "name": "highres3dnet_large",
-    "batch_size": "1",
-    "activation_function": "relu",
-    "volume_padding_size": "0",
-    "normalisation": "True",
-    "foreground_type": "mean_plus",
-    "cutoff": "(0.001, 0.999)",
-}
-
-config["TRAINING"] = {
-    "sample_per_volume": "5",
-    "lr": "0.001",
-    "loss_type": "Dice",
-    "starting_iter": start_iter,
-    "save_every_n": "1000",
-    "tensorboard_every_n": "100",
-    "max_iter": iterations,
-    "validation_every_n": "100",
-    "exclude_fraction_for_validation": "0.2",
-    "exclude_fraction_for_inference": "0.2",
-    "rotation_angle": "(-10.0,10.0)",
-    "random_flipping_axes": "0",
-    "do_elastic_deformation": "True",
-    "num_ctrl_points": "4",
-    "deformation_sigma": "15",
-    "proportion_to_deform": "0.75",
-    "bias_field_range": "(-5.0,5.0)",
-    "bf_order": "3",
-}
-
-
-config["INFERENCE"] = {
-    "border": "(16,16,16)",
-    "inference_iter": "-1",
-    "save_seg_dir": newmodeldir + "/parcellation_output",
-    "output_interp_order": "0",
-}
-
-config["SEGMENTATION"] = {
-    "image": "IMG",
-    "label": "LBL",
-    "label_normalisation": "False",
-    "output_prob": "False",
-    "num_classes": "9",
-}
-
-config["EVALUATION"] = {
-    "save_csv_dir": newmodeldir + "/eval",
-    "evaluations": "dice,average_distance",
-}
-
-with open(newmodeldir + "/config.ini", "w") as configfile:
-    config.write(configfile)