From 730310e82870e5f438aa4a2e35db82e71af6c0af Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 9 Nov 2023 17:46:28 -0500 Subject: [PATCH 01/29] :recycle: Make correlations more antifragile So some failing correlations don't cause the whole thing to fail --- calculate_correlations.py | 399 +++++++++++++++++++++++--------------- 1 file changed, 248 insertions(+), 151 deletions(-) diff --git a/calculate_correlations.py b/calculate_correlations.py index 062d0e0..bd95a4b 100644 --- a/calculate_correlations.py +++ b/calculate_correlations.py @@ -1,53 +1,66 @@ #!/usr/bin/env python - +import argparse +from collections.abc import Generator +import itertools +import math +from multiprocessing import Pool +import os +from pathlib import Path +import pickle +import subprocess from typing import Optional, NamedTuple, Tuple, Union -from utils.html_script import body +import yaml -import os +import nibabel as nb import numpy as np import pandas as pd -from multiprocessing import Pool +from utils.html_script import body Axis = Union[int, Tuple[int, ...]] + class CorrValue(NamedTuple): + """Correlation values""" concor: np.ndarray pearson: np.ndarray + def read_yml_file(yml_filepath): - import yaml with open(yml_filepath,"r") as f: yml_dict = yaml.safe_load(f) return yml_dict + def write_yml_file(yml_dict, out_filepath): - import yaml with open(out_filepath, "wt") as f: yaml.safe_dump(yml_dict, f) + def read_pickle(pickle_file): - import pickle with open(pickle_file, "rb") as f: dct = pickle.load(f) return dct + def write_pickle(dct, out_filepath): - import pickle with open(out_filepath, "wb") as f: pickle.dump(dct, f, protocol=pickle.HIGHEST_PROTOCOL) + def read_txt_file(txt_file): with open(txt_file,"r") as f: strings = f.read().splitlines() return strings + def write_txt_file(text_lines, out_filepath): with open(out_filepath, "wt") as f: for line in text_lines: f.write("{0}\n".format(line)) + def write_dct(dct=None, text_lines=None, outname=None): if not dct: dct = {outname: text_lines} @@ -55,12 +68,13 @@ def write_dct(dct=None, text_lines=None, outname=None): dct.update({outname: text_lines}) return dct -def gather_local_filepaths(output_folder_path): - import os + +def gather_local_filepaths(output_folder_path: str) -> list[str]: + """Given a local path, return relevant paths within that directory""" filepaths = [] print("Gathering file paths from {0}\n".format(output_folder_path)) - for root, dirs, files in os.walk(output_folder_path): + for root, _dirs, files in os.walk(output_folder_path): # loops through every file in the directory for filename in files: # checks if the file is a nifti (.nii.gz) @@ -69,11 +83,21 @@ def gather_local_filepaths(output_folder_path): filepaths.append(os.path.join(root, filename)) if len(filepaths) == 0: - err = "\n\n[!] No filepaths were found given the output folder!\n\n" - raise Exception(err) + raise FileNotFoundError( + "\n\n[!] 
No filepaths were found given the output folder!\n\n") return filepaths + +class SummaryStats: + def __init__(self, array: np.ndarray, + axis: Optional[Union[int, str]] = None) -> None: + self.mean = np.mean(array, axis=axis, keepdims=True) + self.var = np.var(array, axis=axis, keepdims=True) + self.std = np.sqrt(self.var) + self.norm = (array - self.mean) / self.std + + def batch_correlate( x: np.ndarray, y: np.ndarray, axis: Optional[Axis] = None ) -> CorrValue: @@ -84,54 +108,83 @@ def batch_correlate( References: https://en.wikipedia.org/wiki/Concordance_correlation_coefficient """ - # Summary stats for x - x_mean = np.mean(x, axis=axis, keepdims=True) - x_var = np.var(x, axis=axis, keepdims=True) - x_std = np.sqrt(x_var) - # NOTE: Not trying to fix NaNs - x_norm = (x - x_mean) / x_std - - # Summary stats for y - y_mean = np.mean(y, axis=axis, keepdims=True) - y_var = np.var(y, axis=axis, keepdims=True) - y_std = np.sqrt(y_var) - y_norm = (y - y_mean) / y_std + # summary stats + try: + summary_stats = {'x': SummaryStats(x), 'y': SummaryStats(y)} + except ZeroDivisionError: + return CorrValue(np.nan, np.nan) # Correlation coefficients - pearson = np.mean(x_norm * y_norm, axis=axis, keepdims=True) - concor = 2 * pearson * x_std * y_std / (x_var + y_var + (x_mean - y_mean) ** 2) - + pearson = np.mean(summary_stats['x'].norm * summary_stats['y'].norm, + axis=axis, keepdims=True) + concor = (2 * pearson * summary_stats['x'].std * summary_stats['y'].std / + (summary_stats['x'].var + summary_stats['y'].var + + (summary_stats['x'].mean - summary_stats['y'].mean) ** 2)) # Squeeze reduced singleton dimensions if axis is not None: concor = np.squeeze(concor, axis=axis) pearson = np.squeeze(pearson, axis=axis) return CorrValue(concor, pearson) -def correlate_text_based(txt1, txt2): + +def determine_indices(df : pd.DataFrame) -> list: + """Determine indices of str-type columns in a DataFrame""" + return [i for i, val in + enumerate(df.applymap(lambda _: isinstance(_, str)).values[0]) if + val] + + +def correlate_text_based(txts: Union[list, tuple]) -> Generator: + delimiters = tuple(delimiter_from_filepath(path) for path in txts) # TODO: why do we drop columns containing na? 
- oned_one = pd.read_csv(txt1, delimiter=None, comment="#").dropna(axis=1).values - oned_two = pd.read_csv(txt2, delimiter=None, comment="#").dropna(axis=1).values - - concor, pearson = batch_correlate(oned_one, oned_two, axis=0) - concor = np.nanmean(concor) - pearson = np.nanmean(pearson) - return concor, pearson - -def create_unique_file_dict(filepaths, output_folder_path, replacements=None): - - # filepaths: - # list of output filepaths from a CPAC output directory - # output_folder_path: - # the CPAC output directory the filepaths are from - # replacements: - # (optional) a list of strings to be removed from the filepaths should - # they occur - - # output - # files_dict - # a dictionary of dictionaries, format: - # files_dict["centrality"] = - # {("centrality", midpath, nums): , ..} + initial_load = [pd.read_csv(txt, delimiter=delimiters[i], comment='#' + ).dropna(axis=1) for i, txt in enumerate(txts)] + for i, df in enumerate(initial_load): + # if we read a value-row as a header, fix that + try: + df.columns.astype(float) + initial_load[i] = pd.read_csv(txts[i], delimiter=delimiters[i], + comment='#', header=None + ).dropna(axis=1) + except ValueError: + pass + # assume string columns are indices and not values to correlate + indices = [] + for i in range(len(initial_load)): + indices.append(np.where(df.apply( + lambda _: _.dtype == np.dtypes.ObjectDType))[0]) + oned = [] + for i, index in enumerate(indices): + if index.shape[0]: + oned.append(pd.read_csv(txts[i], delimiter=delimiters[i], + comment='#', index_col=indices[i] + ).dropna(axis=1).values) + else: + oned.append(initial_load[i].values) + return (np.nanmean(measure) for measure in batch_correlate(*oned, axis=0)) + + +def create_unique_file_dict(filepaths: list[str], output_folder_path: str, + replacements: Optional[list[str]] = None + ) -> dict[str, dict[tuple, str]]: + """ + Parameters + ---------- + filepaths : list of str + list of output filepaths from a CPAC output directory + output_folder_path : str + the CPAC output directory the filepaths are from + replacements : list of str, optional + a list of strings to be removed from the filepaths should + they occur + + Returns + ------- + files_dict : dict + a dictionary of dictionaries, format: + files_dict["centrality"] = + {("centrality", midpath, nums): , ..} + """ files_dict = {} @@ -140,25 +193,25 @@ def create_unique_file_dict(filepaths, output_folder_path, replacements=None): if "_stack" in filepath: continue - if ("itk" in filepath) or ("xfm" in filepath) or ("montage" in filepath): + if ("itk" in filepath) or ("xfm" in filepath) or ( + "montage" in filepath + ): continue path_changes = [] real_filepath = filepath if replacements: for word_couple in replacements: if "," not in word_couple: - err = "\n\n[!] In the replacements text file, the old " \ - "substring and its replacement must be separated " \ - "by a comma.\n\n" - raise Exception(err) - word = word_couple.split(",")[0] - new = word_couple.split(",")[1] + raise SyntaxError( + "\n\n[!] 
In the replacements text file, the old " + "substring and its replacement must be separated " + "by a comma.\n\n") + word, new = word_couple.split(",") if word in filepath: - path_changes.append("old: {0}".format(filepath)) + path_changes.append(f"old: {filepath}") filepath = filepath.replace(word, new) - path_changes.append("new: {0}".format(filepath)) + path_changes.append(f"new: {filepath}") if path_changes: - import os with open(os.path.join(os.getcwd(), "path_changes.txt"), "wt") as f: for path in path_changes: f.write(path) @@ -231,11 +284,15 @@ def create_unique_file_dict(filepaths, output_folder_path, replacements=None): return files_dict -def gather_all_files(input_dct, pickle_dir, source='output_dir'): - - file_dct_list = [] +def gather_all_files(input_dct: dict, pickle_dir: str, + source: str = 'output_dir') -> tuple[dict, dict]: + """ + Given an input dictionary, a pickle directory, and (optionally) a source, + returns a pair of dicts + """ + file_dct_list = [{}, {}] - for key, pipe_dct in input_dct['pipelines'].items(): + for index, (key, pipe_dct) in enumerate(input_dct['pipelines'].items()): pipe_outdir = pipe_dct[source] @@ -256,37 +313,42 @@ def gather_all_files(input_dct, pickle_dir, source='output_dir'): # "and not a specific participant's output subdirectory either.)\n" # raise Exception(err) - output_pkl = os.path.join(pickle_dir, "{0}_{1}_paths.p".format(key, source)) + output_pkl = os.path.join(pickle_dir, f"{key}_{source}_paths.p") if os.path.exists(output_pkl): - print("Found output list pickle for {0}, skipping output file" \ - "path parsing..".format(key)) + print(f"Found output list pickle for {key}, skipping output file" + "path parsing..") pipeline_files_dct = read_pickle(output_pkl) else: pipeline_files_list = gather_local_filepaths(pipe_outdir) - - pipeline_files_dct = create_unique_file_dict(pipeline_files_list, - pipe_outdir, - pipe_dct['replacements']) - + pipeline_files_dct = create_unique_file_dict( + pipeline_files_list, pipe_outdir, pipe_dct['replacements']) write_pickle(pipeline_files_dct, output_pkl) - file_dct_list.append(pipeline_files_dct) + file_dct_list[index] = pipeline_files_dct - return (file_dct_list[0], file_dct_list[1]) + return tuple(file_dct_list) -def match_filepaths(old_files_dict, new_files_dict): - """Returns a dictionary mapping each filepath from the first CPAC run to the - second one, matched to derivative, strategy, and scan. - old_files_dict: each key is a derivative name, and each value is another - dictionary keying (derivative, mid-path, last digit in path) - tuples to a list containing the full filepath described by - the tuple that is the key - new_files_dict: same as above, but for the second CPAC run +def match_filepaths(old_files_dict: dict[str, dict[tuple, str]], + new_files_dict: dict[str, dict[tuple, str]] + ) -> dict[str, dict[tuple, ]]: + """Returns a dictionary mapping each filepath from the first C-PAC + run to the second one, matched to derivative, strategy, and scan. 
- matched_path_dict: same as the input dictionaries, except the list in the - sub-dictionary value has both file paths that are matched + Parameters + ---------- + old_files_dict, new_files_dict : dict + each key is a derivative name, and each value is another + dictionary keying (derivative, mid-path, last digit in path) + tuples to a list containing the full filepath described by + the tuple that is the key + + Returns + ------- + matched_path_dict : dict + same as the input dictionaries, except the list in the + sub-dictionary value has both file paths that are matched """ # file path matching @@ -330,16 +392,28 @@ def match_filepaths(old_files_dict, new_files_dict): return matched_files_dct -def calculate_correlation(args_tuple): - import os - import subprocess - import nibabel as nb - import numpy as np - import scipy.stats.mstats - import scipy.stats - import math - +def delimiter_from_filepath(filepath: Union[Path, str]) -> Optional[str]: + """ + Given a filepath, return expected value-separator delimiter + """ + if filepath.endswith('.tsv'): + return '\t' + if filepath.endswith('.csv'): + return ',' + with open(filepath, 'r', encoding='utf8') as _f: + first_line = '#' + while first_line.lstrip().startswith('#'): + first_line = _f.readline() + for delimiter in ['\t', ',', ' ']: + if delimiter in first_line: + if delimiter == ' ': + return r'\s+' + return delimiter + return None + + +def calculate_correlation(args_tuple): category = args_tuple[0] old_path = args_tuple[1] new_path = args_tuple[2] @@ -400,46 +474,48 @@ def calculate_correlation(args_tuple): if os.path.exists(old_path) and os.path.exists(new_path): if ('.csv' in old_path and '.csv' in new_path) or \ - ('spatial_map_timeseries.txt' in old_path and 'spatial_map_timeseries.txt' in new_path) or \ + ('.txt' in old_path and '.txt' in new_path) or \ ('.1D' in old_path and '.1D' in new_path) or \ ('.tsv' in old_path and '.tsv' in new_path): try: - concor, pearson = correlate_text_based(old_path, new_path) + concor, pearson = correlate_text_based((old_path, new_path)) + except Exception as e: + return category, e, (old_path, new_path) - if concor > 0.980: - corr_tuple = (category, [concor], [pearson]) - else: - corr_tuple = (category, [concor], [pearson], (old_path, new_path)) - if verbose: - print("Success - {0}".format(str(concor))) + if concor > 0.980: + corr_tuple = (category, [concor], [pearson]) + else: + corr_tuple = (category, [concor], [pearson], (old_path, new_path)) + if verbose: + print("Success - {0}".format(str(concor))) - except Exception as e: - corr_tuple = ("file reading problem: {0}".format(e), - old_path, new_path) - if verbose: - print(str(corr_tuple)) + # except Exception as e: + # corr_tuple = ("file reading problem: {0}".format(e), + # old_path, new_path) + # if verbose: + # print(str(corr_tuple)) return corr_tuple else: - try: - old_file_img = nb.load(old_path) - old_file_hdr = old_file_img.header - new_file_img = nb.load(new_path) - new_file_hdr = new_file_img.header + # try: + old_file_img = nb.load(old_path) + old_file_hdr = old_file_img.header + new_file_img = nb.load(new_path) + new_file_hdr = new_file_img.header - old_file_dims = old_file_hdr.get_zooms() - new_file_dims = new_file_hdr.get_zooms() + old_file_dims = old_file_hdr.get_zooms() + new_file_dims = new_file_hdr.get_zooms() - data_1 = nb.load(old_path).get_fdata() - data_2 = nb.load(new_path).get_fdata() + data_1 = nb.load(old_path).get_fdata() + data_2 = nb.load(new_path).get_fdata() - except Exception as e: - corr_tuple = ("file reading 
problem: {0}".format(e), - old_path, new_path) - if verbose: - print(str(corr_tuple)) - return corr_tuple + # except Exception as e: + # corr_tuple = ("file reading problem: {0}".format(e), + # old_path, new_path) + # if verbose: + # print(str(corr_tuple)) + # return corr_tuple ## set up and run the Pearson correlation and concordance correlation if data_1.flatten().shape == data_2.flatten().shape: @@ -485,6 +561,7 @@ def calculate_correlation(args_tuple): return corr_tuple + def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, verbose=False): all_corr_dct = { @@ -540,9 +617,15 @@ def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, v print("\nCorrelations of the {0} are done.\n".format(source)) + failures = [] + for corr_tuple in corr_tuple_list: if not corr_tuple: continue + if isinstance(corr_tuple[1], Exception): + failures.append((corr_tuple[0], corr_tuple[1], + ' | '.join(corr_tuple[2]))) + continue if corr_tuple[0] not in all_corr_dct['concordance'].keys(): all_corr_dct['concordance'][corr_tuple[0]] = [] if corr_tuple[0] not in all_corr_dct['pearson'].keys(): @@ -561,7 +644,8 @@ def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, v except TypeError: pass - return all_corr_dct + return all_corr_dct, failures + def post180_organize_correlations(concor_dct, corr_type="concordance", quick=False): @@ -694,11 +778,14 @@ def organize_correlations(concor_dict, corr_type="concordance", quick=False): return corr_map_dict -def quick_summary(dct, corr_map_dct, output_dir): + +def quick_summary(dct, corr_map_dct, output_dir) -> dict: for corr_group in corr_map_dct["correlations"].keys(): cat_dct = {} lines = [] - for output_type, corr_vec in dict(corr_map_dct["correlations"][corr_group]).items(): + for output_type, corr_vec in dict( + corr_map_dct["correlations"][corr_group] + ).items(): try: corrmean = np.mean(np.asarray(corr_vec)) except TypeError: @@ -708,39 +795,51 @@ def quick_summary(dct, corr_map_dct, output_dir): dct = write_dct(dct, lines, output_type) return(dct) -def compare_pipelines(input_dct, dir_type='output_dir'): +def compare_pipelines(input_dct: dict, + dir_type: str = 'output_dir') -> tuple[dict, dict]: + """ + Given an input dict containing keys 'settings', gather prreviously + generated pickles or all relevant output and working files + + Returns + ------- + corr_map : dict + + pearson_map : dict + """ output_dir = input_dct['settings']['output_dir'] pickle_dir = input_dct['settings']['pickle_dir'] - corrs_pkl = os.path.join(pickle_dir, "{0}_correlations.p".format(dir_type)) - matched_pkl = os.path.join(pickle_dir, "{0}_matched_files.p".format(dir_type)) - + corrs_pkl = os.path.join(pickle_dir, f"{dir_type}_correlations.p") + failures_pkl = os.path.join(pickle_dir, f"{dir_type}_failures.p") + matched_pkl = os.path.join(pickle_dir, f"{dir_type}_matched_files.p") + all_corr_dct = None if os.path.exists(corrs_pkl): - print("\n\nFound the correlations pickle: {0}\n\n" - "Starting from there..\n".format(corrs_pkl)) + print(f"\n\nFound the correlations pickle: {corrs_pkl}\n\n" + "Starting from there..\n") all_corr_dct = read_pickle(corrs_pkl) elif os.path.exists(matched_pkl): - print("\n\nFound the matched filepaths pickle: {0}\n\n" - "Starting from there..\n".format(matched_pkl)) + print(f"\n\nFound the matched filepaths pickle: {matched_pkl}\n\n" + "Starting from there..\n") matched_dct = read_pickle(matched_pkl) else: # gather all relevant output and working files outfiles1_dct, outfiles2_dct = 
gather_all_files(input_dct, pickle_dir, source=dir_type) - matched_dct = match_filepaths(outfiles1_dct, outfiles2_dct) write_pickle(matched_dct, matched_pkl) if not all_corr_dct: - all_corr_dct = run_correlations(matched_dct, + all_corr_dct, failures = run_correlations(matched_dct, input_dct, source=dir_type, quick=input_dct['settings']['quick'], verbose=input_dct['settings']['verbose']) write_pickle(all_corr_dct, corrs_pkl) + write_pickle(failures, failures_pkl) if dir_type == 'work_dir': sorted_vals = [] @@ -788,14 +887,13 @@ def compare_pipelines(input_dct, dir_type='output_dir'): # pearson_map_dict["pipeline_names"], output_dir) return(corr_map, pearson_map) -def main(): - - import os - import argparse - - from multiprocessing import Pool - import itertools +def main() -> tuple: + """ + • Parse commandline arguments + • Read input YAML + • Check for already completed stuff (pickles) + """ parser = argparse.ArgumentParser() parser.add_argument("input_yaml", type=str, help="file path of the script's input YAML") @@ -811,8 +909,8 @@ def main(): input_dct = read_yml_file(args.input_yaml) # check for already completed stuff (pickles) - output_dir = os.path.join(os.getcwd(), - "correlations_{0}".format(input_dct['settings']['run_name'])) + output_dir = os.path.join( + os.getcwd(), f"correlations_{input_dct['settings']['run_name']}") pickle_dir = os.path.join(output_dir, "pickles") if not os.path.exists(pickle_dir): @@ -821,11 +919,11 @@ def main(): except: err = "\n\n[!] Could not create the output directory for the " \ "correlations. Do you have write permissions?\nAttempted " \ - "output directory: {0}\n\n".format(output_dir) + f"output directory: {output_dir}\n\n" raise Exception(err) - input_dct['settings'].update({'output_dir': output_dir}) - input_dct['settings'].update({'pickle_dir': pickle_dir}) + input_dct['settings'].update({'output_dir': output_dir, + 'pickle_dir': pickle_dir}) corr_map, pearson_map = compare_pipelines(input_dct, dir_type='output_dir') corr_map_keys = list(corr_map.keys()) @@ -840,6 +938,5 @@ def main(): if __name__ == "__main__": all_keys, data_source, branch = main() html_body = body(all_keys, data_source) - file = open(f"{data_source}_{branch}.json","w") - file.write(html_body) - file.close() \ No newline at end of file + with open(f"{data_source}_{branch}.json", "w") as file: + file.write(html_body) From 56b616c71b5b0cc1e0fec13bd5f66f785095286a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 10 Nov 2023 20:03:54 -0500 Subject: [PATCH 02/29] :construction: :heavy_minus_sign: :heavy_plus_sign: Begin script to transition from FusionCharts to D3 Ref https://d3-graph-gallery.com/graph/heatmap_style.html Co-authored-by: Holtz Yan --- templates/heatmap.js | 126 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 templates/heatmap.js diff --git a/templates/heatmap.js b/templates/heatmap.js new file mode 100644 index 0000000..0d4ca2d --- /dev/null +++ b/templates/heatmap.js @@ -0,0 +1,126 @@ +// set the dimensions and margins of the graph +var margin = {top: 80, right: 25, bottom: 30, left: 40}, + width = 800 - margin.left - margin.right, + height = 5000 - margin.top - margin.bottom; + +// append the svg object to the body of the page +var svg = d3.select("#heatmap-container") + .html(null) + .append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", + "translate(" + margin.left + "," + margin.top + ")"); + +//Read the 
data +datasource = d3.json("DATAFILE"); +datasource.then(function(data) { + + data.sort(function(a, b) { return d3.descending(a.rowid, b.rowid) }); + // Labels of row and columns -> unique identifier of the column called 'group' and 'variable' + var groupedData = d3.group(data, d => d.columnid); + var myGroups = Array.from(groupedData.keys()); + var myVars = Array.from(d3.group(data, d => d.rowid).keys()); + + // Build X scales and axis: + var x = d3.scaleBand() + .domain(myGroups) + .range([0, width]) + .padding(0.05); + + svg.append("g") + .style("font-size", 15) + .attr("transform", "translate(0,0)") + .call(d3.axisTop(x).tickSize(0)) + .select(".domain").remove(); + + // Build Y scales and axis: + var y = d3.scaleBand() + .domain(myVars) + .range([height, 0]) + .padding(0.05); + + svg.append("g") + .style("font-size", 15) + .attr("transform", "translate(" + width + ",0)") + .call(d3.axisLeft(y).tickSize(0)) + .select(".domain").remove(); + + // Build color scale + var myColor = d3.scaleSequential() + .interpolator(d3.interpolateRdYlGn) + .domain([0.8, 1]); + + // Create a tooltip + var tooltip = d3.select("#my_dataviz") + .append("div") + .style("opacity", 0) + .attr("class", "tooltip") + .style("background-color", "white") + .style("border", "solid") + .style("border-width", "2px") + .style("border-radius", "5px") + .style("padding", "5px"); + + // Three functions that change the tooltip when user hovers / moves / leaves a cell + var mouseover = function(d) { + tooltip + .style("opacity", 1); + d3.select(this) + .style("stroke", "black") + .style("opacity", 1); + }; + + var mousemove = function(d) { + tooltip + .html(d.rowid + ": " + d.value) + .style("left", (d3.pointer(this)[0] + 70) + "px") + .style("top", (d3.pointer(this)[1]) + "px"); + }; + + var mouseleave = function(d) { + tooltip + .style("opacity", 0); + d3.select(this) + .style("stroke", "none") + .style("opacity", 0.8); + }; + + // Add the squares + svg.selectAll() + .data(data, function(d) {return d.columnid + ':' + d.variable;}) + .enter() + .append("rect") + .attr("x", function(d) { return x(d.columnid) + (x.bandwidth() / 2); }) + .attr("y", function(d) { return y(d.rowid); }) + .attr("rx", 4) + .attr("ry", 4) + .attr("width", y.bandwidth()) + .attr("height", y.bandwidth()) + .style("fill", function(d) { return myColor(d.value); }) + .style("stroke-width", 0) + .style("stroke", "none") + .style("opacity", 0.8) + .on("mouseover", mouseover) + .on("mousemove", mousemove) + .on("mouseleave", mouseleave); +}); + +// Add title to graph +svg.append("text") + .attr("x", 0) + .attr("y", -50) + .attr("text-anchor", "left") + .style("font-size", "22px") + .text("GRAPHTITLE"); + +// Add subtitle to graph +svg.append("text") + .attr("x", 0) + .attr("y", -20) + .attr("text-anchor", "left") + .style("font-size", "14px") + .style("fill", "grey") + .style("max-width", 400) + .text("GRAPHSUBTITLE"); \ No newline at end of file From 5b3f06f6c900675822e883991ef84c8863c289c6 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 10 Nov 2023 20:07:00 -0500 Subject: [PATCH 03/29] :construction: :heavy_minus_sign: :heavy_plus_sign: Work toward moving from FusionCharts to D3 --- build_d3_dashboard.py | 37 +++++++++++++++++++++++++++++++++++++ templates/heatmap.html | 9 +++++++++ 2 files changed, 46 insertions(+) create mode 100644 build_d3_dashboard.py create mode 100644 templates/heatmap.html diff --git a/build_d3_dashboard.py b/build_d3_dashboard.py new file mode 100644 index 0000000..1ea05b9 --- /dev/null +++ b/build_d3_dashboard.py @@ -0,0 
+1,37 @@ +import os +from shutil import copy +import click +from lxml import etree + + +@click.command() +@click.option('--json_file', required=True, help='JSON file from correlations') +@click.option('--branch', required=True, help='branch name') + + +def main(json_file=None, branch=None): + outdir = f'output/{branch}' + os.makedirs(outdir, exist_ok=True) + json_filename = os.path.basename(json_file) + copy(json_file, '/'.join([outdir, json_filename])) + name = json_filename.replace(f"_{branch}.json", '') + with open('templates/heatmap.html', 'r', encoding='utf-8') as _f: + body = etree.HTML(_f.read()) + script_element = etree.SubElement(body[0], 'script') + script_element.set('defer', 'defer') + script_element.set('src', f'./heatmap.js') + with open('templates/heatmap.js', 'r', encoding='utf-8') as _f: + with open(f'{outdir}/heatmap.js', 'w', encoding='utf=8') as _s: + _s.write(_f.read().replace( + 'DATAFILE', json_filename).replace( + 'GRAPHTITLE', branch).replace('GRAPHSUBTITLE', name)) + body = etree.tostring(body, encoding='unicode', method='html') + + with open(f'{outdir}/{name}.html', 'w', encoding='utf-8') as _f: + _f.write(body) + + return body, name, branch + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/templates/heatmap.html b/templates/heatmap.html new file mode 100644 index 0000000..0cd1893 --- /dev/null +++ b/templates/heatmap.html @@ -0,0 +1,9 @@ + + + Correlations heatmap + + + +
Correlations heatmap will load here!
+ + \ No newline at end of file From 699e5b42cc3a6d7b94927aecedcc48649b1aea72 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Tue, 21 Feb 2023 16:23:56 -0500 Subject: [PATCH 04/29] Add pre-commit hooks for isort and black --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0df1a99 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/pycqa/isort + rev: 5.11.5 + hooks: + - id: isort + files: "\\.(py)$" + - repo: https://github.com/psf/black + rev: 23.1.0 + hooks: + - id: black + files: "\\.(py)$" \ No newline at end of file From db1a98ed1c39d87bc00678076625aa879d6eebcc Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 13 Nov 2023 13:09:14 -0500 Subject: [PATCH 05/29] :rotating_light: Initital run of precommit hooks (isort & black) --- build_d3_dashboard.py | 42 ++-- build_dashboard.py | 29 ++- calculate_correlations.py | 498 +++++++++++++++++++++----------------- create_yml.py | 49 ++-- utils/html_script.py | 23 +- utils/parse_yaml.py | 64 ++--- 6 files changed, 394 insertions(+), 311 deletions(-) diff --git a/build_d3_dashboard.py b/build_d3_dashboard.py index 1ea05b9..72d1f24 100644 --- a/build_d3_dashboard.py +++ b/build_d3_dashboard.py @@ -1,37 +1,39 @@ import os from shutil import copy + import click from lxml import etree @click.command() -@click.option('--json_file', required=True, help='JSON file from correlations') -@click.option('--branch', required=True, help='branch name') - - +@click.option("--json_file", required=True, help="JSON file from correlations") +@click.option("--branch", required=True, help="branch name") def main(json_file=None, branch=None): - outdir = f'output/{branch}' + outdir = f"output/{branch}" os.makedirs(outdir, exist_ok=True) json_filename = os.path.basename(json_file) - copy(json_file, '/'.join([outdir, json_filename])) - name = json_filename.replace(f"_{branch}.json", '') - with open('templates/heatmap.html', 'r', encoding='utf-8') as _f: + copy(json_file, "/".join([outdir, json_filename])) + name = json_filename.replace(f"_{branch}.json", "") + with open("templates/heatmap.html", "r", encoding="utf-8") as _f: body = etree.HTML(_f.read()) - script_element = etree.SubElement(body[0], 'script') - script_element.set('defer', 'defer') - script_element.set('src', f'./heatmap.js') - with open('templates/heatmap.js', 'r', encoding='utf-8') as _f: - with open(f'{outdir}/heatmap.js', 'w', encoding='utf=8') as _s: - _s.write(_f.read().replace( - 'DATAFILE', json_filename).replace( - 'GRAPHTITLE', branch).replace('GRAPHSUBTITLE', name)) - body = etree.tostring(body, encoding='unicode', method='html') - - with open(f'{outdir}/{name}.html', 'w', encoding='utf-8') as _f: + script_element = etree.SubElement(body[0], "script") + script_element.set("defer", "defer") + script_element.set("src", f"./heatmap.js") + with open("templates/heatmap.js", "r", encoding="utf-8") as _f: + with open(f"{outdir}/heatmap.js", "w", encoding="utf=8") as _s: + _s.write( + _f.read() + .replace("DATAFILE", json_filename) + .replace("GRAPHTITLE", branch) + .replace("GRAPHSUBTITLE", name) + ) + body = etree.tostring(body, encoding="unicode", method="html") + + with open(f"{outdir}/{name}.html", "w", encoding="utf-8") as _f: _f.write(body) return body, name, branch if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/build_dashboard.py 
b/build_dashboard.py index 82ee895..5880235 100644 --- a/build_dashboard.py +++ b/build_dashboard.py @@ -1,24 +1,30 @@ -from utils.html_script import write_html, setup_browser - import os + import click +from utils.html_script import setup_browser, write_html + + def process_option(ctx, param, value): if value is not None: - values = value.split(',') + values = value.split(",") return [val.strip() for val in values] -@click.command() -@click.option('--json_files', required=True, - callback=process_option, help='JSON files from correlations') -@click.option('--branch', required=True, help='branch name') +@click.command() +@click.option( + "--json_files", + required=True, + callback=process_option, + help="JSON files from correlations", +) +@click.option("--branch", required=True, help="branch name") def main(json_files=None, branch=None): - body = '' + body = "" data_source = [] for json in json_files: name = os.path.basename(json) - data = name.replace(f"_{branch}.json", '') + data = name.replace(f"_{branch}.json", "") data_source.append(data) with open(json) as user_file: file_contents = user_file.read() @@ -26,12 +32,13 @@ def main(json_files=None, branch=None): body = (body.rstrip()).rstrip(",") html_body = write_html(body) - file = open('html.html', 'w') + file = open("html.html", "w") file.write(html_body) file.close() setup_browser(html_body) return body, data_source, branch + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/calculate_correlations.py b/calculate_correlations.py index bd95a4b..7edbdfc 100644 --- a/calculate_correlations.py +++ b/calculate_correlations.py @@ -1,19 +1,19 @@ #!/usr/bin/env python import argparse -from collections.abc import Generator import itertools import math -from multiprocessing import Pool import os -from pathlib import Path import pickle import subprocess -from typing import Optional, NamedTuple, Tuple, Union -import yaml +from collections.abc import Generator +from multiprocessing import Pool +from pathlib import Path +from typing import NamedTuple, Optional, Tuple, Union import nibabel as nb import numpy as np import pandas as pd +import yaml from utils.html_script import body @@ -22,12 +22,13 @@ class CorrValue(NamedTuple): """Correlation values""" + concor: np.ndarray pearson: np.ndarray def read_yml_file(yml_filepath): - with open(yml_filepath,"r") as f: + with open(yml_filepath, "r") as f: yml_dict = yaml.safe_load(f) return yml_dict @@ -50,7 +51,7 @@ def write_pickle(dct, out_filepath): def read_txt_file(txt_file): - with open(txt_file,"r") as f: + with open(txt_file, "r") as f: strings = f.read().splitlines() return strings @@ -78,20 +79,27 @@ def gather_local_filepaths(output_folder_path: str) -> list[str]: # loops through every file in the directory for filename in files: # checks if the file is a nifti (.nii.gz) - if '.nii' in filename or '.csv' in filename or '.txt' in filename \ - or '.1D' in filename or '.tsv' in filename: + if ( + ".nii" in filename + or ".csv" in filename + or ".txt" in filename + or ".1D" in filename + or ".tsv" in filename + ): filepaths.append(os.path.join(root, filename)) if len(filepaths) == 0: raise FileNotFoundError( - "\n\n[!] No filepaths were found given the output folder!\n\n") + "\n\n[!] 
No filepaths were found given the output folder!\n\n" + ) return filepaths class SummaryStats: - def __init__(self, array: np.ndarray, - axis: Optional[Union[int, str]] = None) -> None: + def __init__( + self, array: np.ndarray, axis: Optional[Union[int, str]] = None + ) -> None: self.mean = np.mean(array, axis=axis, keepdims=True) self.var = np.var(array, axis=axis, keepdims=True) self.std = np.sqrt(self.var) @@ -110,16 +118,25 @@ def batch_correlate( """ # summary stats try: - summary_stats = {'x': SummaryStats(x), 'y': SummaryStats(y)} + summary_stats = {"x": SummaryStats(x), "y": SummaryStats(y)} except ZeroDivisionError: return CorrValue(np.nan, np.nan) # Correlation coefficients - pearson = np.mean(summary_stats['x'].norm * summary_stats['y'].norm, - axis=axis, keepdims=True) - concor = (2 * pearson * summary_stats['x'].std * summary_stats['y'].std / - (summary_stats['x'].var + summary_stats['y'].var + - (summary_stats['x'].mean - summary_stats['y'].mean) ** 2)) + pearson = np.mean( + summary_stats["x"].norm * summary_stats["y"].norm, axis=axis, keepdims=True + ) + concor = ( + 2 + * pearson + * summary_stats["x"].std + * summary_stats["y"].std + / ( + summary_stats["x"].var + + summary_stats["y"].var + + (summary_stats["x"].mean - summary_stats["y"].mean) ** 2 + ) + ) # Squeeze reduced singleton dimensions if axis is not None: concor = np.squeeze(concor, axis=axis) @@ -127,46 +144,57 @@ def batch_correlate( return CorrValue(concor, pearson) -def determine_indices(df : pd.DataFrame) -> list: +def determine_indices(df: pd.DataFrame) -> list: """Determine indices of str-type columns in a DataFrame""" - return [i for i, val in - enumerate(df.applymap(lambda _: isinstance(_, str)).values[0]) if - val] + return [ + i + for i, val in enumerate(df.applymap(lambda _: isinstance(_, str)).values[0]) + if val + ] def correlate_text_based(txts: Union[list, tuple]) -> Generator: delimiters = tuple(delimiter_from_filepath(path) for path in txts) # TODO: why do we drop columns containing na? 
- initial_load = [pd.read_csv(txt, delimiter=delimiters[i], comment='#' - ).dropna(axis=1) for i, txt in enumerate(txts)] + initial_load = [ + pd.read_csv(txt, delimiter=delimiters[i], comment="#").dropna(axis=1) + for i, txt in enumerate(txts) + ] for i, df in enumerate(initial_load): # if we read a value-row as a header, fix that try: df.columns.astype(float) - initial_load[i] = pd.read_csv(txts[i], delimiter=delimiters[i], - comment='#', header=None - ).dropna(axis=1) + initial_load[i] = pd.read_csv( + txts[i], delimiter=delimiters[i], comment="#", header=None + ).dropna(axis=1) except ValueError: pass # assume string columns are indices and not values to correlate indices = [] for i in range(len(initial_load)): - indices.append(np.where(df.apply( - lambda _: _.dtype == np.dtypes.ObjectDType))[0]) + indices.append( + np.where(df.apply(lambda _: _.dtype == np.dtypes.ObjectDType))[0] + ) oned = [] for i, index in enumerate(indices): if index.shape[0]: - oned.append(pd.read_csv(txts[i], delimiter=delimiters[i], - comment='#', index_col=indices[i] - ).dropna(axis=1).values) + oned.append( + pd.read_csv( + txts[i], delimiter=delimiters[i], comment="#", index_col=indices[i] + ) + .dropna(axis=1) + .values + ) else: oned.append(initial_load[i].values) return (np.nanmean(measure) for measure in batch_correlate(*oned, axis=0)) -def create_unique_file_dict(filepaths: list[str], output_folder_path: str, - replacements: Optional[list[str]] = None - ) -> dict[str, dict[tuple, str]]: +def create_unique_file_dict( + filepaths: list[str], + output_folder_path: str, + replacements: Optional[list[str]] = None, +) -> dict[str, dict[tuple, str]]: """ Parameters ---------- @@ -182,20 +210,17 @@ def create_unique_file_dict(filepaths: list[str], output_folder_path: str, ------- files_dict : dict a dictionary of dictionaries, format: - files_dict["centrality"] = + files_dict["centrality"] = {("centrality", midpath, nums): , ..} """ files_dict = {} for filepath in filepaths: - if "_stack" in filepath: continue - if ("itk" in filepath) or ("xfm" in filepath) or ( - "montage" in filepath - ): + if ("itk" in filepath) or ("xfm" in filepath) or ("montage" in filepath): continue path_changes = [] real_filepath = filepath @@ -205,7 +230,8 @@ def create_unique_file_dict(filepaths: list[str], output_folder_path: str, raise SyntaxError( "\n\n[!] 
In the replacements text file, the old " "substring and its replacement must be separated " - "by a comma.\n\n") + "by a comma.\n\n" + ) word, new = word_couple.split(",") if word in filepath: path_changes.append(f"old: {filepath}") @@ -242,15 +268,15 @@ def create_unique_file_dict(filepaths: list[str], output_folder_path: str, else: tags = [] category = filename - category = category.rstrip('.gz').rstrip('.nii') + category = category.rstrip(".gz").rstrip(".nii") - excl_tags = ['sub-', 'ses-', 'task-', 'run-', 'acq-'] + excl_tags = ["sub-", "ses-", "task-", "run-", "acq-"] # len(filetag) == 1 is temporary for broken/missing ses-* tag for filetag in filename.split("_"): for exctag in excl_tags: if exctag in filetag or len(filetag) == 1: - category = category.replace(f'{filetag}_', '') + category = category.replace(f"{filetag}_", "") # this provides a way to safely identify the specific file # without relying on a full string of the filename (because @@ -259,7 +285,7 @@ def create_unique_file_dict(filepaths: list[str], output_folder_path: str, nums_in_folder = [int(s) for s in folder if s.isdigit()] nums_in_filename = [int(s) for s in filename if s.isdigit()] - file_nums = '' + file_nums = "" for num in nums_in_folder: file_nums = file_nums + str(num) @@ -280,33 +306,35 @@ def create_unique_file_dict(filepaths: list[str], output_folder_path: str, files_dict[category] = {} files_dict[category].update(temp_dict) - + return files_dict -def gather_all_files(input_dct: dict, pickle_dir: str, - source: str = 'output_dir') -> tuple[dict, dict]: +def gather_all_files( + input_dct: dict, pickle_dir: str, source: str = "output_dir" +) -> tuple[dict, dict]: """ Given an input dictionary, a pickle directory, and (optionally) a source, returns a pair of dicts """ file_dct_list = [{}, {}] - for index, (key, pipe_dct) in enumerate(input_dct['pipelines'].items()): - + for index, (key, pipe_dct) in enumerate(input_dct["pipelines"].items()): pipe_outdir = pipe_dct[source] - if input_dct['settings']['s3_creds']: + if input_dct["settings"]["s3_creds"]: if not "s3://" in pipe_outdir: - err = "\n\n[!] If pulling output files from an S3 bucket, the "\ - "output folder path must have the s3:// prefix.\n\n" + err = ( + "\n\n[!] If pulling output files from an S3 bucket, the " + "output folder path must have the s3:// prefix.\n\n" + ) raise Exception(err) else: - pipe_outdir = os.path.abspath(pipe_outdir).rstrip('/') + pipe_outdir = os.path.abspath(pipe_outdir).rstrip("/") - pipeline_name = pipe_outdir.split('/')[-1] + pipeline_name = pipe_outdir.split("/")[-1] - #if source == "output_dir" and "pipeline_" not in pipeline_name: + # if source == "output_dir" and "pipeline_" not in pipeline_name: # err = "\n\n[!] Your pipeline output directory has to be a specific " \ # "one that has the 'pipeline_' prefix.\n\n(Not the main output " \ # "directory that contains all of the 'pipeline_X' subdirectories," \ @@ -316,13 +344,16 @@ def gather_all_files(input_dct: dict, pickle_dir: str, output_pkl = os.path.join(pickle_dir, f"{key}_{source}_paths.p") if os.path.exists(output_pkl): - print(f"Found output list pickle for {key}, skipping output file" - "path parsing..") + print( + f"Found output list pickle for {key}, skipping output file" + "path parsing.." 
+ ) pipeline_files_dct = read_pickle(output_pkl) else: pipeline_files_list = gather_local_filepaths(pipe_outdir) pipeline_files_dct = create_unique_file_dict( - pipeline_files_list, pipe_outdir, pipe_dct['replacements']) + pipeline_files_list, pipe_outdir, pipe_dct["replacements"] + ) write_pickle(pipeline_files_dct, output_pkl) file_dct_list[index] = pipeline_files_dct @@ -330,9 +361,10 @@ def gather_all_files(input_dct: dict, pickle_dir: str, return tuple(file_dct_list) -def match_filepaths(old_files_dict: dict[str, dict[tuple, str]], - new_files_dict: dict[str, dict[tuple, str]] - ) -> dict[str, dict[tuple, ]]: +def match_filepaths( + old_files_dict: dict[str, dict[tuple, str]], + new_files_dict: dict[str, dict[tuple, str]], +) -> dict[str, dict[tuple,]]: """Returns a dictionary mapping each filepath from the first C-PAC run to the second one, matched to derivative, strategy, and scan. @@ -361,15 +393,15 @@ def match_filepaths(old_files_dict: dict[str, dict[tuple, str]], if key in old_files_dict.keys(): for file_id in new_files_dict[key]: if file_id in old_files_dict[key].keys(): - if key not in matched_path_dict.keys(): matched_path_dict[key] = {} - matched_path_dict[key][file_id] = \ + matched_path_dict[key][file_id] = ( old_files_dict[key][file_id] + new_files_dict[key][file_id] + ) else: - missing_in_old.append(file_id)#new_files_dict[key][file_id]) + missing_in_old.append(file_id) # new_files_dict[key][file_id]) else: missing_in_old.append(new_files_dict[key]) @@ -380,14 +412,16 @@ def match_filepaths(old_files_dict: dict[str, dict[tuple, str]], missing_in_new.append(old_files_dict[key]) if len(matched_path_dict) == 0: - err = "\n\n[!] No output paths were successfully matched between " \ - "the two CPAC output directories!\n\n" + err = ( + "\n\n[!] 
No output paths were successfully matched between " + "the two CPAC output directories!\n\n" + ) raise Exception(err) matched_files_dct = { "matched": matched_path_dict, "missing_old": missing_in_old, - "missing_new": missing_in_new + "missing_new": missing_in_new, } return matched_files_dct @@ -397,23 +431,23 @@ def delimiter_from_filepath(filepath: Union[Path, str]) -> Optional[str]: """ Given a filepath, return expected value-separator delimiter """ - if filepath.endswith('.tsv'): - return '\t' - if filepath.endswith('.csv'): - return ',' - with open(filepath, 'r', encoding='utf8') as _f: - first_line = '#' - while first_line.lstrip().startswith('#'): + if filepath.endswith(".tsv"): + return "\t" + if filepath.endswith(".csv"): + return "," + with open(filepath, "r", encoding="utf8") as _f: + first_line = "#" + while first_line.lstrip().startswith("#"): first_line = _f.readline() - for delimiter in ['\t', ',', ' ']: + for delimiter in ["\t", ",", " "]: if delimiter in first_line: - if delimiter == ' ': - return r'\s+' + if delimiter == " ": + return r"\s+" return delimiter return None -def calculate_correlation(args_tuple): +def calculate_correlation(args_tuple): category = args_tuple[0] old_path = args_tuple[1] new_path = args_tuple[2] @@ -429,14 +463,16 @@ def calculate_correlation(args_tuple): if s3_creds: try: # full filepath with filename - old_local_file = os.path.join(local_dir, "s3_input_files", \ - old_path.replace("s3://","")) + old_local_file = os.path.join( + local_dir, "s3_input_files", old_path.replace("s3://", "") + ) # directory without filename - old_local_path = old_local_file.replace(old_path.split("/")[-1],"") + old_local_path = old_local_file.replace(old_path.split("/")[-1], "") - new_local_file = os.path.join(local_dir, "s3_input_files", \ - new_path.replace("s3://","")) - new_local_path = new_local_file.replace(new_path.split("/")[-1],"") + new_local_file = os.path.join( + local_dir, "s3_input_files", new_path.replace("s3://", "") + ) + new_local_path = new_local_file.replace(new_path.split("/")[-1], "") if not os.path.exists(old_local_path): os.makedirs(old_local_path) @@ -444,39 +480,44 @@ def calculate_correlation(args_tuple): os.makedirs(new_local_path) except Exception as e: - err = "\n\nLocals: {0}\n\n[!] Could not create the local S3 " \ - "download directory.\n\nError details: {1}\n\n".format((locals(), e)) + err = ( + "\n\nLocals: {0}\n\n[!] Could not create the local S3 " + "download directory.\n\nError details: {1}\n\n".format((locals(), e)) + ) raise Exception(e) try: old_path = old_local_file except Exception as e: - err = "\n\nLocals: {0}\n\n[!] Could not download the files from " \ - "the S3 bucket. \nS3 filepath: {1}\nLocal destination: {2}" \ - "\nS3 creds: {3}\n\nError details: {4}\n\n".format(locals(), - old_path, - old_local_path, - s3_creds, e) + err = ( + "\n\nLocals: {0}\n\n[!] Could not download the files from " + "the S3 bucket. \nS3 filepath: {1}\nLocal destination: {2}" + "\nS3 creds: {3}\n\nError details: {4}\n\n".format( + locals(), old_path, old_local_path, s3_creds, e + ) + ) raise Exception(e) try: new_path = new_local_file except Exception as e: - err = "\n\nLocals: {0}\n\n[!] Could not download the files from " \ - "the S3 bucket. \nS3 filepath: {1}\nLocal destination: {2}" \ - "\nS3 creds: {3}\n\nError details: {4}\n\n".format(locals(), - new_path, - new_local_path, - s3_creds, e) + err = ( + "\n\nLocals: {0}\n\n[!] Could not download the files from " + "the S3 bucket. 
\nS3 filepath: {1}\nLocal destination: {2}" + "\nS3 creds: {3}\n\nError details: {4}\n\n".format( + locals(), new_path, new_local_path, s3_creds, e + ) + ) raise Exception(e) ## nibabel to pull the data from the re-assembled file paths if os.path.exists(old_path) and os.path.exists(new_path): - - if ('.csv' in old_path and '.csv' in new_path) or \ - ('.txt' in old_path and '.txt' in new_path) or \ - ('.1D' in old_path and '.1D' in new_path) or \ - ('.tsv' in old_path and '.tsv' in new_path): + if ( + (".csv" in old_path and ".csv" in new_path) + or (".txt" in old_path and ".txt" in new_path) + or (".1D" in old_path and ".1D" in new_path) + or (".tsv" in old_path and ".tsv" in new_path) + ): try: concor, pearson = correlate_text_based((old_path, new_path)) except Exception as e: @@ -490,7 +531,7 @@ def calculate_correlation(args_tuple): print("Success - {0}".format(str(concor))) # except Exception as e: - # corr_tuple = ("file reading problem: {0}".format(e), + # corr_tuple = ("file reading problem: {0}".format(e), # old_path, new_path) # if verbose: # print(str(corr_tuple)) @@ -511,7 +552,7 @@ def calculate_correlation(args_tuple): data_2 = nb.load(new_path).get_fdata() # except Exception as e: - # corr_tuple = ("file reading problem: {0}".format(e), + # corr_tuple = ("file reading problem: {0}".format(e), # old_path, new_path) # if verbose: # print(str(corr_tuple)) @@ -528,8 +569,7 @@ def calculate_correlation(args_tuple): else: concor, pearson = batch_correlate(data_1, data_2) except Exception as e: - corr_tuple = ("correlating problem: {0}".format(e), - old_path, new_path) + corr_tuple = ("correlating problem: {0}".format(e), old_path, new_path) if verbose: print(str(corr_tuple)) return corr_tuple @@ -562,55 +602,54 @@ def calculate_correlation(args_tuple): return corr_tuple -def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, verbose=False): - - all_corr_dct = { - 'pearson': {}, - 'concordance': {}, - 'sub_optimal': {} - } +def run_correlations( + matched_dct, input_dct, source="output_dir", quick=False, verbose=False +): + all_corr_dct = {"pearson": {}, "concordance": {}, "sub_optimal": {}} args_list = [] quick_list = [ - 'anatomical_brain', - 'anatomical_csf_mask', - 'anatomical_gm_mask', - 'anatomical_wm_mask', - 'anatomical_to_standard', - 'functional_preprocessed', - 'functional_brain_mask', - 'mean_functional_in_anat', - 'functional_nuisance_residuals', - 'functional_nuisance_regressors', - 'functional_to_standard', - 'roi_timeseries' + "anatomical_brain", + "anatomical_csf_mask", + "anatomical_gm_mask", + "anatomical_wm_mask", + "anatomical_to_standard", + "functional_preprocessed", + "functional_brain_mask", + "mean_functional_in_anat", + "functional_nuisance_residuals", + "functional_nuisance_regressors", + "functional_to_standard", + "roi_timeseries", ] - matched_path_dct = matched_dct['matched'] - output_dir = input_dct['settings']['correlations_dir'] - s3_creds = input_dct['settings']['s3_creds'] + matched_path_dct = matched_dct["matched"] + output_dir = input_dct["settings"]["correlations_dir"] + s3_creds = input_dct["settings"]["s3_creds"] for category in matched_path_dct.keys(): - if quick: if category not in quick_list: continue for file_id in matched_path_dct[category].keys(): - old_path = matched_path_dct[category][file_id][0] new_path = matched_path_dct[category][file_id][1] - if source == 'work_dir': - args_list.append((file_id, old_path, new_path, output_dir, s3_creds, verbose)) + if source == "work_dir": + args_list.append( + (file_id, 
old_path, new_path, output_dir, s3_creds, verbose) + ) else: - args_list.append((category, old_path, new_path, output_dir, s3_creds, verbose)) + args_list.append( + (category, old_path, new_path, output_dir, s3_creds, verbose) + ) print("\nNumber of correlations to calculate: {0}\n".format(len(args_list))) print("Running correlations...") - p = Pool(input_dct['settings']['n_cpus']) + p = Pool(input_dct["settings"]["n_cpus"]) corr_tuple_list = p.map(calculate_correlation, args_list) p.close() p.join() @@ -623,24 +662,23 @@ def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, v if not corr_tuple: continue if isinstance(corr_tuple[1], Exception): - failures.append((corr_tuple[0], corr_tuple[1], - ' | '.join(corr_tuple[2]))) + failures.append((corr_tuple[0], corr_tuple[1], " | ".join(corr_tuple[2]))) continue - if corr_tuple[0] not in all_corr_dct['concordance'].keys(): - all_corr_dct['concordance'][corr_tuple[0]] = [] - if corr_tuple[0] not in all_corr_dct['pearson'].keys(): - all_corr_dct['pearson'][corr_tuple[0]] = [] - all_corr_dct['concordance'][corr_tuple[0]] += corr_tuple[1] - all_corr_dct['pearson'][corr_tuple[0]] += corr_tuple[2] + if corr_tuple[0] not in all_corr_dct["concordance"].keys(): + all_corr_dct["concordance"][corr_tuple[0]] = [] + if corr_tuple[0] not in all_corr_dct["pearson"].keys(): + all_corr_dct["pearson"][corr_tuple[0]] = [] + all_corr_dct["concordance"][corr_tuple[0]] += corr_tuple[1] + all_corr_dct["pearson"][corr_tuple[0]] += corr_tuple[2] if len(corr_tuple) > 3: - if corr_tuple[0] not in all_corr_dct['sub_optimal'].keys(): - all_corr_dct['sub_optimal'][corr_tuple[0]] = [] + if corr_tuple[0] not in all_corr_dct["sub_optimal"].keys(): + all_corr_dct["sub_optimal"][corr_tuple[0]] = [] try: - all_corr_dct['sub_optimal'][corr_tuple[0]].append("{0}:\n{1}\n{2}" - "\n\n".format(corr_tuple[1][0], - corr_tuple[3][0], - corr_tuple[3][1])) + all_corr_dct["sub_optimal"][corr_tuple[0]].append( + "{0}:\n{1}\n{2}" + "\n\n".format(corr_tuple[1][0], corr_tuple[3][0], corr_tuple[3][1]) + ) except TypeError: pass @@ -648,13 +686,12 @@ def run_correlations(matched_dct, input_dct, source='output_dir', quick=False, v def post180_organize_correlations(concor_dct, corr_type="concordance", quick=False): - corr_map_dct = {"correlations": {}} for key in concor_dct: if "problem" in key: continue # shouldn't need this - FIX - rawkey = key.replace('acq-', '').replace('run-', '') + rawkey = key.replace("acq-", "").replace("run-", "") datatype = rawkey.split("_")[-1] if datatype not in corr_map_dct["correlations"]: @@ -679,40 +716,28 @@ def organize_correlations(concor_dict, corr_type="concordance", quick=False): corr_map_dict = {} corr_map_dict["correlations"] = {} - derivs = [ - 'alff', - 'dr_tempreg', - 'reho', - 'sca_roi', - 'timeseries', - 'ndmg'] - anats = [ - 'anatomical', - 'seg' - ] + derivs = ["alff", "dr_tempreg", "reho", "sca_roi", "timeseries", "ndmg"] + anats = ["anatomical", "seg"] time_series = [ - 'functional_freq', - 'nuisance_residuals', - 'functional_preprocessed', - 'functional_to_standard', - 'ica_aroma_', - 'motion_correct', - 'slice_time', + "functional_freq", + "nuisance_residuals", + "functional_preprocessed", + "functional_to_standard", + "ica_aroma_", + "motion_correct", + "slice_time", ] - funcs = [ - 'functional', - 'displacement'] + funcs = ["functional", "displacement"] for key in concor_dict: - if quick: core[key] = concor_dict[key] continue - if 'xfm' in key or 'mixel' in key: + if "xfm" in key or "mixel" in key: continue - if 
'centrality' in key or 'vmhc' in key or 'sca_tempreg' in key: + if "centrality" in key or "vmhc" in key or "sca_tempreg" in key: template_outputs[key] = concor_dict[key] continue @@ -722,7 +747,7 @@ def organize_correlations(concor_dict, corr_type="concordance", quick=False): continue for word in derivs: - if word in key and 'standard' not in key: + if word in key and "standard" not in key: native_outputs[key] = concor_dict[key] continue elif word in key: @@ -730,7 +755,7 @@ def organize_correlations(concor_dict, corr_type="concordance", quick=False): continue for word in time_series: - if word in key and 'mean' not in key and 'mask' not in key: + if word in key and "mean" not in key and "mask" not in key: timeseries[key] = concor_dict[key] continue @@ -751,7 +776,7 @@ def organize_correlations(concor_dict, corr_type="concordance", quick=False): corr_map_dict["correlations"][group] = regCorrMap else: print("No values in {0}".format(group)) - + group = "{0}_native_space_outputs".format(corr_type) if len(native_outputs.values()) > 0: corr_map_dict["correlations"][group] = native_outputs @@ -793,23 +818,24 @@ def quick_summary(dct, corr_map_dct, output_dir) -> dict: lines.append("{0}: {1}".format(output_type, corrmean)) dct = write_dct(dct, lines, output_type) - return(dct) + return dct -def compare_pipelines(input_dct: dict, - dir_type: str = 'output_dir') -> tuple[dict, dict]: +def compare_pipelines( + input_dct: dict, dir_type: str = "output_dir" +) -> tuple[dict, dict]: """ Given an input dict containing keys 'settings', gather prreviously generated pickles or all relevant output and working files - + Returns ------- corr_map : dict - + pearson_map : dict """ - output_dir = input_dct['settings']['output_dir'] - pickle_dir = input_dct['settings']['pickle_dir'] + output_dir = input_dct["settings"]["output_dir"] + pickle_dir = input_dct["settings"]["pickle_dir"] corrs_pkl = os.path.join(pickle_dir, f"{dir_type}_correlations.p") failures_pkl = os.path.join(pickle_dir, f"{dir_type}_failures.p") @@ -817,40 +843,51 @@ def compare_pipelines(input_dct: dict, all_corr_dct = None if os.path.exists(corrs_pkl): - print(f"\n\nFound the correlations pickle: {corrs_pkl}\n\n" - "Starting from there..\n") + print( + f"\n\nFound the correlations pickle: {corrs_pkl}\n\n" + "Starting from there..\n" + ) all_corr_dct = read_pickle(corrs_pkl) elif os.path.exists(matched_pkl): - print(f"\n\nFound the matched filepaths pickle: {matched_pkl}\n\n" - "Starting from there..\n") + print( + f"\n\nFound the matched filepaths pickle: {matched_pkl}\n\n" + "Starting from there..\n" + ) matched_dct = read_pickle(matched_pkl) else: # gather all relevant output and working files - outfiles1_dct, outfiles2_dct = gather_all_files(input_dct, pickle_dir, - source=dir_type) + outfiles1_dct, outfiles2_dct = gather_all_files( + input_dct, pickle_dir, source=dir_type + ) matched_dct = match_filepaths(outfiles1_dct, outfiles2_dct) write_pickle(matched_dct, matched_pkl) if not all_corr_dct: - all_corr_dct, failures = run_correlations(matched_dct, - input_dct, - source=dir_type, - quick=input_dct['settings']['quick'], - verbose=input_dct['settings']['verbose']) + all_corr_dct, failures = run_correlations( + matched_dct, + input_dct, + source=dir_type, + quick=input_dct["settings"]["quick"], + verbose=input_dct["settings"]["verbose"], + ) write_pickle(all_corr_dct, corrs_pkl) write_pickle(failures, failures_pkl) - - if dir_type == 'work_dir': + + if dir_type == "work_dir": sorted_vals = [] - #sorted_keys = sorted(all_corr_dct, 
key=all_corr_dct.get) - for key in all_corr_dct.keys(): #sorted_keys: - if 'file reading problem:' in key or 'different shape' in key or 'correlating problem' in key: + # sorted_keys = sorted(all_corr_dct, key=all_corr_dct.get) + for key in all_corr_dct.keys(): # sorted_keys: + if ( + "file reading problem:" in key + or "different shape" in key + or "correlating problem" in key + ): continue else: sorted_vals.append("{0}: {1}".format(all_corr_dct[key], key)) working_corrs_file = os.path.join(output_dir, "work_dir_correlations.txt") - with open(working_corrs_file, 'wt') as f: + with open(working_corrs_file, "wt") as f: for line in sorted_vals: f.write(line) f.write("\n") @@ -862,30 +899,36 @@ def compare_pipelines(input_dct: dict, else: organize = post180_organize_correlations - corr_map_dict = organize(all_corr_dct["concordance"], "concordance", - quick=input_dct['settings']['quick']) + corr_map_dict = organize( + all_corr_dct["concordance"], + "concordance", + quick=input_dct["settings"]["quick"], + ) corr_map_dict["pipeline_names"] = input_dct["pipelines"].keys() - - pearson_map_dict = organize(all_corr_dct["pearson"], "pearson", - quick=input_dct['settings']['quick']) + + pearson_map_dict = organize( + all_corr_dct["pearson"], "pearson", quick=input_dct["settings"]["quick"] + ) pearson_map_dict["pipeline_names"] = input_dct["pipelines"].keys() dct = {} corr_map = quick_summary(dct, corr_map_dict, output_dir) pearson_map = quick_summary(dct, pearson_map_dict, output_dir) - if all_corr_dct['sub_optimal']: - write_yml_file(all_corr_dct['sub_optimal'], os.path.join(output_dir, "sub_optimal.yml")) + if all_corr_dct["sub_optimal"]: + write_yml_file( + all_corr_dct["sub_optimal"], os.path.join(output_dir, "sub_optimal.yml") + ) - #for corr_group_name in corr_map_dict["correlations"].keys(): + # for corr_group_name in corr_map_dict["correlations"].keys(): # corr_group = corr_map_dict["correlations"][corr_group_name] # create_boxplot(corr_group, corr_group_name, # corr_map_dict["pipeline_names"], output_dir) - #for corr_group_name in pearson_map_dict["correlations"].keys(): + # for corr_group_name in pearson_map_dict["correlations"].keys(): # corr_group = pearson_map_dict["correlations"][corr_group_name] # create_boxplot(corr_group, corr_group_name, # pearson_map_dict["pipeline_names"], output_dir) - return(corr_map, pearson_map) + return (corr_map, pearson_map) def main() -> tuple: @@ -895,12 +938,11 @@ def main() -> tuple: • Check for already completed stuff (pickles) """ parser = argparse.ArgumentParser() - parser.add_argument("input_yaml", type=str, - help="file path of the script's input YAML") - parser.add_argument("--data_source", type=str, - help="Which site data comes from") - parser.add_argument("--branch", type=str, - help="Branch name") + parser.add_argument( + "input_yaml", type=str, help="file path of the script's input YAML" + ) + parser.add_argument("--data_source", type=str, help="Which site data comes from") + parser.add_argument("--branch", type=str, help="Branch name") args = parser.parse_args() data_source = args.data_source branch = args.branch @@ -910,27 +952,29 @@ def main() -> tuple: # check for already completed stuff (pickles) output_dir = os.path.join( - os.getcwd(), f"correlations_{input_dct['settings']['run_name']}") + os.getcwd(), f"correlations_{input_dct['settings']['run_name']}" + ) pickle_dir = os.path.join(output_dir, "pickles") if not os.path.exists(pickle_dir): try: os.makedirs(pickle_dir) except: - err = "\n\n[!] 
Could not create the output directory for the " \ - "correlations. Do you have write permissions?\nAttempted " \ - f"output directory: {output_dir}\n\n" + err = ( + "\n\n[!] Could not create the output directory for the " + "correlations. Do you have write permissions?\nAttempted " + f"output directory: {output_dir}\n\n" + ) raise Exception(err) - input_dct['settings'].update({'output_dir': output_dir, - 'pickle_dir': pickle_dir}) + input_dct["settings"].update({"output_dir": output_dir, "pickle_dir": pickle_dir}) - corr_map, pearson_map = compare_pipelines(input_dct, dir_type='output_dir') + corr_map, pearson_map = compare_pipelines(input_dct, dir_type="output_dir") corr_map_keys = list(corr_map.keys()) all_keys = [] for key in corr_map_keys: keys = list(corr_map[key]) - for i in keys: + for i in keys: all_keys.append(i) return all_keys, data_source, branch diff --git a/create_yml.py b/create_yml.py index a7238ea..fb4a3ed 100644 --- a/create_yml.py +++ b/create_yml.py @@ -1,29 +1,48 @@ -from utils.parse_yaml import cpac_yaml - import os + import click -@click.command() -@click.option('--pipeline1', required=True, type=str, help='Path to output directory from CPAC run ' - 'to correlate against pipeline2') -@click.option('--pipeline2', required=True, type=str, help='Path to output directory from CPAC run ' - 'to correlate against pipeline1') -@click.option('--workspace', type=str, help = 'directory to save correlations') -@click.option('--branch', type=str, help = 'branch name') -@click.option('--data_source', type=str, help = 'Data site') +from utils.parse_yaml import cpac_yaml +@click.command() +@click.option( + "--pipeline1", + required=True, + type=str, + help="Path to output directory from CPAC run " "to correlate against pipeline2", +) +@click.option( + "--pipeline2", + required=True, + type=str, + help="Path to output directory from CPAC run " "to correlate against pipeline1", +) +@click.option("--workspace", type=str, help="directory to save correlations") +@click.option("--branch", type=str, help="branch name") +@click.option("--data_source", type=str, help="Data site") def main(pipeline1, pipeline2, workspace, branch, data_source): """ Correlate outputs from regression run again another C-PAC version. 
""" - git_home = os.path.normpath(os.path.dirname(os.path.abspath(__file__)) + os.sep + os.pardir) - run_name = f'{branch}_{data_source}' - - cpac_yaml(pipeline1, pipeline2, f'{workspace}/correlations', run_name, 1, branch, data_source) + git_home = os.path.normpath( + os.path.dirname(os.path.abspath(__file__)) + os.sep + os.pardir + ) + run_name = f"{branch}_{data_source}" + + cpac_yaml( + pipeline1, + pipeline2, + f"{workspace}/correlations", + run_name, + 1, + branch, + data_source, + ) return + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/utils/html_script.py b/utils/html_script.py index 7a23761..c0aff5c 100644 --- a/utils/html_script.py +++ b/utils/html_script.py @@ -1,4 +1,3 @@ - def dataset(name, data_source, value): dataset = f""" {{ @@ -9,10 +8,11 @@ def dataset(name, data_source, value): """ return dataset + def body(all_keys, data_source): - data_body = '' + data_body = "" for key in all_keys: - name_value = key.split(': ') + name_value = key.split(": ") name = name_value[0] value = name_value[1] data_body += dataset(name, data_source, value) @@ -23,9 +23,9 @@ def body(all_keys, data_source): """ return data_body + def write_html(data_body): - script = \ - f""" + script = f""" Correlations @@ -92,15 +92,16 @@ def write_html(data_body): """ - return(script) + return script + def setup_browser(html_template): import tempfile import webbrowser - with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as temp_file: - temp_file.write(html_template.encode('utf-8')) - filename = 'file:///'+ temp_file.name + with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as temp_file: + temp_file.write(html_template.encode("utf-8")) + filename = "file:///" + temp_file.name webbrowser.open_new_tab(filename) - - return \ No newline at end of file + + return diff --git a/utils/parse_yaml.py b/utils/parse_yaml.py index f288dee..bc1932e 100644 --- a/utils/parse_yaml.py +++ b/utils/parse_yaml.py @@ -1,59 +1,69 @@ import os + import yaml + def get_dir(paths): if not paths: directory = None else: for root, dirs, files in os.walk(paths): for dir in dirs: - if 'pipeline_' in dir: + if "pipeline_" in dir: directory = os.path.join(root, dir) return directory -def write_pipeline_yaml(output_dir=None, working_dir=None, log_dir=None, \ - pipeline_config=None, pipeline_name=None): +def write_pipeline_yaml( + output_dir=None, + working_dir=None, + log_dir=None, + pipeline_config=None, + pipeline_name=None, +): pipeline = { pipeline_name: { "output_dir": output_dir, "work_dir": working_dir, "log_dir": log_dir, "pipe_config": pipeline_config, - "replacements": None + "replacements": None, } } return pipeline + def parse_yaml(directory=None, pipeline_name=None): - subdirs = ['log', 'working', 'output'] + subdirs = ["log", "working", "output"] paths = {} for subdir in subdirs: if os.path.isdir(os.path.join(directory, subdir)): - paths[f"{subdir}_dir"] = (os.path.join(directory, subdir)) + paths[f"{subdir}_dir"] = os.path.join(directory, subdir) else: paths[f"{subdir}_dir"] = None - log_dir = get_dir(paths['log_dir']) + log_dir = get_dir(paths["log_dir"]) - for root, dirs, files in os.walk(paths['log_dir']): + for root, dirs, files in os.walk(paths["log_dir"]): for file in files: if file.endswith("Z.yml"): pipeline_config = os.path.join(root, file) - working_dir = get_dir(paths['working_dir']) - output_dir = get_dir(paths['output_dir']) + working_dir = get_dir(paths["working_dir"]) + output_dir = get_dir(paths["output_dir"]) - pipeline_dict = 
write_pipeline_yaml(output_dir, working_dir, log_dir, \ - pipeline_config, pipeline_name) + pipeline_dict = write_pipeline_yaml( + output_dir, working_dir, log_dir, pipeline_config, pipeline_name + ) return pipeline_dict -def write_yaml(pipeline_1=None, pipeline_2=None, correlations_dir=None, \ - run_name=None, n_cpus=None): +def write_yaml( + pipeline_1=None, pipeline_2=None, correlations_dir=None, run_name=None, n_cpus=None +): yaml_dict = {} yaml_dict["settings"] = { "n_cpus": n_cpus, @@ -61,25 +71,25 @@ def write_yaml(pipeline_1=None, pipeline_2=None, correlations_dir=None, \ "run_name": run_name, "s3_creds": None, "quick": False, - "verbose": False + "verbose": False, } - yaml_dict["pipelines"] = { - **pipeline_1, - **pipeline_2 - } + yaml_dict["pipelines"] = {**pipeline_1, **pipeline_2} return yaml_dict -def cpac_yaml(pipeline1, pipeline2, correlations_dir, run_name, n_cpus, branch, data_source): - - pipeline_1 = parse_yaml(pipeline1, 'pipeline_1') - pipeline_2 = parse_yaml(pipeline2, 'pipeline_2') - yaml_contents = write_yaml(pipeline_1, pipeline_2, correlations_dir, - run_name, n_cpus) +def cpac_yaml( + pipeline1, pipeline2, correlations_dir, run_name, n_cpus, branch, data_source +): + pipeline_1 = parse_yaml(pipeline1, "pipeline_1") + pipeline_2 = parse_yaml(pipeline2, "pipeline_2") + + yaml_contents = write_yaml( + pipeline_1, pipeline_2, correlations_dir, run_name, n_cpus + ) - with open(f'{branch}_{data_source}.yml', 'w') as file: + with open(f"{branch}_{data_source}.yml", "w") as file: yaml.dump(yaml_contents, file, default_flow_style=False, sort_keys=False) - return \ No newline at end of file + return From 0cdde222426dc302a2be93447bfef0aa69a4f22e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 14 Nov 2023 17:52:08 -0500 Subject: [PATCH 06/29] :truck: SSOT cpac_correlations Ref https://github.com/FCP-INDI/CPAC_regtest_pack/pull/7 --- calculate_correlations.py | 983 +------------------------------------- requirements.txt | 1 + 2 files changed, 5 insertions(+), 979 deletions(-) create mode 100644 requirements.txt diff --git a/calculate_correlations.py b/calculate_correlations.py index 7edbdfc..103f5ed 100644 --- a/calculate_correlations.py +++ b/calculate_correlations.py @@ -1,986 +1,11 @@ #!/usr/bin/env python -import argparse -import itertools -import math -import os -import pickle -import subprocess -from collections.abc import Generator -from multiprocessing import Pool -from pathlib import Path -from typing import NamedTuple, Optional, Tuple, Union - -import nibabel as nb -import numpy as np -import pandas as pd -import yaml +"""Calculate correlations and write them to D3-friendly file""" +from cpac_correlations import cpac_correlations from utils.html_script import body -Axis = Union[int, Tuple[int, ...]] - - -class CorrValue(NamedTuple): - """Correlation values""" - - concor: np.ndarray - pearson: np.ndarray - - -def read_yml_file(yml_filepath): - with open(yml_filepath, "r") as f: - yml_dict = yaml.safe_load(f) - - return yml_dict - - -def write_yml_file(yml_dict, out_filepath): - with open(out_filepath, "wt") as f: - yaml.safe_dump(yml_dict, f) - - -def read_pickle(pickle_file): - with open(pickle_file, "rb") as f: - dct = pickle.load(f) - return dct - - -def write_pickle(dct, out_filepath): - with open(out_filepath, "wb") as f: - pickle.dump(dct, f, protocol=pickle.HIGHEST_PROTOCOL) - - -def read_txt_file(txt_file): - with open(txt_file, "r") as f: - strings = f.read().splitlines() - return strings - - -def write_txt_file(text_lines, out_filepath): - with 
open(out_filepath, "wt") as f: - for line in text_lines: - f.write("{0}\n".format(line)) - - -def write_dct(dct=None, text_lines=None, outname=None): - if not dct: - dct = {outname: text_lines} - else: - dct.update({outname: text_lines}) - return dct - - -def gather_local_filepaths(output_folder_path: str) -> list[str]: - """Given a local path, return relevant paths within that directory""" - filepaths = [] - - print("Gathering file paths from {0}\n".format(output_folder_path)) - for root, _dirs, files in os.walk(output_folder_path): - # loops through every file in the directory - for filename in files: - # checks if the file is a nifti (.nii.gz) - if ( - ".nii" in filename - or ".csv" in filename - or ".txt" in filename - or ".1D" in filename - or ".tsv" in filename - ): - filepaths.append(os.path.join(root, filename)) - - if len(filepaths) == 0: - raise FileNotFoundError( - "\n\n[!] No filepaths were found given the output folder!\n\n" - ) - - return filepaths - - -class SummaryStats: - def __init__( - self, array: np.ndarray, axis: Optional[Union[int, str]] = None - ) -> None: - self.mean = np.mean(array, axis=axis, keepdims=True) - self.var = np.var(array, axis=axis, keepdims=True) - self.std = np.sqrt(self.var) - self.norm = (array - self.mean) / self.std - - -def batch_correlate( - x: np.ndarray, y: np.ndarray, axis: Optional[Axis] = None -) -> CorrValue: - """ - Compute a batch of concordance and Pearson correlation coefficients between - x and y along an axis (or axes). - - References: - https://en.wikipedia.org/wiki/Concordance_correlation_coefficient - """ - # summary stats - try: - summary_stats = {"x": SummaryStats(x), "y": SummaryStats(y)} - except ZeroDivisionError: - return CorrValue(np.nan, np.nan) - - # Correlation coefficients - pearson = np.mean( - summary_stats["x"].norm * summary_stats["y"].norm, axis=axis, keepdims=True - ) - concor = ( - 2 - * pearson - * summary_stats["x"].std - * summary_stats["y"].std - / ( - summary_stats["x"].var - + summary_stats["y"].var - + (summary_stats["x"].mean - summary_stats["y"].mean) ** 2 - ) - ) - # Squeeze reduced singleton dimensions - if axis is not None: - concor = np.squeeze(concor, axis=axis) - pearson = np.squeeze(pearson, axis=axis) - return CorrValue(concor, pearson) - - -def determine_indices(df: pd.DataFrame) -> list: - """Determine indices of str-type columns in a DataFrame""" - return [ - i - for i, val in enumerate(df.applymap(lambda _: isinstance(_, str)).values[0]) - if val - ] - - -def correlate_text_based(txts: Union[list, tuple]) -> Generator: - delimiters = tuple(delimiter_from_filepath(path) for path in txts) - # TODO: why do we drop columns containing na? 
- initial_load = [ - pd.read_csv(txt, delimiter=delimiters[i], comment="#").dropna(axis=1) - for i, txt in enumerate(txts) - ] - for i, df in enumerate(initial_load): - # if we read a value-row as a header, fix that - try: - df.columns.astype(float) - initial_load[i] = pd.read_csv( - txts[i], delimiter=delimiters[i], comment="#", header=None - ).dropna(axis=1) - except ValueError: - pass - # assume string columns are indices and not values to correlate - indices = [] - for i in range(len(initial_load)): - indices.append( - np.where(df.apply(lambda _: _.dtype == np.dtypes.ObjectDType))[0] - ) - oned = [] - for i, index in enumerate(indices): - if index.shape[0]: - oned.append( - pd.read_csv( - txts[i], delimiter=delimiters[i], comment="#", index_col=indices[i] - ) - .dropna(axis=1) - .values - ) - else: - oned.append(initial_load[i].values) - return (np.nanmean(measure) for measure in batch_correlate(*oned, axis=0)) - - -def create_unique_file_dict( - filepaths: list[str], - output_folder_path: str, - replacements: Optional[list[str]] = None, -) -> dict[str, dict[tuple, str]]: - """ - Parameters - ---------- - filepaths : list of str - list of output filepaths from a CPAC output directory - output_folder_path : str - the CPAC output directory the filepaths are from - replacements : list of str, optional - a list of strings to be removed from the filepaths should - they occur - - Returns - ------- - files_dict : dict - a dictionary of dictionaries, format: - files_dict["centrality"] = - {("centrality", midpath, nums): , ..} - """ - - files_dict = {} - - for filepath in filepaths: - if "_stack" in filepath: - continue - - if ("itk" in filepath) or ("xfm" in filepath) or ("montage" in filepath): - continue - path_changes = [] - real_filepath = filepath - if replacements: - for word_couple in replacements: - if "," not in word_couple: - raise SyntaxError( - "\n\n[!] 
In the replacements text file, the old " - "substring and its replacement must be separated " - "by a comma.\n\n" - ) - word, new = word_couple.split(",") - if word in filepath: - path_changes.append(f"old: {filepath}") - filepath = filepath.replace(word, new) - path_changes.append(f"new: {filepath}") - if path_changes: - with open(os.path.join(os.getcwd(), "path_changes.txt"), "wt") as f: - for path in path_changes: - f.write(path) - f.write("\n") - - filename = filepath.split("/")[-1] - - # name of the directory the file is in - folder = filepath.split("/")[-2] - - midpath = filepath.replace(output_folder_path, "") - midpath = midpath.replace(filename, "") - - pre180 = False - if pre180: - # name of the output type/derivative - try: - category = midpath.split("/")[2] - except IndexError as e: - continue - - if "eigenvector" in filepath: - category = category + ": eigenvector" - if "degree" in filepath: - category = category + ": degree" - if "lfcd" in filepath: - category = category + ": lfcd" - else: - tags = [] - category = filename - category = category.rstrip(".gz").rstrip(".nii") - - excl_tags = ["sub-", "ses-", "task-", "run-", "acq-"] - - # len(filetag) == 1 is temporary for broken/missing ses-* tag - for filetag in filename.split("_"): - for exctag in excl_tags: - if exctag in filetag or len(filetag) == 1: - category = category.replace(f"{filetag}_", "") - - # this provides a way to safely identify the specific file - # without relying on a full string of the filename (because - # this can change between versions depending on what any given - # processing tool appends to output file names) - nums_in_folder = [int(s) for s in folder if s.isdigit()] - nums_in_filename = [int(s) for s in filename if s.isdigit()] - - file_nums = "" - - for num in nums_in_folder: - file_nums = file_nums + str(num) - - for num in nums_in_filename: - file_nums = file_nums + str(num) - - # load these settings into the tuple so that the file can be - # identified without relying on its full path (as it would be - # impossible to match files from two regression tests just - # based on their filepaths) - file_tuple = (category, midpath, file_nums) - - temp_dict = {} - temp_dict[file_tuple] = [real_filepath] - - if category not in files_dict.keys(): - files_dict[category] = {} - - files_dict[category].update(temp_dict) - - return files_dict - - -def gather_all_files( - input_dct: dict, pickle_dir: str, source: str = "output_dir" -) -> tuple[dict, dict]: - """ - Given an input dictionary, a pickle directory, and (optionally) a source, - returns a pair of dicts - """ - file_dct_list = [{}, {}] - - for index, (key, pipe_dct) in enumerate(input_dct["pipelines"].items()): - pipe_outdir = pipe_dct[source] - - if input_dct["settings"]["s3_creds"]: - if not "s3://" in pipe_outdir: - err = ( - "\n\n[!] If pulling output files from an S3 bucket, the " - "output folder path must have the s3:// prefix.\n\n" - ) - raise Exception(err) - else: - pipe_outdir = os.path.abspath(pipe_outdir).rstrip("/") - - pipeline_name = pipe_outdir.split("/")[-1] - - # if source == "output_dir" and "pipeline_" not in pipeline_name: - # err = "\n\n[!] 
Your pipeline output directory has to be a specific " \ - # "one that has the 'pipeline_' prefix.\n\n(Not the main output " \ - # "directory that contains all of the 'pipeline_X' subdirectories," \ - # "and not a specific participant's output subdirectory either.)\n" - # raise Exception(err) - - output_pkl = os.path.join(pickle_dir, f"{key}_{source}_paths.p") - - if os.path.exists(output_pkl): - print( - f"Found output list pickle for {key}, skipping output file" - "path parsing.." - ) - pipeline_files_dct = read_pickle(output_pkl) - else: - pipeline_files_list = gather_local_filepaths(pipe_outdir) - pipeline_files_dct = create_unique_file_dict( - pipeline_files_list, pipe_outdir, pipe_dct["replacements"] - ) - write_pickle(pipeline_files_dct, output_pkl) - - file_dct_list[index] = pipeline_files_dct - - return tuple(file_dct_list) - - -def match_filepaths( - old_files_dict: dict[str, dict[tuple, str]], - new_files_dict: dict[str, dict[tuple, str]], -) -> dict[str, dict[tuple,]]: - """Returns a dictionary mapping each filepath from the first C-PAC - run to the second one, matched to derivative, strategy, and scan. - - Parameters - ---------- - old_files_dict, new_files_dict : dict - each key is a derivative name, and each value is another - dictionary keying (derivative, mid-path, last digit in path) - tuples to a list containing the full filepath described by - the tuple that is the key - - Returns - ------- - matched_path_dict : dict - same as the input dictionaries, except the list in the - sub-dictionary value has both file paths that are matched - """ - - # file path matching - matched_path_dict = {} - missing_in_old = [] - missing_in_new = [] - - for key in new_files_dict: - # for types of derivative... - if key in old_files_dict.keys(): - for file_id in new_files_dict[key]: - if file_id in old_files_dict[key].keys(): - if key not in matched_path_dict.keys(): - matched_path_dict[key] = {} - - matched_path_dict[key][file_id] = ( - old_files_dict[key][file_id] + new_files_dict[key][file_id] - ) - - else: - missing_in_old.append(file_id) # new_files_dict[key][file_id]) - else: - missing_in_old.append(new_files_dict[key]) - - # find out what is in the last version's outputs that isn't in the new - # version's outputs - for key in old_files_dict: - if new_files_dict.get(key) != None: - missing_in_new.append(old_files_dict[key]) - - if len(matched_path_dict) == 0: - err = ( - "\n\n[!] 
No output paths were successfully matched between " - "the two CPAC output directories!\n\n" - ) - raise Exception(err) - - matched_files_dct = { - "matched": matched_path_dict, - "missing_old": missing_in_old, - "missing_new": missing_in_new, - } - - return matched_files_dct - - -def delimiter_from_filepath(filepath: Union[Path, str]) -> Optional[str]: - """ - Given a filepath, return expected value-separator delimiter - """ - if filepath.endswith(".tsv"): - return "\t" - if filepath.endswith(".csv"): - return "," - with open(filepath, "r", encoding="utf8") as _f: - first_line = "#" - while first_line.lstrip().startswith("#"): - first_line = _f.readline() - for delimiter in ["\t", ",", " "]: - if delimiter in first_line: - if delimiter == " ": - return r"\s+" - return delimiter - return None - - -def calculate_correlation(args_tuple): - category = args_tuple[0] - old_path = args_tuple[1] - new_path = args_tuple[2] - local_dir = args_tuple[3] - s3_creds = args_tuple[4] - verbose = args_tuple[5] - - if verbose: - print("Calculating correlation between {0} and {1}".format(old_path, new_path)) - - corr_tuple = None - - if s3_creds: - try: - # full filepath with filename - old_local_file = os.path.join( - local_dir, "s3_input_files", old_path.replace("s3://", "") - ) - # directory without filename - old_local_path = old_local_file.replace(old_path.split("/")[-1], "") - - new_local_file = os.path.join( - local_dir, "s3_input_files", new_path.replace("s3://", "") - ) - new_local_path = new_local_file.replace(new_path.split("/")[-1], "") - - if not os.path.exists(old_local_path): - os.makedirs(old_local_path) - if not os.path.exists(new_local_path): - os.makedirs(new_local_path) - - except Exception as e: - err = ( - "\n\nLocals: {0}\n\n[!] Could not create the local S3 " - "download directory.\n\nError details: {1}\n\n".format((locals(), e)) - ) - raise Exception(e) - - try: - old_path = old_local_file - except Exception as e: - err = ( - "\n\nLocals: {0}\n\n[!] Could not download the files from " - "the S3 bucket. \nS3 filepath: {1}\nLocal destination: {2}" - "\nS3 creds: {3}\n\nError details: {4}\n\n".format( - locals(), old_path, old_local_path, s3_creds, e - ) - ) - raise Exception(e) - - try: - new_path = new_local_file - except Exception as e: - err = ( - "\n\nLocals: {0}\n\n[!] Could not download the files from " - "the S3 bucket. 
\nS3 filepath: {1}\nLocal destination: {2}" - "\nS3 creds: {3}\n\nError details: {4}\n\n".format( - locals(), new_path, new_local_path, s3_creds, e - ) - ) - raise Exception(e) - - ## nibabel to pull the data from the re-assembled file paths - if os.path.exists(old_path) and os.path.exists(new_path): - if ( - (".csv" in old_path and ".csv" in new_path) - or (".txt" in old_path and ".txt" in new_path) - or (".1D" in old_path and ".1D" in new_path) - or (".tsv" in old_path and ".tsv" in new_path) - ): - try: - concor, pearson = correlate_text_based((old_path, new_path)) - except Exception as e: - return category, e, (old_path, new_path) - - if concor > 0.980: - corr_tuple = (category, [concor], [pearson]) - else: - corr_tuple = (category, [concor], [pearson], (old_path, new_path)) - if verbose: - print("Success - {0}".format(str(concor))) - - # except Exception as e: - # corr_tuple = ("file reading problem: {0}".format(e), - # old_path, new_path) - # if verbose: - # print(str(corr_tuple)) - - return corr_tuple - - else: - # try: - old_file_img = nb.load(old_path) - old_file_hdr = old_file_img.header - new_file_img = nb.load(new_path) - new_file_hdr = new_file_img.header - - old_file_dims = old_file_hdr.get_zooms() - new_file_dims = new_file_hdr.get_zooms() - - data_1 = nb.load(old_path).get_fdata() - data_2 = nb.load(new_path).get_fdata() - - # except Exception as e: - # corr_tuple = ("file reading problem: {0}".format(e), - # old_path, new_path) - # if verbose: - # print(str(corr_tuple)) - # return corr_tuple - - ## set up and run the Pearson correlation and concordance correlation - if data_1.flatten().shape == data_2.flatten().shape: - try: - if len(old_file_dims) > 3: - axis = tuple(range(3, len(old_file_dims))) - concor, pearson = batch_correlate(data_1, data_2, axis=axis) - concor = np.nanmean(concor) - pearson = np.nanmean(pearson) - else: - concor, pearson = batch_correlate(data_1, data_2) - except Exception as e: - corr_tuple = ("correlating problem: {0}".format(e), old_path, new_path) - if verbose: - print(str(corr_tuple)) - return corr_tuple - if concor > 0.980: - corr_tuple = (category, [concor], [pearson]) - else: - corr_tuple = (category, [concor], [pearson], (old_path, new_path)) - if verbose: - print("Success - {0}".format(str(concor))) - else: - corr_tuple = ("different shape", old_path, new_path) - if verbose: - print(str(corr_tuple)) - - else: - if not os.path.exists(old_path): - corr_tuple = ("file doesn't exist", [old_path], None) - if verbose: - print(str(corr_tuple)) - if not os.path.exists(new_path): - if not corr_tuple: - corr_tuple = ("file doesn't exist", [new_path], None) - if verbose: - print(str(corr_tuple)) - else: - corr_tuple = ("file doesn't exist", old_path, new_path) - if verbose: - print(str(corr_tuple)) - - return corr_tuple - - -def run_correlations( - matched_dct, input_dct, source="output_dir", quick=False, verbose=False -): - all_corr_dct = {"pearson": {}, "concordance": {}, "sub_optimal": {}} - - args_list = [] - - quick_list = [ - "anatomical_brain", - "anatomical_csf_mask", - "anatomical_gm_mask", - "anatomical_wm_mask", - "anatomical_to_standard", - "functional_preprocessed", - "functional_brain_mask", - "mean_functional_in_anat", - "functional_nuisance_residuals", - "functional_nuisance_regressors", - "functional_to_standard", - "roi_timeseries", - ] - - matched_path_dct = matched_dct["matched"] - output_dir = input_dct["settings"]["correlations_dir"] - s3_creds = input_dct["settings"]["s3_creds"] - - for category in matched_path_dct.keys(): - 
if quick: - if category not in quick_list: - continue - - for file_id in matched_path_dct[category].keys(): - old_path = matched_path_dct[category][file_id][0] - new_path = matched_path_dct[category][file_id][1] - - if source == "work_dir": - args_list.append( - (file_id, old_path, new_path, output_dir, s3_creds, verbose) - ) - else: - args_list.append( - (category, old_path, new_path, output_dir, s3_creds, verbose) - ) - - print("\nNumber of correlations to calculate: {0}\n".format(len(args_list))) - - print("Running correlations...") - p = Pool(input_dct["settings"]["n_cpus"]) - corr_tuple_list = p.map(calculate_correlation, args_list) - p.close() - p.join() - - print("\nCorrelations of the {0} are done.\n".format(source)) - - failures = [] - - for corr_tuple in corr_tuple_list: - if not corr_tuple: - continue - if isinstance(corr_tuple[1], Exception): - failures.append((corr_tuple[0], corr_tuple[1], " | ".join(corr_tuple[2]))) - continue - if corr_tuple[0] not in all_corr_dct["concordance"].keys(): - all_corr_dct["concordance"][corr_tuple[0]] = [] - if corr_tuple[0] not in all_corr_dct["pearson"].keys(): - all_corr_dct["pearson"][corr_tuple[0]] = [] - all_corr_dct["concordance"][corr_tuple[0]] += corr_tuple[1] - all_corr_dct["pearson"][corr_tuple[0]] += corr_tuple[2] - - if len(corr_tuple) > 3: - if corr_tuple[0] not in all_corr_dct["sub_optimal"].keys(): - all_corr_dct["sub_optimal"][corr_tuple[0]] = [] - try: - all_corr_dct["sub_optimal"][corr_tuple[0]].append( - "{0}:\n{1}\n{2}" - "\n\n".format(corr_tuple[1][0], corr_tuple[3][0], corr_tuple[3][1]) - ) - except TypeError: - pass - - return all_corr_dct, failures - - -def post180_organize_correlations(concor_dct, corr_type="concordance", quick=False): - corr_map_dct = {"correlations": {}} - for key in concor_dct: - if "problem" in key: - continue - # shouldn't need this - FIX - rawkey = key.replace("acq-", "").replace("run-", "") - datatype = rawkey.split("_")[-1] - - if datatype not in corr_map_dct["correlations"]: - corr_map_dct["correlations"][datatype] = {} - corr_map_dct["correlations"][datatype][rawkey] = concor_dct[key] - - return corr_map_dct - - -def organize_correlations(concor_dict, corr_type="concordance", quick=False): - # break up all of the correlations into groups - each group of derivatives - # will go into its own boxplot - - regCorrMap = {} - native_outputs = {} - template_outputs = {} - timeseries = {} - functionals = {} - - core = {} - - corr_map_dict = {} - corr_map_dict["correlations"] = {} - - derivs = ["alff", "dr_tempreg", "reho", "sca_roi", "timeseries", "ndmg"] - anats = ["anatomical", "seg"] - time_series = [ - "functional_freq", - "nuisance_residuals", - "functional_preprocessed", - "functional_to_standard", - "ica_aroma_", - "motion_correct", - "slice_time", - ] - funcs = ["functional", "displacement"] - - for key in concor_dict: - if quick: - core[key] = concor_dict[key] - continue - - if "xfm" in key or "mixel" in key: - continue - - if "centrality" in key or "vmhc" in key or "sca_tempreg" in key: - template_outputs[key] = concor_dict[key] - continue - - for word in anats: - if word in key: - regCorrMap[key] = concor_dict[key] - continue - - for word in derivs: - if word in key and "standard" not in key: - native_outputs[key] = concor_dict[key] - continue - elif word in key: - template_outputs[key] = concor_dict[key] - continue - - for word in time_series: - if word in key and "mean" not in key and "mask" not in key: - timeseries[key] = concor_dict[key] - continue - - for word in funcs: - if word in 
key: - functionals[key] = concor_dict[key] - - if quick: - group = "{0}_core_outputs".format(corr_type) - if len(core.values()) > 0: - corr_map_dict["correlations"][group] = core - else: - print("No values in {0}".format(group)) - return corr_map_dict - - group = "{0}_registration_and_segmentation".format(corr_type) - if len(regCorrMap.values()) > 0: - corr_map_dict["correlations"][group] = regCorrMap - else: - print("No values in {0}".format(group)) - - group = "{0}_native_space_outputs".format(corr_type) - if len(native_outputs.values()) > 0: - corr_map_dict["correlations"][group] = native_outputs - else: - print("No values in {0}".format(group)) - - group = "{0}_template_space_outputs".format(corr_type) - if len(template_outputs.values()) > 0: - corr_map_dict["correlations"][group] = template_outputs - else: - print("No values in {0}".format(group)) - - group = "{0}_timeseries_outputs".format(corr_type) - if len(timeseries.values()) > 0: - corr_map_dict["correlations"][group] = timeseries - else: - print("No values in {0}".format(group)) - - group = "{0}_functional_outputs".format(corr_type) - if len(functionals.values()) > 0: - corr_map_dict["correlations"][group] = functionals - else: - print("No values in {0}".format(group)) - - return corr_map_dict - - -def quick_summary(dct, corr_map_dct, output_dir) -> dict: - for corr_group in corr_map_dct["correlations"].keys(): - cat_dct = {} - lines = [] - for output_type, corr_vec in dict( - corr_map_dct["correlations"][corr_group] - ).items(): - try: - corrmean = np.mean(np.asarray(corr_vec)) - except TypeError: - continue - lines.append("{0}: {1}".format(output_type, corrmean)) - - dct = write_dct(dct, lines, output_type) - return dct - - -def compare_pipelines( - input_dct: dict, dir_type: str = "output_dir" -) -> tuple[dict, dict]: - """ - Given an input dict containing keys 'settings', gather prreviously - generated pickles or all relevant output and working files - - Returns - ------- - corr_map : dict - - pearson_map : dict - """ - output_dir = input_dct["settings"]["output_dir"] - pickle_dir = input_dct["settings"]["pickle_dir"] - - corrs_pkl = os.path.join(pickle_dir, f"{dir_type}_correlations.p") - failures_pkl = os.path.join(pickle_dir, f"{dir_type}_failures.p") - matched_pkl = os.path.join(pickle_dir, f"{dir_type}_matched_files.p") - - all_corr_dct = None - if os.path.exists(corrs_pkl): - print( - f"\n\nFound the correlations pickle: {corrs_pkl}\n\n" - "Starting from there..\n" - ) - all_corr_dct = read_pickle(corrs_pkl) - elif os.path.exists(matched_pkl): - print( - f"\n\nFound the matched filepaths pickle: {matched_pkl}\n\n" - "Starting from there..\n" - ) - matched_dct = read_pickle(matched_pkl) - - else: - # gather all relevant output and working files - outfiles1_dct, outfiles2_dct = gather_all_files( - input_dct, pickle_dir, source=dir_type - ) - matched_dct = match_filepaths(outfiles1_dct, outfiles2_dct) - write_pickle(matched_dct, matched_pkl) - - if not all_corr_dct: - all_corr_dct, failures = run_correlations( - matched_dct, - input_dct, - source=dir_type, - quick=input_dct["settings"]["quick"], - verbose=input_dct["settings"]["verbose"], - ) - write_pickle(all_corr_dct, corrs_pkl) - write_pickle(failures, failures_pkl) - - if dir_type == "work_dir": - sorted_vals = [] - # sorted_keys = sorted(all_corr_dct, key=all_corr_dct.get) - for key in all_corr_dct.keys(): # sorted_keys: - if ( - "file reading problem:" in key - or "different shape" in key - or "correlating problem" in key - ): - continue - else: - 
sorted_vals.append("{0}: {1}".format(all_corr_dct[key], key)) - working_corrs_file = os.path.join(output_dir, "work_dir_correlations.txt") - with open(working_corrs_file, "wt") as f: - for line in sorted_vals: - f.write(line) - f.write("\n") - - else: - pre180 = False - if pre180: - organize = organize_correlations - else: - organize = post180_organize_correlations - - corr_map_dict = organize( - all_corr_dct["concordance"], - "concordance", - quick=input_dct["settings"]["quick"], - ) - corr_map_dict["pipeline_names"] = input_dct["pipelines"].keys() - - pearson_map_dict = organize( - all_corr_dct["pearson"], "pearson", quick=input_dct["settings"]["quick"] - ) - pearson_map_dict["pipeline_names"] = input_dct["pipelines"].keys() - dct = {} - corr_map = quick_summary(dct, corr_map_dict, output_dir) - pearson_map = quick_summary(dct, pearson_map_dict, output_dir) - - if all_corr_dct["sub_optimal"]: - write_yml_file( - all_corr_dct["sub_optimal"], os.path.join(output_dir, "sub_optimal.yml") - ) - - # for corr_group_name in corr_map_dict["correlations"].keys(): - # corr_group = corr_map_dict["correlations"][corr_group_name] - # create_boxplot(corr_group, corr_group_name, - # corr_map_dict["pipeline_names"], output_dir) - - # for corr_group_name in pearson_map_dict["correlations"].keys(): - # corr_group = pearson_map_dict["correlations"][corr_group_name] - # create_boxplot(corr_group, corr_group_name, - # pearson_map_dict["pipeline_names"], output_dir) - return (corr_map, pearson_map) - - -def main() -> tuple: - """ - • Parse commandline arguments - • Read input YAML - • Check for already completed stuff (pickles) - """ - parser = argparse.ArgumentParser() - parser.add_argument( - "input_yaml", type=str, help="file path of the script's input YAML" - ) - parser.add_argument("--data_source", type=str, help="Which site data comes from") - parser.add_argument("--branch", type=str, help="Branch name") - args = parser.parse_args() - data_source = args.data_source - branch = args.branch - - # get the input info - input_dct = read_yml_file(args.input_yaml) - - # check for already completed stuff (pickles) - output_dir = os.path.join( - os.getcwd(), f"correlations_{input_dct['settings']['run_name']}" - ) - pickle_dir = os.path.join(output_dir, "pickles") - - if not os.path.exists(pickle_dir): - try: - os.makedirs(pickle_dir) - except: - err = ( - "\n\n[!] Could not create the output directory for the " - "correlations. 
Do you have write permissions?\nAttempted " - f"output directory: {output_dir}\n\n" - ) - raise Exception(err) - - input_dct["settings"].update({"output_dir": output_dir, "pickle_dir": pickle_dir}) - - corr_map, pearson_map = compare_pipelines(input_dct, dir_type="output_dir") - corr_map_keys = list(corr_map.keys()) - all_keys = [] - for key in corr_map_keys: - keys = list(corr_map[key]) - for i in keys: - all_keys.append(i) - return all_keys, data_source, branch - - if __name__ == "__main__": - all_keys, data_source, branch = main() + all_keys, data_source, branch = cpac_correlations() html_body = body(all_keys, data_source) - with open(f"{data_source}_{branch}.json", "w") as file: + with open(f"{data_source}_{branch}.json", "w", encoding="utf-8") as file: file.write(html_body) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9b3ef36 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/FCP-INDI/CPAC_regtest_pack.git@package_correlations#subdirectory=cpac_correlations \ No newline at end of file From 164a8a656807afeac080e7da8b06964423668976 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 28 Dec 2023 22:43:16 -0500 Subject: [PATCH 07/29] :package: Package repo for portable installation --- .gitignore | 1 + .pre-commit-config.yaml | 72 +++++++++++++--- pyproject.toml | 81 ++++++++++++++++++ requirements.txt | 1 - src/regression_dashboard/__init__.py | 0 .../build_d3_dashboard.py | 2 +- .../regression_dashboard/build_dashboard.py | 3 +- .../calculate_correlations.py | 4 +- .../regression_dashboard/create_yml.py | 15 +--- src/regression_dashboard/generate_comment.py | 3 + .../templates}/heatmap.html | 2 +- .../templates}/heatmap.js | 2 +- .../__pycache__/parse_yaml.cpython-311.pyc | Bin .../utils}/html_script.py | 22 ++--- .../regression_dashboard/utils}/parse_yaml.py | 18 ++-- 15 files changed, 170 insertions(+), 56 deletions(-) create mode 100644 .gitignore create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 src/regression_dashboard/__init__.py rename build_d3_dashboard.py => src/regression_dashboard/build_d3_dashboard.py (96%) rename build_dashboard.py => src/regression_dashboard/build_dashboard.py (93%) rename calculate_correlations.py => src/regression_dashboard/calculate_correlations.py (75%) rename create_yml.py => src/regression_dashboard/create_yml.py (71%) create mode 100644 src/regression_dashboard/generate_comment.py rename {templates => src/regression_dashboard/templates}/heatmap.html (96%) rename {templates => src/regression_dashboard/templates}/heatmap.js (99%) rename {utils => src/regression_dashboard/utils}/__pycache__/parse_yaml.cpython-311.pyc (100%) rename {utils => src/regression_dashboard/utils}/html_script.py (92%) rename {utils => src/regression_dashboard/utils}/parse_yaml.py (89%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0df1a99..267a008 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,63 @@ +fail_fast: false + repos: - - repo: https://github.com/pycqa/isort - rev: 5.11.5 - hooks: - - id: isort - files: "\\.(py)$" - - repo: https://github.com/psf/black - rev: 23.1.0 - hooks: - - id: black - files: "\\.(py)$" \ No newline at end of file +- repo: https://github.com/python-poetry/poetry + rev: 1.7.0 + hooks: + - id: poetry-check + +- repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy + args: [--ignore-missing-imports] + additional_dependencies: + - types-toml + - types-PyYAML +- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.12.0 + hooks: + - id: pretty-format-yaml + args: + - --autofix + - --indent=2 + - id: pretty-format-toml + exclude: ^poetry.lock$ + args: + - --autofix + - --indent=2 + - --no-sort + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-case-conflict + - id: end-of-file-fixer + - id: mixed-line-ending + args: + - --fix=lf + - id: trailing-whitespace + - id: pretty-format-json + args: + - --autofix + - --indent=4 + - --no-sort-keys + - id: check-merge-conflict + - id: check-yaml + - id: check-json + - id: check-toml + +- repo: local + hooks: + - id: yaml-file-extension + name: Prefer .yaml over .yml. + entry: YAML files must have .yaml extension. + language: fail + files: \.yml$ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fbcdc4a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,81 @@ +[tool.poetry] +name = "cpac_regression_dashboard" +version = "1.0.0" +description = "Generate a dashboard for C-PAC regression tests" +authors = [ + "Amy Gutierrez <58920810+amygutierrez@users.noreply.github.com>", + "Jon Clucas =1.2.0"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9b3ef36..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/FCP-INDI/CPAC_regtest_pack.git@package_correlations#subdirectory=cpac_correlations \ No newline at end of file diff --git a/src/regression_dashboard/__init__.py b/src/regression_dashboard/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build_d3_dashboard.py b/src/regression_dashboard/build_d3_dashboard.py similarity index 96% rename from build_d3_dashboard.py rename to src/regression_dashboard/build_d3_dashboard.py index 72d1f24..0cc47c9 100644 --- a/build_d3_dashboard.py +++ b/src/regression_dashboard/build_d3_dashboard.py @@ -18,7 +18,7 @@ def main(json_file=None, branch=None): body = etree.HTML(_f.read()) script_element = etree.SubElement(body[0], "script") script_element.set("defer", "defer") - script_element.set("src", f"./heatmap.js") + script_element.set("src", "./heatmap.js") with open("templates/heatmap.js", "r", encoding="utf-8") as _f: with open(f"{outdir}/heatmap.js", "w", encoding="utf=8") as _s: _s.write( diff --git a/build_dashboard.py b/src/regression_dashboard/build_dashboard.py similarity index 93% rename from build_dashboard.py rename to src/regression_dashboard/build_dashboard.py index 5880235..f00da15 100644 --- a/build_dashboard.py +++ b/src/regression_dashboard/build_dashboard.py @@ -2,13 +2,14 @@ import click -from utils.html_script import setup_browser, write_html +from .utils.html_script import setup_browser, write_html def process_option(ctx, param, value): if value is not None: values = value.split(",") return [val.strip() for val in values] + return [] @click.command() diff --git a/calculate_correlations.py b/src/regression_dashboard/calculate_correlations.py similarity index 75% rename from calculate_correlations.py rename to src/regression_dashboard/calculate_correlations.py index 103f5ed..d0b7326 100644 --- a/calculate_correlations.py +++ 
b/src/regression_dashboard/calculate_correlations.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -"""Calculate correlations and write them to D3-friendly file""" +"""Calculate correlations and write them to D3-friendly file.""" from cpac_correlations import cpac_correlations -from utils.html_script import body +from .utils.html_script import body if __name__ == "__main__": all_keys, data_source, branch = cpac_correlations() diff --git a/create_yml.py b/src/regression_dashboard/create_yml.py similarity index 71% rename from create_yml.py rename to src/regression_dashboard/create_yml.py index fb4a3ed..b7eed2c 100644 --- a/create_yml.py +++ b/src/regression_dashboard/create_yml.py @@ -2,7 +2,7 @@ import click -from utils.parse_yaml import cpac_yaml +from .utils.parse_yaml import cpac_yaml @click.command() @@ -21,14 +21,9 @@ @click.option("--workspace", type=str, help="directory to save correlations") @click.option("--branch", type=str, help="branch name") @click.option("--data_source", type=str, help="Data site") -def main(pipeline1, pipeline2, workspace, branch, data_source): - """ - Correlate outputs from regression run again another C-PAC version. - """ - - git_home = os.path.normpath( - os.path.dirname(os.path.abspath(__file__)) + os.sep + os.pardir - ) +def main(pipeline1, pipeline2, workspace, branch, data_source) -> None: + """Correlate outputs from regression run again another C-PAC version.""" + os.path.normpath(os.path.dirname(os.path.abspath(__file__)) + os.sep + os.pardir) run_name = f"{branch}_{data_source}" cpac_yaml( @@ -41,8 +36,6 @@ def main(pipeline1, pipeline2, workspace, branch, data_source): data_source, ) - return - if __name__ == "__main__": main() diff --git a/src/regression_dashboard/generate_comment.py b/src/regression_dashboard/generate_comment.py new file mode 100644 index 0000000..7bff39a --- /dev/null +++ b/src/regression_dashboard/generate_comment.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Gather generated PNGs and link to heatmap in a GitHub-flavored Markdown string.""" diff --git a/templates/heatmap.html b/src/regression_dashboard/templates/heatmap.html similarity index 96% rename from templates/heatmap.html rename to src/regression_dashboard/templates/heatmap.html index 0cd1893..2f5fc29 100644 --- a/templates/heatmap.html +++ b/src/regression_dashboard/templates/heatmap.html @@ -6,4 +6,4 @@
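# --- illustrative aside, not part of the patch ---------------------------
# A hypothetical way to exercise the click command from create_yml.py above
# without a shell, via click's test runner.  The import path reflects the
# package layout at this point in the series (it is renamed to
# cpac_regression_dashboard in a later commit); the directories, branch,
# and site names are placeholders.
from click.testing import CliRunner
from regression_dashboard.create_yml import main

result = CliRunner().invoke(
    main,
    ["--pipeline1", "/outputs/run-A", "--pipeline2", "/outputs/run-B",
     "--workspace", "/scratch/regression", "--branch", "my-branch",
     "--data_source", "Site-X"],
)
# With two real C-PAC run directories, this (or the installed
# cpac_regsuite_create_yaml script) writes my-branch_Site-X.yml for the
# correlation step to consume.
# --------------------------------------------------------------------------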
Correlations heatmap will load here!
- \ No newline at end of file + diff --git a/templates/heatmap.js b/src/regression_dashboard/templates/heatmap.js similarity index 99% rename from templates/heatmap.js rename to src/regression_dashboard/templates/heatmap.js index 0d4ca2d..c36bcd6 100644 --- a/templates/heatmap.js +++ b/src/regression_dashboard/templates/heatmap.js @@ -123,4 +123,4 @@ svg.append("text") .style("font-size", "14px") .style("fill", "grey") .style("max-width", 400) - .text("GRAPHSUBTITLE"); \ No newline at end of file + .text("GRAPHSUBTITLE"); diff --git a/utils/__pycache__/parse_yaml.cpython-311.pyc b/src/regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc similarity index 100% rename from utils/__pycache__/parse_yaml.cpython-311.pyc rename to src/regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc diff --git a/utils/html_script.py b/src/regression_dashboard/utils/html_script.py similarity index 92% rename from utils/html_script.py rename to src/regression_dashboard/utils/html_script.py index c0aff5c..99d60a6 100644 --- a/utils/html_script.py +++ b/src/regression_dashboard/utils/html_script.py @@ -1,12 +1,11 @@ -def dataset(name, data_source, value): - dataset = f""" +def dataset(name, data_source, value) -> str: + return f""" {{ "rowid": "{name}", "columnid": "{data_source}", "value": "{value}" }}, """ - return dataset def body(all_keys, data_source): @@ -16,16 +15,11 @@ def body(all_keys, data_source): name = name_value[0] value = name_value[1] data_body += dataset(name, data_source, value) - out = f""" - {{"data": [ - {data_body} - ]}} - """ return data_body -def write_html(data_body): - script = f""" +def write_html(data_body) -> str: + return f""" Correlations @@ -90,12 +84,10 @@ def write_html(data_body):
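# --- illustrative aside, not part of the patch ---------------------------
# What the dataset()/body() helpers above emit for one correlation entry.
# Each key is a "name: coefficient" string as produced by the correlation
# step; the derivative name and site below are made up, and the import path
# reflects the package layout at this point in the series.
from regression_dashboard.utils.html_script import body

print(body(["alff: 0.98765"], "Site-X"))
# prints (surrounding whitespace trimmed):
#     {
#         "rowid": "alff",
#         "columnid": "Site-X",
#         "value": "0.98765"
#     },
# calculate_correlations.py writes a file of these records for the D3
# dashboard to plot.
# --------------------------------------------------------------------------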
Correlations heatmap will load here!
- """ - - return script + """ # noqa: E501 -def setup_browser(html_template): +def setup_browser(html_template) -> None: import tempfile import webbrowser @@ -103,5 +95,3 @@ def setup_browser(html_template): temp_file.write(html_template.encode("utf-8")) filename = "file:///" + temp_file.name webbrowser.open_new_tab(filename) - - return diff --git a/utils/parse_yaml.py b/src/regression_dashboard/utils/parse_yaml.py similarity index 89% rename from utils/parse_yaml.py rename to src/regression_dashboard/utils/parse_yaml.py index bc1932e..73af52f 100644 --- a/utils/parse_yaml.py +++ b/src/regression_dashboard/utils/parse_yaml.py @@ -8,9 +8,9 @@ def get_dir(paths): directory = None else: for root, dirs, files in os.walk(paths): - for dir in dirs: - if "pipeline_" in dir: - directory = os.path.join(root, dir) + for _dir in dirs: + if "pipeline_" in _dir: + directory = os.path.join(root, _dir) return directory @@ -21,7 +21,7 @@ def write_pipeline_yaml( pipeline_config=None, pipeline_name=None, ): - pipeline = { + return { pipeline_name: { "output_dir": output_dir, "work_dir": working_dir, @@ -31,8 +31,6 @@ def write_pipeline_yaml( } } - return pipeline - def parse_yaml(directory=None, pipeline_name=None): subdirs = ["log", "working", "output"] @@ -54,12 +52,10 @@ def parse_yaml(directory=None, pipeline_name=None): working_dir = get_dir(paths["working_dir"]) output_dir = get_dir(paths["output_dir"]) - pipeline_dict = write_pipeline_yaml( + return write_pipeline_yaml( output_dir, working_dir, log_dir, pipeline_config, pipeline_name ) - return pipeline_dict - def write_yaml( pipeline_1=None, pipeline_2=None, correlations_dir=None, run_name=None, n_cpus=None @@ -81,7 +77,7 @@ def write_yaml( def cpac_yaml( pipeline1, pipeline2, correlations_dir, run_name, n_cpus, branch, data_source -): +) -> None: pipeline_1 = parse_yaml(pipeline1, "pipeline_1") pipeline_2 = parse_yaml(pipeline2, "pipeline_2") @@ -91,5 +87,3 @@ def cpac_yaml( with open(f"{branch}_{data_source}.yml", "w") as file: yaml.dump(yaml_contents, file, default_flow_style=False, sort_keys=False) - - return From f87094d8236874df0f315831cb1df869a4a79a34 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 3 Jan 2024 16:21:41 -0500 Subject: [PATCH 08/29] :sparkles: Post boxplots and correlation coefficients to comments and PRs --- pyproject.toml | 2 + src/regression_dashboard/generate_comment.py | 144 +++++++++++++++++++ 2 files changed, 146 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fbcdc4a..dd44ec6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ packages = [{include = "regression_dashboard", from = "src"}] [tool.poetry.dependencies] python = ">=3.9" +PyGithub = "*" cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", rev = "package_correlations", subdirectory = "cpac_correlations"} [tool.poetry.group.dev.dependencies] @@ -24,6 +25,7 @@ ruff = "^0.1.7" [tool.poetry.scripts] cpac_regsuite_create_yaml = 'regression_dashboard.create_yml:main' cpac_regsuite_create_yml = 'regression_dashboard.create_yml:main' +cpac_regsuite_generate_comment = 'regression_dashboard.generate_comment:main' [tool.pytest.ini_options] pythonpath = [ diff --git a/src/regression_dashboard/generate_comment.py b/src/regression_dashboard/generate_comment.py index 7bff39a..da79b60 100644 --- a/src/regression_dashboard/generate_comment.py +++ b/src/regression_dashboard/generate_comment.py @@ -1,3 +1,147 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Gather generated PNGs and link to heatmap in a 
GitHub-flavored Markdown string.""" +from dataclasses import dataclass +import os +from pathlib import Path +import sys +from typing import Generator + +from github import Github + + +@dataclass +class EnvVars: + """Dataclass for environment variables.""" + + github_token: str + owner: str + repo: str + sha: str + testing_owner: str + + def __init__(self) -> None: + """Initialize the dataclass from the environment.""" + attrs = ["github_token", "owner", "repo", "sha", "testing_owner"] + for attr in attrs: + setattr(self, attr, os.environ.get(attr.upper(), "")) + + +_ENV = EnvVars() + + +def gather_images(path: Path) -> Generator[Path, None, None]: + """Gather the images. + + Parameters + ---------- + path : Path + The path to the correlations directory.. + + Yields + ------ + image : Path + The path to an image. + """ + return path.glob("*.png") + + +def gather_text(path: Path) -> str: + """Gathers and concatenates all text files in the given directory. + + Parameters + ---------- + path : Path + The path to the correlations directory. + + Returns + ------- + str + The concatenated text. + """ + text = "" + for file in path.glob("*.txt"): + with open(file, "r", encoding="utf=8") as _f: + text += _f.read() + text += "\n" + return text.strip() + + +def generate_comment(path: Path) -> str: + """Generate the comment. + + Parameters + ---------- + path : Path + The path to the correlations directory. + + Returns + ------- + str : The comment. + """ + comment = "" + for image in gather_images(path): + raw_image_path = _raw_image_path(_ENV.testing_owner, _ENV.repo, _ENV.sha, image) + comment += f"![{image.stem}]({raw_image_path})\n" + return comment + gather_text(path) + + +def main() -> None: + """Generate and post a comment on a GitHub commit. + + Also post the comment to any open PR in which the commit is the most recent. + """ + if len(sys.argv) > 1: + if sys.argv[1] in ["-h", "--help"]: + print("Usage: cpac_regsuite_generate_comment [path]") + print("If no path is given, the current working directory is used.") + print("Required environment variables:") + print( + "GITHUB_TOKEN: A personal access token with scope to write to " + "comments and pull requests." + ) + print("OWNER: The owner of the repository.") + print("REPO: The name of the repository.") + print("SHA: The SHA of the commit.") + print("TESTING_OWNER: The owner of the testing repository.") + sys.exit(0) + path = Path(sys.argv[1]) + else: + path = Path(os.getcwd()) + personal_access_token = os.environ.get("GITHUB_TOKEN") + g = Github(personal_access_token) + repo = g.get_repo(f"{_ENV.owner}/{_ENV.repo}") + commit = repo.get_commit(_ENV.sha) + comment = generate_comment(path) + commit.create_comment(comment) + for pr in repo.get_pulls(state="open", sort="created"): + if pr.head.sha == _ENV.sha: + pr.create_issue_comment(comment) + + +def _raw_image_path(owner: str, repo: str, sha: str, image: Path) -> str: + """Generate the raw image path. + + Parameters + ---------- + owner : str + The owner of the repository. + + repo : str + The name of the repository. + + sha : str + The SHA of the commit. + + image : Path + The path to the image. + + Returns + ------- + str : The raw image path. 
+ """ + return f"https://raw.githubusercontent.com/{owner}/regtest-runlogs/{repo}_{sha}/{image.name}" + + +if __name__ == "__main__": + main() From 0465513cff02f7a9d6f10205e5fb0de5c8eb37e4 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 5 Jan 2024 01:02:19 -0500 Subject: [PATCH 09/29] :sparkles: Add script to post a comment to a given commit and relevant PR(s) --- pyproject.toml | 18 ++- src/cpac_regression_dashboard/__init__.py | 3 + src/cpac_regression_dashboard/_version.py | 9 ++ .../build_d3_dashboard.py | 0 .../build_dashboard.py | 0 .../calculate_correlations.py | 0 .../create_yml.py | 0 .../generate_comment.py | 107 +++++++++++++++++- .../templates/heatmap.html | 0 .../templates/heatmap.js | 0 .../__pycache__/parse_yaml.cpython-311.pyc | Bin .../utils/html_script.py | 0 .../utils/parse_yaml.py | 0 src/regression_dashboard/__init__.py | 0 14 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 src/cpac_regression_dashboard/__init__.py create mode 100644 src/cpac_regression_dashboard/_version.py rename src/{regression_dashboard => cpac_regression_dashboard}/build_d3_dashboard.py (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/build_dashboard.py (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/calculate_correlations.py (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/create_yml.py (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/generate_comment.py (52%) rename src/{regression_dashboard => cpac_regression_dashboard}/templates/heatmap.html (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/templates/heatmap.js (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/utils/__pycache__/parse_yaml.cpython-311.pyc (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/utils/html_script.py (100%) rename src/{regression_dashboard => cpac_regression_dashboard}/utils/parse_yaml.py (100%) delete mode 100644 src/regression_dashboard/__init__.py diff --git a/pyproject.toml b/pyproject.toml index dd44ec6..fb14f3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cpac_regression_dashboard" -version = "1.0.0" +version = "1.0.0.dev1" description = "Generate a dashboard for C-PAC regression tests" authors = [ "Amy Gutierrez <58920810+amygutierrez@users.noreply.github.com>", @@ -8,11 +8,14 @@ authors = [ ] license = "LGPL-2.1" readme = "README.md" -packages = [{include = "regression_dashboard", from = "src"}] +packages = [{from = "src", include = "cpac_regression_dashboard"}] [tool.poetry.dependencies] python = ">=3.9" +cairosvg = "*" +gitpython = "*" PyGithub = "*" +pyppeteer = "*" cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", rev = "package_correlations", subdirectory = "cpac_correlations"} [tool.poetry.group.dev.dependencies] @@ -23,9 +26,12 @@ pytest-cov = "^4.1.0" ruff = "^0.1.7" [tool.poetry.scripts] -cpac_regsuite_create_yaml = 'regression_dashboard.create_yml:main' -cpac_regsuite_create_yml = 'regression_dashboard.create_yml:main' -cpac_regsuite_generate_comment = 'regression_dashboard.generate_comment:main' +cpac_regsuite_create_yaml = 'cpac_regression_dashboard.create_yml:main' +cpac_regsuite_create_yml = 'cpac_regression_dashboard.create_yml:main' +cpac_regsuite_generate_comment = 'cpac_regression_dashboard.generate_comment:main' + +[tool.poetry.urls] +"Source Code" = "https://github.com/FCP-INDI/C-PAC_regression_dashboard" [tool.pytest.ini_options] pythonpath = [ @@ -40,7 
+46,7 @@ src = ["src"] target-version = "py39" [tool.ruff.lint] -select = ["ANN", "D", "E", "F", "I"] +select = ["ANN", "D", "E", "F", "I", "Q"] ignore = [ "ANN101", # self should not be annotated. "ANN102" # cls should not be annotated. diff --git a/src/cpac_regression_dashboard/__init__.py b/src/cpac_regression_dashboard/__init__.py new file mode 100644 index 0000000..75013dc --- /dev/null +++ b/src/cpac_regression_dashboard/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Create a dashboard of regression test results.""" diff --git a/src/cpac_regression_dashboard/_version.py b/src/cpac_regression_dashboard/_version.py new file mode 100644 index 0000000..f220d89 --- /dev/null +++ b/src/cpac_regression_dashboard/_version.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Get version from packaging metadata.""" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("cpac_regression_dashboard") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/src/regression_dashboard/build_d3_dashboard.py b/src/cpac_regression_dashboard/build_d3_dashboard.py similarity index 100% rename from src/regression_dashboard/build_d3_dashboard.py rename to src/cpac_regression_dashboard/build_d3_dashboard.py diff --git a/src/regression_dashboard/build_dashboard.py b/src/cpac_regression_dashboard/build_dashboard.py similarity index 100% rename from src/regression_dashboard/build_dashboard.py rename to src/cpac_regression_dashboard/build_dashboard.py diff --git a/src/regression_dashboard/calculate_correlations.py b/src/cpac_regression_dashboard/calculate_correlations.py similarity index 100% rename from src/regression_dashboard/calculate_correlations.py rename to src/cpac_regression_dashboard/calculate_correlations.py diff --git a/src/regression_dashboard/create_yml.py b/src/cpac_regression_dashboard/create_yml.py similarity index 100% rename from src/regression_dashboard/create_yml.py rename to src/cpac_regression_dashboard/create_yml.py diff --git a/src/regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py similarity index 52% rename from src/regression_dashboard/generate_comment.py rename to src/cpac_regression_dashboard/generate_comment.py index da79b60..3ddffa2 100644 --- a/src/regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -1,13 +1,21 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Gather generated PNGs and link to heatmap in a GitHub-flavored Markdown string.""" +import asyncio from dataclasses import dataclass +from importlib.metadata import metadata import os from pathlib import Path import sys +import tempfile from typing import Generator +from cairosvg import svg2png +from git import Repo from github import Github +from pyppeteer import launch + +from ._version import __version__ @dataclass @@ -30,6 +38,45 @@ def __init__(self) -> None: _ENV = EnvVars() +@dataclass +class Heatmap: + """Heatmap dataclass.""" + + filename: str + content: str + + +def add_heatmap_to_branch(file: Heatmap) -> None: + """Add a heatmap to a branch. + + Parameters + ---------- + file : Heatmap + The heatmap file to add. 
+ + Returns + ------- + None + """ + personal_access_token = os.environ.get("GITHUB_TOKEN") + g = Github(personal_access_token) + repo = g.get_repo(f"{_ENV.testing_owner}/regtest-runlogs") + branch_name = f"{_ENV.repo}_{_ENV.sha}" + with tempfile.TemporaryDirectory() as _temp_dir: + temp_dir = Path(_temp_dir) + local_repo = Repo.clone_from( + repo.clone_url, temp_dir, branch=branch_name, depth=1 + ) + svg_path = temp_dir / f"{file.filename}.svg" + png_path = temp_dir / f"{file.filename}.png" + with open(svg_path, "w") as _f: + _f.write(file.content) + svg2png(background_color="white", url=str(svg_path), write_to=str(png_path)) + local_repo.index.add([png_path]) + local_repo.index.commit(":loud_sound: Add heatmap image") + local_repo.remotes.origin.push(branch_name) + + def gather_images(path: Path) -> Generator[Path, None, None]: """Gather the images. @@ -59,15 +106,15 @@ def gather_text(path: Path) -> str: str The concatenated text. """ - text = "" + text = "|feature|coefficient|\n|---|---|\n" for file in path.glob("*.txt"): with open(file, "r", encoding="utf=8") as _f: - text += _f.read() - text += "\n" + for line in _f.readlines(): + text += f"|{'|'.join(_.strip() for _ in line.split(':', 1))}|\n" return text.strip() -def generate_comment(path: Path) -> str: +async def generate_comment(path: Path) -> str: """Generate the comment. Parameters @@ -79,13 +126,53 @@ def generate_comment(path: Path) -> str: ------- str : The comment. """ - comment = "" + project_urls = metadata(__package__).get_all("Project-URL", []) + source_url = None + for _url in project_urls: + if _url.startswith("Source Code, "): + source_url = _url.split(",")[1].strip() + break + if source_url is None: + comment = f"Generated by {__name__} {__version__}\n\n" + else: + _packageless_name = __name__.replace(__package__, "").lstrip(".") + comment = ( + f"Generated by [{__package__}]({source_url})." + f"{_packageless_name} {__version__}\n\n" + ) + comment += await get_heatmap() for image in gather_images(path): raw_image_path = _raw_image_path(_ENV.testing_owner, _ENV.repo, _ENV.sha, image) comment += f"![{image.stem}]({raw_image_path})\n" return comment + gather_text(path) +async def get_heatmap() -> str: + """Get a heatmap image.""" + url = f"https://{_ENV.testing_owner}.github.io/dashboard/?data_sha={_ENV.sha}" + browser = await launch() + page = await browser.newPage() + await page.goto(url, waitUntil="networkidle0") + svg_string = await page.evaluate( + """() => { + let svg = document.querySelector('svg'); + return svg ? svg.outerHTML : null; + }""" + ) + if svg_string is not None: + _heatmap = Heatmap("heatmap", svg_string) + add_heatmap_to_branch(_heatmap) + heatmap = _raw_image_path( + _ENV.testing_owner, _ENV.repo, _ENV.sha, Path(f"{_heatmap.filename}.png") + ) + heatmap = f"[![heatmap]({heatmap})]({url})" + else: + heatmap = "" + + await browser.close() + return heatmap + + def main() -> None: """Generate and post a comment on a GitHub commit. 
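[Illustrative aside, not part of the patch] The `gather_text` change earlier in this patch turns each "name: value" line of a correlations .txt file into a row of a GitHub-flavored Markdown table by splitting on the first colon. A minimal, self-contained sketch of that same transformation is below; the helper name `lines_to_table` and the sample feature/coefficient line are hypothetical examples, not real pipeline output.

    def lines_to_table(lines: list[str]) -> str:
        # Header row matches the one gather_text now emits.
        text = "|feature|coefficient|\n|---|---|\n"
        for line in lines:
            # Split only on the first colon so any ":" inside the value survives.
            text += f"|{'|'.join(_.strip() for _ in line.split(':', 1))}|\n"
        return text.strip()

    print(lines_to_table(["space-template_desc-preproc_bold: 0.98731"]))
    # |feature|coefficient|
    # |---|---|
    # |space-template_desc-preproc_bold|0.98731|
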
@@ -105,14 +192,22 @@ def main() -> None: print("SHA: The SHA of the commit.") print("TESTING_OWNER: The owner of the testing repository.") sys.exit(0) + elif sys.argv[1] in ["-v", "--version"]: + print(f"{__name__} version {__version__}") + sys.exit(0) path = Path(sys.argv[1]) else: path = Path(os.getcwd()) + asyncio.run(post_comment(path)) + + +async def post_comment(path: Path) -> None: + """Post a comment on a GitHub commit and relevant PR.""" personal_access_token = os.environ.get("GITHUB_TOKEN") g = Github(personal_access_token) repo = g.get_repo(f"{_ENV.owner}/{_ENV.repo}") commit = repo.get_commit(_ENV.sha) - comment = generate_comment(path) + comment = await generate_comment(path) commit.create_comment(comment) for pr in repo.get_pulls(state="open", sort="created"): if pr.head.sha == _ENV.sha: diff --git a/src/regression_dashboard/templates/heatmap.html b/src/cpac_regression_dashboard/templates/heatmap.html similarity index 100% rename from src/regression_dashboard/templates/heatmap.html rename to src/cpac_regression_dashboard/templates/heatmap.html diff --git a/src/regression_dashboard/templates/heatmap.js b/src/cpac_regression_dashboard/templates/heatmap.js similarity index 100% rename from src/regression_dashboard/templates/heatmap.js rename to src/cpac_regression_dashboard/templates/heatmap.js diff --git a/src/regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc b/src/cpac_regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc similarity index 100% rename from src/regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc rename to src/cpac_regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc diff --git a/src/regression_dashboard/utils/html_script.py b/src/cpac_regression_dashboard/utils/html_script.py similarity index 100% rename from src/regression_dashboard/utils/html_script.py rename to src/cpac_regression_dashboard/utils/html_script.py diff --git a/src/regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py similarity index 100% rename from src/regression_dashboard/utils/parse_yaml.py rename to src/cpac_regression_dashboard/utils/parse_yaml.py diff --git a/src/regression_dashboard/__init__.py b/src/regression_dashboard/__init__.py deleted file mode 100644 index e69de29..0000000 From e3c5aada83e2351123ce8a09afefbcb1df365f8a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 19 Jan 2024 18:29:18 -0500 Subject: [PATCH 10/29] :necktie: Only walk log_dir if exists --- .../utils/parse_yaml.py | 66 +++++++++++++------ 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py index 73af52f..0aa504d 100644 --- a/src/cpac_regression_dashboard/utils/parse_yaml.py +++ b/src/cpac_regression_dashboard/utils/parse_yaml.py @@ -1,9 +1,17 @@ +"""From a pair of CPAC output directories, write a YAML file for regression.""" import os +from typing import Optional, Union import yaml +_PIPELINE_DICT = dict[Optional[str], dict[str, Optional[Union[str, int]]]] +_FULL_YAML_DICT = dict[ + str, Union[dict[str, Union[bool, int, Optional[str]]], _PIPELINE_DICT] +] -def get_dir(paths): + +def get_dir(paths: str) -> Optional[str]: + """Get the full path to a ``pipeline_*`` directory.""" if not paths: directory = None else: @@ -15,12 +23,13 @@ def get_dir(paths): def write_pipeline_yaml( - output_dir=None, - working_dir=None, - log_dir=None, - pipeline_config=None, - pipeline_name=None, -): + output_dir: Optional[str] 
= None, + working_dir: Optional[str] = None, + log_dir: Optional[str] = None, + pipeline_config: Optional[str] = None, + pipeline_name: Optional[str] = None, +) -> _PIPELINE_DICT: + """Collect paths and strings to write.""" return { pipeline_name: { "output_dir": output_dir, @@ -32,24 +41,27 @@ def write_pipeline_yaml( } -def parse_yaml(directory=None, pipeline_name=None): +def parse_yaml(directory: str, pipeline_name: str) -> _PIPELINE_DICT: + """Parse a CPAC output directory for pipeline information.""" subdirs = ["log", "working", "output"] - paths = {} + paths: dict[str, Optional[str]] = {} for subdir in subdirs: if os.path.isdir(os.path.join(directory, subdir)): paths[f"{subdir}_dir"] = os.path.join(directory, subdir) else: paths[f"{subdir}_dir"] = None - - log_dir = get_dir(paths["log_dir"]) - - for root, dirs, files in os.walk(paths["log_dir"]): - for file in files: - if file.endswith("Z.yml"): - pipeline_config = os.path.join(root, file) - + assert isinstance(paths["log_dir"], str) + log_dir: Optional[str] = get_dir(paths["log_dir"]) + + if log_dir is not None: + for root, _dirs, files in os.walk(paths["log_dir"]): + for file in files: + if file.endswith("Z.yml"): + pipeline_config = os.path.join(root, file) + assert isinstance(paths["working_dir"], str) working_dir = get_dir(paths["working_dir"]) + assert isinstance(paths["output_dir"], str) output_dir = get_dir(paths["output_dir"]) return write_pipeline_yaml( @@ -58,9 +70,14 @@ def parse_yaml(directory=None, pipeline_name=None): def write_yaml( - pipeline_1=None, pipeline_2=None, correlations_dir=None, run_name=None, n_cpus=None -): - yaml_dict = {} + pipeline_1: _PIPELINE_DICT, + pipeline_2: _PIPELINE_DICT, + correlations_dir: Optional[str] = None, + run_name: Optional[str] = None, + n_cpus: Optional[int] = None, +) -> _FULL_YAML_DICT: + """Combine settings and both pipelines into a single dictionary.""" + yaml_dict: _FULL_YAML_DICT = {} yaml_dict["settings"] = { "n_cpus": n_cpus, "correlations_dir": correlations_dir, @@ -76,8 +93,15 @@ def write_yaml( def cpac_yaml( - pipeline1, pipeline2, correlations_dir, run_name, n_cpus, branch, data_source + pipeline1: str, + pipeline2: str, + correlations_dir: str, + run_name: str, + n_cpus: int, + branch: str, + data_source: str, ) -> None: + """Write a YAML file for the regression run.""" pipeline_1 = parse_yaml(pipeline1, "pipeline_1") pipeline_2 = parse_yaml(pipeline2, "pipeline_2") From 71bb23bbbcb550a8effbd1135d395e56fafc1e63 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 19 Jan 2024 18:34:19 -0500 Subject: [PATCH 11/29] :package: Make `cpac_regsuite_correlate` a CLI script --- pyproject.toml | 1 + src/cpac_regression_dashboard/calculate_correlations.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fb14f3a..0b785d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ pytest-cov = "^4.1.0" ruff = "^0.1.7" [tool.poetry.scripts] +cpac_regsuite_correlate = 'cpac_regression_dashboard.cpac_correlations:main' cpac_regsuite_create_yaml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_create_yml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_generate_comment = 'cpac_regression_dashboard.generate_comment:main' diff --git a/src/cpac_regression_dashboard/calculate_correlations.py b/src/cpac_regression_dashboard/calculate_correlations.py index d0b7326..2477282 100644 --- a/src/cpac_regression_dashboard/calculate_correlations.py +++ 
b/src/cpac_regression_dashboard/calculate_correlations.py @@ -4,8 +4,15 @@ from .utils.html_script import body -if __name__ == "__main__": + +def main() -> None: # noqa: D103 all_keys, data_source, branch = cpac_correlations() html_body = body(all_keys, data_source) with open(f"{data_source}_{branch}.json", "w", encoding="utf-8") as file: file.write(html_body) + + +main.__doc__ = __doc__ + +if __name__ == "__main__": + main() From b7e3c0d6e30d9dca88e7e93592cd15df82171a44 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 24 Jun 2024 14:21:38 -0400 Subject: [PATCH 12/29] :alien: FCP-INDI/CPAC_regtest_pack#7 is merged --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0b785d7..c884bf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ cairosvg = "*" gitpython = "*" PyGithub = "*" pyppeteer = "*" -cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", rev = "package_correlations", subdirectory = "cpac_correlations"} +cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = "cpac_correlations"} [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" From eb56a7916e70db3bc76cd98cc1f0f069dbee00c7 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 5 Aug 2024 11:40:38 -0400 Subject: [PATCH 13/29] :necktie: Return path to regression YAML when generating --- src/cpac_regression_dashboard/utils/parse_yaml.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py index 0aa504d..bf93449 100644 --- a/src/cpac_regression_dashboard/utils/parse_yaml.py +++ b/src/cpac_regression_dashboard/utils/parse_yaml.py @@ -1,5 +1,6 @@ """From a pair of CPAC output directories, write a YAML file for regression.""" import os +from pathlib import Path from typing import Optional, Union import yaml @@ -100,14 +101,17 @@ def cpac_yaml( n_cpus: int, branch: str, data_source: str, -) -> None: +) -> Path: """Write a YAML file for the regression run.""" - pipeline_1 = parse_yaml(pipeline1, "pipeline_1") - pipeline_2 = parse_yaml(pipeline2, "pipeline_2") + pipeline_1: _PIPELINE_DICT = parse_yaml(pipeline1, "pipeline_1") + pipeline_2: _PIPELINE_DICT = parse_yaml(pipeline2, "pipeline_2") - yaml_contents = write_yaml( + yaml_contents: _FULL_YAML_DICT = write_yaml( pipeline_1, pipeline_2, correlations_dir, run_name, n_cpus ) - with open(f"{branch}_{data_source}.yml", "w") as file: + yaml_path: Path = Path(f"{branch}_{data_source}.yml") + """Path to YAML file for regression correlation.""" + with yaml_path.open("w") as file: yaml.dump(yaml_contents, file, default_flow_style=False, sort_keys=False) + return yaml_path From 63ee4764cc5dc12f7b579ce932ad87398c632480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Theodore=20=F0=9F=90=88=F0=9F=A4=96?= Date: Mon, 12 Aug 2024 12:15:15 -0400 Subject: [PATCH 14/29] :alien: Update regtest dev branch --- pyproject.toml | 2 +- .../utils/__pycache__/parse_yaml.cpython-311.pyc | Bin 3382 -> 0 bytes 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 src/cpac_regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc diff --git a/pyproject.toml b/pyproject.toml index c884bf3..15a8fa3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ cairosvg = "*" gitpython = "*" PyGithub = "*" pyppeteer = "*" -cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = 
"cpac_correlations"} +cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = "cpac_correlations", branch = "correlate_from_python"} [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" diff --git a/src/cpac_regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc b/src/cpac_regression_dashboard/utils/__pycache__/parse_yaml.cpython-311.pyc deleted file mode 100644 index 206699b974352d7157f108b335e0e879d71cedac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3382 zcmbtWO>7&-6`tY#aF>6wC`zJaNl|4>3>8_d;vdO>MchhuYL?{NILGN zv%7C*-n@A`^X9$p&5K~rhoCTj`@8ge5utz4PQ7?)jn_Yg#x|0WgtKU|TH{45dA>x8 z9ubL1@=DB?c#)9=iG`Z2@i=I65)V?mgg-%X@!wS8LK@ff(u#+Vf$wz*$TrfE%1Fq- zxmm4AYNgTE(Q|0OZ1iW*ZxX+T`G&_}3|42LKW1dIEPQGfh>0x!83*y) zj1`G)iksr`kIwD(?sflZXm`j#j5y7NRhF+hIY;!bdAD$ zI6SiJeGzyT*t@@fu{=Co8J;fApEw*9=24*gfQ`I{|DiNTX{~snb`P~58gQtrj&uwL zhNG466E1@dPUl>+#yQ)k6tsN7-GeQxD`b`K#`b2FWw+s{c2B02+)`%Q4iGt?O{Hbm zFn>XDo^-(^sd`KvA{4MfQiC9L}QE$y>vT#Qr~RI-|3H zK$7RF=LRfpYe_=K=mPYo4N(_& zXt2FeB(`nut#EN_^&hq*uZyjgG<;h#TesTw$Gvrb{x(Vet&#zaI%cit0o|{2x=+W? z!6q*ob}@o_5F*OE5NB)#V#oH@V#4OD@ny3vO3>(d#4S#4tYr!PqZ>eRflx(+dZx`~ zR0<~oWl&5Z)Oxlc=OlGKqdjzEAs!+$ZIH8c$g36}P)H;6Xl=gQbHkAMsZfBDARS{V z01^rcr~kH3BU8$xHI;^06&UNZ8kfFP*duX}p&_YWgHr;i3ymMRuX)D`gv`04aG!%% zZrmlC{GmU%HU0SJS2v%ApY;B5;Ol{%%zjU~Z>G{WQ}$o2_^(nd96ua;cX#NiW}vAN3F+~!>ISH%Q<%7KxCz{t^Pyq$F%>MqKq`AW}NITWjeV&U8($fv&&nO6m5xyd;o$Yd zk@LH;7ZcAW_O$)`<&kTZk!!^c3cTX=?*!D3l)8&!iPdX4#aMQF5va z=K?9@l2r&+FDBDOmLMAb{&U>g+P-kHS%FV8V(FK<>0U zd#tXg6Z8)94gzA%Z?2TtJ}1l!`K(aXVW2*Onu8j~R?BR^!o4d6X0y4 zW`xrr3$)>hw`ClR-!?92O~|Bsb%7of;Cm}Bh+5MD>l~mnqw_%7r>xX=;kk?qpVqYA z6}G-`UDSONFY$l%J_p{*1|QSQAUv%r!zkxJho+Fqu6usRe2&&JMxP@LkAwiDgavXD z6cg%WHcmcbDHz#!{dU}Ahb4I_Rmf_|rL3}^RJDz)Y>TQwwB)M1p~g8k^xb2#StT#$ zY?huQ+^-62`MCeoU#3o*Z}ztM8YF^d@rT&D@qxnxal}EbWT+Pd;I@+Y(a-Q5Y>kaO TOqi-_CGYyN-$8E@cftP!;c@}m From 89fc4880de0b980d097976f1f1ea186cb5fdfbad Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 27 Aug 2024 12:33:49 -0400 Subject: [PATCH 15/29] :bug: Make `directory = paths` if `paths` is an empty dir --- src/cpac_regression_dashboard/utils/parse_yaml.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py index bf93449..2593f30 100644 --- a/src/cpac_regression_dashboard/utils/parse_yaml.py +++ b/src/cpac_regression_dashboard/utils/parse_yaml.py @@ -13,9 +13,8 @@ def get_dir(paths: str) -> Optional[str]: """Get the full path to a ``pipeline_*`` directory.""" - if not paths: - directory = None - else: + directory = paths + if directory: for root, dirs, files in os.walk(paths): for _dir in dirs: if "pipeline_" in _dir: From 7447bd6a542e15530b4f32448a8d32fb48bde987 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 27 Aug 2024 13:40:44 -0400 Subject: [PATCH 16/29] :goal_net: Coerce `n_cpus` to int --- .../utils/parse_yaml.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py index 2593f30..2b97856 100644 --- a/src/cpac_regression_dashboard/utils/parse_yaml.py +++ b/src/cpac_regression_dashboard/utils/parse_yaml.py @@ -1,14 +1,12 @@ """From a pair of CPAC output directories, write a YAML file for regression.""" import os from pathlib import Path -from typing import Optional, Union +from typing import cast, Optional import yaml -_PIPELINE_DICT = 
dict[Optional[str], dict[str, Optional[Union[str, int]]]] -_FULL_YAML_DICT = dict[ - str, Union[dict[str, Union[bool, int, Optional[str]]], _PIPELINE_DICT] -] +_PIPELINE_DICT = dict[Optional[str], dict[str, Optional[int | str]]] +_FULL_YAML_DICT = dict[str, dict[str, bool | int | Optional[str]] | _PIPELINE_DICT] def get_dir(paths: str) -> Optional[str]: @@ -72,20 +70,23 @@ def parse_yaml(directory: str, pipeline_name: str) -> _PIPELINE_DICT: def write_yaml( pipeline_1: _PIPELINE_DICT, pipeline_2: _PIPELINE_DICT, - correlations_dir: Optional[str] = None, - run_name: Optional[str] = None, - n_cpus: Optional[int] = None, + correlations_dir: str, + run_name: str, + n_cpus: int = 1, ) -> _FULL_YAML_DICT: """Combine settings and both pipelines into a single dictionary.""" yaml_dict: _FULL_YAML_DICT = {} - yaml_dict["settings"] = { - "n_cpus": n_cpus, - "correlations_dir": correlations_dir, - "run_name": run_name, - "s3_creds": None, - "quick": False, - "verbose": False, - } + yaml_dict["settings"] = cast( + dict[str, bool | int | Optional[str]] | _PIPELINE_DICT, + { + "n_cpus": int(n_cpus), + "correlations_dir": correlations_dir, + "run_name": run_name, + "s3_creds": None, + "quick": False, + "verbose": False, + }, + ) yaml_dict["pipelines"] = {**pipeline_1, **pipeline_2} From 8b71ac982daebcc11f9318dec6232c4959992f8d Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 27 Aug 2024 14:41:45 -0400 Subject: [PATCH 17/29] :packaging: Fix correlation script target --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 15a8fa3..0259653 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ pytest-cov = "^4.1.0" ruff = "^0.1.7" [tool.poetry.scripts] -cpac_regsuite_correlate = 'cpac_regression_dashboard.cpac_correlations:main' +cpac_regsuite_correlate = 'cpac_regression_dashboard.calculate_correlations:main' cpac_regsuite_create_yaml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_create_yml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_generate_comment = 'cpac_regression_dashboard.generate_comment:main' From c4d6fed9ebbafc1bbf04d9f676f7e9e23f395e75 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 28 Aug 2024 16:01:45 -0400 Subject: [PATCH 18/29] :necktie: Make JSON an Array --- src/cpac_regression_dashboard/utils/html_script.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/html_script.py b/src/cpac_regression_dashboard/utils/html_script.py index 99d60a6..8442e4e 100644 --- a/src/cpac_regression_dashboard/utils/html_script.py +++ b/src/cpac_regression_dashboard/utils/html_script.py @@ -1,4 +1,4 @@ -def dataset(name, data_source, value) -> str: +def dataset(name: str, data_source: str, value: float | int | str) -> str: return f""" {{ "rowid": "{name}", @@ -8,13 +8,14 @@ def dataset(name, data_source, value) -> str: """ -def body(all_keys, data_source): - data_body = "" +def body(all_keys: list[str], data_source: str) -> str: + data_body: str = "[" for key in all_keys: name_value = key.split(": ") name = name_value[0] value = name_value[1] data_body += dataset(name, data_source, value) + data_body += "]" return data_body From 2fe78c3c0e63179cf9ae3f1bf797c3a4ff3ed70a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Theodore=20=F0=9F=90=88=F0=9F=A4=96?= Date: Fri, 20 Sep 2024 20:33:12 -0400 Subject: [PATCH 19/29] :necktie: Make correlations D3-readable JSON --- src/cpac_regression_dashboard/calculate_correlations.py | 5 ++++- 1 file changed, 4
insertions(+), 1 deletion(-) diff --git a/src/cpac_regression_dashboard/calculate_correlations.py b/src/cpac_regression_dashboard/calculate_correlations.py index 2477282..ec21582 100644 --- a/src/cpac_regression_dashboard/calculate_correlations.py +++ b/src/cpac_regression_dashboard/calculate_correlations.py @@ -1,15 +1,18 @@ #!/usr/bin/env python """Calculate correlations and write them to D3-friendly file.""" +import json + from cpac_correlations import cpac_correlations from .utils.html_script import body def main() -> None: # noqa: D103 + """Gather correlation coefficients and write them to D3-readable JSON.""" all_keys, data_source, branch = cpac_correlations() html_body = body(all_keys, data_source) with open(f"{data_source}_{branch}.json", "w", encoding="utf-8") as file: - file.write(html_body) + file.write(json.dumps(json.loads(f"[{html_body.strip().strip(',')}]"))) main.__doc__ = __doc__ From de1ba8f68044f5cbc4af7689416fe86962d6c871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Theodore=20=F0=9F=90=88=F0=9F=A4=96?= Date: Wed, 25 Sep 2024 13:37:22 -0400 Subject: [PATCH 20/29] :art: Clean up JSON format --- src/cpac_regression_dashboard/utils/html_script.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cpac_regression_dashboard/utils/html_script.py b/src/cpac_regression_dashboard/utils/html_script.py index 8442e4e..020c090 100644 --- a/src/cpac_regression_dashboard/utils/html_script.py +++ b/src/cpac_regression_dashboard/utils/html_script.py @@ -1,3 +1,6 @@ +import json + + def dataset(name: str, data_source: str, value: float | int | str) -> str: return f""" {{ @@ -15,8 +18,9 @@ def body(all_keys: list[str], data_source: str) -> str: name = name_value[0] value = name_value[1] data_body += dataset(name, data_source, value) + data_body = data_body.strip() data_body += "]" - return data_body + return json.dumps(json.loads(data_body)) def write_html(data_body) -> str: From c92a7281583722f0c51c98cbcb8d5e5fab23d43e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 26 Sep 2024 13:58:43 -0400 Subject: [PATCH 21/29] :goal_net: Specify AssertionErrors --- src/cpac_regression_dashboard/utils/parse_yaml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cpac_regression_dashboard/utils/parse_yaml.py b/src/cpac_regression_dashboard/utils/parse_yaml.py index 2b97856..8d50688 100644 --- a/src/cpac_regression_dashboard/utils/parse_yaml.py +++ b/src/cpac_regression_dashboard/utils/parse_yaml.py @@ -49,7 +49,7 @@ def parse_yaml(directory: str, pipeline_name: str) -> _PIPELINE_DICT: paths[f"{subdir}_dir"] = os.path.join(directory, subdir) else: paths[f"{subdir}_dir"] = None - assert isinstance(paths["log_dir"], str) + assert isinstance(paths["log_dir"], str), f"log_dir: {paths['log_dir']}" log_dir: Optional[str] = get_dir(paths["log_dir"]) if log_dir is not None: @@ -57,9 +57,9 @@ def parse_yaml(directory: str, pipeline_name: str) -> _PIPELINE_DICT: for file in files: if file.endswith("Z.yml"): pipeline_config = os.path.join(root, file) - assert isinstance(paths["working_dir"], str) + assert isinstance(paths["working_dir"], str), f"working_dir: {paths['working_dir']}" working_dir = get_dir(paths["working_dir"]) - assert isinstance(paths["output_dir"], str) + assert isinstance(paths["output_dir"], str), f"output_dir: {paths['output_dir']}" output_dir = get_dir(paths["output_dir"]) return write_pipeline_yaml( From 17eb0a8f3337f7c303c389873c6ffb2c75251745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Theodore=20=F0=9F=90=88=F0=9F=A4=96?= Date: Fri, 27 Sep 
2024 11:07:23 -0400 Subject: [PATCH 22/29] :heavy_plus_sign:/:heavy_minus_sign: Replace `pyppeteer` with `playwright` --- pyproject.toml | 2 +- .../generate_comment.py | 42 ++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0259653..4e07978 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ python = ">=3.9" cairosvg = "*" gitpython = "*" PyGithub = "*" -pyppeteer = "*" +playwright = "*" cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = "cpac_correlations", branch = "correlate_from_python"} [tool.poetry.group.dev.dependencies] diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 3ddffa2..933633a 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -13,7 +13,7 @@ from cairosvg import svg2png from git import Repo from github import Github -from pyppeteer import launch +from playwright.async_api import async_playwright from ._version import __version__ @@ -150,26 +150,30 @@ async def generate_comment(path: Path) -> str: async def get_heatmap() -> str: """Get a heatmap image.""" url = f"https://{_ENV.testing_owner}.github.io/dashboard/?data_sha={_ENV.sha}" - browser = await launch() - page = await browser.newPage() - await page.goto(url, waitUntil="networkidle0") - svg_string = await page.evaluate( - """() => { - let svg = document.querySelector('svg'); - return svg ? svg.outerHTML : null; - }""" - ) - if svg_string is not None: - _heatmap = Heatmap("heatmap", svg_string) - add_heatmap_to_branch(_heatmap) - heatmap = _raw_image_path( - _ENV.testing_owner, _ENV.repo, _ENV.sha, Path(f"{_heatmap.filename}.png") + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + await page.goto(url, wait_until="networkidle") + svg_string = await page.evaluate( + """() => { + let svg = document.querySelector('svg'); + return svg ? svg.outerHTML : null; + }""" ) - heatmap = f"[![heatmap]({heatmap})]({url})" - else: - heatmap = "" + if svg_string is not None: + _heatmap = Heatmap("heatmap", svg_string) + add_heatmap_to_branch(_heatmap) + heatmap = _raw_image_path( + _ENV.testing_owner, + _ENV.repo, + _ENV.sha, + Path(f"{_heatmap.filename}.png"), + ) + heatmap = f"[![heatmap]({heatmap})]({url})" + else: + heatmap = "" - await browser.close() + await browser.close() return heatmap From 814e02461ee229ca12317816e72972cf63403f75 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 27 Sep 2024 14:52:57 -0400 Subject: [PATCH 23/29] :goal_net: Warn if playwright + chromium fails --- .../generate_comment.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 933633a..ba004f1 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -151,15 +151,22 @@ async def get_heatmap() -> str: """Get a heatmap image.""" url = f"https://{_ENV.testing_owner}.github.io/dashboard/?data_sha={_ENV.sha}" async with async_playwright() as p: - browser = await p.chromium.launch(headless=True) - page = await browser.new_page() - await page.goto(url, wait_until="networkidle") - svg_string = await page.evaluate( - """() => { - let svg = document.querySelector('svg'); - return svg ? 
svg.outerHTML : null; - }""" - ) + try: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + await page.goto(url, wait_until="networkidle") + svg_string = await page.evaluate( + """() => { + let svg = document.querySelector('svg'); + return svg ? svg.outerHTML : null; +}""" + ) + except Exception as exception: + from warnings import warn + + warn( + f"{exception}\n\nAre playwright and chromium installed?", RuntimeWarning + ) if svg_string is not None: _heatmap = Heatmap("heatmap", svg_string) add_heatmap_to_branch(_heatmap) From 9eaaf35c68798ae3f3b69d7861fd27017abf8d8f Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 18 Oct 2024 11:21:47 -0400 Subject: [PATCH 24/29] :memo: Document `$PLAYWRIGHT_BROWSERS_PATH` --- pyproject.toml | 1 + src/cpac_regression_dashboard/generate_comment.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4e07978..3f093e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ cpac_regsuite_correlate = 'cpac_regression_dashboard.calculate_correlations:main cpac_regsuite_create_yaml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_create_yml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_generate_comment = 'cpac_regression_dashboard.generate_comment:main' +"cpac-regsuite-generate-comment" = 'cpac_regression_dashboard.generate_comment:main' [tool.poetry.urls] "Source Code" = "https://github.com/FCP-INDI/C-PAC_regression_dashboard" diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index ba004f1..7643719 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -199,6 +199,7 @@ def main() -> None: "comments and pull requests." 
) print("OWNER: The owner of the repository.") + print("PLAYWRIGHT_BROWSERS_PATH: The path for Playwright browsers.") print("REPO: The name of the repository.") print("SHA: The SHA of the commit.") print("TESTING_OWNER: The owner of the testing repository.") From 646a47e6bde86a1664b725490bb701f255092a12 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 18 Oct 2024 16:19:39 -0400 Subject: [PATCH 25/29] :necktie: Set personal access token in `git.Repo` --- src/cpac_regression_dashboard/generate_comment.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 7643719..e890c2e 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -58,14 +58,18 @@ def add_heatmap_to_branch(file: Heatmap) -> None: ------- None """ - personal_access_token = os.environ.get("GITHUB_TOKEN") - g = Github(personal_access_token) + g = Github(_ENV.github_token) repo = g.get_repo(f"{_ENV.testing_owner}/regtest-runlogs") branch_name = f"{_ENV.repo}_{_ENV.sha}" with tempfile.TemporaryDirectory() as _temp_dir: temp_dir = Path(_temp_dir) local_repo = Repo.clone_from( - repo.clone_url, temp_dir, branch=branch_name, depth=1 + repo.clone_url.replace( + "https://", f"https://${_ENV.github_token}:x-oauth-basic@" + ), + temp_dir, + branch=branch_name, + depth=1, ) svg_path = temp_dir / f"{file.filename}.svg" png_path = temp_dir / f"{file.filename}.png" From e9443814a386e4e3cf43e492a982a03b7c134677 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 21 Oct 2024 12:35:35 -0400 Subject: [PATCH 26/29] :necktie: Adjust git fetch-pull-(force)push flow --- src/cpac_regression_dashboard/generate_comment.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index e890c2e..29d2abe 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -12,6 +12,7 @@ from cairosvg import svg2png from git import Repo +from git.exc import GitCommandError from github import Github from playwright.async_api import async_playwright @@ -71,6 +72,9 @@ def add_heatmap_to_branch(file: Heatmap) -> None: branch=branch_name, depth=1, ) + # make sure branch is up to date + local_repo.remotes.origin.fetch("+refs/heads/*:refs/remotes/origin/*") + local_repo.remotes.origin.pull(branch_name) svg_path = temp_dir / f"{file.filename}.svg" png_path = temp_dir / f"{file.filename}.png" with open(svg_path, "w") as _f: @@ -78,7 +82,10 @@ def add_heatmap_to_branch(file: Heatmap) -> None: svg2png(background_color="white", url=str(svg_path), write_to=str(png_path)) local_repo.index.add([png_path]) local_repo.index.commit(":loud_sound: Add heatmap image") - local_repo.remotes.origin.push(branch_name) + try: + local_repo.remotes.origin.push(branch_name) + except GitCommandError: + local_repo.remotes.origin.push(branch_name, force=True) def gather_images(path: Path) -> Generator[Path, None, None]: From 0cc9df65bfa2d8d24d29954e30f04d03aa5c67d9 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 22 Oct 2024 22:11:11 -0400 Subject: [PATCH 27/29] :construction_worker: Update Chromium before building heatmap + repost comment on any open PRs --- .pre-commit-config.yaml | 1 + .../generate_comment.py | 36 +++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 267a008..4d6b15e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,7 @@ repos: - id: mypy args: [--ignore-missing-imports] additional_dependencies: + - types-requests - types-toml - types-PyYAML - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 29d2abe..8e14f1a 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -6,15 +6,18 @@ from importlib.metadata import metadata import os from pathlib import Path +import subprocess import sys import tempfile -from typing import Generator +from typing import Generator, Optional from cairosvg import svg2png from git import Repo from git.exc import GitCommandError from github import Github +from github.Repository import Repository from playwright.async_api import async_playwright +import requests from ._version import __version__ @@ -160,6 +163,9 @@ async def generate_comment(path: Path) -> str: async def get_heatmap() -> str: """Get a heatmap image.""" + subprocess.run( + "playwright install chromium".split(" "), check=False + ) # update chromium url = f"https://{_ENV.testing_owner}.github.io/dashboard/?data_sha={_ENV.sha}" async with async_playwright() as p: try: @@ -224,9 +230,35 @@ def main() -> None: asyncio.run(post_comment(path)) +def repost_comment_on_pull_request( + repo: Repository, comment: str, pr: dict[str, str] +) -> None: + """Repost a commit comment on a PR containing that commit.""" + pr_number = pr["number"] + issue = repo.get_issue(number=pr_number) + issue.create_comment(comment) + + +def repost_comment_on_pull_requests(repo: Repository, comment: str) -> None: + """Repost a commit comment on all PR containing that commit.""" + pr_url: str = f"https://api.github.com/repos/{_ENV.owner}/{_ENV.repo}/commits/{_ENV.sha}/pulls" + headers: dict[str, str] = { + "Authorization": f"Bearer {_ENV.github_token}", + "Accept": "application/vnd.github.v3+json", + } + + response: requests.Response = requests.get(pr_url, headers=headers) + success_response = 200 + if response.status_code == success_response: + pull_requests: Optional[list[dict]] = response.json() + if pull_requests: + for pr in pull_requests: + repost_comment_on_pull_request(repo, comment, pr) + + async def post_comment(path: Path) -> None: """Post a comment on a GitHub commit and relevant PR.""" - personal_access_token = os.environ.get("GITHUB_TOKEN") + personal_access_token = _ENV.github_token g = Github(personal_access_token) repo = g.get_repo(f"{_ENV.owner}/{_ENV.repo}") commit = repo.get_commit(_ENV.sha) From a3e98433ce2f27ee28d68a545035c61204ca3344 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 7 Nov 2024 21:54:52 -0500 Subject: [PATCH 28/29] :bookmark: Set canonincal dependencies + initial version and CHANGELOG --- CHANGELOG.md | 21 +++++++++++++++++++ pyproject.toml | 8 +++---- .../generate_comment.py | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cebba76 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ + +# Changelog + +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.0.0] + +Initial release. See [README](https://github.com/FCP-INDI/C-PAC_regression_dashboard/blob/v1.0.0/README.md). + +[1.0.0]: https://github.com/FCP-INDI/C-PAC_regression_dashboard/releases/tag/v1.0.0 diff --git a/pyproject.toml b/pyproject.toml index 3f093e6..ac04376 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cpac_regression_dashboard" -version = "1.0.0.dev1" +version = "1.0.0" description = "Generate a dashboard for C-PAC regression tests" authors = [ "Amy Gutierrez <58920810+amygutierrez@users.noreply.github.com>", @@ -9,6 +9,7 @@ authors = [ license = "LGPL-2.1" readme = "README.md" packages = [{from = "src", include = "cpac_regression_dashboard"}] +repository = "https://github.com/FCP-INDI/C-PAC_regression_dashboard" [tool.poetry.dependencies] python = ">=3.9" @@ -16,7 +17,7 @@ cairosvg = "*" gitpython = "*" PyGithub = "*" playwright = "*" -cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = "cpac_correlations", branch = "correlate_from_python"} +cpac-correlations = {git = "https://github.com/FCP-INDI/CPAC_regtest_pack.git", subdirectory = "cpac_correlations", branch = "main"} [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" @@ -32,9 +33,6 @@ cpac_regsuite_create_yml = 'cpac_regression_dashboard.create_yml:main' cpac_regsuite_generate_comment = 'cpac_regression_dashboard.generate_comment:main' "cpac-regsuite-generate-comment" = 'cpac_regression_dashboard.generate_comment:main' -[tool.poetry.urls] -"Source Code" = "https://github.com/FCP-INDI/C-PAC_regression_dashboard" - [tool.pytest.ini_options] pythonpath = [ "src" diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 8e14f1a..9010daf 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -143,7 +143,7 @@ async def generate_comment(path: Path) -> str: project_urls = metadata(__package__).get_all("Project-URL", []) source_url = None for _url in project_urls: - if _url.startswith("Source Code, "): + if _url.startswith("Repository, "): source_url = _url.split(",")[1].strip() break if source_url is None: From 27c25873378e25baa7ee37cd8d9c9f78c3a298ac Mon Sep 17 00:00:00 2001 From: Jon Cluce Date: Mon, 11 Nov 2024 15:11:12 -0500 Subject: [PATCH 29/29] :goal_net: Handle missing correlations --- src/cpac_regression_dashboard/generate_comment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpac_regression_dashboard/generate_comment.py b/src/cpac_regression_dashboard/generate_comment.py index 9010daf..86f4e60 100644 --- a/src/cpac_regression_dashboard/generate_comment.py +++ b/src/cpac_regression_dashboard/generate_comment.py @@ -167,6 +167,7 @@ async def get_heatmap() -> str: "playwright install chromium".split(" "), check=False ) # update chromium url = f"https://{_ENV.testing_owner}.github.io/dashboard/?data_sha={_ENV.sha}" + svg_string: Optional[str] = None async with async_playwright() as p: try: browser = await p.chromium.launch(headless=True) @@ -184,7 +185,7 @@ async def get_heatmap() -> str: warn( f"{exception}\n\nAre playwright and chromium installed?", RuntimeWarning ) - if svg_string is not None: + if svg_string: _heatmap = Heatmap("heatmap", svg_string) add_heatmap_to_branch(_heatmap) heatmap = 
_raw_image_path(