diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py
index 74f6b3b8..8b95e8fd 100644
--- a/scripts/benchmarks/benchmark.py
+++ b/scripts/benchmarks/benchmark.py
@@ -11,13 +11,13 @@
 # Third Party
 from tqdm import tqdm
-from transformers import AutoConfig, HfArgumentParser, TrainingArguments
-from transformers import AutoTokenizer
+from transformers import AutoConfig, AutoTokenizer, HfArgumentParser, TrainingArguments
 import datasets
 import pandas as pd
 import torch
 import yaml
 
+# First Party
 from scripts.benchmarks.data_processing import build_data_formatting_func
 
 """
@@ -84,8 +84,9 @@
 RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "mem_peak_torch_mem_alloc_in_bytes"
 ERROR_MESSAGES = "error_messages"
 
-SCENARIOS_STANZA_SCN = 'scenarios'
-SCENARIOS_STANZA_DATA = 'data_processing' # optional
+SCENARIOS_STANZA_SCN = "scenarios"
+SCENARIOS_STANZA_DATA = "data_processing"  # optional
+
 
 def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]:
     """
@@ -155,36 +156,36 @@ class BenchmarkDataset:
     def __init__(
         self,
         data_save_path: str,
-        dataset_name: str = 'yahma/alpaca-cleaned',
+        dataset_name: str = "yahma/alpaca-cleaned",
         dataset_split: str = "train",
-        formatting: str = 'instruct',
+        formatting: str = "instruct",
         tokenize: bool = False,
-        input_field: str = 'input',
-        dataset_text_field: str = 'output',
+        input_field: str = "input",
+        dataset_text_field: str = "output",
         chat_template: str = None,
     ) -> None:
-        self.dataset_split = datasets.load_dataset(
-            dataset_name, split=dataset_split
-        )
+        self.dataset_split = datasets.load_dataset(dataset_name, split=dataset_split)
 
         self.kwargs = {
-            'formatting': formatting,
-            'tokenize': tokenize,
-            'input_field': input_field,
-            'dataset_text_field': dataset_text_field,
-            'chat_template' : chat_template
+            "formatting": formatting,
+            "tokenize": tokenize,
+            "input_field": input_field,
+            "dataset_text_field": dataset_text_field,
+            "chat_template": chat_template,
         }
-        self.training_paths = {} # cache to store the training paths
+        self.training_paths = {}  # cache to store the training paths
        self.data_save_path = data_save_path
 
     def prepare_dataset(
-        self, model_name: str, response_template: str = None,
+        self,
+        model_name: str,
+        response_template: str = None,
     ):
         if model_name in self.training_paths:
             return self.training_paths[model_name]
 
-        if self.kwargs['tokenize']:
+        if self.kwargs["tokenize"]:
             tokenizer = AutoTokenizer.from_pretrained(model_name)
 
             # for now, if pad_token_id is None, will just do a replacement
@@ -193,27 +194,28 @@ def prepare_dataset(
 
             # replace some special characters in the model name
             save_path = DATA_JSON_NAME.format(
-                re.sub(r'[/-]', '_', model_name),
+                re.sub(r"[/-]", "_", model_name),
             )
         else:
             tokenizer = None
-            save_path = DATA_JSON_NAME.format('all')
+            save_path = DATA_JSON_NAME.format("all")
 
         # get the full path
         save_path = os.path.join(self.data_save_path, save_path)
 
         # build the formatting func
         format_fn, kwargs = build_data_formatting_func(
-            tokenizer, **self.kwargs,
+            tokenizer,
+            **self.kwargs,
             features=set(self.dataset_split.features),
             response_template=response_template,
         )
 
-        if 'chat_template' in self.kwargs:
-            print ('*** CHAT TEMPLATE *****')
-            print (self.kwargs['chat_template'])
+        if self.kwargs["chat_template"] is not None:
+            print("*** CHAT TEMPLATE *****")
+            print(self.kwargs["chat_template"])
 
-        print (f"Preparing dataset '{save_path}'")
+        print(f"Preparing dataset '{save_path}'")
 
         # call the map
         ds = self.dataset_split.map(format_fn, **kwargs)
@@ -225,6 +227,7 @@ def prepare_dataset(
         self.training_paths[model_name] = save_path
         return save_path
 
+
 def convert_keypairs_to_map(keypairs: List):
     return {key: val for key, val in zip(keypairs[::2], keypairs[1::2])}
 
@@ -673,12 +676,12 @@ def prepare_arguments(args, benchmark_dataset: BenchmarkDataset):
     for x in products:
         # prepare the dataset
         training_path = benchmark_dataset.prepare_dataset(
-            x['model_name_or_path'],
+            x["model_name_or_path"],
             (
-                x[HF_ARG_RESPONSE_TEMPLATE] 
+                x[HF_ARG_RESPONSE_TEMPLATE]
                 if HF_ARG_RESPONSE_TEMPLATE in x
                 else constants.get(HF_ARG_RESPONSE_TEMPLATE)
-            )
+            ),
         )
         # update
         x[HF_ARG_TRAINING_DATA_PATH] = training_path
@@ -838,7 +841,7 @@ def main(args):
 
     # 1. Prepares a standard BenchmarkDataset
     #    - the preparation of the dataset is deferred to when 'prepare_dataset' is called
-    #    - try to read the data_processing stanza of 
+    #    - try to read the data_processing stanza of the scenarios config
     dataset_processing_args = ConfigUtils.read_yaml(args.scenarios_config_path).get(
         SCENARIOS_STANZA_DATA, {}
     )
diff --git a/scripts/benchmarks/compare_with_reference.py b/scripts/benchmarks/compare_with_reference.py
index 6a66cebd..46b76b80 100644
--- a/scripts/benchmarks/compare_with_reference.py
+++ b/scripts/benchmarks/compare_with_reference.py
@@ -36,6 +36,7 @@
 BENCHMARK_FILENAME = "benchmarks.csv"
 OUTLIERS_FILENAME = "outliers.csv"
 
+
 def plot_chart(ax, x, y, title, xlabel, ylabel):
     ax.scatter(x, y, s=10)
     ax.set_title(title, fontsize=8)
diff --git a/scripts/benchmarks/data_processing.py b/scripts/benchmarks/data_processing.py
index 6d32c3ab..1a860bbe 100644
--- a/scripts/benchmarks/data_processing.py
+++ b/scripts/benchmarks/data_processing.py
@@ -1,34 +1,43 @@
-from trl import DataCollatorForCompletionOnlyLM
+# Standard
+from typing import Callable, Dict, List
+
+# Third Party
 from transformers import PreTrainedTokenizer
-from typing import Dict, Callable, List
+from trl import DataCollatorForCompletionOnlyLM
+
+DEFAULT_FIELDS = ["input_ids", "attention_mask", "labels"]
 
-DEFAULT_FIELDS = [
-    'input_ids',
-    'attention_mask',
-    'labels'
-]
 
 def build_data_formatting_func(
     tokenizer: PreTrainedTokenizer = None,
-    formatting: str = 'instruct',
+    formatting: str = "instruct",
     tokenize: bool = False,
-    input_field: str = 'input',
-    dataset_text_field: str = 'output',
-    features: List = None, 
+    input_field: str = "input",
+    dataset_text_field: str = "output",
+    features: List = None,
     response_template: str = None,
     chat_template: str = None,
 ):
     if tokenizer is None or chat_template is None:
         return _build_data_formatting_func_without_chat_template(
-            tokenizer, formatting, tokenize, input_field, dataset_text_field,
-            features, response_template
+            tokenizer,
+            formatting,
+            tokenize,
+            input_field,
+            dataset_text_field,
+            features,
+            response_template,
         )
 
     return _build_data_formatting_func(
-        tokenizer, tokenize, chat_template,
-        dataset_text_field, features, response_template
+        tokenizer,
+        tokenize,
+        chat_template,
+        dataset_text_field,
+        features,
+        response_template,
     )
-    
+
 
 # this one uses the chat template and tokenizer
 def _build_data_formatting_func(
@@ -36,7 +45,7 @@ def _build_data_formatting_func(
     tokenize: bool = False,
     chat_template: str = None,
     dataset_text_field: str = "output",
-    features: List = None, 
+    features: List = None,
     response_template: str = None,
 ):
 
@@ -47,19 +56,18 @@ def _build_data_formatting_func(
         loss_masking = instruction_mask_loss(tokenizer, response_template)
 
     def _format(example):
-        formatted_and_maybe_tokenized = tokenizer.apply_chat_template([example], tokenize=tokenize)
-        key = 'input_ids' if tokenize else dataset_text_field
+        formatted_and_maybe_tokenized = tokenizer.apply_chat_template(
+            [example], tokenize=tokenize
+        )
+        key = "input_ids" if tokenize else dataset_text_field
         if not loss_masking:
             return {key: formatted_and_maybe_tokenized}
         return loss_masking(formatted_and_maybe_tokenized)
 
-    return _format, {
-        'remove_columns': features.difference(
-            set(DEFAULT_FIELDS)
-        )
-    }
+    return _format, {"remove_columns": features.difference(set(DEFAULT_FIELDS))}
+
 
-# ---- NOTE: remove this eventually and move to check templates ---- 
+# ---- NOTE: remove this eventually and move to check templates ----
 PROMPT_DICT = {
     "prompt_input": (
         "Below is an instruction that describes a task, paired with an input that provides further context. "
@@ -76,7 +84,9 @@
 # combine functions
 # c = combine(a, b) then c(i) = b(a(i))
 FUNC = Callable[[Dict], Dict]
-def combine_functions(*funcs : FUNC) -> FUNC:
+
+
+def combine_functions(*funcs: FUNC) -> FUNC:
     def _combine(x):
         for f in funcs:
             x = f(x)
@@ -84,13 +94,14 @@ def _combine(x):
     return _combine
 
+
 def _build_data_formatting_func_without_chat_template(
     tokenizer: PreTrainedTokenizer = None,
-    formatting: str = 'instruct',
+    formatting: str = "instruct",
     tokenize: bool = False,
-    input_field: str = 'input',
-    dataset_text_field: str = 'output',
-    features: List = None, 
+    input_field: str = "input",
+    dataset_text_field: str = "output",
+    features: List = None,
     response_template: str = None,
 ):
 
     # FIFO
@@ -99,43 +110,31 @@ def _build_data_formatting_func_without_chat_template(
 
     if features is None:
         features = set()
 
-    if formatting == 'instruct':
+    if formatting == "instruct":
         funcs.append(
             instruction_formatter(
-                input_field=input_field,
-                dataset_text_field=dataset_text_field
+                input_field=input_field, dataset_text_field=dataset_text_field
             )
         )
 
     if tokenize:
-        funcs.append(
-            tokenization(
-                tokenizer,
-                dataset_text_field=dataset_text_field
-            )
-        )
+        funcs.append(tokenization(tokenizer, dataset_text_field=dataset_text_field))
 
-    if formatting == 'instruct' and response_template:
-        funcs.append(
-            instruction_mask_loss(tokenizer, response_template)
-        )
+    if formatting == "instruct" and response_template:
+        funcs.append(instruction_mask_loss(tokenizer, response_template))
 
     if len(funcs) == 0:
-        raise ValueError(
-            "Unable to build a data formatting recipe"
-        )
+        raise ValueError("Unable to build a data formatting recipe")
 
     return combine_functions(*funcs), {
-        'remove_columns': features.union(
+        "remove_columns": features.union(
             set([input_field, dataset_text_field])
-        ).difference(
-            set(DEFAULT_FIELDS)
-        )
+        ).difference(set(DEFAULT_FIELDS))
     }
 
+
 def instruction_formatter(
-    input_field: str = "input",
-    dataset_text_field: str = "output"
+    input_field: str = "input", dataset_text_field: str = "output"
 ):
     def format_fn(example: Dict):
         prompt_input, prompt_no_input = (
@@ -152,20 +151,20 @@ def format_fn(example: Dict):
     return format_fn
 
-def tokenization(
-    tokenizer: PreTrainedTokenizer,
-    dataset_text_field: str = "output"
-):
+
+def tokenization(tokenizer: PreTrainedTokenizer, dataset_text_field: str = "output"):
     def _tokenize(example):
         text_field = example[dataset_text_field] + tokenizer.eos_token
         return tokenizer(text_field)
 
     return _tokenize
 
-# ---- NOTE: remove this eventually and move to check templates ---- 
+
+# ---- NOTE: remove this eventually and move to check templates ----
+
 def instruction_mask_loss(
-    tokenizer: PreTrainedTokenizer, 
+    tokenizer: PreTrainedTokenizer,
     response_template: str,
     take_from_index: int = 2,
 ):
@@ -177,17 +176,21 @@ def instruction_mask_loss(
         response_template, add_special_tokens=False
     )
 
-    # this ignores the first 
+    # this ignores the first take_from_index tokens of the response template
     if len(response_template_ids) > take_from_index:
         response_template_ids = response_template_ids[take_from_index:]
-        print (f"Taking response_ids[{take_from_index}:] from '{len(response_template_ids)}' response tokens")
-    
-    collator = DataCollatorForCompletionOnlyLM(response_template_ids, tokenizer=tokenizer, ignore_index=-100)
+        print(
+            f"Taking response_ids[{take_from_index}:] from '{len(response_template_ids)}' response tokens"
+        )
+
+    collator = DataCollatorForCompletionOnlyLM(
+        response_template_ids, tokenizer=tokenizer, ignore_index=-100
+    )
 
     def collate_example(example):
         # single example
-        collated_example = collator([example], return_tensors = "pt")
+        collated_example = collator([example], return_tensors="pt")
         # flatten the additional dim
-        return {k: v.view(-1) for k,v in collated_example.items()}
+        return {k: v.view(-1) for k, v in collated_example.items()}
 
     return collate_example
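
Usage note: below is a minimal sketch of how the refactored pieces fit together end to end, mirroring what `BenchmarkDataset.prepare_dataset` does internally minus the caching and model-specific save paths. The model name, output filename, and the `"### Response:"` marker are illustrative assumptions (the marker is assumed to match the alpaca-style `PROMPT_DICT`), not values taken from this diff:

```python
# Hedged sketch of the no-chat-template path; placeholder names are
# marked in the comments and should be swapped for real values.
import datasets
from transformers import AutoTokenizer

from scripts.benchmarks.data_processing import build_data_formatting_func

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")  # placeholder model
ds = datasets.load_dataset("yahma/alpaca-cleaned", split="train")  # the default dataset above

# build_data_formatting_func returns (format_fn, map_kwargs); map_kwargs
# carries the 'remove_columns' set, so after the map only the tokenized
# DEFAULT_FIELDS (input_ids, attention_mask, labels) remain.
format_fn, map_kwargs = build_data_formatting_func(
    tokenizer,
    formatting="instruct",
    tokenize=True,
    input_field="input",
    dataset_text_field="output",
    features=set(ds.features),
    # with tokenize=True this also attaches completion-only loss masking via
    # DataCollatorForCompletionOnlyLM (prompt tokens get label -100);
    # "### Response:" is an assumed alpaca-style marker
    response_template="### Response:",
)

ds = ds.map(format_fn, **map_kwargs)
ds.to_json("benchmark_data.json")  # illustrative save path
```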