From 41b486c0c79514c74a541669cf0e8210beb91449 Mon Sep 17 00:00:00 2001
From: Tapasvi Patel <133996364+tapspatel@users.noreply.github.com>
Date: Wed, 11 Dec 2024 11:07:21 -0600
Subject: [PATCH] #1507: Added ability to gather golden result information and
 consolidate into report as well as store golden and device artifacts
 generated during runtime (#1544)

---
 runtime/include/tt/runtime/detail/debug.h    |  8 +-
 runtime/tools/python/ttrt/common/golden.py   | 77 +++++++++++++++++++-
 runtime/tools/python/ttrt/common/run.py      | 58 ++++++++++++++-
 runtime/tools/python/ttrt/common/util.py     | 19 +++--
 runtime/tools/python/ttrt/runtime/module.cpp |  8 ++
 5 files changed, 155 insertions(+), 15 deletions(-)

diff --git a/runtime/include/tt/runtime/detail/debug.h b/runtime/include/tt/runtime/detail/debug.h
index ed049df2d..829f23660 100644
--- a/runtime/include/tt/runtime/detail/debug.h
+++ b/runtime/include/tt/runtime/detail/debug.h
@@ -63,13 +63,19 @@ struct Hooks {
 #endif
   }
 
+  void unregisterHooks() const {
+#if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1
+    operatorCallback = std::nullopt;
+#endif
+  }
+
 private:
 #if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1
   Hooks(std::optional<std::function<void(Binary, CallbackContext, OpContext)>>
             operatorCallback)
       : operatorCallback(operatorCallback) {}
 
-  std::optional<std::function<void(Binary, CallbackContext, OpContext)>>
+  mutable std::optional<std::function<void(Binary, CallbackContext, OpContext)>>
       operatorCallback;
 #else
   constexpr Hooks() = default;
diff --git a/runtime/tools/python/ttrt/common/golden.py b/runtime/tools/python/ttrt/common/golden.py
index 7bab624f6..2666c1f2e 100644
--- a/runtime/tools/python/ttrt/common/golden.py
+++ b/runtime/tools/python/ttrt/common/golden.py
@@ -16,10 +16,27 @@
 import shutil
 import atexit
 import re
+from functools import partial
 
 from ttrt.common.util import *
 
 
+class GoldenRuntimeConfig:
+    def __init__(
+        self,
+        atol=1e-08,
+        rtol=1e-05,
+        pcc=0.99,
+        artifact_dir="",
+        save_golden_tensors=False,
+    ):
+        self.artifact_dir = artifact_dir
+        self.pcc = pcc
+        self.atol = atol
+        self.rtol = rtol
+        self.save_golden_tensors = save_golden_tensors
+
+
 def get_atol_rtol_pcc(golden, calculated):
     import numpy as np
     import torch
@@ -103,7 +120,9 @@ def get_pcc(golden, calculated):
     )
 
 
-def golden(binary, programContext, opContext):
+def golden_partial_function(
+    golden_runtime_config, golden_results_data, binary, program_context, op_context
+):
     import torch
     import ttrt.runtime
     import ttrt.binary
@@ -111,12 +130,23 @@ def golden(binary, programContext, opContext):
     print("-----------executing golden comparision-----------")
 
     try:
-        loc = ttrt.runtime.get_op_loc_info(opContext)
+        op_debug_str = ttrt.runtime.get_op_debug_str(op_context)
 
+        # find matching golden tensor based on loc in op debug string
+        match = re.search(r"loc\(([^)]+)\)", op_debug_str)
+
+        if not match:
+            print(f"debug_str={op_debug_str}")
+            print("No location found in debug string - skipping golden comparison")
+            return
+
+        loc = match.group(1).replace('"', "")
         print(f"found location={loc}")
 
         op_golden_tensor = binary.get_debug_info_golden(loc)
-        op_output_tensor = ttrt.runtime.get_op_output_tensor(opContext, programContext)
+        op_output_tensor = ttrt.runtime.get_op_output_tensor(
+            op_context, program_context
+        )
 
         if len(op_golden_tensor) == 0:
             print("Golden tensor is empty - skipping golden comparison")
@@ -139,11 +169,52 @@ def golden(binary, programContext, opContext):
             op_output_tensor, dtype=torch.float32
         ).flatten()
 
+        if golden_runtime_config.save_golden_tensors:
+            torch.save(
+                golden_tensor_torch,
+                f"{golden_runtime_config.artifact_dir}/{loc}_golden.pt",
+            )
+            torch.save(
+                output_tensor_torch,
+                f"{golden_runtime_config.artifact_dir}/{loc}_device.pt",
+            )
+
         _, _, cal_pcc, output_str = get_atol_rtol_pcc(
             golden_tensor_torch, output_tensor_torch
         )
 
         print(f"PCC={cal_pcc}")
         print(output_str)
+
+        results = {}
+        results["expected_pcc"] = golden_runtime_config.pcc
+        results["actual_pcc"] = cal_pcc
+        results["atol"] = golden_runtime_config.atol
+        results["rtol"] = golden_runtime_config.rtol
+        results["allclose"] = torch.allclose(
+            golden_tensor_torch,
+            output_tensor_torch,
+            atol=golden_runtime_config.atol,
+            rtol=golden_runtime_config.rtol,
+        )
+        results["max"] = torch.max(
+            torch.abs(golden_tensor_torch - output_tensor_torch)
+        ).item()
+        results["mean_absolute_error"] = torch.mean(
+            torch.abs(golden_tensor_torch - output_tensor_torch)
+        ).item()
+        results["root_mean_square_error"] = torch.sqrt(
+            torch.mean((golden_tensor_torch - output_tensor_torch) ** 2)
+        ).item()
+        results["cosine_similarity"] = torch.nn.functional.cosine_similarity(
+            golden_tensor_torch.unsqueeze(0), output_tensor_torch.unsqueeze(0)
+        ).item()
+
+        golden_results_data[loc] = results
+
     finally:
         print("-----------finished executing golden comparision-----------")
+
+
+def get_golden_fn(golden_runtime_config, golden_results_data):
+    return partial(golden_partial_function, golden_runtime_config, golden_results_data)
diff --git a/runtime/tools/python/ttrt/common/run.py b/runtime/tools/python/ttrt/common/run.py
index 19ad61e24..b6a6bf51d 100644
--- a/runtime/tools/python/ttrt/common/run.py
+++ b/runtime/tools/python/ttrt/common/run.py
@@ -18,7 +18,7 @@
 
 from ttrt.common.util import *
 from ttrt.common.query import Query
-from ttrt.common.golden import golden
+from ttrt.common.golden import get_golden_fn, GoldenRuntimeConfig
 
 
 class Run:
@@ -103,6 +103,13 @@ def initialize_api():
             choices=None,
             help="atol for golden test",
         )
+        Run.register_arg(
+            name="--pcc",
+            type=float,
+            default=0.99,
+            choices=None,
+            help="pcc for golden test",
+        )
         Run.register_arg(
             name="--seed",
             type=int,
@@ -159,6 +166,13 @@ def initialize_api():
             choices=[True, False],
             help="run golden comparison for intermediate and output tensors",
         )
+        Run.register_arg(
+            name="--save-golden-tensors",
+            type=bool,
+            default=False,
+            choices=[True, False],
+            help="save golden and device tensors that are compared during callback runtime",
+        )
         Run.register_arg(
             name="binary",
             type=str,
@@ -348,9 +362,6 @@ def _execute(binaries):
                 self.logging.warning(f"no binaries found to run - returning early")
                 return
 
-            if self["--golden"]:
-                callback_env = ttrt.runtime.DebugHooks.get(golden)
-
             debug_env = ttrt.runtime.DebugEnv.get(
                 self["--load-kernels-from-disk"], self["--enable-async-ttnn"]
             )
@@ -373,6 +384,9 @@ def _execute(binaries):
                     try:
                         self.logging.info(f"evaluating binary={bin.file_path}")
 
+                        if self["--save-artifacts"]:
+                            self.artifacts.create_binary_artifacts_folder(bin)
+
                         program_indices = []
                         if self["--program-index"] == "all":
                             program_indices.extend(range(bin.get_num_programs()))
@@ -440,6 +454,20 @@ def _execute(binaries):
                                 total_outputs.append(outputs)
 
                             event = None
+                            golden_results_data = {}
+                            if self["--golden"]:
+                                callback_env = ttrt.runtime.DebugHooks.get(
+                                    get_golden_fn(
+                                        GoldenRuntimeConfig(
+                                            self["--atol"],
+                                            self["--rtol"],
+                                            self["--pcc"],
+                                            f"{self.artifacts.get_binary_folder_path(bin)}/run/program_{program_index}",
+                                            self["--save-golden-tensors"],
+                                        ),
+                                        golden_results_data,
+                                    )
+                                )
                             for loop in range(self["--loops"]):
                                 self.logging.debug(
                                     f"starting loop={loop+1}/{self['--loops']} for binary={bin.file_path}"
@@ -519,6 +547,28 @@ def _execute(binaries):
                                 self.logging.debug(f"{tensor}\n")
 
                             device.deallocate_buffers()
+
+                            # if golden comparison is enabled, check golden results json file to see if test passed
+                            if self["--golden"]:
+                                if self["--save-artifacts"]:
+                                    golden_results_file_path = f"{self.artifacts.get_binary_folder_path(bin)}/run/program_{program_index}/golden_results.json"
+
+                                    with open(
+                                        golden_results_file_path, "w"
+                                    ) as json_file:
+                                        json.dump(
+                                            golden_results_data, json_file, indent=4
+                                        )
+
+                                for loc, golden_data in golden_results_data.items():
+                                    if (
+                                        golden_data["actual_pcc"]
+                                        < golden_data["expected_pcc"]
+                                    ):
+                                        raise Exception(
+                                            f"Failed: golden comparison failed for program={program_index}, actual_pcc={golden_data['actual_pcc']} < expected_pcc={golden_data['expected_pcc']}"
+                                        )
+
                     except Exception as e:
                         test_result = {
                             "file_path": bin.file_path,
diff --git a/runtime/tools/python/ttrt/common/util.py b/runtime/tools/python/ttrt/common/util.py
index 45e0a9db9..f1b7cfdc4 100644
--- a/runtime/tools/python/ttrt/common/util.py
+++ b/runtime/tools/python/ttrt/common/util.py
@@ -409,16 +409,22 @@ def clean_artifacts(self):
     def clean_binary_artifacts(self, binary):
         self.file_manager.remove_directory(self.get_binary_folder_path(binary))
 
+    def create_binary_artifacts_folder(self, binary):
+        binary_folder = self.get_binary_folder_path(binary)
+        self.file_manager.create_directory(binary_folder)
+        self.file_manager.create_directory(f"{binary_folder}/run")
+        self.file_manager.create_directory(f"{binary_folder}/perf")
+
+        for program in binary.programs:
+            program_folder = f"{binary_folder}/run/program_{program.index}"
+            self.file_manager.create_directory(program_folder)
+
     def save_binary(self, binary, query=None):
         binary_folder = self.get_binary_folder_path(binary)
 
         self.logging.info(
             f"saving binary={binary.file_path} to binary_folder={binary_folder}"
         )
-        self.file_manager.create_directory(binary_folder)
-        self.file_manager.create_directory(f"{binary_folder}/run")
-        self.file_manager.create_directory(f"{binary_folder}/perf")
-
         self.file_manager.copy_file(f"{binary_folder}", binary.file_path)
 
         for program in binary.programs:
@@ -427,20 +433,19 @@ def save_binary(self, binary, query=None):
             self.logging.info(
                 f"saving program={program.index} for binary={binary.file_path} to program_folder={program_folder}"
             )
-            self.file_manager.create_directory(program_folder)
 
             for i in range(len(program.input_tensors)):
                 self.save_torch_tensor(
                     program_folder,
                     program.input_tensors[i],
-                    f"program_{program.index}_input_{i}.pt",
+                    f"input_{i}.pt",
                 )
 
             for i in range(len(program.output_tensors)):
                 self.save_torch_tensor(
                     program_folder,
                     program.output_tensors[i],
-                    f"program_{program.index}_output_{i}.pt",
+                    f"output_{i}.pt",
                 )
 
         if query != None:
diff --git a/runtime/tools/python/ttrt/runtime/module.cpp b/runtime/tools/python/ttrt/runtime/module.cpp
index 47b42eab5..2efdd1a03 100644
--- a/runtime/tools/python/ttrt/runtime/module.cpp
+++ b/runtime/tools/python/ttrt/runtime/module.cpp
@@ -222,4 +222,12 @@ PYBIND11_MODULE(_C, m) {
               &tt::runtime::ttnn::test::getHostRowMajorLayout, py::arg("dtype"),
               "Get host row major layout");
 #endif
+
+  /**
+   * Cleanup code to force a well ordered destruction w.r.t. the GIL
+   */
+  auto cleanup_callback = []() {
+    ::tt::runtime::debug::Hooks::get().unregisterHooks();
+  };
+  m.add_object("_cleanup", py::capsule(cleanup_callback));
 }