Improve Acto's results' interpretability and docs

Signed-off-by: Tyler Gu <[email protected]>
xlab-uiuc · Jan 16, 2024 · 9bdacc4 · 9bdacc4
1 parent 9c942b5
commit 9bdacc4
Show file tree

Hide file tree

Showing 183 changed files with 633,153 additions and 364,561 deletions.
diff --git a/acto/checker/impl/tests/__init__.py b/acto/checker/impl/tests/__init__.py
@@ -83,4 +83,7 @@ def load_snapshot(
         cli_result=load_cli_output(checker_name, test_case_id, load_prev),
         system_state=load_system_state(checker_name, test_case_id, load_prev),
         operator_log=load_operator_log(checker_name, test_case_id, load_prev),
+        events={},
+        not_ready_pods_logs=None,
+        generation=0,
     )
diff --git a/acto/checker/impl/tests/test_crash.py b/acto/checker/impl/tests/test_crash.py
@@ -16,7 +16,15 @@ def checker_func(s: Snapshot) -> Optional[OracleResult]:
     return checker.check(
         0,
         s,
-        Snapshot(input_cr={}, cli_result={}, system_state={}, operator_log=[]),
+        Snapshot(
+            input_cr={},
+            cli_result={},
+            system_state={},
+            operator_log=[],
+            events={},
+            not_ready_pods_logs=None,
+            generation=0,
+        ),
     )
 
 
@@ -33,7 +41,7 @@ def checker_func(s: Snapshot) -> Optional[OracleResult]:
         )
     ),
 )
-def test_check(test_case_id, result_dict):
+def test_crash(test_case_id, result_dict):
     snapshot = load_snapshot("crash", test_case_id)
     oracle_result = checker_func(snapshot)
     assert oracle_result == result_dict
diff --git a/acto/checker/impl/tests/test_health.py b/acto/checker/impl/tests/test_health.py
@@ -16,7 +16,15 @@ def checker_func(s: Snapshot) -> Optional[OracleResult]:
     return checker.check(
         0,
         s,
-        Snapshot(input_cr={}, cli_result={}, system_state={}, operator_log=[]),
+        Snapshot(
+            input_cr={},
+            cli_result={},
+            system_state={},
+            operator_log=[],
+            events={},
+            not_ready_pods_logs=None,
+            generation=0,
+        ),
     )
 
 

diff --git a/acto/checker/impl/tests/test_snapshot.py b/acto/checker/impl/tests/test_snapshot.py
@@ -24,12 +24,18 @@ def test_delta():
         cli_result={},
         system_state={},
         operator_log=[],
+        events={},
+        not_ready_pods_logs=None,
+        generation=0,
     )
     snapshot_curr = Snapshot(
         input_cr=yaml.safe_load(input_curr),
         cli_result={},
         system_state={},
         operator_log=[],
+        events={},
+        not_ready_pods_logs=None,
+        generation=0,
     )
     input_delta, _ = snapshot_curr.delta(snapshot_prev)
     print(input_delta)

diff --git a/acto/engine.py b/acto/engine.py
@@ -393,7 +393,7 @@ def run_trial(
             self.kubeconfig,
             self.context_name,
             wait_time=self.wait_time,
-            operator_container_name=self.deploy.operator_container_name
+            operator_container_name=self.deploy.operator_container_name,
         )
         checker: CheckerSet = self.checker_t(
             self.context,
@@ -404,7 +404,17 @@ def run_trial(
         )
 
         curr_input = self.input_model.get_seed_input()
-        self.snapshots.append(Snapshot(input_cr=curr_input))
+        self.snapshots.append(
+            Snapshot(
+                input_cr=curr_input,
+                cli_result={},
+                generation=0,
+                system_state={},
+                operator_log=[],
+                not_ready_pods_logs={},
+                events={},
+            )
+        )
 
         generation = 0
         while (
@@ -429,7 +439,7 @@ def run_trial(
                 # break and move to the next trial
                 if test_groups is None:
                     return TrialResult(
-                        trial_id=curr_trial,
+                        trial_id=f"trial-{self.worker_id + self.sequence_base:02d}-{self.curr_trial:04d}",
                         duration=time.time() - trial_start_time,
                         error=None,
                     )
@@ -493,7 +503,7 @@ def run_trial(
                         ):
                             logger.error("Connection refused, exiting")
                             return TrialResult(
-                                trial_id=curr_trial,
+                                trial_id=f"trial-{self.worker_id + self.sequence_base:02d}-{self.curr_trial:04d}",
                                 duration=time.time() - trial_start_time,
                                 error=None,
                             )
@@ -505,7 +515,7 @@ def run_trial(
                             )
                             generation += 1
                             return TrialResult(
-                                trial_id=curr_trial,
+                                trial_id=f"trial-{self.worker_id + self.sequence_base:02d}-{self.curr_trial:04d}",
                                 duration=time.time() - trial_start_time,
                                 error=run_result.oracle_result,
                             )
@@ -544,7 +554,8 @@ def run_trial(
                 generation += 1
 
                 return TrialResult(
-                    trial_id=curr_trial,
+                    trial_id=f"trial-{self.worker_id + self.sequence_base:02d}"
+                    + f"-{self.curr_trial:04d}",
                     duration=time.time() - trial_start_time,
                     error=run_result.oracle_result,
                 )
@@ -554,7 +565,7 @@ def run_trial(
                 break
 
         return TrialResult(
-            trial_id=curr_trial,
+            trial_id=f"trial-{self.worker_id + self.sequence_base:02d}-{self.curr_trial:04d}",
             duration=time.time() - trial_start_time,
             error=None,
         )
@@ -657,7 +668,10 @@ def run_and_check(
 
         run_result = RunResult(
             testcase=testcase_signature,
-            generation=generation,
+            step_id=StepID(
+                trial=runner.trial_dir,
+                generation=generation,
+            ),
             oracle_result=oracle_result,
             cli_status=cli_result,
             is_revert=revert,
@@ -985,7 +999,11 @@ def __learn(self, context_file, helper_crd, analysis_only=False):
                     break
             apiclient = kubernetes_client(learn_kubeconfig, learn_context_name)
             runner = Runner(
-                self.context, "learn", learn_kubeconfig, learn_context_name, self.deploy.operator_container_name
+                self.context,
+                "learn",
+                learn_kubeconfig,
+                learn_context_name,
+                self.deploy.operator_container_name,
             )
             runner.run_without_collect(
                 self.operator_config.seed_custom_resource
@@ -1098,7 +1116,7 @@ def run(
                 self.is_reproduce,
                 self.apply_testcase_f,
                 self.acto_namespace,
-                self.operator_config.diff_ignore_fields
+                self.operator_config.diff_ignore_fields,
             )
             runners.append(runner)
 

diff --git a/acto/post_process/collect_test_result.py b/acto/post_process/collect_test_result.py
@@ -0,0 +1,59 @@
+import argparse
+import json
+
+import pandas as pd
+
+from acto.lib.operator_config import OperatorConfig
+from acto.post_process.post_process import PostProcessor
+
+
+class CollectTestResult(PostProcessor):
+    """Post processor that collects the oracle results of every step into a CSV"""
+
+    def post_process(self, output_path: str):
+        """Post process the results by dumping them as CSV to `output_path`"""
+        return self.dump_csv(output_path)
+
+    def dump_csv(self, output_path: str):
+        """Dump the results to a CSV file
+
+        One row is written per step of every trial in `self.trial_to_steps`,
+        holding the step ID, the testcase and each oracle's result. Rows are
+        sorted by the stringified step ID.
+
+        Args:
+            output_path: Path of the CSV file to write
+        """
+        columns = [
+            "Trial number",
+            "Testcase",
+            "Crash",
+            "Health",
+            "Operator log",
+            "Consistency",
+            "Differential",
+            "Custom",
+        ]
+
+        normal_results = []
+        for trial in self.trial_to_steps.values():
+            for step in trial.steps.values():
+                run_result = step.run_result
+                oracle_result = run_result.oracle_result
+                normal_results.append(
+                    {
+                        "Trial number": str(run_result.step_id),
+                        "Testcase": json.dumps(run_result.testcase),
+                        "Crash": oracle_result.crash,
+                        "Health": oracle_result.health,
+                        "Operator log": oracle_result.operator_log,
+                        "Consistency": oracle_result.consistency,
+                        "Differential": str(oracle_result.differential),
+                        "Custom": oracle_result.custom,
+                    }
+                )
+
+        # Pass the columns explicitly: a DataFrame built from an empty list
+        # has no columns at all, so sorting by "Trial number" would raise
+        # KeyError when the test run contains no steps. With explicit columns
+        # an empty run produces a header-only CSV instead.
+        normal_results_df = pd.DataFrame(normal_results, columns=columns)
+        normal_results_df = normal_results_df.sort_values(by=["Trial number"])
+        normal_results_df.to_csv(output_path, index=False)
+
+
+def main():
+    """Main entry point.
+
+    Loads the operator config from the JSON file given by --config, builds a
+    CollectTestResult over the directory given by --testrun-dir, and writes
+    the collected results as CSV to --output (default: ./result.csv).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, required=True)
+    parser.add_argument("--testrun-dir", type=str, required=True)
+    # Optional so that existing invocations keep writing to ./result.csv
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="./result.csv",
+        help="Path of the CSV file to write",
+    )
+    args = parser.parse_args()
+
+    with open(args.config, "r", encoding="utf-8") as config_file:
+        config = OperatorConfig.model_validate(json.load(config_file))
+    post_processor = CollectTestResult(
+        args.testrun_dir,
+        config,
+    )
+    post_processor.post_process(args.output)
+
+
+# Run the collector only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/acto/post_process/post_diff_test.py b/acto/post_process/post_diff_test.py
@@ -27,15 +27,15 @@
 from acto.kubectl_client.kubectl import KubectlClient
 from acto.kubernetes_engine import base, kind
 from acto.lib.operator_config import OperatorConfig
+from acto.post_process.post_process import PostProcessor
 from acto.result import DifferentialOracleResult, StepID
 from acto.runner import Runner
 from acto.serialization import ActoEncoder
 from acto.snapshot import Snapshot
+from acto.trial import Step
 from acto.utils import add_acto_label, get_thread_logger
 from acto.utils.error_handler import handle_excepthook, thread_excepthook
 
-from .post_process import PostProcessor, Step
-
 
 class DiffTestResult(pydantic.BaseModel):
     """The result of a diff test
@@ -66,7 +66,7 @@ def to_file(self, file_path: str):
         """Dump the DiffTestResult to a file"""
         with open(file_path, "w", encoding="utf-8") as f:
             json.dump(
-                self.model_dump(mode="json"), f, cls=ActoEncoder, indent=6
+                self.model_dump(), f, cls=ActoEncoder, indent=4
             )
 
 
@@ -456,7 +456,7 @@ def run(self):
             err = True
             difftest_result = DiffTestResult(
                 input_digest=group.iloc[0]["input_digest"],
-                snapshot=snapshot.serialize(),
+                snapshot=snapshot,
                 originals=group[["trial", "gen"]].to_dict("records"),
                 time={
                     "k8s_bootstrap": after_k8s_bootstrap_time
@@ -519,21 +519,24 @@ def __init__(
         logger = get_thread_logger(with_prefix=True)
 
         self.all_inputs = []
-        for trial, steps in self.trial_to_steps.items():
-            for step in steps.values():
-                invalid = step.runtime_result.is_invalid_input()
+        for trial_name, trial in self.trial_to_steps.items():
+            for step in trial.steps.values():
+                invalid = step.run_result.is_invalid_input()
                 if invalid and not ignore_invalid:
                     continue
                 self.all_inputs.append(
                     {
-                        "trial": trial,
-                        "gen": step.gen,
-                        "input": step.input,
-                        "input_digest": step.input_digest,
-                        "operator_log": step.operator_log,
-                        "system_state": step.system_state,
-                        "cli_output": step.cli_output,
-                        "runtime_result": step.runtime_result,
+                        "trial": trial_name,
+                        "gen": step.run_result.step_id.generation,
+                        "input": step.snapshot.input_cr,
+                        "input_digest": hashlib.md5(
+                            json.dumps(
+                                step.snapshot.input_cr, sort_keys=True
+                            ).encode("utf-8")
+                        ).hexdigest(),
+                        "operator_log": step.snapshot.operator_log,
+                        "system_state": step.snapshot.system_state,
+                        "cli_output": step.snapshot.cli_result,
                     }
                 )
 
@@ -547,7 +550,6 @@ def __init__(
                 "operator_log",
                 "system_state",
                 "cli_output",
-                "runtime_result",
             ],
         )
 
@@ -680,7 +682,9 @@ def check_diff_test_result(
                     continue
 
                 trial_basename = os.path.basename(trial)
-                original_result = self.trial_to_steps[trial_basename][str(gen)]
+                original_result = self.trial_to_steps[trial_basename].steps[
+                    str(gen)
+                ]
                 step_result = PostDiffTest.check_diff_test_step(
                     diff_test_result,
                     original_result,
@@ -714,17 +718,17 @@ def check_diff_test_step(
         additional_runner: Optional[AdditionalRunner] = None,
     ) -> Optional[DifferentialOracleResult]:
         logger = get_thread_logger(with_prefix=True)
-        trial_dir = original_result.trial_dir
-        gen = original_result.gen
+        trial_dir = original_result.run_result.step_id.trial
+        gen = original_result.run_result.step_id.generation
 
-        if original_result.runtime_result.oracle_result.health is not None:
+        if original_result.run_result.oracle_result.health is not None:
             return None
 
-        original_operator_log = original_result.operator_log
+        original_operator_log = original_result.snapshot.operator_log
         if invalid_input_message_regex(original_operator_log):
             return None
 
-        original_system_state = original_result.system_state
+        original_system_state = original_result.snapshot.system_state
         result = compare_system_equality(
             diff_test_result.snapshot.system_state,
             original_system_state,