recalibrated values from script

aptos-labs · Dec 2, 2024 · e4d29e8
1 parent f3ef251
commit e4d29e8
Show file tree

Hide file tree

Showing 7 changed files with 159 additions and 102 deletions.
diff --git a/aptos-move/e2e-benchmark/data/calibration_values.tsv b/aptos-move/e2e-benchmark/data/calibration_values.tsv
@@ -1,26 +1,26 @@
-Loop { loop_count: Some(100000), loop_type: NoOp }	60	0.955	1.074	41893.7
-Loop { loop_count: Some(10000), loop_type: Arithmetic }	60	0.965	1.078	25915.0
-CreateObjects { num_objects: 10, object_payload_size: 0 }	60	0.924	1.082	158.1
-CreateObjects { num_objects: 10, object_payload_size: 10240 }	60	0.951	1.118	9356.2
-CreateObjects { num_objects: 100, object_payload_size: 0 }	60	0.926	1.082	1574.2
-CreateObjects { num_objects: 100, object_payload_size: 10240 }	60	0.952	1.092	11541.9
-InitializeVectorPicture { length: 128 }	10	0.965	1.038	163.3
-VectorPicture { length: 128 }	10	0.938	1.060	48.8
-VectorPictureRead { length: 128 }	10	0.977	1.077	46.4
-InitializeVectorPicture { length: 30720 }	60	0.948	1.123	27893.4
-VectorPicture { length: 30720 }	60	0.931	1.125	6923.1
-VectorPictureRead { length: 30720 }	60	0.934	1.102	6923.1
-SmartTablePicture { length: 30720, num_points_per_txn: 200 }	60	0.952	1.109	43594.7
-SmartTablePicture { length: 1048576, num_points_per_txn: 300 }	60	0.957	1.120	73865.4
-ResourceGroupsSenderWriteTag { string_length: 1024 }	60	0.934	1.134	15.0
-ResourceGroupsSenderMultiChange { string_length: 1024 }	60	0.929	1.122	32.3
-TokenV1MintAndTransferFT	60	0.958	1.093	385.2
-TokenV1MintAndTransferNFTSequential	60	0.973	1.139	588.1
-TokenV2AmbassadorMint { numbered: true }	60	0.960	1.141	512.5
-LiquidityPoolSwap { is_stable: true }	60	0.961	1.103	590.3
-LiquidityPoolSwap { is_stable: false }	60	0.954	1.134	552.2
-CoinInitAndMint	10	0.975	1.043	199.6
-FungibleAssetMint	10	0.954	1.038	236.3
-IncGlobalMilestoneAggV2 { milestone_every: 1 }	10	0.960	1.047	32.9
-IncGlobalMilestoneAggV2 { milestone_every: 2 }	10	0.971	1.066	18.1
-EmitEvents { count: 1000 }	10	0.969	1.052	8615.5
+Loop { loop_count: Some(100000), loop_type: NoOp }	60	0.960	1.119	42122.6
+Loop { loop_count: Some(10000), loop_type: Arithmetic }	60	0.956	1.074	26240.7
+CreateObjects { num_objects: 10, object_payload_size: 0 }	60	0.938	1.168	156.6
+CreateObjects { num_objects: 10, object_payload_size: 10240 }	60	0.924	1.086	9713.2
+CreateObjects { num_objects: 100, object_payload_size: 0 }	60	0.922	1.275	1577.0
+CreateObjects { num_objects: 100, object_payload_size: 10240 }	60	0.935	1.070	11728.7
+InitializeVectorPicture { length: 128 }	60	0.926	1.069	169.4
+VectorPicture { length: 128 }	60	0.908	1.075	50.2
+VectorPictureRead { length: 128 }	60	0.919	1.059	48.0
+InitializeVectorPicture { length: 30720 }	60	0.939	1.127	28404.4
+VectorPicture { length: 30720 }	60	0.936	1.095	6935.6
+VectorPictureRead { length: 30720 }	60	0.939	1.093	6948.2
+SmartTablePicture { length: 30720, num_points_per_txn: 200 }	60	0.947	1.080	43673.3
+SmartTablePicture { length: 1048576, num_points_per_txn: 300 }	60	0.947	1.111	74145.8
+ResourceGroupsSenderWriteTag { string_length: 1024 }	60	0.918	1.075	15.8
+ResourceGroupsSenderMultiChange { string_length: 1024 }	60	0.909	1.169	32.9
+TokenV1MintAndTransferFT	60	0.953	1.069	384.6
+TokenV1MintAndTransferNFTSequential	60	0.938	1.064	600.3
+TokenV2AmbassadorMint { numbered: true }	60	0.951	1.057	516.6
+LiquidityPoolSwap { is_stable: true }	60	0.961	1.139	582.6
+LiquidityPoolSwap { is_stable: false }	60	0.929	1.099	563.0
+CoinInitAndMint	60	0.928	1.130	205.0
+FungibleAssetMint	60	0.930	1.098	235.8
+IncGlobalMilestoneAggV2 { milestone_every: 1 }	60	0.914	1.051	33.5
+IncGlobalMilestoneAggV2 { milestone_every: 2 }	60	0.914	1.105	19.0
+EmitEvents { count: 1000 }	60	0.937	1.158	8818.7
diff --git a/aptos-move/e2e-benchmark/src/main.rs b/aptos-move/e2e-benchmark/src/main.rs
@@ -91,7 +91,9 @@ struct CalibrationInfo {
 }
 
 fn get_parsed_calibration_values() -> HashMap<String, CalibrationInfo> {
-    let calibration_values = fs::read_to_string("aptos-move/e2e-benchmark/data/calibration_values.tsv").expect("Unable to read file");
+    let calibration_values =
+        fs::read_to_string("aptos-move/e2e-benchmark/data/calibration_values.tsv")
+            .expect("Unable to read file");
     calibration_values
         .trim()
         .split('\n')

diff --git a/testsuite/forge.py b/testsuite/forge.py
@@ -619,16 +619,19 @@ def format_pre_comment(context: ForgeContext) -> str:
         context.forge_namespace,
     )
 
-    return textwrap.dedent(
-        f"""
+    return (
+        textwrap.dedent(
+            f"""
             ### Forge is running suite `{context.forge_test_suite}` on {get_testsuite_images(context)}
             * [Grafana dashboard (auto-refresh)]({dashboard_link})
             * [Humio Logs]({humio_logs_link})
             * [Axiom Logs]({axiom_logs_link})
             * [Validator CPU Profile]({validator_cpu_profile_link})
             * [Fullnode CPU Profile]({fullnode_cpu_profile_link})
             """
-    ).lstrip() + format_github_info(context)
+        ).lstrip()
+        + format_github_info(context)
+    )
 
 
 def format_comment(context: ForgeContext, result: ForgeResult) -> str:

diff --git a/testsuite/replay-verify/main.py b/testsuite/replay-verify/main.py
@@ -28,6 +28,7 @@
 
 REPLAY_CONCURRENCY_LEVEL = 1
 
+
 class Network(Enum):
     TESTNET = 1
     MAINNET = 2
@@ -241,6 +242,7 @@ def get_pod_status(self):
     def get_humio_log_link(self):
         return construct_humio_url(self.label, self.name, self.start_time, time.time())
 
+
 class ReplayConfig:
     def __init__(self, network):
         if network == Network.TESTNET:
@@ -253,9 +255,10 @@ def __init__(self, network):
             self.concurrent_replayer = 18
             self.pvc_number = 8
             self.min_range_size = 10_000
-            self.range_size = 2_000_000 
+            self.range_size = 2_000_000
             self.timeout_secs = 400
 
+
 class TaskStats:
     def __init__(self, name):
         self.name = name
@@ -308,7 +311,7 @@ def __init__(
         self.image = image
         self.pvcs = []
         self.config = replay_config
-        
+
     def __str__(self):
         return f"""ReplayScheduler:
             id: {self.id}
@@ -360,7 +363,11 @@ def create_pvc_from_snapshot(self):
             else MAINNET_SNAPSHOT_NAME
         )
         pvcs = create_pvcs_from_snapshot(
-            self.id, snapshot_name, self.namespace, self.config.pvc_number, self.get_label()
+            self.id,
+            snapshot_name,
+            self.namespace,
+            self.config.pvc_number,
+            self.get_label(),
         )
         assert len(pvcs) == self.config.pvc_number, "failed to create all pvcs"
         self.pvcs = pvcs
@@ -504,12 +511,16 @@ def get_image(image_tag=None):
     shell = forge.LocalShell()
     git = forge.Git(shell)
     image_name = "tools"
-    default_latest_image = forge.find_recent_images(
-        shell,
-        git,
-        1,
-        image_name=image_name,
-    )[0] if image_tag is None else image_tag
+    default_latest_image = (
+        forge.find_recent_images(
+            shell,
+            git,
+            1,
+            image_name=image_name,
+        )[0]
+        if image_tag is None
+        else image_tag
+    )
     full_image = f"{forge.GAR_REPO_NAME}/{image_name}:{default_latest_image}"
     return full_image
 
@@ -546,7 +557,7 @@ def print_logs(failed_workpod_logs, txn_mismatch_logs):
         range_size=range_size,
         image=image,
         replay_config=config,
-        network= network,
+        network=network,
         namespace=args.namespace,
     )
     logger.info(f"scheduler: {scheduler}")

diff --git a/testsuite/single_node_performance.py b/testsuite/single_node_performance.py
@@ -189,7 +189,7 @@ class RunGroupConfig:
     RunGroupConfig(key=RunGroupKey("mix_publish_transfer"), key_extra=RunGroupKeyExtra(
         transaction_type_override="publish-package apt-fa-transfer",
         transaction_weights_override="1 100",
-    ), included_in=LAND_BLOCKING_AND_C, waived=True),
+    ), included_in=LAND_BLOCKING_AND_C),
     RunGroupConfig(key=RunGroupKey("batch100-transfer"), included_in=LAND_BLOCKING_AND_C),
     RunGroupConfig(key=RunGroupKey("batch100-transfer", executor_type="NativeVM"), included_in=Flow.CONTINUOUS),
 
@@ -251,7 +251,7 @@ class RunGroupConfig:
     # fee payer sequentializes transactions today. in these tests module publisher is the fee payer, so larger number of modules tests throughput with multiple fee payers
     RunGroupConfig(key=RunGroupKey("no-op-fee-payer"), included_in=LAND_BLOCKING_AND_C),
     RunGroupConfig(key=RunGroupKey("no-op-fee-payer", module_working_set_size=DEFAULT_MODULE_WORKING_SET_SIZE), included_in=Flow.CONTINUOUS),
-    RunGroupConfig(key=RunGroupKey("simple-script"), included_in=LAND_BLOCKING_AND_C, waived=True),
+    RunGroupConfig(key=RunGroupKey("simple-script"), included_in=LAND_BLOCKING_AND_C),
 
     RunGroupConfig(expected_tps=50000, key=RunGroupKey("coin_transfer_connected_components", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--connected-tx-grps 5000", transaction_type_override=""), included_in=Flow.REPRESENTATIVE, waived=True),
     RunGroupConfig(expected_tps=50000, key=RunGroupKey("coin_transfer_hotspot", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--hotspot-probability 0.8", transaction_type_override=""), included_in=Flow.REPRESENTATIVE, waived=True),
@@ -1024,16 +1024,24 @@ def print_table(
         """If you expect your PR to change the performance, you need to recalibrate the values.
 To do so, you should run the test on your branch 6 times
 (https://github.com/aptos-labs/aptos-core/actions/workflows/workflow-run-execution-performance.yaml ; remember to select CONTINUOUS).
-Then go to Humio calibration link (https://gist.github.com/igor-aptos/7b12ca28de03894cddda8e415f37889e),
-update it to your branch, and export values as CSV, and then open and copy values inside
-testsuite/single_node_performance.py testsuite), and add Blockchain oncall as the reviewer.
+Then run the script locally `./testsuite/single_node_performance_calibration.py --branch=YOUR_BRANCH` to update calibration values
+and add Blockchain oncall as the reviewer.
 """
     )
     exit(1)
 
 if move_e2e_benchmark_failed:
     print(
-        "Move e2e benchmark failed, failing the job. See logs at the beginning for more details."
+        """
+Move e2e benchmark failed, failing the job. See logs at the beginning for more details.
+
+If you expect your PR to change the performance, you need to recalibrate the values.
+To do so, you should run the test on your branch 6 times
+(https://github.com/aptos-labs/aptos-core/actions/workflows/workflow-run-execution-performance.yaml ; remember to select CONTINUOUS,
+and don't select to skip move-only e2e tests).
+Then run the script locally `./testsuite/single_node_performance_calibration.py --branch=YOUR_BRANCH --move-e2e` to update calibration values
+and add Blockchain oncall as the reviewer.
+"""
     )
     exit(1)
 

diff --git a/testsuite/single_node_performance_calibration.py b/testsuite/single_node_performance_calibration.py
@@ -3,32 +3,54 @@
 import argparse
 import requests
 
+
 def humio_secret():
+    print(
+        "trying to get a humio secret from gcloud. if it asks for a password, abort and run `gcloud auth login --update-adc` first"
+    )
     import subprocess
-    return subprocess.run(["gcloud", "secrets", "versions", "access", "--secret=ci_humio_read_token", "--project=aptos-shared-secrets", "latest"], capture_output=True).stdout.decode("utf-8")
+
+    return subprocess.run(
+        [
+            "gcloud",
+            "secrets",
+            "versions",
+            "access",
+            "--secret=ci_humio_read_token",
+            "--project=aptos-shared-secrets",
+            "latest",
+        ],
+        capture_output=True,
+    ).stdout.decode("utf-8")
 
 
 def humio_url():
     return "https://cloud.us.humio.com/api/v1/repositories/github/query"
 
+
 def parse_args():
-    parser = argparse.ArgumentParser(description='Benchmark calibration tools')
+    parser = argparse.ArgumentParser(description="Benchmark calibration tools")
 
     parser.add_argument(
-        '--branch', 
+        "--branch",
         type=str,
-        help='Optional branch, if passed - only looks at results run on that branch through adhoc runs',
+        help="Optional branch, if passed - only looks at results run on that branch through adhoc runs",
     )
 
     parser.add_argument(
-        '--move-e2e', 
-        action='store_true',
-        help='Calibrate move e2e test',
+        "--move-e2e",
+        action="store_true",
+        help="Calibrate move e2e test",
+    )
+
+    parser.add_argument(
+        "--time-interval", default="5d", help="Time interval to look at humio for"
     )
 
     return parser.parse_args()
 
-def query_humio(query_string, time_interval="5d"):
+
+def query_humio(query_string, time_interval):
     query = {
         "queryString": query_string,
         "start": time_interval,
@@ -47,6 +69,7 @@ def query_humio(query_string, time_interval="5d"):
 
     return resp.text.strip()
 
+
 def main():
     args = parse_args()
 
@@ -57,6 +80,7 @@ def main():
         | github.workflow.head_branch = "{branch}"
         | "grep_json_aptos_move_vm_perf"
         | parseJson(message)
+        | code_perf_version = "v1"
         """.format(
                 branch=args.branch
             )
@@ -98,6 +122,7 @@ def split_line(line):
                 return res
             else:
                 return line.split(", ")
+
         output_file_name = "aptos-move/e2e-benchmark/data/calibration_values.tsv"
 
     else:
@@ -155,10 +180,13 @@ def split_line(line):
 
         output_file_name = "testsuite/single_node_performance_values.tsv"
 
-    response_text = query_humio(query_string)
+    response_text = query_humio(query_string, time_interval=args.time_interval)
 
     parsed = [
-        {(parts := key_value.split("->"))[0]: parts[1] for key_value in split_line(line)}
+        {
+            (parts := key_value.split("->"))[0]: parts[1]
+            for key_value in split_line(line)
+        }
         for line in response_text.split("\n")
     ]
 
@@ -169,5 +197,6 @@ def split_line(line):
 
     print(f"Written to {output_file_name}")
 
-if __name__ == '__main__':
-    main()
+
+if __name__ == "__main__":
+    main()