diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 0000000000..aa5de3ecfd
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,33 @@
+name: TileDB-SOMA Profiler run
+
+on:
+  pull_request:
+
+  push:
+    branches:
+      - main
+      - 'release-*'
+  workflow_dispatch:
+
+jobs:
+
+  profiler:
+    name: Run profiler
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Setup profiler
+        run: |
+          python -m venv profiler_env
+          source profiler_env/bin/activate
+          pip install -e ./profiler
+
+      # The script changes into its own directory, so it can be started
+      # from the repository root.
+      - name: Run performance check
+        run: ./profiler/perf_checker.sh
diff --git a/profiler/ann_data.py b/profiler/ann_data.py
new file mode 100644
index 0000000000..0fea471665
--- /dev/null
+++ b/profiler/ann_data.py
@@ -0,0 +1,26 @@
+"""Benchmark workload: time a Census axis query exported to AnnData."""
+
+from time import perf_counter
+
+import cellxgene_census
+
+import tiledbsoma as soma
+
+census_S3_latest = dict(census_version="2023-10-23")
+
+
+def main():
+    """Export the RNA cells with tissue_general == 'eye' and print the wall time."""
+    t1 = perf_counter()
+    with cellxgene_census.open_soma(**census_S3_latest) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'eye'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+    t2 = perf_counter()
+    print(f"End to end time {t2 - t1}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/perf_checker.sh b/profiler/perf_checker.sh
new file mode 100755
index 0000000000..8140301743
--- /dev/null
+++ b/profiler/perf_checker.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Builds a virtualenv, profiles ann_data.py and then compares the recorded
+# runs with profile_report.py.
+
+# Abort on the first failing command so a broken install cannot be mistaken
+# for a clean performance run.
+set -e
+
+# The relative paths below are resolved against this script's directory.
+cd "$(dirname "$0")"
+
+python -m venv perf
+source perf/bin/activate
+pip install gitpython
+pip install psutil
+pip install somacore
+pip install tiledbsoma
+pip install cellxgene_census
+# `-m profiler` resolves to ./profiler.py in this directory, so nothing named
+# "profiler" is installed from PyPI.
+python -m profiler "python ann_data.py" -t gtime
+
+python ./profile_report.py
diff --git a/profiler/profile_report.py b/profiler/profile_report.py
new file mode 100644
index 0000000000..a737eca613
--- /dev/null
+++ b/profiler/profile_report.py
@@ -0,0 +1,43 @@
+"""Compare the two most recent profiler runs and flag a slowdown."""
+
+import data
+
+# Processes the set of previously written logs
+
+threshold = 1.10  # Allowed ratio of current time to previous time
+
+COMMAND = "python ann_data.py"
+
+
+def total_time(record):
+    """Return the figure compared between runs: user time plus elapsed time."""
+    return float(record.user_time_sec + record.elapsed_time)
+
+
+def main():
+    """Print the recorded runs and exit non-zero if the newest one slowed down."""
+    db = data.FileBasedProfileDB()
+    runs = sorted(db.find(COMMAND), key=lambda record: record.timestamp)
+    for i, run in enumerate(runs):
+        print(f"{i} dt[{i}].user_time_sec = {run.user_time_sec} ts {run.timestamp}")
+
+    if len(runs) < 2:
+        print("Fewer than two profiled runs recorded; nothing to compare")
+        return
+
+    prev, curr = runs[-2:]
+    prev_time, curr_time = total_time(prev), total_time(curr)
+    print(f"Prev = {prev_time} Curr = {curr_time}")
+
+    if curr_time > threshold * prev_time:
+        raise SystemExit(
+            f"Potential performance degradation detected {prev_time} vs {curr_time}"
+        )
+    print("No recent performance degradation detected")
+    print(
+        f"Prev TBD version = {prev.tiledbsoma_version} Curr TBD version = {curr.tiledbsoma_version}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/profiler.py b/profiler/profiler.py
index 15d14654bd..282153219d 100644
--- a/profiler/profiler.py
+++ b/profiler/profiler.py
@@ -10,11 +10,10 @@
 from typing import Any, Dict, Optional
 
 import somacore
+from context_generator import host_context
 
 import tiledbsoma
-
-from .context_generator import host_context
-from .data import FileBasedProfileDB, ProfileData, ProfileDB
+from data import FileBasedProfileDB, ProfileData, ProfileDB
 
 GNU_TIME_FORMAT = (
     'Command being timed: "%C"\n'
@@ -149,6 +148,7 @@ def main():
         required=False,
         help="The flamegraph output produced by prof2",
     )
+
     args = parser.parse_args(sys.argv[1:])
     print(f"Command to be run: {args.command}", file=stderr)
 
@@ -159,7 +159,6 @@ def main():
         stderr=PIPE,
     )
 
-    print(f"Running command to be profiled, PID = {p.pid}", file=stderr)
     # Running additional profilers to extract flame graphs for the run
     p1 = None
     p2 = None
@@ -206,3 +205,7 @@ def main():
         f"{data.command_key=}, {data.command=}, {data.exit_status=}, {db_record_file=}",
         file=stderr,
     )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/setup.py b/profiler/setup.py
index 85845ceb7a..7921b399ce 100644
--- a/profiler/setup.py
+++ b/profiler/setup.py
@@ -4,5 +4,5 @@
     name="soma-profiler",
     version="1.0",
     packages=find_packages(),
-    requires=["gitpython", "psutil"],
+    install_requires=["gitpython", "psutil", "tiledbsoma", "cellxgene_census"],
 )