diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 0000000000..f2e275e1bc
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,38 @@
+name: TileDB-SOMA Profiler run
+
+# Run only when the profiler or this workflow changes.
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - "profiler/**"
+      - ".github/workflows/profiler.yml"
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - "profiler/**"
+      - ".github/workflows/profiler.yml"
+
+jobs:
+  check-links:
+    name: Setup profiler
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install profiler
+        run: |
+          python -m venv profiler_env
+          source profiler_env/bin/activate
+          pip install -e ./profiler
+
+      # perf_checker.sh lives in profiler/ and uses paths relative to it.
+      - name: Run performance check
+        working-directory: profiler
+        run: ./perf_checker.sh
diff --git a/profiler/ann_data.py b/profiler/ann_data.py
new file mode 100644
index 0000000000..ca28e26d0b
--- /dev/null
+++ b/profiler/ann_data.py
@@ -0,0 +1,34 @@
+"""Benchmark workload: time a Census axis query exported to AnnData."""
+import os
+from time import perf_counter
+
+import cellxgene_census
+
+import tiledbsoma as soma
+
+census_S3_latest = dict(census_version="latest")
+census_local_copy = dict(uri="/Users/brobatmili/projects/census_data/")
+
+
+def main():
+    """Run the query once and print the elapsed time."""
+    # The local copy is a hard-coded developer-machine path; where it is
+    # absent (e.g. on a CI runner) fall back to census_version="latest".
+    if os.path.isdir(census_local_copy["uri"]):
+        census_args = census_local_copy
+    else:
+        census_args = census_S3_latest
+
+    t1 = perf_counter()
+    with cellxgene_census.open_soma(**census_args) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'eye'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+    t2 = perf_counter()
+    print(f"End to end time {t2 - t1}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/perf_checker.sh b/profiler/perf_checker.sh
new file mode 100755
index 0000000000..451433645f
--- /dev/null
+++ b/profiler/perf_checker.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Profile ann_data.py, then compare the two most recent recorded runs.
+set -eo pipefail
+
+# ann_data.py, profiler.py and top_profiler.py are referenced by relative path.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+python -m venv perf
+source perf/bin/activate
+pip install gitpython psutil somacore tiledbsoma cellxgene_census
+
+# cwd is first on sys.path for -m, so this runs the local profiler.py.
+python -m profiler "python ann_data.py" -t gtime
+
+python ./top_profiler.py
diff --git a/profiler/profiler.py b/profiler/profiler.py
index 8e3ff3c749..282153219d 100644
--- a/profiler/profiler.py
+++ b/profiler/profiler.py
@@ -10,10 +10,9 @@
 from typing import Any, Dict, Optional
 
 import somacore
+from context_generator import host_context
 
 import tiledbsoma
-
-from context_generator import host_context
 from data import FileBasedProfileDB, ProfileData, ProfileDB
 
 GNU_TIME_FORMAT = (
@@ -207,5 +206,6 @@ def main():
         file=stderr,
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/profiler/setup.py b/profiler/setup.py
index 85845ceb7a..e3d6722cd8 100644
--- a/profiler/setup.py
+++ b/profiler/setup.py
@@ -4,5 +4,12 @@
     name="soma-profiler",
     version="1.0",
     packages=find_packages(),
-    requires=["gitpython", "psutil"],
+    # install_requires (not the legacy "requires") is what pip resolves.
+    install_requires=[
+        "gitpython",
+        "somacore",
+        "psutil",
+        "tiledbsoma",
+        "cellxgene_census",
+    ],
 )
diff --git a/profiler/top_profiler.py b/profiler/top_profiler.py
new file mode 100644
index 0000000000..e867b7c00d
--- /dev/null
+++ b/profiler/top_profiler.py
@@ -0,0 +1,42 @@
+"""Compare the two most recent profiler runs and fail on a slowdown."""
+import data
+
+# Command whose recorded runs are compared.
+PROFILED_COMMAND = "python ann_data.py"
+
+# Maximum allowed ratio of latest to previous user time (1.10 = 10% slower).
+threshold = 1.10
+
+
+def main():
+    """Print every recorded run, then compare the latest two.
+
+    Raises:
+        SystemExit: if the latest run's user time exceeds ``threshold``
+            times the previous run's.
+    """
+    db = data.FileBasedProfileDB()
+    # Order by timestamp so the last two entries really are the newest.
+    runs = sorted(db.find(PROFILED_COMMAND), key=lambda run: run.timestamp)
+
+    for i, run in enumerate(runs):
+        print(f"{i} user_time_sec = {run.user_time_sec} ts {run.timestamp}")
+
+    # A first run has no baseline; indexing two entries would raise.
+    if len(runs) < 2:
+        print("Fewer than two recorded runs; nothing to compare")
+        return
+
+    previous, latest = (float(run.user_time_sec) for run in runs[-2:])
+    print(f"previous = {previous} latest = {latest}")
+
+    # One-sided on purpose: a faster latest run is not a degradation.
+    if latest > threshold * previous:
+        raise SystemExit(
+            f"Potential performance degradation detected {previous} vs {latest}"
+        )
+    print("No recent performance degradation detected")
+
+
+if __name__ == "__main__":
+    main()