diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 0000000000..c3d8be70be
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,29 @@
+name: TileDB-SOMA Profiler run
+
+# Trigger filters must live under each event; a top-level `paths:` key is not
+# valid workflow syntax.
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - ".github/workflows/profiler.yml"
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - ".github/workflows/profiler.yml"
+
+jobs:
+  check-links:
+    name: Setup profiler
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install profiler
+        run: pip install ./profiler
+      - name: Run performance check
+        run: ./profiler/perf_checker.sh
diff --git a/profiler/ann_data.py b/profiler/ann_data.py
new file mode 100644
index 0000000000..671d73ab8a
--- /dev/null
+++ b/profiler/ann_data.py
@@ -0,0 +1,37 @@
+"""Profiling workload: export the eye-tissue slice of the Census to AnnData."""
+import os
+from time import perf_counter
+
+import cellxgene_census
+import tiledbsoma as soma
+
+census_S3_latest = dict(census_version="latest")
+
+
+def census_open_kwargs():
+    """Return the keyword arguments used to open the Census.
+
+    When the CENSUS_URI environment variable is set, that URI is opened;
+    otherwise the ``census_S3_latest`` settings are used.
+    """
+    uri = os.environ.get("CENSUS_URI")
+    if uri:
+        return dict(uri=uri)
+    return census_S3_latest
+
+
+def main():
+    """Run the query once and print the elapsed time."""
+    t1 = perf_counter()
+    with cellxgene_census.open_soma(**census_open_kwargs()) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="tissue_general == 'eye'"),
+        ) as query:
+            query.to_anndata(X_name="raw")
+    t2 = perf_counter()
+    print(f"End to end time {t2 - t1}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/perf_checker.sh b/profiler/perf_checker.sh
new file mode 100755
index 0000000000..b1a8b1f905
--- /dev/null
+++ b/profiler/perf_checker.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Profile ann_data.py, then compare the newest run with the previous one.
+
+# Stop at the first failing command so a broken profiling run fails the job.
+set -euo pipefail
+
+# ann_data.py and top_profiler.py are referenced by relative path, so run from
+# this script's directory regardless of the caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Install the package from this directory (what the workflow installs as
+# ./profiler) rather than whatever PyPI serves under the name "profiler".
+pip install .
+
+python -m profiler "python ann_data.py" -t gtime
+
+python ./top_profiler.py
diff --git a/profiler/top_profiler.py b/profiler/top_profiler.py
new file mode 100644
index 0000000000..480b2b04ea
--- /dev/null
+++ b/profiler/top_profiler.py
@@ -0,0 +1,40 @@
+from subprocess import PIPE
+import subprocess
+import data
+
+# Largest tolerated slowdown factor: the newest run may use at most 10% more
+# user time than the run before it.
+threshold = 1.10
+
+
+def main():
+    """Compare the two most recent profiled runs and fail on a regression.
+
+    Raises:
+        SystemExit: if the newest run's user time exceeds the previous run's
+            by more than ``threshold``.
+    """
+    db = data.FileBasedProfileDB()
+    # Order by timestamp so the comparison does not depend on storage order.
+    runs = sorted(db.find('python ann_data.py'), key=lambda run: run.timestamp)
+
+    for index, run in enumerate(runs):
+        print(f"{index} timestamp = {run.timestamp} user_time_sec = {run.user_time_sec}")
+
+    if len(runs) < 2:
+        print("Fewer than two profiled runs found; nothing to compare")
+        return
+
+    previous, latest = runs[-2:]
+    previous_time = float(previous.user_time_sec)
+    latest_time = float(latest.user_time_sec)
+    print(f"latest {latest_time} previous {previous_time}")
+
+    if latest_time > threshold * previous_time:
+        raise SystemExit(
+            f"Potential performance degradation detected {latest_time} vs {previous_time}"
+        )
+
+
+if __name__ == "__main__":
+    main()