From 41e7ccd2ce603ce479d89e73f67eaaa3707cb351 Mon Sep 17 00:00:00 2001
From: Behnam Robatmili
Date: Thu, 26 Oct 2023 19:52:30 -0700
Subject: [PATCH] [Python] Early version of the profiler github actions

---
 .github/workflows/profiler.yml | 33 ++++++++++++++++++++++++
 profiler/ann_data.py           | 32 +++++++++++++++++++++++
 profiler/perf_checker.sh       | 24 +++++++++++++++++
 profiler/profiler.py           |  6 ++---
 profiler/top_profiler.py       | 47 ++++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/profiler.yml
 create mode 100644 profiler/ann_data.py
 create mode 100755 profiler/perf_checker.sh
 create mode 100644 profiler/top_profiler.py

diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,33 @@
+name: TileDB-SOMA Profiler run
+
+# Event filters such as `paths` must be nested under the event they apply to;
+# a top-level `paths` key is not valid workflow syntax.
+on:
+  push:
+    # NOTE(review): restricted to "main" to mirror the pull_request trigger --
+    # confirm the intended branches.
+    branches:
+      - "main"
+    paths:
+      - ".github/workflows/profiler.yml"
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - ".github/workflows/profiler.yml"
+
+jobs:
+  check-links:
+    name: Setup profiler
+    runs-on: ubuntu-latest
+    steps:
+      # The repository must be checked out before ./profiler can be used.
+      - uses: actions/checkout@v4
+      # NOTE(review): Python version is a placeholder -- align with the project.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install profiler
+        run: pip install ./profiler
+      - name: Run performance check
+        run: ./profiler/perf_checker.sh
diff --git a/profiler/ann_data.py b/profiler/ann_data.py
new file mode 100644
--- /dev/null
+++ b/profiler/ann_data.py
@@ -0,0 +1,32 @@
+"""Time an end-to-end Census query for 'eye' tissue converted to AnnData."""
+
+import os
+from time import perf_counter
+
+import cellxgene_census
+
+import tiledbsoma as soma
+
+census_S3_latest = dict(census_version="latest")
+# CENSUS_URI overrides the developer-local default so the script can run on
+# other machines (e.g. CI) without editing the source.
+census_local_copy = dict(
+    uri=os.environ.get("CENSUS_URI", "/Users/brobatmili/projects/census_data/")
+)
+
+
+def main():
+    """Run the query once and print the elapsed time."""
+    t1 = perf_counter()
+    with cellxgene_census.open_soma(**census_local_copy) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'eye'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+    t2 = perf_counter()
+    print(f"End to end time {t2 - t1}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/profiler/perf_checker.sh b/profiler/perf_checker.sh
new file mode 100755
--- /dev/null
+++ b/profiler/perf_checker.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Create a virtualenv, profile ann_data.py, then check the recorded runs with
+# top_profiler.py.
+
+# Stop at the first failing command so a broken install is not masked.
+set -e
+
+# ann_data.py and top_profiler.py are referenced relative to this directory,
+# while the workflow invokes the script from the repository root.
+cd "$(dirname "$0")"
+
+python -m venv perf
+source perf/bin/activate
+pip install gitpython
+pip install psutil
+pip install somacore
+# NOTE(review): this installs the PyPI distribution named "profiler", not the
+# local ./profiler package -- confirm which one is intended.
+pip install profiler
+pip install tiledbsoma
+pip install cellxgene_census
+python -m profiler "python ann_data.py" -t gtime
+
+python ./top_profiler.py
diff --git a/profiler/profiler.py b/profiler/profiler.py
index 8e3ff3c749..282153219d 100644
--- a/profiler/profiler.py
+++ b/profiler/profiler.py
@@ -10,10 +10,9 @@
 from typing import Any, Dict, Optional
 
 import somacore
+from context_generator import host_context
 
 import tiledbsoma
-
-from context_generator import host_context
 from data import FileBasedProfileDB, ProfileData, ProfileDB
 
 GNU_TIME_FORMAT = (
@@ -207,5 +206,6 @@ def main():
         file=stderr,
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/profiler/top_profiler.py b/profiler/top_profiler.py
new file mode 100644
--- /dev/null
+++ b/profiler/top_profiler.py
@@ -0,0 +1,47 @@
+"""Compare the two most recent profiling runs and fail on a large gap."""
+
+import data
+
+threshold = 1.10  # Allowed ratio between the two compared runs
+
+COMMAND = "python ann_data.py"
+
+
+def main():
+    """Exit non-zero when the latest two runs of COMMAND differ beyond threshold."""
+    db = data.FileBasedProfileDB()
+    # Order by timestamp explicitly rather than trusting the order of find().
+    runs = sorted(db.find(COMMAND) or [], key=lambda run: run.timestamp)
+
+    for i, run in enumerate(runs):
+        print(f"{i} user_time_sec = {run.user_time_sec} ts {run.timestamp}")
+
+    if len(runs) < 2:
+        # First run or empty database: there is nothing to compare against.
+        print("Not enough profiling runs to compare")
+        return
+
+    previous, latest = runs[-2:]
+    latest_time = float(latest.user_time_sec)
+    previous_time = float(previous.user_time_sec)
+    print(
+        f"The latest found timestamps {latest.timestamp} "
+        f"time metric {latest.user_time_sec} "
+        f" second timestamp {previous.timestamp} "
+        f"time metric {previous.user_time_sec}"
+    )
+
+    # NOTE(review): the check is symmetric, so a speed-up beyond the threshold
+    # also trips it -- confirm that is intended.
+    slower = latest_time > threshold * previous_time
+    faster = previous_time > threshold * latest_time
+    if slower or faster:
+        raise SystemExit(
+            "Potential performance degradation detected "
+            f"{latest.user_time_sec} vs {previous.user_time_sec}"
+        )
+    print("No recent performance degradation detected")
+
+
+if __name__ == "__main__":
+    main()