Skip to content

Commit

Permalink
[Python] Early version of the profiler github actions
Browse files Browse the repository at this point in the history
  • Loading branch information
beroy committed Oct 30, 2023
1 parent 4a003c6 commit 41e7ccd
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 3 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/profiler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs the TileDB-SOMA profiler and fails the job when perf_checker.sh reports
# a significant change between the two most recent profiling runs.
name: TileDB-SOMA Profiler run

on:
  push:
    # The original left this list empty; restricted to main to mirror the
    # pull_request trigger. Adjust if other branches should run the profiler.
    branches:
      - "main"
  pull_request:
    branches:
      - "main"
    paths:
      # Must match this file's actual name (profiler.yml, not profiler.yaml),
      # otherwise the filter can never match.
      - ".github/workflows/profiler.yml"

jobs:
  profiler:
    name: Run profiler
    runs-on: ubuntu-latest
    steps:
      # The repository has to be checked out before ./profiler is available.
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Setup profiler
        run: pip install ./profiler

      - name: Run profiler performance check
        run: |
          ./profiler/perf_checker.sh
23 changes: 23 additions & 0 deletions profiler/ann_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
from time import perf_counter

import cellxgene_census

import tiledbsoma as soma

# Keyword-argument sets for ``cellxgene_census.open_soma``.
census_S3_latest = dict(census_version="latest")
# The default URI is the original developer's local path and exists only on
# that machine; set CENSUS_LOCAL_URI to point at a census copy available on
# the current host (for example a CI runner).
census_local_copy = dict(
    uri=os.environ.get("CENSUS_LOCAL_URI", "/Users/brobatmili/projects/census_data/")
)


def main():
    """Time an end-to-end export of the census 'eye' cells to AnnData.

    Opens the census described by ``census_local_copy``, queries the
    ``homo_sapiens`` experiment's ``RNA`` measurement for observations whose
    ``tissue_general`` is ``'eye'``, materialises the ``raw`` X layer with
    ``to_anndata`` and prints the elapsed wall-clock time.
    """
    started = perf_counter()
    with cellxgene_census.open_soma(**census_local_copy) as census:
        human = census["census_data"]["homo_sapiens"]
        eye_cells = soma.AxisQuery(value_filter="""tissue_general == 'eye'""")
        with human.axis_query(measurement_name="RNA", obs_query=eye_cells) as query:
            # The AnnData result is discarded; only the time to build it matters.
            query.to_anndata(X_name="raw")
    elapsed = perf_counter() - started
    print(f"End to end time {elapsed}")


# Run the benchmark only when executed as a script (``python ann_data.py``),
# so that importing this module does not start the census query.
if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions profiler/perf_checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#
# Profiles ann_data.py inside a fresh virtualenv, then runs top_profiler.py,
# which exits non-zero when the two most recent runs differ significantly.

# Stop at the first failing command so that a broken install or a failed
# profiler run is not followed by a comparison against stale data.
set -eo pipefail

# ann_data.py and top_profiler.py are referenced by relative path, so run from
# the directory holding this script regardless of where it was invoked from
# (the workflow calls it as ./profiler/perf_checker.sh).
cd "$(dirname "${BASH_SOURCE[0]}")"

python -m venv perf
source perf/bin/activate
pip install gitpython
pip install psutil
# Was "comacore" (typo); profiler.py imports somacore.
pip install somacore
# Install the local profiler package from this directory rather than the
# unrelated "profiler" distribution on PyPI.
pip install .
pip install tiledbsoma
pip install cellxgene_census
python -m profiler "python ann_data.py" -t gtime

python ./top_profiler.py
6 changes: 3 additions & 3 deletions profiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
from typing import Any, Dict, Optional

import somacore
from context_generator import host_context

import tiledbsoma

from context_generator import host_context
from data import FileBasedProfileDB, ProfileData, ProfileDB

GNU_TIME_FORMAT = (
Expand Down Expand Up @@ -207,5 +206,6 @@ def main():
file=stderr,
)


if __name__ == "__main__":
main()
main()
41 changes: 41 additions & 0 deletions profiler/top_profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import data

# Allowed ratio between the user times of the two most recent runs
# (1.10 means a 10% difference in either direction is tolerated).
threshold = 1.10

# Command string under which the profiling runs were recorded.
PROFILED_COMMAND = "python ann_data.py"


def latest_two_runs(records):
    """Return ``(latest, previous)`` by ``timestamp``, or ``None``.

    ``records`` is any iterable of objects exposing a ``timestamp`` attribute.
    ``None`` is returned when fewer than two records are available, because
    there is nothing to compare in that case.
    """
    ordered = sorted(records, key=lambda record: record.timestamp)
    if len(ordered) < 2:
        return None
    return ordered[-1], ordered[-2]


def is_significant_change(latest_time, previous_time, ratio=threshold):
    """Return True when the two timings differ by more than ``ratio``.

    The check is symmetric: it triggers both when the latest run is slower
    than the previous one and when it is faster by more than ``ratio``.
    """
    latest_time = float(latest_time)
    previous_time = float(previous_time)
    return ratio * previous_time < latest_time or previous_time > ratio * latest_time


def main():
    """Compare the two most recent profiling runs and exit non-zero on a swing.

    Raises:
        SystemExit: when the user times of the two most recent runs differ by
            more than ``threshold``.
    """
    db = data.FileBasedProfileDB()
    dt = db.find(PROFILED_COMMAND)

    # Dump every stored run to make CI logs easier to diagnose.
    for i, record in enumerate(dt):
        print(f"{i} dt[{i}].user_time_sec = {record.user_time_sec} ts {record.timestamp}")

    pair = latest_two_runs(dt)
    if pair is None:
        # A first run has no baseline; do not fail the check for that.
        print("Fewer than two profiling runs found; nothing to compare")
        return

    latest, previous = pair
    print(
        f"The latest found timestamps {latest.timestamp} time metric {latest.user_time_sec} "
        f" second timestamp {previous.timestamp} time metric {previous.user_time_sec}"
    )

    if is_significant_change(latest.user_time_sec, previous.user_time_sec):
        raise SystemExit(
            "Potential performance degradation detected "
            f"{latest.user_time_sec} vs {previous.user_time_sec}"
        )
    print("No recent performance degradation detected")


if __name__ == "__main__":
    main()

0 comments on commit 41e7ccd

Please sign in to comment.