Skip to content

Commit

Permalink
Merge branch 'feature/improve_ci' into 'master'
Browse files: browse the repository at this point in the history
Tidy up CI failures in tag script

See merge request minknow/mkr-file-format!4
  • Loading branch information
0x55555555 committed May 4, 2022
2 parents a92312d + 4c2719a commit 9905227
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ tag_version_check:
stage: pre-flight
only:
- tags
- image: ${PYTHON_IMAGE}
+ image: git.oxfordnanolabs.local:4567/minknow/images/build-x86_64-gcc9:latest
script:
- mkr_version="$(cmake -P ci/get_tag_version.cmake 2>&1)"
- tag_version="${CI_COMMIT_TAG/#v/}"
Expand Down
65 changes: 27 additions & 38 deletions benchmarks/tools/find_and_get_mkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import argparse
import multiprocessing as mp
from pathlib import Path
- import pickle
from queue import Empty
from uuid import UUID

Expand Down Expand Up @@ -33,9 +32,7 @@ def process_read(get_columns, read, read_ids, extracted_columns):
col.append(getattr(read, c))


- def do_batch_bulk_work(
- filename, batches, select_read_ids, get_columns, c_api, result_q
- ):
+ def do_batch_work(filename, batches, get_columns, c_api, result_q):
read_ids = []
extracted_columns = {"read_id": read_ids}

Expand All @@ -47,19 +44,13 @@ def do_batch_bulk_work(
result_q.put(pd.DataFrame(extracted_columns))


- def do_batch_search_work(
- filename, batches, select_read_ids_pickled, get_columns, c_api, result_q
- ):
+ def do_search_work(files, select_read_ids, get_columns, c_api, result_q):
read_ids = []
extracted_columns = {"read_id": read_ids}
+ for file in files:
+ file = mkr_format.open_combined_file(file, use_c_api=c_api)

- select_read_ids = pickle.loads(select_read_ids_pickled)
-
- file = mkr_format.open_combined_file(filename, use_c_api=c_api)
- for batch in batches:
- for read in filter(
- lambda x: x.read_id in select_read_ids, file.get_batch(batch).reads()
- ):
+ for read in file.select_reads(UUID(s) for s in select_read_ids):
process_read(get_columns, read, read_ids, extracted_columns)

result_q.put(pd.DataFrame(extracted_columns))
Expand All @@ -82,36 +73,34 @@ def run(input_dir, output, select_read_ids=None, get_columns=[], c_api=False):
files = list(input_dir.glob("*.mkr"))
print(f"Searching for read ids in {[str(f) for f in files]}")

- fn_to_call = do_batch_bulk_work
- if select_read_ids is not None:
- fn_to_call = do_batch_search_work
-
- select_read_ids = pickle.dumps(
- set(UUID(s) for s in select_read_ids) if select_read_ids is not None else None
- )
-
processes = []
- for filename in files:
- file = mkr_format.open_combined_file(filename, use_c_api=c_api)
- batches = list(range(file.batch_count))
- approx_chunk_size = max(1, len(batches) // runners)
+ if select_read_ids is not None:
+ approx_chunk_size = max(1, len(select_read_ids) // runners)
start_index = 0
- while start_index < len(batches):
- select_batches = batches[start_index : start_index + approx_chunk_size]
+ while start_index < len(select_read_ids):
+ select_ids = select_read_ids[start_index : start_index + approx_chunk_size]
p = mp.Process(
- target=fn_to_call,
- args=(
- filename,
- select_batches,
- select_read_ids,
- get_columns,
- c_api,
- result_queue,
- ),
+ target=do_search_work,
+ args=(files, select_ids, get_columns, c_api, result_queue),
)
p.start()
processes.append(p)
- start_index += len(select_batches)
+ start_index += len(select_ids)
+ else:
+ for filename in files:
+ file = mkr_format.open_combined_file(filename, use_c_api=c_api)
+ batches = list(range(file.batch_count))
+ approx_chunk_size = max(1, len(batches) // runners)
+ start_index = 0
+ while start_index < len(batches):
+ select_batches = batches[start_index : start_index + approx_chunk_size]
+ p = mp.Process(
+ target=do_batch_work,
+ args=(filename, select_batches, get_columns, c_api, result_queue),
+ )
+ p.start()
+ processes.append(p)
+ start_index += len(select_batches)

print("Wait for processes...")
items = []
Expand Down

0 comments on commit 9905227

Please sign in to comment.