Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes in mergeSTR #155

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion RELEASE_NOTES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Functionality Changes:

Misc:

* mergeutils: function GetMinHarmonizedRecords was transformed into GetRecordComparabilityAndIncrement, which allows the caller
* mergeutils: function GetMinHarmonizedRecords was transformed into GetIncrementAndComparability, which allows the caller
to define a custom predicate that decides whether records are comparable.

4.0.2
Expand Down
4 changes: 2 additions & 2 deletions trtools/compareSTR/compareSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,8 +899,8 @@ def main(args):
range(len(current_records))]
# increments contains information about which record should be
# skipped in next iteration
increment, comparable = mergeutils.GetRecordComparabilityAndIncrement(harmonized_records, chroms,
handle_overlaps)
increment, comparable = mergeutils.GetIncrementAndComparability(harmonized_records, chroms,
handle_overlaps)

if args.verbose: mergeutils.DebugPrintRecordLocations(current_records, increment)
if mergeutils.CheckMin(increment): return 1
Expand Down
15 changes: 10 additions & 5 deletions trtools/mergeSTR/mergeSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,12 +624,17 @@ def main(args: Any) -> int:
", e.g.: bcftools reheader -f hg19.fa.fai -o myvcf-readher.vcf.gz myvcf.vcf.gz")
return 1
harmonized_records = HarmonizeIfNotNone(current_records, vcftype)
is_min = mergeutils.GetMinHarmonizedRecords(harmonized_records, chroms)
if args.verbose: mergeutils.DebugPrintRecordLocations(current_records, is_min)
if mergeutils.CheckMin(is_min): return 1
MergeRecords(vcfreaders, vcftype, num_samples, harmonized_records, is_min, vcfw, useinfo,

# mergeSTR doesn't provide a custom comparability handler. By default, only the increment is necessary to decide
# which records should be merged during a single iteration. This is because the merge is based on the position
# of the records. If this behaviour changes in the future, a custom mergeability handler will have to be created.
increment, _ = mergeutils.GetIncrementAndComparability(harmonized_records, chroms)

if args.verbose: mergeutils.DebugPrintRecordLocations(current_records, increment)
if mergeutils.CheckMin(increment): return 1
MergeRecords(vcfreaders, vcftype, num_samples, harmonized_records, increment, vcfw, useinfo,
useformat, format_type)
current_records = mergeutils.GetNextRecords(vcfreaders, current_records, is_min)
current_records = mergeutils.GetNextRecords(vcfreaders, current_records, increment)
done = mergeutils.DoneReading(current_records)
return 0

Expand Down
20 changes: 13 additions & 7 deletions trtools/utils/mergeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

CYVCF_RECORD = cyvcf2.Variant
CYVCF_READER = cyvcf2.VCF
COMPARABILITY_CALLBACK = Callable[[List[Optional[trh.TRRecord]], List[int], int], Union[bool, List[bool]]]


def LoadReaders(vcffiles: List[str], region: Optional[str] = None) -> List[CYVCF_READER]:
Expand Down Expand Up @@ -248,13 +249,18 @@ def GetMinRecords(record_list: List[Optional[trh.TRRecord]], chroms: List[str])
return [CheckPos(r, chroms[min_chrom], min_pos) for r in record_list]


def default_callback(records: List[Optional[trh.TRRecord]],
                     chrom_order: List[int],
                     min_chrom_index: int) -> bool:
    r"""Default comparability callback: always report records as comparable.

    Used as the default ``overlap_callback`` of GetIncrementAndComparability,
    for callers that only need the increment. The signature follows
    COMPARABILITY_CALLBACK; all arguments are ignored.

    Parameters
    ----------
    records : list of Optional[trh.TRRecord]
        Current records (unused)
    chrom_order : list of int
        Chromosome order of each record (unused)
    min_chrom_index : int
        Index of the minimum chromosome (unused)

    Returns
    -------
    comparable : bool
        Always True
    """
    return True


def GetIncrementAndComparability(record_list: List[Optional[trh.TRRecord]],
chroms: List[str],
overlap_callback: COMPARABILITY_CALLBACK = default_callback) \
-> Tuple[List[bool], Union[bool, List[bool]]]:

def GetRecordComparabilityAndIncrement(record_list: List[Optional[trh.TRRecord]],
chroms: List[str],
overlap_callback: Callable[[List[Optional[trh.TRRecord]], List[int], int], bool]) \
-> Tuple[List[bool], bool]:
r"""Get list that says which records should be skipped in the next
iteration, and whether they are all comparable with each other
iteration (increment), and whether they are all comparable / mergeable.
The value of each increment element is determined by the (harmonized) position of the corresponding record.


Parameters
Expand All @@ -265,15 +271,15 @@ def GetRecordComparabilityAndIncrement(record_list: List[Optional[trh.TRRecord]]
chroms : list of str
Ordered list of all chromosomes

overlap_callback: Callable[[List[Optional[trh.TRRecord]], List[int], int], bool]
overlap_callback: Callable[[List[Optional[trh.TRRecord]], List[int], int], Union[bool, List[bool]]]
Function that calculates whether the records are comparable

Returns
-------
increment : list of bool
List of bools, where items are set to True when the record at the index of the item should be
skipped during VCF file comparison.
comparable: bool
comparable: bool or list of bool
Value that determines whether the current records are comparable / mergeable, depending on the callback
"""
chrom_order = [np.inf if r is None else chroms.index(r.chrom) for r in record_list]
Expand Down
16 changes: 8 additions & 8 deletions trtools/utils/tests/test_mergeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,27 +110,27 @@ def comp_callback_false(x, y, z):


pair = [DummyHarmonizedRecord("chr1", 20), DummyHarmonizedRecord("chr1", 20)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_true) == ([True, True], True)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_true) == ([True, True], True)

# these two test cases show that the second result of GetIncrementAndComparability is
# entirely dependent on the callback
pair = [DummyHarmonizedRecord("chr1", 21), DummyHarmonizedRecord("chr1", 20)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_false) == ([False, True], False)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_false) == ([False, True], False)

pair = [DummyHarmonizedRecord("chr1", 21), DummyHarmonizedRecord("chr1", 20)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_true) == ([False, True], True)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_true) == ([False, True], True)

pair = [DummyHarmonizedRecord("chr2", 20), DummyHarmonizedRecord("chr1", 20)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_false) == ([False, True], False)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_false) == ([False, True], False)

pair = [DummyHarmonizedRecord("chr1", 20), DummyHarmonizedRecord("chr1", 21)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_true) == ([True, False], True)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_true) == ([True, False], True)

pair = [None, None]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_false) == ([False, False], False)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_false) == ([False, False], False)

pair = [DummyHarmonizedRecord("chr1", 20), None]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_false) == ([True, False], False)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_false) == ([True, False], False)

pair = [None, DummyHarmonizedRecord("chr1", 20)]
assert mergeutils.GetRecordComparabilityAndIncrement(pair, chromosomes, comp_callback_false) == ([False, True], False)
assert mergeutils.GetIncrementAndComparability(pair, chromosomes, comp_callback_false) == ([False, True], False)