Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/36796/salisbury new rules #127

Merged
merged 8 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 73 additions & 46 deletions lib/import/brca/providers/salisbury/salisbury_handler.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
require 'possibly'

module Import
module Brca
module Providers
Expand Down Expand Up @@ -58,11 +56,11 @@ def process_row_case(genotypes, genotype, record)
genotype_new.add_status(status)
extract_gene_row(genotype_new, record)
if [2, 10].include? status
assign_variantpathclass_record(genotype_new)
variant = record['genotype']
process_variants(genotype_new, variant) if positive_record?(genotype_new) && variant.present?
handle_variant_record(genotype_new, record, genotypes)
else
genotypes << genotype_new
end
genotypes << genotype_new
genotypes
end

def process_panel_case(genotypes, genotype, record)
Expand Down Expand Up @@ -105,20 +103,21 @@ def process_hybrid_case(genotypes, genotype, record)

# Routes a panel record to process_multi_status_genes or process_status_genes
# depending on the statuses collected in @status_genes_hash and on @status.
# Status codes passed on: 4 (UNKNOWN_STATUS), 9 (FAILED_TEST), 10 (ABNORMAL_STATUS),
# 1 (NEGATIVE_STATUS), 2 (POSITIVE_STATUS or a status starting with "varian").
#
# NOTE(review): this span is a diff hunk. The first if/elsif chain (keyed on the
# whole status_genes list) appears to be the removed version; the
# `status_genes.each` loop (keyed on each gene) appears to be the added one.
def process_panel_record(genotypes, genotype, raw_record)
status_genes = extract_genes(%w[test genotype], raw_record)

status_found = @status_genes_hash[status_genes]&.uniq
if status_found.size > 1
process_multi_status_genes(status_genes, status_found, genotype, genotypes, raw_record)
elsif UNKNOWN_STATUS.include? @status
process_status_genes(status_genes, 4, genotype, genotypes, raw_record)
elsif FAILED_TEST.match(@status)
process_status_genes(@all_genes, 9, genotype, genotypes, raw_record)
elsif ABNORMAL_STATUS.include? @status
process_status_genes(status_genes, 10, genotype, genotypes, raw_record)
elsif NEGATIVE_STATUS.include? @status
process_status_genes(status_genes, 1, genotype, genotypes, raw_record)
elsif POSITIVE_STATUS.include?(@status) || @status.match(/^variant*/ix)
process_status_genes(status_genes, 2, genotype, genotypes, raw_record)
status_genes.each do |status_gene|
# NOTE(review): `&.uniq` returns nil when the gene has no entry in
# @status_genes_hash, and `.size` on the next line would then raise
# NoMethodError. Confirm the hash is always populated for every gene first.
status_found = @status_genes_hash[status_gene]&.uniq
if status_found.size > 1
process_multi_status_genes([status_gene], status_found, genotype, genotypes, raw_record)
elsif UNKNOWN_STATUS.include? @status
process_status_genes([status_gene], 4, genotype, genotypes, raw_record)
elsif FAILED_TEST.match(@status)
# NOTE(review): this passes @all_genes, not [status_gene], so it runs once per
# status gene inside the loop. Check whether the repetition is intended.
process_status_genes(@all_genes, 9, genotype, genotypes, raw_record)
elsif ABNORMAL_STATUS.include? @status
process_status_genes([status_gene], 10, genotype, genotypes, raw_record)
elsif NEGATIVE_STATUS.include? @status
process_status_genes([status_gene], 1, genotype, genotypes, raw_record)
# NOTE(review): in /^variant*/ the `*` applies only to the final "t", so the
# pattern matches any status that starts with "varian".
elsif POSITIVE_STATUS.include?(@status) || @status.match(/^variant*/ix)
process_status_genes([status_gene], 2, genotype, genotypes, raw_record)
end
end
end

Expand All @@ -127,11 +126,12 @@ def prepare_gene_status_hash(record)
assign_status_var(raw_record)

status_genes = extract_genes(%w[test genotype], raw_record)

if @status_genes_hash[status_genes]
@status_genes_hash[status_genes] << @status
else
@status_genes_hash[status_genes] = [@status]
status_genes.each do |status_gene|
if @status_genes_hash[status_gene]
@status_genes_hash[status_gene] << @status
else
@status_genes_hash[status_gene] = [@status]
end
end
end
end
Expand All @@ -153,19 +153,21 @@ def assign_status_var(raw_record)
end

# Use priority if more than one status is present for same gene for a given record
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/PerceivedComplexity
# Chooses one status when several were recorded for the same gene(s), checking
# in priority order: positive / "variant…" (2), abnormal (10), negative (1),
# failed test (9), unknown (4). In the second chain below, process_status_genes
# is only called when extract_teststatus_record returns the same code.
#
# NOTE(review): this span is a diff hunk. The first chain (using `&`) appears to
# be the removed version and the second (using `intersect?`) the added one.
def process_multi_status_genes(status_genes, status_found, genotype, genotypes, raw_record)
# NOTE(review): Array#& and Array#map both return arrays, which are always
# truthy in Ruby, so this first condition could never be false.
if (status_found & POSITIVE_STATUS) || status_found.map { |e| e.match(/^variant/) }
process_status_genes(status_genes, 2, genotype, genotypes, raw_record)
elsif status_found & ABNORMAL_STATUS
process_status_genes(status_genes, 10, genotype, genotypes, raw_record)
elsif status_found & NEGATIVE_STATUS
process_status_genes(status_genes, 1, genotype, genotypes, raw_record)
elsif status_found & FAILED_TEST
process_status_genes(@all_genes, 9, genotype, genotypes, raw_record)
elsif status_found & UNKNOWN_STATUS
process_status_genes(status_genes, 4, genotype, genotypes, raw_record)
if status_found.intersect?(POSITIVE_STATUS) || status_found.any? { |e| e.match(/^variant/) }
process_status_genes(status_genes, 2, genotype, genotypes, raw_record) if extract_teststatus_record == 2
elsif status_found.intersect?(ABNORMAL_STATUS)
process_status_genes(status_genes, 10, genotype, genotypes, raw_record) if extract_teststatus_record == 10
elsif status_found.intersect?(NEGATIVE_STATUS)
process_status_genes(status_genes, 1, genotype, genotypes, raw_record) if extract_teststatus_record == 1
# FIXME(review): status_found is used as an Array in every other branch
# (intersect?, any?), and Array does not define #match, so this line would
# raise NoMethodError when reached. FAILED_TEST is used elsewhere in this file as
# `FAILED_TEST.match(@status)`, so the intent is presumably
# `status_found.any? { |s| FAILED_TEST.match?(s) }` (confirm).
elsif status_found.match(FAILED_TEST)
process_status_genes(@all_genes, 9, genotype, genotypes, raw_record) if extract_teststatus_record == 9
elsif status_found.intersect?(UNKNOWN_STATUS)
process_status_genes(status_genes, 4, genotype, genotypes, raw_record) if extract_teststatus_record == 4
end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/PerceivedComplexity

def process_status_genes(genes, status, genotype, genotypes, record)
return unless genes&.all? { |gene| @all_genes.include?(gene) }
Expand All @@ -176,10 +178,25 @@ def process_status_genes(genes, status, genotype, genotypes, record)
genotype_new.add_gene(gene)
genotype_new.add_status(status)
if [2, 10].include? status
assign_variantpathclass_record(genotype_new)
variant = record['genotype']
process_variants(genotype_new, variant) if positive_record?(genotype_new) && variant.present?
handle_variant_record(genotype_new, record, genotypes)
else
genotypes << genotype_new
end
end
genotypes
end

def handle_variant_record(genotype_new, record, genotypes)
assign_variantpathclass_record(genotype_new)
variant = record['genotype']
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can the variantpathclass be handled before assigning the variant? What happens in the case of multiple variants?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @lauramccluskey1, as per the rules, variantpathclass is assigned based on the 'status' of the record and not on the variant. For multi-variant records we duplicate the genotype object, so each copy gets its variantpathclass from the status of the raw_record and then captures the variant present in it. Also note that this method is only called for teststatus 2 and 10 records.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lauramccluskey1 @NImeson committed updated rule as discussed in d5f8aa5, Thanks ☺️

Copy link
Collaborator

@NImeson NImeson Nov 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix looks fine :-)

if variant.present?
if variant.scan(CDNA_REGEX).size > 1 ||
variant.scan(EXON_VARIANT_REGEX).size > 1
Copy link
Collaborator

@lauramccluskey1 lauramccluskey1 Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be `if (variant.scan(CDNA_REGEX).size + variant.scan(EXON_VARIANT_REGEX).size) > 1`?

Copy link
Contributor Author

@shilpigoeldev shilpigoeldev Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes @lauramccluskey1 good point, have now fixed it in 9f1ea0e

process_multi_vars(genotype_new, variant, genotypes)
else
process_variants(genotype_new, variant, genotypes)
end
else
genotypes << genotype_new
end
end
Expand Down Expand Up @@ -232,10 +249,24 @@ def extract_gene_row(genotype, record)
genotype.add_gene(gene.first)
end

def process_variants(genotype, variant)
process_cdna_variant(genotype, variant)
process_exonic_variant(genotype, variant)
process_protein_impact(genotype, variant)
def process_multi_vars(genotype_new, variant, genotypes)
variants = variant.split(/;|,/)
variants.each do |var|
genotype_dup = genotype_new.dup
gene = var&.scan(BRCA_REGEX)&.flatten&.uniq
if gene.present?
genotype_dup.add_gene(gene[0])
@all_genes -= gene if @all_genes.present?
Copy link
Collaborator

@NImeson NImeson Nov 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to try and ensure we keep the structure of the database if there happened to be two variants in the same gene written e.g. "BRCA1 c123A>C; BRCA1 c456G>A"? - this would ideally need to create one record in genetic_test_results with two child records in genetic_sequence_variants, rather than two BRCA1 records

As discussed, this might be a wider problem in the importers?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy with this after investigations into persister functioning

end
process_variants(genotype_dup, var, genotypes)
end
end

# Runs the cDNA, exonic and protein-impact parsers, in that order, over
# `variant` for `genotype_new`, then appends `genotype_new` to `genotypes`.
# Returns the result of `genotypes << genotype_new`.
def process_variants(genotype_new, variant, genotypes)
  %i[process_cdna_variant process_exonic_variant process_protein_impact].each do |parser|
    send(parser, genotype_new, variant)
  end
  genotypes << genotype_new
end

def process_exonic_variant(genotype, variant)
Expand All @@ -259,10 +290,6 @@ def process_protein_impact(genotype, variant)
genotype.add_protein_impact($LAST_MATCH_INFO[:impact])
@logger.debug "SUCCESSFUL protein parse for: #{$LAST_MATCH_INFO[:impact]}"
end

def positive_record?(genotype)
[2, 10].include? genotype.attribute_map['teststatus']
end
end
end
end
Expand Down
11 changes: 1 addition & 10 deletions lib/import/helpers/brca/providers/rnz/rnz_constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,7 @@ module RnzConstants
CONFIRM_SEQ_NGS = /Confirmation\sSequencing|NGS\sResults/ix

# rubocop:disable Lint/MixedRegexpCaptureTypes
CDNA_REGEX = /c\.\[?(?<cdna>
([0-9]+[+>_-][0-9][+>_-][0-9]+[+>_-][0-9][ACGTdelinsup]+)|
([0-9]+[+>_-][0-9][+>_-][0-9]+[+>_-][0-9]+[ACGTdelinsup]+)|
([0-9]+[+>_-][0-9]+[ACGTdelinsup][+>_-][ACGTdelinsup])|
([0-9]+[ACGTdelinsup]+[+>_-][ACGTdelinsup])|
([0-9]+[+>_-][0-9]+[ACGTdelinsup]+)|
([0-9]+[+>_-][0-9]+[+>_-][0-9]+[0-9]+[ACGTdelinsup]+)|
([0-9]+[?+>_-]+[0-9]+[?+>_-]+[ACGTdelinsup]+)|
([0-9]+[ACGTdelinsup]+)
)\]?/ix
CDNA_REGEX = /c\.(?<cdna>[\w+>*\-]+)?[\w\s.]?/ix

PROTEIN_REGEX = /p\.\((?<impact>.+)\)|
\(p\.(?<impact>[A-Za-z]+.+)\)|
Expand Down
31 changes: 28 additions & 3 deletions test/lib/import/brca/providers/salisbury/salisbury_handler_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setup
end

test 'process_variants' do
@handler.process_variants(@genotype, @record.raw_fields.first['genotype'])
@handler.process_variants(@genotype, @record.raw_fields.first['genotype'], [])
assert_equal 'c.9382C>T', @genotype.attribute_map['codingdnasequencechange']
assert_equal 'p.Arg3128Ter', @genotype.attribute_map['proteinimpact']
assert_equal 1, @genotype.attribute_map['sequencevarianttype']
Expand All @@ -31,7 +31,7 @@ def setup
test 'process_exonic_variants' do
exonic_record = build_raw_record(options: { 'pseudo_id1' => 'bob' })
exonic_record.raw_fields.first['genotype'] = 'exons 21-24'
@handler.process_variants(@genotype, exonic_record.raw_fields.first['genotype'])
@handler.process_variants(@genotype, exonic_record.raw_fields.first['genotype'], [])
assert_equal '21-24', @genotype.attribute_map['exonintroncodonnumber']
assert_equal 10, @genotype.attribute_map['sequencevarianttype']
assert_equal 1, @genotype.attribute_map['variantlocation']
Expand Down Expand Up @@ -175,7 +175,6 @@ def setup
panel_rec.raw_fields.first['status'] = 'Normal'
panel_rec.raw_fields.first['test'] = 'BRCA2 dosage analysis'
panel_rec.raw_fields.first['genotype'] = nil
panel_rec.raw_fields[1]['moleculartestingtype'] = 'Breast and ovarian cancer 7-gene panel (R208)'
@handler.assign_molecular_testing_var(panel_rec)
@handler.process_molecular_testing(@genotype)
genotypes = @handler.process_record(@genotype, panel_rec)
Expand All @@ -201,6 +200,32 @@ def setup
assert_equal 3616, genotypes[6].attribute_map['gene']
end

# Checks that a genotype string holding two variants separated by ';'
# ('BRCA2 c.5dupC p.(Gln74); CHEK2 c.4G>A') on a 7-gene panel record produces
# one genotype per variant, followed by teststatus 1 genotypes for other genes.
test 'process multivariant cases' do
panel_rec = build_raw_record(options: { 'pseudo_id1' => 'bob' })
panel_rec.raw_fields.first['moleculartestingtype'] = 'Breast and ovarian cancer 7-gene panel (R208)'
panel_rec.raw_fields.first['genotype'] = 'BRCA2 c.5dupC p.(Gln74); CHEK2 c.4G>A'
panel_rec.raw_fields.first['test'] = 'Cartagenia/Congenica analysis'
# NOTE(review): 'status' is not set here, so the teststatus 2 results presumably
# depend on the default from build_raw_record. Verify against that helper.
@handler.assign_molecular_testing_var(panel_rec)
@handler.process_molecular_testing(@genotype)
genotypes = @handler.process_record(@genotype, panel_rec)
# Seven genotypes are expected, but only indices 0-4 are asserted below.
assert_equal 7, genotypes.size
# First variant: gene 8 with both cDNA change and protein impact.
assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope']
assert_equal 2, genotypes[0].attribute_map['teststatus']
assert_equal 8, genotypes[0].attribute_map['gene']
assert_equal 'c.5dupC', genotypes[0].attribute_map['codingdnasequencechange']
assert_equal 'p.Gln74', genotypes[0].attribute_map['proteinimpact']
# Second variant: gene 865 with a cDNA change and no protein impact.
assert_equal 2, genotypes[1].attribute_map['teststatus']
assert_equal 865, genotypes[1].attribute_map['gene']
assert_equal 'c.4G>A', genotypes[1].attribute_map['codingdnasequencechange']
assert_nil genotypes[1].attribute_map['proteinimpact']
# The following genotypes carry teststatus 1.
assert_equal 1, genotypes[2].attribute_map['teststatus']
assert_equal 451, genotypes[2].attribute_map['gene']
assert_equal 1, genotypes[3].attribute_map['teststatus']
assert_equal 7, genotypes[3].attribute_map['gene']
assert_equal 1, genotypes[4].attribute_map['teststatus']
assert_equal 3186, genotypes[4].attribute_map['gene']
end

private

def build_raw_record(raw_hash: {}, options: {})
Expand Down
Loading