Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/36558/st george old import missing records #123

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 97 additions & 10 deletions lib/import/brca/providers/st_george_old/st_george_handler_old.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler
# Field names handed to genotype.add_passthrough_fields in process_fields.
# Each name appears once ('receiveddate' was previously listed twice).
PASS_THROUGH_FIELDS = %w[age sex consultantcode collecteddate
                         receiveddate authoriseddate servicereportidentifier
                         providercode sampletype].freeze
# Captures a cDNA change into the named group `cdna` (the text after "c.").
# Three spellings are accepted, case-insensitively:
#   c.123A>G      - digits followed by anything up to whitespace or ")"
#   c.[...]       - bracketed form, capture is the text inside the brackets
#   c.*123A>G     - starred form; the "*" itself is NOT part of the capture
# NOTE(review): dropping "*" from the capture may lose the position marker
# when the value is stored - confirm this is intended.
CDNA_REGEX = /c\.(?<cdna>[0-9]+[^\s)]+)|c\.\[(?<cdna>.*?)\]|c\.\*(?<cdna>[0-9]+[^\s)]+)/i.freeze


PROTEIN_REGEX = /p\.(?<impact>[a-z]+[0-9]+[a-z]+)|
p\.(?<sqrbo>\[)?(?<rndbo>\()?(?<impact>[a-z]+[0-9]+[a-z]+)
Expand Down Expand Up @@ -47,7 +48,9 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler
(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+))|
(?<variant>del|dup|ins)(?<s>\s)?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+)?)\s
(?<variant>del|dup|ins)/ix.freeze
(?<variant>del|dup|ins)|(?<variant>dup|del|ins)\s?ex\s?(?<exons>\d+)|
(?<variant>dup|del|ins)\s?x\s?(?<exons>\d+(-|_)\d+)/ix.freeze


# Case-insensitive match for the shorthand gene spellings B1, BR1, "BRCA 1",
# B2, BR2 and "BRCA 2". The pattern is unanchored, so it also matches these
# sequences inside longer text.
DEPRECATED_BRCA_NAMES_REGEX = /B1|BR1|BRCA\s1|B2|BR2|BRCA\s2/i.freeze

Expand All @@ -61,6 +64,7 @@ def process_fields(record)
genotype.add_passthrough_fields(record.mapped_fields,
record.raw_fields,
PASS_THROUGH_FIELDS)

add_organisationcode_testresult(genotype)
add_moleculartestingtype(genotype, record)
process_genetictestcope(genotype, record)
Expand All @@ -69,6 +73,7 @@ def process_fields(record)
@batch.provider = 'RJ7'
@batch.registryid = 'RJ7'
res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) }

end

def add_organisationcode_testresult(genotype)
Expand Down Expand Up @@ -138,8 +143,8 @@ def process_deprecated_gene(deprecated_gene, positive_genes)
end

def process_fullscreen_records(genotype, record, positive_genes, genotypes)
if normal?(record)
normal_full_screen(genotype, genotypes)
if ucs_variant?(record)
process_ucs_variants(genotype, genotypes, positive_genes, record)
elsif failed_test?(record)
failed_full_screen(genotype, genotypes)
elsif positive_cdna?(record) || positive_exonvariant?(record)
Expand All @@ -148,10 +153,53 @@ def process_fullscreen_records(genotype, record, positive_genes, genotypes)
else
single_variant_full_screen(genotype, genotypes, positive_genes, record)
end
elsif normal?(record)
normal_full_screen(genotype, genotypes)
else
unknown_status(genotype, genotypes, positive_genes, record)
end
genotypes
end

# True when the record's raw 'genotype' text contains "ucs" (case-insensitive).
# A missing genotype is treated as "not UCS" instead of raising NoMethodError,
# so callers can fall through to their remaining checks.
def ucs_variant?(record)
  record.raw_fields['genotype'].to_s.match?(/ucs/i)
end

# Appends the genotype(s) for a record whose genotype text is flagged as UCS.
#
# When the record is Ashkenazi, Polish or full screen, two entries are added:
# first a copy holding whichever of BRCA1/BRCA2 is absent from positive_genes
# (status 1), then the original genotype holding positive_genes (status 10).
# Otherwise a single entry is added: process_single_gene sets the gene and the
# status is 10.
#
# Returns genotypes (the result of the final append).
def process_ucs_variants(genotype, genotypes, positive_genes, record)
  split_by_gene = ashkenazi?(record) || polish?(record) || full_screen?(record)

  if split_by_gene
    remaining_genes = %w[BRCA1 BRCA2] - positive_genes
    remaining = genotype.dup
    remaining.add_gene(remaining_genes.join)
    remaining.add_status(1)
    genotypes << remaining
    genotype.add_gene(positive_genes.join)
  else
    process_single_gene(genotype, record)
  end

  genotype.add_status(10)
  genotypes << genotype
end

# Appends the genotype(s) for a record that matched none of the other
# dispatch checks, using status 4 for the main entry.
#
# When the record is Ashkenazi, Polish or full screen, a copy holding whichever
# of BRCA1/BRCA2 is absent from positive_genes is appended first (status 1).
# Otherwise process_single_gene is applied to the genotype. In both cases the
# joined positive_genes are then added as a gene (skipped when positive_genes
# is nil) and the genotype is appended with status 4.
#
# Returns genotypes (the result of the final append).
def unknown_status(genotype, genotypes, positive_genes, record)
  split_by_gene = ashkenazi?(record) || polish?(record) || full_screen?(record)

  if split_by_gene
    remaining_genes = %w[BRCA1 BRCA2] - positive_genes
    remaining = genotype.dup
    remaining.add_gene(remaining_genes.join)
    remaining.add_status(1)
    genotypes << remaining
  else
    process_single_gene(genotype, record)
  end

  genotype.add_gene(positive_genes.join) unless positive_genes.nil?
  genotype.add_status(4)
  genotypes << genotype
end


def normal_full_screen(genotype, genotypes)
%w[BRCA1 BRCA2].each do |negative_gene|
genotype_dup = genotype.dup
Expand Down Expand Up @@ -183,12 +231,17 @@ def single_variant_full_screen(genotype, genotypes, positive_genes, record)
end

# Routes a record to the handler matching its genotype text.
#
# Checks run in this order and the first match wins: UCS, failed test,
# positive cDNA or exon variant, normal. A record matching none of them is
# passed to unknown_status. The UCS check comes before the normal check, so a
# record that satisfies both is handled as UCS.
#
# Returns genotypes.
def process_targeted_records(positive_genes, genotype, record, genotypes)
  if ucs_variant?(record)
    process_ucs_variants(genotype, genotypes, positive_genes, record)
  elsif failed_test?(record)
    process_failed_targeted(genotype, record, genotypes)
  elsif positive_cdna?(record) || positive_exonvariant?(record)
    process_positive_targeted(record, positive_genes, genotype, genotypes)
  elsif normal?(record)
    process_normal_targeted(genotype, record, genotypes)
  else
    unknown_status(genotype, genotypes, positive_genes, record)
  end
  genotypes
end
Expand Down Expand Up @@ -232,6 +285,7 @@ def process_single_gene(genotype, record)
else
@logger.debug "FAILED gene parse for: #{record.raw_fields['genotype']}"
end

end
# rubocop:enable Lint/DuplicateBranch

Expand Down Expand Up @@ -266,17 +320,48 @@ def add_variants_multiple_results(variants, genotype, genotypes)
end

# Handles a record carrying several positive variants, choosing the parser by
# the number of distinct entries in the flattened positive_genes:
#   more than one -> process_multi_genes_rec
#   exactly one   -> process_uniq_gene_rec
#   none          -> process_multi_variants_no_gene, but only when
#                    positive_genes itself is empty
# Variants returned by the first two are passed to
# add_variants_multiple_results. Returns genotypes.
def process_multiple_positive_variants(positive_genes, genotype, record, genotypes)
  case positive_genes.flatten.uniq.size
  when 0
    process_multi_variants_no_gene(record, genotype, genotypes) if positive_genes.empty?
  when 1
    variants = process_uniq_gene_rec(record, positive_genes)
  else
    variants = process_multi_genes_rec(record, positive_genes)
  end

  add_variants_multiple_results(variants, genotype, genotypes) unless variants.nil?
  genotypes
end

# Splits a multi-variant genotype string that names no gene into one genotype
# per variant and appends each to genotypes with status 2.
#
# Does nothing (returns nil) when the raw 'genotype' field is nil. The text is
# split on the text of the last DELIMETER_REGEX match found in it; when no
# delimiter is present no per-variant genotypes are created. For each piece the
# first cDNA mutation and first protein impact, when found, are recorded on a
# copy of genotype. For full-screen records create_empty_brca_tests is then
# called as well.
def process_multi_variants_no_gene(record, genotype, genotypes)
  raw_text = record.raw_fields['genotype']
  return if raw_text.nil?

  # A block-less scan leaves its final match in Regexp.last_match. Reading it
  # there, rather than through $LAST_MATCH_INFO, needs no `require 'English'`.
  raw_text.scan(DELIMETER_REGEX)
  delimiter = Regexp.last_match
  unless delimiter.nil?
    raw_text.split(delimiter[0]).each do |raw_genotype|
      genotype_dup = genotype.dup
      mutation = get_cdna_mutation(raw_genotype)
      protein = get_protein_impact(raw_genotype)
      genotype_dup.add_gene_location(mutation[0]) unless mutation.nil?
      genotype_dup.add_protein_impact(protein[0]) unless protein.nil?
      genotype_dup.add_status(2)
      genotypes.append(genotype_dup)
    end
  end
  create_empty_brca_tests(record, genotype, genotypes) if full_screen?(record)
end

# Appends one copy of genotype per gene (BRCA1, then BRCA2) to genotypes,
# each with that gene added and status 4. The record argument is not used.
# Returns the two-element gene-name array that was iterated.
def create_empty_brca_tests(record, genotype, genotypes)
  %w[BRCA1 BRCA2].each do |gene_name|
    placeholder = genotype.dup
    placeholder.add_gene(gene_name)
    placeholder.add_status(4)
    genotypes << placeholder
  end
end

def process_multi_genes_rec(record, positive_genes)
if record.raw_fields['genotype'].scan(DELIMETER_REGEX).size > 1
variants = process_single_variant(record, positive_genes)
Expand Down Expand Up @@ -431,10 +516,12 @@ def failed_test?(record)
end

# Tells whether the record's raw 'moleculartestingtype' carries no usable
# test scope: an empty string, 'Store', or
# 'Reclassification of previous result'.
#
# Returns nil when the field is nil, otherwise true or false.
def void_genetictestscope?(record)
  testing_type = record.raw_fields['moleculartestingtype']
  return if testing_type.nil?

  # One combined condition, so the empty-string check is part of the result.
  testing_type.empty? ||
    testing_type == 'Store' ||
    testing_type == 'Reclassification of previous result'
end
end
# rubocop:enable Metrics/ClassLength
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,33 @@ def setup
assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange']
end

# test 'process_multi_variants_no_gene' do
# multiple_variants_no_gene_record = build_raw_record('pseudo_id1' => 'bob')
# multiple_variants_no_gene_record.raw_fields['genotype'] = 'c.666A>G + c.6275_6276del'
# genotypes = []
# variants = @handler.process_multi_variants_no_gene(multiple_variants_no_gene_record, @genotype, genotypes)
# assert_equal 2, variants[0].attribute_map['teststatus']
# assert_equal 2, variants[1].attribute_map['teststatus']
# assert_equal nil, variants[0].attribute_map['gene']
# assert_equal nil, variants[1].attribute_map['gene']
# assert_equal 'c.666A>G', variants[0].attribute_map['codingdnasequencechange']
# assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange']
# end

# A record whose genotype text is 'N + BR1 UCS' must give a first result with
# teststatus 10 and gene 7.
test 'process_ucs_variants' do
  ucs_record = build_raw_record('pseudo_id1' => 'bob')
  ucs_record.raw_fields['genotype'] = 'N + BR1 UCS'
  collected = []
  candidate_genes = %w[BRCA1 BRCA2]

  results = @handler.process_ucs_variants(@genotype, collected, candidate_genes, ucs_record)
  first_result = results[0]

  assert_equal 10, first_result.attribute_map['teststatus']
  assert_equal 7, first_result.attribute_map['gene']
end

# TODO: empty placeholder - this test makes no assertions about
# unknown_status yet.
test 'unknown_status' do
end


test 'process_multiple_cdnavariants_protein_for_same_gene' do
multiple_cdnavariants_record = build_raw_record('pseudo_id1' => 'bob')
multiple_cdnavariants_record.raw_fields['genotype'] = 'BR1 c.3005delA, c.3119G>A (p.Ser1040Asn)'
Expand Down
Loading