diff --git a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb index 171c74c9..b857ec32 100644 --- a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb +++ b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb @@ -11,7 +11,8 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler PASS_THROUGH_FIELDS = %w[age sex consultantcode collecteddate receiveddate authoriseddate servicereportidentifier providercode receiveddate sampletype].freeze - CDNA_REGEX = /c\.(?[0-9]+[^\s)]+)|c\.\[(?.*?)\]/i.freeze + CDNA_REGEX = /c\.(?[0-9]+[^\s)]+)|c\.\[(?.*?)\]|c\.\*(?[0-9]+[^\s)]+)/i.freeze + PROTEIN_REGEX = /p\.(?[a-z]+[0-9]+[a-z]+)| p\.(?\[)?(?\()?(?[a-z]+[0-9]+[a-z]+) @@ -47,7 +48,9 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler (?[0-9]+(?\sto\s[0-9]+))| (?del|dup|ins)(?\s)?(?[0-9]+(?-[0-9]+)?)| ex(?on)?(?s)?\s(?[0-9]+(?\sto\s[0-9]+)?)\s - (?del|dup|ins)/ix.freeze + (?del|dup|ins)|(?dup|del|ins)\s?ex\s?(?\d+)| + (?dup|del|ins)\s?x\s?(?\d+(-|_)\d+)/ix.freeze + DEPRECATED_BRCA_NAMES_REGEX = /B1|BR1|BRCA\s1|B2|BR2|BRCA\s2/i.freeze @@ -61,6 +64,7 @@ def process_fields(record) genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, PASS_THROUGH_FIELDS) + add_organisationcode_testresult(genotype) add_moleculartestingtype(genotype, record) process_genetictestcope(genotype, record) @@ -69,6 +73,7 @@ def process_fields(record) @batch.provider = 'RJ7' @batch.registryid = 'RJ7' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end def add_organisationcode_testresult(genotype) @@ -138,8 +143,8 @@ def process_deprecated_gene(deprecated_gene, positive_genes) end def process_fullscreen_records(genotype, record, positive_genes, genotypes) - if normal?(record) - normal_full_screen(genotype, genotypes) + if ucs_variant?(record) + process_ucs_variants(genotype, genotypes, positive_genes, record) elsif failed_test?(record) failed_full_screen(genotype, genotypes) elsif positive_cdna?(record) || positive_exonvariant?(record) @@ -148,10 +153,53 @@ def process_fullscreen_records(genotype, record, positive_genes, genotypes) else single_variant_full_screen(genotype, genotypes, positive_genes, record) end + elsif normal?(record) + normal_full_screen(genotype, genotypes) + else + unknown_status(genotype, genotypes, positive_genes, record) end genotypes end + def ucs_variant?(record) + record.raw_fields['genotype'].scan(/ucs/i).size.positive? + end + + def process_ucs_variants(genotype, genotypes, positive_genes, record) + if ashkenazi?(record) || polish?(record) || full_screen?(record) + negative_gene = %w[BRCA1 BRCA2] - positive_genes + genotype_dup = genotype.dup + genotype_dup.add_gene(negative_gene.join) + genotype_dup.add_status(1) + genotypes.append(genotype_dup) + genotype.add_gene(positive_genes.join) + genotype.add_status(10) + else + process_single_gene(genotype, record) + genotype.add_status(10) + end + + genotypes.append(genotype) + end + + def unknown_status(genotype, genotypes, positive_genes, record) + if ashkenazi?(record) || polish?(record) || full_screen?(record) + negative_gene = %w[BRCA1 BRCA2] - positive_genes + genotype_dup = genotype.dup + genotype_dup.add_gene(negative_gene.join) + genotype_dup.add_status(1) + genotypes.append(genotype_dup) + genotype.add_gene(positive_genes.join) + genotype.add_status(4) + else + process_single_gene(genotype, record) + genotype.add_gene(positive_genes.join) if !positive_genes.nil? + genotype.add_status(4) + end + genotypes.append(genotype) + end + + def normal_full_screen(genotype, genotypes) %w[BRCA1 BRCA2].each do |negative_gene| genotype_dup = genotype.dup @@ -183,12 +231,17 @@ def single_variant_full_screen(genotype, genotypes, positive_genes, record) end def process_targeted_records(positive_genes, genotype, record, genotypes) - if normal?(record) - process_normal_targeted(genotype, record, genotypes) + if ucs_variant?(record) + process_ucs_variants(genotype, genotypes, positive_genes, record) elsif failed_test?(record) process_failed_targeted(genotype, record, genotypes) elsif positive_cdna?(record) || positive_exonvariant?(record) process_positive_targeted(record, positive_genes, genotype, genotypes) + elsif normal?(record) + process_normal_targeted(genotype, record, genotypes) + else + unknown_status(genotype, genotypes, positive_genes, record) + end genotypes end @@ -232,6 +285,7 @@ def process_single_gene(genotype, record) else @logger.debug "FAILED gene parse for: #{record.raw_fields['genotype']}" end + end # rubocop:enable Lint/DuplicateBranch @@ -266,17 +320,48 @@ def add_variants_multiple_results(variants, genotype, genotypes) end def process_multiple_positive_variants(positive_genes, genotype, record, genotypes) + if positive_genes.flatten.uniq.size > 1 variants = process_multi_genes_rec(record, positive_genes) elsif positive_genes.flatten.uniq.size == 1 variants = process_uniq_gene_rec(record, positive_genes) + elsif positive_genes.empty? + process_multi_variants_no_gene(record, genotype, genotypes) end - add_variants_multiple_results(variants, genotype, genotypes) unless variants.nil? genotypes end + def process_multi_variants_no_gene(record, genotype, genotypes) + return if record.raw_fields['genotype'].nil? + record.raw_fields['genotype'].scan(DELIMETER_REGEX) + unless $LAST_MATCH_INFO.nil? + raw_genotypes = record.raw_fields['genotype'].split($LAST_MATCH_INFO[0]) + variants =[] + raw_genotypes.each do |raw_genotype| + genotype_dup = genotype.dup + mutation = get_cdna_mutation(raw_genotype) + protein = get_protein_impact(raw_genotype) + genotype_dup.add_gene_location(mutation[0]) unless mutation.nil? + genotype_dup.add_protein_impact(protein[0]) unless protein.nil? + genotype_dup.add_status(2) + genotypes.append(genotype_dup) + end + end + create_empty_brca_tests(record, genotype, genotypes) if full_screen?(record) + end + + def create_empty_brca_tests(record, genotype, genotypes) + fs_genes = ['BRCA1', 'BRCA2'] + fs_genes.each do |fs_gene| + genotype_dup = genotype.dup + genotype_dup.add_gene(fs_gene) + genotype_dup.add_status(4) + genotypes.append(genotype_dup) + end + end + def process_multi_genes_rec(record, positive_genes) if record.raw_fields['genotype'].scan(DELIMETER_REGEX).size > 1 variants = process_single_variant(record, positive_genes) @@ -431,10 +516,12 @@ def failed_test?(record) end def void_genetictestscope?(record) - return if record.raw_fields['moleculartestingtype'].nil? - + return if record.raw_fields['moleculartestingtype'].nil? + record.raw_fields['moleculartestingtype'].empty? || - record.raw_fields['moleculartestingtype'] == 'Store' + record.raw_fields['moleculartestingtype'] == 'Store' || + record.raw_fields['moleculartestingtype'] == 'Reclassification of previous result' + end end # rubocop:enable Metrics/ClassLength diff --git a/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb b/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb index 9bb67c25..07312bea 100644 --- a/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb +++ b/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb @@ -134,6 +134,33 @@ def setup assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange'] end + # test 'process_multi_variants_no_gene' do + # multiple_variants_no_gene_record = build_raw_record('pseudo_id1' => 'bob') + # multiple_variants_no_gene_record.raw_fields['genotype'] = 'c.666A>G + c.6275_6276del' + # genotypes = [] + # variants = @handler.process_multi_variants_no_gene(multiple_variants_no_gene_record, @genotype, genotypes) + # assert_equal 2, variants[0].attribute_map['teststatus'] + # assert_equal 2, variants[1].attribute_map['teststatus'] + # assert_equal nil, variants[0].attribute_map['gene'] + # assert_equal nil, variants[1].attribute_map['gene'] + # assert_equal 'c.666A>G', variants[0].attribute_map['codingdnasequencechange'] + # assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange'] + # end + + test 'process_ucs_variants' do + ucs_variant_record = build_raw_record('pseudo_id1' => 'bob') + ucs_variant_record.raw_fields['genotype'] = 'N + BR1 UCS' + positive_genes=['BRCA1', 'BRCA2'] + genotypes = [] + variants = @handler.process_ucs_variants(@genotype, genotypes, positive_genes, ucs_variant_record) + assert_equal 10, variants[0].attribute_map['teststatus'] + assert_equal 7, variants[0].attribute_map['gene'] + end + + test 'unknown_status' do + end + + test 'process_multiple_cdnavariants_protein_for_same_gene' do multiple_cdnavariants_record = build_raw_record('pseudo_id1' => 'bob') multiple_cdnavariants_record.raw_fields['genotype'] = 'BR1 c.3005delA, c.3119G>A (p.Ser1040Asn)'