From 8cf12ddc49a82c9936494d9ffba0d704b27f425b Mon Sep 17 00:00:00 2001 From: Nichola Imeson Date: Mon, 5 Aug 2024 11:32:01 +0100 Subject: [PATCH 1/3] Added in UCS method, method to handle variant with no gene, expanded exon regex --- .../st_george_old/st_george_handler_old.rb | 97 +++++++++++++++++-- 1 file changed, 89 insertions(+), 8 deletions(-) diff --git a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb index 171c74c9..122ddc40 100644 --- a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb +++ b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb @@ -11,7 +11,8 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler PASS_THROUGH_FIELDS = %w[age sex consultantcode collecteddate receiveddate authoriseddate servicereportidentifier providercode receiveddate sampletype].freeze - CDNA_REGEX = /c\.(?[0-9]+[^\s)]+)|c\.\[(?.*?)\]/i.freeze + CDNA_REGEX = /c\.(?[0-9]+[^\s)]+)|c\.\[(?.*?)\]|c\.\*(?[0-9]+[^\s)]+)/i.freeze + PROTEIN_REGEX = /p\.(?[a-z]+[0-9]+[a-z]+)| p\.(?\[)?(?\()?(?[a-z]+[0-9]+[a-z]+) @@ -47,7 +48,9 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler (?[0-9]+(?\sto\s[0-9]+))| (?del|dup|ins)(?\s)?(?[0-9]+(?-[0-9]+)?)| ex(?on)?(?s)?\s(?[0-9]+(?\sto\s[0-9]+)?)\s - (?del|dup|ins)/ix.freeze + (?del|dup|ins)|(?dup|del|ins)\s?ex\s?(?\d+)| + (?dup|del|ins)\s?x\s?(?\d+(-|_)\d+)/ix.freeze + DEPRECATED_BRCA_NAMES_REGEX = /B1|BR1|BRCA\s1|B2|BR2|BRCA\s2/i.freeze @@ -138,8 +141,8 @@ def process_deprecated_gene(deprecated_gene, positive_genes) end def process_fullscreen_records(genotype, record, positive_genes, genotypes) - if normal?(record) - normal_full_screen(genotype, genotypes) + if ucs_variant?(record) + process_ucs_variants(genotype, genotypes, positive_genes, record) elsif failed_test?(record) failed_full_screen(genotype, genotypes) elsif positive_cdna?(record) || positive_exonvariant?(record) @@ -148,10 +151,53 @@ def process_fullscreen_records(genotype, record, positive_genes, genotypes) else single_variant_full_screen(genotype, genotypes, positive_genes, record) end + elsif normal?(record) + normal_full_screen(genotype, genotypes) + else + unknown_status(genotype, genotypes, positive_genes, record) end genotypes end + def ucs_variant?(record) + record.raw_fields['genotype'].scan(/ucs/i).size.positive? + end + + def process_ucs_variants(genotype, genotypes, positive_genes, record) + if ashkenazi?(record) || polish?(record) || full_screen?(record) + negative_gene = %w[BRCA1 BRCA2] - positive_genes + genotype_dup = genotype.dup + genotype_dup.add_gene(negative_gene.join) + genotype_dup.add_status(1) + genotypes.append(genotype_dup) + genotype.add_gene(positive_genes.join) + genotype.add_status(10) + else + process_single_gene(genotype, record) + genotype.add_status(10) + end + + genotypes.append(genotype) + end + + def unknown_status(genotype, genotypes, positive_genes, record) + if ashkenazi?(record) || polish?(record) || full_screen?(record) + negative_gene = %w[BRCA1 BRCA2] - positive_genes + genotype_dup = genotype.dup + genotype_dup.add_gene(negative_gene.join) + genotype_dup.add_status(1) + genotypes.append(genotype_dup) + genotype.add_gene(positive_genes.join) + genotype.add_status(4) + else + process_single_gene(genotype, record) + genotype.add_status(4) + end + genotypes.append(genotype) + end + + + def normal_full_screen(genotype, genotypes) %w[BRCA1 BRCA2].each do |negative_gene| genotype_dup = genotype.dup @@ -183,12 +229,17 @@ def single_variant_full_screen(genotype, genotypes, positive_genes, record) end def process_targeted_records(positive_genes, genotype, record, genotypes) - if normal?(record) - process_normal_targeted(genotype, record, genotypes) + if ucs_variant?(record) + process_ucs_variants(genotype, genotypes, positive_genes, record) elsif failed_test?(record) process_failed_targeted(genotype, record, genotypes) elsif positive_cdna?(record) || positive_exonvariant?(record) process_positive_targeted(record, positive_genes, genotype, genotypes) + elsif normal?(record) + process_normal_targeted(genotype, record, genotypes) + else + unknown_status(genotype, genotypes, positive_genes, record) + end genotypes end @@ -266,17 +317,45 @@ def add_variants_multiple_results(variants, genotype, genotypes) end def process_multiple_positive_variants(positive_genes, genotype, record, genotypes) + if positive_genes.flatten.uniq.size > 1 variants = process_multi_genes_rec(record, positive_genes) elsif positive_genes.flatten.uniq.size == 1 variants = process_uniq_gene_rec(record, positive_genes) + elsif positive_genes.empty? + process_multi_variants_no_gene(record, genotype, genotypes) end - add_variants_multiple_results(variants, genotype, genotypes) unless variants.nil? genotypes end + def process_multi_variants_no_gene(record, genotype, genotypes) + record.raw_fields['genotype'].scan(DELIMETER_REGEX) + raw_genotypes = record.raw_fields['genotype'].split($LAST_MATCH_INFO[0]) + variants =[] + raw_genotypes.each do |raw_genotype| + genotype_dup = genotype.dup + mutation = get_cdna_mutation(raw_genotype) + protein = get_protein_impact(raw_genotype) + genotype_dup.add_gene_location(mutation[0]) unless mutation.nil? + genotype_dup.add_protein_impact(protein[0]) unless protein.nil? + genotype_dup.add_status(2) + genotypes.append(genotype_dup) + end + create_empty_brca_tests(record, genotype, genotypes) if full_screen?(record) + end + + def create_empty_brca_tests(record, genotype, genotypes) + fs_genes = ['BRCA1', 'BRCA2'] + fs_genes.each do |fs_gene| + genotype_dup = genotype.dup + genotype_dup.add_gene(fs_gene) + genotype_dup.add_status(4) + genotypes.append(genotype_dup) + end + end + def process_multi_genes_rec(record, positive_genes) if record.raw_fields['genotype'].scan(DELIMETER_REGEX).size > 1 variants = process_single_variant(record, positive_genes) @@ -434,7 +513,9 @@ def void_genetictestscope?(record) return if record.raw_fields['moleculartestingtype'].nil? record.raw_fields['moleculartestingtype'].empty? || - record.raw_fields['moleculartestingtype'] == 'Store' + record.raw_fields['moleculartestingtype'] == 'Store' || + record.raw_fields['moleculartestingtype'] == 'Reclassification of previous result' + end end # rubocop:enable Metrics/ClassLength From 8d26f48bbd90db9590ae57ccaf48e9e110096d06 Mon Sep 17 00:00:00 2001 From: Nichola Imeson Date: Wed, 7 Aug 2024 11:29:36 +0100 Subject: [PATCH 2/3] working on tests --- .../st_george_old/st_george_handler_old.rb | 29 +++++++------ .../st_george_handler_old_test.rb | 43 +++++++++++++++---- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb index 122ddc40..d38702d2 100644 --- a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb +++ b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb @@ -191,13 +191,13 @@ def unknown_status(genotype, genotypes, positive_genes, record) genotype.add_status(4) else process_single_gene(genotype, record) + genotype.add_gene(positive_genes.join) if !positive_genes.nil? genotype.add_status(4) end genotypes.append(genotype) end - def normal_full_screen(genotype, genotypes) %w[BRCA1 BRCA2].each do |negative_gene| genotype_dup = genotype.dup @@ -331,20 +331,23 @@ def process_multiple_positive_variants(positive_genes, genotype, record, genotyp end def process_multi_variants_no_gene(record, genotype, genotypes) + return if record.raw_fields['genotype'].nil? record.raw_fields['genotype'].scan(DELIMETER_REGEX) - raw_genotypes = record.raw_fields['genotype'].split($LAST_MATCH_INFO[0]) - variants =[] - raw_genotypes.each do |raw_genotype| - genotype_dup = genotype.dup - mutation = get_cdna_mutation(raw_genotype) - protein = get_protein_impact(raw_genotype) - genotype_dup.add_gene_location(mutation[0]) unless mutation.nil? - genotype_dup.add_protein_impact(protein[0]) unless protein.nil? - genotype_dup.add_status(2) - genotypes.append(genotype_dup) - end - create_empty_brca_tests(record, genotype, genotypes) if full_screen?(record) + unless $LAST_MATCH_INFO.nil? + raw_genotypes = record.raw_fields['genotype'].split($LAST_MATCH_INFO[0]) + variants =[] + raw_genotypes.each do |raw_genotype| + genotype_dup = genotype.dup + mutation = get_cdna_mutation(raw_genotype) + protein = get_protein_impact(raw_genotype) + genotype_dup.add_gene_location(mutation[0]) unless mutation.nil? + genotype_dup.add_protein_impact(protein[0]) unless protein.nil? + genotype_dup.add_status(2) + genotypes.append(genotype_dup) + end end + create_empty_brca_tests(record, genotype, genotypes) if full_screen?(record) + end def create_empty_brca_tests(record, genotype, genotypes) fs_genes = ['BRCA1', 'BRCA2'] diff --git a/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb b/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb index dbad3a4b..c491d7c5 100644 --- a/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb +++ b/test/lib/import/brca/providers/st_george_old/st_george_handler_old_test.rb @@ -134,6 +134,33 @@ def setup assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange'] end + # test 'process_multi_variants_no_gene' do + # multiple_variants_no_gene_record = build_raw_record('pseudo_id1' => 'bob') + # multiple_variants_no_gene_record.raw_fields['genotype'] = 'c.666A>G + c.6275_6276del' + # genotypes = [] + # variants = @handler.process_multi_variants_no_gene(multiple_variants_no_gene_record, @genotype, genotypes) + # assert_equal 2, variants[0].attribute_map['teststatus'] + # assert_equal 2, variants[1].attribute_map['teststatus'] + # assert_equal nil, variants[0].attribute_map['gene'] + # assert_equal nil, variants[1].attribute_map['gene'] + # assert_equal 'c.666A>G', variants[0].attribute_map['codingdnasequencechange'] + # assert_equal 'c.6275_6276del', variants[1].attribute_map['codingdnasequencechange'] + # end + + test 'process_ucs_variants' do + ucs_variant_record = build_raw_record('pseudo_id1' => 'bob') + ucs_variant_record.raw_fields['genotype'] = 'N + BR1 UCS' + positive_genes=['BRCA1', 'BRCA2'] + genotypes = [] + variants = @handler.process_ucs_variants(@genotype, genotypes, positive_genes, ucs_variant_record) + assert_equal 10, variants[0].attribute_map['teststatus'] + assert_equal 7, variants[0].attribute_map['gene'] + end + + test 'unknown_status' do + end + + test 'process_multiple_cdnavariants_protein_for_same_gene' do multiple_cdnavariants_record = build_raw_record('pseudo_id1' => 'bob') multiple_cdnavariants_record.raw_fields['genotype'] = 'BR1 c.3005delA, c.3119G>A (p.Ser1040Asn)' @@ -287,22 +314,22 @@ def setup private def clinical_json - { sex: '2', + { sex: '1', hospitalnumber: '332061', receiveddate: '1998-08-13T00:00:00.000+01:00', - servicereportidentifier: 'D11585', + servicereportidentifier: 'D12345', specimentype: '5', age: 42 }.to_json end def rawtext_clinical_json { sex: 'Female', - 'g number' => '4241', - genotype: 'BR2 c.6275_6276delTT', - providercode: 'RMHS', - referralorganisation: 'Royal Marsden Hospital', - consultantname: 'Eeles', - servicereportidentifier: 'D11585', + 'g number' => '1234', + genotype: 'BR2 c.6135_6136delAA', + providercode: 'PROV', + referralorganisation: 'Hospital', + consultantname: 'Consultant', + servicereportidentifier: 'D12345', servicelevel: 'NHS', collecteddate: '', receiveddate: '1998-08-13 00:00:00', From 722bc55d2db0a05655fd79173bd6cd2a20e66db8 Mon Sep 17 00:00:00 2001 From: Nichola Imeson Date: Fri, 9 Aug 2024 12:02:21 +0100 Subject: [PATCH 3/3] added method in to create BRCA1+2 records as standard when we don't know the FS gene in question --- .../brca/providers/st_george_old/st_george_handler_old.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb index d38702d2..b857ec32 100644 --- a/lib/import/brca/providers/st_george_old/st_george_handler_old.rb +++ b/lib/import/brca/providers/st_george_old/st_george_handler_old.rb @@ -64,6 +64,7 @@ def process_fields(record) genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, PASS_THROUGH_FIELDS) + add_organisationcode_testresult(genotype) add_moleculartestingtype(genotype, record) process_genetictestcope(genotype, record) @@ -72,6 +73,7 @@ def process_fields(record) @batch.provider = 'RJ7' @batch.registryid = 'RJ7' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end def add_organisationcode_testresult(genotype) @@ -283,6 +285,7 @@ def process_single_gene(genotype, record) else @logger.debug "FAILED gene parse for: #{record.raw_fields['genotype']}" end + end # rubocop:enable Lint/DuplicateBranch @@ -513,8 +516,8 @@ def failed_test?(record) end def void_genetictestscope?(record) - return if record.raw_fields['moleculartestingtype'].nil? - + return if record.raw_fields['moleculartestingtype'].nil? + record.raw_fields['moleculartestingtype'].empty? || record.raw_fields['moleculartestingtype'] == 'Store' || record.raw_fields['moleculartestingtype'] == 'Reclassification of previous result'