Skip to content

Commit

Permalink
Feature/31243/leeds crc refactor (#76)
Browse files Browse the repository at this point in the history
* Initial commit to new Leeds CRC code

* Leeds CRC refactoring QA final code

* rubocop fixes

* rubocop fixes

* corrected script command

* Review comment , moved batch level code

* PR review comments changes

* More comments added

* PR review comments changes
  • Loading branch information
shilpigoeldev authored Jun 6, 2024
1 parent 5f5d0d4 commit 8c03df2
Show file tree
Hide file tree
Showing 8 changed files with 1,206 additions and 706 deletions.
37 changes: 17 additions & 20 deletions lib/import/colorectal/core/genocolorectal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ class Genocolorectal < Import::Germline::Genotype
'STK11' => 76,
'GREM1' => 1882,
'NTHL1' => 3108,
'CDH1' => 794 }.freeze
'CDH1' => 794,
'BRCA1' => 7,
'BRCA2' => 8,
'TP53' => 79,
'PALB2' => 3186,
'RNF43' => 5019,
'VHL' => 83 }.freeze

COLORECTAL_REGEX = /(?<apc>APC)|
(?<bmpr>BMPR1A)|
Expand All @@ -49,30 +55,21 @@ class Genocolorectal < Import::Germline::Genotype
(?<stk>STK11)|
(?<grem>GREM1)|
(?<nthl>NTHL1)|
(?<cdh1>CDH1)/ix # Added by Francesco
(?<cdh1>CDH1)|
(?<brca1>BRCA1)|
(?<brca2>BRCA2)|
(?<tp53>TP53)|
(?<palb2>PALB2)|
(?<rnf43>RNF43)|
(?<vhl>VHL)/ix # Added by Francesco

# ------------------------ Interrogators ------------------------------

# this is present in Newcastle storage manager
# Reports whether the recorded 'genetictestscope' attribute is the
# full-screen colorectal scope.
#
# @return [true, false, nil] true when the scope equals
#   'Full screen Colorectal Lynch or MMR', false for any other recorded
#   scope, and nil when no scope has been recorded
def full_screen?
  # An `if` without `else` evaluates to nil, which keeps "no scope recorded"
  # distinguishable from "scope recorded but different".
  if (recorded_scope = @attribute_map['genetictestscope'])
    recorded_scope == 'Full screen Colorectal Lynch or MMR'
  end
end

# Reports whether the recorded 'genetictestscope' attribute is the
# targeted colorectal scope.
#
# @return [true, false, nil] true when the scope equals
#   'Targeted Colorectal Lynch or MMR', false for any other recorded
#   scope, and nil when no scope has been recorded
def targeted?
  # An `if` without `else` evaluates to nil, which keeps "no scope recorded"
  # distinguishable from "scope recorded but different".
  if (recorded_scope = @attribute_map['genetictestscope'])
    recorded_scope == 'Targeted Colorectal Lynch or MMR'
  end
end

def add_gene_colorectal(colorectal_input)
case colorectal_input
when Integer
if [1432, 358, 577, 2744, 2804, 2808, 2850, 3394,
3408, 5000, 62, 72, 76, 1882, 3108, 794].include? colorectal_input
if [1432, 358, 577, 2744, 2804, 2808, 2850, 3394, 7, 8, 79, 3186, 5019,
3408, 5000, 62, 72, 76, 1882, 3108, 794, 83].include? colorectal_input

@attribute_map['gene'] = colorectal_input
@logger.debug "SUCCESSFUL gene parse for #{colorectal_input}"
Expand All @@ -92,7 +89,7 @@ def add_gene_colorectal(colorectal_input)
"#{colorectal_input}"
else
if colorectal_input.include? '/'
@logger.debug 'WARNING: string provided for gene extraction contains a slash,'\
@logger.debug 'WARNING: string provided for gene extraction contains a slash,' \
"possible multi-gene error: #{colorectal_input}"
end
case variable = COLORECTAL_REGEX.match(colorectal_input.strip)
Expand Down
1,013 changes: 379 additions & 634 deletions lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,7 @@ done
RR8 () {
PROV='RR8'
IFS=$'\n'
for x in $(find $DIRPATH/$FILEPATH -not -path "*/API_BETA_RETRIEVED/*" -type f -name "*MMR*.pseudo" -path "*/$PROV/*")
do
IFS="$OIFS"
$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV
done
}


RR8_2 () {
PROV='RR8'
IFS=$'\n'
for x in $(find $DIRPATH/$FILEPATH -not -path "*/API_BETA_RETRIEVED/*" -type f -name "*other*pseudo" -path "*/$PROV/*" )
# The two -name alternatives are grouped with \( ... \): find's implicit -a
# binds tighter than -o, so without the grouping the API_BETA_RETRIEVED
# exclusion only applied to *MMR* files and the $PROV path filter only applied
# to *other* files.
for x in $(find $DIRPATH/$FILEPATH -not -path "*/API_BETA_RETRIEVED/*" -type f \( -name "*MMR*.pseudo" -o -name "*other*.pseudo" \) -path "*/$PROV/*")
do
IFS="$OIFS"
$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV
Expand Down Expand Up @@ -173,4 +162,4 @@ $BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.
done
}

RR8; RR8_2; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; R1H
RR8; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; R1H
6 changes: 5 additions & 1 deletion lib/import/germline/genotype.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ def initialize(raw_record, attribute_map = {})
'pathogenic' => 5,
'likely deleterious' => 4,
'likely pathogenic' => 4,
'likely to be pathogenic' => 4,
'unknown' => 3,
'unclassified variant' => 3,
'vus' => 3,
'likely benign' => 2,
'likely to be benign' => 2,
'non-pathological variant' => 1,
'likely non-pathogenic' => 2,
'uncertain significance' => 3,
'benign' => 1 }.freeze

VARIANT_IMPACT_MAP = { 'missense' => 1,
Expand Down Expand Up @@ -336,7 +340,7 @@ def add_variant_impact(impact)
end

def add_variant_class(variant)
if variant.is_a?(Integer) && variant >= 1 && variant <= 5
if variant.is_a?(Integer) && variant >= 1 && variant <= 7
@attribute_map['variantpathclass'] = variant
elsif variant.is_a?(String)
if VARIANT_CLASS_MAP[variant.downcase.strip]
Expand Down
197 changes: 197 additions & 0 deletions lib/import/helpers/colorectal/providers/rr8/constants.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
module Import
  module Helpers
    module Colorectal
      module Providers
        module Rr8
          # Constants used by Leeds Colorectal
          module Constants
            # Lower-cased test description => test scope symbol.
            TEST_SCOPE_MAP_COLO = { 'carrier test' => :targeted_mutation,
                                    'confirmation' => :targeted_mutation,
                                    'diagnostic' => :full_screen,
                                    'diagnostic; fap' => :full_screen,
                                    'diagnostic; lynch' => :full_screen,
                                    'diagnostic; pms2' => :full_screen,
                                    'predictive' => :targeted_mutation,
                                    'predictive test' => :targeted_mutation,
                                    'r209.1' => :full_screen,
                                    'r209.2' => :full_screen,
                                    'r210.5' => :full_screen,
                                    'r210.2' => :full_screen,
                                    'r211.1' => :full_screen,
                                    'r211.2' => :full_screen,
                                    'familial' => :targeted_mutation }.freeze

            # Lower-cased test description => test type symbol.
            # NOTE(review): unlike TEST_SCOPE_MAP_COLO there is no entry for
            # 'diagnostic; pms2' or the 'r2xx.x' codes - confirm this is intended.
            TEST_TYPE_MAP_COLO = { 'carrier test' => :carrier,
                                   'diagnostic' => :diagnostic,
                                   'diagnostic; fap' => :diagnostic,
                                   'diagnostic; lynch' => :diagnostic,
                                   'confirmation' => :diagnostic,
                                   'predictive' => :predictive,
                                   'predictive test' => :predictive,
                                   'familial' => :predictive }.freeze

            # Field names listed for pass-through; how they are consumed is
            # decided by the handler (not visible in this file).
            PASS_THROUGH_FIELDS = %w[age consultantcode
                                     providercode
                                     receiveddate
                                     authoriseddate
                                     requesteddate
                                     servicereportidentifier
                                     organisationcode_testresult
                                     specimentype].freeze
            # Source field name => target field name.
            FIELD_NAME_MAPPINGS = { 'consultantcode' => 'practitionercode',
                                    'instigated_date' => 'requesteddate' }.freeze

            # Pipe-separated gene alternation interpolated into the regexps below.
            # Built from a word list so the string holds no newlines or indentation:
            # a multi-line quoted literal only works because every regexp that
            # interpolates it in this file uses the /x flag, and would silently
            # fail to match in any non-extended pattern or a plain split('|').
            # Gene order is kept as it determines alternation priority.
            GENES = %w[APC ATM BAP1 BMPR1A BRCA1 BRCA2 CHEK2 EPCAM FH FLCN GREM1 MET
                       MLH1 MSH2 MSH6 MUTYH NTHL1 PALB2 PMS2 POLD1 POLE PTEN RAD51C RAD51D
                       RNF43 SDHB SMAD4 STK11 TP53 VHL].join('|').freeze

            # rubocop:disable Lint/MixedRegexpCaptureTypes
            MMR_GENE_REGEX = /APC|BMPR1A|EPCAM|GREM1|MLH1|MSH2|MSH6|MUTYH|NTHL1|PMS2|POLD1|
                             POLE|PTEN|SMAD4|STK11/ix
            CDNA_REGEX = /c\.(?<cdna>[\w+>*\-]+)?/ix
            PROTEIN_REGEX = /\(?p\.\(?(?<impact>\w+)\)?/ix
            EXON_REGEX = /(?<exon>exon(s)?[\s\-\d]+)/ix
            # Zero-width lookahead: captures a gene that is followed, after word or
            # space characters only, by "fail".
            GENE_FAIL_REGEX = /(?=(?<gene>#{GENES})[\w\s]+fail)/ix
            # NOTE(review): the leading and trailing '.' are unescaped, so each
            # matches any single character - confirm that is intended.
            NOPATH_REGEX = /.No pathogenic variant was identified./i
            # Alternative phrasings of an exon-level del/dup/ins; each alternative
            # fills the same named groups (variant, exons).
            EXON_VARIANT_REGEX = /(?<variant>del|dup|ins).+ex(on)?s?\s?
                                 (?<exons>[0-9]+(-[0-9]+)?)|
                                 ex(on)?s?\s?(?<exons>[0-9]+(-[0-9]+)?)\s?
                                 (?<variant>del|dup|ins)|
                                 ex(on)?s?\s?(?<exons>[0-9]+\s?(\s?-\s?[0-9]+)?)\s?
                                 (?<variant>del|dup|ins)?|
                                 (?<variant>del|dup|ins)\s?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
                                 ex(on)?s?\s?(?<exons>[0-9]+(\sto\s[0-9]+)?)\s
                                 (?<variant>del|dup|ins)|
                                 x(?<exons>[0-9+-? ]+)+(?<variant>del|dup|ins)|
                                 ex(on)?s?[\s\w,]+(?<variant>del|dup|ins)|
                                 (?<variant>del|dup|ins)[\s\w]+gene/ix

            # Case-sensitive (no /i), unlike the other gene regexps here.
            COLORECTAL_GENES_REGEX = /(?<colorectal>#{GENES})/x

            VARIANT_REPORT_REGEX = /(?<report>(hetero|homo)zygo[\w\s\-.>():=,'+]+)+/ix

            EXONIC_REPORT_REGEX = /(?<report>(#{GENES})\sexon(s)?[\w\s\-.>():=,&]+)/ix

            PATHOGENIC_REPORT_REGEX = /(?<report>pathogenic\s(#{GENES})[\w\s\-.>():=,&]+)/ix

            TARG_GENE_REGEX = /(?<report>(#{GENES})[\w\s]+(c\.[\w\s\-.>():=,']+))/ix

            ABSENT_REGEX = /is absent in this patient/i
            NO_DEL_REGEX = /this patient does not have the deletion/i
            # The three pathogenicity regexps use `pathogenic(?!ity)` so that
            # "pathogenicity" is skipped. The former `pathogenic[^ity]` was a
            # character class: it required (and captured) one extra character
            # after "pathogenic", so it missed "pathogenic" at the end of the text
            # and returned values such as "pathogenic." in the capture.
            PATHOGENIC_REGEX = /(?<pathogenic>likely\snon-pathogenic|likely\spathogenic|
                               likely\s(to\sbe\s)?pathogenic|pathogenic(?!ity)|benign|
                               likely\s(to\sbe\s)?benign|
                               uncertain\s(clinical\s)?significance)/ix
            # Pathogenicity wording followed (anywhere later) by a gene name.
            PATHOGENIC_GENES_REGEX = /(?<pathogenic>likely\snon-pathogenic|likely\spathogenic|
                                     pathogenic(?!ity)|benign|likely\s(to\sbe\s)?benign|
                                     uncertain\s(clinical\s)?significance)[\w\s\W]*
                                     (?<assocgene>#{GENES})/ix
            # Gene name followed (anywhere later) by pathogenicity wording.
            GENE_PATH_REGEX = /(?<assocgene>#{GENES})[\w\s\W]*
                              (?<pathogenic>likely\snon-pathogenic|
                              likely\spathogenic|pathogenic(?!ity)|benign|
                              likely\s(to\sbe\s)?benign|
                              uncertain\s(clinical\s)?significance)/ix
            # rubocop:enable Lint/MixedRegexpCaptureTypes

            GENES_FILEPATH = 'lib/import/helpers/colorectal/providers/rr8/genes.yml'.freeze
            STATUS_FILEPATH = 'lib/import/helpers/colorectal/providers/rr8/status.yml'.freeze

            # Panel key => list of gene symbols in that panel.
            GENES_PANEL = {
              'apc' => %w[APC],
              'epcam' => %w[EPCAM],
              'mlh1' => %w[MLH1],
              'mlh1_msh2' => %w[MLH1 MSH2],
              'mlh1_msh2_msh6' => %w[MLH1 MSH2 MSH6],
              'mlh1_pms2' => %w[MLH1 PMS2],
              'pms2_mutyh' => %w[MUTYH PMS2],
              'msh2' => %w[MSH2],
              'msh6' => %w[MSH6],
              'mutyh' => %w[MUTYH],
              'pms2' => %w[PMS2]
            }.freeze

            # Status key => integer status code.
            STATUS_PANEL = {
              'unknown' => 4,
              'normal' => 1,
              'abnormal' => 2,
              'normal_var' => 10,
              'fail' => 9
            }.freeze

            # Lower-cased report strings; presumably those implying variant
            # pathogenicity class 5 - confirm against the handler.
            # Duplicate entries have been removed (membership is unchanged).
            VARIANT_CLASS_5 = [
              'conf mlpa +ve',
              'mlh1 confirmation +ve',
              'mlpa del confirmation +ve',
              'msh2 confirmation +ve',
              'mlpa +ve(large exon deletion) + seq -ve',
              'mlpa multi-exon deletion (with seq)',
              'ngs class m',
              'sequencing positive',
              'pred mlpa epcam del +ve',
              'pred mlpa msh2 del +ve',
              'pred mlpa msh2 dup +ve',
              'pred mlpa msh6 del +ve',
              'pred seq mlh1 +ve',
              'pred seq msh2 +ve',
              'pred seq msh6 +ve',
              'confirmation_seq_positive',
              'diagnostic apc +ve',
              'predictive_seq_positive',
              'seq mutation +ve',
              'biallelic pred positive',
              'mlpa pred positive',
              'pred complex mut +ve',
              'r802x homozygote (diag)',
              'seq pred positive',
              'apc - conf mlpa +ve',
              'fap diagn mutyh het.',
              'conf seq +ve (apc)',
              'conf seq +ve (mutyh)',
              'fap conf-pred +ve (apc)',
              'fap diagn +ve (apc)',
              'fap diagn +ve (mutyh c.het)',
              'fap diagn +ve (mutyh homoz)',
              '(v2) mutyh het.',
              'apc - conf seq +ve'
            ].freeze

            # Lower-cased report strings; presumably those implying variant
            # pathogenicity class 7 - confirm against the handler.
            # Duplicate entries have been removed (membership is unchanged).
            VARIANT_CLASS_7 = [
              'conf seq +ve',
              'mlpa -ve + seq (splice site mutation)',
              'mlpa -ve + seq +ve (nonsense/frameshift)',
              'ngs mlh1 truncating/frameshift',
              'ngs msh2 truncating/frameshift',
              'ngs multiple exon mlpa del',
              'pred mlpa +ve',
              'mlpa positive',
              'mlpa positive (diag)',
              'pred (other) positive',
              'generic c4/5',
              'lynch diag; c4/5',
              'r210_c4/5'
            ].freeze

            # Lower-cased classification phrases for non-pathogenic variants.
            NON_PATH_VARCLASS = [
              'likely benign',
              'likely to be benign',
              'non-pathological variant',
              'likely non-pathogenic',
              'benign'
            ].freeze

            # Report sentences that mention gene names; presumably removed from the
            # report text before gene extraction - confirm against the handler.
            EXCLUDE_STATEMENTS = [
              'Screening for mutations in MLH1, MSH2 and MSH6 is now in progress as requested.',
              'MLPA and MSH2 analysis was not requested.',
              'MLPA and MSH2 analysis were not requested.',
              'if MSH2 and MSH6 data analysis is required.',
              'No further screening for mutations in MLH1, MSH2 or MSH6 has been performed.',
              'developing further MSH2-related cancers',
              'developing MSH2-associated cancer'
            ].freeze
          end
        end
      end
    end
  end
end
Loading

0 comments on commit 8c03df2

Please sign in to comment.