diff --git a/clinvar_data/conversion/dict_to_pb.py b/clinvar_data/conversion/dict_to_pb.py index 4688b47..cb65a7c 100644 --- a/clinvar_data/conversion/dict_to_pb.py +++ b/clinvar_data/conversion/dict_to_pb.py @@ -97,6 +97,14 @@ def ensure_str(value: str | dict[str, str]) -> str: return value +def ensure_str_optional(value: None | str | dict[str, str]) -> None | str: + """Similar to ``ensure_str`` but allows ``None``.""" + if value is None: + return None + else: + return ensure_str(value) + + class ConvertGeneVariantRelationship: """Static method helper for converting XML data to to ``GeneVariantRelationship``.""" @@ -1669,7 +1677,7 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> AggregatedGermlineClassifi tag_germline_classification["ReviewStatus"] ) ) - description: str | None = tag_germline_classification.get("Description") + description: str | None = ensure_str_optional(tag_germline_classification.get("Description")) explanation: Comment | None = None if "Explanation" in tag_germline_classification: explanation = ConvertComment.xmldict_data_to_pb( @@ -1764,7 +1772,7 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> AggregatedSomaticClinicalI tag_somatic_clinical_impact["ReviewStatus"] ) ) - description: str | None = tag_somatic_clinical_impact.get("Description") + description: str | None = ensure_str_optional(tag_somatic_clinical_impact.get("Description")) # Parse out Citation, XRef, Comment tags. cxcs = cls.parse_citations_xrefs_comments(tag_somatic_clinical_impact) @@ -1848,7 +1856,7 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> AggregatedOncogenicityClas tag_oncogenicity_classification["ReviewStatus"] ) ) - description: str | None = tag_oncogenicity_classification.get("Description") + description: str | None = ensure_str_optional(tag_oncogenicity_classification.get("Description")) # Parse out Citation, XRef, Comment tags. 
cxcs = cls.parse_citations_xrefs_comments(tag_oncogenicity_classification) @@ -1977,7 +1985,7 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> ClinicalSignificance: review_status = ConvertSubmitterReviewStatus.xmldict_data_to_pb( tag_clinical_significance["ReviewStatus"] ) - description: str | None = tag_clinical_significance.get("Description") + description: str | None = ensure_str_optional(tag_clinical_significance.get("Description")) explanation: Comment | None = None if "Explanation" in tag_clinical_significance: explanation = ConvertComment.xmldict_data_to_pb( @@ -2187,18 +2195,18 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> ClassificationScv: ensure_str(tag_classification["ReviewStatus"]) ) ) - germline_classification: str | None = tag_classification.get("GermlineClassification") + germline_classification: str | None = ensure_str_optional(tag_classification.get("GermlineClassification")) somatic_clinical_impact: ClassificationScv.SomaticClinicalImpact | None = None if "SomaticClinicalImpact" in tag_classification: somatic_clinical_impact = cls.convert_somatic_clinical_impact( {"SomaticClinicalImpact": tag_classification["SomaticClinicalImpact"]} ) - oncogenicity_classification: str | None = tag_classification.get( + oncogenicity_classification: str | None = ensure_str_optional(tag_classification.get( "OncogenicityClassification" - ) - explanation_of_classification: str | None = tag_classification.get( + )) + explanation_of_classification: str | None = ensure_str_optional(tag_classification.get( "ExplanationOfClassification" - ) + )) classification_scores: list[ClassificationScv.ClassificationScore] | None = None if "ClassificationScore" in tag_classification: classification_scores = [ @@ -2785,7 +2793,7 @@ def xmldict_data_to_pb(cls, tag: dict[str, Any]) -> FamilyData: assert "FamilyData" in tag tag_inner: dict[str, Any] = tag["FamilyData"] - family_history: str | None = tag_inner.get("FamilyHistory") + family_history: str | None = 
ensure_str_optional(tag_inner.get("FamilyHistory")) num_families: int | None = None if "@NumFamilies" in tag_inner: num_families = int(tag_inner["@NumFamilies"]) @@ -3016,8 +3024,8 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> Sample: if "Origin" in tag_sample: origin = ConvertOrigin.xmldict_data_to_pb(ensure_str(tag_sample["Origin"])) - ethnicity: str | None = tag_sample.get("Ethnicity") - geographic_origin: str | None = tag_sample.get("GeographicOrigin") + ethnicity: str | None = ensure_str_optional(tag_sample.get("Ethnicity")) + geographic_origin: str | None = ensure_str_optional(tag_sample.get("GeographicOrigin")) val_tissue: str | dict[str, str] | None = tag_sample.get("Tissue") if val_tissue: tissue = ensure_str(val_tissue) @@ -3033,14 +3041,14 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> Sample: somatic_variant_allele_fraction: str | None = None if "SomaticVariantAlleleFraction" in tag_sample: somatic_variant_allele_fraction = tag_sample["SomaticVariantAlleleFraction"] - cell_line: str | None = tag_sample.get("CellLine") + cell_line: str | None = ensure_str_optional(tag_sample.get("CellLine")) species: Species | None = None if "Species" in tag_sample: species = ConvertSpecies.xmldict_data_to_pb(tag_sample) ages: list[Sample.Age] | None = None if "Age" in tag_sample: ages = [cls.convert_age({"Age": entry}) for entry in cls.ensure_list(tag_sample["Age"])] - strain: str | None = tag_sample.get("Strain") + strain: str | None = ensure_str_optional(tag_sample.get("Strain")) affected_status: Sample.AffectedStatus.ValueType | None = None if "AffectedStatus" in tag_sample: affected_status = cls.convert_affected_status(ensure_str(tag_sample["AffectedStatus"])) @@ -3462,7 +3470,7 @@ def xmldict_data_to_pb(cls, value: dict[str, Any]) -> AlleleScv: # noqa: C901 if "Name" in tag_sa: assert isinstance(tag_sa["Name"], (dict, str)), f"is: {tag_sa['Name']}" name = ConvertOtherName.xmldict_data_to_pb({"Name": tag_sa["Name"]}) - variant_type: str | None = 
tag_sa.get("VariantType") + variant_type: str | None = ensure_str_optional(tag_sa.get("VariantType")) location: Location | None = None if "Location" in tag_sa: location = ConvertLocation.xmldict_data_to_pb({"Location": tag_sa["Location"]}) @@ -3581,7 +3589,7 @@ def xmldict_data_to_pb(cls, tag: dict[str, Any]) -> HaplotypeScv: ConvertAlleleScv.xmldict_data_to_pb({"SimpleAllele": entry}) for entry in cls.ensure_list(tag_genotype["SimpleAllele"]) ] - name: str | None = tag_genotype.get("Name") + name: str | None = ensure_str_optional(tag_genotype.get("Name")) other_names: list[OtherName] | None = None if "OtherNameList" in tag_genotype: other_names = [ @@ -3677,7 +3685,7 @@ def xmldict_data_to_pb(cls, tag: dict[str, Any]) -> GenotypeScv: ConvertHaplotypeScv.xmldict_data_to_pb({"Haplotype": entry}) for entry in cls.ensure_list(tag_genotype["Haplotype"]) ] - name: str | None = tag_genotype.get("Name") + name: str | None = ensure_str_optional(tag_genotype.get("Name")) other_names: list[OtherName] | None = None if "OtherNameList" in tag_genotype: other_names = [ @@ -4141,8 +4149,8 @@ def xmldict_data_to_pb(cls, tag: dict[str, Any]) -> ClinicalAssertion: # noqa: ConvertCitation.xmldict_data_to_pb({"Citation": entry}) for entry in cls.ensure_list(tag_ca["CitationList"]["Citation"]) ] - study_name: str | None = tag_ca.get("StudyName") - study_description: str | None = tag_ca.get("StudyDescription") + study_name: str | None = ensure_str_optional(tag_ca.get("StudyName")) + study_description: str | None = ensure_str_optional(tag_ca.get("StudyDescription")) comments: list[Comment] | None = None if "Comment" in tag_ca: comments = [ @@ -4247,7 +4255,7 @@ def convert_gene(cls, tag: dict[str, Any]) -> Allele.Gene: properties: list[str] | None = None if "Property" in tag_gene: properties = [entry for entry in cls.ensure_list(tag_gene["Property"])] - symbol: str | None = tag_gene.get("Symbol") + symbol: str | None = ensure_str_optional(tag_gene.get("Symbol")) full_name: str = 
tag_gene["@FullName"] gene_id: int = int(tag_gene["@GeneID"]) hgnc_id: str | None = None @@ -4348,7 +4356,7 @@ def xmldict_data_to_pb(cls, tag: dict[str, Any]) -> Allele: for entry in cls.ensure_list(tag_allele["GeneList"]["Gene"]) ] name: str = tag_allele["Name"] - canonical_spdi: str | None = tag_allele.get("CanonicalSPDI") + canonical_spdi: str | None = ensure_str_optional(tag_allele.get("CanonicalSPDI")) variant_types: list[str] | None = None if "VariantType" in tag_allele: variant_types = [entry for entry in cls.ensure_list(tag_allele["VariantType"])] diff --git a/tests/clinvar_data/data/regressions/2024-07-01/allele-id-1706807.json b/tests/clinvar_data/data/regressions/2024-07-01/allele-id-1706807.json new file mode 100644 index 0000000..d7fac99 --- /dev/null +++ b/tests/clinvar_data/data/regressions/2024-07-01/allele-id-1706807.json @@ -0,0 +1,568 @@ +{ + "RecordStatus": "current", + "Species": "Homo sapiens", + "ClassifiedRecord": { + "SimpleAllele": { + "@AlleleID": "1706807", + "@VariationID": "1708509", + "GeneList": { + "Gene": { + "@Symbol": "MSL3", + "@FullName": "MSL complex subunit 3", + "@GeneID": "10943", + "@HGNC_ID": "HGNC:7370", + "@Source": "submitted", + "@RelationshipType": "within single gene", + "Location": { + "CytogeneticLocation": "Xp22.2", + "SequenceLocation": [ + { + "@Assembly": "GRCh38", + "@AssemblyAccessionVersion": "GCF_000001405.38", + "@AssemblyStatus": "current", + "@Chr": "X", + "@Accession": "NC_000023.11", + "@start": "11758159", + "@stop": "11775772", + "@display_start": "11758159", + "@display_stop": "11775772", + "@Strand": "+" + }, + { + "@Assembly": "GRCh37", + "@AssemblyAccessionVersion": "GCF_000001405.25", + "@AssemblyStatus": "previous", + "@Chr": "X", + "@Accession": "NC_000023.10", + "@start": "11776277", + "@stop": "11793871", + "@display_start": "11776277", + "@display_stop": "11793871", + "@Strand": "+" + } + ] + }, + "OMIM": "300609", + "Haploinsufficiency": { + "@last_evaluated": "2023-08-23", + "@ClinGen": 
"https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MSL3", + "#text": "Sufficient evidence for dosage pathogenicity" + }, + "Triplosensitivity": { + "@last_evaluated": "2023-08-23", + "@ClinGen": "https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MSL3", + "#text": "No evidence available" + } + } + }, + "Name": "NM_078629.4(MSL3):c.1282-249_1382-154del", + "VariantType": "Deletion", + "Location": { + "CytogeneticLocation": "Xp22.2", + "SequenceLocation": [ + { + "@Assembly": "GRCh38", + "@AssemblyAccessionVersion": "GCF_000001405.38", + "@forDisplay": "true", + "@AssemblyStatus": "current", + "@Chr": "X", + "@Accession": "NC_000023.11", + "@start": "11771907", + "@stop": "11772467", + "@display_start": "11771907", + "@display_stop": "11772467", + "@variantLength": "561", + "@positionVCF": "11771906", + "@referenceAlleleVCF": "AAAGGACTAAAGAACCTAAACAGTTACAGTTATTTATAGAGAATAACTGTTGCGAGTTAAGAAATTAAAGACTTTCTTGATCTATTTTAAGTGTTTTTGCCTTTCATCTTTGTTTTTTGTAAGCTTGTGTTTATTTTGATCCAATCACTTATTTTTTCCCCTCTACGTACAATTTACTGTCATAATTGTTAGGTGGTTGGAAGTGTCTCCATTAAGAATAACAAAAGCATTCAATTCGTGCTTTTTCCAGGTCCTCTCCTGGAAGCTTGTGCCTGACAATTACCCCCCAGGTGACCAGCCGCCTCCACCCTCTTACATTTATGGGGCACAACATTTGCTGCGATTGTTTGGTAAGAATCCTGGTTCCTGCCTTCTTTCCATTTTTCATTTTGTATTCTCTTGTTAGCTTTCTGTACACCTTGTGGTTTGGGCTATAATTATCTAACTAAGAAATTTGGGGATCCAAGAAAAATTAAAAGTAATCTATCAAAGAAAAGGCATTGAATGAATCTTAAATTTCATAAAAATTATGTTTGTAATATTTGTACTGCTACTCTGTCTT", + "@alternateAlleleVCF": "A" + }, + { + "@Assembly": "GRCh37", + "@AssemblyAccessionVersion": "GCF_000001405.25", + "@AssemblyStatus": "previous", + "@Chr": "X", + "@Accession": "NC_000023.10", + "@start": "11790026", + "@stop": "11790586", + "@display_start": "11790026", + "@display_stop": "11790586", + "@variantLength": "561", + "@positionVCF": "11790025", + "@referenceAlleleVCF": 
"AAAGGACTAAAGAACCTAAACAGTTACAGTTATTTATAGAGAATAACTGTTGCGAGTTAAGAAATTAAAGACTTTCTTGATCTATTTTAAGTGTTTTTGCCTTTCATCTTTGTTTTTTGTAAGCTTGTGTTTATTTTGATCCAATCACTTATTTTTTCCCCTCTACGTACAATTTACTGTCATAATTGTTAGGTGGTTGGAAGTGTCTCCATTAAGAATAACAAAAGCATTCAATTCGTGCTTTTTCCAGGTCCTCTCCTGGAAGCTTGTGCCTGACAATTACCCCCCAGGTGACCAGCCGCCTCCACCCTCTTACATTTATGGGGCACAACATTTGCTGCGATTGTTTGGTAAGAATCCTGGTTCCTGCCTTCTTTCCATTTTTCATTTTGTATTCTCTTGTTAGCTTTCTGTACACCTTGTGGTTTGGGCTATAATTATCTAACTAAGAAATTTGGGGATCCAAGAAAAATTAAAAGTAATCTATCAAAGAAAAGGCATTGAATGAATCTTAAATTTCATAAAAATTATGTTTGTAATATTTGTACTGCTACTCTGTCTT", + "@alternateAlleleVCF": "A" + } + ] + }, + "HGVSlist": { + "HGVS": [ + { + "@Assembly": "GRCh37", + "@Type": "genomic, top-level", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NC_000023.10", + "@sequenceAccession": "NC_000023", + "@sequenceVersion": "10", + "@change": "g.11790026_11790586del", + "@Assembly": "GRCh37", + "Expression": "NC_000023.10:g.11790026_11790586del" + } + }, + { + "@Assembly": "GRCh38", + "@Type": "genomic, top-level", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NC_000023.11", + "@sequenceAccession": "NC_000023", + "@sequenceVersion": "11", + "@change": "g.11771907_11772467del", + "@Assembly": "GRCh38", + "Expression": "NC_000023.11:g.11771907_11772467del" + } + }, + { + "@Type": "genomic", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NG_012564.1", + "@sequenceAccession": "NG_012564", + "@sequenceVersion": "1", + "@change": "g.18749_19309del", + "Expression": "NG_012564.1:g.18749_19309del" + } + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_001282174.1", + "@sequenceAccession": "NM_001282174", + "@sequenceVersion": "1", + "@change": "c.835-249_935-154del", + "Expression": "NM_001282174.1:c.835-249_935-154del" + }, + "MolecularConsequence": [ + { + "@ID": "SO:0001574", + "@Type": "splice acceptor variant", + "@DB": "SO" + }, + { + "@ID": "SO:0001575", + "@Type": "splice donor variant", + "@DB": "SO" 
+ } + ] + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_078629.4", + "@sequenceAccession": "NM_078629", + "@sequenceVersion": "4", + "@change": "c.1282-249_1382-154del", + "@MANESelect": "true", + "Expression": "NM_078629.4:c.1282-249_1382-154del" + }, + "MolecularConsequence": [ + { + "@ID": "SO:0001574", + "@Type": "splice acceptor variant", + "@DB": "SO" + }, + { + "@ID": "SO:0001575", + "@Type": "splice donor variant", + "@DB": "SO" + } + ] + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_001193270.2", + "@sequenceAccession": "NM_001193270", + "@sequenceVersion": "2", + "@change": "c.1246-249_1346-154del", + "Expression": "NM_001193270.2:c.1246-249_1346-154del" + }, + "MolecularConsequence": [ + { + "@ID": "SO:0001574", + "@Type": "splice acceptor variant", + "@DB": "SO" + }, + { + "@ID": "SO:0001575", + "@Type": "splice donor variant", + "@DB": "SO" + } + ] + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_006800.4", + "@sequenceAccession": "NM_006800", + "@sequenceVersion": "4", + "@change": "c.784-249_884-154del", + "Expression": "NM_006800.4:c.784-249_884-154del" + }, + "MolecularConsequence": [ + { + "@ID": "SO:0001574", + "@Type": "splice acceptor variant", + "@DB": "SO" + }, + { + "@ID": "SO:0001575", + "@Type": "splice donor variant", + "@DB": "SO" + } + ] + } + ] + } + }, + "RCVList": { + "RCVAccession": { + "@Title": "NM_078629.4(MSL3):c.1282-249_1382-154del AND Basilicata-Akhtar syndrome", + "@Accession": "RCV002287880", + "@Version": "3", + "ClassifiedConditionList": { + "@TraitSetID": "50614", + "ClassifiedCondition": { + "@DB": "MedGen", + "@ID": "C5231394", + "#text": "Basilicata-Akhtar syndrome" + } + }, + "RCVClassifications": { + "GermlineClassification": { + "ReviewStatus": "criteria provided, single submitter", + "Description": { + "@DateLastEvaluated": "2022-09-27", + "@SubmissionCount": "1", + "#text": 
"Pathogenic" + } + } + } + } + }, + "Classifications": { + "GermlineClassification": { + "@DateLastEvaluated": "2022-09-27", + "@NumberOfSubmissions": "1", + "@NumberOfSubmitters": "1", + "@DateCreated": "2022-10-08", + "@MostRecentSubmission": "2022-10-08", + "ReviewStatus": "criteria provided, single submitter", + "Description": "Pathogenic", + "ConditionList": { + "TraitSet": { + "@ID": "50614", + "@Type": "Disease", + "@ContributesToAggregateClassification": "true", + "Trait": { + "@ID": "42934", + "@Type": "Disease", + "Name": [ + { + "ElementValue": { + "@Type": "Alternate", + "#text": "MENTAL RETARDATION, X-LINKED, SYNDROMIC, BASILICATA-AKHTAR TYPE" + }, + "XRef": { + "@Type": "MIM", + "@ID": "301032", + "@DB": "OMIM" + } + }, + { + "ElementValue": { + "@Type": "Preferred", + "#text": "Basilicata-Akhtar syndrome" + }, + "XRef": { + "@ID": "MONDO:0026730", + "@DB": "MONDO" + } + }, + { + "ElementValue": { + "@Type": "Alternate", + "#text": "INTELLECTUAL DEVELOPMENTAL DISORDER, X-LINKED, SYNDROMIC, 36" + }, + "XRef": { + "@Type": "MIM", + "@ID": "301032", + "@DB": "OMIM" + } + } + ], + "Symbol": [ + { + "ElementValue": { + "@Type": "Alternate", + "#text": "MRXSBA" + }, + "XRef": { + "@Type": "MIM", + "@ID": "301032", + "@DB": "OMIM" + } + }, + { + "ElementValue": { + "@Type": "Alternate", + "#text": "MRXS36" + }, + "XRef": { + "@Type": "MIM", + "@ID": "301032", + "@DB": "OMIM" + } + } + ], + "XRef": [ + { + "@ID": "C5231394", + "@DB": "MedGen" + }, + { + "@ID": "MONDO:0026730", + "@DB": "MONDO" + }, + { + "@Type": "MIM", + "@ID": "301032", + "@DB": "OMIM" + } + ] + } + } + } + } + }, + "ClinicalAssertionList": { + "ClinicalAssertion": { + "@ID": "5039779", + "@SubmissionDate": "2022-10-07", + "@DateLastUpdated": "2022-10-08", + "@DateCreated": "2022-10-08", + "ClinVarSubmissionID": { + "@localKey": "1f7d8179-9e58-4ea0-bcba-4faaec993f6e", + "@localKeyIsSubmitted": "1", + "@submittedAssembly": "GRCh37" + }, + "ClinVarAccession": { + "@Accession": "SCV002578174", 
+ "@DateUpdated": "2022-10-08", + "@DateCreated": "2022-10-08", + "@Type": "SCV", + "@Version": "1", + "@SubmitterName": "Institute for Medical Genetics and Human Genetics, Charité - Universitätsmedizin Berlin", + "@OrgID": "505735", + "@OrganizationCategory": "clinic", + "@OrgAbbreviation": "Charité - Universitätsmedizin" + }, + "AdditionalSubmitters": { + "SubmitterDescription": { + "@OrgID": "507461", + "@SubmitterName": "CUBI - Core Unit Bioinformatics, Berlin Institute of Health", + "@Type": "secondary", + "@OrganizationCategory": "laboratory" + } + }, + "RecordStatus": "current", + "Classification": { + "@DateLastEvaluated": "2022-09-27", + "ReviewStatus": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "criteria provided, single submitter" + }, + "GermlineClassification": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "Pathogenic" + } + }, + "Assertion": "variation to disease", + "AttributeSet": [ + { + "Attribute": { + "@Type": "AssertionMethod", + "#text": "ACMG Guidelines, 2015" + }, + "Citation": { + "ID": { + "@Source": "PubMed", + "#text": "25741868" + } + } + }, + { + "Attribute": { + "@Type": "ModeOfInheritance", + "#text": "X-linked dominant inheritance" + } + } + ], + "ObservedInList": { + "ObservedIn": { + "Sample": { + "Origin": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "germline" + }, + "Tissue": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "Blood" + }, + "Species": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "@TaxonomyId": "9606", + "#text": "human" + }, + "AffectedStatus": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "yes" + }, + "NumberTested": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "1" + }, + "Gender": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "female" + }, + "FamilyData": { + "@xmlns:xsi": 
"http://www.w3.org/2001/XMLSchema-instance", + "@PedigreeID": "09393c9d-7530-4277-b8a7-13f252a0d7cf" + } + }, + "Method": { + "MethodType": "clinical testing" + }, + "ObservedData": [ + { + "Attribute": { + "@Type": "SingleHeterozygote", + "@integerValue": "1" + } + }, + { + "Attribute": { + "@Type": "SampleLocalID", + "#text": "6129e3ba-c880-44d7-b2d2-65d437bc4de7" + } + } + ], + "TraitSet": { + "@Type": "Finding", + "Trait": [ + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0001263" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0000256" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0000006" + } + } + ] + } + } + }, + "SimpleAllele": { + "GeneList": { + "Gene": { + "@Symbol": "MSL3" + } + }, + "VariantType": "Deletion", + "Location": { + "SequenceLocation": { + "@Assembly": "GRCh37", + "@Chr": "X", + "@start": "11790026", + "@stop": "11790586" + } + } + }, + "TraitSet": { + "@Type": "Disease", + "Trait": { + "@Type": "Disease", + "XRef": { + "@DB": "OMIM", + "@ID": "301032" + } + } + }, + "SubmissionNameList": { + "SubmissionName": "SUB7643814" + } + } + }, + "TraitMappingList": { + "TraitMapping": [ + { + "@ClinicalAssertionID": "5039779", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0001263", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C0557874", + "@Name": "Global developmental delay" + } + }, + { + "@ClinicalAssertionID": "5039779", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0000256", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C2243051", + "@Name": "Macrocephaly" + } + }, + { + "@ClinicalAssertionID": "5039779", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0000006", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C0443147", + "@Name": 
"Autosomal dominant inheritance" + } + }, + { + "@ClinicalAssertionID": "5039779", + "@TraitType": "Disease", + "@MappingType": "XRef", + "@MappingValue": "301032", + "@MappingRef": "OMIM", + "MedGen": { + "@CUI": "C5231394", + "@Name": "Basilicata-Akhtar syndrome" + } + } + ] + } + } +} diff --git a/tests/clinvar_data/data/regressions/2024-07-01/allelle-id-1704898.json b/tests/clinvar_data/data/regressions/2024-07-01/allelle-id-1704898.json new file mode 100644 index 0000000..1390f3c --- /dev/null +++ b/tests/clinvar_data/data/regressions/2024-07-01/allelle-id-1704898.json @@ -0,0 +1,650 @@ +{ + "RecordStatus": "current", + "Species": "Homo sapiens", + "ClassifiedRecord": { + "SimpleAllele": { + "@AlleleID": "1704898", + "@VariationID": "1706588", + "GeneList": { + "Gene": { + "@Symbol": "GRIN1", + "@FullName": "glutamate ionotropic receptor NMDA type subunit 1", + "@GeneID": "2902", + "@HGNC_ID": "HGNC:4584", + "@Source": "submitted", + "@RelationshipType": "within single gene", + "Location": { + "CytogeneticLocation": "9q34.3", + "SequenceLocation": [ + { + "@Assembly": "GRCh38", + "@AssemblyAccessionVersion": "GCF_000001405.38", + "@AssemblyStatus": "current", + "@Chr": "9", + "@Accession": "NC_000009.12", + "@start": "137139154", + "@stop": "137168756", + "@display_start": "137139154", + "@display_stop": "137168756", + "@Strand": "+" + }, + { + "@Assembly": "GRCh37", + "@AssemblyAccessionVersion": "GCF_000001405.25", + "@AssemblyStatus": "previous", + "@Chr": "9", + "@Accession": "NC_000009.11", + "@start": "140033608", + "@stop": "140063213", + "@display_start": "140033608", + "@display_stop": "140063213", + "@Strand": "+" + } + ] + }, + "OMIM": "138249" + } + }, + "Name": "NM_007327.4(GRIN1):c.2377G>T (p.Val793Phe)", + "CanonicalSPDI": "NC_000009.12:137163601:G:T", + "VariantType": "single nucleotide variant", + "Location": { + "CytogeneticLocation": "9q34.3", + "SequenceLocation": [ + { + "@Assembly": "GRCh38", + "@AssemblyAccessionVersion": 
"GCF_000001405.38", + "@forDisplay": "true", + "@AssemblyStatus": "current", + "@Chr": "9", + "@Accession": "NC_000009.12", + "@start": "137163602", + "@stop": "137163602", + "@display_start": "137163602", + "@display_stop": "137163602", + "@variantLength": "1", + "@positionVCF": "137163602", + "@referenceAlleleVCF": "G", + "@alternateAlleleVCF": "T" + }, + { + "@Assembly": "GRCh37", + "@AssemblyAccessionVersion": "GCF_000001405.25", + "@AssemblyStatus": "previous", + "@Chr": "9", + "@Accession": "NC_000009.11", + "@start": "140058054", + "@stop": "140058054", + "@display_start": "140058054", + "@display_stop": "140058054", + "@variantLength": "1", + "@positionVCF": "140058054", + "@referenceAlleleVCF": "G", + "@alternateAlleleVCF": "T" + } + ] + }, + "ProteinChange": [ + "V793F", + "V814F" + ], + "HGVSlist": { + "HGVS": [ + { + "@Assembly": "GRCh37", + "@Type": "genomic, top-level", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NC_000009.11", + "@sequenceAccession": "NC_000009", + "@sequenceVersion": "11", + "@change": "g.140058054G>T", + "@Assembly": "GRCh37", + "Expression": "NC_000009.11:g.140058054G>T" + } + }, + { + "@Assembly": "GRCh38", + "@Type": "genomic, top-level", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NC_000009.12", + "@sequenceAccession": "NC_000009", + "@sequenceVersion": "12", + "@change": "g.137163602G>T", + "@Assembly": "GRCh38", + "Expression": "NC_000009.12:g.137163602G>T" + } + }, + { + "@Type": "genomic", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NG_011507.1", + "@sequenceAccession": "NG_011507", + "@sequenceVersion": "1", + "@change": "g.29446G>T", + "Expression": "NG_011507.1:g.29446G>T" + } + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_000832.7", + "@sequenceAccession": "NM_000832", + "@sequenceVersion": "7", + "@change": "c.2377G>T", + "Expression": "NM_000832.7:c.2377G>T" + }, + "ProteinExpression": { + "@sequenceAccessionVersion": 
"NP_000823.4", + "@sequenceAccession": "NP_000823", + "@sequenceVersion": "4", + "@change": "p.Val793Phe", + "Expression": "NP_000823.4:p.Val793Phe" + }, + "MolecularConsequence": { + "@ID": "SO:0001583", + "@Type": "missense variant", + "@DB": "SO" + } + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_001185090.2", + "@sequenceAccession": "NM_001185090", + "@sequenceVersion": "2", + "@change": "c.2440G>T", + "Expression": "NM_001185090.2:c.2440G>T" + }, + "ProteinExpression": { + "@sequenceAccessionVersion": "NP_001172019.1", + "@sequenceAccession": "NP_001172019", + "@sequenceVersion": "1", + "@change": "p.Val814Phe", + "Expression": "NP_001172019.1:p.Val814Phe" + }, + "MolecularConsequence": { + "@ID": "SO:0001583", + "@Type": "missense variant", + "@DB": "SO" + } + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_001185091.2", + "@sequenceAccession": "NM_001185091", + "@sequenceVersion": "2", + "@change": "c.2440G>T", + "Expression": "NM_001185091.2:c.2440G>T" + }, + "ProteinExpression": { + "@sequenceAccessionVersion": "NP_001172020.1", + "@sequenceAccession": "NP_001172020", + "@sequenceVersion": "1", + "@change": "p.Val814Phe", + "Expression": "NP_001172020.1:p.Val814Phe" + }, + "MolecularConsequence": { + "@ID": "SO:0001583", + "@Type": "missense variant", + "@DB": "SO" + } + }, + { + "@Type": "coding", + "NucleotideExpression": { + "@sequenceAccessionVersion": "NM_007327.4", + "@sequenceAccession": "NM_007327", + "@sequenceVersion": "4", + "@change": "c.2377G>T", + "@MANESelect": "true", + "Expression": "NM_007327.4:c.2377G>T" + }, + "ProteinExpression": { + "@sequenceAccessionVersion": "NP_015566.1", + "@sequenceAccession": "NP_015566", + "@sequenceVersion": "1", + "@change": "p.Val793Phe", + "Expression": "NP_015566.1:p.Val793Phe" + }, + "MolecularConsequence": { + "@ID": "SO:0001583", + "@Type": "missense variant", + "@DB": "SO" + } + }, + { + "@Type": "coding", + 
"NucleotideExpression": { + "@sequenceAccessionVersion": "NM_021569.4", + "@sequenceAccession": "NM_021569", + "@sequenceVersion": "4", + "@change": "c.2377G>T", + "Expression": "NM_021569.4:c.2377G>T" + }, + "ProteinExpression": { + "@sequenceAccessionVersion": "NP_067544.1", + "@sequenceAccession": "NP_067544", + "@sequenceVersion": "1", + "@change": "p.Val793Phe", + "Expression": "NP_067544.1:p.Val793Phe" + }, + "MolecularConsequence": { + "@ID": "SO:0001583", + "@Type": "missense variant", + "@DB": "SO" + } + } + ] + } + }, + "RCVList": { + "RCVAccession": { + "@Title": "NM_007327.4(GRIN1):c.2377G>T (p.Val793Phe) AND Intellectual disability, autosomal dominant 8", + "@Accession": "RCV002285197", + "@Version": "2", + "ClassifiedConditionList": { + "@TraitSetID": "7616", + "ClassifiedCondition": { + "@DB": "MedGen", + "@ID": "C3280282", + "#text": "Intellectual disability, autosomal dominant 8" + } + }, + "RCVClassifications": { + "GermlineClassification": { + "ReviewStatus": "criteria provided, single submitter", + "Description": { + "@DateLastEvaluated": "2022-09-16", + "@SubmissionCount": "1", + "#text": "Likely pathogenic" + } + } + } + } + }, + "Classifications": { + "GermlineClassification": { + "@DateLastEvaluated": "2022-09-16", + "@NumberOfSubmissions": "1", + "@NumberOfSubmitters": "1", + "@DateCreated": "2022-10-01", + "@MostRecentSubmission": "2022-10-01", + "ReviewStatus": "criteria provided, single submitter", + "Description": "Likely pathogenic", + "ConditionList": { + "TraitSet": { + "@ID": "7616", + "@Type": "Disease", + "@ContributesToAggregateClassification": "true", + "Trait": { + "@ID": "16563", + "@Type": "Disease", + "Name": [ + { + "ElementValue": { + "@Type": "Alternate", + "#text": "Mental retardation, autosomal dominant 8" + }, + "XRef": { + "@ID": "Mental+retardation%2C+autosomal+dominant+8/8831", + "@DB": "Genetic Alliance" + } + }, + { + "ElementValue": { + "@Type": "Preferred", + "#text": "Intellectual disability, autosomal dominant 
8" + }, + "XRef": { + "@ID": "MONDO:0013655", + "@DB": "MONDO" + } + }, + { + "ElementValue": { + "@Type": "Alternate", + "#text": "Neurodevelopmental disorder with or without hyperkinetic movements and seizures, autosomal dominant" + } + } + ], + "Symbol": { + "ElementValue": { + "@Type": "Preferred", + "#text": "NDHMSD" + }, + "XRef": { + "@Type": "MIM", + "@ID": "614254", + "@DB": "OMIM" + } + }, + "AttributeSet": [ + { + "Attribute": { + "@Type": "GARD id", + "@integerValue": "13686" + }, + "XRef": { + "@ID": "13686", + "@DB": "Office of Rare Diseases" + } + }, + { + "Attribute": { + "@Type": "public definition", + "#text": "GRIN1-related neurodevelopmental disorder (GRIN1-NDD) is characterized by mild-to-profound developmental delay/ intellectual disability (DD/ID) in all affected individuals. Other common manifestations are epilepsy, muscular hypotonia, movement disorders, spasticity, feeding difficulties, and behavior problems. A subset of individuals show a malformation of cortical development consisting of extensive and diffuse bilateral polymicrogyria. To date, 72 individuals with GRIN1-NDD have been reported." 
+ }, + "XRef": { + "@ID": "NBK542807", + "@DB": "GeneReviews" + } + } + ], + "Citation": { + "@Type": "review", + "@Abbrev": "GeneReviews", + "ID": [ + { + "@Source": "PubMed", + "#text": "31219694" + }, + { + "@Source": "BookShelf", + "#text": "NBK542807" + } + ] + }, + "XRef": [ + { + "@ID": "C3280282", + "@DB": "MedGen" + }, + { + "@ID": "MONDO:0013655", + "@DB": "MONDO" + }, + { + "@Type": "MIM", + "@ID": "614254", + "@DB": "OMIM" + } + ] + } + } + } + } + }, + "ClinicalAssertionList": { + "ClinicalAssertion": { + "@ID": "5034259", + "@SubmissionDate": "2022-09-22", + "@DateLastUpdated": "2022-10-01", + "@DateCreated": "2022-10-01", + "ClinVarSubmissionID": { + "@localKey": "4cb0b18b-3b43-49ac-8352-6a0cd7048e13", + "@localKeyIsSubmitted": "1", + "@submittedAssembly": "GRCh37" + }, + "ClinVarAccession": { + "@Accession": "SCV002574937", + "@DateUpdated": "2022-10-01", + "@DateCreated": "2022-10-01", + "@Type": "SCV", + "@Version": "1", + "@SubmitterName": "Institute for Medical Genetics and Human Genetics, Charité - Universitätsmedizin Berlin", + "@OrgID": "505735", + "@OrganizationCategory": "clinic", + "@OrgAbbreviation": "Charité - Universitätsmedizin" + }, + "AdditionalSubmitters": { + "SubmitterDescription": { + "@OrgID": "507461", + "@SubmitterName": "CUBI - Core Unit Bioinformatics, Berlin Institute of Health", + "@Type": "secondary", + "@OrganizationCategory": "laboratory" + } + }, + "RecordStatus": "current", + "Classification": { + "@DateLastEvaluated": "2022-09-16", + "ReviewStatus": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "criteria provided, single submitter" + }, + "GermlineClassification": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "Likely pathogenic" + } + }, + "Assertion": "variation to disease", + "AttributeSet": { + "Attribute": { + "@Type": "AssertionMethod", + "#text": "ACMG Guidelines, 2015" + }, + "Citation": { + "ID": { + "@Source": "PubMed", + "#text": "25741868" + } + } + 
}, + "ObservedInList": { + "ObservedIn": { + "Sample": { + "Origin": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "germline" + }, + "Tissue": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "Blood" + }, + "Species": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "@TaxonomyId": "9606", + "#text": "human" + }, + "AffectedStatus": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "yes" + }, + "NumberTested": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "1" + }, + "Gender": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "#text": "male" + }, + "FamilyData": { + "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "@PedigreeID": "fe3ca5e9-f193-401c-ad76-edbf30fe29f3" + } + }, + "Method": { + "MethodType": "clinical testing" + }, + "ObservedData": [ + { + "Attribute": { + "@Type": "SingleHeterozygote", + "@integerValue": "1" + } + }, + { + "Attribute": { + "@Type": "SampleLocalID", + "#text": "202eb76d-a7a1-4d2c-97fe-608bed412367" + } + } + ], + "TraitSet": { + "@Type": "Finding", + "Trait": [ + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0001263" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0003502" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0001252" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0000733" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0002020" + } + }, + { + "@ClinicalFeaturesAffectedStatus": "present", + "@Type": "Finding", + "XRef": { + "@DB": "HP", + "@ID": "HP:0011145" + } + } + ] + } + } + }, + "SimpleAllele": { + "GeneList": { + 
"Gene": { + "@Symbol": "GRIN1" + } + }, + "VariantType": "Variation", + "Location": { + "SequenceLocation": { + "@Assembly": "GRCh37", + "@Chr": "9", + "@alternateAllele": "T", + "@referenceAllele": "G", + "@start": "140058054", + "@stop": "140058054" + } + } + }, + "TraitSet": { + "@Type": "Disease", + "Trait": { + "@Type": "Disease", + "XRef": { + "@DB": "OMIM", + "@ID": "614254" + } + } + } + } + }, + "TraitMappingList": { + "TraitMapping": [ + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0001252", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C0026827", + "@Name": "Hypotonia" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Disease", + "@MappingType": "XRef", + "@MappingValue": "614254", + "@MappingRef": "OMIM", + "MedGen": { + "@CUI": "C3280282", + "@Name": "Intellectual disability, autosomal dominant 8" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0000733", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C0038271", + "@Name": "Motor stereotypies" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0011145", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C4023516", + "@Name": "Symptomatic seizures" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0001263", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C0557874", + "@Name": "Global developmental delay" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0002020", + "@MappingRef": "HP", + "MedGen": { + "@CUI": "C4317146", + "@Name": "Gastroesophageal reflux" + } + }, + { + "@ClinicalAssertionID": "5034259", + "@TraitType": "Finding", + "@MappingType": "XRef", + "@MappingValue": "HP:0003502", + "@MappingRef": "HP", + 
"MedGen": { + "@CUI": "C3150077", + "@Name": "Mild short stature" + } + } + ] + } + } +} diff --git a/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allele-id-1706807.json/result b/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allele-id-1706807.json/result new file mode 100644 index 0000000..17475f9 --- /dev/null +++ b/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allele-id-1706807.json/result @@ -0,0 +1,564 @@ +{ + "simpleAllele": { + "genes": [ + { + "locations": [ + { + "cytogeneticLocations": [ + "Xp22.2" + ], + "sequenceLocations": [ + { + "assembly": "GRCh38", + "chr": "CHROMOSOME_X", + "accession": "NC_000023.11", + "start": 11758159, + "stop": 11775772, + "displayStart": 11758159, + "displayStop": 11775772, + "strand": "+" + }, + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_X", + "accession": "NC_000023.10", + "start": 11776277, + "stop": 11793871, + "displayStart": 11776277, + "displayStop": 11793871, + "strand": "+" + } + ] + } + ], + "omims": [ + "300609" + ], + "haploinsufficiency": { + "value": "Sufficient evidence for dosage pathogenicity", + "lastEvaluated": "2023-08-23T00:00:00Z", + "clingen": "https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MSL3" + }, + "triplosensitivity": { + "value": "No evidence available", + "lastEvaluated": "2023-08-23T00:00:00Z", + "clingen": "https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MSL3" + }, + "fullName": "MSL complex subunit 3", + "geneId": "10943", + "hgncId": "HGNC:7370", + "source": "submitted", + "relationshipType": "GENE_VARIANT_RELATIONSHIP_WITHIN_SINGLE_GENE" + } + ], + "name": "NM_078629.4(MSL3):c.1282-249_1382-154del", + "variantTypes": [ + 
"Deletion" + ], + "locations": [ + { + "cytogeneticLocations": [ + "Xp22.2" + ], + "sequenceLocations": [ + { + "forDisplay": true, + "assembly": "GRCh38", + "chr": "CHROMOSOME_X", + "accession": "NC_000023.11", + "start": 11771907, + "stop": 11772467, + "displayStart": 11771907, + "displayStop": 11772467, + "variantLength": 561, + "positionVcf": 11771906, + "referenceAlleleVcf": "AAAGGACTAAAGAACCTAAACAGTTACAGTTATTTATAGAGAATAACTGTTGCGAGTTAAGAAATTAAAGACTTTCTTGATCTATTTTAAGTGTTTTTGCCTTTCATCTTTGTTTTTTGTAAGCTTGTGTTTATTTTGATCCAATCACTTATTTTTTCCCCTCTACGTACAATTTACTGTCATAATTGTTAGGTGGTTGGAAGTGTCTCCATTAAGAATAACAAAAGCATTCAATTCGTGCTTTTTCCAGGTCCTCTCCTGGAAGCTTGTGCCTGACAATTACCCCCCAGGTGACCAGCCGCCTCCACCCTCTTACATTTATGGGGCACAACATTTGCTGCGATTGTTTGGTAAGAATCCTGGTTCCTGCCTTCTTTCCATTTTTCATTTTGTATTCTCTTGTTAGCTTTCTGTACACCTTGTGGTTTGGGCTATAATTATCTAACTAAGAAATTTGGGGATCCAAGAAAAATTAAAAGTAATCTATCAAAGAAAAGGCATTGAATGAATCTTAAATTTCATAAAAATTATGTTTGTAATATTTGTACTGCTACTCTGTCTT", + "alternateAlleleVcf": "A" + }, + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_X", + "accession": "NC_000023.10", + "start": 11790026, + "stop": 11790586, + "displayStart": 11790026, + "displayStop": 11790586, + "variantLength": 561, + "positionVcf": 11790025, + "referenceAlleleVcf": "AAAGGACTAAAGAACCTAAACAGTTACAGTTATTTATAGAGAATAACTGTTGCGAGTTAAGAAATTAAAGACTTTCTTGATCTATTTTAAGTGTTTTTGCCTTTCATCTTTGTTTTTTGTAAGCTTGTGTTTATTTTGATCCAATCACTTATTTTTTCCCCTCTACGTACAATTTACTGTCATAATTGTTAGGTGGTTGGAAGTGTCTCCATTAAGAATAACAAAAGCATTCAATTCGTGCTTTTTCCAGGTCCTCTCCTGGAAGCTTGTGCCTGACAATTACCCCCCAGGTGACCAGCCGCCTCCACCCTCTTACATTTATGGGGCACAACATTTGCTGCGATTGTTTGGTAAGAATCCTGGTTCCTGCCTTCTTTCCATTTTTCATTTTGTATTCTCTTGTTAGCTTTCTGTACACCTTGTGGTTTGGGCTATAATTATCTAACTAAGAAATTTGGGGATCCAAGAAAAATTAAAAGTAATCTATCAAAGAAAAGGCATTGAATGAATCTTAAATTTCATAAAAATTATGTTTGTAATATTTGTACTGCTACTCTGTCTT", + "alternateAlleleVcf": "A" + } + ] + } + ], + "hgvsExpressions": [ + { + "nucleotideExpression": { + "expression": "NC_000023.10:g.11790026_11790586del", + "sequenceAccessionVersion": 
"NC_000023.10", + "sequenceAccession": "NC_000023", + "sequenceVersion": 10, + "change": "g.11790026_11790586del" + }, + "type": "HGVS_TYPE_GENOMIC_TOP_LEVEL", + "assembly": "GRCh37" + }, + { + "nucleotideExpression": { + "expression": "NC_000023.11:g.11771907_11772467del", + "sequenceAccessionVersion": "NC_000023.11", + "sequenceAccession": "NC_000023", + "sequenceVersion": 11, + "change": "g.11771907_11772467del" + }, + "type": "HGVS_TYPE_GENOMIC_TOP_LEVEL", + "assembly": "GRCh38" + }, + { + "nucleotideExpression": { + "expression": "NG_012564.1:g.18749_19309del", + "sequenceAccessionVersion": "NG_012564.1", + "sequenceAccession": "NG_012564", + "sequenceVersion": 1, + "change": "g.18749_19309del" + }, + "type": "HGVS_TYPE_GENOMIC" + }, + { + "nucleotideExpression": { + "expression": "NM_001282174.1:c.835-249_935-154del", + "sequenceAccessionVersion": "NM_001282174.1", + "sequenceAccession": "NM_001282174", + "sequenceVersion": 1, + "change": "c.835-249_935-154del" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001574", + "type": "splice acceptor variant" + }, + { + "db": "SO", + "id": "SO:0001575", + "type": "splice donor variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_078629.4:c.1282-249_1382-154del", + "sequenceAccessionVersion": "NM_078629.4", + "sequenceAccession": "NM_078629", + "sequenceVersion": 4, + "change": "c.1282-249_1382-154del", + "maneSelect": true + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001574", + "type": "splice acceptor variant" + }, + { + "db": "SO", + "id": "SO:0001575", + "type": "splice donor variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_001193270.2:c.1246-249_1346-154del", + "sequenceAccessionVersion": "NM_001193270.2", + "sequenceAccession": "NM_001193270", + "sequenceVersion": 2, + "change": "c.1246-249_1346-154del" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": 
"SO:0001574", + "type": "splice acceptor variant" + }, + { + "db": "SO", + "id": "SO:0001575", + "type": "splice donor variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_006800.4:c.784-249_884-154del", + "sequenceAccessionVersion": "NM_006800.4", + "sequenceAccession": "NM_006800", + "sequenceVersion": 4, + "change": "c.784-249_884-154del" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001574", + "type": "splice acceptor variant" + }, + { + "db": "SO", + "id": "SO:0001575", + "type": "splice donor variant" + } + ], + "type": "HGVS_TYPE_CODING" + } + ], + "alleleId": "1706807", + "variationId": "1708509" + }, + "rcvList": { + "rcvAccessions": [ + { + "classifiedConditionList": { + "classifiedConditions": [ + { + "value": "Basilicata-Akhtar syndrome", + "db": "MedGen", + "id": "C5231394" + } + ], + "traitSetId": "50614" + }, + "rcvClassifications": { + "germlineClassification": { + "reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "description": { + "value": "Pathogenic", + "dateLastEvaluated": "2022-09-27T00:00:00Z", + "submissionCount": 1 + } + } + }, + "title": "NM_078629.4(MSL3):c.1282-249_1382-154del AND Basilicata-Akhtar syndrome", + "accession": "RCV002287880", + "version": 3 + } + ] + }, + "classifications": { + "germlineClassification": { + "reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "description": "Pathogenic", + "conditions": [ + { + "traits": [ + { + "names": [ + { + "value": "MENTAL RETARDATION, X-LINKED, SYNDROMIC, BASILICATA-AKHTAR TYPE", + "type": "Alternate", + "xrefs": [ + { + "db": "OMIM", + "id": "301032", + "type": "MIM" + } + ] + }, + { + "value": "Basilicata-Akhtar syndrome", + "type": "Preferred", + "xrefs": [ + { + "db": "MONDO", + "id": "MONDO:0026730" + } + ] + }, + { + "value": "INTELLECTUAL DEVELOPMENTAL DISORDER, X-LINKED, SYNDROMIC, 36", + "type": "Alternate", + "xrefs": [ + { + "db": 
"OMIM", + "id": "301032", + "type": "MIM" + } + ] + } + ], + "symbols": [ + { + "value": "MRXSBA", + "type": "Alternate", + "xrefs": [ + { + "db": "OMIM", + "id": "301032", + "type": "MIM" + } + ] + }, + { + "value": "MRXS36", + "type": "Alternate", + "xrefs": [ + { + "db": "OMIM", + "id": "301032", + "type": "MIM" + } + ] + } + ], + "xrefs": [ + { + "db": "MedGen", + "id": "C5231394" + }, + { + "db": "MONDO", + "id": "MONDO:0026730" + }, + { + "db": "OMIM", + "id": "301032", + "type": "MIM" + } + ] + } + ], + "type": "TYPE_DISEASE", + "id": "50614", + "contributesToAggregateClassification": true + } + ], + "dateLastEvaluated": "2022-09-27T00:00:00Z", + "dateCreated": "2022-10-08T00:00:00Z", + "mostRecentSubmission": "2022-10-08T00:00:00Z", + "numberOfSubmitters": 1, + "numberOfSubmissions": 1 + } + }, + "clinicalAssertions": [ + { + "clinvarSubmissionId": { + "localKey": "1f7d8179-9e58-4ea0-bcba-4faaec993f6e", + "localKeyIsSubmitted": false, + "submittedAssembly": "GRCh37" + }, + "clinvarAccession": { + "accession": "SCV002578174", + "version": 1, + "submitterIdentifiers": { + "submitterName": "Institute for Medical Genetics and Human Genetics, Charit\u00e9 - Universit\u00e4tsmedizin Berlin", + "orgId": "505735", + "orgCategory": "clinic", + "orgAbbreviation": "Charit\u00e9 - Universit\u00e4tsmedizin" + }, + "dateUpdated": "2022-10-08T00:00:00Z", + "dateCreated": "2022-10-08T00:00:00Z" + }, + "additionalSubmitters": [ + { + "submitterIdentifiers": { + "submitterName": "CUBI - Core Unit Bioinformatics, Berlin Institute of Health", + "orgId": "507461", + "orgCategory": "laboratory" + }, + "type": "TYPE_SECONDARY" + } + ], + "recordStatus": "RECORD_STATUS_CURRENT", + "classifications": { + "reviewStatus": "SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "germlineClassification": "Pathogenic", + "dateLastEvaluated": "2022-09-27T00:00:00Z" + }, + "assertion": "ASSERTION_VARIATION_TO_DISEASE", + "attributes": [ + { + "attribute": { + "value": "ACMG 
Guidelines, 2015" + }, + "type": "TYPE_ASSERTION_METHOD", + "citations": [ + { + "ids": [ + { + "value": "25741868", + "source": "PubMed" + } + ] + } + ] + }, + { + "attribute": { + "value": "X-linked dominant inheritance" + }, + "type": "TYPE_MODE_OF_INHERITANCE" + } + ], + "observedIns": [ + { + "sample": { + "origin": "ORIGIN_GERMLINE", + "tissue": "Blood", + "species": { + "name": "human", + "taxonomyId": 9606 + }, + "affectedStatus": "AFFECTED_STATUS_YES", + "numerTested": 1, + "gender": "GENDER_FEMALE", + "familyData": { + "pedigreeId": "09393c9d-7530-4277-b8a7-13f252a0d7cf" + } + }, + "observedData": [ + { + "attributes": [ + { + "base": { + "integerValue": "1" + }, + "type": "TYPE_SINGLE_HETEROZYGOUS" + } + ] + }, + { + "attributes": [ + { + "base": { + "value": "6129e3ba-c880-44d7-b2d2-65d437bc4de7" + }, + "type": "TYPE_SAMPLE_LOCAL_ID" + } + ] + } + ], + "traitSet": { + "traits": [ + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0001263" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0000256" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0000006" + } + ] + } + ], + "type": "TYPE_FINDING" + } + } + ], + "simpleAllele": { + "genes": [ + { + "symbol": "MSL3" + } + ], + "variantType": "Deletion", + "location": { + "sequenceLocations": [ + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_X", + "start": 11790026, + "stop": 11790586 + } + ] + } + }, + "traitSet": { + "traits": [ + { + "xrefs": [ + { + "db": "OMIM", + "id": "301032" + } + ] + } + ], + "type": "TYPE_DISEASE" + }, + "submissionNames": [ + "SUB7643814" + ], + "id": "5039779" + } + ], + "traitMappings": [ + { + "medgens": [ + { + "name": "Global developmental delay", + "cui": "C0557874" + } + ], + "clinicalAssertionId": "5039779", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0001263", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Macrocephaly", + "cui": "C2243051" + } + ], + "clinicalAssertionId": "5039779", + "traitType": 
"Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0000256", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Autosomal dominant inheritance", + "cui": "C0443147" + } + ], + "clinicalAssertionId": "5039779", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0000006", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Basilicata-Akhtar syndrome", + "cui": "C5231394" + } + ], + "clinicalAssertionId": "5039779", + "traitType": "Disease", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "301032", + "mappingRef": "OMIM" + } + ] +} \ No newline at end of file diff --git a/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allelle-id-1704898.json/result b/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allelle-id-1704898.json/result new file mode 100644 index 0000000..7358a68 --- /dev/null +++ b/tests/clinvar_data/snapshots/test_xmldictdata_to_pb_regressions/test_convert_classified_record_xmldict_data_to_pb/testsclinvar_datadataregressions2024-07-01allelle-id-1704898.json/result @@ -0,0 +1,674 @@ +{ + "simpleAllele": { + "genes": [ + { + "locations": [ + { + "cytogeneticLocations": [ + "9q34.3" + ], + "sequenceLocations": [ + { + "assembly": "GRCh38", + "chr": "CHROMOSOME_9", + "accession": "NC_000009.12", + "start": 137139154, + "stop": 137168756, + "displayStart": 137139154, + "displayStop": 137168756, + "strand": "+" + }, + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_9", + "accession": "NC_000009.11", + "start": 140033608, + "stop": 140063213, + "displayStart": 140033608, + "displayStop": 140063213, + "strand": "+" + } + ] + } + ], + "omims": [ + "138249" + ], + "fullName": "glutamate ionotropic receptor NMDA type subunit 1", + "geneId": "2902", + "hgncId": "HGNC:4584", + 
"source": "submitted", + "relationshipType": "GENE_VARIANT_RELATIONSHIP_WITHIN_SINGLE_GENE" + } + ], + "name": "NM_007327.4(GRIN1):c.2377G>T (p.Val793Phe)", + "canonicalSpdi": "NC_000009.12:137163601:G:T", + "variantTypes": [ + "single nucleotide variant" + ], + "locations": [ + { + "cytogeneticLocations": [ + "9q34.3" + ], + "sequenceLocations": [ + { + "forDisplay": true, + "assembly": "GRCh38", + "chr": "CHROMOSOME_9", + "accession": "NC_000009.12", + "start": 137163602, + "stop": 137163602, + "displayStart": 137163602, + "displayStop": 137163602, + "variantLength": 1, + "positionVcf": 137163602, + "referenceAlleleVcf": "G", + "alternateAlleleVcf": "T" + }, + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_9", + "accession": "NC_000009.11", + "start": 140058054, + "stop": 140058054, + "displayStart": 140058054, + "displayStop": 140058054, + "variantLength": 1, + "positionVcf": 140058054, + "referenceAlleleVcf": "G", + "alternateAlleleVcf": "T" + } + ] + } + ], + "proteinChanges": [ + "V793F", + "V814F" + ], + "hgvsExpressions": [ + { + "nucleotideExpression": { + "expression": "NC_000009.11:g.140058054G>T", + "sequenceAccessionVersion": "NC_000009.11", + "sequenceAccession": "NC_000009", + "sequenceVersion": 11, + "change": "g.140058054G>T" + }, + "type": "HGVS_TYPE_GENOMIC_TOP_LEVEL", + "assembly": "GRCh37" + }, + { + "nucleotideExpression": { + "expression": "NC_000009.12:g.137163602G>T", + "sequenceAccessionVersion": "NC_000009.12", + "sequenceAccession": "NC_000009", + "sequenceVersion": 12, + "change": "g.137163602G>T" + }, + "type": "HGVS_TYPE_GENOMIC_TOP_LEVEL", + "assembly": "GRCh38" + }, + { + "nucleotideExpression": { + "expression": "NG_011507.1:g.29446G>T", + "sequenceAccessionVersion": "NG_011507.1", + "sequenceAccession": "NG_011507", + "sequenceVersion": 1, + "change": "g.29446G>T" + }, + "type": "HGVS_TYPE_GENOMIC" + }, + { + "nucleotideExpression": { + "expression": "NM_000832.7:c.2377G>T", + "sequenceAccessionVersion": "NM_000832.7", + 
"sequenceAccession": "NM_000832", + "sequenceVersion": 7, + "change": "c.2377G>T" + }, + "proteinExpression": { + "expression": "NP_000823.4:p.Val793Phe", + "sequenceAccessionVersion": "NP_000823.4", + "sequenceAccession": "NP_000823", + "sequenceVersion": 4, + "change": "p.Val793Phe" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001583", + "type": "missense variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_001185090.2:c.2440G>T", + "sequenceAccessionVersion": "NM_001185090.2", + "sequenceAccession": "NM_001185090", + "sequenceVersion": 2, + "change": "c.2440G>T" + }, + "proteinExpression": { + "expression": "NP_001172019.1:p.Val814Phe", + "sequenceAccessionVersion": "NP_001172019.1", + "sequenceAccession": "NP_001172019", + "sequenceVersion": 1, + "change": "p.Val814Phe" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001583", + "type": "missense variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_001185091.2:c.2440G>T", + "sequenceAccessionVersion": "NM_001185091.2", + "sequenceAccession": "NM_001185091", + "sequenceVersion": 2, + "change": "c.2440G>T" + }, + "proteinExpression": { + "expression": "NP_001172020.1:p.Val814Phe", + "sequenceAccessionVersion": "NP_001172020.1", + "sequenceAccession": "NP_001172020", + "sequenceVersion": 1, + "change": "p.Val814Phe" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001583", + "type": "missense variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_007327.4:c.2377G>T", + "sequenceAccessionVersion": "NM_007327.4", + "sequenceAccession": "NM_007327", + "sequenceVersion": 4, + "change": "c.2377G>T", + "maneSelect": true + }, + "proteinExpression": { + "expression": "NP_015566.1:p.Val793Phe", + "sequenceAccessionVersion": "NP_015566.1", + "sequenceAccession": "NP_015566", + "sequenceVersion": 1, + "change": "p.Val793Phe" 
+ }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001583", + "type": "missense variant" + } + ], + "type": "HGVS_TYPE_CODING" + }, + { + "nucleotideExpression": { + "expression": "NM_021569.4:c.2377G>T", + "sequenceAccessionVersion": "NM_021569.4", + "sequenceAccession": "NM_021569", + "sequenceVersion": 4, + "change": "c.2377G>T" + }, + "proteinExpression": { + "expression": "NP_067544.1:p.Val793Phe", + "sequenceAccessionVersion": "NP_067544.1", + "sequenceAccession": "NP_067544", + "sequenceVersion": 1, + "change": "p.Val793Phe" + }, + "molecularConsequences": [ + { + "db": "SO", + "id": "SO:0001583", + "type": "missense variant" + } + ], + "type": "HGVS_TYPE_CODING" + } + ], + "alleleId": "1704898", + "variationId": "1706588" + }, + "rcvList": { + "rcvAccessions": [ + { + "classifiedConditionList": { + "classifiedConditions": [ + { + "value": "Intellectual disability, autosomal dominant 8", + "db": "MedGen", + "id": "C3280282" + } + ], + "traitSetId": "7616" + }, + "rcvClassifications": { + "germlineClassification": { + "reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "description": { + "value": "Likely pathogenic", + "dateLastEvaluated": "2022-09-16T00:00:00Z", + "submissionCount": 1 + } + } + }, + "title": "NM_007327.4(GRIN1):c.2377G>T (p.Val793Phe) AND Intellectual disability, autosomal dominant 8", + "accession": "RCV002285197", + "version": 2 + } + ] + }, + "classifications": { + "germlineClassification": { + "reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "description": "Likely pathogenic", + "conditions": [ + { + "traits": [ + { + "names": [ + { + "value": "Mental retardation, autosomal dominant 8", + "type": "Alternate", + "xrefs": [ + { + "db": "Genetic Alliance", + "id": "Mental+retardation%2C+autosomal+dominant+8/8831" + } + ] + }, + { + "value": "Intellectual disability, autosomal dominant 8", + "type": "Preferred", + "xrefs": [ + { + "db": "MONDO", + 
"id": "MONDO:0013655" + } + ] + }, + { + "value": "Neurodevelopmental disorder with or without hyperkinetic movements and seizures, autosomal dominant", + "type": "Alternate" + } + ], + "symbols": [ + { + "value": "NDHMSD", + "type": "Preferred", + "xrefs": [ + { + "db": "OMIM", + "id": "614254", + "type": "MIM" + } + ] + } + ], + "attributes": [ + { + "attribute": { + "base": { + "integerValue": "13686" + }, + "type": "GARD id" + }, + "xrefs": [ + { + "db": "Office of Rare Diseases", + "id": "13686" + } + ] + }, + { + "attribute": { + "base": { + "value": "GRIN1-related neurodevelopmental disorder (GRIN1-NDD) is characterized by mild-to-profound developmental delay/ intellectual disability (DD/ID) in all affected individuals. Other common manifestations are epilepsy, muscular hypotonia, movement disorders, spasticity, feeding difficulties, and behavior problems. A subset of individuals show a malformation of cortical development consisting of extensive and diffuse bilateral polymicrogyria. To date, 72 individuals with GRIN1-NDD have been reported." 
+ }, + "type": "public definition" + }, + "xrefs": [ + { + "db": "GeneReviews", + "id": "NBK542807" + } + ] + } + ], + "citations": [ + { + "ids": [ + { + "value": "31219694", + "source": "PubMed" + }, + { + "value": "NBK542807", + "source": "BookShelf" + } + ], + "type": "review", + "abbrev": "GeneReviews" + } + ], + "xrefs": [ + { + "db": "MedGen", + "id": "C3280282" + }, + { + "db": "MONDO", + "id": "MONDO:0013655" + }, + { + "db": "OMIM", + "id": "614254", + "type": "MIM" + } + ] + } + ], + "type": "TYPE_DISEASE", + "id": "7616", + "contributesToAggregateClassification": true + } + ], + "dateLastEvaluated": "2022-09-16T00:00:00Z", + "dateCreated": "2022-10-01T00:00:00Z", + "mostRecentSubmission": "2022-10-01T00:00:00Z", + "numberOfSubmitters": 1, + "numberOfSubmissions": 1 + } + }, + "clinicalAssertions": [ + { + "clinvarSubmissionId": { + "localKey": "4cb0b18b-3b43-49ac-8352-6a0cd7048e13", + "localKeyIsSubmitted": false, + "submittedAssembly": "GRCh37" + }, + "clinvarAccession": { + "accession": "SCV002574937", + "version": 1, + "submitterIdentifiers": { + "submitterName": "Institute for Medical Genetics and Human Genetics, Charit\u00e9 - Universit\u00e4tsmedizin Berlin", + "orgId": "505735", + "orgCategory": "clinic", + "orgAbbreviation": "Charit\u00e9 - Universit\u00e4tsmedizin" + }, + "dateUpdated": "2022-10-01T00:00:00Z", + "dateCreated": "2022-10-01T00:00:00Z" + }, + "additionalSubmitters": [ + { + "submitterIdentifiers": { + "submitterName": "CUBI - Core Unit Bioinformatics, Berlin Institute of Health", + "orgId": "507461", + "orgCategory": "laboratory" + }, + "type": "TYPE_SECONDARY" + } + ], + "recordStatus": "RECORD_STATUS_CURRENT", + "classifications": { + "reviewStatus": "SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", + "germlineClassification": "Likely pathogenic", + "dateLastEvaluated": "2022-09-16T00:00:00Z" + }, + "assertion": "ASSERTION_VARIATION_TO_DISEASE", + "attributes": [ + { + "attribute": { + "value": "ACMG Guidelines, 
2015" + }, + "type": "TYPE_ASSERTION_METHOD", + "citations": [ + { + "ids": [ + { + "value": "25741868", + "source": "PubMed" + } + ] + } + ] + } + ], + "observedIns": [ + { + "sample": { + "origin": "ORIGIN_GERMLINE", + "tissue": "Blood", + "species": { + "name": "human", + "taxonomyId": 9606 + }, + "affectedStatus": "AFFECTED_STATUS_YES", + "numerTested": 1, + "gender": "GENDER_MALE", + "familyData": { + "pedigreeId": "fe3ca5e9-f193-401c-ad76-edbf30fe29f3" + } + }, + "observedData": [ + { + "attributes": [ + { + "base": { + "integerValue": "1" + }, + "type": "TYPE_SINGLE_HETEROZYGOUS" + } + ] + }, + { + "attributes": [ + { + "base": { + "value": "202eb76d-a7a1-4d2c-97fe-608bed412367" + }, + "type": "TYPE_SAMPLE_LOCAL_ID" + } + ] + } + ], + "traitSet": { + "traits": [ + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0001263" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0003502" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0001252" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0000733" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0002020" + } + ] + }, + { + "xrefs": [ + { + "db": "HP", + "id": "HP:0011145" + } + ] + } + ], + "type": "TYPE_FINDING" + } + } + ], + "simpleAllele": { + "genes": [ + { + "symbol": "GRIN1" + } + ], + "variantType": "Variation", + "location": { + "sequenceLocations": [ + { + "assembly": "GRCh37", + "chr": "CHROMOSOME_9", + "start": 140058054, + "stop": 140058054, + "referenceAllele": "G", + "alternateAllele": "T" + } + ] + } + }, + "traitSet": { + "traits": [ + { + "xrefs": [ + { + "db": "OMIM", + "id": "614254" + } + ] + } + ], + "type": "TYPE_DISEASE" + }, + "id": "5034259" + } + ], + "traitMappings": [ + { + "medgens": [ + { + "name": "Hypotonia", + "cui": "C0026827" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0001252", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Intellectual 
disability, autosomal dominant 8", + "cui": "C3280282" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Disease", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "614254", + "mappingRef": "OMIM" + }, + { + "medgens": [ + { + "name": "Motor stereotypies", + "cui": "C0038271" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0000733", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Symptomatic seizures", + "cui": "C4023516" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0011145", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Global developmental delay", + "cui": "C0557874" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0001263", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Gastroesophageal reflux", + "cui": "C4317146" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0002020", + "mappingRef": "HP" + }, + { + "medgens": [ + { + "name": "Mild short stature", + "cui": "C3150077" + } + ], + "clinicalAssertionId": "5034259", + "traitType": "Finding", + "mappingType": "MAPPING_TYPE_XREF", + "mappingValue": "HP:0003502", + "mappingRef": "HP" + } + ] +} \ No newline at end of file diff --git a/tests/clinvar_data/test_xmldictdata_to_pb_regressions.py b/tests/clinvar_data/test_xmldictdata_to_pb_regressions.py new file mode 100644 index 0000000..1801123 --- /dev/null +++ b/tests/clinvar_data/test_xmldictdata_to_pb_regressions.py @@ -0,0 +1,22 @@ +"""Regression tests for issues occuring in weekly builds of clinvar-data-jsonl""" + +import json + +from google.protobuf.json_format import MessageToDict, Parse +import pytest + +from clinvar_data.conversion.dict_to_pb import ConvertClassifiedRecord + 
@pytest.mark.parametrize(
    "path_json_input",
    [
        "tests/clinvar_data/data/regressions/2024-07-01/allele-id-1706807.json",
        # NOTE: "allelle" (sic) -- the spelling matches the name used for the
        # stored snapshot directory, so it must not be "corrected" here alone.
        "tests/clinvar_data/data/regressions/2024-07-01/allelle-id-1704898.json",
    ],
)
def test_convert_classified_record_xmldict_data_to_pb(path_json_input: str, snapshot):
    """Convert a regression fixture to protobuf and compare it with its snapshot.

    Loads the xmldict-style JSON at ``path_json_input``, converts it with
    ``ConvertClassifiedRecord.xmldict_data_to_pb``, renders the resulting
    message as a dict via ``MessageToDict`` and matches the indented JSON
    dump against the stored snapshot named ``"result"``.

    Args:
        path_json_input: Path (relative to the repository root) of the JSON
            fixture to convert.
        snapshot: Snapshot fixture providing ``assert_match``.
    """
    # The fixtures contain raw non-ASCII text (e.g. submitter names), so decode
    # explicitly as UTF-8 instead of relying on the platform's locale default.
    with open(path_json_input, "rt", encoding="utf-8") as fixture_file:
        xmldict_value = json.load(fixture_file)
    result = ConvertClassifiedRecord.xmldict_data_to_pb(xmldict_value)
    result_json = MessageToDict(result)
    # Keep json.dumps' default ASCII escaping: the stored snapshots use \uXXXX
    # escapes for non-ASCII characters.
    snapshot.assert_match(json.dumps(result_json, indent=2), "result")