From eafc40d63272f28bfab34899f0e65ca899bfc6de Mon Sep 17 00:00:00 2001 From: spiekos Date: Thu, 23 Mar 2023 15:07:59 -0700 Subject: [PATCH] =?UTF-8?q?=E2=80=9CThe=20International=20Committee=20on?= =?UTF-8?q?=20Taxonomy=20of=20Viruses=20(ICTV)=20authorizes=20and=20organi?= =?UTF-8?q?zes=20the=20taxonomic=20classification=20of=20and=20the=20nomen?= =?UTF-8?q?clatures=20for=20viruses.=20The=20ICTV=20has=20developed=20a=20?= =?UTF-8?q?universal=20taxonomic=20scheme=20for=20viruses,=20and=20thus=20?= =?UTF-8?q?has=20the=20means=20to=20appropriately=20describe,=20name,=20an?= =?UTF-8?q?d=20classify=20every=20virus=20that=20affects=20living=20organi?= =?UTF-8?q?sms.=20The=20members=20of=20the=20International=20Committee=20o?= =?UTF-8?q?n=20Taxonomy=20of=20Viruses=20are=20considered=20expert=20virol?= =?UTF-8?q?ogists.=20The=20ICTV=20was=20formed=20from=20and=20is=20governe?= =?UTF-8?q?d=20by=20the=20Virology=20Division=20of=20the=20International?= =?UTF-8?q?=20Union=20of=20Microbiological=20Societies.=20Detailed=20work,?= =?UTF-8?q?=20such=20as=20delimiting=20the=20boundaries=20of=20species=20w?= =?UTF-8?q?ithin=20a=20family,=20typically=20is=20performed=20by=20study?= =?UTF-8?q?=20groups=20of=20experts=20in=20the=20families.=E2=80=9D=20Desc?= =?UTF-8?q?ription=20from=20Wikipedia.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ICTV Master Species List is curated by virology experts, who have established over 100 international study groups that organize discussions on emerging taxonomic issues in their field, oversee the submission of proposals for new taxonomy, and prepare or revise the relevant chapter(s) in ICTV reports. ICTV is open to submissions of proposals for taxonomic changes from any individual; however, in practice, proposals are usually submitted by members of the relevant study groups. The ICTV chooses an exemplar virus for each species and the Virus Metadata Resource provides a list of these exemplars. 
An exemplar virus serves as an example of a well-characterized virus isolate of that species and includes the GenBank accession number for the genomic sequence of the isolate as well as the virus name, isolate designation, suggested abbreviation, genome composition, and host source. This import is internally documented including the schema additions [here](https://docs.google.com/document/d/1ELM4XmjyG1bitWqdSrSp6d49EQ2_ya4PpXHc_B0cPIE/edit?resourcekey=0-eefsHcX6YqQ7UqRcwVpaBg#heading=h.qtewylhpzoc9). This import is also being documented on GitHub in datacommonsorg/data [PR #834](https://github.com/datacommonsorg/data/pull/834). This cleans up the Master Species List and Virus Metadata Resource datasets from ICTV. They are formatted as a tmcf + csv biomedical import. It also adds schema to represent the data in this import. It passed the tests from Prashanth's json tool and the internal v3 staging tool. PiperOrigin-RevId: 518973987 --- biomedical_schema/genome_annotation.mcf | 10 +- biomedical_schema/virus_taxonomy.mcf | 229 ++++++++++++++++++++++ biomedical_schema/virus_taxonomy_enum.mcf | 225 +++++++++++++++++++++ core/dcschema.mcf | 7 + 4 files changed, 466 insertions(+), 5 deletions(-) create mode 100644 biomedical_schema/virus_taxonomy.mcf create mode 100644 biomedical_schema/virus_taxonomy_enum.mcf diff --git a/biomedical_schema/genome_annotation.mcf b/biomedical_schema/genome_annotation.mcf index 809d8ea28..6b15a63c5 100644 --- a/biomedical_schema/genome_annotation.mcf +++ b/biomedical_schema/genome_annotation.mcf @@ -51,21 +51,21 @@ name: "Gene" typeOf: schema:Class subClassOf: dcs:GenomeAnnotation description: "Gene symbol of a gene, which is the basic hereditary unit of life." 
-sameAs: "https://bioportal.bioontology.org/ontologies/OGG" +descriptionUrl: "https://bioportal.bioontology.org/ontologies/OGG" Node: dcid:GeneticVariant name: "GeneticVariant" typeOf: schema:Class subClassOf: dcs:GenomeAnnotation description: "A single-nucleotide polymorphism, which is a substitution of a single nucleotide that occurs at a specific position in the genome, where each variation is present to some appreciable degree within a population. These are defined by dbSNP and includes small indels as well." -sameAs: "http://rohsdb.usc.edu/GBshape/cgi-bin/hgTables?db=hg19&hgta_group=varRep&hgta_track=snp137&hgta_table=snp137&hgta_doSchema=describe+table+schema" +descriptionUrl: "http://rohsdb.usc.edu/GBshape/cgi-bin/hgTables?db=hg19&hgta_group=varRep&hgta_track=snp137&hgta_table=snp137&hgta_doSchema=describe+table+schema" Node: dcid:GeneticVariantGeneAssociation name: "GeneticVariantGeneAssociation" typeOf: schema:Class subClassOf: dcs:GeneticAssociation description: "An association between a genetic variant and a gene in a given tissue. This is determined by performing a regression analysis on paired genome sequencing and RNA-sequencing across a population." -sameAs: "https://storage.googleapis.com/gtex_analysis_v6p/single_tissue_eqtl_data/README_eQTL_v6p.txt" +descriptionUrl: "https://storage.googleapis.com/gtex_analysis_v6p/single_tissue_eqtl_data/README_eQTL_v6p.txt" Node: Position typeOf: dcs:UnitOfMeasure @@ -123,14 +123,14 @@ Node: dcid:genBankAccession name: "genBankAssemblyAccession" typeOf: schema:Property rangeIncludes: schema:Text -domainIncludes: dcs:GenomeAssembly,dcs:GenomeAssemblyUnit,dcs:Chromosome +domainIncludes: dcs:BiologicalElement description: "The accession version of the GenBank assembly or sequence element." 
Node: dcid:refSeqAccession name: "refSeqAssemblyAccession" typeOf: schema:Property rangeIncludes: schema:Text -domainIncludes: dcs:GenomeAssembly,dcs:GenomeAssemblyUnit,dcs:Chromosome +domainIncludes: dcs:BiologicalElement description: "The accession version of the RefSeq assembly or sequence element." Node: dcid:ncbiBioProject diff --git a/biomedical_schema/virus_taxonomy.mcf b/biomedical_schema/virus_taxonomy.mcf new file mode 100644 index 000000000..90d438a7c --- /dev/null +++ b/biomedical_schema/virus_taxonomy.mcf @@ -0,0 +1,229 @@ +# Virus Taxonomy +Node: dcid:Virus +name: "Virus" +typeOf: schema:Class +subClassOf: dcs:BiologicalSpecimen +description: "A virus is a submicroscopic infectious agent that replicates only inside the living cells of an organism. Viruses infect all life forms, from animals and plants to microorganisms, including bacteria and archaea. The classification of viruses is developed by the International Committee on Taxonomy of Viruses (ICTV)." +descriptionUrl: "https://en.wikipedia.org/wiki/Virus" +descriptionUrl: "https://talk.ictvonline.org/taxonomy/w/ictv-taxonomy" + +Node: dcid:VirusIsolate +name: "VirusIsolate" +typeOf: schema:Class +subClassOf: dcs:Virus +description: "A virus that has been isolated from an infected host and can be propagated in culture." +descriptionUrl: "https://talk.ictvonline.org/taxonomy/w/ictv-taxonomy" + +Node: dcid:VirusGenomeSegment +name: "VirusGenomeSegment" +typeOf: schema:Class +subClassOf: dcs:VirusIsolate +description: "A segment of a virus whose genome is fragmented into two or more nucleic acid molecules." + +Node: dcid:genomeCoverage +name: "genomeCoverage" +typeOf: schema:Property +domainIncludes: dcs:VirusIsolate +rangeIncludes: dcs:GenomeCoverageEnum +description: "Genome coverage refers to the number of unique reads aligned to a specific locus in a reference genome. 
Genome coverage can also be used to denote the breadth of coverage of a target genome, which is defined as the percentage of target bases that are sequenced a given number of times." +descriptionUrl: "https://www.nature.com/articles/nrg3642" + +Node: dcid:genomeSegmentOf +name: "genomeSegmentOf" +typeOf: schema:Property +domainIncludes: dcs:VirusGenomeSegment +rangeIncludes: dcs:VirusIsolate +description: "The virus isolate from which this genome segment was sequenced." + +Node: dcid:isExemplarVirusIsolate +name: "isExemplarVirusIsolate" +typeOf: schema:Property +domainIncludes: dcs:VirusIsolate +rangeIncludes: schema:Boolean +description: "A virus isolate is a virus that has been isolated from an infected host and can be propogated in culture. ICTV chooses examplar isolates to represent a virus species. An exemplar virus serves as an example of a well-characterized virus isolate of that species and includes the GenBank accession number for the genomic sequence of the isolate as well as the virus name, isolate designation, suggested abbreviation, genome composition, and host source." +descriptionUrl: "https://ictv.global/vmr" + +Node: dcid:ofVirusSpecies +name: "ofVirusSpecies" +typeOf: schema:Property +domainIncludes: dcs:VirusIsolate +rangeIncludes: dcs:Virus +description: "The species of a virus isolate." + +Node: dcid:proposalForLastChange +name: "proposalForLastChange" +typeOf: schema:Property +domainIncludes: dcs:Virus +rangeIncludes: schema:Text +description: "The file name of the taxonomic proposal that details the justification for the last change. Proposals can be retrieved by appending the file nameand '.pdf' to the end of the following url: 'https://talk.ictvonline.org/ictv/proposals/