A JavaScript library for resolving biological IDs to their equivalent IDs in batch.
$ npm i biomedical_id_resolver
const resolve = require('biomedical_id_resolver');
// input should be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
(async () => {
const resolver = new resolve();
console.log(await resolver.resolve(input));
//=> {'NCBIGene:1017': {...}, 'NCBIGene:1018': {...}, 'HGNC:1177': {...}, 'CHEBI:15377': {...}, 'MONDO:0004976': {...}, 'CL:0002372': {...}}
})();
-
Output is a javascript Object
-
The root keys are CURIEs (e.g. NCBIGene:1017) which are passed in as input.
-
The values represent the resolved identifiers
-
Each CURIE will have 4 required fields
-
id: the primary id (selected based on the ranking described in the next section) and label
-
curies: an array, each element represents a resolved id in CURIE format
-
type: the semantic type of the identifier
-
db_ids: original ids from source database, could be curies or non-curies.
-
-
If an ID cannot be resolved using the package, it will have an additional field called "flag", with its value set to "failed".
-
Example Output
{
"NCBIGene:1017": {
"id": {
"label": "cyclin dependent kinase 2",
"identifier": "NCBIGene:1017"
},
"db_ids": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"SYMBOL": [
"CDK2"
],
"UMLS": [
"C1332733",
"C0108855"
],
"name": [
"cyclin dependent kinase 2"
]
},
"type": "Gene",
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"SYMBOL:CDK2",
"UMLS:C1332733",
"UMLS:C0108855"
]
}
}
Query Using SRI node normalizer
const resolver = require('biomedical_id_resolver');
// input must be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
(async () => {
let res = await resolver.resolveSRI(input);
console.log(res);
})();
The output contains `id` and `equivalent_identifiers` straight from SRI, as well as the same fields as the base resolver, so that it stays backwards compatible with the base resolver.
{
"NCBIGene:1017": [
{
"id": {
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
"equivalent_identifiers": [
{
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
{
"identifier": "ENSEMBL:ENSG00000123374"
},
{
"identifier": "HGNC:1771",
"label": "CDK2"
},
{
"identifier": "OMIM:116953"
},
{
"identifier": "UMLS:C1332733",
"label": "CDK2 gene"
}
],
"type": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"primaryID": "NCBIGene:1017",
"label": "CDK2",
"attributes": {},
"semanticType": "Gene",
"semanticTypes": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"dbIDs": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"OMIM": [
"116953"
],
"UMLS": [
"C1332733"
],
"name": [
"CDK2",
"CDK2 gene"
]
},
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"OMIM:116953",
"UMLS:C1332733"
]
}
]
}
Gene, Transcript, Protein ID resolution is done through MyGene.info API
-
Gene
- NCBIGene
- ENSEMBL
- HGNC
- MGI
- OMIM
- UMLS
- SYMBOL
- UniProtKB
- name
-
Transcript
- ENSEMBL
- SYMBOL
- name
-
Protein
- UniProtKB
- ENSEMBL
- UMLS
- SYMBOL
- name
Variant ID resolution is done through MyVariant.info API
- SequenceVariant
- CLINVAR
- DBSNP
- HGVS
- MYVARIANT_HG19
SmallMolecule, Drug ID resolution is done through MyChem.info API
-
SmallMolecule
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- DRUGBANK
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
-
Drug
- RXCUI
- NDC
- DRUGBANK
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
Disease, ClinicalFinding ID Resolution is done through MyDisease.info API
-
Disease
- MONDO
- DOID
- OMIM
- ORPHANET
- EFO
- UMLS
- MESH
- MEDDRA
- NCIT
- SNOMEDCT
- HP
- GARD
- name
-
ClinicalFinding
- LOINC
- NCIT
- EFO
- name
Pathway ID Resolution is done through biothings.ncats.io/geneset API
- Pathway
- GO
- REACT
- KEGG
- SMPDB
- PHARMGKB.PATHWAYS
- WIKIPATHWAYS
- BIOCARTA
- name
MolecularActivity ID Resolution is done through BioThings Gene Ontology Molecular Activity API
- MolecularActivity
- GO
- REACT
- RHEA
- MetaCyc
- KEGG.REACTION
- name
CellularComponent ID Resolution is done through BioThings Gene Ontology Cellular Component API
- CellularComponent
- GO
- MetaCyc
- name
BiologicalProcess ID Resolution is done through BioThings Gene Ontology Biological Process API
-
BiologicalProcess
- GO
- REACT
- MetaCyc
- KEGG
- name
AnatomicalEntity ID Resolution is done through BioThings UBERON API
- AnatomicalEntity
- UBERON
- UMLS
- MESH
- NCIT
- name
PhenotypicFeature ID Resolution is done through BioThings HPO API
- PhenotypicFeature
- HP
- EFO
- NCIT
- UMLS
- MEDDRA
- MP
- SNOMEDCT
- MESH
- name
Cell ID Resolution is done through BioThings Cell Ontology API
- Cell
- CL
- NCIT
- MESH
- EFO
- name
- Install Node 12 or later. You can use the package manager of your choice. Tests need to pass in Node 12 and 14.
- Clone this repository.
- Run `npm ci` to install the dependencies.
- Scripts are stored in the `/src` folder.
- Add tests to the `/__tests__` folder.
- Run `npm run release` to bump the version and generate the change log.
- Run `npx depcheck` to check for unused packages in package.json.
See CHANGELOG.md