diff --git a/.eslintrc.json b/.eslintrc.json index 55d0782..21d71ea 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -7,7 +7,10 @@ "no-console": 1, "eqeqeq": "warn", "no-cond-assign": 0, - "no-unused-vars": 1, + "no-unused-vars": "off", + "@typescript-eslint/no-unused-vars": [ + "error" + ], "no-extra-semi": "warn", "semi": "warn" }, @@ -25,4 +28,4 @@ "experimentalObjectRestSpread": true } } -} +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index ca30a2c..aeeada6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,9 @@ "name": "periodum_api", "version": "1.0.0", "dependencies": { - "express": "^4.17.2" + "express": "^4.17.2", + "limiter": "^2.0.1", + "node-fetch": "^3.2.0" }, "devDependencies": { "@swc/cli": "^0.1.55", @@ -1411,6 +1413,14 @@ "node": ">=8" } }, + "node_modules/data-uri-to-buffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.0.tgz", + "integrity": "sha512-Vr3mLBA8qWmcuschSLAOogKgQ/Jwxulv3RNE4FXnYWRGujzrRWQI4m12fQqRkwX06C0KanhLr4hK+GydchZsaA==", + "engines": { + "node": ">= 12" + } + }, "node_modules/debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -1938,6 +1948,28 @@ "reusify": "^1.0.4" } }, + "node_modules/fetch-blob": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.1.4.tgz", + "integrity": "sha512-Eq5Xv5+VlSrYWEqKrusxY1C3Hm/hjeAsCGVG3ft7pZahlUAChpGZT/Ms1WmSLnEAisEXszjzu/s+ce6HZB2VHA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "paypal", + "url": "https://paypal.me/jimmywarting" + } + ], + "dependencies": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + }, + "engines": { + "node": "^12.20 || >= 14.13" + } + }, "node_modules/file-entry-cache": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", @@ -2023,6 +2055,17 @@ "integrity": "sha512-WIWGi2L3DyTUvUrwRKgGi9TwxQMUEqPOPQBVi71R96jZXJdFskXEmf54BoZaS1kknGODoIGASGEzBUYdyMCBJg==", "dev": true }, + "node_modules/formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "dependencies": { + "fetch-blob": "^3.1.2" + }, + "engines": { + "node": ">=12.20.0" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -2544,6 +2587,11 @@ "json5": "lib/cli.js" } }, + "node_modules/just-performance": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/just-performance/-/just-performance-4.2.0.tgz", + "integrity": "sha512-4TikKSf+Gb+Et5SnA4ppyrxLSf9qWFq+SqfdDdrgHE1KLwSch/Zi1AQB0TrE4ppYjZdUrHnwdx+6dyx0cx/HyA==" + }, "node_modules/keyv": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/keyv/-/keyv-3.1.0.tgz", @@ -2578,6 +2626,14 @@ "node": ">= 0.8.0" } }, + "node_modules/limiter": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/limiter/-/limiter-2.0.1.tgz", + "integrity": "sha512-OhORJ29edCj2WVLYEou3AVRYZmobyTOy2A9yvDuwL68teu+vWLLlnJxEBMH4gDXIKlkyCTBL9JDEygSPOiwfMg==", + "dependencies": { + "just-performance": "4.2.0" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -2926,6 +2982,41 @@ "node": ">= 0.6" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.2.0.tgz", + "integrity": "sha512-8xeimMwMItMw8hRrOl3C9/xzU49HV/yE6ORew/l+dxWimO5A4Ra8ld2rerlJvc/O7et5Z1zrWsPX43v1QBjCxw==", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, "node_modules/nodemon": { "version": "2.0.15", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-2.0.15.tgz", @@ -4007,6 +4098,14 @@ "node": ">= 0.8" } }, + "node_modules/web-streams-polyfill": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.0.tgz", + "integrity": "sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA==", + "engines": { + "node": ">= 8" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -5116,6 +5215,11 @@ "integrity": "sha512-v1plID3y9r/lPhviJ1wrXpLeyUIGAZ2SHNYTEapm7/8A9nLPoyvVp3RK/EPFqn5kEznyWgYZNsRtYYIWbuG8KA==", "dev": true }, + "data-uri-to-buffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.0.tgz", + "integrity": "sha512-Vr3mLBA8qWmcuschSLAOogKgQ/Jwxulv3RNE4FXnYWRGujzrRWQI4m12fQqRkwX06C0KanhLr4hK+GydchZsaA==" + }, "debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -5526,6 +5630,15 @@ "reusify": "^1.0.4" } }, + "fetch-blob": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.1.4.tgz", + "integrity": "sha512-Eq5Xv5+VlSrYWEqKrusxY1C3Hm/hjeAsCGVG3ft7pZahlUAChpGZT/Ms1WmSLnEAisEXszjzu/s+ce6HZB2VHA==", + "requires": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + } + }, "file-entry-cache": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", @@ -5590,6 +5703,14 @@ "integrity": "sha512-WIWGi2L3DyTUvUrwRKgGi9TwxQMUEqPOPQBVi71R96jZXJdFskXEmf54BoZaS1kknGODoIGASGEzBUYdyMCBJg==", "dev": true }, + "formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "requires": { + "fetch-blob": "^3.1.2" + } + }, "forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -5969,6 +6090,11 @@ "minimist": "^1.2.0" } }, + "just-performance": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/just-performance/-/just-performance-4.2.0.tgz", + "integrity": "sha512-4TikKSf+Gb+Et5SnA4ppyrxLSf9qWFq+SqfdDdrgHE1KLwSch/Zi1AQB0TrE4ppYjZdUrHnwdx+6dyx0cx/HyA==" + }, "keyv": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/keyv/-/keyv-3.1.0.tgz", @@ -5997,6 +6123,14 @@ "type-check": "~0.4.0" } }, + "limiter": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/limiter/-/limiter-2.0.1.tgz", + "integrity": "sha512-OhORJ29edCj2WVLYEou3AVRYZmobyTOy2A9yvDuwL68teu+vWLLlnJxEBMH4gDXIKlkyCTBL9JDEygSPOiwfMg==", + "requires": { + "just-performance": "4.2.0" + } + }, "locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -6248,6 +6382,21 @@ "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==" }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==" + }, + "node-fetch": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.2.0.tgz", + "integrity": "sha512-8xeimMwMItMw8hRrOl3C9/xzU49HV/yE6ORew/l+dxWimO5A4Ra8ld2rerlJvc/O7et5Z1zrWsPX43v1QBjCxw==", + "requires": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + } + }, "nodemon": { "version": "2.0.15", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-2.0.15.tgz", @@ -7030,6 +7179,11 @@ "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" }, + "web-streams-polyfill": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.0.tgz", + "integrity": "sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA==" + }, "which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 0eba48e..18b37f7 100644 --- a/package.json +++ b/package.json @@ -10,10 +10,13 @@ "dev": "nodemon --ext ts --exec \"npm run build-dev\"", "start": "npm run build && NODE_ENV=production node dist/app.js", "test": "NODE_ENV=test mocha --check-leaks -r ts-node/register -r tsconfig-paths/register \"test/**/*.spec.ts\"", - "test:watch": "NODE_ENV=test mocha --check-leaks -r ts-node/register -r tsconfig-paths/register \"test/**/*.spec.ts\" -w --watch-files \"test/**/*.spec.ts\",\"src/**/*.ts\"" + "test:watch": "NODE_ENV=test mocha --check-leaks -r ts-node/register -r tsconfig-paths/register \"test/**/*.spec.ts\" -w --watch-files \"test/**/*.spec.ts\",\"src/**/*.ts\"", + "import:pubchem": "node dist/pubchem-import/index.js" }, "dependencies": { - "express": "^4.17.2" + "express": "^4.17.2", + "limiter": "^2.0.1", + "node-fetch": "^3.2.0" }, "type": "module", "devDependencies": { @@ -33,4 +36,4 @@ "tsconfig-paths": "^3.12.0", "typescript": "^4.5.5" } -} +} \ No newline at end of file diff --git a/src/pubchem-import/index.ts b/src/pubchem-import/index.ts new file mode 100644 index 0000000..a96c5c9 --- /dev/null +++ b/src/pubchem-import/index.ts @@ -0,0 +1,48 @@ +import fetch from "node-fetch"; +// import type { PubChemCompound } from "./types"; +// import getNecessaryData from "./parseData.js"; +import { RateLimiter } from "limiter"; +import { RawCompound } from "./types"; +import getNecessaryData from "./parseData.js"; + +const _parseInt = (x: string) => parseInt(x); + +const [startId, endId, compoundId] = process.argv.slice(2).map(_parseInt); +const API_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/"; + +async function fetchJson(compoundId: number): Promise { + return (await (await fetch(API_URL + compoundId + "/JSON")).json()) as Promise; +} + +async function init() { + const ids: number[] = []; + + for (let i = compoundId; i < compoundId + (endId - startId + 1); i++) { + ids.push(i); + } + + if (ids.length === 0) { + return; + } + + const limiter = new RateLimiter({ tokensPerInterval: 1, interval: 250 }); + + async function sendMessage(id: number) { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const remainingMessages = await limiter.removeTokens(1); + // const id = getId(); + if (id === undefined) { + return; + } + const result = await fetchJson(id); + return getNecessaryData(result.Record); + } + + ids.forEach(async (id) => { + await sendMessage(id).then((e) => { + console.log(e?.RecordTitle); + }); + }); +} + +init(); diff --git a/src/pubchem-import/parseData.ts b/src/pubchem-import/parseData.ts new file mode 100644 index 0000000..c64da49 --- /dev/null +++ b/src/pubchem-import/parseData.ts @@ -0,0 +1,444 @@ +import { PubChemCompound } from "./types"; +import type { BaseSection, DataKeys, Markup, Value } from "./types"; +type ObjectOfAny = { [key: string]: any }; +type Resolver = (data: Record) => Record | Record[]; + +const NoData = "N/A"; +const dataPaths: { + name: string; + sectionPath: string[]; + dataPath: DataKeys[]; + resolver?: Resolver; +}[] = [ + { + name: "ChemicalSafety", + sectionPath: ["Chemical Safety"], + dataPath: ["Information", "Value", "StringWithMarkup", "Markup"], + resolver: (data: Markup) => { + return { Extra: data.Extra, Type: data.Type, URL: data.URL }; + }, + }, + { + name: "RecordDescription", + sectionPath: ["Names and Identifiers", "Record Description"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "IUPACName", + sectionPath: ["Names and Identifiers", "Computed Descriptors", "IUPAC Name"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "InChI", + sectionPath: ["Names and Identifiers", "Computed Descriptors", "InChI"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "InChIKey", + sectionPath: ["Names and Identifiers", "Computed Descriptors", "InChI Key"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "CanonicalSMILES", + sectionPath: ["Names and Identifiers", "Computed Descriptors", "Canonical SMILES"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "MolecularFormula", + sectionPath: ["Names and Identifiers", "Molecular Formula"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + + { + name: "CAS", + sectionPath: ["Names and Identifiers", "Other Identifiers", "CAS"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "RelatedCAS", + sectionPath: ["Names and Identifiers", "Other Identifiers", "Related CAS"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "EuropeanCommunityNumber", + sectionPath: ["Names and Identifiers", "Other Identifiers", "European Community (EC) Number"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "ICSCNumber", + sectionPath: ["Names and Identifiers", "Other Identifiers", "ICSC Number"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "RTECSNumber", + sectionPath: ["Names and Identifiers", "Other Identifiers", "RTECS Number"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "UNNumber", + sectionPath: ["Names and Identifiers", "Other Identifiers", "UN Number"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "UNII", + sectionPath: ["Names and Identifiers", "Other Identifiers", "UNII"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "FEMANumber", + sectionPath: ["Names and Identifiers", "Other Identifiers", "FEMA Number"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "DSSToxSubstanceID", + sectionPath: ["Names and Identifiers", "Other Identifiers", "DSSTox Substance ID"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Wikipedia", + sectionPath: ["Names and Identifiers", "Other Identifiers", "Wikipedia"], + dataPath: ["Information", "URL"], + }, + { + name: "NCIThesaurusCode", + sectionPath: ["Names and Identifiers", "Other Identifiers", "NCI Thesaurus Code"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + + { + name: "MolecularWeight", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Molecular Weight"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "CompoundIsCanonicalized", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Compound Is Canonicalized"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "XLogP3", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "XLogP3"], + dataPath: ["Information", "Value", "Number"], + }, + + { + name: "HydrogenBondDonorCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Hydrogen Bond Donor Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "HydrogenBondAcceptorCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Hydrogen Bond Acceptor Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "RotatableBondCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Rotatable Bond Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "ExactMass", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Exact Mass"], + dataPath: ["Information", "Value"], + resolver: (data: Value) => { + return { + String: extractFromArrayIfOneItem(resolveData(data, ["StringWithMarkup", "String"])), + Unit: extractFromArrayIfOneItem(resolveData(data, ["Unit"])), + }; + }, + }, + { + name: "MonoisotopicMass", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Monoisotopic Mass"], + dataPath: ["Information", "Value"], + resolver: (data: Value) => { + return { + String: extractFromArrayIfOneItem(resolveData(data, ["StringWithMarkup", "String"])), + Unit: extractFromArrayIfOneItem(resolveData(data, ["Unit"])), + }; + }, + }, + { + name: "TopologicalPolarSurfaceArea", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Topological Polar Surface Area"], + dataPath: ["Information", "Value"], + resolver: (data: Value) => { + return { + Number: extractFromArrayIfOneItem(resolveData(data, ["Number"])), + Unit: extractFromArrayIfOneItem(resolveData(data, ["Unit"])), + }; + }, + }, + { + name: "HeavyAtomCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Heavy Atom Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "FormalCharge", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Formal Charge"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "Complexity", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Complexity"], + dataPath: ["Information", "Value", "Number"], + }, + + { + name: "IsotopeAtomCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Isotope Atom Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "DefinedAtomStereocenterCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Defined Atom Stereocenter Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "UndefinedAtomStereocenterCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Undefined Atom Stereocenter Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "DefinedBondStereocenterCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Defined Bond Stereocenter Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "UndefinedBondStereocenterCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Undefined Bond Stereocenter Count"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "Covalently-BondedUnitCount", + sectionPath: ["Chemical and Physical Properties", "Computed Properties", "Covalently-Bonded Unit Count"], + dataPath: ["Information", "Value", "Number"], + }, + + { + name: "PhysicalDescription", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Physical Description"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "ColorForm", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Color/Form"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Odor", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Odor"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Taste", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Taste"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "BoilingPoint", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Boiling Point"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "MeltingPoint", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Melting Point"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "FlashPoint", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Flash Point"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Solubility", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Solubility"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Density", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Density"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "VaporDensity", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Vapor Density"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "VaporPressure", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Vapor Pressure"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "LogP", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "LogP"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "HenrysLawConstant", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Henrys Law Constant"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "AtmosphericOHRateConstant", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Atmospheric OH Rate Constant"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Stability/ShelfLife", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Stability/Shelf Life"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "AutoignitionTemperature", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Autoignition Temperature"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Decomposition", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Decomposition"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Viscosity", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Viscosity"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Corrosivity", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Corrosivity"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "HeatofCombustion", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Heat of Combustion"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "HeatofVaporization", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Heat of Vaporization"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "pH", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "pH"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "SurfaceTension", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Surface Tension"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "IonizationPotential", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Ionization Potential"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "Polymerization", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Polymerization"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "OdorThreshold", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Odor Threshold"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "RefractiveIndex", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Refractive Index"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "DissociationConstants", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Dissociation Constants"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "KovatsRetentionIndex", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Kovats Retention Index"], + dataPath: ["Information", "Value", "Number"], + }, + { + name: "OtherExperimentalProperties", + sectionPath: ["Chemical and Physical Properties", "Experimental Properties", "Other Experimental Properties"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "FoodAdditiveClasses", + sectionPath: ["Food Additives and Ingredients", "Food Additive Classes"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, + { + name: "AgrochemicalCategory", + sectionPath: ["Agrochemical Information", "Agrochemical Category"], + dataPath: ["Information", "Value", "StringWithMarkup", "String"], + }, +]; + +const findSection = (parentSection: BaseSection | PubChemCompound) => (targetSectionHeading: string) => + parentSection.Section?.find((x) => x.TOCHeading === targetSectionHeading); + +const getFromObject = ( + obj: ObjectOfAny | ObjectOfAny[], + path: string +): Record | Record[] => { + if (Array.isArray(obj)) { + return [...new Set(obj.map((x) => getFromObject(x, path)).flat())].filter((x) => x !== undefined); + } + return obj[path]; +}; + +const resolveData = (parent: ObjectOfAny, dataPath: string[]): ObjectOfAny | ObjectOfAny[] | typeof NoData => { + if (dataPath.length === 0) { + return parent; + } + const [head, ...tail] = dataPath; + const next = getFromObject(parent, head); + if (!next || (Array.isArray(next) && extractFromArrayIfOneItem(next) === undefined)) { + return NoData; + } else { + return resolveData(next, tail); + } +}; + +const extractFromArrayIfOneItem = (val: unknown) => { + if (val && Array.isArray(val) && val.length === 1) { + return val[0]; + } + return val; +}; + +export default function getNecessaryData(raw: PubChemCompound): PubChemCompound { + let res = {} as PubChemCompound; + res = { ...res, RecordTitle: raw.RecordTitle }; + res = { ...res, RecordNumber: raw.RecordNumber }; + dataPaths.forEach(({ name, sectionPath, dataPath, resolver }) => { + // if (name === "MeltingPoint") { + const section = [...sectionPath].reduce((acc, cur, i, arr) => { + if (Object.keys(acc).length === 0) { + const foundSection = findSection(raw)(cur); + if (!foundSection) { + arr.splice(i, 1); + return acc; + } + return foundSection; + } + return findSection(acc)(cur) || acc; + }, {} as BaseSection); + let data = extractFromArrayIfOneItem(resolveData(section, dataPath)); + if (resolver && data && data !== NoData) { + if (Array.isArray(data)) { + data = data.map(resolver); + } else { + data = resolver(data); + } + } + res = { ...res, [name]: data }; + // console.log(data, name); + // } + }); + return res; +} diff --git a/src/pubchem-import/types.ts b/src/pubchem-import/types.ts new file mode 100644 index 0000000..215a257 --- /dev/null +++ b/src/pubchem-import/types.ts @@ -0,0 +1,147 @@ +export type DataKeys = + | keyof BaseSection + | keyof Information + | keyof Value + | keyof StringWithMarkup + | keyof Markup; + +export interface Markup { + URL?: string; + Type?: string; + Extra?: string; +} + +export interface StringWithMarkup { + String: string; + Markup: Markup; +} + +export interface Value { + Number?: number[]; + Unit?: string; + StringWithMarkup?: StringWithMarkup[]; +} + +export interface Information { + URL?: string; + Value: { + Number?: [number]; + Unit?: string; + StringWithMarkup: [ + { + String: string; + Markup: { URL: string; Type: string; Extra: string }[]; + } + ]; + }; +} + +export interface BaseSection { + TOCHeading: T; + Description: string; + Information: Information[]; + Section?: BaseSection[]; +} +export type ChemicalSafety = BaseSection<"Chemical Safety">; + +export interface NamesAndIdentifiers extends BaseSection<"Names and Identifiers"> { + Section: [ + BaseSection<"Record Description">, + BaseSection<"Computed Descriptors", "IUPAC Name">, + BaseSection<"Computed Descriptors", "InChI">, + BaseSection<"Computed Descriptors", "InChI Key">, + BaseSection<"Computed Descriptors", "Canonical SMILES">, + BaseSection<"Molecular Formula">, + BaseSection<"Other Identifiers">, + BaseSection<"Other Identifiers", "CAS">, + BaseSection<"Other Identifiers", "Related CAS">, + BaseSection<"Other Identifiers", "European Community (EC) Number">, + BaseSection<"Other Identifiers", "ICSC Number">, + BaseSection<"Other Identifiers", "RTECS Number">, + BaseSection<"Other Identifiers", "UN Number">, + BaseSection<"Other Identifiers", "UNII">, + BaseSection<"Other Identifiers", "FEMA Number">, + BaseSection<"Other Identifiers", "DSSTox Substance ID">, + BaseSection<"Other Identifiers", "Wikipedia">, + BaseSection<"Other Identifiers", "NCI Thesaurus Code"> + ]; +} + +export interface ChemicalAndPhysicalProperties extends BaseSection<"Chemical and Physical Properties"> { + Section: [ + BaseSection<"Computed Properties", "Molecular Weight">, + BaseSection<"Computed Properties", "XLogP3">, + BaseSection<"Computed Properties", "Hydrogen Bond Donor Count">, + BaseSection<"Computed Properties", "Hydrogen Bond Acceptor Count">, + BaseSection<"Computed Properties", "Rotatable Bond Count">, + BaseSection<"Computed Properties", "Exact Mass">, + BaseSection<"Computed Properties", "Monoisotopic Mass">, + BaseSection<"Computed Properties", "Topological Polar Surface Area">, + BaseSection<"Computed Properties", "Heavy Atom Count">, + BaseSection<"Computed Properties", "Formal Charge">, + BaseSection<"Computed Properties", "Complexity">, + BaseSection<"Computed Properties", "Isotope Atom Count">, + BaseSection<"Computed Properties", "Defined Atom Stereocenter Count">, + BaseSection<"Computed Properties", "Undefined Atom Stereocenter Count">, + BaseSection<"Computed Properties", "Defined Bond Stereocenter Count">, + BaseSection<"Computed Properties", "Undefined Bond Stereocenter Count">, + BaseSection<"Computed Properties", "Covalently-Bonded Unit Count">, + BaseSection<"Computed Properties", "Compound Is Canonicalized">, + + BaseSection<"Experimental Properties", "Physical Description">, + BaseSection<"Experimental Properties", "Color/Form">, + BaseSection<"Experimental Properties", "Odor">, + BaseSection<"Experimental Properties", "Taste">, + BaseSection<"Experimental Properties", "Boiling Point">, + BaseSection<"Experimental Properties", "Melting Point">, + BaseSection<"Experimental Properties", "Flash Point">, + BaseSection<"Experimental Properties", "Solubility">, + BaseSection<"Experimental Properties", "Density">, + BaseSection<"Experimental Properties", "Vapor Density">, + BaseSection<"Experimental Properties", "Vapor Pressure">, + BaseSection<"Experimental Properties", "LogP">, + BaseSection<"Experimental Properties", "Henrys Law Constant">, + BaseSection<"Experimental Properties", "Atmospheric OH Rate Constant">, + BaseSection<"Experimental Properties", "Stability/Shelf Life">, + BaseSection<"Experimental Properties", "Autoignition Temperature">, + BaseSection<"Experimental Properties", "Decomposition">, + BaseSection<"Experimental Properties", "Viscosity">, + BaseSection<"Experimental Properties", "Corrosivity">, + BaseSection<"Experimental Properties", "Heat of Combustion">, + BaseSection<"Experimental Properties", "Heat of Vaporization">, + BaseSection<"Experimental Properties", "pH">, + BaseSection<"Experimental Properties", "Surface Tension">, + BaseSection<"Experimental Properties", "Ionization Potential">, + BaseSection<"Experimental Properties", "Polymerization">, + BaseSection<"Experimental Properties", "Odor Threshold">, + BaseSection<"Experimental Properties", "Refractive Index">, + BaseSection<"Experimental Properties", "Dissociation Constants">, + BaseSection<"Experimental Properties", "Kovats Retention Index">, + BaseSection<"Experimental Properties", "Other Experimental Properties"> + ]; +} + +export interface FoodAdditivesAndIngredients extends BaseSection<"Food Additives and Ingredients"> { + Section: [BaseSection<"Food Additive Classes">]; +} + +export interface AgrochemicalInformation extends BaseSection<"Agrochemical Information"> { + Section: [BaseSection<"Agrochemical Category">]; +} + +export interface PubChemCompound { + RecordNumber: number; + RecordTitle: string; + Section: [ + ChemicalSafety, + NamesAndIdentifiers, + ChemicalAndPhysicalProperties, + FoodAdditivesAndIngredients, + AgrochemicalInformation + ]; + id?: number; +} + +export interface RawCompound { + Record: PubChemCompound; +} diff --git a/test/index.spec.js b/test/index.spec.js new file mode 100644 index 0000000..ab79779 --- /dev/null +++ b/test/index.spec.js @@ -0,0 +1,10 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const chai_1 = require("chai"); +describe('Testing root files', () => { + it('should return 4 when 2 times 2 multiplied', () => { + const result = 2 * 2; + // This is here on purpose to ensure to test working + (0, chai_1.expect)(result).equals(4); + }); +}); diff --git a/test/pubchem/import.spec.js b/test/pubchem/import.spec.js new file mode 100644 index 0000000..e701db6 --- /dev/null +++ b/test/pubchem/import.spec.js @@ -0,0 +1,9 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const chai_1 = require("chai"); +describe("PubChem Import Script Specs", () => { + it('should return related object when ToCHeading contains H1 key', () => { + // This is here on purpose to ensure to test working + (0, chai_1.expect)("nothing").equals("nothing"); + }); +});