-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataOverlay.js
117 lines (98 loc) · 3 KB
/
dataOverlay.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import fs from 'fs';
import path from 'path';
/**
 * Maps a data overlay data type (the name of a subdirectory of a model's
 * `dataOverlay` directory) to the key of `componentIdDict` that holds the
 * IDs of the matching component type.
 *
 * Frozen because it is a shared lookup table that is only ever read.
 */
const DATA_TYPE_COMPONENTS = Object.freeze({
  gene: 'gene',
  metabolite: 'compartmentalizedMetabolite',
  reaction: 'reaction',
});
/**
 * Transforms a model's data overlay files into files that are ready
 * to be used in the Metabolic Atlas website.
 *
 * Every subdirectory of `${modelDir}/dataOverlay` is treated as a data type
 * and must be a key of `DATA_TYPE_COMPONENTS`. Each one must contain an
 * `index.tsv` whose rows describe data source files (read from the
 * `filename` column). The parsed indexes of all data types are written to a
 * single `index.json`, and every listed data source file is written to the
 * output directory with only the rows whose ID is a known component ID.
 *
 * Does nothing when the model has no `dataOverlay` directory.
 *
 * Example:
 *   `modelDir`: ../data-files/integrated-models/Human-GEM
 *   output index file: ./dataOverlay/Human-GEM/index.json
 *   output data source file: ./dataOverlay/Human-GEM/gene/protein1.mock.tsv
 *
 * @param {Object} params
 * @param {string} params.modelDir - Directory of the model; its last path
 *   segment names the model's output folder.
 * @param {string} params.outDir - Root directory for the generated files.
 * @param {Object} params.componentIdDict - Objects keyed by component type
 *   (the values of `DATA_TYPE_COMPONENTS`); only the keys of each inner
 *   object are used, as the set of valid component IDs.
 * @throws {Error} If a data type directory is not supported, or if
 *   `componentIdDict` has no entry for its component type. File system
 *   errors (e.g. a missing `index.tsv`) propagate unchanged.
 */
const processDataOverlayFiles = ({ modelDir, outDir, componentIdDict }) => {
  const filesDir = `${modelDir}/dataOverlay`;
  if (!fs.existsSync(filesDir)) {
    return;
  }

  const dataTypes = fs
    .readdirSync(filesDir, { withFileTypes: true })
    .filter((dirent) => dirent.isDirectory())
    .map((dirent) => dirent.name);

  // Resolve the valid IDs of every data type before writing anything, so an
  // unsupported directory fails with a clear message instead of a TypeError
  // raised after index.json has already been written.
  const componentIdSets = new Map(
    dataTypes.map((dt) => {
      const componentType = DATA_TYPE_COMPONENTS[dt];
      if (typeof componentType !== 'string') {
        throw new Error(
          `Unsupported data overlay type "${dt}" found in ${filesDir}`,
        );
      }
      const componentIds = componentIdDict[componentType];
      if (componentIds == null) {
        throw new Error(
          `No "${componentType}" IDs provided for data overlay type "${dt}"`,
        );
      }
      return [dt, new Set(Object.keys(componentIds))];
    }),
  );

  const dataSourcesDict = Object.fromEntries(
    dataTypes.map((dt) => [
      dt,
      parseIndexFile(fs.readFileSync(`${filesDir}/${dt}/index.tsv`, 'utf8')),
    ]),
  );

  const modelOutDir = getModelOutDir({ modelDir, outDir });
  fs.writeFileSync(
    `${modelOutDir}/index.json`,
    JSON.stringify(dataSourcesDict),
    'utf8',
  );

  for (const [dt, metadataList] of Object.entries(dataSourcesDict)) {
    const componentIdSet = componentIdSets.get(dt);
    const dataSourceOutDir = `${modelOutDir}/${dt}`;

    // Create the data type folder once, and only when it will hold a file.
    if (metadataList.length > 0) {
      fs.mkdirSync(dataSourceOutDir, { recursive: true });
    }

    for (const { filename } of metadataList) {
      const inputFile = fs.readFileSync(
        `${filesDir}/${dt}/${filename}`,
        'utf8',
      );
      const condensedFile = condenseDataSourceFile({
        inputFile,
        componentIdSet,
      });
      fs.writeFileSync(
        `${dataSourceOutDir}/${filename}`,
        condensedFile,
        'utf8',
      );
    }
  }
};
/**
 * Ensures that the output directory of a model exists and returns its path.
 *
 * The directory is `outDir` joined with the last path segment of `modelDir`,
 * e.g. `../integrated-models/Human-GEM` and `./dataOverlay` give
 * `./dataOverlay/Human-GEM`.
 *
 * @param {Object} params
 * @param {string} params.modelDir - Directory of the model.
 * @param {string} params.outDir - Root directory for the generated files.
 * @returns {string} Path of the model's output directory.
 */
const getModelOutDir = ({ modelDir, outDir }) => {
  // basename() ignores trailing separators, so `models/Human-GEM/` still
  // resolves to `Human-GEM` rather than to an empty folder name.
  const modelFolder = path.basename(modelDir);
  const modelOutDir = `${outDir}/${modelFolder}`;

  // `recursive` also creates `outDir` and any missing parents, and does not
  // fail when the directory already exists.
  fs.mkdirSync(modelOutDir, { recursive: true });

  return modelOutDir;
};
/**
 * Parses the text of a tab-separated index file into a list of records.
 *
 * The first non-blank line is the header; every following non-blank line
 * becomes one object keyed by the header's column names. Cells are matched
 * to columns by position, so an empty cell never shifts the values after it
 * into the wrong column. Cells and column names are trimmed, which also
 * removes the `\r` of CRLF line endings.
 *
 * @param {string} indexFile - Raw text of the index file.
 * @returns {Object[]} One object per data row. Empty or missing cells have
 *   the value `undefined` (and are therefore omitted by `JSON.stringify`).
 *   Returns an empty array when the text contains no non-blank line.
 */
const parseIndexFile = (indexFile) => {
  const splitCells = (line) => line.split('\t').map((cell) => cell.trim());

  // Whitespace-only lines (e.g. a lone '\r' in a CRLF file) carry no data.
  const lines = indexFile.split('\n').filter((line) => line.trim() !== '');
  if (lines.length === 0) {
    return [];
  }

  const [header, ...rows] = lines;

  // Record the position of each named column once. Unnamed columns are
  // skipped, and for a repeated name the first column wins.
  const columns = [];
  const seenKeys = new Set();
  splitCells(header).forEach((key, index) => {
    if (key !== '' && !seenKeys.has(key)) {
      seenKeys.add(key);
      columns.push({ key, index });
    }
  });

  return rows.map((row) => {
    const cells = splitCells(row);
    return Object.fromEntries(
      columns.map(({ key, index }) => {
        const cell = cells[index];
        return [key, cell === '' ? undefined : cell];
      }),
    );
  });
};
/**
 * Reduces the text of a tab-separated data source file to the rows that
 * belong to known components.
 *
 * Empty lines are dropped. The first remaining line is treated as the
 * header and is always kept; each later line is kept, unmodified, only when
 * the first non-empty tab-separated cell of the trimmed line is contained in
 * `componentIdSet`.
 *
 * @param {Object} params
 * @param {string} params.inputFile - Raw text of the data source file.
 * @param {Set<string>} params.componentIdSet - IDs of the rows to keep.
 * @returns {string} The header and the kept rows, joined with `\n`.
 */
const condenseDataSourceFile = ({ inputFile, componentIdSet }) => {
  const lines = inputFile.split('\n').filter((line) => line !== '');
  const keptLines = [lines[0]];

  for (const line of lines.slice(1)) {
    const id = line
      .trim()
      .split('\t')
      .find((cell) => cell !== '');
    if (componentIdSet.has(id)) {
      keptLines.push(line);
    }
  }

  return keptLines.join('\n');
};
export { processDataOverlayFiles };