-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #245 from SysBioChalmers/feat/addMAMids
feat: add MA met ids to metabolites.tsv
- Loading branch information
Showing
4 changed files
with
8,478 additions
and
8,381 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
# Model curation scripts | ||
|
||
This directory contains curation-related scripts and functions used to make changes to the Human-GEM model. These model curation scripts help to improve transparency of changes made to the model when the number of changes is too large to view practically. | ||
This directory contains curation-related scripts and functions used to make changes to the Human-GEM repository. These curation scripts help to improve transparency of changes made to the model when the number of changes is too large to view practically. | ||
|
||
Note that all code in this directory is considered deprecated and will not be updated or maintained. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
% this script is to add metabolite identifiers (#174) that will be used for Met Atlas web-portal. | ||
|
||
% load data | ||
metIDs = importTsvFile('metabolites.tsv'); | ||
|
||
% get compartment id for non-HMR mets that will be used later | ||
compID = cellfun(@(x) regexprep(x, '^.+_(\w)$', '$1'), metIDs.mets, 'UniformOutput', false); | ||
matche2NonHMR = regexp(metIDs.mets, '_\w$', 'match'); | ||
emptymask = cellfun('isempty', matche2NonHMR); | ||
compID(emptymask) = {''}; % empty HMR met ids | ||
|
||
% generate MA met ids from HMR ids only with "MA-" prefix | ||
metMAID = cellfun(@(x) regexprep(x, '^m(\d\d\d\d\d\w)$', 'MA-M$1'), metIDs.mets, 'UniformOutput', false); | ||
|
||
% check consistency between HMR ids and empty elements of non-HMR ids | ||
matche2HMR = regexp(metMAID, '^MA-M\d\d\d\d\d\w$', 'match'); | ||
ind2HMR = ~cellfun('isempty', matche2HMR); | ||
check = find(ind2HMR ~= emptymask); | ||
metIDs.mets(check) | ||
% {'temp001c'} | ||
% {'temp001s'} | ||
% these two mets are from HMR but in non-standard format, will be treated as | ||
% non-HMR ids | ||
compID(check) = {'c';'s'}; | ||
|
||
% keep HMR ids in metMAID and clean others | ||
ind_MAID = startsWith(metMAID, 'MA-M'); | ||
metMAID(~ind_MAID) = {''}; | ||
metMAIDNoComp = cellfun(@(x) regexprep(x, '^(.+)\w$', '$1'), metMAID, 'UniformOutput', false); | ||
|
||
% get index of nonHMR ids from compID and remove their compartment id | ||
nonHMRidInd = ~cellfun('isempty', compID); | ||
nonHMRidNoComp = cellfun(@(x) regexprep(x, '^(.+)_\w$', '$1'), metIDs.mets(nonHMRidInd), 'UniformOutput', false); | ||
|
||
% get unique list of nonHMR ids | ||
[uniqueID, ia, ic] = unique(nonHMRidNoComp); | ||
%isequal(nonHMRidNoComp, uniqueID(ic)) | ||
%isequal(uniqueID, nonHMRidNoComp(ia)) | ||
|
||
% prepare standard format MA ids for nonHMR mets | ||
idsToAdd = cell(10000,1); | ||
for i=1:10000 | ||
idsToAdd{i} = strcat('MA-M',sprintf('%05d', i)); | ||
end | ||
idsToAdd = setdiff(idsToAdd, metMAIDNoComp); % exclude existing ones | ||
idsToAdd = idsToAdd(1:length(uniqueID)); | ||
|
||
% get ids to fill in | ||
idsToFill = idsToAdd(ic); | ||
|
||
% append compartment id to idsToFill | ||
idsToFill = strcat(idsToFill,compID(nonHMRidInd)); | ||
|
||
% complete MA met ids | ||
metMAID(nonHMRidInd) = idsToFill; | ||
|
||
% format check: every id must match the MA pattern. regexp returns an empty | ||
% result for a non-matching id, so test each cell for emptiness instead of | ||
% concatenating with cell2mat (which would silently drop the empty results). | ||
all(~cellfun('isempty', regexp(metMAID, '^MA-M\d\d\d\d\d\w$'))) | ||
% unique check: the number of distinct ids must equal the number of ids | ||
isequal(length(metMAID), length(unique(metMAID))) | ||
% empty check: no id may be left blank | ||
B = cellfun('isempty', metMAID); | ||
all(B(:) == 0) | ||
|
||
% add new field and output | ||
metIDs.metMAID = metMAID; | ||
exportTsvFile(metIDs,'metabolites.tsv'); | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
% this script is to provide complete reaction identifiers (#174) that will be used for Met Atlas web-portal. | ||
% Steps: load reactions.tsv, normalise the existing rxnMAID values, fill the | ||
% blank ones from a pool of unused ids, insert the dash, and write the file back. | ||
|
||
% load reaction ids | ||
rxnIDs = importTsvFile('reactions.tsv'); | ||
|
||
% replace underscore with `R` | ||
rxnIDs.rxnMAID = strrep(rxnIDs.rxnMAID,'_','R'); | ||
% NOTE(review): indToFill is used below both as an index into rxnMAID and, | ||
% via length(), as a count - presumably getNonEmptyList(...,false) returns | ||
% the positions of the empty entries; confirm against the helper. | ||
indToFill = getNonEmptyList(rxnIDs.rxnMAID,false); | ||
|
||
% prepare a list to fill | ||
% candidates are MAR00001 .. MAR19999; setdiff removes those already present | ||
% in rxnMAID and returns the remainder sorted | ||
idsToAdd = cell(19999,1); | ||
for i=1:19999 | ||
idsToAdd{i} = strcat('MAR',sprintf('%05d', i)); | ||
end | ||
idsToAdd = setdiff(idsToAdd, rxnIDs.rxnMAID); | ||
|
||
% sequentially fill in blank MA ids | ||
rxnIDs.rxnMAID(indToFill) = idsToAdd(1:length(indToFill)); | ||
% the message is printed only when all ids are unique; nothing is reported otherwise | ||
if isequal(length(rxnIDs.rxnMAID), length(unique(rxnIDs.rxnMAID))) | ||
fprintf('the filling looks okay.\n'); | ||
end | ||
|
||
% insert dash between MA and RDDDDD | ||
% (strrep replaces every occurrence of 'MA' in each id) | ||
rxnIDs.rxnMAID = strrep(rxnIDs.rxnMAID,'MA','MA-'); | ||
|
||
|
||
exportTsvFile(rxnIDs,'reactions.tsv'); | ||
|
Oops, something went wrong.