diff --git a/.github/issue_template.md b/.github/issue_template.md index 79269c9e..3d7e716c 100644 --- a/.github/issue_template.md +++ b/.github/issue_template.md @@ -1,12 +1,12 @@ ### Description of the issue: -*Try to be as clear as possible, e.g.: + #### Reproducing this issue: -*If applicable, please attach the problematic code. PLEASE DELETE THIS LINE.* + ```matlab INSERT CODE HERE ``` @@ -17,8 +17,7 @@ INSERT CODE HERE 2. Operating system (Windows/Mac/Linux; include version) **I hereby confirm that I have:** + - [ ] Followed the [guidelines](https://github.com/SysBioChalmers/RAVEN/wiki/Installation) to install RAVEN. - [ ] Checked that a similar issue does not [already exist](https://github.com/SysBioChalmers/RAVEN/issues?utf8=%E2%9C%93&q=is%3Aissue) - [ ] If suitable, needed, asked first in the [Gitter chat room](https://gitter.im/SysBioChalmers/RAVEN) about the issue - -*Note: replace [ ] with [X] to check the box. PLEASE DELETE THIS LINE* diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index a114340e..ce928797 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,14 +1,12 @@ ### Main improvements in this PR: -*Pointwise mention what changes were made in what function. Examples: + **I hereby confirm that I have:** + - [ ] Tested my code on my own machine - [ ] Followed the [development guidelines](https://github.com/SysBioChalmers/RAVEN/wiki/DevGuidelines). - [ ] Selected `devel` as a target branch - [ ] If needed, asked first in the [Gitter chat room](https://gitter.im/SysBioChalmers/RAVEN) about this PR - -*Note: replace [ ] with [X] to check the box. 
PLEASE DELETE THIS LINE* diff --git a/.github/workflows/testing-comment.md b/.github/workflows/testing-comment.md new file mode 100644 index 00000000..c0c38f90 --- /dev/null +++ b/.github/workflows/testing-comment.md @@ -0,0 +1,7 @@ +This PR has been [automatically tested with GH Actions](https://github.com/SysBioChalmers/RAVEN/actions/runs/{GH_ACTION_RUN}). Here is the output of the tests: + +``` +{TEST_RESULTS} +``` + +> _Note: In the case of multiple test runs, this post will be edited._ \ No newline at end of file diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 00000000..888c2aba --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,28 @@ +name: Testing + +on: [pull_request] + +jobs: + matlab-tests: + runs-on: self-hosted + + steps: + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run tests + id: matlab-test + run: | + TEST_RESULTS=$(/usr/local/bin/matlab -nodisplay -nosplash -nodesktop -r "addpath(genpath('.')); cd('testing/unit_tests'); runtests(struct2table(dir('*.m')).name); exit;") + PARSED_RESULTS=$(echo $TEST_RESULTS | awk -F'.com.' 
'{ n = split($2, v, "__________"); for (i = 0; ++i <= n;) { print v[i] } }') + echo ::set-output name=results::$PARSED_RESULTS + + - name: Post comment + uses: NejcZdovc/comment-pr@v1 + with: + file: "testing-comment.md" + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + TEST_RESULTS: ${{steps.matlab-test.outputs.results}} + GH_ACTION_RUN: ${{github.run_id}} \ No newline at end of file diff --git a/core/checkModelStruct.m b/core/checkModelStruct.m index fd20ab87..381278ae 100755 --- a/core/checkModelStruct.m +++ b/core/checkModelStruct.m @@ -246,6 +246,18 @@ function checkModelStruct(model,throwErrors,trimWarnings) EM='The following reactions have bounds contradicting their reversibility:'; dispEM(EM,throwErrors,model.rxns(model.lb<0 & model.rev==0),trimWarnings); +%Multiple or no objective functions not allowed in SBML L3V1 FBCv2 +if numel(find(model.c))>1 + EM='Multiple objective functions found. This might be intended, but exportModel will fail due to SBML FBCv2 non-compliance:'; + dispEM(EM,false,model.rxns(find(model.c)),trimWarnings); +elseif ~any(model.c) + EM='No objective function found. 
This might be intended, but exportModel will fail due to SBML FBCv2 non-compliance'; + dispEM(EM,false); +end + +EM='The following reactions have contradicting bounds:'; +dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); + %Mapping of compartments if isfield(model,'compOutside') EM='The following compartments are in "compOutside" but not in "comps":'; diff --git a/core/compareModels.m b/core/compareRxnsGenesMetsComps.m old mode 100755 new mode 100644 similarity index 97% rename from core/compareModels.m rename to core/compareRxnsGenesMetsComps.m index 7af16a6d..37d1f9e7 --- a/core/compareModels.m +++ b/core/compareRxnsGenesMetsComps.m @@ -1,5 +1,5 @@ -function compStruct=compareModels(models,printResults) -% compareModels +function compStruct=compareRxnsGenesMetsComps(models,printResults) +% compareRxnsGenesMetsComps % Compares two or more models with respect to overlap in terms of genes, % reactions, metabolites and compartments. % @@ -23,7 +23,7 @@ % nElements vector with the number of elements for each % comparison % -% Usage: compStruct=compareModels(models,printResults) +% Usage: compStruct=compareRxnsGenesMetsComps(models,printResults) if nargin<2 printResults=true; diff --git a/core/convertToIrrev.m b/core/convertToIrrev.m index ecac9d0b..aae1e62f 100755 --- a/core/convertToIrrev.m +++ b/core/convertToIrrev.m @@ -14,13 +14,13 @@ % Usage: irrevModel=convertToIrrev(model,rxns) if nargin<2 - rxns=model.rxns; + I=true(numel(model.rxns),1); +else + I=getIndexes(model,rxns,'rxns',true); end irrevModel=model; -I=getIndexes(model,rxns,'rxns',true); - revIndexesBool=model.rev~=0 & I; revIndexes=find(revIndexesBool); if any(revIndexesBool) diff --git a/core/dispEM.m b/core/dispEM.m index 325ba758..9947f76f 100755 --- a/core/dispEM.m +++ b/core/dispEM.m @@ -42,6 +42,8 @@ function dispEM(string,throwErrors,toList,trimWarnings) end end if throwErrors==false + %Escape special characters, required for fprintf + 
errorText=regexprep(errorText,'(\\|\%|'')','\\$0'); fprintf([errorText '\n']); else throw(MException('',errorText)); diff --git a/core/permuteModel.m b/core/permuteModel.m index 15765f83..23a06cea 100755 --- a/core/permuteModel.m +++ b/core/permuteModel.m @@ -2,111 +2,155 @@ % permuteModel % Changes the order of the reactions or metabolites in a model % +% Input: % model a model structure -% indexes a vector with the same length as the number of reactions in the -% model which gives the new order of reactions -% type 'rxns' for reactions and 'mets' for metabolites +% indexes a vector with the same length as the number of items in the +% model, which gives the new order of items +% type 'rxns' for reactions, 'mets' for metabolites, 'genes' for +% genes, 'comps' for compartments % -% newModel an updated model structure +% Output: +% newModel an updated model structure % % Usage: newModel=permuteModel(model, indexes, type) newModel=model; indexes=indexes(:); -if strcmp(type,'rxns') - if isfield(newModel,'rxns') - newModel.rxns=newModel.rxns(indexes); - end - if isfield(newModel,'lb') - newModel.lb=newModel.lb(indexes); - end - if isfield(newModel,'ub') - newModel.ub=newModel.ub(indexes); - end - if isfield(newModel,'rev') - newModel.rev=newModel.rev(indexes); - end - if isfield(newModel,'c') - newModel.c=newModel.c(indexes); - end - if isfield(newModel,'S') - newModel.S=newModel.S(:,indexes); - end - if isfield(newModel,'rxnNames') - newModel.rxnNames=newModel.rxnNames(indexes); - end - if isfield(newModel,'rxnGeneMat') - newModel.rxnGeneMat=newModel.rxnGeneMat(indexes,:); - end - if isfield(newModel,'grRules') - newModel.grRules=newModel.grRules(indexes); - end - if isfield(newModel,'subSystems') - newModel.subSystems=newModel.subSystems(indexes); - end - if isfield(newModel,'eccodes') - newModel.eccodes=newModel.eccodes(indexes); - end - if isfield(newModel,'equations') - newModel.equations=newModel.equations(indexes); - end - if isfield(newModel,'rxnMiriams') - 
newModel.rxnMiriams=newModel.rxnMiriams(indexes); - end - if isfield(newModel,'rxnComps') - newModel.rxnComps=newModel.rxnComps(indexes); - end - if isfield(newModel,'rxnFrom') - newModel.rxnFrom=newModel.rxnFrom(indexes); - end - if isfield(newModel,'rxnScores') - newModel.rxnScores=newModel.rxnScores(indexes); - end - if isfield(newModel,'rxnNotes') - newModel.rxnNotes=newModel.rxnNotes(indexes); - end - if isfield(newModel,'rxnReferences') - newModel.rxnReferences=newModel.rxnReferences(indexes); - end - if isfield(newModel,'rxnConfidenceScores') - newModel.rxnConfidenceScores=newModel.rxnConfidenceScores(indexes); - end -end - -if strcmp(type,'mets') - if isfield(newModel,'mets') - newModel.mets=newModel.mets(indexes); - end - if isfield(newModel,'metNames') - newModel.metNames=newModel.metNames(indexes); - end - if isfield(newModel,'b') - newModel.b=newModel.b(indexes,:); - end - if isfield(newModel,'metComps') - newModel.metComps=newModel.metComps(indexes); - end - if isfield(newModel,'S') - newModel.S=newModel.S(indexes,:); - end - if isfield(newModel,'unconstrained') - newModel.unconstrained=newModel.unconstrained(indexes); - end - if isfield(newModel,'metMiriams') - newModel.metMiriams=newModel.metMiriams(indexes,:); - end - if isfield(newModel,'inchis') - newModel.inchis=newModel.inchis(indexes); - end - if isfield(newModel,'metFormulas') - newModel.metFormulas=newModel.metFormulas(indexes); - end - if isfield(newModel,'metFrom') - newModel.metFrom=newModel.metFrom(indexes); - end - if isfield(newModel,'metCharges') - newModel.metCharges=newModel.metCharges(indexes); - end +switch type + case 'rxns' + if isfield(newModel,'rxns') + newModel.rxns=newModel.rxns(indexes); + end + if isfield(newModel,'lb') + newModel.lb=newModel.lb(indexes); + end + if isfield(newModel,'ub') + newModel.ub=newModel.ub(indexes); + end + if isfield(newModel,'rev') + newModel.rev=newModel.rev(indexes); + end + if isfield(newModel,'c') + newModel.c=newModel.c(indexes); + end + if 
isfield(newModel,'S') + newModel.S=newModel.S(:,indexes); + end + if isfield(newModel,'rxnNames') + newModel.rxnNames=newModel.rxnNames(indexes); + end + if isfield(newModel,'rxnGeneMat') + newModel.rxnGeneMat=newModel.rxnGeneMat(indexes,:); + end + if isfield(newModel,'grRules') + newModel.grRules=newModel.grRules(indexes); + end + if isfield(newModel,'subSystems') + newModel.subSystems=newModel.subSystems(indexes); + end + if isfield(newModel,'eccodes') + newModel.eccodes=newModel.eccodes(indexes); + end + if isfield(newModel,'equations') + newModel.equations=newModel.equations(indexes); + end + if isfield(newModel,'rxnMiriams') + newModel.rxnMiriams=newModel.rxnMiriams(indexes); + end + if isfield(newModel,'rxnComps') + newModel.rxnComps=newModel.rxnComps(indexes); + end + if isfield(newModel,'rxnFrom') + newModel.rxnFrom=newModel.rxnFrom(indexes); + end + if isfield(newModel,'rxnScores') + newModel.rxnScores=newModel.rxnScores(indexes); + end + if isfield(newModel,'rxnNotes') + newModel.rxnNotes=newModel.rxnNotes(indexes); + end + if isfield(newModel,'rxnReferences') + newModel.rxnReferences=newModel.rxnReferences(indexes); + end + if isfield(newModel,'rxnConfidenceScores') + newModel.rxnConfidenceScores=newModel.rxnConfidenceScores(indexes); + end + case 'mets' + if isfield(newModel,'mets') + newModel.mets=newModel.mets(indexes); + end + if isfield(newModel,'metNames') + newModel.metNames=newModel.metNames(indexes); + end + if isfield(newModel,'b') + newModel.b=newModel.b(indexes,:); + end + if isfield(newModel,'metComps') + newModel.metComps=newModel.metComps(indexes); + end + if isfield(newModel,'S') + newModel.S=newModel.S(indexes,:); + end + if isfield(newModel,'unconstrained') + newModel.unconstrained=newModel.unconstrained(indexes); + end + if isfield(newModel,'metMiriams') + newModel.metMiriams=newModel.metMiriams(indexes,:); + end + if isfield(newModel,'inchis') + newModel.inchis=newModel.inchis(indexes); + end + if isfield(newModel,'metFormulas') + 
newModel.metFormulas=newModel.metFormulas(indexes); + end + if isfield(newModel,'metFrom') + newModel.metFrom=newModel.metFrom(indexes); + end + if isfield(newModel,'metCharges') + newModel.metCharges=newModel.metCharges(indexes); + end + case 'genes' + if isfield(newModel,'genes') + newModel.genes=newModel.genes(indexes); + end + if isfield(newModel,'geneComps') + newModel.geneComps=newModel.geneComps(indexes); + end + if isfield(newModel,'geneMiriams') + newModel.geneMiriams=newModel.geneMiriams(indexes); + end + if isfield(newModel,'geneShortNames') + newModel.geneShortNames=newModel.geneShortNames(indexes); + end + if isfield(newModel,'rxnGeneMat') + newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); + end + case 'comps' + if isfield(newModel,'comps') + newModel.comps=newModel.comps(indexes); + end + if isfield(newModel,'compNames') + newModel.compNames=newModel.compNames(indexes); + end + if isfield(newModel,'compOutside') + newModel.compOutside=newModel.compOutside(indexes); + end + if isfield(newModel,'compMiriams') + newModel.compMiriams=newModel.compMiriams(indexes); + end + [~,J]=sort(indexes); % J(old)=new position; metComps/rxnComps/geneComps hold compartment *indexes* and are remapped on newModel below + if isfield(newModel,'metComps') + [toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); + newModel.metComps(toreplace) = J(bywhat(toreplace)); + end + if isfield(newModel,'rxnComps') + [toreplace, bywhat] = ismember(newModel.rxnComps,1:length(J)); + newModel.rxnComps(toreplace) = J(bywhat(toreplace)); + end + if isfield(newModel,'geneComps') + [toreplace, bywhat] = ismember(newModel.geneComps,1:length(J)); + newModel.geneComps(toreplace) = J(bywhat(toreplace)); + end end end diff --git a/doc/core/checkModelStruct.html b/doc/core/checkModelStruct.html index 90aa5bf6..92e3e48f 100644 --- a/doc/core/checkModelStruct.html +++ b/doc/core/checkModelStruct.html @@ -305,116 +305,128 @@

SOURCE CODE ^'The following reactions have bounds contradicting their reversibility:'; 0247 dispEM(EM,throwErrors,model.rxns(model.lb<0 & model.rev==0),trimWarnings); 0248 -0249 %Mapping of compartments -0250 if isfield(model,'compOutside') -0251 EM='The following compartments are in "compOutside" but not in "comps":'; -0252 dispEM(EM,throwErrors,setdiff(model.compOutside,[{''};model.comps]),trimWarnings); -0253 end -0254 -0255 %Met names which start with number -0256 I=false(numel(model.metNames),1); -0257 for i=1:numel(model.metNames) -0258 index=strfind(model.metNames{i},' '); -0259 if any(index) -0260 if any(str2double(model.metNames{i}(1:index(1)-1))) -0261 I(i)=true; -0262 end -0263 end -0264 end -0265 EM='The following metabolite names begin with a number directly followed by space:'; -0266 dispEM(EM,throwErrors,model.mets(I),trimWarnings); -0267 -0268 %Non-parseable composition -0269 if isfield(model,'metFormulas') -0270 [~, ~, exitFlag]=parseFormulas(model.metFormulas,true,false); -0271 EM='The composition for the following metabolites could not be parsed:'; -0272 dispEM(EM,false,model.mets(exitFlag==-1),trimWarnings); -0273 end -0274 -0275 %Check if there are metabolites with different names but the same MIRIAM -0276 %codes -0277 if isfield(model,'metMiriams') -0278 miriams=containers.Map(); -0279 for i=1:numel(model.mets) -0280 if ~isempty(model.metMiriams{i}) -0281 %Loop through and add for each miriam -0282 for j=1:numel(model.metMiriams{i}.name) -0283 %Get existing metabolite indexes -0284 current=strcat(model.metMiriams{i}.name{j},'/',model.metMiriams{i}.value{j}); -0285 if isKey(miriams,current) -0286 existing=miriams(current); -0287 else -0288 existing=[]; -0289 end -0290 miriams(current)=[existing;i]; -0291 end -0292 end -0293 end -0294 -0295 %Get all keys -0296 allMiriams=keys(miriams); -0297 -0298 hasMultiple=false(numel(allMiriams),1); -0299 for i=1:numel(allMiriams) -0300 if numel(miriams(allMiriams{i}))>1 -0301 %Check if they all have the 
same name -0302 if numel(unique(model.metNames(miriams(allMiriams{i}))))>1 -0303 if ~regexp(allMiriams{i},'^sbo\/SBO:') % SBO terms are expected to be multiple -0304 hasMultiple(i)=true; -0305 end -0306 end -0307 end -0308 end +0249 %Multiple or no objective functions not allowed in SBML L3V1 FBCv2 +0250 if numel(find(model.c))>1 +0251 EM='Multiple objective functions found. This might be intended, but exportModel will fail due to SBML FBCv2 non-compliance:'; +0252 dispEM(EM,false,model.rxns(find(model.c)),trimWarnings); +0253 elseif ~any(model.c) +0254 EM='No objective function found. This might be intended, but exportModel will fail due to SBML FBCv2 non-compliance'; +0255 dispEM(EM,false); +0256 end +0257 +0258 EM='The following reactions have contradicting bounds:'; +0259 dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); +0260 +0261 %Mapping of compartments +0262 if isfield(model,'compOutside') +0263 EM='The following compartments are in "compOutside" but not in "comps":'; +0264 dispEM(EM,throwErrors,setdiff(model.compOutside,[{''};model.comps]),trimWarnings); +0265 end +0266 +0267 %Met names which start with number +0268 I=false(numel(model.metNames),1); +0269 for i=1:numel(model.metNames) +0270 index=strfind(model.metNames{i},' '); +0271 if any(index) +0272 if any(str2double(model.metNames{i}(1:index(1)-1))) +0273 I(i)=true; +0274 end +0275 end +0276 end +0277 EM='The following metabolite names begin with a number directly followed by space:'; +0278 dispEM(EM,throwErrors,model.mets(I),trimWarnings); +0279 +0280 %Non-parseable composition +0281 if isfield(model,'metFormulas') +0282 [~, ~, exitFlag]=parseFormulas(model.metFormulas,true,false); +0283 EM='The composition for the following metabolites could not be parsed:'; +0284 dispEM(EM,false,model.mets(exitFlag==-1),trimWarnings); +0285 end +0286 +0287 %Check if there are metabolites with different names but the same MIRIAM +0288 %codes +0289 if isfield(model,'metMiriams') +0290 
miriams=containers.Map(); +0291 for i=1:numel(model.mets) +0292 if ~isempty(model.metMiriams{i}) +0293 %Loop through and add for each miriam +0294 for j=1:numel(model.metMiriams{i}.name) +0295 %Get existing metabolite indexes +0296 current=strcat(model.metMiriams{i}.name{j},'/',model.metMiriams{i}.value{j}); +0297 if isKey(miriams,current) +0298 existing=miriams(current); +0299 else +0300 existing=[]; +0301 end +0302 miriams(current)=[existing;i]; +0303 end +0304 end +0305 end +0306 +0307 %Get all keys +0308 allMiriams=keys(miriams); 0309 -0310 %Print output -0311 EM='The following MIRIAM strings are associated to more than one unique metabolite name:'; -0312 dispEM(EM,false,allMiriams(hasMultiple),trimWarnings); -0313 end -0314 -0315 %Check if there are metabolites with different names but the same InChI -0316 %codes -0317 if isfield(model,'inchis') -0318 inchis=containers.Map(); -0319 for i=1:numel(model.mets) -0320 if ~isempty(model.inchis{i}) -0321 %Get existing metabolite indexes -0322 if isKey(inchis,model.inchis{i}) -0323 existing=inchis(model.inchis{i}); -0324 else -0325 existing=[]; -0326 end -0327 inchis(model.inchis{i})=[existing;i]; -0328 end -0329 end -0330 -0331 %Get all keys -0332 allInchis=keys(inchis); -0333 -0334 hasMultiple=false(numel(allInchis),1); -0335 for i=1:numel(allInchis) -0336 if numel(inchis(allInchis{i}))>1 -0337 %Check if they all have the same name -0338 if numel(unique(model.metNames(inchis(allInchis{i}))))>1 -0339 hasMultiple(i)=true; -0340 end -0341 end -0342 end -0343 -0344 %Print output -0345 EM='The following InChI strings are associated to more than one unique metabolite name:'; -0346 dispEM(EM,false,allInchis(hasMultiple),trimWarnings); -0347 end -0348 end -0349 -0350 function I=duplicates(strings) -0351 I=false(numel(strings),1); -0352 [J, K]=unique(strings); -0353 if numel(J)~=numel(strings) -0354 L=1:numel(strings); -0355 L(K)=[]; -0356 I(L)=true; -0357 end -0358 end +0310 hasMultiple=false(numel(allMiriams),1); +0311 for 
i=1:numel(allMiriams) +0312 if numel(miriams(allMiriams{i}))>1 +0313 %Check if they all have the same name +0314 if numel(unique(model.metNames(miriams(allMiriams{i}))))>1 +0315 if ~regexp(allMiriams{i},'^sbo\/SBO:') % SBO terms are expected to be multiple +0316 hasMultiple(i)=true; +0317 end +0318 end +0319 end +0320 end +0321 +0322 %Print output +0323 EM='The following MIRIAM strings are associated to more than one unique metabolite name:'; +0324 dispEM(EM,false,allMiriams(hasMultiple),trimWarnings); +0325 end +0326 +0327 %Check if there are metabolites with different names but the same InChI +0328 %codes +0329 if isfield(model,'inchis') +0330 inchis=containers.Map(); +0331 for i=1:numel(model.mets) +0332 if ~isempty(model.inchis{i}) +0333 %Get existing metabolite indexes +0334 if isKey(inchis,model.inchis{i}) +0335 existing=inchis(model.inchis{i}); +0336 else +0337 existing=[]; +0338 end +0339 inchis(model.inchis{i})=[existing;i]; +0340 end +0341 end +0342 +0343 %Get all keys +0344 allInchis=keys(inchis); +0345 +0346 hasMultiple=false(numel(allInchis),1); +0347 for i=1:numel(allInchis) +0348 if numel(inchis(allInchis{i}))>1 +0349 %Check if they all have the same name +0350 if numel(unique(model.metNames(inchis(allInchis{i}))))>1 +0351 hasMultiple(i)=true; +0352 end +0353 end +0354 end +0355 +0356 %Print output +0357 EM='The following InChI strings are associated to more than one unique metabolite name:'; +0358 dispEM(EM,false,allInchis(hasMultiple),trimWarnings); +0359 end +0360 end +0361 +0362 function I=duplicates(strings) +0363 I=false(numel(strings),1); +0364 [J, K]=unique(strings); +0365 if numel(J)~=numel(strings) +0366 L=1:numel(strings); +0367 L(K)=[]; +0368 I(L)=true; +0369 end +0370 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/compareModels.html b/doc/core/compareRxnsGenesMetsComps.html similarity index 95% rename from doc/core/compareModels.html rename to doc/core/compareRxnsGenesMetsComps.html index c7cebd24..5f8f0b07 100644 --- a/doc/core/compareModels.html +++ b/doc/core/compareRxnsGenesMetsComps.html @@ -2,9 +2,9 @@ "http://www.w3.org/TR/REC-html40/loose.dtd"> - Description of compareModels - - + Description of compareRxnsGenesMetsComps + + @@ -12,22 +12,22 @@ -
Home > core > compareModels.m
+
Home > core > compareRxnsGenesMetsComps.m
-

compareModels +

compareRxnsGenesMetsComps

PURPOSE ^

-
compareModels
+
compareRxnsGenesMetsComps

SYNOPSIS ^

-
function compStruct=compareModels(models,printResults)
+
function compStruct=compareRxnsGenesMetsComps(models,printResults)

DESCRIPTION ^

-
 compareModels
+
 compareRxnsGenesMetsComps
    Compares two or more models with respect to overlap in terms of genes,
    reactions, metabolites and compartments.
 
@@ -51,7 +51,7 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

@@ -68,8 +68,8 @@

SUBFUNCTIONS ^function A=getElements(models,field)
  • function toCheck=getToCheck(models,field)
  • function I=checkField(A)
  • function printList(models,toCheck,nElements)
  • function nElements=checkStuff(A,toCheck)
  • SOURCE CODE ^

    -
    0001 function compStruct=compareModels(models,printResults)
    -0002 % compareModels
    +
    0001 function compStruct=compareRxnsGenesMetsComps(models,printResults)
    +0002 % compareRxnsGenesMetsComps
     0003 %   Compares two or more models with respect to overlap in terms of genes,
     0004 %   reactions, metabolites and compartments.
     0005 %
    @@ -93,7 +93,7 @@ 

    SOURCE CODE ^% nElements vector with the number of elements for each 0024 % comparison 0025 % -0026 % Usage: compStruct=compareModels(models,printResults) +0026 % Usage: compStruct=compareRxnsGenesMetsComps(models,printResults) 0027 0028 if nargin<2 0029 printResults=true; diff --git a/doc/core/constructEquations.html b/doc/core/constructEquations.html index 4d7d5ef5..eeb4da81 100644 --- a/doc/core/constructEquations.html +++ b/doc/core/constructEquations.html @@ -69,7 +69,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=buildEquation buildEquation
  • getIndexes getIndexes
  • sortModel sortModel
  • This function is called by: +
  • addRxnsGenesMets addRxnsGenesMets
  • compareRxnsGenesMetsComps compareRxnsGenesMetsComps
  • followChanged followChanged
  • followFluxes followFluxes
  • printFluxes printFluxes
  • printModelStats printModelStats
  • diff --git a/doc/core/convertToIrrev.html b/doc/core/convertToIrrev.html index 61376c46..dcb995f3 100644 --- a/doc/core/convertToIrrev.html +++ b/doc/core/convertToIrrev.html @@ -70,12 +70,12 @@

    SOURCE CODE ^% Usage: irrevModel=convertToIrrev(model,rxns) 0015 0016 if nargin<2 -0017 rxns=model.rxns; -0018 end -0019 -0020 irrevModel=model; +0017 I=true(numel(model.rxns),1); +0018 else +0019 I=getIndexes(model,rxns,'rxns',true); +0020 end 0021 -0022 I=getIndexes(model,rxns,'rxns',true); +0022 irrevModel=model; 0023 0024 revIndexesBool=model.rev~=0 & I; 0025 revIndexes=find(revIndexesBool); diff --git a/doc/core/dispEM.html b/doc/core/dispEM.html index 6d6c6fb1..1460b77c 100644 --- a/doc/core/dispEM.html +++ b/doc/core/dispEM.html @@ -49,7 +49,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • addGenesRaven addGenesRaven
  • addMets addMets
  • addRxns addRxns
  • addRxnsGenesMets addRxnsGenesMets
  • addTransport addTransport
  • analyzeSampling analyzeSampling
  • buildEquation buildEquation
  • changeRxns changeRxns
  • checkModelStruct checkModelStruct
  • checkRxn checkRxn
  • checkTasks checkTasks
  • compareModels compareModels
  • compareMultipleModels compareMultipleModels
  • constructS constructS
  • consumeSomething consumeSomething
  • contractModel contractModel
  • expandModel expandModel
  • fillGaps fillGaps
  • findGeneDeletions findGeneDeletions
  • fitParameters fitParameters
  • fitTasks fitTasks
  • getElementalBalance getElementalBalance
  • getEssentialRxns getEssentialRxns
  • getExpressionStructure getExpressionStructure
  • getFluxZ getFluxZ
  • getMetsInComp getMetsInComp
  • getMinNrFluxes getMinNrFluxes
  • getModelFromHomology getModelFromHomology
  • getRxnsInComp getRxnsInComp
  • guessComposition guessComposition
  • makeSomething makeSomething
  • mapCompartments mapCompartments
  • mergeCompartments mergeCompartments
  • mergeModels mergeModels
  • parseTaskList parseTaskList
  • predictLocalization predictLocalization
  • printFluxes printFluxes
  • randomSampling randomSampling
  • removeBadRxns removeBadRxns
  • removeMets removeMets
  • replaceMets replaceMets
  • reporterMetabolites reporterMetabolites
  • setParam setParam
  • simplifyModel simplifyModel
  • sortModel sortModel
  • standardizeGrRules standardizeGrRules
  • +
  • addGenesRaven addGenesRaven
  • addMets addMets
  • addRxns addRxns
  • addRxnsGenesMets addRxnsGenesMets
  • addTransport addTransport
  • analyzeSampling analyzeSampling
  • buildEquation buildEquation
  • changeRxns changeRxns
  • checkModelStruct checkModelStruct
  • checkRxn checkRxn
  • checkTasks checkTasks
  • compareMultipleModels compareMultipleModels
  • compareRxnsGenesMetsComps compareRxnsGenesMetsComps
  • constructS constructS
  • consumeSomething consumeSomething
  • contractModel contractModel
  • expandModel expandModel
  • fillGaps fillGaps
  • findGeneDeletions findGeneDeletions
  • fitParameters fitParameters
  • fitTasks fitTasks
  • getElementalBalance getElementalBalance
  • getEssentialRxns getEssentialRxns
  • getExpressionStructure getExpressionStructure
  • getFluxZ getFluxZ
  • getMetsInComp getMetsInComp
  • getMinNrFluxes getMinNrFluxes
  • getModelFromHomology getModelFromHomology
  • getRxnsInComp getRxnsInComp
  • guessComposition guessComposition
  • makeSomething makeSomething
  • mapCompartments mapCompartments
  • mergeCompartments mergeCompartments
  • mergeModels mergeModels
  • parseTaskList parseTaskList
  • predictLocalization predictLocalization
  • printFluxes printFluxes
  • randomSampling randomSampling
  • removeBadRxns removeBadRxns
  • removeMets removeMets
  • replaceMets replaceMets
  • reporterMetabolites reporterMetabolites
  • setParam setParam
  • simplifyModel simplifyModel
  • sortModel sortModel
  • standardizeGrRules standardizeGrRules
  • @@ -99,11 +99,13 @@

    SOURCE CODE ^end 0043 end 0044 if throwErrors==false -0045 fprintf([errorText '\n']); -0046 else -0047 throw(MException('',errorText)); -0048 end -0049 end

    +0045 %Escape special characters, required for fprintf +0046 errorText=regexprep(errorText,'(\\|\%|'')','\\$0'); +0047 fprintf([errorText '\n']); +0048 else +0049 throw(MException('',errorText)); +0050 end +0051 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/core/index.html b/doc/core/index.html index 49cc1fca..5da70d52 100644 --- a/doc/core/index.html +++ b/doc/core/index.html @@ -19,7 +19,7 @@

    Index for core

    Matlab files in this directory:

    -
     FSEOFFSEOF: implements the algorithm of Flux Scanning based on Enforced Objective Flux.
     addExchangeRxnsaddExchangeRxns
     addGenesRavenaddGenesRaven
     addMetsaddMets
     addRxnsaddRxns
     addRxnsGenesMetsaddRxnsGenesMets
     addTransportaddTransport
     analyzeSamplinganalyzeSampling
     buildEquationbuildEquation
     canConsumecanConsume
     canProducecanProduce
     changeGeneAssocchangeGeneAssoc
     changeGrRuleschangeGrRules
     changeRxnschangeRxns
     checkModelStructcheckModelStruct
     checkProductioncheckProduction
     checkRxncheckRxn
     checkTaskscheckTasks
     compareModelscompareModels
     compareMultipleModelscompareMultipleModels
     constructEquationsconstructEquations
     constructSconstructS
     consumeSomethingconsumeSomething
     contractModelcontractModel
     convertToIrrevconvertToIrrev
     copyToCompscopyToComps
     deleteUnusedGenesdeleteUnusedGenes
     dispEMdispEM
     expandModelexpandModel
     fillGapsfillGaps
     findGeneDeletionsfindGeneDeletions
     fitParametersfitParameters
     fitTasksfitTasks
     followChangedfollowChanged
     followFluxesfollowFluxes
     gapReportgapReport
     generateNewIdsgenerateNewIds
     getAllRxnsFromGenesgetAllRxnsFromGenes
     getAllSubGraphsgetAllSubGraphs
     getAllowedBoundsgetAllowedBounds
     getElementalBalancegetElementalBalance
     getEssentialRxnsgetEssentialRxns
     getExchangeRxnsgetExchangeRxns
     getExpressionStructuregetExpressionStructure
     getFluxZgetFluxZ
     getIndexesgetIndexes
     getMetsInCompgetMetsInComp
     getMinNrFluxesgetMinNrFluxes
     getModelFromHomologygetModelFromHomology
     getObjectiveStringgetObjectiveString
     getRxnsInCompgetRxnsInComp
     getTransportRxnsgetTransportRxns
     guessCompositionguessComposition
     haveFluxhaveFlux
     makeSomethingmakeSomething
     mapCompartmentsmapCompartments
     mergeCompartmentsmergeCompartments
     mergeModelsmergeModels
     parseFormulasparseFormulas
     parseRxnEquparseRxnEqu
     parseTaskListparseTaskList
     permuteModelpermuteModel
     predictLocalizationpredictLocalization
     printFluxesprintFluxes
     printModelprintModel
     printModelStatsprintModelStats
     randomSamplingrandomSampling
     removeBadRxnsremoveBadRxns
     removeGenesremoveGenes
     removeMetsremoveMets
     removeReactionsremoveReactions
     replaceMetsreplaceMets
     reporterMetabolitesreporterMetabolites
     setExchangeBoundssetExchangeBounds
     setParamsetParam
     simplifyModelsimplifyModel
     sortModelsortModel
     standardizeGrRulesstandardizeGrRules
    + FSEOFFSEOF: implements the algorithm of Flux Scanning based on Enforced Objective Flux.  addExchangeRxnsaddExchangeRxns  addGenesRavenaddGenesRaven  addMetsaddMets  addRxnsaddRxns  addRxnsGenesMetsaddRxnsGenesMets  addTransportaddTransport  analyzeSamplinganalyzeSampling  buildEquationbuildEquation  canConsumecanConsume  canProducecanProduce  changeGeneAssocchangeGeneAssoc  changeGrRuleschangeGrRules  changeRxnschangeRxns  checkModelStructcheckModelStruct  checkProductioncheckProduction  checkRxncheckRxn  checkTaskscheckTasks  compareMultipleModelscompareMultipleModels  compareRxnsGenesMetsCompscompareRxnsGenesMetsComps  constructEquationsconstructEquations  constructSconstructS  consumeSomethingconsumeSomething  contractModelcontractModel  convertToIrrevconvertToIrrev  copyToCompscopyToComps  deleteUnusedGenesdeleteUnusedGenes  dispEMdispEM  expandModelexpandModel  fillGapsfillGaps  findGeneDeletionsfindGeneDeletions  fitParametersfitParameters  fitTasksfitTasks  followChangedfollowChanged  followFluxesfollowFluxes  gapReportgapReport  generateNewIdsgenerateNewIds  getAllRxnsFromGenesgetAllRxnsFromGenes  getAllSubGraphsgetAllSubGraphs  getAllowedBoundsgetAllowedBounds  getElementalBalancegetElementalBalance  getEssentialRxnsgetEssentialRxns  getExchangeRxnsgetExchangeRxns  getExpressionStructuregetExpressionStructure  getFluxZgetFluxZ  getIndexesgetIndexes  getMetsInCompgetMetsInComp  getMinNrFluxesgetMinNrFluxes  getModelFromHomologygetModelFromHomology  getObjectiveStringgetObjectiveString  getRxnsInCompgetRxnsInComp  getTransportRxnsgetTransportRxns  guessCompositionguessComposition  haveFluxhaveFlux  makeSomethingmakeSomething  mapCompartmentsmapCompartments  mergeCompartmentsmergeCompartments  mergeModelsmergeModels  parseFormulasparseFormulas  parseRxnEquparseRxnEqu  parseTaskListparseTaskList  permuteModelpermuteModel  predictLocalizationpredictLocalization  printFluxesprintFluxes  printModelprintModel  printModelStatsprintModelStats  
randomSamplingrandomSampling  removeBadRxnsremoveBadRxns  removeGenesremoveGenes  removeMetsremoveMets  removeReactionsremoveReactions  replaceMetsreplaceMets  reporterMetabolitesreporterMetabolites  setExchangeBoundssetExchangeBounds  setParamsetParam  simplifyModelsimplifyModel  sortModelsortModel  standardizeGrRulesstandardizeGrRules diff --git a/doc/core/permuteModel.html b/doc/core/permuteModel.html index e09feed4..32b608e1 100644 --- a/doc/core/permuteModel.html +++ b/doc/core/permuteModel.html @@ -30,12 +30,15 @@

    DESCRIPTION ^
     permuteModel
        Changes the order of the reactions or metabolites in a model
     
    +   Input:
        model     a model structure
    -   indexes   a vector with the same length as the number of reactions in the
    -             model which gives the new order of reactions
    -   type      'rxns' for reactions and 'mets' for metabolites
    +   indexes   a vector with the same length as the number of items in the
    +             model, which gives the new order of items
    +   type      'rxns' for reactions, 'mets' for metabolites, 'genes' for
    +             genes, 'comps' for compartments
     
    -     newModel  an updated model structure
    +     Output:
    +   newModel  an updated model structure
     
          Usage: newModel=permuteModel(model, indexes, type)

    @@ -56,114 +59,158 @@

    SOURCE CODE ^% permuteModel 0003 % Changes the order of the reactions or metabolites in a model 0004 % -0005 % model a model structure -0006 % indexes a vector with the same length as the number of reactions in the -0007 % model which gives the new order of reactions -0008 % type 'rxns' for reactions and 'mets' for metabolites -0009 % -0010 % newModel an updated model structure +0005 % Input: +0006 % model a model structure +0007 % indexes a vector with the same length as the number of items in the +0008 % model, which gives the new order of items +0009 % type 'rxns' for reactions, 'mets' for metabolites, 'genes' for +0010 % genes, 'comps' for compartments 0011 % -0012 % Usage: newModel=permuteModel(model, indexes, type) -0013 -0014 newModel=model; -0015 indexes=indexes(:); +0012 % Output: +0013 % newModel an updated model structure +0014 % +0015 % Usage: newModel=permuteModel(model, indexes, type) 0016 -0017 if strcmp(type,'rxns') -0018 if isfield(newModel,'rxns') -0019 newModel.rxns=newModel.rxns(indexes); -0020 end -0021 if isfield(newModel,'lb') -0022 newModel.lb=newModel.lb(indexes); -0023 end -0024 if isfield(newModel,'ub') -0025 newModel.ub=newModel.ub(indexes); -0026 end -0027 if isfield(newModel,'rev') -0028 newModel.rev=newModel.rev(indexes); -0029 end -0030 if isfield(newModel,'c') -0031 newModel.c=newModel.c(indexes); -0032 end -0033 if isfield(newModel,'S') -0034 newModel.S=newModel.S(:,indexes); -0035 end -0036 if isfield(newModel,'rxnNames') -0037 newModel.rxnNames=newModel.rxnNames(indexes); -0038 end -0039 if isfield(newModel,'rxnGeneMat') -0040 newModel.rxnGeneMat=newModel.rxnGeneMat(indexes,:); -0041 end -0042 if isfield(newModel,'grRules') -0043 newModel.grRules=newModel.grRules(indexes); -0044 end -0045 if isfield(newModel,'subSystems') -0046 newModel.subSystems=newModel.subSystems(indexes); -0047 end -0048 if isfield(newModel,'eccodes') -0049 newModel.eccodes=newModel.eccodes(indexes); -0050 end -0051 if isfield(newModel,'equations') -0052 
newModel.equations=newModel.equations(indexes); -0053 end -0054 if isfield(newModel,'rxnMiriams') -0055 newModel.rxnMiriams=newModel.rxnMiriams(indexes); -0056 end -0057 if isfield(newModel,'rxnComps') -0058 newModel.rxnComps=newModel.rxnComps(indexes); -0059 end -0060 if isfield(newModel,'rxnFrom') -0061 newModel.rxnFrom=newModel.rxnFrom(indexes); -0062 end -0063 if isfield(newModel,'rxnScores') -0064 newModel.rxnScores=newModel.rxnScores(indexes); -0065 end -0066 if isfield(newModel,'rxnNotes') -0067 newModel.rxnNotes=newModel.rxnNotes(indexes); -0068 end -0069 if isfield(newModel,'rxnReferences') -0070 newModel.rxnReferences=newModel.rxnReferences(indexes); -0071 end -0072 if isfield(newModel,'rxnConfidenceScores') -0073 newModel.rxnConfidenceScores=newModel.rxnConfidenceScores(indexes); -0074 end -0075 end -0076 -0077 if strcmp(type,'mets') -0078 if isfield(newModel,'mets') -0079 newModel.mets=newModel.mets(indexes); -0080 end -0081 if isfield(newModel,'metNames') -0082 newModel.metNames=newModel.metNames(indexes); -0083 end -0084 if isfield(newModel,'b') -0085 newModel.b=newModel.b(indexes,:); -0086 end -0087 if isfield(newModel,'metComps') -0088 newModel.metComps=newModel.metComps(indexes); -0089 end -0090 if isfield(newModel,'S') -0091 newModel.S=newModel.S(indexes,:); -0092 end -0093 if isfield(newModel,'unconstrained') -0094 newModel.unconstrained=newModel.unconstrained(indexes); -0095 end -0096 if isfield(newModel,'metMiriams') -0097 newModel.metMiriams=newModel.metMiriams(indexes,:); -0098 end -0099 if isfield(newModel,'inchis') -0100 newModel.inchis=newModel.inchis(indexes); -0101 end -0102 if isfield(newModel,'metFormulas') -0103 newModel.metFormulas=newModel.metFormulas(indexes); -0104 end -0105 if isfield(newModel,'metFrom') -0106 newModel.metFrom=newModel.metFrom(indexes); -0107 end -0108 if isfield(newModel,'metCharges') -0109 newModel.metCharges=newModel.metCharges(indexes); -0110 end -0111 end -0112 end

    +0017 newModel=model; +0018 indexes=indexes(:); +0019 +0020 switch type +0021 case 'rxns' +0022 if isfield(newModel,'rxns') +0023 newModel.rxns=newModel.rxns(indexes); +0024 end +0025 if isfield(newModel,'lb') +0026 newModel.lb=newModel.lb(indexes); +0027 end +0028 if isfield(newModel,'ub') +0029 newModel.ub=newModel.ub(indexes); +0030 end +0031 if isfield(newModel,'rev') +0032 newModel.rev=newModel.rev(indexes); +0033 end +0034 if isfield(newModel,'c') +0035 newModel.c=newModel.c(indexes); +0036 end +0037 if isfield(newModel,'S') +0038 newModel.S=newModel.S(:,indexes); +0039 end +0040 if isfield(newModel,'rxnNames') +0041 newModel.rxnNames=newModel.rxnNames(indexes); +0042 end +0043 if isfield(newModel,'rxnGeneMat') +0044 newModel.rxnGeneMat=newModel.rxnGeneMat(indexes,:); +0045 end +0046 if isfield(newModel,'grRules') +0047 newModel.grRules=newModel.grRules(indexes); +0048 end +0049 if isfield(newModel,'subSystems') +0050 newModel.subSystems=newModel.subSystems(indexes); +0051 end +0052 if isfield(newModel,'eccodes') +0053 newModel.eccodes=newModel.eccodes(indexes); +0054 end +0055 if isfield(newModel,'equations') +0056 newModel.equations=newModel.equations(indexes); +0057 end +0058 if isfield(newModel,'rxnMiriams') +0059 newModel.rxnMiriams=newModel.rxnMiriams(indexes); +0060 end +0061 if isfield(newModel,'rxnComps') +0062 newModel.rxnComps=newModel.rxnComps(indexes); +0063 end +0064 if isfield(newModel,'rxnFrom') +0065 newModel.rxnFrom=newModel.rxnFrom(indexes); +0066 end +0067 if isfield(newModel,'rxnScores') +0068 newModel.rxnScores=newModel.rxnScores(indexes); +0069 end +0070 if isfield(newModel,'rxnNotes') +0071 newModel.rxnNotes=newModel.rxnNotes(indexes); +0072 end +0073 if isfield(newModel,'rxnReferences') +0074 newModel.rxnReferences=newModel.rxnReferences(indexes); +0075 end +0076 if isfield(newModel,'rxnConfidenceScores') +0077 newModel.rxnConfidenceScores=newModel.rxnConfidenceScores(indexes); +0078 end +0079 case 'mets' +0080 if 
isfield(newModel,'mets') +0081 newModel.mets=newModel.mets(indexes); +0082 end +0083 if isfield(newModel,'metNames') +0084 newModel.metNames=newModel.metNames(indexes); +0085 end +0086 if isfield(newModel,'b') +0087 newModel.b=newModel.b(indexes,:); +0088 end +0089 if isfield(newModel,'metComps') +0090 newModel.metComps=newModel.metComps(indexes); +0091 end +0092 if isfield(newModel,'S') +0093 newModel.S=newModel.S(indexes,:); +0094 end +0095 if isfield(newModel,'unconstrained') +0096 newModel.unconstrained=newModel.unconstrained(indexes); +0097 end +0098 if isfield(newModel,'metMiriams') +0099 newModel.metMiriams=newModel.metMiriams(indexes,:); +0100 end +0101 if isfield(newModel,'inchis') +0102 newModel.inchis=newModel.inchis(indexes); +0103 end +0104 if isfield(newModel,'metFormulas') +0105 newModel.metFormulas=newModel.metFormulas(indexes); +0106 end +0107 if isfield(newModel,'metFrom') +0108 newModel.metFrom=newModel.metFrom(indexes); +0109 end +0110 if isfield(newModel,'metCharges') +0111 newModel.metCharges=newModel.metCharges(indexes); +0112 end +0113 case 'genes' +0114 if isfield(newModel,'genes') +0115 newModel.genes=newModel.genes(indexes); +0116 end +0117 if isfield(newModel,'geneComps') +0118 newModel.geneComps=newModel.geneComps(indexes); +0119 end +0120 if isfield(newModel,'geneMiriams') +0121 newModel.geneMiriams=newModel.geneMiriams(indexes); +0122 end +0123 if isfield(newModel,'geneShortNames') +0124 newModel.geneShortNames=newModel.geneShortNames(indexes); +0125 end +0126 if isfield(newModel,'rxnGeneMat') +0127 newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); +0128 end +0129 case 'comps' +0130 if isfield(newModel,'comps') +0131 newModel.comps=newModel.comps(indexes); +0132 end +0133 if isfield(newModel,'compNames') +0134 newModel.compNames=newModel.compNames(indexes); +0135 end +0136 if isfield(newModel,'compOutside') +0137 newModel.compOutside=newModel.compOutside(indexes); +0138 end +0139 if isfield(newModel,'compMiriams') +0140 
newModel.compMiriams=newModel.compMiriams(indexes); +0141 end +0142 [~,J]=sort(indexes); % The *index* of compartment is used in next fields +0143 if isfield(newModel,'metComps') +0144 [toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); +0145 newModel.metComps(toreplace) = J(bywhat(toreplace)); +0146 end +0147 if isfield(model,'rxnComps') +0148 [toreplace, bywhat] = ismember(model.rxnComps,1:length(J)); +0149 model.rxnComps(toreplace) = J(bywhat(toreplace)); +0150 end +0151 if isfield(model,'geneComps') +0152 [toreplace, bywhat] = ismember(model.geneComps,1:length(J)); +0153 model.geneComps(toreplace) = J(bywhat(toreplace)); +0154 end +0155 end +0156 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/getBlast.html b/doc/external/getBlast.html index 09ddcfcb..aecef88b 100644 --- a/doc/external/getBlast.html +++ b/doc/external/getBlast.html @@ -24,12 +24,12 @@

    PURPOSE ^getBlast

    SYNOPSIS ^

    -
    function blastStructure=getBlast(organismID,fastaFile,modelIDs,refFastaFiles)
    +
    function [blastStructure,blastReport]=getBlast(organismID,fastaFile,modelIDs,refFastaFiles,develMode,hideVerbose)

    DESCRIPTION ^

     getBlast
    -   Performs a bidirectional BLASTP between the organism of interest and a
    -   set of template organisms.
    +   Performs a bidirectional BLAST between the organism of interest and a
    +   set of template organisms
     
        Input:
        organismID      the id of the organism of interest. This should also
    @@ -41,19 +41,27 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -68,144 +76,189 @@

    CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

    -
    0001 function blastStructure=getBlast(organismID,fastaFile,modelIDs,refFastaFiles)
    -0002 % getBlast
    -0003 %   Performs a bidirectional BLASTP between the organism of interest and a
    -0004 %   set of template organisms.
    -0005 %
    -0006 %   Input:
    -0007 %   organismID      the id of the organism of interest. This should also
    -0008 %                   match with the id supplied to getModelFromHomology
    -0009 %   fastaFile       a FASTA file with the protein sequences for the
    -0010 %                   organism of interest
    -0011 %   modelIDs        a cell array of model ids. These must match the
    -0012 %                   "model.id" fields in the "models" structure if the
    -0013 %                   output is to be used with getModelFromHomology
    -0014 %   refFastaFiles   a cell array with the paths to the corresponding FASTA
    -0015 %                   files
    -0016 %
    -0017 %   Output:
    -0018 %   blastStructure  structure containing the bidirectional homology
    -0019 %                   measurements which are used by getModelFromHomology
    -0020 %
    -0021 %   NOTE: This function calls BLASTP to perform a bidirectional homology
    -0022 %   test between the organism of interest and a set of other organisms
    -0023 %   using standard settings. The only filtering this functions does is the
    -0024 %   removal of hits with E value higher than 10e-5. If you would like to
    -0025 %   use other homology measurements, please see getBlastFromExcel.
    -0026 %
    -0027 %   Usage: blastStructure=getBlast(organismID,fastaFile,modelIDs,...
    -0028 %           refFastaFiles)
    -0029 
    -0030 %Everything should be cell arrays
    -0031 organismID=cellstr(organismID);
    -0032 fastaFile=cellstr(fastaFile);
    -0033 modelIDs=cellstr(modelIDs);
    -0034 refFastaFiles=cellstr(refFastaFiles);
    -0035 
    -0036 blastStructure=[];
    -0037 
    -0038 %Get the directory for RAVEN Toolbox. This may not be the easiest or best
    -0039 %way to do this
    -0040 [ST, I]=dbstack('-completenames');
    -0041 ravenPath=fileparts(fileparts(ST(I).file));
    -0042 
    -0043 %Construct databases and output file
    -0044 tmpDB=tempname;
    -0045 outFile=tempname;
    -0046 
    -0047 %Check for existence of files. If no full path is specified for a file,
    -0048 %assume that it is in the current folder
    -0049 if isrow(refFastaFiles)
    -0050     files=horzcat(fastaFile,refFastaFiles);
    -0051 else
    -0052     files=vertcat(fastaFile,refFastaFiles);
    -0053 end
    -0054 
    -0055 files=checkFileExistence(files,true,false); %No whitespace allowed
    -0056 fastaFile = files(1);
    -0057 refFastaFiles = files(2:end);
    -0058 
    -0059 %Create a database for the new organism and blast each of the refFastaFiles
    -0060 %against it
    -0061 if isunix
    -0062     if ismac
    -0063         binEnd='.mac';
    -0064     else
    -0065         binEnd='';
    -0066     end
    -0067 elseif ispc
    -0068     binEnd='';
    -0069     setenv('BLASTDB_LMDB_MAP_SIZE','1000000');
    -0070 else
    -0071     dispEM('Unknown OS, exiting.')
    -0072     return
    -0073 end
    -0074 
    -0075 % Run BLAST multi-threaded to use all logical cores assigned to MATLAB.
    -0076 cores = evalc('feature(''numcores'')');
    -0077 cores = strsplit(cores, 'MATLAB was assigned: ');
    -0078 cores = regexp(cores{2},'^\d*','match');
    -0079 cores = cores{1};
    +
    0001 function [blastStructure,blastReport]=getBlast(organismID,fastaFile,...
    +0002     modelIDs,refFastaFiles,develMode,hideVerbose)
    +0003 % getBlast
    +0004 %   Performs a bidirectional BLAST between the organism of interest and a
    +0005 %   set of template organisms
    +0006 %
    +0007 %   Input:
    +0008 %   organismID      the id of the organism of interest. This should also
    +0009 %                   match with the id supplied to getModelFromHomology
    +0010 %   fastaFile       a FASTA file with the protein sequences for the
    +0011 %                   organism of interest
    +0012 %   modelIDs        a cell array of model ids. These must match the
    +0013 %                   "model.id" fields in the "models" structure if the
    +0014 %                   output is to be used with getModelFromHomology
    +0015 %   refFastaFiles   a cell array with the paths to the corresponding FASTA
    +0016 %                   files
    +0017 %   develMode       true if blastReport should be generated that is used
    +0018 %                   in the unit testing function for BLAST+ (opt, default
    +0019 %                   false)
    +0020 %   hideVerbose     true if no status messages should be printed (opt,
    +0021 %                   default false)
    +0022 %
    +0023 %   Output:
    +0024 %   blastStructure  structure containing the bidirectional homology
    +0025 %                   measurements that can be used by getModelFromHomology
    +0026 %   blastReport     structure containing MD5 hashes for FASTA database
    +0027 %                   files and non-parsed BLAST output data. Will be blank
    +0028 %                   if develMode is false.
    +0029 %
    +0030 %   NOTE: This function calls BLAST+ to perform a bidirectional homology
    +0031 %   test between the organism of interest and a set of other organisms
    +0032 %   using standard settings. The only filtering this function does is the
    +0033 %   removal of hits with an E-value higher than 10e-5. To use other
    +0034 %   homology measurements, see getBlastFromExcel.
    +0035 %
    +0036 %   Usage: [blastStructure,blastReport]=getBlast(organismID,fastaFile,...
    +0037 %    modelIDs,refFastaFiles,develMode,hideVerbose)
    +0038 
    +0039 if nargin<5
    +0040     develMode = false;
    +0041 end
    +0042 if nargin<6
    +0043     hideVerbose = false;
    +0044 end
    +0045 
    +0046 %Everything should be cell arrays
    +0047 organismID=cellstr(organismID);
    +0048 fastaFile=cellstr(fastaFile);
    +0049 modelIDs=cellstr(modelIDs);
    +0050 refFastaFiles=cellstr(refFastaFiles);
    +0051 
    +0052 %Create blank structures for results
    +0053 blastStructure=[];
    +0054 blastReport.dbHashes.phr={};
    +0055 blastReport.dbHashes.pot={};
    +0056 blastReport.dbHashes.psq={};
    +0057 blastReport.dbHashes.pto={};
    +0058 blastReport.blastTxtOutput={};
    +0059 
    +0060 %Get the directory for RAVEN Toolbox. This may not be the easiest or best
    +0061 %way to do this
    +0062 [ST, I]=dbstack('-completenames');
    +0063 ravenPath=fileparts(fileparts(ST(I).file));
    +0064 
    +0065 %Generate temporary names for BLAST databases and output files
    +0066 tmpDB=tempname;
    +0067 outFile=tempname;
    +0068 
    +0069 %Check for existence of files. If no full path is specified for a file,
    +0070 %assume that it is in the current folder
    +0071 if isrow(refFastaFiles)
    +0072     files=horzcat(fastaFile,refFastaFiles);
    +0073 else
    +0074     files=vertcat(fastaFile,refFastaFiles);
    +0075 end
    +0076 
    +0077 files=checkFileExistence(files,2); %Copy files to temp dir
    +0078 fastaFile = files(1);
    +0079 refFastaFiles = files(2:end);
     0080 
    -0081 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' fastaFile{1} '" -out "' tmpDB '" -dbtype prot']);
    -0082 if status~=0
    -0083     EM=['makeblastdb did not run successfully, error: ', num2str(status)];
    -0084     dispEM(EM,true);
    -0085 end
    -0086 
    -0087 for i=1:numel(refFastaFiles)
    -0088     fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']);
    -0089     [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' refFastaFiles{i} '" -out "' outFile '_' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
    -0090     if status~=0
    -0091         EM=['blastp did not run successfully, error: ', num2str(status)];
    -0092         dispEM(EM,true);
    -0093     end
    +0081 %Identify the operating system
    +0082 if isunix
    +0083     if ismac
    +0084         binEnd='.mac';
    +0085     else
    +0086         binEnd='';
    +0087     end
    +0088 elseif ispc
    +0089     binEnd='.exe';
    +0090     setenv('BLASTDB_LMDB_MAP_SIZE','1000000');
    +0091 else
    +0092     dispEM('Unknown OS, exiting.')
    +0093     return
     0094 end
    -0095 delete([tmpDB '*']);
    -0096 
    -0097 %Then create a database for each of the reference organisms and blast the
    -0098 %new organism against them
    -0099 for i=1:numel(refFastaFiles)
    -0100     fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']);
    -0101     [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' refFastaFiles{i} '" -out "' tmpDB '" -dbtype prot']);
    -0102     if status~=0
    -0103         EM=['makeblastdb did not run successfully, error: ', num2str(status)];
    -0104         dispEM(EM,true);
    -0105     end
    -0106     [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' fastaFile{1} '" -out "' outFile '_r' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
    -0107     delete([tmpDB '*']);
    -0108     if status~=0
    -0109         EM=['blastp did not run successfully, error: ', num2str(status)];
    -0110         dispEM(EM,true);
    -0111     end
    -0112 end
    -0113 
    -0114 %Done with the BLAST, do the parsing of the text files
    -0115 for i=1:numel(refFastaFiles)*2
    -0116     tempStruct=[];
    -0117     if i<=numel(refFastaFiles)
    -0118         tempStruct.fromId=modelIDs{i};
    -0119         tempStruct.toId=organismID{1};
    -0120         A=readtable([outFile '_' num2str(i)],'Delimiter',',','Format','%s%s%f%f%f%f%f');
    -0121     else
    -0122         tempStruct.fromId=organismID{1};
    -0123         tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
    -0124         A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter',',','Format','%s%s%f%f%f%f%f');
    -0125     end
    -0126     tempStruct.fromGenes=A{:,1};
    -0127     tempStruct.toGenes=A{:,2};
    -0128     tempStruct.evalue=table2array(A(:,3));
    -0129     tempStruct.identity=table2array(A(:,4));
    -0130     tempStruct.aligLen=table2array(A(:,5));
    -0131     tempStruct.bitscore=table2array(A(:,6));
    -0132     tempStruct.ppos=table2array(A(:,7));
    -0133     blastStructure=[blastStructure tempStruct];
    -0134 end
    -0135 
    -0136 %Remove the old tempfiles
    -0137 delete([outFile '*']);
    -0138 end
    +0095 +0096 %Run BLAST multi-threaded to use all logical cores assigned to MATLAB +0097 cores = evalc('feature(''numcores'')'); +0098 cores = strsplit(cores, 'MATLAB was assigned: '); +0099 cores = regexp(cores{2},'^\d*','match'); +0100 cores = cores{1}; +0101 +0102 %Create a database for the new organism and blast each of the refFastaFiles +0103 %against it +0104 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' fastaFile{1} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']); +0105 if develMode +0106 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr')); +0107 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot')); +0108 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq')); +0109 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto')); +0110 end +0111 if status~=0 +0112 EM=['makeblastdb did not run successfully, error: ', num2str(status)]; +0113 dispEM(EM,true); +0114 end +0115 +0116 for i=1:numel(refFastaFiles) +0117 if ~hideVerbose +0118 fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']); +0119 end +0120 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' refFastaFiles{i} ' -out "' outFile '_' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); +0121 if develMode +0122 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_' num2str(i)]); +0123 end +0124 if status~=0 +0125 EM=['blastp did not run successfully, error: ', num2str(status)]; +0126 dispEM(EM,true); +0127 end +0128 end +0129 delete([tmpDB filesep 'tmpDB*']); +0130 +0131 %Then create a database for each of the reference organisms and blast the +0132 %new organism against them 
+0133 for i=1:numel(refFastaFiles) +0134 if ~hideVerbose +0135 fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']); +0136 end +0137 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' refFastaFiles{i} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']); +0138 if status~=0 +0139 EM=['makeblastdb did not run successfully, error: ', num2str(status)]; +0140 dispEM(EM,true); +0141 end +0142 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' fastaFile{1} ' -out "' outFile '_r' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); +0143 if develMode +0144 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr')); +0145 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot')); +0146 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq')); +0147 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto')); +0148 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_r' num2str(i)]); +0149 end +0150 if status~=0 +0151 EM=['blastp did not run successfully, error: ', num2str(status)]; +0152 dispEM(EM,true); +0153 end +0154 delete([tmpDB filesep 'tmpDB*']); +0155 end +0156 +0157 %Done with the BLAST, do the parsing of the text files +0158 for i=1:numel(refFastaFiles)*2 +0159 tempStruct=[]; +0160 if i<=numel(refFastaFiles) +0161 tempStruct.fromId=modelIDs{i}; +0162 tempStruct.toId=organismID{1}; +0163 A=readtable([outFile '_' num2str(i)],'Delimiter',',','Format','%s%s%f%f%f%f%f'); +0164 else +0165 tempStruct.fromId=organismID{1}; +0166 tempStruct.toId=modelIDs{i-numel(refFastaFiles)}; +0167 A=readtable([outFile '_r' 
num2str(i-numel(refFastaFiles))],'Delimiter',',','Format','%s%s%f%f%f%f%f'); +0168 end +0169 tempStruct.fromGenes=A{:,1}; +0170 tempStruct.toGenes=A{:,2}; +0171 tempStruct.evalue=table2array(A(:,3)); +0172 tempStruct.identity=table2array(A(:,4)); +0173 tempStruct.aligLen=table2array(A(:,5)); +0174 tempStruct.bitscore=table2array(A(:,6)); +0175 tempStruct.ppos=table2array(A(:,7)); +0176 blastStructure=[blastStructure tempStruct]; +0177 end +0178 +0179 %Remove the old tempfiles +0180 delete([outFile '*']); +0181 %Remove the temp fasta files +0182 delete(files{:}) +0183 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/getDiamond.html b/doc/external/getDiamond.html index eda2386c..3fff175b 100644 --- a/doc/external/getDiamond.html +++ b/doc/external/getDiamond.html @@ -24,12 +24,12 @@

    PURPOSE ^getDiamond

    SYNOPSIS ^

    -
    function blastStructure=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles)
    +
    function [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles,develMode,hideVerbose)

    DESCRIPTION ^

     getDiamond
    -   Uses DIAMOND to performs a bidirectional BLASTP between the organism
    -   of interest and a set of template organisms.
    +   Uses DIAMOND to perform a bidirectional BLAST between the organism
    +   of interest and a set of template organisms
     
        Input:
        organismID      the id of the organism of interest. This should also
    @@ -41,20 +41,28 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -69,145 +77,180 @@

    CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

    -
    0001 function blastStructure=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles)
    -0002 % getDiamond
    -0003 %   Uses DIAMOND to performs a bidirectional BLASTP between the organism
    -0004 %   of interest and a set of template organisms.
    -0005 %
    -0006 %   Input:
    -0007 %   organismID      the id of the organism of interest. This should also
    -0008 %                   match with the id supplied to getModelFromHomology
    -0009 %   fastaFile       a FASTA file with the protein sequences for the
    -0010 %                   organism of interest
    -0011 %   modelIDs        a cell array of model ids. These must match the
    -0012 %                   "model.id" fields in the "models" structure if the
    -0013 %                   output is to be used with getModelFromHomology
    -0014 %   refFastaFiles   a cell array with the paths to the corresponding FASTA
    -0015 %                   files
    -0016 %
    -0017 %   Output:
    -0018 %   blastStructure  structure containing the bidirectional homology
    -0019 %                   measurements which are used by getModelFromHomology
    -0020 %
    -0021 %   NOTE: This function calls DIAMOND to perform a bidirectional homology
    -0022 %   test between the organism of interest and a set of other organisms
    -0023 %   using the '--more-sensitive' setting from DIAMOND. For the most
    -0024 %   sensitive results, the use of getBlast() is adviced, however,
    -0025 %   getDiamond() is a fast alternative (>15x faster). The blastStructure
    -0026 %   generated is in the same format as those obtained from getBlast().
    -0027 %
    -0028 %   Usage: blastStructure=getDiamond(organismID,fastaFile,modelIDs,...
    -0029 %           refFastaFiles)
    -0030 
    -0031 %Everything should be cell arrays
    -0032 organismID=cellstr(organismID);
    -0033 fastaFile=cellstr(fastaFile);
    -0034 modelIDs=cellstr(modelIDs);
    -0035 refFastaFiles=cellstr(refFastaFiles);
    -0036 
    -0037 blastStructure=[];
    -0038 
    -0039 %Get the directory for RAVEN Toolbox. This may not be the easiest or best
    -0040 %way to do this
    -0041 [ST, I]=dbstack('-completenames');
    -0042 ravenPath=fileparts(fileparts(ST(I).file));
    -0043 
    -0044 %Construct databases and output file
    -0045 tmpDB=tempname;
    -0046 outFile=tempname;
    -0047 
    -0048 %Check for existence of files. If no full path is specified for a file,
    -0049 %assume that it is in the current folder
    -0050 if isrow(refFastaFiles)
    -0051     files=horzcat(fastaFile,refFastaFiles);
    -0052 else
    -0053     files=vertcat(fastaFile,refFastaFiles);
    -0054 end
    -0055 
    -0056 files=checkFileExistence(files,true,false); %No whitespace allowed
    -0057 fastaFile = files(1);
    -0058 refFastaFiles = files(2:end);
    -0059 
    -0060 %Create a database for the new organism and blast each of the refFastaFiles
    -0061 %against it
    +
    0001 function [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,...
    +0002     modelIDs,refFastaFiles,develMode,hideVerbose)
    +0003 % getDiamond
    +0004 %   Uses DIAMOND to perform a bidirectional BLAST between the organism
    +0005 %   of interest and a set of template organisms
    +0006 %
    +0007 %   Input:
    +0008 %   organismID      the id of the organism of interest. This should also
    +0009 %                   match with the id supplied to getModelFromHomology
    +0010 %   fastaFile       a FASTA file with the protein sequences for the
    +0011 %                   organism of interest
    +0012 %   modelIDs        a cell array of model ids. These must match the
    +0013 %                   "model.id" fields in the "models" structure if the
    +0014 %                   output is to be used with getModelFromHomology
    +0015 %   refFastaFiles   a cell array with the paths to the corresponding FASTA
    +0016 %                   files
    +0017 %   develMode       true if diamondReport should be generated that is used
    +0018 %                   in the unit testing function for DIAMOND (opt, default
    +0019 %                   false)
    +0020 %   hideVerbose     true if no status messages should be printed (opt,
    +0021 %                   default false)
    +0022 %
    +0023 %   Output:
    +0024 %   blastStructure  structure containing the bidirectional homology
    +0025 %                   measurements which are used by getModelFromHomology
    +0026 %   diamondReport   structure containing MD5 hashes for FASTA database
    +0027 %                   files and non-parsed DIAMOND output data. Will be blank
    +0028 %                   if develMode is false.
    +0029 %
    +0030 %   NOTE: This function calls DIAMOND to perform a bidirectional homology
    +0031 %   search between the organism of interest and a set of other organisms
    +0032 %   using the '--more-sensitive' setting from DIAMOND. For the most
    +0033 %   sensitive results, the use of getBlast() is advised, however,
    +0034 %   getDiamond() is a fast alternative (>15x faster). The blastStructure
    +0035 %   generated is in the same format as those obtained from getBlast().
    +0036 %
    +0037 %   Usage: [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,...
    +0038 %    modelIDs,refFastaFiles,develMode,hideVerbose)
    +0039 
    +0040 if nargin<5
    +0041     develMode = false;
    +0042 end
    +0043 if nargin<6
    +0044     hideVerbose = false;
    +0045 end
    +0046 
    +0047 %Everything should be cell arrays
    +0048 organismID=cellstr(organismID);
    +0049 fastaFile=cellstr(fastaFile);
    +0050 modelIDs=cellstr(modelIDs);
    +0051 refFastaFiles=cellstr(refFastaFiles);
    +0052 
    +0053 %Create blank structures for results
    +0054 blastStructure=[];
    +0055 diamondReport.dbHashes={};
    +0056 diamondReport.diamondTxtOutput={};
    +0057 
    +0058 %Get the directory for RAVEN Toolbox. This may not be the easiest or best
    +0059 %way to do this
    +0060 [ST, I]=dbstack('-completenames');
    +0061 ravenPath=fileparts(fileparts(ST(I).file));
     0062 
    -0063 if isunix
    -0064     if ismac
    -0065         binEnd='.mac';
    -0066     else
    -0067         binEnd='';
    -0068     end
    -0069 elseif ispc
    -0070     binEnd='';
    +0063 %Generate temporary names for DIAMOND databases and output files
    +0064 tmpDB=tempname;
    +0065 outFile=tempname;
    +0066 
    +0067 %Check for existence of files. If no full path is specified for a file,
    +0068 %assume that it is in the current folder
    +0069 if isrow(refFastaFiles)
    +0070     files=horzcat(fastaFile,refFastaFiles);
     0071 else
    -0072     dispEM('Unknown OS, exiting.')
    -0073     return
    -0074 end
    -0075 
    -0076 % Run BLAST multi-threaded to use all logical cores assigned to MATLAB.
    -0077 cores = evalc('feature(''numcores'')');
    -0078 cores = strsplit(cores, 'MATLAB was assigned: ');
    -0079 cores = regexp(cores{2},'^\d*','match');
    -0080 cores = cores{1};
    -0081 
    -0082 [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' tmpDB '"']);
    -0083 if status~=0
    -0084     EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)];
    -0085     dispEM(EM,true);
    -0086 end
    -0087 
    -0088 for i=1:numel(refFastaFiles)
    -0089     fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']);
    -0090     [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]);
    -0091     if status~=0
    -0092         EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)];
    -0093         dispEM(EM,true);
    -0094     end
    -0095 end
    -0096 delete([tmpDB '*']);
    -0097 
    -0098 %Then create a database for each of the reference organisms and blast the
    -0099 %new organism against them
    -0100 for i=1:numel(refFastaFiles)
    -0101     fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']);
    -0102     [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' tmpDB '"']);
    -0103     if status~=0
    -0104         EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)];
    -0105         dispEM(EM,true);
    -0106     end
    -0107     [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]);
    -0108     delete([tmpDB '*']);
    -0109     if status~=0
    -0110         EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)];
    -0111         dispEM(EM,true);
    -0112     end
    -0113 end
    -0114 
    -0115 %Done with the DIAMOND blastp, do the parsing of the text files
    -0116 for i=1:numel(refFastaFiles)*2
    -0117     tempStruct=[];
    -0118     if i<=numel(refFastaFiles)
    -0119         tempStruct.fromId=modelIDs{i};
    -0120         tempStruct.toId=organismID{1};
    -0121         A=readtable([outFile '_' num2str(i)],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
    -0122     else
    -0123         tempStruct.fromId=organismID{1};
    -0124         tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
    -0125         A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
    -0126     end
    -0127     tempStruct.fromGenes=A{:,1};
    -0128     tempStruct.toGenes=A{:,2};
    -0129     tempStruct.evalue=table2array(A(:,3));
    -0130     tempStruct.identity=table2array(A(:,4));
    -0131     tempStruct.aligLen=table2array(A(:,5));
    -0132     tempStruct.bitscore=table2array(A(:,6));
    -0133     tempStruct.ppos=table2array(A(:,7));
    -0134     blastStructure=[blastStructure tempStruct];
    -0135 end
    -0136 
    -0137 %Remove the old tempfiles
    -0138 delete([outFile '*']);
    -0139 end
    +0072 files=vertcat(fastaFile,refFastaFiles); +0073 end +0074 +0075 files=checkFileExistence(files,2); %Copy files to temp dir +0076 fastaFile = files(1); +0077 refFastaFiles = files(2:end); +0078 +0079 %Identify the operating system +0080 if isunix +0081 if ismac +0082 binEnd='.mac'; +0083 else +0084 binEnd=''; +0085 end +0086 elseif ispc +0087 binEnd='.exe'; +0088 else +0089 dispEM('Unknown OS, exiting.') +0090 return +0091 end +0092 +0093 %Run DIAMOND multi-threaded to use all logical cores assigned to MATLAB. +0094 cores = evalc('feature(''numcores'')'); +0095 cores = strsplit(cores, 'MATLAB was assigned: '); +0096 cores = regexp(cores{2},'^\d*','match'); +0097 cores = cores{1}; +0098 +0099 %Create a database for the new organism and blast each of the refFastaFiles +0100 %against it +0101 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' fullfile(tmpDB) '"']); +0102 if develMode +0103 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match')); +0104 end +0105 if status~=0 +0106 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)]; +0107 dispEM(EM,true); +0108 end +0109 +0110 for i=1:numel(refFastaFiles) +0111 if ~hideVerbose +0112 fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']); +0113 end +0114 [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]); +0115 if develMode +0116 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_' num2str(i)]); +0117 end +0118 if status~=0 +0119 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)]; +0120 dispEM(EM,true); +0121 end +0122 end +0123 
delete([tmpDB filesep 'tmpDB*']); +0124 +0125 %Then create a database for each of the reference organisms and blast the +0126 %new organism against them +0127 for i=1:numel(refFastaFiles) +0128 if ~hideVerbose +0129 fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']); +0130 end +0131 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' fullfile(tmpDB) '"']); +0132 if status~=0 +0133 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)]; +0134 dispEM(EM,true); +0135 end +0136 [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]); +0137 if develMode +0138 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match')); +0139 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_r' num2str(i)]); +0140 end +0141 if status~=0 +0142 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)]; +0143 dispEM(EM,true); +0144 end +0145 delete([tmpDB filesep 'tmpDB*']); +0146 end +0147 +0148 %Done with the DIAMOND blastp, do the parsing of the text files +0149 for i=1:numel(refFastaFiles)*2 +0150 tempStruct=[]; +0151 if i<=numel(refFastaFiles) +0152 tempStruct.fromId=modelIDs{i}; +0153 tempStruct.toId=organismID{1}; +0154 A=readtable([outFile '_' num2str(i)],'Delimiter','\t','Format','%s%s%f%f%f%f%f'); +0155 else +0156 tempStruct.fromId=organismID{1}; +0157 tempStruct.toId=modelIDs{i-numel(refFastaFiles)}; +0158 A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter','\t','Format','%s%s%f%f%f%f%f'); +0159 end +0160 tempStruct.fromGenes=A{:,1}; +0161 tempStruct.toGenes=A{:,2}; +0162 
tempStruct.evalue=table2array(A(:,3)); +0163 tempStruct.identity=table2array(A(:,4)); +0164 tempStruct.aligLen=table2array(A(:,5)); +0165 tempStruct.bitscore=table2array(A(:,6)); +0166 tempStruct.ppos=table2array(A(:,7)); +0167 blastStructure=[blastStructure tempStruct]; +0168 end +0169 +0170 %Remove the old tempfiles +0171 delete([outFile '*']); +0172 %Remove the temp fasta files +0173 delete(files{:}) +0174 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/kegg/constructMultiFasta.html b/doc/external/kegg/constructMultiFasta.html index 980bfae6..0c9e4435 100644 --- a/doc/external/kegg/constructMultiFasta.html +++ b/doc/external/kegg/constructMultiFasta.html @@ -49,7 +49,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • getKEGGModelForOrganism getKEGGModelForOrganism
  • +
  • getKEGGModelForOrganism getKEGGModelForOrganism
  • SUBFUNCTIONS ^

    @@ -149,86 +149,82 @@

    SOURCE CODE ^end 0091 end 0092 end -0093 fprintf('COMPLETE\n'); +0093 fprintf('COMPLETE\n'); 0094 -0095 fprintf('Generating the KEGG Orthology specific multi-FASTA files... '); +0095 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 0%% complete'); 0096 %Loop through the reactions and print the corresponding sequences -0097 progressFlag=0; -0098 for i=1:numel(model.rxns) -0099 -0100 %Do not overwrite existing files -0101 if ~exist(fullfile(outputDir,[model.rxns{i} '.fa']), 'file') -0102 -0103 %Get the positions in elementPositions for the involved genes -0104 genesUsed=model.rxnGeneMat(i,:); -0105 -0106 %Open a file for this reaction. This saves empty files for KOs -0107 %without genes -0108 rxnfid=fopen(fullfile(outputDir,[model.rxns{i} '.fa']),'w'); -0109 -0110 if any(genesUsed) -0111 positions=genePositions(genesUsed~=0); -0112 -0113 %It could be that some genes were not found. Delete those -0114 %elements -0115 positions(positions==0)=[]; -0116 -0117 %Print each sequence -0118 for j=1:numel(positions) -0119 fseek(fid,elementPositions(positions(j)),-1); -0120 %Should check that it ends with a gene!!!! Check for eof -0121 if positions(j)<numel(elementPositions) -0122 str=fread(fid,[1 double(elementPositions(positions(j)+1))-double(elementPositions(positions(j)))-1],'*char'); -0123 -0124 %If the string does not end with a line feed character -0125 if str(end)~=10 -0126 str=[str fread(fid,[1 double(elementPositions(positions(j)+2))-double(elementPositions(positions(j)+1))],'*char')]; -0127 -0128 %This is if we still have not found a new gene. -0129 %Maximal unluck. This whole check should be done -0130 %when elementPositions are calculated! 
-0131 if str(end)~=10 -0132 %Skip this gene -0133 continue; -0134 end -0135 end -0136 else -0137 str=fread(fid,[1 inf],'*char'); -0138 end -0139 fwrite(rxnfid,['>' str]); -0140 end -0141 end -0142 fclose(rxnfid); -0143 end -0144 %Print the progress: no need to update this for every -0145 %iteration, just report once 25%, 50% and 75% are done -0146 if progressFlag==0 && i>numel(model.rxns)*0.25 -0147 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outputDir,'*.fa')))/numel(model.rxns))*100); -0148 progressFlag=progressFlag+1; -0149 elseif (progressFlag==1 && i>=numel(model.rxns)*0.5) || (progressFlag==2 && i>=numel(model.rxns)*0.75) -0150 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outputDir,'*.fa')))/numel(model.rxns))*100); -0151 progressFlag=progressFlag+1; -0152 end -0153 end -0154 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0097 for i=1:numel(model.rxns) +0098 +0099 %Do not overwrite existing files +0100 if ~exist(fullfile(outputDir,[model.rxns{i} '.fa']), 'file') +0101 +0102 %Get the positions in elementPositions for the involved genes +0103 genesUsed=model.rxnGeneMat(i,:); +0104 +0105 %Open a file for this reaction. This saves empty files for KOs +0106 %without genes +0107 rxnfid=fopen(fullfile(outputDir,[model.rxns{i} '.fa']),'w'); +0108 +0109 if any(genesUsed) +0110 positions=genePositions(genesUsed~=0); +0111 +0112 %It could be that some genes were not found. Delete those +0113 %elements +0114 positions(positions==0)=[]; +0115 +0116 %Print each sequence +0117 for j=1:numel(positions) +0118 fseek(fid,elementPositions(positions(j)),-1); +0119 %Should check that it ends with a gene!!!! 
Check for eof +0120 if positions(j)<numel(elementPositions) +0121 str=fread(fid,[1 double(elementPositions(positions(j)+1))-double(elementPositions(positions(j)))-1],'*char'); +0122 +0123 %If the string does not end with a line feed character +0124 if str(end)~=10 +0125 str=[str fread(fid,[1 double(elementPositions(positions(j)+2))-double(elementPositions(positions(j)+1))],'*char')]; +0126 +0127 %This is if we still have not found a new gene. +0128 %Maximal unluck. This whole check should be done +0129 %when elementPositions are calculated! +0130 if str(end)~=10 +0131 %Skip this gene +0132 continue; +0133 end +0134 end +0135 else +0136 str=fread(fid,[1 inf],'*char'); +0137 end +0138 fwrite(rxnfid,['>' str]); +0139 end +0140 end +0141 fclose(rxnfid); +0142 end +0143 %Print the progress +0144 if rem(i-1,50) == 0 +0145 progress=num2str(i/numel(model.rxns)); +0146 progress=pad(progress,3,'left'); +0147 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0148 end +0149 end +0150 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0151 +0152 %Close the source file +0153 fclose(fid); +0154 end 0155 -0156 %Close the source file -0157 fclose(fid); -0158 end -0159 -0160 function files=listFiles(directory) -0161 %Supporter function to list the files in a directory and return them as a -0162 %cell array -0163 temp=dir(directory); -0164 files=cell(numel(temp),1); -0165 for i=1:numel(temp) -0166 files{i}=temp(i,1).name; -0167 end -0168 files=strrep(files,'.fa',''); -0169 files=strrep(files,'.hmm',''); -0170 files=strrep(files,'.out',''); -0171 files=strrep(files,'.faw',''); -0172 end

    +0156 function files=listFiles(directory) +0157 %Supporter function to list the files in a directory and return them as a +0158 %cell array +0159 temp=dir(directory); +0160 files=cell(numel(temp),1); +0161 for i=1:numel(temp) +0162 files{i}=temp(i,1).name; +0163 end +0164 files=strrep(files,'.fa',''); +0165 files=strrep(files,'.hmm',''); +0166 files=strrep(files,'.out',''); +0167 files=strrep(files,'.faw',''); +0168 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html index db647501..f98436a4 100644 --- a/doc/external/kegg/getKEGGModelForOrganism.html +++ b/doc/external/kegg/getKEGGModelForOrganism.html @@ -24,7 +24,7 @@

    PURPOSE ^getKEGGModelForOrganism

    SYNOPSIS ^

    -
    function model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,nSequences,seqIdentity)
    +
    function model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,nSequences,seqIdentity,globalModel)

    DESCRIPTION ^

     getKEGGModelForOrganism
    @@ -67,7 +67,7 @@ 

    DESCRIPTION ^DESCRIPTION ^DESCRIPTION ^DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • constructMultiFasta constructMultiFasta
  • getModelFromKEGG getModelFromKEGG
  • getPhylDist getPhylDist
  • getWSLpath getWSLpath
  • This function is called by: @@ -308,7 +292,7 @@

    SOURCE CODE ^
    0001 function model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,...
     0002     outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,...
     0003     keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,...
    -0004     nSequences,seqIdentity)
    +0004     nSequences,seqIdentity,globalModel)
     0005 % getKEGGModelForOrganism
     0006 %   Reconstructs a genome-scale metabolic model based on protein homology
     0007 %   to the orthologies in KEGG. If the target species is not available in
    @@ -349,7 +333,7 @@ 

    SOURCE CODE ^% the HMMs were trained on pro- or eukaryotic 0043 % sequences, using a sequence similarity threshold of 0044 % XXX %, fitting the KEGG version YY. E.g. -0045 % euk100_kegg82. (opt, see note about fastaFile. Note +0045 % euk90_kegg100. (opt, see note about fastaFile. Note 0046 % that in order to rebuild the KEGG model from a 0047 % database dump, as opposed to using the version 0048 % supplied with RAVEN, you would still need to supply @@ -406,1031 +390,953 @@

    SOURCE CODE ^% running CD-HIT (opt, default inf) 0100 % seqIdentity sequence identity threshold in CD-HIT, referred as 0101 % "global sequence identity" in CD-HIT User's Guide. -0102 % The only possible options are 1 (100 %), 0.9 (90 %) -0103 % and 0.5 (50 %). If other values are provided, -0104 % CD-HIT is skipped (opt, default -1, i.e. CD-HIT is -0105 % skipped) -0106 % -0107 % Output: -0108 % model the reconstructed model -0109 % -0110 % PLEASE READ THIS: The input to this function can be confusing, because -0111 % it is intended to be run in parallel on a cluster or in multiple -0112 % sessions. It therefore saves a lot of intermediate results to storage. -0113 % This also serves the purpose of not having to do redundant -0114 % calculations. This, however, comes with the disadvantage of somewhat -0115 % trickier handling. This is what this function does: -0116 % -0117 % 1a. Loads files from a local KEGG FTP dump and constructs a general -0118 % RAVEN model representing the metabolic network. The functions -0119 % getRxnsFromKEGG, getGenesFromKEGG, getMetsFromKEGG summarise the -0120 % data into 'keggRxns.mat', 'keggGenes.mat' and 'keggMets.mat' files, -0121 % which are later merged into 'keggModel.mat' by getModelFromKEGG -0122 % function. The function getPhylDist generates 'keggPhylDist.mat' -0123 % file. KEGG FTP access requires a <a href="matlab: -0124 % web('http://www.bioinformatics.jp/en/keggftp.html')">license</a>. -0125 % 1b. Generates protein FASTA files from the KEGG FTP dump (see 1a). One -0126 % multi-FASTA file for each KO in KEGG is generated. -0127 % -0128 % The Step 1 has to be re-done every time KEGG updates their database (or -0129 % rather when the updates are large enough to warrant re-running this -0130 % part). Many users would probably never use this feature. -0131 % -0132 % 2a. Filters KO-specific protein sets. 
This is done by using the -0133 % settings "maxPhylDist" and "nSequences" to control which sequences -0134 % should be used for constructing Hidden Markov models (HMMs), and -0135 % later for matching your sequences to. -0136 % The most common alternatives here would be to use sequences from -0137 % only eukaryotes, only prokaryotes or all sequences in KEGG. As -0138 % explained in the README.md file, various sets of pre-trained hidden -0139 % Markov models are available at <a href="matlab: -0140 % web('http://biomet-toolbox.chalmers.se/index.php?page=downtools-raven')">BioMet -0141 % Toolbox</a>. This is normally the most convenient way, but if you -0142 % would like to use, for example, only fungal sequences for training -0143 % the HMMs then you need to re-run this step. -0144 % 2b. KO-specific protein FASTA files are re-organised into -0145 % non-redundant protein sets with CD-HIT. The user can only set -0146 % seqIdentity parameter, which corresponds to '-c' parameter in -0147 % CD-HIT, described as "sequence identity threshold". 
The following -0148 % non-default parameter settings are used depending on seqIdentity -0149 % value: -0150 % __________________________________________________________________ -0151 % | | seqIdentity value | -0152 % | -------------------------------------- -0153 % | | 1.0 | 0.9 | 0.5 | x | -0154 % | CD-HIT parameters -------------------------------------| -0155 % -----------------------------------------------------------------| -0156 % | Input Dataset (-i) | raw | cdh100 | cdh90 | raw | -0157 % | Output Dataset (-o) | cdh100 | cdh90 | cdh50 | cdhOth | -0158 % | Sequence identity (-c) | 1.0 | 0.9 | 0.5 | x | -0159 % | word_length (-n) | 5 | 5 | 4 | 2-5* | -0160 % | Max available memory (-M) | 2000 | -0161 % ------------------------------------------------------------------ -0162 % * - word length depends from sequence identity value (see CD-HIT -0163 % manual for more details) -0164 % -0165 % The table reads as follows: if seqIdentity is equal to 1, then -0166 % "cdh100" set is produced from raw set of proteins. If seqIdentity -0167 % is equal to 0.9, then "cdh90" is produced from "cdh100" proteins -0168 % set. When seqIdentity is equal to 0.5, "cdh50" is obtained from -0169 % "cdh90" protein set. Finally, if other seqIdentity value is used, -0170 % it is obtained directly from the raw set of proteins. -0171 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in -0172 % Step 2b for future use. MAFFT software with automatic selection of -0173 % alignment algorithm is used in this step ('--auto'). -0174 % 2d. Trains hidden Markov models using HMMer for each of the aligned -0175 % KO-specific FASTA files obtained in Step 2c. This is performed with -0176 % 'hmmbuild' using the default settings. -0177 % -0178 % Step 2 may be reasonable to be re-done if the user wants to tweak the -0179 % settings in proteins filtering, clustering, multi sequence alignment or -0180 % HMMs training steps. 
However, it requires to have KO-specific protein -0181 % FASTA files obtained in Step 1a. As such files are not provided in -0182 % RAVEN and BioMet ToolBox, the user can only generate these files from -0183 % KEGG FTP dump files, so KEGG FTP license is needed. -0184 % -0185 % 3a. Queries the HMMs with sequences for the organism you are making a -0186 % model for. This step uses both the output from step 1a and from 2d. -0187 % This is done with 'hmmsearch' function under default settings. The -0188 % significance threshold value set in 'cutOff' parameter is used -0189 % later when parsing '*.out' files to filter out KO hits with higher -0190 % value than 'cutOff' value. The results with passable E values are -0191 % summarised into KO-gene occurence matrix with E values in -0192 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and -0193 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations -0194 % (see the function descriptions above for more details). The -0195 % intersection values for these 'prunable' associations are converted -0196 % to zeroes. -0197 % 3b. Constructs a model based on the pre-processed KO-gene association -0198 % matrix (koGeneMat). As the full KEGG model already has reaction-KO -0199 % relationships, KOs are converted into the query genes. The final -0200 % draft model contains only these reactions, which are associated -0201 % with KOs from koGeneMat. The reactions without the genes may also -0202 % be included, if the user set keepSpontaneous as 'true'. -0203 % -0204 % The Step 3 is specific to the organism for which the model is -0205 % reconstructed. -0206 % -0207 % In principle the function looks at which output that is already available -0208 % and runs only the parts that are required for step 3. 
This means -0209 % that (see the definition of the parameters for details): -0210 % -1a is only performed if there are no KEGG model files in the -0211 % RAVEN\external\kegg directory -0212 % -1b is only performed if not all required HMMs OR aligned FASTA files -0213 % OR multi-FASTA files exist in the defined dataDir. This means that this -0214 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead -0215 % (see above). If not all files exist it will try to find -0216 % the KEGG database files in dataDir. -0217 % -2a is only performed if not all required HMMs OR aligned FASTA files -0218 % files exist in the defined dataDir. This means that this step is skipped -0219 % if the HMMs are downloaded from BioMet Toolbox instead (see above). -0220 % -2b is only performed if not all required HMMs exist in the defined -0221 % dataDir. This means that this step is skipped if the FASTA files or -0222 % HMMs are downloaded from BioMet Toolbox instead (see above). -0223 % -3a is performed for the required HMMs for which no corresponding .out -0224 % file exists in outDir. This is just a way to enable the function to be -0225 % run in parallel or to resume if interrupted. -0226 % -3b is always performed. -0227 % -0228 % These steps are specific to the organism for which you are -0229 % reconstructing the model. -0230 % -0231 % Regarding the whole pipeline, the function checks the output that is -0232 % already available and runs only the parts that are required for step 3. -0233 % This means that (see the definition of the parameters for details): -0234 % -1a is only performed if there are no KEGG model files in the -0235 % RAVEN\external\kegg directory. -0236 % -1b is only performed if any of required KOs do not have HMMs, aligned -0237 % FASTA files, clustered FASTA files and raw FASTA files in the defined -0238 % dataDir. This means that this step is skipped if the HMMs are -0239 % downloaded from BioMet Toolbox instead (see above). 
If not all files -0240 % exist it will try to find the KEGG database files in dataDir. -0241 % -2ab are only performed if any of required KOs do not have HMMs, -0242 % aligned FASTA files and clustered FASTA files in the defined dataDir. -0243 % This means that this step is skipped if the HMMs are downloaded from -0244 % BioMet Toolbox instead (see above). -0245 % -2c is only performed if any of required KOs do not have HMMs and -0246 % aligned FASTA files in the defined dataDir. This means that this step -0247 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see -0248 % above). -0249 % -2d is only performed if any of required KOs do not have HMMs exist in -0250 % the defined dataDir. This means that this step is skipped if the FASTA -0251 % files or HMMs are downloaded from BioMet Toolbox instead (see above). -0252 % -3a is performed for the required HMMs for which no corresponding .out -0253 % file exists in outDir. This is just a way to enable the function to be -0254 % run in parallel or to resume if interrupted. -0255 % -3b is always performed. -0256 % -0257 % NOTE: it is also possible to obtain draft model from KEGG without -0258 % providing protein FASTA file for the target organism. In such case the -0259 % organism three-four letter abbreviation set as 'organismID' must exist -0260 % in the local KEGG database. In such case, the program just fetches all -0261 % the reactions, which are associated with given 'organismID'. -0262 % -0263 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... -0264 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... -0265 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... -0266 % nSequences,seqIdentity) -0267 -0268 if nargin<2 -0269 fastaFile=[]; +0102 % If -1 is provided, CD-HIT is skipped (opt, default 0.9) +0103 % globalModel structure containing both model and KOModel +0104 % structures as generated by getModelFromKEGG. 
These +0105 % will otherwise be loaded via getModelFromKEGG. +0106 % Providing globalModel can speed up model +0107 % generation if getKEGGModelForOrganism is run +0108 % multiple times for different strains. Example: +0109 % [globalModel.model,globalModel.KOModel] = getModelFromKEGG; +0110 % (opt, default empty, global model is loaded by +0111 % getModelFromKEGG) +0112 % +0113 % Output: +0114 % model the reconstructed model +0115 % +0116 % PLEASE READ THIS: The input to this function can be confusing, because +0117 % it is intended to be run in parallel on a cluster or in multiple +0118 % sessions. It therefore saves a lot of intermediate results to storage. +0119 % This also serves the purpose of not having to do redundant +0120 % calculations. This, however, comes with the disadvantage of somewhat +0121 % trickier handling. This is what this function does: +0122 % +0123 % 1a. Loads files from a local KEGG FTP dump and constructs a general +0124 % RAVEN model representing the metabolic network. The functions +0125 % getRxnsFromKEGG, getGenesFromKEGG, getMetsFromKEGG summarise the +0126 % data into 'keggRxns.mat', 'keggGenes.mat' and 'keggMets.mat' files, +0127 % which are later merged into 'keggModel.mat' by getModelFromKEGG +0128 % function. The function getPhylDist generates 'keggPhylDist.mat' +0129 % file. KEGG FTP access requires a <a href="matlab: +0130 % web('http://www.bioinformatics.jp/en/keggftp.html')">license</a>. +0131 % 1b. Generates protein FASTA files from the KEGG FTP dump (see 1a). One +0132 % multi-FASTA file for each KO in KEGG is generated. +0133 % +0134 % The Step 1 has to be re-done every time KEGG updates their database (or +0135 % rather when the updates are large enough to warrant re-running this +0136 % part). Many users would probably never use this feature. +0137 % +0138 % 2a. Filters KO-specific protein sets. 
This is done by using the +0139 % settings "maxPhylDist" and "nSequences" to control which sequences +0140 % should be used for constructing Hidden Markov models (HMMs), and +0141 % later for matching your sequences to. +0142 % The most common alternatives here would be to use sequences from +0143 % only eukaryotes, only prokaryotes or all sequences in KEGG. As +0144 % explained in the README.md file, various sets of pre-trained hidden +0145 % Markov models are available at <a href="matlab: +0146 % web('http://biomet-toolbox.chalmers.se/index.php?page=downtools-raven')">BioMet +0147 % Toolbox</a>. This is normally the most convenient way, but if you +0148 % would like to use, for example, only fungal sequences for training +0149 % the HMMs then you need to re-run this step. +0150 % 2b. KO-specific protein FASTA files are re-organised into +0151 % non-redundant protein sets with CD-HIT. The user can only set +0152 % seqIdentity parameter, which corresponds to '-c' parameter in +0153 % CD-HIT, described as "sequence identity threshold". CD-HIT suggested +0154 % sequence identity specific word_length (-n) parameters are used. +0155 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in +0156 % Step 2b for future use. MAFFT software with automatic selection of +0157 % alignment algorithm is used in this step ('--auto'). +0158 % 2d. Trains hidden Markov models using HMMer for each of the aligned +0159 % KO-specific FASTA files obtained in Step 2c. This is performed with +0160 % 'hmmbuild' using the default settings. +0161 % +0162 % Step 2 may be reasonable to be re-done if the user wants to tweak the +0163 % settings in proteins filtering, clustering, multi sequence alignment or +0164 % HMMs training steps. However, it requires to have KO-specific protein +0165 % FASTA files obtained in Step 1a. 
As such files are not provided in +0166 % RAVEN and BioMet ToolBox, the user can only generate these files from +0167 % KEGG FTP dump files, so KEGG FTP license is needed. +0168 % +0169 % 3a. Queries the HMMs with sequences for the organism you are making a +0170 % model for. This step uses both the output from step 1a and from 2d. +0171 % This is done with 'hmmsearch' function under default settings. The +0172 % significance threshold value set in 'cutOff' parameter is used +0173 % later when parsing '*.out' files to filter out KO hits with higher +0174 % value than 'cutOff' value. The results with passable E values are +0175 % summarised into KO-gene occurence matrix with E values in +0176 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and +0177 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations +0178 % (see the function descriptions above for more details). The +0179 % intersection values for these 'prunable' associations are converted +0180 % to zeroes. +0181 % 3b. Constructs a model based on the pre-processed KO-gene association +0182 % matrix (koGeneMat). As the full KEGG model already has reaction-KO +0183 % relationships, KOs are converted into the query genes. The final +0184 % draft model contains only these reactions, which are associated +0185 % with KOs from koGeneMat. The reactions without the genes may also +0186 % be included, if the user set keepSpontaneous as 'true'. +0187 % +0188 % The Step 3 is specific to the organism for which the model is +0189 % reconstructed. +0190 % +0191 % In principle the function looks at which output that is already available +0192 % and runs only the parts that are required for step 3. 
This means +0193 % that (see the definition of the parameters for details): +0194 % -1a is only performed if there are no KEGG model files in the +0195 % RAVEN\external\kegg directory +0196 % -1b is only performed if not all required HMMs OR aligned FASTA files +0197 % OR multi-FASTA files exist in the defined dataDir. This means that this +0198 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead +0199 % (see above). If not all files exist it will try to find +0200 % the KEGG database files in dataDir. +0201 % -2a is only performed if not all required HMMs OR aligned FASTA files +0202 % files exist in the defined dataDir. This means that this step is skipped +0203 % if the HMMs are downloaded from BioMet Toolbox instead (see above). +0204 % -2b is only performed if not all required HMMs exist in the defined +0205 % dataDir. This means that this step is skipped if the FASTA files or +0206 % HMMs are downloaded from BioMet Toolbox instead (see above). +0207 % -3a is performed for the required HMMs for which no corresponding .out +0208 % file exists in outDir. This is just a way to enable the function to be +0209 % run in parallel or to resume if interrupted. +0210 % -3b is always performed. +0211 % +0212 % These steps are specific to the organism for which you are +0213 % reconstructing the model. +0214 % +0215 % Regarding the whole pipeline, the function checks the output that is +0216 % already available and runs only the parts that are required for step 3. +0217 % This means that (see the definition of the parameters for details): +0218 % -1a is only performed if there are no KEGG model files in the +0219 % RAVEN\external\kegg directory. +0220 % -1b is only performed if any of required KOs do not have HMMs, aligned +0221 % FASTA files, clustered FASTA files and raw FASTA files in the defined +0222 % dataDir. This means that this step is skipped if the HMMs are +0223 % downloaded from BioMet Toolbox instead (see above). 
If not all files +0224 % exist it will try to find the KEGG database files in dataDir. +0225 % -2ab are only performed if any of required KOs do not have HMMs, +0226 % aligned FASTA files and clustered FASTA files in the defined dataDir. +0227 % This means that this step is skipped if the HMMs are downloaded from +0228 % BioMet Toolbox instead (see above). +0229 % -2c is only performed if any of required KOs do not have HMMs and +0230 % aligned FASTA files in the defined dataDir. This means that this step +0231 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see +0232 % above). +0233 % -2d is only performed if any of required KOs do not have HMMs exist in +0234 % the defined dataDir. This means that this step is skipped if the FASTA +0235 % files or HMMs are downloaded from BioMet Toolbox instead (see above). +0236 % -3a is performed for the required HMMs for which no corresponding .out +0237 % file exists in outDir. This is just a way to enable the function to be +0238 % run in parallel or to resume if interrupted. +0239 % -3b is always performed. +0240 % +0241 % NOTE: it is also possible to obtain draft model from KEGG without +0242 % providing protein FASTA file for the target organism. In such case the +0243 % organism three-four letter abbreviation set as 'organismID' must exist +0244 % in the local KEGG database. In such case, the program just fetches all +0245 % the reactions, which are associated with given 'organismID'. +0246 % +0247 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... +0248 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... +0249 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... 
+0250 % nSequences,seqIdentity) +0251 +0252 if nargin<2 +0253 fastaFile=[]; +0254 end +0255 if nargin<3 +0256 dataDir=[]; +0257 end +0258 if nargin<4 +0259 outDir=[]; +0260 end +0261 if isempty(outDir) +0262 outDir=tempdir; +0263 %Delete all *.out files if any exist +0264 delete(fullfile(outDir,'*.out')); +0265 elseif ~isstr(outDir) +0266 error('outDir should be provided as string'); +0267 end +0268 if nargin<5 +0269 keepSpontaneous=true; 0270 end -0271 if nargin<3 -0272 dataDir=[]; +0271 if nargin<6 +0272 keepUndefinedStoich=true; 0273 end -0274 if nargin<4 -0275 outDir=[]; +0274 if nargin<7 +0275 keepIncomplete=true; 0276 end -0277 if isempty(outDir) -0278 outDir=tempdir; -0279 %Delete all *.out files if any exist -0280 delete(fullfile(outDir,'*.out')); -0281 elseif ~isstr(outDir) -0282 error('outDir should be provided as string'); -0283 end -0284 if nargin<5 -0285 keepSpontaneous=true; -0286 end -0287 if nargin<6 -0288 keepUndefinedStoich=true; -0289 end -0290 if nargin<7 -0291 keepIncomplete=true; +0277 if nargin<8 +0278 keepGeneral=false; +0279 end +0280 if nargin<9 +0281 cutOff=10^-50; +0282 end +0283 if nargin<10 +0284 minScoreRatioKO=0.3; +0285 end +0286 if nargin<11 +0287 minScoreRatioG=0.8; +0288 end +0289 if nargin<12 +0290 maxPhylDist=inf; +0291 %Include all sequences for each reaction 0292 end -0293 if nargin<8 -0294 keepGeneral=false; -0295 end -0296 if nargin<9 -0297 cutOff=10^-50; -0298 end -0299 if nargin<10 -0300 minScoreRatioKO=0.3; -0301 end -0302 if nargin<11 -0303 minScoreRatioG=0.8; -0304 end -0305 if nargin<12 -0306 maxPhylDist=inf; -0307 %Include all sequences for each reaction -0308 end -0309 if nargin<13 -0310 nSequences=inf; -0311 %Include all sequences for each reaction -0312 end -0313 if nargin<14 -0314 seqIdentity=-1; -0315 %CD-HIT is not used in the pipeline -0316 end -0317 -0318 if isempty(fastaFile) -0319 fprintf(['\n\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' organismID 
'</strong> ***\n\n']); -0320 else -0321 fprintf('\n\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); -0322 %Check if query fasta exists -0323 fastaFile=checkFileExistence(fastaFile,true,false); -0324 end -0325 -0326 %Run the external binaries multi-threaded to use all logical cores assigned -0327 %to MATLAB -0328 cores = evalc('feature(''numcores'')'); -0329 cores = strsplit(cores, 'MATLAB was assigned: '); -0330 cores = regexp(cores{2},'^\d*','match'); -0331 cores = cores{1}; -0332 -0333 %Get the directory for RAVEN Toolbox. This is to get the path to the third -0334 %party software used -0335 [ST, I]=dbstack('-completenames'); -0336 ravenPath=fileparts(fileparts(fileparts(ST(I).file))); -0337 -0338 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, -0339 %compatible with the the current RAVEN version. The user may have the -0340 %required zip file already in working directory or have it extracted. If -0341 %the zip file and directory is not here, it is downloaded from the cloud -0342 if ~isempty(dataDir) -0343 hmmOptions={'euk100_kegg94', ... -0344 'euk90_kegg94', ... -0345 'euk50_kegg94', ... -0346 'prok100_kegg94', ... -0347 'prok90_kegg94', ... -0348 'prok50_kegg94'}; -0349 hmmLinks={'wbnghgtpgftb6pcw572bhkl9a8ekh32d', ... -0350 '754bdz0965261fktzlwc77rcv7me87i6', ... -0351 '5xwgv17cn099xn7bxo2dq5h1dsdxrhn7', ... -0352 'azpn5lwrb4gind2mn5hnbmux0lao5vt5', ... -0353 'j19ybilr7js34uisnss92gvq5g6lljkk', ... -0354 'b5vn631jrwdzcj4uwmvbshe2ws3zoalm'}; -0355 if all(cellfun(@isempty,regexp(dataDir,strcat(hmmOptions,'$')))) %Check if dataDir ends with any of the hmmOptions -0356 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... -0357 ~exist(fullfile(dataDir,'fasta'),'dir') && ... -0358 ~exist(fullfile(dataDir,'aligned'),'dir') && ... -0359 ~exist(fullfile(dataDir,'hmms'),'dir') -0360 EM='Pre-trained HMMs set is not recognised. 
It should match any of the following sets:'; -0361 disp(EM); -0362 disp(hmmOptions); -0363 error('Fatal error occured. See the details above'); -0364 end -0365 else -0366 if exist(dataDir,'dir') && exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') -0367 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); -0368 elseif ~exist(dataDir,'dir') && exist([dataDir,'.zip'],'file') -0369 fprintf('Extracting the HMMs archive file... '); -0370 unzip([dataDir,'.zip']); -0371 fprintf('COMPLETE\n'); -0372 else -0373 hmmIndex=regexp(dataDir,hmmOptions); -0374 hmmIndex=~cellfun(@isempty,hmmIndex); -0375 fprintf('Downloading the HMMs archive file... '); -0376 try -0377 websave([dataDir,'.zip'],['https://chalmersuniversity.box.com/shared/static/',hmmLinks{hmmIndex},'.zip']); -0378 catch ME -0379 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') -0380 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') -0381 end -0382 end -0383 -0384 fprintf('COMPLETE\n'); -0385 fprintf('Extracting the HMMs archive file... '); -0386 unzip([dataDir,'.zip']); -0387 fprintf('COMPLETE\n'); -0388 end -0389 %Check if HMMs are extracted -0390 if ~exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') -0391 EM=['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']; -0392 disp(EM); -0393 error('Fatal error occured. See the details above'); -0394 end -0395 end -0396 end -0397 -0398 %Check if the fasta-file contains '/' or'\'. If not then it's probably just -0399 %a file name. Expand to full path. 
-0400 if any(fastaFile) -0401 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) -0402 fastaFile=which(fastaFile); -0403 end -0404 %Create the required sub-folders in dataDir if they dont exist -0405 if ~exist(fullfile(dataDir,'keggdb'),'dir') -0406 mkdir(dataDir,'keggdb'); -0407 end -0408 if ~exist(fullfile(dataDir,'fasta'),'dir') -0409 mkdir(dataDir,'fasta'); -0410 end -0411 if ~exist(fullfile(dataDir,'aligned'),'dir') -0412 mkdir(dataDir,'aligned'); -0413 end -0414 if ~exist(fullfile(dataDir,'hmms'),'dir') -0415 mkdir(dataDir,'hmms'); +0293 if nargin<13 +0294 nSequences=inf; +0295 %Include all sequences for each reaction +0296 end +0297 if nargin<14 +0298 seqIdentity=0.9; +0299 end +0300 +0301 if isempty(fastaFile) +0302 fprintf(['\n\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' organismID '</strong> ***\n\n']); +0303 else +0304 fprintf('\n\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); +0305 %Check if query fasta exists +0306 fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir +0307 end +0308 +0309 %Run the external binaries multi-threaded to use all logical cores assigned +0310 %to MATLAB +0311 cores = evalc('feature(''numcores'')'); +0312 cores = strsplit(cores, 'MATLAB was assigned: '); +0313 cores = regexp(cores{2},'^\d*','match'); +0314 cores = cores{1}; +0315 +0316 %Get the directory for RAVEN Toolbox. This is to get the path to the third +0317 %party software used +0318 [ST, I]=dbstack('-completenames'); +0319 ravenPath=fileparts(fileparts(fileparts(ST(I).file))); +0320 +0321 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, +0322 %compatible with the the current RAVEN version. The user may have the +0323 %required zip file already in working directory or have it extracted. 
If +0324 %the zip file and directory is not here, it is downloaded from the cloud +0325 if ~isempty(dataDir) +0326 hmmOptions={'euk90_kegg100','prok90_kegg100'}; +0327 if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. +0328 %If not, then check whether the required folders exist anyway. +0329 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... +0330 ~exist(fullfile(dataDir,'fasta'),'dir') && ... +0331 ~exist(fullfile(dataDir,'aligned'),'dir') && ... +0332 ~exist(fullfile(dataDir,'hmms'),'dir') +0333 error(['Pre-trained HMMs set is not recognised. If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) +0334 end +0335 else +0336 if exist(dataDir,'dir') && exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') +0337 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); +0338 elseif ~exist(dataDir,'dir') && exist([dataDir,'.zip'],'file') +0339 fprintf('Extracting the HMMs archive file... '); +0340 unzip([dataDir,'.zip']); +0341 fprintf('COMPLETE\n'); +0342 else +0343 hmmIndex=strcmp(dataDir,hmmOptions); +0344 if ~any(hmmIndex) +0345 error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg100 and euk90_kegg100). ' ... +0346 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) +0347 else +0348 fprintf('Downloading the HMMs archive file... '); +0349 try +0350 websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.6.0/',hmmOptions{hmmIndex},'.zip']); +0351 catch ME +0352 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') +0353 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. 
If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') +0354 end +0355 end +0356 end +0357 +0358 fprintf('COMPLETE\n'); +0359 fprintf('Extracting the HMMs archive file... '); +0360 unzip([dataDir,'.zip']); +0361 fprintf('COMPLETE\n'); +0362 end +0363 %Check if HMMs are extracted +0364 if ~exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') +0365 error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); +0366 end +0367 end +0368 end +0369 +0370 %Check if the fasta-file contains '/' or'\'. If not then it's probably just +0371 %a file name. Expand to full path. +0372 if any(fastaFile) +0373 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) +0374 fastaFile=which(fastaFile); +0375 end +0376 %Create the required sub-folders in dataDir if they dont exist +0377 if ~exist(fullfile(dataDir,'keggdb'),'dir') +0378 mkdir(dataDir,'keggdb'); +0379 end +0380 if ~exist(fullfile(dataDir,'fasta'),'dir') +0381 mkdir(dataDir,'fasta'); +0382 end +0383 if ~exist(fullfile(dataDir,'aligned'),'dir') +0384 mkdir(dataDir,'aligned'); +0385 end +0386 if ~exist(fullfile(dataDir,'hmms'),'dir') +0387 mkdir(dataDir,'hmms'); +0388 end +0389 if ~exist(outDir,'dir') +0390 mkdir(outDir); +0391 end +0392 end +0393 +0394 %First generate the full global KEGG model. Can be provided as input. +0395 %Otherwise, getModelFromKEGG is run. 
The dataDir must not be supplied as +0396 %there is also an internal RAVEN version available +0397 if nargin==15 +0398 model=globalModel.model; +0399 KOModel=globalModel.KOModel; +0400 elseif any(dataDir) +0401 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0402 else +0403 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0404 end +0405 model.id=organismID; +0406 model.c=zeros(numel(model.rxns),1); +0407 +0408 %If no FASTA file is supplied, then just remove all genes which are not for +0409 %the given organism ID +0410 if isempty(fastaFile) +0411 %Check if organismID can be found in KEGG species list or is +0412 %set to "eukaryotes" or "prokaryotes" +0413 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); +0414 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) +0415 error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); 0416 end -0417 if ~exist(outDir,'dir') -0418 mkdir(outDir); -0419 end -0420 end -0421 -0422 %First generate the full KEGG model. The dataDir must not be supplied as -0423 %there is also an internal RAVEN version available -0424 if any(dataDir) -0425 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0426 else -0427 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0428 end -0429 model.id=organismID; -0430 model.c=zeros(numel(model.rxns),1); -0431 -0432 %If no FASTA file is supplied, then just remove all genes which are not for -0433 %the given organism ID -0434 if isempty(fastaFile) -0435 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... 
']); -0436 if ismember(organismID,{'eukaryotes','prokaryotes'}) -0437 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0438 if strcmp(organismID,'eukaryotes') -0439 proxyid='hsa'; -0440 %Use H. sapiens here -0441 else -0442 proxyid='eco'; -0443 %Use E. coli here -0444 end -0445 [~, phylDistId]=ismember(proxyid,phylDists.ids); -0446 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); -0447 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); -0448 I=ismember(upper(taxIDs),upper(idsToKeep)); -0449 else -0450 %KEGG organism IDs may have three or four letters -0451 organismID=strcat(organismID,':'); -0452 %Add colon for accurate matching -0453 if length(organismID)==4 -0454 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); -0455 elseif length(organismID)==5 -0456 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); -0457 end -0458 end -0459 %Remove those genes -0460 model.genes=model.genes(I); -0461 model.rxnGeneMat=model.rxnGeneMat(:,I); -0462 fprintf('COMPLETE\n'); -0463 end -0464 -0465 %First remove all reactions without genes -0466 if keepSpontaneous==true -0467 fprintf('Removing non-spontaneous reactions without GPR rules... '); -0468 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); -0469 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0470 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); -0471 else -0472 fprintf('Removing reactions without GPR rules... '); -0473 I=~any(model.rxnGeneMat,2); -0474 end -0475 model=removeReactions(model,I,true); -0476 fprintf('COMPLETE\n'); -0477 -0478 %Clean gene names -0479 fprintf('Fixing gene names in the model... 
'); -0480 for i=1:numel(model.genes) -0481 %First get rid of the prefix organism id -0482 model.genes{i}=model.genes{i}(strfind(model.genes{i},':')+1:end); -0483 %Find and remove the description in parentheses if any -0484 s=strfind(model.genes{i},'('); -0485 if any(s) -0486 model.genes{i}=model.genes{i}(1:s-1); -0487 end -0488 end -0489 fprintf('COMPLETE\n'); -0490 -0491 %If no FASTA file is supplied, then we are done here -0492 if isempty(fastaFile) -0493 %Create grRules -0494 fprintf('Constructing GPR associations and annotations for the model... '); -0495 model.grRules=cell(numel(model.rxns),1); -0496 model.grRules(:)={''}; -0497 %Add the gene associations as 'or' -0498 for i=1:numel(model.rxns) -0499 %Find the involved genes -0500 I=find(model.rxnGeneMat(i,:)); -0501 if any(I) -0502 model.grRules{i}=['(' model.genes{I(1)}]; -0503 for j=2:numel(I) -0504 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -0505 end -0506 model.grRules{i}=[model.grRules{i} ')']; -0507 end -0508 end -0509 %Fix grRules and reconstruct rxnGeneMat -0510 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output -0511 model.grRules = grRules; -0512 model.rxnGeneMat = rxnGeneMat; -0513 %Add geneMiriams, assuming that it follows the syntax -0514 %kegg.genes/organismID:geneName -0515 model.geneMiriams=''; -0516 for i=1:numel(model.genes) -0517 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; -0518 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); -0519 end -0520 %Add the description to the reactions -0521 for i=1:numel(model.rxns) -0522 if ~isempty(model.rxnNotes{i}) -0523 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); -0524 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. 
'); -0525 else -0526 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; -0527 end -0528 end -0529 fprintf('COMPLETE\n\n'); -0530 fprintf('*** Model reconstruction complete ***\n'); -0531 return; -0532 end +0417 +0418 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']); +0419 if ismember(organismID,{'eukaryotes','prokaryotes'}) +0420 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0421 if strcmp(organismID,'eukaryotes') +0422 proxyid='hsa'; +0423 %Use H. sapiens here +0424 else +0425 proxyid='eco'; +0426 %Use E. coli here +0427 end +0428 [~, phylDistId]=ismember(proxyid,phylDists.ids); +0429 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); +0430 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); +0431 I=ismember(upper(taxIDs),upper(idsToKeep)); +0432 else +0433 %KEGG organism IDs may have three or four letters +0434 organismID=strcat(organismID,':'); +0435 %Add colon for accurate matching +0436 if length(organismID)==4 +0437 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); +0438 elseif length(organismID)==5 +0439 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); +0440 end +0441 end +0442 %Remove those genes +0443 model.genes=model.genes(I); +0444 model.rxnGeneMat=model.rxnGeneMat(:,I); +0445 fprintf('COMPLETE\n'); +0446 end +0447 +0448 %First remove all reactions without genes +0449 if keepSpontaneous==true +0450 fprintf('Removing non-spontaneous reactions without GPR rules... '); +0451 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); +0452 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0453 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); +0454 else +0455 fprintf('Removing reactions without GPR rules... 
'); +0456 I=~any(model.rxnGeneMat,2); +0457 end +0458 model=removeReactions(model,I,true); +0459 fprintf('COMPLETE\n'); +0460 +0461 %Clean gene names +0462 fprintf('Fixing gene names in the model... '); +0463 %Get rid of the prefix organism id +0464 model.genes=regexprep(model.genes,'^\w+?:',''); +0465 fprintf('COMPLETE\n'); +0466 +0467 %If no FASTA file is supplied, then we are done here +0468 if isempty(fastaFile) +0469 %Create grRules +0470 fprintf('Constructing GPR associations and annotations for the model... '); +0471 model.grRules=cell(numel(model.rxns),1); +0472 model.grRules(:)={''}; +0473 %Add the gene associations as 'or' +0474 for i=1:numel(model.rxns) +0475 %Find the involved genes +0476 I=find(model.rxnGeneMat(i,:)); +0477 if any(I) +0478 model.grRules{i}=['(' model.genes{I(1)}]; +0479 for j=2:numel(I) +0480 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +0481 end +0482 model.grRules{i}=[model.grRules{i} ')']; +0483 end +0484 end +0485 %Fix grRules and reconstruct rxnGeneMat +0486 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output +0487 model.grRules = grRules; +0488 model.rxnGeneMat = rxnGeneMat; +0489 %Add geneMiriams, assuming that it follows the syntax +0490 %kegg.genes/organismID:geneName +0491 model.geneMiriams=''; +0492 for i=1:numel(model.genes) +0493 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; +0494 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); +0495 end +0496 %Add the description to the reactions +0497 for i=1:numel(model.rxns) +0498 if ~isempty(model.rxnNotes{i}) +0499 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); +0500 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. 
'); +0501 else +0502 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; +0503 end +0504 end +0505 fprintf('COMPLETE\n\n'); +0506 fprintf('*** Model reconstruction complete ***\n'); +0507 return; +0508 end +0509 +0510 %Create a phylogenetic distance structure +0511 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0512 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); +0513 +0514 %Calculate the real maximal distance now. An abitary large number of 1000 +0515 %is used for the "all in kingdom" or "all sequences" options. This is a bit +0516 %inconvenient way to do it, but it is to make it fit with some older code +0517 if isinf(maxPhylDist) || maxPhylDist==-1 +0518 maxPhylDist=1000; +0519 end +0520 +0521 %Get the KO ids for which files have been generated. Maybe not the neatest +0522 %way.. +0523 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); +0524 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); +0525 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); +0526 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); +0527 outFiles=listFiles(fullfile(outDir,'*.out')); +0528 +0529 %Check if multi-FASTA files should be generated. This should only be +0530 %performed if there are IDs in the KOModel structure that haven't been +0531 %parsed yet +0532 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); 0533 -0534 %Create a phylogenetic distance structure -0535 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0536 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); -0537 -0538 %Calculate the real maximal distance now. An abitary large number of 1000 -0539 %is used for the "all in kingdom" or "all sequences" options. 
This is a bit -0540 %inconvenient way to do it, but it is to make it fit with some older code -0541 if isinf(maxPhylDist) || maxPhylDist==-1 -0542 maxPhylDist=1000; -0543 end -0544 -0545 %Get the KO ids for which files have been generated. Maybe not the neatest -0546 %way.. -0547 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); -0548 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); -0549 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); -0550 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); -0551 outFiles=listFiles(fullfile(outDir,'*.out')); -0552 -0553 %Check if multi-FASTA files should be generated. This should only be -0554 %performed if there are IDs in the KOModel structure that haven't been -0555 %parsed yet -0556 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); -0557 -0558 if ~isempty(missingFASTA) -0559 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') -0560 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; -0561 dispEM(EM); -0562 end -0563 %Only construct models for KOs which don't have files already -0564 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); -0565 %Permute the order of the KOs in the model so that constructMultiFasta -0566 %can be run on several processors at once -0567 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); -0568 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); -0569 else -0570 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 
COMPLETE\n'); -0571 end -0572 -0573 if isunix -0574 if ismac -0575 binEnd='.mac'; -0576 else -0577 binEnd=''; -0578 end -0579 elseif ispc -0580 binEnd=''; -0581 else -0582 EM='Unknown OS, exiting.'; -0583 disp(EM); -0584 return -0585 end -0586 -0587 %Check if alignment of FASTA files should be performed -0588 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); -0589 if ~isempty(missingAligned) -0590 if seqIdentity==-1 -0591 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... '); -0592 else -0593 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... '); -0594 end -0595 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); -0596 progressFlag=0; -0597 %Update fastaFiles. This is needed once rebuilding KEGG from FTP dump -0598 %files for more accurate progress reporting -0599 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); -0600 %Align all sequences using MAFFT -0601 for i=1:numel(missingAligned) -0602 %This is checked here because it could be that it is created by a -0603 %parallel process. The faw-files are saved as temporary files to -0604 %kept track of which files are being worked on -0605 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... -0606 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') -0607 %Check that the multi-FASTA file exists. It should do so since -0608 %we are saving empty files as well. 
Print a warning and -0609 %continue if not -0610 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') -0611 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; -0612 dispEM(EM,false); -0613 continue; -0614 end -0615 -0616 %If the multi-FASTA file is empty then save an empty aligned -0617 %file and continue -0618 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0619 if s.bytes<=0 -0620 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); -0621 fclose(fid); -0622 continue; -0623 end -0624 -0625 %Create an empty file to prevent other threads to start to work -0626 %on the same alignment -0627 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); -0628 fclose(fid); -0629 -0630 %First load the FASTA file, then select up to nSequences -0631 %sequences of the most closely related species, apply any -0632 %constraints from maxPhylDist, and save it as a temporary file, -0633 %and create the model from that +0534 if ~isempty(missingFASTA) +0535 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') +0536 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; +0537 dispEM(EM); +0538 end +0539 %Only construct models for KOs which don't have files already +0540 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); +0541 %Permute the order of the KOs in the model so that constructMultiFasta +0542 %can be run on several processors at once +0543 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); +0544 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); +0545 else +0546 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 
COMPLETE\n'); +0547 end +0548 +0549 if isunix +0550 if ismac +0551 binEnd='.mac'; +0552 else +0553 binEnd=''; +0554 end +0555 elseif ispc +0556 binEnd=''; +0557 else +0558 EM='Unknown OS, exiting.'; +0559 disp(EM); +0560 return +0561 end +0562 +0563 %Check if alignment of FASTA files should be performed +0564 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); +0565 if ~isempty(missingAligned) +0566 if seqIdentity==-1 +0567 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0568 else +0569 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0570 end +0571 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); +0572 tmpFile=tempname; +0573 %On Windows, paths need to be translated to Unix before parsing it to WSL +0574 if ispc +0575 wslPath.tmpFile=getWSLpath(tmpFile); +0576 %mafft has problems writing to terminal (/dev/stderr) when running +0577 %on WSL via MATLAB, instead write and read progress file +0578 mafftOutput = tempname; +0579 wslPath.mafftOutput=getWSLpath(mafftOutput); +0580 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); +0581 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); +0582 end +0583 +0584 for i=1:numel(missingAligned) +0585 %This is checked here because it could be that it is created by a +0586 %parallel process. The faw-files are saved as temporary files to +0587 %kept track of which files are being worked on +0588 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... +0589 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') +0590 %Check that the multi-FASTA file exists. It should do so since +0591 %we are saving empty files as well. 
Print a warning and +0592 %continue if not +0593 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') +0594 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; +0595 dispEM(EM,false); +0596 continue; +0597 end +0598 +0599 %If the multi-FASTA file is empty then save an empty aligned +0600 %file and continue +0601 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0602 if s.bytes<=0 +0603 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); +0604 fclose(fid); +0605 continue; +0606 end +0607 +0608 %Create an empty file to prevent other threads to start to work +0609 %on the same alignment +0610 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); +0611 fclose(fid); +0612 +0613 %First load the FASTA file, then select up to nSequences +0614 %sequences of the most closely related species, apply any +0615 %constraints from maxPhylDist, and save it as a temporary file, +0616 %and create the model from that +0617 +0618 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0619 phylDist=inf(numel(fastaStruct),1); +0620 for j=1:numel(fastaStruct) +0621 %Get the organism abbreviation +0622 index=strfind(fastaStruct(j).Header,':'); +0623 if any(index) +0624 abbrev=fastaStruct(j).Header(1:index(1)-1); +0625 [~, index]=ismember(abbrev,phylDistStruct.ids); +0626 if any(index) +0627 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); +0628 end +0629 end +0630 end +0631 +0632 %Inf means that it should not be included +0633 phylDist(phylDist>maxPhylDist)=[]; 0634 -0635 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0636 phylDist=inf(numel(fastaStruct),1); -0637 for j=1:numel(fastaStruct) -0638 %Get the organism abbreviation -0639 index=strfind(fastaStruct(j).Header,':'); -0640 if any(index) -0641 abbrev=fastaStruct(j).Header(1:index(1)-1); -0642 [~, index]=ismember(abbrev,phylDistStruct.ids); -0643 if any(index) -0644 
phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); -0645 end -0646 end -0647 end -0648 -0649 %Inf means that it should not be included -0650 phylDist(phylDist>maxPhylDist)=[]; -0651 -0652 %Sort based on phylDist -0653 [~, order]=sort(phylDist); -0654 -0655 %Save the first nSequences hits to a temporary FASTA file -0656 if nSequences<=numel(fastaStruct) -0657 fastaStruct=fastaStruct(order(1:nSequences)); -0658 else -0659 fastaStruct=fastaStruct(order); -0660 end -0661 -0662 %Do the clustering and alignment if there are more than one -0663 %sequences, otherwise just save the sequence (or an empty file) -0664 if numel(fastaStruct)>1 -0665 if seqIdentity==0.9 -0666 cdhitInp100=tempname; -0667 fastawrite(cdhitInp100,fastaStruct); -0668 cdhitInp90=tempname; -0669 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']); -0670 if status~=0 -0671 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0672 dispEM(EM); -0673 end -0674 %Remove the old tempfile -0675 if exist(cdhitInp100, 'file') -0676 delete([cdhitInp100 '*']); -0677 end -0678 tmpFile=tempname; -0679 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' tmpFile '" -c 0.9 -n 5 -M 2000 -aL 0.8']); -0680 if status~=0 -0681 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0682 dispEM(EM); -0683 end -0684 %Remove the old tempfile -0685 if exist(cdhitInp90, 'file') -0686 delete([cdhitInp90 '*']); -0687 end -0688 elseif seqIdentity==0.5 -0689 cdhitInp100=tempname; -0690 fastawrite(cdhitInp100,fastaStruct); -0691 cdhitInp90=tempname; -0692 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']); -0693 if status~=0 -0694 EM=['Error when 
performing clustering of ' missingAligned{i} ':\n' output]; -0695 dispEM(EM); -0696 end -0697 %Remove the old tempfile -0698 if exist(cdhitInp100, 'file') -0699 delete([cdhitInp100 '*']); -0700 end -0701 cdhitInp50=tempname; -0702 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' cdhitInp50 '" -c 0.9 -n 5 -M 2000 -aL 0.8']); -0703 if status~=0 -0704 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0705 dispEM(EM); -0706 end -0707 %Remove the old tempfile -0708 if exist(cdhitInp90, 'file') -0709 delete([cdhitInp90 '*']); -0710 end -0711 tmpFile=tempname; -0712 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp50 '" -o "' tmpFile '" -c 0.5 -n 3 -M 2000 -aL 0.8']); -0713 if status~=0 -0714 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0715 dispEM(EM); -0716 end -0717 %Remove the old tempfile -0718 if exist(cdhitInp50, 'file') -0719 delete([cdhitInp50 '*']); -0720 end -0721 elseif seqIdentity~=-1 -0722 cdhitInpCustom=tempname; -0723 fastawrite(cdhitInpCustom,fastaStruct); -0724 tmpFile=tempname; -0725 if seqIdentity<=1 && seqIdentity>0.7 -0726 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 5 -M 2000']); -0727 elseif seqIdentity>0.6 -0728 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 4 -M 2000']); -0729 elseif seqidentity>0.5 -0730 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 3 -M 2000']); -0731 elseif seqidentity>0.4 -0732 
[status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 2 -M 2000']); -0733 else -0734 EM='The provided seqIdentity must be between 0 and 1\n'; -0735 dispEM(EM); -0736 end -0737 if status~=0 -0738 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0739 dispEM(EM); -0740 end -0741 %Remove the old tempfile -0742 if exist(cdhitInpCustom, 'file') -0743 delete([cdhitInpCustom '*']); -0744 end -0745 else -0746 %This means that CD-HIT should be skipped since -0747 %seqIdentity is equal to -1 -0748 tmpFile=tempname; -0749 fastawrite(tmpFile,fastaStruct); -0750 end -0751 %Do the alignment for this file -0752 if ismac -0753 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0754 elseif isunix -0755 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0756 elseif ispc -0757 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-win','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0758 end -0759 if status~=0 -0760 %It could be that alignment failed because only one -0761 %sequence was left after clustering. 
If that is the -0762 %case, then the clustered file is just copied as 'faw' -0763 %file -0764 if any(regexp(output,'Only 1 sequence found')) -0765 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); -0766 else -0767 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; -0768 dispEM(EM); -0769 end -0770 end -0771 %Remove the old tempfile -0772 if exist(tmpFile, 'file') -0773 delete([tmpFile '*']); -0774 end -0775 else -0776 %If there is only one sequence then it's not possible to do -0777 %a multiple alignment. Just print the sequence instead. An -0778 %empty file was written previously so that doesn't have to -0779 %be dealt with -0780 if numel(fastaStruct)==1 -0781 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); -0782 end -0783 end -0784 %Move the temporary file to the real one -0785 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); -0786 -0787 %Print the progress: no need to update this for every -0788 %iteration, just report once 25%, 50% and 75% are done -0789 if progressFlag==0 && i>numel(missingAligned)*0.25 -0790 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100); -0791 progressFlag=progressFlag+1; -0792 elseif (progressFlag==1 && i>=numel(missingAligned)*0.5) || (progressFlag==2 && i>=numel(missingAligned)*0.75) -0793 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100); -0794 progressFlag=progressFlag+1; -0795 end -0796 end -0797 end -0798 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0799 else -0800 if seqIdentity==-1 -0801 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0802 else -0803 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 
COMPLETE\n'); -0804 end -0805 end -0806 -0807 %Check if training of Hidden Markov models should be performed -0808 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); -0809 if ~isempty(missingHMMs) -0810 fprintf('Training the KEGG Orthology specific HMMs... '); -0811 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); -0812 progressFlag=0; -0813 %Update alignedFiles. This is needed once rebuilding KEGG from FTP dump -0814 %files for more accurate progress reporting -0815 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); -0816 %Train models for all missing KOs -0817 for i=1:numel(missingHMMs) -0818 %This is checked here because it could be that it is created by a -0819 %parallel process -0820 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') -0821 %Check that the aligned FASTA file exists. It could be that it -0822 %is still being worked on by some other instance of the program -0823 %(the .faw file should then exist). This should not happen on a -0824 %single computer. It doesn't throw an error, because it should -0825 %finalize the ones it can -0826 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') -0827 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; -0828 dispEM(EM,false); -0829 continue; -0830 end -0831 -0832 %If the multi-FASTA file is empty then save an empty aligned -0833 %file and continue -0834 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); -0835 if s.bytes<=0 -0836 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); -0837 fclose(fid); -0838 continue; -0839 end -0840 %Create a temporary file to indicate that it is working on the -0841 %KO. 
This is because hmmbuild cannot overwrite existing files -0842 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); -0843 fclose(fid); -0844 -0845 %Create HMM -0846 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); -0847 if status~=0 -0848 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; -0849 dispEM(EM); -0850 end -0851 -0852 %Delete the temporary file -0853 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); -0854 -0855 %Print the progress: no need to update this for every -0856 %iteration, just report once 25%, 50% and 75% are done -0857 if progressFlag==0 && i>numel(missingHMMs)*0.25 -0858 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100); -0859 progressFlag=progressFlag+1; -0860 elseif (progressFlag==1 && i>=numel(missingHMMs)*0.5) || (progressFlag==2 && i>=numel(missingHMMs)*0.75) -0861 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100); -0862 progressFlag=progressFlag+1; -0863 end -0864 end -0865 end -0866 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0867 else -0868 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); -0869 end -0870 -0871 %Check which new .out files that should be generated. Check if training of -0872 %Hidden Markov models should be performed -0873 missingOUT=setdiff(KOModel.rxns,outFiles); -0874 if ~isempty(missingOUT) -0875 fprintf(['Querying <strong>' strrep(fastaFile,'\','/') '</strong> against the KEGG Orthology specific HMMs... ']); -0876 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); -0877 progressFlag=0; -0878 %Update hmmFiles. 
This is needed once rebuilding KEGG from FTP dump -0879 %files for more accurate progress reporting -0880 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); -0881 for i=1:numel(missingOUT) -0882 %This is checked here because it could be that it is created by a -0883 %parallel process -0884 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') -0885 %Check that the HMM file exists. It should do so since %we are -0886 %saving empty files as well. Print a warning and continue if -0887 %not -0888 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') -0889 EM=['The HMM file for ' missingOUT{i} ' does not exist']; -0890 dispEM(EM,false); -0891 continue; -0892 end -0893 -0894 %Save an empty file to prevent several threads working on the -0895 %same file -0896 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0897 fclose(fid); -0898 -0899 %If the HMM file is empty then save an out file and continue -0900 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); -0901 if s.bytes<=0 -0902 continue; -0903 end -0904 -0905 %Check each gene in the input file against this model -0906 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); -0907 if status~=0 -0908 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; -0909 dispEM(EM); -0910 end -0911 -0912 %Save the output to a file -0913 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0914 fwrite(fid,output); -0915 fclose(fid); -0916 -0917 %Print the progress: no need to update this for every -0918 %iteration, just report once 25%, 50% and 75% are done -0919 if progressFlag==0 && i>numel(missingOUT)*0.25 -0920 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100); -0921 progressFlag=progressFlag+1; -0922 elseif (progressFlag==1 && i>=numel(missingOUT)*0.5) || (progressFlag==2 && i>=numel(missingOUT)*0.75) -0923 
fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100); -0924 progressFlag=progressFlag+1; -0925 end -0926 end -0927 end -0928 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0929 else -0930 fprintf(['Querying <strong>' fastaFile '</strong> against the KEGG Orthology specific HMMs... COMPLETE\n']); -0931 end -0932 -0933 -0934 %***Begin retrieving the output and putting together the resulting model -0935 -0936 fprintf('Parsing the HMM search results... '); -0937 %Retrieve matched genes from the HMMs -0938 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes -0939 genes=cell(3000,1); -0940 %Store the best score for a gene in a hash list (since it will be searching -0941 %many times) -0942 hTable = java.util.Hashtable; -0943 -0944 geneCounter=0; -0945 for i=1:numel(KOModel.rxns) -0946 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') -0947 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); -0948 beginMatches=false; -0949 while 1 -0950 %Get the next line -0951 tline = fgetl(fid); -0952 -0953 %Abort at end of file -0954 if ~ischar(tline) -0955 break; -0956 end -0957 -0958 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) -0959 break; -0960 end -0961 -0962 if beginMatches==false -0963 %This is how the listing of matches begins -0964 if any(strfind(tline,'E-value ')) -0965 %Read one more line that is only padding -0966 tline = fgetl(fid); -0967 beginMatches=true; -0968 end -0969 else -0970 %If matches should be read -0971 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) -0972 elements=regexp(tline,' ','split'); -0973 elements=elements(cellfun(@any,elements)); -0974 -0975 %Check if the match is below the treshhold -0976 score=str2double(elements{1}); -0977 gene=elements{9}; -0978 if score<=cutOff -0979 %If the score is exactly 0, change it to a very -0980 %small value to avoid NaN -0981 if score==0 
-0982 score=10^-250; -0983 end -0984 %Check if the gene is added already and, is so, get -0985 %the best score for it -0986 I=hTable.get(gene); -0987 if any(I) -0988 koGeneMat(i,I)=score; -0989 else -0990 geneCounter=geneCounter+1; -0991 %The gene was not present yet so add it -0992 hTable.put(gene,geneCounter); -0993 genes{geneCounter}=gene; -0994 koGeneMat(i,geneCounter)=score; -0995 end -0996 end -0997 else -0998 break; -0999 end -1000 end -1001 end -1002 fclose(fid); -1003 end -1004 end -1005 fprintf('COMPLETE\n'); -1006 -1007 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); -1008 koGeneMat=koGeneMat(:,1:geneCounter); -1009 -1010 %Remove the genes for each KO that are below minScoreRatioKO. -1011 for i=1:size(koGeneMat,1) -1012 J=find(koGeneMat(i,:)); -1013 if any(J) -1014 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; -1015 end -1016 end -1017 -1018 %Remove the KOs for each gene that are below minScoreRatioG -1019 for i=1:size(koGeneMat,2) -1020 J=find(koGeneMat(:,i)); -1021 if any(J) -1022 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; -1023 end -1024 end -1025 fprintf('COMPLETE\n'); -1026 -1027 fprintf('Adding gene annotations to the model... '); -1028 %Create the new model -1029 model.genes=genes(1:geneCounter); -1030 model.grRules=cell(numel(model.rxns),1); -1031 model.grRules(:)={''}; -1032 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); -1033 -1034 %Loop through the reactions and add the corresponding genes -1035 for i=1:numel(model.rxns) -1036 if isstruct(model.rxnMiriams{i}) -1037 %Get all KOs -1038 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); -1039 KOs=model.rxnMiriams{i}.value(I); -1040 %Find the KOs and the corresponding genes -1041 J=ismember(KOModel.rxns,KOs); -1042 [~, K]=find(koGeneMat(J,:)); -1043 -1044 if any(K) -1045 model.rxnGeneMat(i,K)=1; -1046 %Also delete KOs for which no genes were found. 
If no genes at -1047 %all were matched to the reaction it will be deleted later -1048 L=sum(koGeneMat(J,:),2)==0; -1049 model.rxnMiriams{i}.value(I(L))=[]; -1050 model.rxnMiriams{i}.name(I(L))=[]; -1051 end -1052 end -1053 end -1054 fprintf('COMPLETE\n'); -1055 -1056 %Find and delete all reactions without genes. This also removes genes that -1057 %are not used (which could happen because minScoreRatioG and -1058 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions -1059 %without genes are kept in the model. Spontaneous reactions with original -1060 %gene associations are treated in the same way, like the rest of the -1061 %reactions - if gene associations were removed during HMM search, such -1062 %reactions are deleted from the model -1063 if keepSpontaneous==true -1064 %Not the most comprise way to delete reactions without genes, but this -1065 %makes the code easier to understand. Firstly the non-spontaneous -1066 %reactions without genes are removed. After that, the second deletion -1067 %step removes spontaneous reactions, which had gene associations before -1068 %HMM search, but no longer have after it -1069 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); -1070 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -1071 model=removeReactions(model,I,true,true); -1072 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); -1073 model=removeReactions(model,I,true,true); -1074 else -1075 %Just simply check for any new reactions without genes and remove -1076 %it -1077 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); -1078 I=~any(model.rxnGeneMat,2); -1079 model=removeReactions(model,I,true,true); -1080 end -1081 fprintf('COMPLETE\n'); -1082 -1083 fprintf('Constructing GPR rules and finalizing the model... 
'); -1084 %Add the gene associations as 'or' -1085 for i=1:numel(model.rxns) -1086 %Find the involved genes -1087 I=find(model.rxnGeneMat(i,:)); -1088 if any(I) -1089 model.grRules{i}=['(' model.genes{I(1)}]; -1090 for j=2:numel(I) -1091 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -1092 end -1093 model.grRules{i}=[model.grRules{i} ')']; -1094 end -1095 end -1096 -1097 %Fix grRules and reconstruct rxnGeneMat -1098 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output -1099 model.grRules = grRules; -1100 model.rxnGeneMat = rxnGeneMat; -1101 -1102 %Add the description to the reactions -1103 for i=1:numel(model.rxns) -1104 if ~isempty(model.rxnNotes{i}) -1105 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); -1106 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); -1107 else -1108 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; -1109 end -1110 end -1111 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); -1112 end -1113 -1114 function files=listFiles(directory) -1115 %Supporter function to list the files in a directory and return them as a -1116 %cell array -1117 temp=dir(directory); -1118 files=cell(numel(temp),1); -1119 for i=1:numel(temp) -1120 files{i}=temp(i,1).name; -1121 end -1122 files=strrep(files,'.fa',''); -1123 files=strrep(files,'.hmm',''); -1124 files=strrep(files,'.out',''); -1125 files=strrep(files,'.faw',''); -1126 end

    +0635 %Sort based on phylDist +0636 [~, order]=sort(phylDist); +0637 +0638 %Save the first nSequences hits to a temporary FASTA file +0639 if nSequences<=numel(fastaStruct) +0640 fastaStruct=fastaStruct(order(1:nSequences)); +0641 else +0642 fastaStruct=fastaStruct(order); +0643 end +0644 +0645 %Do the clustering and alignment if there are more than one +0646 %sequences, otherwise just save the sequence (or an empty file) +0647 if numel(fastaStruct)>1 +0648 if seqIdentity~=-1 +0649 cdhitInpCustom=tempname; +0650 fastawrite(cdhitInpCustom,fastaStruct); +0651 if seqIdentity<=1 && seqIdentity>0.7 +0652 nparam='5'; +0653 elseif seqIdentity>0.6 +0654 nparam='4'; +0655 elseif seqIdentity>0.5 +0656 nparam='3'; +0657 elseif seqIdentity>0.4 +0658 nparam='2'; +0659 else +0660 EM='The provided seqIdentity must be between 0 and 1\n'; +0661 dispEM(EM); +0662 end +0663 if ispc +0664 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); +0665 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0666 elseif ismac || isunix +0667 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0668 end +0669 if status~=0 +0670 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; +0671 dispEM(EM); +0672 end +0673 %Remove the old tempfile +0674 if exist(cdhitInpCustom, 'file') +0675 delete([cdhitInpCustom '*']); +0676 end +0677 else +0678 %This means that CD-HIT should be skipped since +0679 %seqIdentity is equal to -1 +0680 fastawrite(tmpFile,fastaStruct); +0681 end +0682 %Do the alignment for this file +0683 if ismac +0684 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > 
"' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0685 elseif isunix +0686 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0687 elseif ispc +0688 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); +0689 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); +0690 output=fileread(mafftOutput); +0691 delete(mafftOutput); +0692 end +0693 if status~=0 +0694 %It could be that alignment failed because only one +0695 %sequence was left after clustering. If that is the +0696 %case, then the clustered file is just copied as 'faw' +0697 %file +0698 if any(regexp(output,'Only 1 sequence found')) +0699 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); +0700 else +0701 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; +0702 dispEM(EM); +0703 end +0704 end +0705 %Remove the old tempfile +0706 if exist(tmpFile, 'file') +0707 delete([tmpFile '*']); +0708 end +0709 else +0710 %If there is only one sequence then it's not possible to do +0711 %a multiple alignment. Just print the sequence instead. 
An +0712 %empty file was written previously so that doesn't have to +0713 %be dealt with +0714 if numel(fastaStruct)==1 +0715 warnState = warning; %Save the current warning state +0716 warning('off','Bioinfo:fastawrite:AppendToFile'); +0717 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); +0718 warning(warnState) %Reset warning state to previous settings +0719 end +0720 end +0721 %Move the temporary file to the real one +0722 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); +0723 +0724 %Print the progress every 25 files +0725 if rem(i-1,25) == 0 +0726 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); +0727 progress=pad(progress,3,'left'); +0728 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0729 end +0730 end +0731 end +0732 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0733 else +0734 if seqIdentity==-1 +0735 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0736 else +0737 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0738 end +0739 end +0740 +0741 %Check if training of Hidden Markov models should be performed +0742 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); +0743 if ~isempty(missingHMMs) +0744 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); +0745 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); +0746 %Train models for all missing KOs +0747 for i=1:numel(missingHMMs) +0748 %This is checked here because it could be that it is created by a +0749 %parallel process +0750 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') +0751 %Check that the aligned FASTA file exists. 
It could be that it +0752 %is still being worked on by some other instance of the program +0753 %(the .faw file should then exist). This should not happen on a +0754 %single computer. It doesn't throw an error, because it should +0755 %finalize the ones it can +0756 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') +0757 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; +0758 dispEM(EM,false); +0759 continue; +0760 end +0761 +0762 %If the multi-FASTA file is empty then save an empty aligned +0763 %file and continue +0764 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); +0765 if s.bytes<=0 +0766 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); +0767 fclose(fid); +0768 continue; +0769 end +0770 %Create a temporary file to indicate that it is working on the +0771 %KO. This is because hmmbuild cannot overwrite existing files +0772 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); +0773 fclose(fid); +0774 +0775 %Create HMM +0776 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); +0777 if status~=0 +0778 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; +0779 dispEM(EM); +0780 end +0781 +0782 %Delete the temporary file +0783 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); +0784 +0785 %Print the progress every 25 files +0786 if rem(i-1,25) == 0 +0787 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); +0788 progress=pad(progress,3,'left'); +0789 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0790 end +0791 end +0792 end +0793 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0794 else +0795 fprintf('Training the KEGG Orthology specific HMMs... 
COMPLETE\n'); +0796 end +0797 +0798 %Check which new .out files that should be generated. Check if training of +0799 %Hidden Markov models should be performed +0800 missingOUT=setdiff(KOModel.rxns,outFiles); +0801 if ~isempty(missingOUT) +0802 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); +0803 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); +0804 for i=1:numel(missingOUT) +0805 %This is checked here because it could be that it is created by a +0806 %parallel process +0807 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') +0808 %Check that the HMM file exists. It should do so since %we are +0809 %saving empty files as well. Print a warning and continue if +0810 %not +0811 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') +0812 EM=['The HMM file for ' missingOUT{i} ' does not exist']; +0813 dispEM(EM,false); +0814 continue; +0815 end +0816 +0817 %Save an empty file to prevent several threads working on the +0818 %same file +0819 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0820 fclose(fid); +0821 +0822 %If the HMM file is empty then save an out file and continue +0823 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); +0824 if s.bytes<=0 +0825 continue; +0826 end +0827 +0828 %Check each gene in the input file against this model +0829 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); +0830 if status~=0 +0831 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; +0832 dispEM(EM); +0833 end +0834 +0835 %Save the output to a file +0836 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0837 fwrite(fid,output); +0838 fclose(fid); +0839 +0840 %Print the progress every 25 files +0841 if rem(i-1,25) == 0 +0842 
progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); +0843 progress=pad(progress,3,'left'); +0844 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0845 end +0846 end +0847 end +0848 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0849 else +0850 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); +0851 end +0852 +0853 +0854 %***Begin retrieving the output and putting together the resulting model +0855 +0856 fprintf('Parsing the HMM search results... '); +0857 %Retrieve matched genes from the HMMs +0858 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes +0859 genes=cell(3000,1); +0860 %Store the best score for a gene in a hash list (since it will be searching +0861 %many times) +0862 hTable = java.util.Hashtable; +0863 +0864 geneCounter=0; +0865 for i=1:numel(KOModel.rxns) +0866 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') +0867 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); +0868 beginMatches=false; +0869 while 1 +0870 %Get the next line +0871 tline = fgetl(fid); +0872 +0873 %Abort at end of file +0874 if ~ischar(tline) +0875 break; +0876 end +0877 +0878 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) +0879 break; +0880 end +0881 +0882 if beginMatches==false +0883 %This is how the listing of matches begins +0884 if any(strfind(tline,'E-value ')) +0885 %Read one more line that is only padding +0886 tline = fgetl(fid); +0887 beginMatches=true; +0888 end +0889 else +0890 %If matches should be read +0891 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) +0892 elements=regexp(tline,' ','split'); +0893 elements=elements(cellfun(@any,elements)); +0894 +0895 %Check if the match is below the treshhold +0896 score=str2double(elements{1}); +0897 gene=elements{9}; +0898 if score<=cutOff +0899 %If the score is exactly 0, change it to a very +0900 %small value to 
avoid NaN +0901 if score==0 +0902 score=10^-250; +0903 end +0904 %Check if the gene is added already and, is so, get +0905 %the best score for it +0906 I=hTable.get(gene); +0907 if any(I) +0908 koGeneMat(i,I)=score; +0909 else +0910 geneCounter=geneCounter+1; +0911 %The gene was not present yet so add it +0912 hTable.put(gene,geneCounter); +0913 genes{geneCounter}=gene; +0914 koGeneMat(i,geneCounter)=score; +0915 end +0916 end +0917 else +0918 break; +0919 end +0920 end +0921 end +0922 fclose(fid); +0923 end +0924 end +0925 fprintf('COMPLETE\n'); +0926 +0927 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); +0928 koGeneMat=koGeneMat(:,1:geneCounter); +0929 +0930 %Remove the genes for each KO that are below minScoreRatioKO. +0931 for i=1:size(koGeneMat,1) +0932 J=find(koGeneMat(i,:)); +0933 if any(J) +0934 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; +0935 end +0936 end +0937 +0938 %Remove the KOs for each gene that are below minScoreRatioG +0939 for i=1:size(koGeneMat,2) +0940 J=find(koGeneMat(:,i)); +0941 if any(J) +0942 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; +0943 end +0944 end +0945 fprintf('COMPLETE\n'); +0946 +0947 fprintf('Adding gene annotations to the model... 
'); +0948 %Create the new model +0949 model.genes=genes(1:geneCounter); +0950 model.grRules=cell(numel(model.rxns),1); +0951 model.grRules(:)={''}; +0952 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); +0953 +0954 %Loop through the reactions and add the corresponding genes +0955 for i=1:numel(model.rxns) +0956 if isstruct(model.rxnMiriams{i}) +0957 %Get all KOs +0958 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); +0959 KOs=model.rxnMiriams{i}.value(I); +0960 %Find the KOs and the corresponding genes +0961 J=ismember(KOModel.rxns,KOs); +0962 [~, K]=find(koGeneMat(J,:)); +0963 +0964 if any(K) +0965 model.rxnGeneMat(i,K)=1; +0966 %Also delete KOs for which no genes were found. If no genes at +0967 %all were matched to the reaction it will be deleted later +0968 L=sum(koGeneMat(J,:),2)==0; +0969 model.rxnMiriams{i}.value(I(L))=[]; +0970 model.rxnMiriams{i}.name(I(L))=[]; +0971 end +0972 end +0973 end +0974 fprintf('COMPLETE\n'); +0975 +0976 %Find and delete all reactions without genes. This also removes genes that +0977 %are not used (which could happen because minScoreRatioG and +0978 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions +0979 %without genes are kept in the model. Spontaneous reactions with original +0980 %gene associations are treated in the same way, like the rest of the +0981 %reactions - if gene associations were removed during HMM search, such +0982 %reactions are deleted from the model +0983 if keepSpontaneous==true +0984 %Not the most comprise way to delete reactions without genes, but this +0985 %makes the code easier to understand. Firstly the non-spontaneous +0986 %reactions without genes are removed. After that, the second deletion +0987 %step removes spontaneous reactions, which had gene associations before +0988 %HMM search, but no longer have after it +0989 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... 
'); +0990 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0991 model=removeReactions(model,I,true,true); +0992 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); +0993 model=removeReactions(model,I,true,true); +0994 else +0995 %Just simply check for any new reactions without genes and remove +0996 %it +0997 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); +0998 I=~any(model.rxnGeneMat,2); +0999 model=removeReactions(model,I,true,true); +1000 end +1001 fprintf('COMPLETE\n'); +1002 +1003 fprintf('Constructing GPR rules and finalizing the model... '); +1004 %Add the gene associations as 'or' +1005 for i=1:numel(model.rxns) +1006 %Find the involved genes +1007 I=find(model.rxnGeneMat(i,:)); +1008 if any(I) +1009 model.grRules{i}=['(' model.genes{I(1)}]; +1010 for j=2:numel(I) +1011 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +1012 end +1013 model.grRules{i}=[model.grRules{i} ')']; +1014 end +1015 end +1016 +1017 %Fix grRules and reconstruct rxnGeneMat +1018 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output +1019 model.grRules = grRules; +1020 model.rxnGeneMat = rxnGeneMat; +1021 +1022 %Add the description to the reactions +1023 for i=1:numel(model.rxns) +1024 if ~isempty(model.rxnNotes{i}) +1025 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); +1026 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. 
'); +1027 else +1028 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; +1029 end +1030 end +1031 %Remove the temp fasta file +1032 delete(fastaFile) +1033 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); +1034 end +1035 +1036 function files=listFiles(directory) +1037 %Supporter function to list the files in a directory and return them as a +1038 %cell array +1039 temp=dir(directory); +1040 files=cell(numel(temp),1); +1041 for i=1:numel(temp) +1042 files{i}=temp(i,1).name; +1043 end +1044 files=strrep(files,'.fa',''); +1045 files=strrep(files,'.hmm',''); +1046 files=strrep(files,'.out',''); +1047 files=strrep(files,'.faw',''); +1048 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/kegg/getModelFromKEGG.html b/doc/external/kegg/getModelFromKEGG.html index a0f5bdd5..7624e2c5 100644 --- a/doc/external/kegg/getModelFromKEGG.html +++ b/doc/external/kegg/getModelFromKEGG.html @@ -78,7 +78,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=getGenesFromKEGG getGenesFromKEGG
  • getMetsFromKEGG getMetsFromKEGG
  • getRxnsFromKEGG getRxnsFromKEGG
  • This function is called by: +
  • getKEGGModelForOrganism getKEGGModelForOrganism
  • SUBFUNCTIONS ^

    diff --git a/doc/external/kegg/getPhylDist.html b/doc/external/kegg/getPhylDist.html index d427a15b..3fd2bc21 100644 --- a/doc/external/kegg/getPhylDist.html +++ b/doc/external/kegg/getPhylDist.html @@ -55,7 +55,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • getKEGGModelForOrganism getKEGGModelForOrganism
  • +
  • getKEGGModelForOrganism getKEGGModelForOrganism
  • diff --git a/doc/external/kegg/getWSLpath.html b/doc/external/kegg/getWSLpath.html new file mode 100644 index 00000000..b5f665d1 --- /dev/null +++ b/doc/external/kegg/getWSLpath.html @@ -0,0 +1,80 @@ + + + + Description of getWSLpath + + + + + + + + + +
    Home > external > kegg > getWSLpath.m
    + + + +

    getWSLpath +

    + +

    PURPOSE ^

    +
    getWSLpath
    + +

    SYNOPSIS ^

    +
    function path=getWSLpath(path)
    + +

    DESCRIPTION ^

    +
     getWSLpath
    +   Translate Windows-style path to its Unix WSL (Windows Subsystem for
    +   Linux) equivalent.
    +
    +   Input:
    +   path        string with directory or file path, in Windows-style (e.g.
    +               'C:\Directory\')
    +
    +   Output:
    +   path        string with directory or file path, in Unix style (e.g.
    +               '/mnt/c/Directory/')
    +
    +   Uses the WSL function 'wslpath' to translate the path.
    +
    +   Usage: path=getWSLpath(path)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function path=getWSLpath(path)
    +0002 % getWSLpath
    +0003 %   Translate Windows-style path to its Unix WSL (Windows Subsystem for
    +0004 %   Linux) equivalent.
    +0005 %
    +0006 %   Input:
    +0007 %   path        string with directory or file path, in Windows-style (e.g.
    +0008 %               'C:\Directory\')
    +0009 %
    +0010 %   Output:
    +0011 %   path        string with directory or file path, in Unix style (e.g.
    +0012 %               '/mnt/c/Directory/')
    +0013 %
    +0014 %   Uses the WSL function 'wslpath' to translate the path. An error is
    +0015 %   thrown if the 'wsl wslpath' command returns a non-zero exit status.
    +0016 %
    +0017 %   Usage: path=getWSLpath(path)
    +0018 
    +0019 %The path is wrapped in single quotes in the command that is passed to wsl
    +0020 [status,wslOutput]=system(['wsl wslpath ''' path '''']);
    +0021 if status~=0
    +0022     %Without this check the error text would be returned as if it were a path
    +0023     error('Could not translate ''%s'' to a WSL path. Output of wslpath: %s',path,wslOutput);
    +0024 end
    +0025 %Remove only the trailing line-break(s); the rest of the output is the path
    +0026 path=regexprep(wslOutput,'[\r\n]+$','');
    +0027 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/external/kegg/index.html b/doc/external/kegg/index.html index a6620e9d..6bb246fa 100644 --- a/doc/external/kegg/index.html +++ b/doc/external/kegg/index.html @@ -19,7 +19,7 @@

    Index for external\kegg

    Matlab files in this directory:

    -
     constructMultiFastaconstructMultiFasta
     getGenesFromKEGGgetGenesFromKEGG
     getKEGGModelForOrganismgetKEGGModelForOrganism
     getMetsFromKEGGgetMetsFromKEGG
     getModelFromKEGGgetModelFromKEGG
     getPhylDistgetPhylDist
     getRxnsFromKEGGgetRxnsFromKEGG
    + constructMultiFastaconstructMultiFasta  getGenesFromKEGGgetGenesFromKEGG  getKEGGModelForOrganismgetKEGGModelForOrganism  getMetsFromKEGGgetMetsFromKEGG  getModelFromKEGGgetModelFromKEGG  getPhylDistgetPhylDist  getRxnsFromKEGGgetRxnsFromKEGG  getWSLpathgetWSLpath

    Other Matlab-specific files in this directory:

    Matlab Files found in these Directories

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    FSEOF deleteUnusedGenes getModelFromHomology randomSampling
    SBMLFromExcel dispEM getModelFromKEGG ravenCobraWrapper
    addExchangeRxns drawMap getModelFromMetaCyc removeBadRxns
    addGenesRaven drawPathway getObjectiveString removeGenes
    addJavaPaths expandModel getPathwayDimensions removeMets
    addMets exportForGit getPhylDist removeReactions
    addRavenToUserPath exportModel getRxnsFromKEGG replaceMets
    addRxns exportModelToSIF getRxnsFromMetaCyc reporterMetabolites
    addRxnsGenesMets exportToExcelFormat getRxnsInComp runINIT
    addSpontaneousRxns exportToTabDelimited getToolboxVersion scoreModel
    addTransport extractMiriam getTransportRxns setColorToMapRxns
    analyzeSampling fillGaps getWoLFScores setExchangeBounds
    buildEquation findGeneDeletions guessComposition setOmicDataToRxns
    canConsume fitParameters gurobiToCobraRes setParam
    canProduce fitTasks haveFlux setRavenSolver
    changeGeneAssoc followChanged importExcelModel setTitle
    changeGrRules followFluxes importModel simplifyModel
    changeRxns gapReport linkMetaCycKEGGRxns solveLP
    checkFileExistence generateNewIds loadSheet solveQP
    checkFunctionUniqueness getAllRxnsFromGenes loadWorkbook sortModel
    checkInstallation getAllSubGraphs makeFakeBlastStructure standardizeGrRules
    checkModelStruct getAllowedBounds makeSomething standardizeModelFieldOrder
    checkProduction getBlast mapCompartments startup
    checkRxn getBlastFromExcel mapPathwayRxnNames testBlast
    checkSolution getColorCodes markPathwayWithExpression testDiamond
    checkTasks getDiamond markPathwayWithFluxes tinitTests
    cleanSheet getElementalBalance mergeCompartments trimPathway
    closeModel getEnzymesFromMetaCyc mergeModels tutorial1
    cobraToGurobiProb getEssentialRxns optimizeProb tutorial2
    cobraToMosekRes getExchangeRxns parseFormulas tutorial2_solutions
    colorPathway getExpressionStructure parseHPA tutorial3
    colorSubsystem getFluxZ parseHPArna tutorial3_solutions
    combineMetaCycKEGGModels getFullPath parseRxnEqu tutorial4
    compareModels getGenesFromKEGG parseScores tutorial4_solutions
    compareMultipleModels getINITModel parseTaskList tutorial5
    constructEquations getIndexes permuteModel tutorial6
    constructMultiFasta getKEGGModelForOrganism plotAdditionalInfo updateDocumentation
    constructPathwayFromCelldesigner getMILPParams plotLabels writeSheet
    constructS getMetaCycModelForOrganism predictLocalization writeYaml
    consumeSomething getMetsFromKEGG printFluxes
    contractModel getMetsFromMetaCyc printModel
    convertToIrrev getMetsInComp printModelStats
    copyToComps getMinNrFluxes qMOMA
    + FSEOF copyToComps getMetsInComp printFluxes + SBMLFromExcel deleteUnusedGenes getMinNrFluxes printModel + addExchangeRxns diamondTests getModelFromHomology printModelStats + addGenesRaven dispEM getModelFromKEGG qMOMA + addJavaPaths drawMap getModelFromMetaCyc randomSampling + addMets drawPathway getObjectiveString ravenCobraWrapper + addRavenToUserPath expandModel getPathwayDimensions removeBadRxns + addRxns exportForGit getPhylDist removeGenes + addRxnsGenesMets exportModel getRxnsFromKEGG removeMets + addSpontaneousRxns exportModelToSIF getRxnsFromMetaCyc removeReactions + addTransport exportToExcelFormat getRxnsInComp replaceMets + analyzeSampling exportToTabDelimited getToolboxVersion reporterMetabolites + blastPlusTests extractMiriam getTransportRxns runINIT + buildEquation fillGaps getWSLpath scoreModel + canConsume findGeneDeletions getWoLFScores setColorToMapRxns + canProduce fitParameters guessComposition setExchangeBounds + cdhitTests fitTasks gurobiToCobraRes setOmicDataToRxns + changeGeneAssoc followChanged haveFlux setParam + changeGrRules followFluxes hmmerTests setRavenSolver + changeRxns gapReport importExcelModel setTitle + checkFileExistence generateNewIds importModel simplifyModel + checkFunctionUniqueness getAllRxnsFromGenes linkMetaCycKEGGRxns solveLP + checkInstallation getAllSubGraphs loadSheet solveQP + checkModelStruct getAllowedBounds loadWorkbook sortIdentifiers + checkProduction getBlast mafftTests sortIdentifiers_and_permuteModelTests + checkRxn getBlastFromExcel makeFakeBlastStructure sortModel + checkSolution getColorCodes makeSomething standardizeGrRules + checkTasks getDiamond mapCompartments standardizeModelFieldOrder + cleanSheet getElementalBalance mapPathwayRxnNames startup + closeModel getEnzymesFromMetaCyc markPathwayWithExpression tinitTests + cobraToGurobiProb getEssentialRxns markPathwayWithFluxes trimPathway + cobraToMosekRes getExchangeRxns mergeCompartments tutorial1 + colorPathway getExpressionStructure 
mergeModels tutorial2 + colorSubsystem getFluxZ optimizeProb tutorial2_solutions + combineMetaCycKEGGModels getFullPath parseFormulas tutorial3 + compareMultipleModels getGenesFromKEGG parseHPA tutorial3_solutions + compareRxnsGenesMetsComps getINITModel parseHPArna tutorial4 + constructEquations getIndexes parseRxnEqu tutorial4_solutions + constructMultiFasta getKEGGModelForOrganism parseScores tutorial5 + constructPathwayFromCelldesigner getMD5Hash parseTaskList tutorial6 + constructS getMILPParams permuteModel updateDocumentation + consumeSomething getMetaCycModelForOrganism plotAdditionalInfo writeSheet + contractModel getMetsFromKEGG plotLabels writeYaml + convertToIrrev getMetsFromMetaCyc predictLocalization
    Generated by m2html © 2005
    diff --git a/doc/installation/checkFunctionUniqueness.html b/doc/installation/checkFunctionUniqueness.html index 1d804c53..7348b95a 100644 --- a/doc/installation/checkFunctionUniqueness.html +++ b/doc/installation/checkFunctionUniqueness.html @@ -40,7 +40,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • checkInstallation checkInstallation
  • +
  • checkInstallation checkInstallation
  • diff --git a/doc/installation/checkInstallation.html b/doc/installation/checkInstallation.html index b0381763..a60fd0cd 100644 --- a/doc/installation/checkInstallation.html +++ b/doc/installation/checkInstallation.html @@ -24,7 +24,7 @@

    PURPOSE ^checkInstallation

    SYNOPSIS ^

    -
    function checkInstallation()
    +
    function checkInstallation(develMode)

    DESCRIPTION ^

     checkInstallation
    @@ -33,7 +33,12 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -45,204 +50,180 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 <!-- crossreference -->
 
-
+<h2><a name=SUBFUNCTIONS ^

    +

    SOURCE CODE ^

    -
    0001 function checkInstallation()
    +
    0001 function checkInstallation(develMode)
     0002 % checkInstallation
     0003 %   The purpose of this function is to check if all necessary functions are
     0004 %   installed and working. It also checks whether there are any functions
     0005 %   with overlapping names between RAVEN and other toolboxes or
     0006 %   user-defined functions, which are accessible from MATLAB pathlist
     0007 %
    -0008 %   Usage: checkInstallation()
    -0009 
    -0010 %Check if RAVEN is in the MATLAB path list
    -0011 paths=textscan(path,'%s','delimiter', pathsep);
    -0012 paths=paths{1};
    -0013 
    -0014 %Get the RAVEN path
    -0015 [ST, I]=dbstack('-completenames');
    -0016 [ravenDir,~,~]=fileparts(fileparts(ST(I).file));
    -0017 
    -0018 %Print the RAVEN version if it is not the development version
    -0019 if exist(fullfile(ravenDir,'version.txt'), 'file') == 2
    -0020     fprintf(['\n*** THE RAVEN TOOLBOX v.' fgetl(fopen(fullfile(ravenDir,'version.txt'))) ' ***\n\n']);
    -0021 else
    -0022     fprintf('\n*** THE RAVEN TOOLBOX - DEVELOPMENT VERSION ***\n\n');
    -0023 end
    -0024 
    -0025 fprintf('Checking if RAVEN is on the MATLAB path...\t\t\t\t\t\t\t\t\t');
    -0026 if ismember(ravenDir,paths)
    -0027     fprintf('OK\n');
    -0028 else
    -0029     fprintf('OK (just added)\n');
    -0030     subpath=regexp(genpath(ravenDir),pathsep,'split'); %List all subdirectories
    -0031     pathsToKeep=cellfun(@(x) isempty(strfind(x,'.git')),subpath) & cellfun(@(x) isempty(strfind(x,'doc')),subpath);
    -0032     addpath(strjoin(subpath(pathsToKeep),pathsep));
    -0033     savepath
    -0034 end
    +0008 %   Input:
    +0009 %   develMode       logical indicating development mode, which includes
    +0010 %                   testing of binaries that are required to update KEGG
    +0011 %                   HMMs (opt, default false)
    +0012 %
    +0013 %   Usage: checkInstallation(develMode)
    +0014 
    +0015 if nargin<1
    +0016     develMode=false;
    +0017 end
    +0018 
    +0019 %Check if RAVEN is in the MATLAB path list
    +0020 paths=textscan(path,'%s','delimiter', pathsep);
    +0021 paths=paths{1};
    +0022 
    +0023 %Get the RAVEN path
    +0024 [ST, I]=dbstack('-completenames');
    +0025 [ravenDir,~,~]=fileparts(fileparts(ST(I).file));
    +0026 
    +0027 %Print the RAVEN version if it is not the development version
    +0028 if exist(fullfile(ravenDir,'version.txt'), 'file') == 2
    +0029     fprintf(['\n*** THE RAVEN TOOLBOX v.' fgetl(fopen(fullfile(ravenDir,'version.txt'))) ' ***\n\n']);
    +0030 else
    +0031     fprintf('\n*** THE RAVEN TOOLBOX - DEVELOPMENT VERSION ***\n\n');
    +0032 end
    +0033 
    +0034 fprintf(['MATLAB R' version('-release') ' detected\n\n']);
     0035 
    -0036 %Add the required classes to the static Java path if not already added
    -0037 addJavaPaths();
    -0038 
    -0039 excelFile=fullfile(ravenDir,'tutorial','empty.xlsx');
    -0040 xmlFile=fullfile(ravenDir,'tutorial','empty.xml');
    -0041 matFile=fullfile(ravenDir,'tutorial','empty.mat');
    -0042 
    -0043 %Check if it is possible to parse an Excel file
    -0044 fprintf('Checking if it is possible to parse a model in Microsoft Excel format...\t');
    -0045 try
    -0046     importExcelModel(excelFile,false,false,true);
    -0047     fprintf('OK\n');
    -0048 catch
    -0049     fprintf('Not OK\n');
    -0050 end
    -0051 
    -0052 %Check if it is possible to import an SBML model using libSBML
    -0053 fprintf('Checking if it is possible to import an SBML model using libSBML...\t\t\t');
    -0054 try
    -0055     importModel(xmlFile);
    -0056     try
    -0057         libSBMLver=OutputSBML; % Only works in libSBML 5.17.0+
    -0058         fprintf('OK\n');
    -0059     catch
    -0060         fprintf(['Not OK\n\n'...
    -0061             'An older libSBML version was found, update to version 5.17.0 or higher\n'...
    -0062             'for a significant improvement of model import\n\n']);
    -0063     end
    -0064 catch
    -0065     fprintf(['Not OK\nTo import SBML models, download libSBML from\n'...
    -0066         'http://sbml.org/Software/libSBML/Downloading_libSBML and add to MATLAB path\n']);
    -0067 end
    -0068 
    -0069 %Define values for keepSolver and workingSolvers, needed for solver
    -0070 %functionality check
    -0071 keepSolver=false;
    -0072 workingSolvers='';
    -0073 %Get current solver. Set it to 'none', if it is not set
    -0074 if ~ispref('RAVEN','solver')
    -0075     fprintf('Solver found in preferences... NONE\n');
    -0076     setRavenSolver('none');
    -0077     curSolv=getpref('RAVEN','solver');
    -0078 else
    -0079     curSolv=getpref('RAVEN','solver');
    -0080     fprintf(['Solver found in preferences... ',curSolv,'\n']);
    -0081 end
    -0082 
    -0083 %Check if it is possible to solve an LP problem using different solvers
    -0084 solver={'gurobi','cobra'};
    -0085 
    -0086 for i=1:numel(solver)
    -0087     fprintf(['Checking if it is possible to solve an LP problem using ',solver{i},'...\t\t\t']);
    -0088     try
    -0089         setRavenSolver(solver{i});
    -0090         load(matFile);
    -0091         solveLP(emptyModel);
    -0092         workingSolvers=strcat(workingSolvers,';',solver{i});
    -0093         fprintf('OK\n');
    -0094         if strcmp(curSolv,solver{i})
    -0095             keepSolver=true;
    -0096         end
    -0097     catch
    -0098         fprintf('Not OK\n');
    -0099     end
    -0100 end
    -0101 
    -0102 if keepSolver
    -0103     %The solver set in curSolv is functional, so the settings are restored
    -0104     %to the ones which were set before running checkInstallation
    -0105     setRavenSolver(curSolv);
    -0106     fprintf(['Preferred solver... KEPT\nSolver saved as preference... ',curSolv,'\n\n']);
    -0107 elseif ~isempty(workingSolvers)
    -0108     %There are working solvers, but the none of them is the solver defined
    -0109     %by curSolv. The first working solver is therefore set as RAVEN solver
    -0110     workingSolvers=regexprep(workingSolvers,'^;','');
    -0111     workingSolvers=regexprep(workingSolvers,';.+$','');
    -0112     %Only one working solver should be left by now in workingSolvers
    -0113     setRavenSolver(workingSolvers);
    -0114     fprintf(['Preferred solver... NEW\nSolver saved as preference... ',workingSolvers,'\n\n']);
    -0115 else
    -0116     %No functional solvers were found, so the setting is restored back to
    -0117     %original
    -0118     setRavenSolver(curSolv);
    -0119     fprintf(['WARNING: No working solver was found!\n'...
    -0120         'Install the solver, set it using setRavenSolver(''solverName'') and run checkInstallation again\n'...
    -0121         'Available solverName options are ''mosek'', ''gurobi'' and ''cobra''\n\n']);
    -0122 end
    -0123 
    -0124 if ismac
    -0125     binEnd='.mac';
    -0126 elseif isunix
    -0127     binEnd='';
    -0128 elseif ispc
    -0129     binEnd='.exe';
    -0130 end
    -0131 fprintf('Checking essential binary executables:\n');
    -0132 fprintf('NOTE: Broken binary executables <strong>must be fixed</strong> before running RAVEN\n');
    -0133 fprintf(['\tmakeblastdb' binEnd '...\t\t\t\t\t\t\t']);
    -0134 [res,~]=system(['"' fullfile(ravenDir,'software','blast+',['makeblastdb' binEnd]) '"']);
    -0135 if res==1
    -0136     fprintf('OK\n');
    -0137 else
    -0138     fprintf('Not OK! Download/compile the binary and run checkInstallation again\n');
    -0139 end
    -0140 fprintf(['\tblastp' binEnd '...\t\t\t\t\t\t\t\t']);
    -0141 [res,~]=system(['"' fullfile(ravenDir,'software','blast+',['blastp' binEnd]) '"']);
    -0142 if res==1
    -0143     fprintf('OK\n');
    -0144 else
    -0145     fprintf('Not OK! Download/compile the binary and run checkInstallation again\n');
    -0146 end
    -0147 fprintf(['\tdiamond' binEnd '...\t\t\t\t\t\t\t\t']);
    -0148 [res,~]=system(['"' fullfile(ravenDir,'software','diamond',['diamond' binEnd]) '"']);
    -0149 if res==1
    -0150     fprintf('OK\n');
    -0151 else
    -0152     fprintf('Not OK! Download/compile the binary and run checkInstallation again\n');
    -0153 end
    -0154 fprintf(['\thmmsearch' binEnd '...\t\t\t\t\t\t\t']);
    -0155 [res,~]=system(['"' fullfile(ravenDir,'software','hmmer',['hmmsearch' binEnd]) '"']);
    -0156 if res==1
    -0157     fprintf('OK\n');
    -0158 else
    -0159     fprintf('Not OK! Download/compile the binary and run checkInstallation again\n');
    -0160 end
    -0161 fprintf('Checking non-essential/development binary executables:\n');
    -0162 fprintf('NOTE: Only fix these binaries if planning to use KEGG FTP dump files in getKEGGModelForOrganism\n');
    -0163 fprintf(['\tcd-hit' binEnd '...\t\t\t\t\t\t\t\t']);
    -0164 [res,~]=system(['"' fullfile(ravenDir,'software','cd-hit',['cd-hit' binEnd]) '"']);
    -0165 if res==1
    -0166     fprintf('OK\n');
    -0167 else
    -0168     fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n');
    -0169 end
    -0170 fprintf('\tmafft.bat...\t\t\t\t\t\t\t\t');
    -0171 if ismac
    -0172     [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-mac','mafft.bat') '" --help ']);
    -0173 elseif isunix
    -0174     [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-linux64','mafft.bat') '" --help ']);
    -0175 elseif ispc
    -0176     [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-win','mafft.bat') '" --help ']);
    -0177 end
    -0178 if res==1
    -0179     fprintf('OK\n');
    -0180 else
    -0181     fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n');
    -0182 end
    -0183 fprintf(['\thmmbuild' binEnd '...\t\t\t\t\t\t\t\t']);
    -0184 [res,~]=system(['"' fullfile(ravenDir,'software','hmmer',['hmmbuild' binEnd]) '"']);
    -0185 if res==1
    -0186     fprintf('OK\n\n');
    -0187 else
    -0188     fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n');
    -0189 end
    -0190 
    -0191 fprintf('Checking whether RAVEN functions are non-redundant across MATLAB path...\t');
    -0192 checkFunctionUniqueness();
    -0193 
    -0194 fprintf('\n*** checkInstallation complete ***\n\n');
    -0195 end
    +0036 fprintf('Checking if RAVEN is on the MATLAB path...\t\t\t\t\t\t\t\t\t'); +0037 if ismember(ravenDir,paths) +0038 fprintf('OK\n'); +0039 else +0040 fprintf('OK (just added)\n'); +0041 subpath=regexp(genpath(ravenDir),pathsep,'split'); %List all subdirectories +0042 pathsToKeep=cellfun(@(x) isempty(strfind(x,'.git')),subpath) & cellfun(@(x) isempty(strfind(x,'doc')),subpath); +0043 addpath(strjoin(subpath(pathsToKeep),pathsep)); +0044 savepath +0045 end +0046 +0047 %Add the required classes to the static Java path if not already added +0048 addJavaPaths(); +0049 +0050 excelFile=fullfile(ravenDir,'tutorial','empty.xlsx'); +0051 xmlFile=fullfile(ravenDir,'tutorial','empty.xml'); +0052 matFile=fullfile(ravenDir,'tutorial','empty.mat'); +0053 +0054 %Check if it is possible to parse an Excel file +0055 fprintf('Checking if it is possible to parse a model in Microsoft Excel format...\t'); +0056 try +0057 importExcelModel(excelFile,false,false,true); +0058 fprintf('OK\n'); +0059 catch +0060 fprintf('Not OK\n'); +0061 end +0062 +0063 %Check if it is possible to import an SBML model using libSBML +0064 fprintf('Checking if it is possible to import an SBML model using libSBML...\t\t\t'); +0065 try +0066 importModel(xmlFile); +0067 try +0068 libSBMLver=OutputSBML; % Only works in libSBML 5.17.0+ +0069 fprintf('OK\n'); +0070 catch +0071 fprintf(['Not OK\n\n'... +0072 'An older libSBML version was found, update to version 5.17.0 or higher\n'... +0073 'for a significant improvement of model import\n\n']); +0074 end +0075 catch +0076 fprintf(['Not OK\nTo import SBML models, download libSBML from\n'... +0077 'http://sbml.org/Software/libSBML/Downloading_libSBML and add to MATLAB path\n']); +0078 end +0079 +0080 %Define values for keepSolver and workingSolvers, needed for solver +0081 %functionality check +0082 keepSolver=false; +0083 workingSolvers=''; +0084 %Get current solver. 
Set it to 'none', if it is not set +0085 if ~ispref('RAVEN','solver') +0086 fprintf('Solver found in preferences... NONE\n'); +0087 setRavenSolver('none'); +0088 curSolv=getpref('RAVEN','solver'); +0089 else +0090 curSolv=getpref('RAVEN','solver'); +0091 fprintf(['Solver found in preferences... ',curSolv,'\n']); +0092 end +0093 +0094 %Check if it is possible to solve an LP problem using different solvers +0095 solver={'gurobi','cobra'}; +0096 +0097 for i=1:numel(solver) +0098 fprintf(['Checking if it is possible to solve an LP problem using ',solver{i},'...\t\t\t']); +0099 try +0100 setRavenSolver(solver{i}); +0101 load(matFile); +0102 solveLP(emptyModel); +0103 workingSolvers=strcat(workingSolvers,';',solver{i}); +0104 fprintf('OK\n'); +0105 if strcmp(curSolv,solver{i}) +0106 keepSolver=true; +0107 end +0108 catch +0109 fprintf('Not OK\n'); +0110 end +0111 end +0112 +0113 if keepSolver +0114 %The solver set in curSolv is functional, so the settings are restored +0115 %to the ones which were set before running checkInstallation +0116 setRavenSolver(curSolv); +0117 fprintf(['Preferred solver... KEPT\nSolver saved as preference... ',curSolv,'\n\n']); +0118 elseif ~isempty(workingSolvers) +0119 %There are working solvers, but the none of them is the solver defined +0120 %by curSolv. The first working solver is therefore set as RAVEN solver +0121 workingSolvers=regexprep(workingSolvers,'^;',''); +0122 workingSolvers=regexprep(workingSolvers,';.+$',''); +0123 %Only one working solver should be left by now in workingSolvers +0124 setRavenSolver(workingSolvers); +0125 fprintf(['Preferred solver... NEW\nSolver saved as preference... ',workingSolvers,'\n\n']); +0126 else +0127 %No functional solvers were found, so the setting is restored back to +0128 %original +0129 setRavenSolver(curSolv); +0130 fprintf(['WARNING: No working solver was found!\n'... +0131 'Install the solver, set it using setRavenSolver(''solverName'') and run checkInstallation again\n'... 
+0132 'Available solverName options are ''gurobi'' and ''cobra''\n\n']); +0133 end +0134 +0135 fprintf('Checking essential binary executables:\n'); +0136 +0137 fprintf('\tBLAST+... '); +0138 res=runtests('blastPlusTests.m','OutputDetail',0); +0139 interpretResults(res); +0140 fprintf('\tDIAMOND... '); +0141 res=runtests('diamondTests.m','OutputDetail',0); +0142 interpretResults(res); +0143 fprintf('\tHMMER... '); +0144 res=runtests('hmmerTests.m','OutputDetail',0); +0145 interpretResults(res); +0146 +0147 if develMode +0148 fprintf('NOTE: Only fix these binaries if planning to use KEGG FTP dump files in getKEGGModelForOrganism\n'); +0149 fprintf('\tCD-HIT... '); +0150 res=runtests('cdhitTests.m','OutputDetail',0); +0151 interpretResults(res); +0152 fprintf('\tMAFFT... '); +0153 res=runtests('mafftTests.m','OutputDetail',0); +0154 interpretResults(res); +0155 end +0156 +0157 fprintf('Checking whether RAVEN functions are non-redundant across MATLAB path...\t'); +0158 checkFunctionUniqueness(); +0159 +0160 fprintf('\n*** checkInstallation complete ***\n\n'); +0161 end +0162 +0163 function interpretResults(results) +0164 if results.Failed==0 && results.Incomplete==0 +0165 fprintf('OK\n'); +0166 else +0167 fprintf('Not OK! Download/compile the binary and rerun checkInstallation\n'); +0168 end +0169 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/SBMLFromExcel.html b/doc/io/SBMLFromExcel.html index 9799b702..34b74dae 100644 --- a/doc/io/SBMLFromExcel.html +++ b/doc/io/SBMLFromExcel.html @@ -49,7 +49,7 @@

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • exportModel exportModel
  • importExcelModel importExcelModel
  • This function is called by: @@ -86,7 +86,7 @@

    SOURCE CODE ^importExcelModel(fileName,false,printWarnings); 0028 printModelStats(model,printWarnings,false); -0029 exportModel(model,outputFileName,toCOBRA,true); +0029 exportModel(model,outputFileName,toCOBRA,true); 0030 end


    Generated by m2html © 2005
    diff --git a/doc/io/checkFileExistence.html b/doc/io/checkFileExistence.html index 703f1c0d..7e61f5d9 100644 --- a/doc/io/checkFileExistence.html +++ b/doc/io/checkFileExistence.html @@ -24,7 +24,7 @@

    PURPOSE ^checkFileExistence

    SYNOPSIS ^

    -
    function files=checkFileExistence(files,makeFullPath,allowSpace)
    +
    function files=checkFileExistence(files,fullOrTemp,allowSpace)

    DESCRIPTION ^

     checkFileExistence
    @@ -35,13 +35,20 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -50,13 +57,13 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= - +
  • getMD5Hash getMD5Hash
  • SOURCE CODE ^

    -
    0001 function files=checkFileExistence(files,makeFullPath,allowSpace)
    +
    0001 function files=checkFileExistence(files,fullOrTemp,allowSpace)
     0002 % checkFileExistence
     0003 %   Check whether files exist. If no full path is given a file should be
     0004 %   located in the current folder, which by default is appended to the
    @@ -65,46 +72,69 @@ 

    SOURCE CODE ^% Input: 0008 % files string or cell array of strings with path to file(s) or 0009 % path or filename(s) -0010 % makeFullPath logical, whether files located in the current folder -0011 % should be provided with the full path (opt, default -0012 % true) -0013 % allowSpace logical, whether 'space' character is allowed in the -0014 % path (opt, default true) -0015 % -0016 % Usage: files=checkFileExistence(files,makeFullPath,allowSpace) -0017 -0018 if nargin<2 -0019 makeFullPath = true; -0020 end -0021 if nargin<3 -0022 allowSpace = true; -0023 end +0010 % fullOrTemp 0: do not change path to file(s) +0011 % 1: return full path to file(s) +0012 % 2: copy file(s) to system default temporary folder and +0013 % return full path +0014 % (opt, default 0) +0015 % allowSpace logical, whether 'space' character is allowed in the +0016 % path (opt, default true) +0017 % +0018 % Output: +0019 % files string or cell array of strings with updated paths if +0020 % fullOrTemp was set as 1 or 2, otherwise original paths +0021 % are returned +0022 % +0023 % Usage: files=checkFileExistence(files,fullOrTemp,allowSpace) 0024 -0025 if isstr(files) -0026 oneFile=true; -0027 files={files}; -0028 else -0029 oneFile=false; +0025 if nargin<2 +0026 fullOrTemp = 0; +0027 end +0028 if nargin<3 +0029 allowSpace = true; 0030 end -0031 filesOriginal = files; -0032 -0033 inCurrDir = ~contains(files,'\') & ~contains(files,'/'); -0034 files(inCurrDir) = fullfile(cd,files(inCurrDir)); -0035 -0036 for i=1:numel(files) -0037 if ~exist(files{i},'file') -0038 error('File "%s" cannot be found\n',files{i}); -0039 elseif allowSpace == true & strfind(files{i},' ') -0040 error('File "%s" has an invalid space in the filename or path, please remove this before running this function\n',files{i}); -0041 end -0042 end -0043 -0044 if makeFullPath == false -0045 files = filesOriginal; -0046 end -0047 if oneFile == true -0048 files = files{1}; -0049 end

    +0031 +0032 if isstr(files) +0033 oneFile=true; +0034 files={files}; +0035 else +0036 oneFile=false; +0037 end +0038 filesOriginal = files; +0039 +0040 %Make all full paths before check of file existence +0041 if ispc % full path starts like "C:\" +0042 inCurrDir = cellfun(@isempty,regexpi(files,'^[a-z]\:\\')); +0043 else %isunix full path starts like "/" +0044 inCurrDir = cellfun(@isempty,regexpi(files,'\/')); +0045 end +0046 files(inCurrDir) = fullfile(cd,files(inCurrDir)); +0047 +0048 %Check existence +0049 for i=1:numel(files) +0050 if ~exist(files{i},'file') +0051 error('File "%s" cannot be found\n',files{i}); +0052 elseif allowSpace == false & strfind(files{i},' ') +0053 error('File "%s" has an invalid space in the filename or path, please remove this before running this function\n',files{i}); +0054 end +0055 end +0056 +0057 switch fullOrTemp +0058 case 0 +0059 files = filesOriginal; +0060 case 1 +0061 % files already contains full path +0062 case 2 +0063 for i=1:numel(files) +0064 tmpFile=tempname; +0065 copyfile(files{i},tmpFile); +0066 files{i}=tmpFile; +0067 end +0068 end +0069 +0070 if oneFile == true +0071 files = files{1}; +0072 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportForGit.html b/doc/io/exportForGit.html index 82d9dc3b..43f5ef74 100644 --- a/doc/io/exportForGit.html +++ b/doc/io/exportForGit.html @@ -24,7 +24,7 @@

    PURPOSE ^exportForGit

    SYNOPSIS ^

    -
    function out=exportForGit(model,prefix,path,formats,masterFlag,subDirs)
    +
    function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs)

    DESCRIPTION ^

     exportForGit
    @@ -41,8 +41,8 @@ 

    DESCRIPTION ^DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • exportModel exportModel
  • exportToExcelFormat exportToExcelFormat
  • getToolboxVersion getToolboxVersion
  • sortIdentifiers exportModel
  • writeYaml writeYaml
  • This function is called by: @@ -65,7 +65,7 @@

    CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

    -
    0001 function out=exportForGit(model,prefix,path,formats,masterFlag,subDirs)
    +
    0001 function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs)
     0002 % exportForGit
     0003 %   Generates a directory structure and populates this with model files, ready
     0004 %   to be commited to a Git(Hub) maintained model repository. Writes the model
    @@ -80,8 +80,8 @@ 

    SOURCE CODE ^% formats cell array of strings specifying in what file formats 0014 % the model should be exported (opt, default to all 0015 % formats as {'mat', 'txt', 'xlsx', 'xml', 'yml'}) -0016 % masterFlag logical, if true, function will error if RAVEN (and -0017 % COBRA if detected) is/are not on the master branch. +0016 % mainBranchFlag logical, if true, function will error if RAVEN (and +0017 % COBRA if detected) is/are not on the main branch. 0018 % (opt, default false) 0019 % subDirs logical, whether model files for each file format 0020 % should be written in its own subdirectory, with @@ -89,12 +89,12 @@

    SOURCE CODE ^% standard-GEM repository format. If false, all files 0023 % are stored in the same folder. (opt, default true) 0024 % -0025 % Usage: exportForGit(model,prefix,path,formats,masterFlag) +0025 % Usage: exportForGit(model,prefix,path,formats,mainBranchFlag) 0026 if nargin<6 0027 subDirs=true; 0028 end 0029 if nargin<5 -0030 masterFlag=false; +0030 mainBranchFlag=false; 0031 end 0032 if nargin<4 || isempty(formats) 0033 formats={'mat', 'txt', 'xlsx', 'xml', 'yml'}; @@ -113,91 +113,94 @@

    SOURCE CODE ^'model'; 0047 end 0048 -0049 %Get versions or commits of toolboxes: -0050 RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',masterFlag); -0051 COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',masterFlag); -0052 -0053 %Retrieve libSBML version: -0054 try % 5.17.0 and newer -0055 libSBMLver=OutputSBML; -0056 libSBMLver=libSBMLver.libSBML_version_string; -0057 catch % before 5.17.0 -0058 fid = fopen('tempModelForLibSBMLversion.xml','w+'); -0059 fclose(fid); -0060 evalc('[~,~,libSBMLver]=TranslateSBML(''tempModelForLibSBMLversion.xml'',0,0)'); -0061 libSBMLver=libSBMLver.libSBML_version_string; -0062 delete('tempModelForLibSBMLversion.xml'); -0063 end -0064 -0065 % Make models folder, no warnings if folder already exists -0066 if subDirs -0067 path=fullfile(path,'model'); -0068 filePath=strcat(path,filesep,{'txt','yml','mat','xlsx','xml'}); -0069 [~,~,~]=mkdir(path); -0070 for i = 1:length(formats) -0071 [~,~,~]=mkdir(fullfile(path,formats{i})); -0072 end -0073 else -0074 filePath=cell(1,5); filePath(:)={path}; -0075 end -0076 -0077 -0078 % Write TXT format -0079 if ismember('txt', formats) -0080 fid=fopen(fullfile(filePath{1},strcat(prefix,'.txt')),'w'); -0081 eqns=constructEquations(model,model.rxns,false,false,false,true); -0082 eqns=strrep(eqns,' => ',' -> '); -0083 eqns=strrep(eqns,' <=> ',' <=> '); -0084 eqns=regexprep(eqns,'> $','>'); -0085 grRules=regexprep(model.grRules,'\((?!\()','( '); -0086 grRules=regexprep(grRules,'(?<!\))\)',' )'); -0087 fprintf(fid, 'Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n'); -0088 for i = 1:numel(model.rxns) -0089 fprintf(fid, '%s\t', model.rxns{i}); -0090 fprintf(fid, '%s \t', eqns{i}); -0091 fprintf(fid, '%s\t', grRules{i}); -0092 fprintf(fid, '%6.2f\t%6.2f\t%6.2f\n', model.lb(i), model.ub(i), model.c(i)); -0093 end -0094 fclose(fid); -0095 end -0096 -0097 % Write YML format -0098 if ismember('yml', formats) -0099 
writeYaml(model,fullfile(filePath{2},strcat(prefix,'.yml'))); -0100 end -0101 -0102 % Write MAT format -0103 if ismember('mat', formats) -0104 save(fullfile(filePath{3},strcat(prefix,'.mat')),'model'); -0105 end -0106 -0107 % Write XLSX format -0108 if ismember('xlsx', formats) -0109 exportToExcelFormat(model,fullfile(filePath{4},strcat(prefix,'.xlsx'))); -0110 end -0111 -0112 % Write XML format -0113 if ismember('xml', formats) -0114 exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml'))); -0115 end -0116 -0117 %Save file with versions: -0118 fid = fopen(fullfile(path,'dependencies.txt'),'wt'); -0119 fprintf(fid,['MATLAB\t' version '\n']); -0120 fprintf(fid,['libSBML\t' libSBMLver '\n']); -0121 fprintf(fid,['RAVEN_toolbox\t' RAVENver '\n']); -0122 if ~isempty(COBRAver) -0123 fprintf(fid,['COBRA_toolbox\t' COBRAver '\n']); -0124 end -0125 if isfield(model,'modelVersion') -0126 fields = fieldnames(model.modelVersion); -0127 for i = 1:length(fields) -0128 value = model.modelVersion.(fields{i}); -0129 fprintf(fid,[fields{i} '\t' num2str(value) '\n']); -0130 end -0131 end -0132 fclose(fid); -0133 end

    +0049 %Sort reactions, metabolites and genes alphabetically +0050 model=sortIdentifiers(model); +0051 +0052 %Get versions or commits of toolboxes: +0053 RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',mainBranchFlag); +0054 COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',mainBranchFlag); +0055 +0056 %Retrieve libSBML version: +0057 try % 5.17.0 and newer +0058 libSBMLver=OutputSBML; +0059 libSBMLver=libSBMLver.libSBML_version_string; +0060 catch % before 5.17.0 +0061 fid = fopen('tempModelForLibSBMLversion.xml','w+'); +0062 fclose(fid); +0063 evalc('[~,~,libSBMLver]=TranslateSBML(''tempModelForLibSBMLversion.xml'',0,0)'); +0064 libSBMLver=libSBMLver.libSBML_version_string; +0065 delete('tempModelForLibSBMLversion.xml'); +0066 end +0067 +0068 % Make models folder, no warnings if folder already exists +0069 if subDirs +0070 path=fullfile(path,'model'); +0071 filePath=strcat(path,filesep,{'txt','yml','mat','xlsx','xml'}); +0072 [~,~,~]=mkdir(path); +0073 for i = 1:length(formats) +0074 [~,~,~]=mkdir(fullfile(path,formats{i})); +0075 end +0076 else +0077 filePath=cell(1,5); filePath(:)={path}; +0078 end +0079 +0080 +0081 % Write TXT format +0082 if ismember('txt', formats) +0083 fid=fopen(fullfile(filePath{1},strcat(prefix,'.txt')),'w'); +0084 eqns=constructEquations(model,model.rxns,false,false,false,true); +0085 eqns=strrep(eqns,' => ',' -> '); +0086 eqns=strrep(eqns,' <=> ',' <=> '); +0087 eqns=regexprep(eqns,'> $','>'); +0088 grRules=regexprep(model.grRules,'\((?!\()','( '); +0089 grRules=regexprep(grRules,'(?<!\))\)',' )'); +0090 fprintf(fid, 'Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n'); +0091 for i = 1:numel(model.rxns) +0092 fprintf(fid, '%s\t', model.rxns{i}); +0093 fprintf(fid, '%s \t', eqns{i}); +0094 fprintf(fid, '%s\t', grRules{i}); +0095 fprintf(fid, '%6.2f\t%6.2f\t%6.2f\n', model.lb(i), model.ub(i), model.c(i)); +0096 end +0097 fclose(fid); +0098 end +0099 +0100 % Write YML format +0101 if ismember('yml', 
formats) +0102 writeYaml(model,fullfile(filePath{2},strcat(prefix,'.yml'))); +0103 end +0104 +0105 % Write MAT format +0106 if ismember('mat', formats) +0107 save(fullfile(filePath{3},strcat(prefix,'.mat')),'model'); +0108 end +0109 +0110 % Write XLSX format +0111 if ismember('xlsx', formats) +0112 exportToExcelFormat(model,fullfile(filePath{4},strcat(prefix,'.xlsx'))); +0113 end +0114 +0115 % Write XML format +0116 if ismember('xml', formats) +0117 exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml'))); +0118 end +0119 +0120 %Save file with versions: +0121 fid = fopen(fullfile(path,'dependencies.txt'),'wt'); +0122 fprintf(fid,['MATLAB\t' version '\n']); +0123 fprintf(fid,['libSBML\t' libSBMLver '\n']); +0124 fprintf(fid,['RAVEN_toolbox\t' RAVENver '\n']); +0125 if ~isempty(COBRAver) +0126 fprintf(fid,['COBRA_toolbox\t' COBRAver '\n']); +0127 end +0128 if isfield(model,'modelVersion') +0129 fields = fieldnames(model.modelVersion); +0130 for i = 1:length(fields) +0131 value = model.modelVersion.(fields{i}); +0132 fprintf(fid,[fields{i} '\t' num2str(value) '\n']); +0133 end +0134 end +0135 fclose(fid); +0136 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportModel.html b/doc/io/exportModel.html index 336c5367..2f9283c0 100644 --- a/doc/io/exportModel.html +++ b/doc/io/exportModel.html @@ -24,7 +24,7 @@

    PURPOSE ^exportModel

    SYNOPSIS ^

    -
    function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
    +
    function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)

    DESCRIPTION ^

     exportModel
    @@ -38,18 +38,20 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • sortIdentifiers exportModel
  • This function is called by: +
  • SBMLFromExcel SBMLFromExcel
  • exportForGit exportForGit
  • SUBFUNCTIONS ^

    @@ -57,7 +59,7 @@

    SUBFUNCTIONS ^function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions)
  • function miriamString=getMiriam(miriamStruct)
  • function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i)
  • function vecT = columnVector(vec)
  • SOURCE CODE ^

    -
    0001 function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
    +
    0001 function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)
     0002 % exportModel
     0003 %   Exports a constraint-based model to an SBML file (L3V1 FBCv2)
     0004 %
    @@ -69,795 +71,803 @@ 

    SOURCE CODE ^% (opt, default false) 0011 % supressWarnings true if warnings should be supressed (opt, default 0012 % false) -0013 % -0014 % -0015 % Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings) -0016 if nargin<3 -0017 exportGeneComplexes=false; -0018 end -0019 if nargin<4 -0020 supressWarnings=false; -0021 end -0022 -0023 %If no subSystems are defined, then no need to use groups package -0024 if isfield(model,'subSystems') -0025 modelHasSubsystems=true; -0026 else -0027 modelHasSubsystems=false; -0028 end -0029 -0030 %The default SBML format settings, which are used as input for appropriate -0031 %libSBML functions to generate the blank SBML model structure before using -0032 %exporting in with OutputSBML to xml file -0033 sbmlLevel=3; -0034 sbmlVersion=1; -0035 sbmlPackages={'fbc'}; -0036 sbmlPackageVersions=2; -0037 if modelHasSubsystems -0038 sbmlPackages={sbmlPackages,'groups'}; -0039 sbmlPackageVersions=[sbmlPackageVersions,1]; -0040 end -0041 -0042 %Check if the "unconstrained" field is still present. This shows if -0043 %exchange metabolites have been removed -0044 if ~isfield(model,'unconstrained') -0045 if supressWarnings==false -0046 EM='There is no unconstrained field in the model structure. This means that no metabolites are considered exchange metabolites'; -0047 dispEM(EM,false); -0048 end -0049 model.unconstrained=zeros(numel(model.mets),1); -0050 end -0051 -0052 %If model id and name do not exist, make sure that default -0053 %strings are included -0054 if ~isfield(model,'id') -0055 fprintf('WARNING: The model is missing the "id" field. Uses "blankID". \n'); -0056 model.id='blankID'; -0057 end -0058 if ~isfield(model,'name') -0059 fprintf('WARNING: The model is missing the "name" field. Uses "blankName". 
\n'); -0060 model.name='blankName'; -0061 end -0062 -0063 %Check the model structure -0064 if supressWarnings==false -0065 checkModelStruct(model,false); -0066 end -0067 -0068 %Add several blank fields, if they do not exist already. This is to reduce -0069 %the number of conditions below -0070 if ~isfield(model,'compMiriams') -0071 model.compMiriams=cell(numel(model.comps),1); -0072 end -0073 if ~isfield(model,'inchis') -0074 model.inchis=cell(numel(model.mets),1); -0075 end -0076 if ~isfield(model,'metFormulas') -0077 model.metFormulas=cell(numel(model.mets),1); -0078 end -0079 if ~isfield(model,'metMiriams') -0080 model.metMiriams=cell(numel(model.mets),1); -0081 end -0082 if ~isfield(model,'geneMiriams') && isfield(model,'genes') -0083 model.geneMiriams=cell(numel(model.genes),1); -0084 end -0085 if ~isfield(model,'geneShortNames') && isfield(model,'genes') -0086 model.geneShortNames=cell(numel(model.genes),1); -0087 end -0088 if ~isfield(model,'subSystems') -0089 model.subSystems=cell(numel(model.rxns),1); -0090 end -0091 if ~isfield(model,'eccodes') -0092 model.eccodes=cell(numel(model.rxns),1); -0093 end -0094 if ~isfield(model,'rxnReferences') -0095 model.rxnReferences=cell(numel(model.rxns),1); -0096 end -0097 if ~isfield(model,'rxnConfidenceScores') -0098 model.rxnConfidenceScores=NaN(numel(model.rxns),1); -0099 end -0100 if ~isfield(model,'rxnNotes') -0101 model.rxnNotes=cell(numel(model.rxns),1); -0102 end -0103 if ~isfield(model,'rxnMiriams') -0104 model.rxnMiriams=cell(numel(model.rxns),1); -0105 end -0106 -0107 if sbmlLevel<3 -0108 %Check if genes have associated compartments -0109 if ~isfield(model,'geneComps') && isfield(model,'genes') -0110 if supressWarnings==false -0111 EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. 
This is because the SBML structure requires all elements to be assigned to a compartment'; -0112 dispEM(EM,false); -0113 end -0114 model.geneComps=ones(numel(model.genes),1); -0115 end -0116 end -0117 -0118 %Convert ids to SBML-convenient format. This is to avoid the data loss when -0119 %unsupported characters are included in ids. Here we are using part from -0120 %convertSBMLID, originating from the COBRA Toolbox -0121 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0122 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0123 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0124 if isfield(model,'genes') -0125 problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])'))); -0126 originalGenes=model.genes(problemGenes); -0127 replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0128 model.genes(problemGenes)=replacedGenes; -0129 for i=1:numel(problemGenes) -0130 model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']); -0131 end -0132 end -0133 -0134 %Generate an empty SBML structure -0135 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0136 modelSBML.metaid=model.id; -0137 modelSBML.id=model.id; -0138 modelSBML.name=model.name; -0139 -0140 if isfield(model,'annotation') -0141 if isfield(model.annotation,'note') -0142 modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>']; -0143 end -0144 else -0145 modelSBML.notes='<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>'; -0146 end +0013 % sortIds logical whether metabolites, reactions and genes +0014 % should be sorted alphabetically by their +0015 % identifiers (opt, default false) 
+0016 % +0017 % Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) +0018 if nargin<3 +0019 exportGeneComplexes=false; +0020 end +0021 if nargin<4 +0022 supressWarnings=false; +0023 end +0024 if nargin<5 +0025 sortIds=false; +0026 end +0027 if sortIds==true +0028 model=sortIdentifiers(model); +0029 end +0030 +0031 %If no subSystems are defined, then no need to use groups package +0032 if isfield(model,'subSystems') +0033 modelHasSubsystems=true; +0034 else +0035 modelHasSubsystems=false; +0036 end +0037 +0038 %The default SBML format settings, which are used as input for appropriate +0039 %libSBML functions to generate the blank SBML model structure before using +0040 %exporting in with OutputSBML to xml file +0041 sbmlLevel=3; +0042 sbmlVersion=1; +0043 sbmlPackages={'fbc'}; +0044 sbmlPackageVersions=2; +0045 if modelHasSubsystems +0046 sbmlPackages={sbmlPackages,'groups'}; +0047 sbmlPackageVersions=[sbmlPackageVersions,1]; +0048 end +0049 +0050 %Check if the "unconstrained" field is still present. This shows if +0051 %exchange metabolites have been removed +0052 if ~isfield(model,'unconstrained') +0053 if supressWarnings==false +0054 EM='There is no unconstrained field in the model structure. This means that no metabolites are considered exchange metabolites'; +0055 dispEM(EM,false); +0056 end +0057 model.unconstrained=zeros(numel(model.mets),1); +0058 end +0059 +0060 %If model id and name do not exist, make sure that default +0061 %strings are included +0062 if ~isfield(model,'id') +0063 fprintf('WARNING: The model is missing the "id" field. Uses "blankID". \n'); +0064 model.id='blankID'; +0065 end +0066 if ~isfield(model,'name') +0067 fprintf('WARNING: The model is missing the "name" field. Uses "blankName". 
\n'); +0068 model.name='blankName'; +0069 end +0070 +0071 %Check the model structure +0072 if supressWarnings==false +0073 checkModelStruct(model,false); +0074 end +0075 +0076 %Add several blank fields, if they do not exist already. This is to reduce +0077 %the number of conditions below +0078 if ~isfield(model,'compMiriams') +0079 model.compMiriams=cell(numel(model.comps),1); +0080 end +0081 if ~isfield(model,'inchis') +0082 model.inchis=cell(numel(model.mets),1); +0083 end +0084 if ~isfield(model,'metFormulas') +0085 model.metFormulas=cell(numel(model.mets),1); +0086 end +0087 if ~isfield(model,'metMiriams') +0088 model.metMiriams=cell(numel(model.mets),1); +0089 end +0090 if ~isfield(model,'geneMiriams') && isfield(model,'genes') +0091 model.geneMiriams=cell(numel(model.genes),1); +0092 end +0093 if ~isfield(model,'geneShortNames') && isfield(model,'genes') +0094 model.geneShortNames=cell(numel(model.genes),1); +0095 end +0096 if ~isfield(model,'subSystems') +0097 model.subSystems=cell(numel(model.rxns),1); +0098 end +0099 if ~isfield(model,'eccodes') +0100 model.eccodes=cell(numel(model.rxns),1); +0101 end +0102 if ~isfield(model,'rxnReferences') +0103 model.rxnReferences=cell(numel(model.rxns),1); +0104 end +0105 if ~isfield(model,'rxnConfidenceScores') +0106 model.rxnConfidenceScores=NaN(numel(model.rxns),1); +0107 end +0108 if ~isfield(model,'rxnNotes') +0109 model.rxnNotes=cell(numel(model.rxns),1); +0110 end +0111 if ~isfield(model,'rxnMiriams') +0112 model.rxnMiriams=cell(numel(model.rxns),1); +0113 end +0114 +0115 if sbmlLevel<3 +0116 %Check if genes have associated compartments +0117 if ~isfield(model,'geneComps') && isfield(model,'genes') +0118 if supressWarnings==false +0119 EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. 
This is because the SBML structure requires all elements to be assigned to a compartment'; +0120 dispEM(EM,false); +0121 end +0122 model.geneComps=ones(numel(model.genes),1); +0123 end +0124 end +0125 +0126 %Convert ids to SBML-convenient format. This is to avoid the data loss when +0127 %unsupported characters are included in ids. Here we are using part from +0128 %convertSBMLID, originating from the COBRA Toolbox +0129 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0130 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0131 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0132 if isfield(model,'genes') +0133 problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])'))); +0134 originalGenes=model.genes(problemGenes); +0135 replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0136 model.genes(problemGenes)=replacedGenes; +0137 for i=1:numel(problemGenes) +0138 model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']); +0139 end +0140 end +0141 +0142 %Generate an empty SBML structure +0143 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0144 modelSBML.metaid=model.id; +0145 modelSBML.id=model.id; +0146 modelSBML.name=model.name; 0147 -0148 modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">']; -0149 if isfield(model,'annotation') -0150 nameString=''; -0151 if isfield(model.annotation,'familyName') -0152 if ~isempty(model.annotation.familyName) -0153 nameString=['<vCard:Family>' 
model.annotation.familyName '</vCard:Family>']; -0154 end -0155 end -0156 if isfield(model.annotation,'givenName') -0157 if ~isempty(model.annotation.givenName) -0158 nameString=[nameString '<vCard:Given>' model.annotation.givenName '</vCard:Given>']; -0159 end -0160 end -0161 email=''; -0162 if isfield(model.annotation,'email') -0163 if ~isempty(model.annotation.email) -0164 email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>']; -0165 end -0166 end -0167 org=''; -0168 if isfield(model.annotation,'organization') -0169 if ~isempty(model.annotation.organization) -0170 org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>']; -0171 end -0172 end -0173 if ~isempty(nameString) || ~isempty(email) || ~isempty(org) -0174 modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">']; -0175 if ~isempty(nameString) -0176 modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>']; -0177 end -0178 modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>']; -0179 end -0180 end -0181 modelSBML.annotation=[modelSBML.annotation '<dcterms:created rdf:parseType="Resource">'... -0182 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... -0183 '</dcterms:created>'... -0184 '<dcterms:modified rdf:parseType="Resource">'... -0185 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... 
-0186 '</dcterms:modified>']; -0187 -0188 if isfield(model,'annotation') -0189 if isfield(model.annotation,'taxonomy') -0190 modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>']; -0191 end -0192 end -0193 modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>']; -0194 -0195 %Prepare compartments -0196 for i=1:numel(model.comps) -0197 %Add the default values, as these will be the same in all entries -0198 if i==1 -0199 if isfield(modelSBML.compartment, 'sboTerm') -0200 modelSBML.compartment(i).sboTerm=290; -0201 end -0202 if isfield(modelSBML.compartment, 'spatialDimensions') -0203 modelSBML.compartment(i).spatialDimensions=3; -0204 end -0205 if isfield(modelSBML.compartment, 'size') -0206 modelSBML.compartment(i).size=1; -0207 end -0208 if isfield(modelSBML.compartment, 'constant') -0209 modelSBML.compartment(i).constant=1; -0210 end -0211 if isfield(modelSBML.compartment, 'isSetSize') -0212 modelSBML.compartment(i).isSetSize=1; -0213 end -0214 if isfield(modelSBML.compartment, 'isSetSpatialDimensions') -0215 modelSBML.compartment(i).isSetSpatialDimensions=1; -0216 end -0217 end -0218 %Copy the default values to the next entry as long as it is not the -0219 %last one -0220 if i<numel(model.comps) -0221 modelSBML.compartment(i+1)=modelSBML.compartment(i); -0222 end -0223 -0224 if isfield(modelSBML.compartment,'metaid') -0225 if ~isnan(str2double(model.comps(i))) -0226 EM='The compartment IDs are in numeric format. 
For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; -0227 dispEM(EM,false); -0228 model.comps(i)=strcat('c_',model.comps(i)); -0229 end -0230 modelSBML.compartment(i).metaid=model.comps{i}; -0231 end -0232 %Prepare Miriam strings -0233 if ~isempty(model.compMiriams{i}) -0234 [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name); -0235 if sbo_ind > 0 -0236 modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0237 % remove the SBO term from compMiriams so the information is -0238 % not duplicated in the "annotation" field later on -0239 model.compMiriams{i}.name(sbo_ind) = []; -0240 model.compMiriams{i}.value(sbo_ind) = []; -0241 end -0242 end -0243 if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation') -0244 modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">']; -0245 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>']; -0246 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0247 end -0248 if isfield(modelSBML.compartment, 'name') -0249 modelSBML.compartment(i).name=model.compNames{i}; +0148 if isfield(model,'annotation') +0149 if isfield(model.annotation,'note') +0150 modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>']; +0151 end +0152 else +0153 modelSBML.notes='<notes><body 
xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>'; +0154 end +0155 +0156 modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">']; +0157 if isfield(model,'annotation') +0158 nameString=''; +0159 if isfield(model.annotation,'familyName') +0160 if ~isempty(model.annotation.familyName) +0161 nameString=['<vCard:Family>' model.annotation.familyName '</vCard:Family>']; +0162 end +0163 end +0164 if isfield(model.annotation,'givenName') +0165 if ~isempty(model.annotation.givenName) +0166 nameString=[nameString '<vCard:Given>' model.annotation.givenName '</vCard:Given>']; +0167 end +0168 end +0169 email=''; +0170 if isfield(model.annotation,'email') +0171 if ~isempty(model.annotation.email) +0172 email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>']; +0173 end +0174 end +0175 org=''; +0176 if isfield(model.annotation,'organization') +0177 if ~isempty(model.annotation.organization) +0178 org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>']; +0179 end +0180 end +0181 if ~isempty(nameString) || ~isempty(email) || ~isempty(org) +0182 modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">']; +0183 if ~isempty(nameString) +0184 modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>']; +0185 end +0186 modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>']; +0187 end +0188 end +0189 modelSBML.annotation=[modelSBML.annotation 
'<dcterms:created rdf:parseType="Resource">'... +0190 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... +0191 '</dcterms:created>'... +0192 '<dcterms:modified rdf:parseType="Resource">'... +0193 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... +0194 '</dcterms:modified>']; +0195 +0196 if isfield(model,'annotation') +0197 if isfield(model.annotation,'taxonomy') +0198 modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>']; +0199 end +0200 end +0201 modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>']; +0202 +0203 %Prepare compartments +0204 for i=1:numel(model.comps) +0205 %Add the default values, as these will be the same in all entries +0206 if i==1 +0207 if isfield(modelSBML.compartment, 'sboTerm') +0208 modelSBML.compartment(i).sboTerm=290; +0209 end +0210 if isfield(modelSBML.compartment, 'spatialDimensions') +0211 modelSBML.compartment(i).spatialDimensions=3; +0212 end +0213 if isfield(modelSBML.compartment, 'size') +0214 modelSBML.compartment(i).size=1; +0215 end +0216 if isfield(modelSBML.compartment, 'constant') +0217 modelSBML.compartment(i).constant=1; +0218 end +0219 if isfield(modelSBML.compartment, 'isSetSize') +0220 modelSBML.compartment(i).isSetSize=1; +0221 end +0222 if isfield(modelSBML.compartment, 'isSetSpatialDimensions') +0223 modelSBML.compartment(i).isSetSpatialDimensions=1; +0224 end +0225 end +0226 %Copy the default values to the next entry as long as it is not the +0227 %last one +0228 if i<numel(model.comps) +0229 modelSBML.compartment(i+1)=modelSBML.compartment(i); +0230 end +0231 +0232 if isfield(modelSBML.compartment,'metaid') +0233 if regexp(model.comps{i},'^[^a-zA-Z_]') +0234 EM='The compartment IDs are in numeric format. 
For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; +0235 dispEM(EM,false); +0236 model.comps(i)=strcat('c_',model.comps(i)); +0237 end +0238 modelSBML.compartment(i).metaid=model.comps{i}; +0239 end +0240 %Prepare Miriam strings +0241 if ~isempty(model.compMiriams{i}) +0242 [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name); +0243 if sbo_ind > 0 +0244 modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0245 % remove the SBO term from compMiriams so the information is +0246 % not duplicated in the "annotation" field later on +0247 model.compMiriams{i}.name(sbo_ind) = []; +0248 model.compMiriams{i}.value(sbo_ind) = []; +0249 end 0250 end -0251 if isfield(modelSBML.compartment, 'id') -0252 modelSBML.compartment(i).id=model.comps{i}; -0253 end -0254 -0255 end -0256 -0257 %Begin writing species -0258 for i=1:numel(model.mets) -0259 %Add the default values, as these will be the same in all entries -0260 if i==1 -0261 if isfield(modelSBML.species, 'sboTerm') -0262 modelSBML.species(i).sboTerm=247; -0263 end -0264 if isfield(modelSBML.species, 'initialAmount') -0265 modelSBML.species(i).initialAmount=1; -0266 end -0267 if isfield(modelSBML.species, 'initialConcentration') -0268 modelSBML.species(i).initialConcentration=0; -0269 end -0270 if isfield(modelSBML.species, 'isSetInitialAmount') -0271 modelSBML.species(i).isSetInitialAmount=1; -0272 end -0273 if isfield(modelSBML.species, 'isSetInitialConcentration') -0274 modelSBML.species(i).isSetInitialConcentration=1; -0275 end -0276 end -0277 %Copy the default values to the next entry as long as it is not the -0278 %last one -0279 if i<numel(model.mets) -0280 modelSBML.species(i+1)=modelSBML.species(i); -0281 end -0282 -0283 if isfield(modelSBML.species,'metaid') -0284 modelSBML.species(i).metaid=['M_' model.mets{i}]; -0285 end -0286 if isfield(modelSBML.species, 'name') -0287 
modelSBML.species(i).name=model.metNames{i}; -0288 end -0289 if isfield(modelSBML.species, 'id') -0290 modelSBML.species(i).id=['M_' model.mets{i}]; -0291 end -0292 if isfield(modelSBML.species, 'compartment') -0293 modelSBML.species(i).compartment=model.comps{model.metComps(i)}; -0294 end -0295 if isfield(model,'unconstrained') -0296 if model.unconstrained(i) -0297 modelSBML.species(i).boundaryCondition=1; -0298 end +0251 if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation') +0252 modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">']; +0253 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>']; +0254 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0255 end +0256 if isfield(modelSBML.compartment, 'name') +0257 modelSBML.compartment(i).name=model.compNames{i}; +0258 end +0259 if isfield(modelSBML.compartment, 'id') +0260 modelSBML.compartment(i).id=model.comps{i}; +0261 end +0262 +0263 end +0264 +0265 %Begin writing species +0266 for i=1:numel(model.mets) +0267 %Add the default values, as these will be the same in all entries +0268 if i==1 +0269 if isfield(modelSBML.species, 'sboTerm') +0270 modelSBML.species(i).sboTerm=247; +0271 end +0272 if isfield(modelSBML.species, 'initialAmount') +0273 modelSBML.species(i).initialAmount=1; +0274 end +0275 if isfield(modelSBML.species, 'initialConcentration') +0276 modelSBML.species(i).initialConcentration=0; +0277 end +0278 if isfield(modelSBML.species, 
'isSetInitialAmount') +0279 modelSBML.species(i).isSetInitialAmount=1; +0280 end +0281 if isfield(modelSBML.species, 'isSetInitialConcentration') +0282 modelSBML.species(i).isSetInitialConcentration=1; +0283 end +0284 end +0285 %Copy the default values to the next entry as long as it is not the +0286 %last one +0287 if i<numel(model.mets) +0288 modelSBML.species(i+1)=modelSBML.species(i); +0289 end +0290 +0291 if isfield(modelSBML.species,'metaid') +0292 modelSBML.species(i).metaid=['M_' model.mets{i}]; +0293 end +0294 if isfield(modelSBML.species, 'name') +0295 modelSBML.species(i).name=model.metNames{i}; +0296 end +0297 if isfield(modelSBML.species, 'id') +0298 modelSBML.species(i).id=['M_' model.mets{i}]; 0299 end -0300 if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges') -0301 if ~isnan(model.metCharges(i)) -0302 modelSBML.species(i).fbc_charge=model.metCharges(i); -0303 modelSBML.species(i).isSetfbc_charge=1; -0304 else -0305 modelSBML.species(i).isSetfbc_charge=0; +0300 if isfield(modelSBML.species, 'compartment') +0301 modelSBML.species(i).compartment=model.comps{model.metComps(i)}; +0302 end +0303 if isfield(model,'unconstrained') +0304 if model.unconstrained(i) +0305 modelSBML.species(i).boundaryCondition=1; 0306 end 0307 end -0308 if ~isempty(model.metMiriams{i}) -0309 [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name); -0310 if sbo_ind > 0 -0311 modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0312 % remove the SBO term from metMiriams so the information is -0313 % not duplicated in the "annotation" field later on -0314 model.metMiriams{i}.name(sbo_ind) = []; -0315 model.metMiriams{i}.value(sbo_ind) = []; -0316 end -0317 end -0318 if isfield(modelSBML.species,'annotation') -0319 if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i}) -0320 hasInchi=false; -0321 if ~isempty(model.metFormulas{i}) -0322 %Only export formula if there is no InChI. 
This is because -0323 %the metFormulas field is populated by InChIs if available -0324 if ~isempty(model.inchis{i}) -0325 hasInchi=true; -0326 end -0327 if hasInchi==false -0328 modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i}; -0329 end -0330 end -0331 if ~isempty(model.metMiriams{i}) || hasInchi==true -0332 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_M_' model.mets{i} '">']; -0333 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>']; -0334 if ~isempty(model.metMiriams{i}) -0335 modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})]; -0336 end -0337 if hasInchi==true -0338 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>']; -0339 modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once')); -0340 end -0341 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0342 end -0343 end -0344 end -0345 end -0346 -0347 if isfield(model,'genes') -0348 for i=1:numel(model.genes) -0349 %Add the default values, as these will be the same in all entries -0350 if i==1 -0351 if isfield(modelSBML.fbc_geneProduct, 'sboTerm') -0352 modelSBML.fbc_geneProduct(i).sboTerm=243; -0353 end -0354 end -0355 %Copy the default values to the next index as long as it is not the -0356 %last one -0357 if i<numel(model.genes) -0358 modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i); -0359 end -0360 -0361 if 
isfield(modelSBML.fbc_geneProduct,'metaid') -0362 modelSBML.fbc_geneProduct(i).metaid=model.genes{i}; -0363 end -0364 if ~isempty(model.geneMiriams{i}) -0365 [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name); -0366 if sbo_ind > 0 -0367 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0368 % remove the SBO term from compMiriams so the information is -0369 % not duplicated in the "annotation" field later on -0370 model.geneMiriams{i}.name(sbo_ind) = []; -0371 model.geneMiriams{i}.value(sbo_ind) = []; -0372 end -0373 end -0374 if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation') -0375 modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">']; -0376 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>']; -0377 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0378 end -0379 if isfield(modelSBML.fbc_geneProduct, 'fbc_id') -0380 modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i}; +0308 if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges') +0309 if ~isnan(model.metCharges(i)) +0310 modelSBML.species(i).fbc_charge=model.metCharges(i); +0311 modelSBML.species(i).isSetfbc_charge=1; +0312 else +0313 modelSBML.species(i).isSetfbc_charge=0; +0314 end +0315 end +0316 if ~isempty(model.metMiriams{i}) +0317 [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name); +0318 if sbo_ind > 0 +0319 
modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0320 % remove the SBO term from metMiriams so the information is +0321 % not duplicated in the "annotation" field later on +0322 model.metMiriams{i}.name(sbo_ind) = []; +0323 model.metMiriams{i}.value(sbo_ind) = []; +0324 end +0325 end +0326 if isfield(modelSBML.species,'annotation') +0327 if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i}) +0328 hasInchi=false; +0329 if ~isempty(model.metFormulas{i}) +0330 %Only export formula if there is no InChI. This is because +0331 %the metFormulas field is populated by InChIs if available +0332 if ~isempty(model.inchis{i}) +0333 hasInchi=true; +0334 end +0335 if hasInchi==false +0336 modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i}; +0337 end +0338 end +0339 if ~isempty(model.metMiriams{i}) || hasInchi==true +0340 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_M_' model.mets{i} '">']; +0341 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>']; +0342 if ~isempty(model.metMiriams{i}) +0343 modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})]; +0344 end +0345 if hasInchi==true +0346 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>']; +0347 modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once')); +0348 end +0349 modelSBML.species(i).annotation=[modelSBML.species(i).annotation 
'</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0350 end +0351 end +0352 end +0353 end +0354 +0355 if isfield(model,'genes') +0356 for i=1:numel(model.genes) +0357 %Add the default values, as these will be the same in all entries +0358 if i==1 +0359 if isfield(modelSBML.fbc_geneProduct, 'sboTerm') +0360 modelSBML.fbc_geneProduct(i).sboTerm=243; +0361 end +0362 end +0363 %Copy the default values to the next index as long as it is not the +0364 %last one +0365 if i<numel(model.genes) +0366 modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i); +0367 end +0368 +0369 if isfield(modelSBML.fbc_geneProduct,'metaid') +0370 modelSBML.fbc_geneProduct(i).metaid=model.genes{i}; +0371 end +0372 if ~isempty(model.geneMiriams{i}) +0373 [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name); +0374 if sbo_ind > 0 +0375 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0376 % remove the SBO term from compMiriams so the information is +0377 % not duplicated in the "annotation" field later on +0378 model.geneMiriams{i}.name(sbo_ind) = []; +0379 model.geneMiriams{i}.value(sbo_ind) = []; +0380 end 0381 end -0382 if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames') -0383 if isempty(model.geneShortNames{i}) -0384 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i}; -0385 else -0386 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; -0387 end -0388 end -0389 end -0390 if exportGeneComplexes==true -0391 %Also add the complexes as genes. 
This is done by splitting grRules -0392 %on "or" and adding the ones which contain several genes -0393 geneComplexes={}; -0394 if isfield(model,'grRules') -0395 %Only grRules which contain " and " can be complexes -0396 uniqueRules=unique(model.grRules); -0397 I=cellfun(@any,strfind(uniqueRules,' and ')); -0398 uniqueRules(~I)=[]; -0399 uniqueRules=strrep(uniqueRules,'(',''); -0400 uniqueRules=strrep(uniqueRules,')',''); -0401 uniqueRules=strrep(uniqueRules,' and ',':'); -0402 for i=1:numel(uniqueRules) -0403 genes=regexp(uniqueRules(i),' or ','split'); -0404 genes=genes{1}(:); -0405 %Check which ones are complexes -0406 I=cellfun(@any,strfind(genes,':')); -0407 geneComplexes=[geneComplexes;genes(I)]; -0408 end -0409 end -0410 geneComplexes=unique(geneComplexes); -0411 if ~isempty(geneComplexes) -0412 %Then add them as genes. There is a possiblity that a complex -0413 %A&B is added as separate from B&A. This is not really an issue -0414 %so this is not dealt with -0415 for i=1:numel(geneComplexes) -0416 modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1); -0417 if isfield(modelSBML.fbc_geneProduct,'metaid') -0418 modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i}; -0419 end -0420 if isfield(modelSBML.fbc_geneProduct,'fbc_id') -0421 modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i}; -0422 else -0423 modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id; -0424 end -0425 end -0426 end -0427 end -0428 end -0429 -0430 %Generate a list of unique fbc_bound names -0431 totalValues=[model.lb; model.ub]; -0432 totalNames=cell(size(totalValues,1),1); -0433 -0434 listUniqueValues=unique(totalValues); -0435 -0436 for i=1:length(listUniqueValues) -0437 listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs. 
-0438 ind=find(ismember(totalValues,listUniqueValues(i))); -0439 totalNames(ind)=listUniqueNames(i,1); -0440 end +0382 if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation') +0383 modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">']; +0384 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>']; +0385 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0386 end +0387 if isfield(modelSBML.fbc_geneProduct, 'fbc_id') +0388 modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i}; +0389 end +0390 if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames') +0391 if isempty(model.geneShortNames{i}) +0392 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i}; +0393 else +0394 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; +0395 end +0396 end +0397 end +0398 if exportGeneComplexes==true +0399 %Also add the complexes as genes. 
This is done by splitting grRules +0400 %on "or" and adding the ones which contain several genes +0401 geneComplexes={}; +0402 if isfield(model,'grRules') +0403 %Only grRules which contain " and " can be complexes +0404 uniqueRules=unique(model.grRules); +0405 I=cellfun(@any,strfind(uniqueRules,' and ')); +0406 uniqueRules(~I)=[]; +0407 uniqueRules=strrep(uniqueRules,'(',''); +0408 uniqueRules=strrep(uniqueRules,')',''); +0409 uniqueRules=strrep(uniqueRules,' and ',':'); +0410 for i=1:numel(uniqueRules) +0411 genes=regexp(uniqueRules(i),' or ','split'); +0412 genes=genes{1}(:); +0413 %Check which ones are complexes +0414 I=cellfun(@any,strfind(genes,':')); +0415 geneComplexes=[geneComplexes;genes(I)]; +0416 end +0417 end +0418 geneComplexes=unique(geneComplexes); +0419 if ~isempty(geneComplexes) +0420 %Then add them as genes. There is a possiblity that a complex +0421 %A&B is added as separate from B&A. This is not really an issue +0422 %so this is not dealt with +0423 for i=1:numel(geneComplexes) +0424 modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1); +0425 if isfield(modelSBML.fbc_geneProduct,'metaid') +0426 modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i}; +0427 end +0428 if isfield(modelSBML.fbc_geneProduct,'fbc_id') +0429 modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i}; +0430 else +0431 modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id; +0432 end +0433 end +0434 end +0435 end +0436 end +0437 +0438 %Generate a list of unique fbc_bound names +0439 totalValues=[model.lb; model.ub]; +0440 totalNames=cell(size(totalValues,1),1); 0441 -0442 for i=1:length(listUniqueNames) -0443 %Add the default values, as these will be the same in all entries -0444 if i==1 -0445 if isfield(modelSBML.parameter, 'constant') -0446 modelSBML.parameter(i).constant=1; -0447 end -0448 if isfield(modelSBML.parameter, 'isSetValue') -0449 modelSBML.parameter(i).isSetValue=1; -0450 end -0451 end 
-0452 %Copy the default values to the next index as long as it is not the -0453 %last one -0454 if i<numel(listUniqueNames) -0455 modelSBML.parameter(i+1)=modelSBML.parameter(i); -0456 end -0457 modelSBML.parameter(i).id=listUniqueNames{i}; -0458 modelSBML.parameter(i).value=listUniqueValues(i); -0459 end -0460 -0461 for i=1:numel(model.rxns) -0462 %Add the default values, as these will be the same in all entries -0463 if i==1 -0464 if isfield(modelSBML.reaction, 'sboTerm') -0465 modelSBML.reaction(i).sboTerm=176; -0466 end -0467 if isfield(modelSBML.reaction, 'isSetFast') -0468 modelSBML.reaction(i).isSetFast=1; -0469 end -0470 end -0471 %Copy the default values to the next index as long as it is not the -0472 %last one -0473 if i<numel(model.rxns) -0474 modelSBML.reaction(i+1)=modelSBML.reaction(i); -0475 end -0476 -0477 if isfield(modelSBML.reaction,'metaid') -0478 modelSBML.reaction(i).metaid=['R_' model.rxns{i}]; -0479 end -0480 -0481 %Export notes information -0482 if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i})) -0483 modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">'; -0484 if ~isnan(model.rxnConfidenceScores(i)) -0485 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>']; -0486 end -0487 if ~isempty(model.rxnReferences{i}) -0488 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>']; -0489 end -0490 if ~isempty(model.rxnNotes{i}) -0491 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>']; -0492 end -0493 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>']; -0494 end -0495 -0496 % Export SBO terms from rxnMiriams -0497 if ~isempty(model.rxnMiriams{i}) -0498 [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name); -0499 if sbo_ind > 0 -0500 
modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0501 % remove the SBO term from rxnMiriams so the information is not -0502 % duplicated in the "annotation" field later on -0503 model.rxnMiriams{i}.name(sbo_ind) = []; -0504 model.rxnMiriams{i}.value(sbo_ind) = []; -0505 end -0506 end -0507 -0508 %Export annotation information from rxnMiriams -0509 if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i}) -0510 modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">']; -0511 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>']; -0512 if ~isempty(model.eccodes{i}) -0513 eccodes=regexp(model.eccodes{i},';','split'); -0514 for j=1:numel(eccodes) -0515 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>']; -0516 end -0517 end -0518 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0519 end -0520 -0521 if isfield(modelSBML.reaction, 'name') -0522 modelSBML.reaction(i).name=model.rxnNames{i}; -0523 end -0524 if isfield(modelSBML.reaction, 'id') -0525 modelSBML.reaction(i).id=['R_' model.rxns{i}]; -0526 end -0527 -0528 %Add the information about reactants and products -0529 involvedMets=addReactantsProducts(model,modelSBML,i); -0530 for j=1:numel(involvedMets.reactant) -0531 if j<numel(involvedMets.reactant) -0532 
modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j); -0533 end -0534 modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species; -0535 modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry; -0536 modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry; -0537 modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant; -0538 end -0539 if numel(involvedMets.reactant)==0 -0540 modelSBML.reaction(i).reactant=''; -0541 end -0542 for j=1:numel(involvedMets.product) -0543 if j<numel(involvedMets.product) -0544 modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j); -0545 end -0546 modelSBML.reaction(i).product(j).species=involvedMets.product(j).species; -0547 modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry; -0548 modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry; -0549 modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant; -0550 end -0551 if numel(involvedMets.product)==0 -0552 modelSBML.reaction(i).product=''; -0553 end -0554 %Export reversibility information. Reactions are irreversible by -0555 %default -0556 if model.rev(i)==1 -0557 modelSBML.reaction(i).reversible=1; +0442 listUniqueValues=unique(totalValues); +0443 +0444 for i=1:length(listUniqueValues) +0445 listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs. 
+0446 ind=find(ismember(totalValues,listUniqueValues(i))); +0447 totalNames(ind)=listUniqueNames(i,1); +0448 end +0449 +0450 for i=1:length(listUniqueNames) +0451 %Add the default values, as these will be the same in all entries +0452 if i==1 +0453 if isfield(modelSBML.parameter, 'constant') +0454 modelSBML.parameter(i).constant=1; +0455 end +0456 if isfield(modelSBML.parameter, 'isSetValue') +0457 modelSBML.parameter(i).isSetValue=1; +0458 end +0459 end +0460 %Copy the default values to the next index as long as it is not the +0461 %last one +0462 if i<numel(listUniqueNames) +0463 modelSBML.parameter(i+1)=modelSBML.parameter(i); +0464 end +0465 modelSBML.parameter(i).id=listUniqueNames{i}; +0466 modelSBML.parameter(i).value=listUniqueValues(i); +0467 end +0468 +0469 for i=1:numel(model.rxns) +0470 %Add the default values, as these will be the same in all entries +0471 if i==1 +0472 if isfield(modelSBML.reaction, 'sboTerm') +0473 modelSBML.reaction(i).sboTerm=176; +0474 end +0475 if isfield(modelSBML.reaction, 'isSetFast') +0476 modelSBML.reaction(i).isSetFast=1; +0477 end +0478 end +0479 %Copy the default values to the next index as long as it is not the +0480 %last one +0481 if i<numel(model.rxns) +0482 modelSBML.reaction(i+1)=modelSBML.reaction(i); +0483 end +0484 +0485 if isfield(modelSBML.reaction,'metaid') +0486 modelSBML.reaction(i).metaid=['R_' model.rxns{i}]; +0487 end +0488 +0489 %Export notes information +0490 if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i})) +0491 modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">'; +0492 if ~isnan(model.rxnConfidenceScores(i)) +0493 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>']; +0494 end +0495 if ~isempty(model.rxnReferences{i}) +0496 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>']; +0497 end +0498 
if ~isempty(model.rxnNotes{i}) +0499 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>']; +0500 end +0501 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>']; +0502 end +0503 +0504 % Export SBO terms from rxnMiriams +0505 if ~isempty(model.rxnMiriams{i}) +0506 [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name); +0507 if sbo_ind > 0 +0508 modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0509 % remove the SBO term from rxnMiriams so the information is not +0510 % duplicated in the "annotation" field later on +0511 model.rxnMiriams{i}.name(sbo_ind) = []; +0512 model.rxnMiriams{i}.value(sbo_ind) = []; +0513 end +0514 end +0515 +0516 %Export annotation information from rxnMiriams +0517 if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i}) +0518 modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">']; +0519 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>']; +0520 if ~isempty(model.eccodes{i}) +0521 eccodes=regexp(model.eccodes{i},';','split'); +0522 for j=1:numel(eccodes) +0523 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>']; +0524 end +0525 end +0526 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0527 end +0528 +0529 if 
isfield(modelSBML.reaction, 'name') +0530 modelSBML.reaction(i).name=model.rxnNames{i}; +0531 end +0532 if isfield(modelSBML.reaction, 'id') +0533 modelSBML.reaction(i).id=['R_' model.rxns{i}]; +0534 end +0535 +0536 %Add the information about reactants and products +0537 involvedMets=addReactantsProducts(model,modelSBML,i); +0538 for j=1:numel(involvedMets.reactant) +0539 if j<numel(involvedMets.reactant) +0540 modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j); +0541 end +0542 modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species; +0543 modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry; +0544 modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry; +0545 modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant; +0546 end +0547 if numel(involvedMets.reactant)==0 +0548 modelSBML.reaction(i).reactant=''; +0549 end +0550 for j=1:numel(involvedMets.product) +0551 if j<numel(involvedMets.product) +0552 modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j); +0553 end +0554 modelSBML.reaction(i).product(j).species=involvedMets.product(j).species; +0555 modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry; +0556 modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry; +0557 modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant; 0558 end -0559 if isfield(model, 'rxnComps') -0560 modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)}; +0559 if numel(involvedMets.product)==0 +0560 modelSBML.reaction(i).product=''; 0561 end -0562 if isfield(model, 'grRules') -0563 modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i}; -0564 end -0565 modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i}; -0566 modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i}; -0567 end -0568 
-0569 %Prepare subSystems Code taken from COBRA functions getModelSubSystems, -0570 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0, -0571 %license file in readme/GPL.MD. Code modified for RAVEN -0572 if modelHasSubsystems -0573 modelSBML.groups_group.groups_kind = 'partonomy'; -0574 modelSBML.groups_group.sboTerm = 633; -0575 tmpStruct=modelSBML.groups_group; +0562 %Export reversibility information. Reactions are irreversible by +0563 %default +0564 if model.rev(i)==1 +0565 modelSBML.reaction(i).reversible=1; +0566 end +0567 if isfield(model, 'rxnComps') +0568 modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)}; +0569 end +0570 if isfield(model, 'grRules') +0571 modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i}; +0572 end +0573 modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i}; +0574 modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i}; +0575 end 0576 -0577 rxns=strcat('R_',model.rxns); -0578 if ~any(cellfun(@iscell,model.subSystems)) -0579 if ~any(~cellfun(@isempty,model.subSystems)) -0580 subSystems = {}; -0581 else -0582 subSystems = setdiff(model.subSystems,''); -0583 end -0584 else -0585 orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false); -0586 subSystems = setdiff(vertcat(orderedSubs{:}),''); -0587 end -0588 if isempty(subSystems) -0589 subSystems = {}; -0590 end -0591 if ~isempty(subSystems) -0592 %Build the groups for the group package -0593 groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false)); -0594 for i = 1:length(subSystems) -0595 cgroup = tmpStruct; -0596 if ~any(cellfun(@iscell,model.subSystems)) -0597 present = ismember(model.subSystems,subSystems{i}); -0598 else -0599 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems); -0600 end -0601 groupMembers = rxns(present); -0602 for j = 1:numel(groupMembers) -0603 cMember = tmpStruct.groups_member; 
-0604 cMember.groups_idRef = groupMembers{j}; -0605 if j == 1 -0606 cgroup.groups_member = cMember; -0607 else -0608 cgroup.groups_member(j) = cMember; -0609 end -0610 end -0611 cgroup.groups_id = groupIDs{i}; -0612 cgroup.groups_name = subSystems{i}; -0613 if i == 1 -0614 modelSBML.groups_group = cgroup; -0615 else -0616 modelSBML.groups_group(i) = cgroup; -0617 end -0618 end -0619 end -0620 end -0621 -0622 %Prepare fbc_objective subfield -0623 -0624 modelSBML.fbc_objective.fbc_type='maximize'; -0625 modelSBML.fbc_objective.fbc_id='obj'; -0626 -0627 ind=find(model.c); -0628 -0629 if isempty(ind) -0630 modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0; -0631 EM='The objective function is not defined. The model will be exported as it is. Notice that having undefined objective function may produce warnings related to "fbc:coefficient" and "fbc:reaction" in SBML Validator'; -0632 dispEM(EM,false); -0633 else -0634 for i=1:length(ind) -0635 %Copy the default values to the next index as long as it is not the -0636 %last one -0637 if i<numel(ind) -0638 modelSBML.reaction(i+1)=modelSBML.reaction(i); -0639 end -0640 values=model.c(model.c~=0); -0641 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id; -0642 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i); -0643 modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1; -0644 end -0645 end -0646 -0647 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id; -0648 -0649 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))]; -0650 if modelHasSubsystems -0651 groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))]; -0652 modelSBML.namespaces=struct('prefix',{'','fbc','groups'},... 
-0653 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... -0654 fbcStr,groupStr}); -0655 else -0656 modelSBML.namespaces=struct('prefix',{'','fbc'},... -0657 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... -0658 fbcStr}); -0659 end -0660 -0661 if sbmlPackageVersions(1) == 2 -0662 modelSBML.fbc_strict=1; -0663 end -0664 -0665 modelSBML.rule=[]; -0666 modelSBML.constraint=[]; -0667 -0668 OutputSBML(modelSBML,fileName,1,0,[1,0]); -0669 end -0670 -0671 -0672 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions) -0673 %Returns the blank SBML model structure by using appropriate libSBML -0674 %functions. This creates structure by considering three levels +0577 %Prepare subSystems Code taken from COBRA functions getModelSubSystems, +0578 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0, +0579 %license file in readme/GPL.MD. 
Code modified for RAVEN +0580 if modelHasSubsystems +0581 modelSBML.groups_group.groups_kind = 'partonomy'; +0582 modelSBML.groups_group.sboTerm = 633; +0583 tmpStruct=modelSBML.groups_group; +0584 +0585 rxns=strcat('R_',model.rxns); +0586 if ~any(cellfun(@iscell,model.subSystems)) +0587 if ~any(~cellfun(@isempty,model.subSystems)) +0588 subSystems = {}; +0589 else +0590 subSystems = setdiff(model.subSystems,''); +0591 end +0592 else +0593 orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false); +0594 subSystems = setdiff(vertcat(orderedSubs{:}),''); +0595 end +0596 if isempty(subSystems) +0597 subSystems = {}; +0598 end +0599 if ~isempty(subSystems) +0600 %Build the groups for the group package +0601 groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false)); +0602 for i = 1:length(subSystems) +0603 cgroup = tmpStruct; +0604 if ~any(cellfun(@iscell,model.subSystems)) +0605 present = ismember(model.subSystems,subSystems{i}); +0606 else +0607 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems); +0608 end +0609 groupMembers = rxns(present); +0610 for j = 1:numel(groupMembers) +0611 cMember = tmpStruct.groups_member; +0612 cMember.groups_idRef = groupMembers{j}; +0613 if j == 1 +0614 cgroup.groups_member = cMember; +0615 else +0616 cgroup.groups_member(j) = cMember; +0617 end +0618 end +0619 cgroup.groups_id = groupIDs{i}; +0620 cgroup.groups_name = subSystems{i}; +0621 if i == 1 +0622 modelSBML.groups_group = cgroup; +0623 else +0624 modelSBML.groups_group(i) = cgroup; +0625 end +0626 end +0627 end +0628 end +0629 +0630 %Prepare fbc_objective subfield +0631 +0632 modelSBML.fbc_objective.fbc_type='maximize'; +0633 modelSBML.fbc_objective.fbc_id='obj'; +0634 +0635 ind=find(model.c); +0636 +0637 if isempty(ind) +0638 modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0; +0639 EM='The objective function is not defined. The model will be exported as it is. 
Notice that having undefined objective function may produce warnings related to "fbc:coefficient" and "fbc:reaction" in SBML Validator'; +0640 dispEM(EM,false); +0641 else +0642 for i=1:length(ind) +0643 %Copy the default values to the next index as long as it is not the +0644 %last one +0645 if i<numel(ind) +0646 modelSBML.reaction(i+1)=modelSBML.reaction(i); +0647 end +0648 values=model.c(model.c~=0); +0649 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id; +0650 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i); +0651 modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1; +0652 end +0653 end +0654 +0655 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id; +0656 +0657 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))]; +0658 if modelHasSubsystems +0659 groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))]; +0660 modelSBML.namespaces=struct('prefix',{'','fbc','groups'},... +0661 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... +0662 fbcStr,groupStr}); +0663 else +0664 modelSBML.namespaces=struct('prefix',{'','fbc'},... +0665 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... 
+0666 fbcStr}); +0667 end +0668 +0669 if sbmlPackageVersions(1) == 2 +0670 modelSBML.fbc_strict=1; +0671 end +0672 +0673 modelSBML.rule=[]; +0674 modelSBML.constraint=[]; 0675 -0676 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0677 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0676 OutputSBML(modelSBML,fileName,1,0,[1,0]); +0677 end 0678 -0679 for i=1:numel(sbmlFieldNames) -0680 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; -0681 sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0682 sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0683 if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment') -0684 for j=1:numel(sbmlSubfieldNames) -0685 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j}; -0686 sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0687 sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0688 if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw') -0689 for k=1:numel(sbmlSubsubfieldNames) -0690 %'compartment' and 'species' fields are not supposed to -0691 %have their standalone structures if they are subfields -0692 %or subsubfields -0693 if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species') -0694 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k}; -0695 end -0696 %If it is fbc_association in the third level, we need -0697 %to establish the fourth level, since libSBML requires -0698 %it -0699 if 
strcmp(sbmlSubsubfieldNames{1,k},'fbc_association') -0700 fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0701 fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0702 for l=1:numel(fbc_associationFieldNames) -0703 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l}; -0704 end -0705 end -0706 end -0707 end -0708 end -0709 end -0710 if ~isstruct(modelSBML.(sbmlFieldNames{1,i})) -0711 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; -0712 end -0713 end -0714 -0715 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr'; -0716 -0717 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0718 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0719 -0720 kinds={'mole','gram','second'}; -0721 exponents=[1 -1 -1]; -0722 scales=[-3 0 0]; -0723 multipliers=[1 1 1*60*60]; +0679 +0680 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions) +0681 %Returns the blank SBML model structure by using appropriate libSBML +0682 %functions. 
This creates structure by considering three levels +0683 +0684 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0685 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0686 +0687 for i=1:numel(sbmlFieldNames) +0688 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; +0689 sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0690 sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0691 if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment') +0692 for j=1:numel(sbmlSubfieldNames) +0693 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j}; +0694 sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0695 sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0696 if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw') +0697 for k=1:numel(sbmlSubsubfieldNames) +0698 %'compartment' and 'species' fields are not supposed to +0699 %have their standalone structures if they are subfields +0700 %or subsubfields +0701 if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species') +0702 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k}; +0703 end +0704 %If it is fbc_association in the third level, we need +0705 %to establish the fourth level, since libSBML requires +0706 %it +0707 if strcmp(sbmlSubsubfieldNames{1,k},'fbc_association') +0708 fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0709 
fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0710 for l=1:numel(fbc_associationFieldNames) +0711 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l}; +0712 end +0713 end +0714 end +0715 end +0716 end +0717 end +0718 if ~isstruct(modelSBML.(sbmlFieldNames{1,i})) +0719 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; +0720 end +0721 end +0722 +0723 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr'; 0724 -0725 for i=1:numel(unitFieldNames) -0726 modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i}; -0727 for j=1:3 -0728 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i}; -0729 if strcmp(unitFieldNames{1,i},'kind') -0730 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j}; -0731 elseif strcmp(unitFieldNames{1,i},'exponent') -0732 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j); -0733 elseif strcmp(unitFieldNames{1,i},'scale') -0734 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j); -0735 elseif strcmp(unitFieldNames{1,i},'multiplier') -0736 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j); -0737 end -0738 end -0739 end -0740 end -0741 -0742 function miriamString=getMiriam(miriamStruct) -0743 %Returns a string with list elements for a miriam structure ('<rdf:li -0744 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). 
This -0745 %is just to speed up things since this is done many times during the -0746 %exporting -0747 -0748 miriamString=''; -0749 if isfield(miriamStruct,'name') -0750 for i=1:numel(miriamStruct.name) -0751 miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>']; -0752 end -0753 end -0754 end +0725 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0726 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0727 +0728 kinds={'mole','gram','second'}; +0729 exponents=[1 -1 -1]; +0730 scales=[-3 0 0]; +0731 multipliers=[1 1 1*60*60]; +0732 +0733 for i=1:numel(unitFieldNames) +0734 modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i}; +0735 for j=1:3 +0736 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i}; +0737 if strcmp(unitFieldNames{1,i},'kind') +0738 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j}; +0739 elseif strcmp(unitFieldNames{1,i},'exponent') +0740 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j); +0741 elseif strcmp(unitFieldNames{1,i},'scale') +0742 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j); +0743 elseif strcmp(unitFieldNames{1,i},'multiplier') +0744 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j); +0745 end +0746 end +0747 end +0748 end +0749 +0750 function miriamString=getMiriam(miriamStruct) +0751 %Returns a string with list elements for a miriam structure ('<rdf:li +0752 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). This +0753 %is just to speed up things since this is done many times during the +0754 %exporting 0755 -0756 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i) -0757 %This function provides reactants and products for particular reaction. 
The -0758 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679 -0759 -0760 met_idx = find(model.S(:, i)); -0761 tmp_Rxn.product=[]; -0762 tmp_Rxn.reactant=[]; -0763 for j_met=1:size(met_idx,1) -0764 tmp_idx = met_idx(j_met,1); -0765 sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id; -0766 met_stoich = model.S(tmp_idx, i); -0767 sbml_tmp_species_ref.stoichiometry = abs(met_stoich); -0768 sbml_tmp_species_ref.isSetStoichiometry=1; -0769 sbml_tmp_species_ref.constant=1; -0770 if (met_stoich > 0) -0771 tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ]; -0772 else -0773 tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref]; -0774 end -0775 end -0776 end -0777 -0778 function vecT = columnVector(vec) -0779 % Code below taken from COBRA Toolbox under GNU General Public License v3.0 -0780 % license file in readme/GPL.MD. -0781 % -0782 % Converts a vector to a column vector -0783 % -0784 % USAGE: -0785 % -0786 % vecT = columnVector(vec) -0787 % -0788 % INPUT: -0789 % vec: a vector -0790 % -0791 % OUTPUT: -0792 % vecT: a column vector -0793 -0794 [n, m] = size(vec); -0795 -0796 if n < m -0797 vecT = vec'; -0798 else -0799 vecT = vec; -0800 end -0801 end

    +0756 miriamString=''; +0757 if isfield(miriamStruct,'name') +0758 for i=1:numel(miriamStruct.name) +0759 miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>']; +0760 end +0761 end +0762 end +0763 +0764 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i) +0765 %This function provides reactants and products for particular reaction. The +0766 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679 +0767 +0768 met_idx = find(model.S(:, i)); +0769 tmp_Rxn.product=[]; +0770 tmp_Rxn.reactant=[]; +0771 for j_met=1:size(met_idx,1) +0772 tmp_idx = met_idx(j_met,1); +0773 sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id; +0774 met_stoich = model.S(tmp_idx, i); +0775 sbml_tmp_species_ref.stoichiometry = abs(met_stoich); +0776 sbml_tmp_species_ref.isSetStoichiometry=1; +0777 sbml_tmp_species_ref.constant=1; +0778 if (met_stoich > 0) +0779 tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ]; +0780 else +0781 tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref]; +0782 end +0783 end +0784 end +0785 +0786 function vecT = columnVector(vec) +0787 % Code below taken from COBRA Toolbox under GNU General Public License v3.0 +0788 % license file in readme/GPL.MD. +0789 % +0790 % Converts a vector to a column vector +0791 % +0792 % USAGE: +0793 % +0794 % vecT = columnVector(vec) +0795 % +0796 % INPUT: +0797 % vec: a vector +0798 % +0799 % OUTPUT: +0800 % vecT: a column vector +0801 +0802 [n, m] = size(vec); +0803 +0804 if n < m +0805 vecT = vec'; +0806 else +0807 vecT = vec; +0808 end +0809 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportToExcelFormat.html b/doc/io/exportToExcelFormat.html index df284072..35004a4e 100644 --- a/doc/io/exportToExcelFormat.html +++ b/doc/io/exportToExcelFormat.html @@ -24,7 +24,7 @@

    PURPOSE ^exportToExcelFormat

    SYNOPSIS ^

    -
    function exportToExcelFormat(model,filename)
    +
    function exportToExcelFormat(model,filename,sortIds)

    DESCRIPTION ^

     exportToExcelFormat
    @@ -36,6 +36,9 @@ 

    DESCRIPTION ^DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • exportToTabDelimited exportToTabDelimited
  • loadWorkbook loadWorkbook
  • sortIdentifiers exportModel
  • writeSheet writeSheet
  • This function is called by: +
  • exportForGit exportForGit
  • SOURCE CODE ^

    -
    0001 function exportToExcelFormat(model,filename)
    +
    0001 function exportToExcelFormat(model,filename,sortIds)
     0002 % exportToExcelFormat
     0003 %   Exports a model structure to the Microsoft Excel model format
     0004 %
    @@ -68,369 +71,378 @@ 

    SOURCE CODE ^% be only a path, in which case the model is exported to a set 0009 % of tab-delimited text files instead. See exportToTabDelimited 0010 % for details regarding that functionality -0011 % -0012 % The resulting Excel file can be used with importExcelModel/SBMLFromExcel -0013 % for modelling or to generate a SBML file. +0011 % sortIds logical whether metabolites, reactions and genes should be +0012 % sorted alphabetically by their identifiers (opt, default +0013 % false) 0014 % -0015 % NOTE: No checks are made regarding the correctness of the model. Use -0016 % checkModelStruct to identify problems in the model structure +0015 % The resulting Excel file can be used with importExcelModel/SBMLFromExcel +0016 % for modelling or to generate a SBML file. 0017 % -0018 % Usage: exportToExcelFormat(model,filename) -0019 -0020 [~, A, B]=fileparts(filename); -0021 -0022 %If a path was used call on exportToTabDelimited instead -0023 if ~any(A) || ~any(B) -0024 exportToTabDelimited(model,filename); -0025 return; -0026 end -0027 -0028 if ~strcmpi(B,'.xlsx') -0029 EM='As of RAVEN version 1.9, only export to xlsx format is supported'; -0030 dispEM(EM); -0031 end -0032 -0033 import java.io.File; -0034 import java.io.FileOutputStream; -0035 import java.io.IOException; +0018 % NOTE: No checks are made regarding the correctness of the model. 
Use +0019 % checkModelStruct to identify problems in the model structure +0020 % +0021 % Usage: exportToExcelFormat(model,filename,sortIds) +0022 if nargin<3 +0023 sortIds=false; +0024 end +0025 if sortIds==true +0026 model=sortIdentifiers(model); +0027 end +0028 +0029 [~, A, B]=fileparts(filename); +0030 +0031 %If a path was used call on exportToTabDelimited instead +0032 if ~any(A) || ~any(B) +0033 exportToTabDelimited(model,filename); +0034 return; +0035 end 0036 -0037 %Remove the output file if it already exists -0038 if exist(filename,'file') -0039 delete(filename); +0037 if ~strcmpi(B,'.xlsx') +0038 EM='As of RAVEN version 1.9, only export to xlsx format is supported'; +0039 dispEM(EM); 0040 end 0041 -0042 %Load an empty workbook -0043 wb=loadWorkbook(filename,true); -0044 -0045 %Construct equations -0046 model.equations=constructEquations(model,model.rxns,true); -0047 -0048 %Check if it should print genes -0049 if isfield(model,'grRules') -0050 rules=model.grRules; -0051 else -0052 rules=[]; -0053 end -0054 -0055 %Check if the model has default upper/lower bounds. 
This determines if -0056 %those values should be printed or not -0057 hasDefaultLB=false; -0058 hasDefaultUB=false; -0059 if isfield(model,'annotation') -0060 if isfield(model.annotation,'defaultLB') -0061 hasDefaultLB=true; -0062 end -0063 if isfield(model.annotation,'defaultUB') -0064 hasDefaultUB=true; -0065 end -0066 end -0067 -0068 %Add the RXNS sheet -0069 -0070 %Create the header row -0071 headers={'#';'ID';'NAME';'EQUATION';'EC-NUMBER';'GENE ASSOCIATION';'LOWER BOUND';'UPPER BOUND';'OBJECTIVE';'COMPARTMENT';'MIRIAM';'SUBSYSTEM';'REPLACEMENT ID';'NOTE';'REFERENCE';'CONFIDENCE SCORE'}; -0072 -0073 %Add empty comments -0074 emptyColumn=cell(numel(model.rxns),1); -0075 rxnSheet=emptyColumn; +0042 import java.io.File; +0043 import java.io.FileOutputStream; +0044 import java.io.IOException; +0045 +0046 %Remove the output file if it already exists +0047 if exist(filename,'file') +0048 delete(filename); +0049 end +0050 +0051 %Load an empty workbook +0052 wb=loadWorkbook(filename,true); +0053 +0054 %Construct equations +0055 model.equations=constructEquations(model,model.rxns,true); +0056 +0057 %Check if it should print genes +0058 if isfield(model,'grRules') +0059 rules=model.grRules; +0060 else +0061 rules=[]; +0062 end +0063 +0064 %Check if the model has default upper/lower bounds. 
This determines if +0065 %those values should be printed or not +0066 hasDefaultLB=false; +0067 hasDefaultUB=false; +0068 if isfield(model,'annotation') +0069 if isfield(model.annotation,'defaultLB') +0070 hasDefaultLB=true; +0071 end +0072 if isfield(model.annotation,'defaultUB') +0073 hasDefaultUB=true; +0074 end +0075 end 0076 -0077 %Add the model fields -0078 rxnSheet=[rxnSheet model.rxns]; -0079 -0080 if isfield(model,'rxnNames') -0081 rxnSheet=[rxnSheet model.rxnNames]; -0082 else -0083 rxnSheet=[rxnSheet emptyColumn]; -0084 end +0077 %Add the RXNS sheet +0078 +0079 %Create the header row +0080 headers={'#';'ID';'NAME';'EQUATION';'EC-NUMBER';'GENE ASSOCIATION';'LOWER BOUND';'UPPER BOUND';'OBJECTIVE';'COMPARTMENT';'MIRIAM';'SUBSYSTEM';'REPLACEMENT ID';'NOTE';'REFERENCE';'CONFIDENCE SCORE'}; +0081 +0082 %Add empty comments +0083 emptyColumn=cell(numel(model.rxns),1); +0084 rxnSheet=emptyColumn; 0085 -0086 rxnSheet=[rxnSheet model.equations]; -0087 -0088 if isfield(model,'eccodes') -0089 rxnSheet=[rxnSheet model.eccodes]; -0090 else -0091 rxnSheet=[rxnSheet emptyColumn]; -0092 end -0093 -0094 if ~isempty(rules) -0095 rxnSheet=[rxnSheet rules]; -0096 else -0097 rxnSheet=[rxnSheet emptyColumn]; -0098 end -0099 -0100 lb=emptyColumn; -0101 ub=emptyColumn; -0102 objective=emptyColumn; -0103 rxnMiriams=emptyColumn; -0104 -0105 for i=1:numel(model.rxns) -0106 if isfield(model,'lb') -0107 if hasDefaultLB==true -0108 if model.rev(i)==1 -0109 %If reversible, print only if different than defaultLB -0110 if model.lb(i) ~= model.annotation.defaultLB -0111 lb{i}=model.lb(i); -0112 end -0113 else -0114 %If irreversible, print only for non-zero values -0115 if model.lb(i)~=0 -0116 lb{i}=model.lb(i); -0117 end -0118 end -0119 else -0120 lb{i}=model.lb(i); -0121 end -0122 end -0123 -0124 if isfield(model,'ub') -0125 if hasDefaultUB==true -0126 if model.ub(i) ~= model.annotation.defaultUB -0127 ub{i}=model.ub(i); -0128 end -0129 else -0130 ub{i}=model.ub(i); -0131 end -0132 end 
-0133 -0134 if isfield(model,'c') -0135 if model.c(i)~=0 -0136 objective{i}=model.c(i); -0137 end -0138 end -0139 -0140 if isfield(model,'rxnMiriams') -0141 if ~isempty(model.rxnMiriams{i}) -0142 toPrint=[]; -0143 for j=1:numel(model.rxnMiriams{i}.name) -0144 toPrint=[toPrint strtrim(model.rxnMiriams{i}.name{j}) '/' strtrim(model.rxnMiriams{i}.value{j}) ';']; -0145 end -0146 rxnMiriams{i}=toPrint(1:end-1); -0147 end -0148 end -0149 end -0150 -0151 rxnSheet=[rxnSheet lb]; -0152 rxnSheet=[rxnSheet ub]; -0153 rxnSheet=[rxnSheet objective]; -0154 -0155 if isfield(model,'rxnComps') -0156 rxnSheet=[rxnSheet model.comps(model.rxnComps)]; -0157 else -0158 rxnSheet=[rxnSheet emptyColumn]; -0159 end -0160 -0161 rxnSheet=[rxnSheet rxnMiriams]; -0162 -0163 subsystems=''; -0164 if isfield(model,'subSystems') -0165 for i=1:numel(model.subSystems) -0166 if ~isempty(model.subSystems{i,1}) -0167 subsystems{i,1}=strjoin(model.subSystems{i,1},';'); -0168 else -0169 subsystems{i,1}=''; -0170 end -0171 end -0172 rxnSheet=[rxnSheet subsystems]; -0173 else -0174 rxnSheet=[rxnSheet emptyColumn]; -0175 end -0176 -0177 %For REPLACEMENT ID which isn't in the model -0178 rxnSheet=[rxnSheet emptyColumn]; -0179 -0180 if isfield(model,'rxnNotes') -0181 rxnSheet=[rxnSheet model.rxnNotes]; +0086 %Add the model fields +0087 rxnSheet=[rxnSheet model.rxns]; +0088 +0089 if isfield(model,'rxnNames') +0090 rxnSheet=[rxnSheet model.rxnNames]; +0091 else +0092 rxnSheet=[rxnSheet emptyColumn]; +0093 end +0094 +0095 rxnSheet=[rxnSheet model.equations]; +0096 +0097 if isfield(model,'eccodes') +0098 rxnSheet=[rxnSheet model.eccodes]; +0099 else +0100 rxnSheet=[rxnSheet emptyColumn]; +0101 end +0102 +0103 if ~isempty(rules) +0104 rxnSheet=[rxnSheet rules]; +0105 else +0106 rxnSheet=[rxnSheet emptyColumn]; +0107 end +0108 +0109 lb=emptyColumn; +0110 ub=emptyColumn; +0111 objective=emptyColumn; +0112 rxnMiriams=emptyColumn; +0113 +0114 for i=1:numel(model.rxns) +0115 if isfield(model,'lb') +0116 if 
hasDefaultLB==true +0117 if model.rev(i)==1 +0118 %If reversible, print only if different than defaultLB +0119 if model.lb(i) ~= model.annotation.defaultLB +0120 lb{i}=model.lb(i); +0121 end +0122 else +0123 %If irreversible, print only for non-zero values +0124 if model.lb(i)~=0 +0125 lb{i}=model.lb(i); +0126 end +0127 end +0128 else +0129 lb{i}=model.lb(i); +0130 end +0131 end +0132 +0133 if isfield(model,'ub') +0134 if hasDefaultUB==true +0135 if model.ub(i) ~= model.annotation.defaultUB +0136 ub{i}=model.ub(i); +0137 end +0138 else +0139 ub{i}=model.ub(i); +0140 end +0141 end +0142 +0143 if isfield(model,'c') +0144 if model.c(i)~=0 +0145 objective{i}=model.c(i); +0146 end +0147 end +0148 +0149 if isfield(model,'rxnMiriams') +0150 if ~isempty(model.rxnMiriams{i}) +0151 toPrint=[]; +0152 for j=1:numel(model.rxnMiriams{i}.name) +0153 toPrint=[toPrint strtrim(model.rxnMiriams{i}.name{j}) '/' strtrim(model.rxnMiriams{i}.value{j}) ';']; +0154 end +0155 rxnMiriams{i}=toPrint(1:end-1); +0156 end +0157 end +0158 end +0159 +0160 rxnSheet=[rxnSheet lb]; +0161 rxnSheet=[rxnSheet ub]; +0162 rxnSheet=[rxnSheet objective]; +0163 +0164 if isfield(model,'rxnComps') +0165 rxnSheet=[rxnSheet model.comps(model.rxnComps)]; +0166 else +0167 rxnSheet=[rxnSheet emptyColumn]; +0168 end +0169 +0170 rxnSheet=[rxnSheet rxnMiriams]; +0171 +0172 subsystems=''; +0173 if isfield(model,'subSystems') +0174 for i=1:numel(model.subSystems) +0175 if ~isempty(model.subSystems{i,1}) +0176 subsystems{i,1}=strjoin(model.subSystems{i,1},';'); +0177 else +0178 subsystems{i,1}=''; +0179 end +0180 end +0181 rxnSheet=[rxnSheet subsystems]; 0182 else 0183 rxnSheet=[rxnSheet emptyColumn]; 0184 end 0185 -0186 if isfield(model,'rxnReferences') -0187 rxnSheet=[rxnSheet model.rxnReferences]; -0188 else -0189 rxnSheet=[rxnSheet emptyColumn]; -0190 end -0191 -0192 if isfield(model,'rxnConfidenceScores') -0193 rxnSheet=[rxnSheet num2cell(model.rxnConfidenceScores)]; -0194 else -0195 rxnSheet=[rxnSheet emptyColumn]; 
-0196 end -0197 -0198 wb=writeSheet(wb,'RXNS',0,headers,[],rxnSheet); -0199 -0200 headers={'#';'ID';'NAME';'UNCONSTRAINED';'MIRIAM';'COMPOSITION';'InChI';'COMPARTMENT';'REPLACEMENT ID';'CHARGE'}; -0201 -0202 metSheet=cell(numel(model.mets),numel(headers)); -0203 -0204 for i=1:numel(model.mets) -0205 metSheet{i,2}=[model.metNames{i} '[' model.comps{model.metComps(i)} ']']; -0206 -0207 if isfield(model,'metNames') -0208 metSheet(i,3)=model.metNames(i); -0209 end -0210 -0211 if isfield(model,'unconstrained') -0212 if model.unconstrained(i)~=0 -0213 metSheet{i,4}=true; -0214 end -0215 end -0216 -0217 if isfield(model,'metMiriams') -0218 if ~isempty(model.metMiriams{i}) -0219 toPrint=[]; -0220 for j=1:numel(model.metMiriams{i}.name) -0221 toPrint=[toPrint strtrim(model.metMiriams{i}.name{j}) '/' strtrim(model.metMiriams{i}.value{j}) ';']; -0222 end -0223 metSheet{i,5}=toPrint(1:end-1); -0224 end -0225 end -0226 -0227 % Making sure that only these metFormulas are exported, which don't -0228 % have InChI strings -0229 if isfield(model,'metFormulas') -0230 if isfield(model,'inchis') -0231 if isempty(model.inchis{i}) -0232 metSheet(i,6)=model.metFormulas(i); -0233 end -0234 else -0235 metSheet(i,6)=model.metFormulas(i); -0236 end -0237 end -0238 -0239 if isfield(model,'inchis') -0240 metSheet(i,7)=model.inchis(i); -0241 end -0242 -0243 if isfield(model,'metComps') -0244 metSheet(i,8)=model.comps(model.metComps(i)); -0245 end -0246 -0247 metSheet(i,9)=model.mets(i); -0248 -0249 if isfield(model,'metCharges') -0250 metSheet{i,10}=model.metCharges(i); -0251 end -0252 end -0253 -0254 wb=writeSheet(wb,'METS',1,headers,[],metSheet); -0255 -0256 %Add the COMPS sheet -0257 -0258 %Create the header row -0259 headers={'#';'ABBREVIATION';'NAME';'INSIDE';'MIRIAM'}; -0260 -0261 compSheet=cell(numel(model.comps),numel(headers)); +0186 %For REPLACEMENT ID which isn't in the model +0187 rxnSheet=[rxnSheet emptyColumn]; +0188 +0189 if isfield(model,'rxnNotes') +0190 rxnSheet=[rxnSheet 
model.rxnNotes]; +0191 else +0192 rxnSheet=[rxnSheet emptyColumn]; +0193 end +0194 +0195 if isfield(model,'rxnReferences') +0196 rxnSheet=[rxnSheet model.rxnReferences]; +0197 else +0198 rxnSheet=[rxnSheet emptyColumn]; +0199 end +0200 +0201 if isfield(model,'rxnConfidenceScores') +0202 rxnSheet=[rxnSheet num2cell(model.rxnConfidenceScores)]; +0203 else +0204 rxnSheet=[rxnSheet emptyColumn]; +0205 end +0206 +0207 wb=writeSheet(wb,'RXNS',0,headers,[],rxnSheet); +0208 +0209 headers={'#';'ID';'NAME';'UNCONSTRAINED';'MIRIAM';'COMPOSITION';'InChI';'COMPARTMENT';'REPLACEMENT ID';'CHARGE'}; +0210 +0211 metSheet=cell(numel(model.mets),numel(headers)); +0212 +0213 for i=1:numel(model.mets) +0214 metSheet{i,2}=[model.metNames{i} '[' model.comps{model.metComps(i)} ']']; +0215 +0216 if isfield(model,'metNames') +0217 metSheet(i,3)=model.metNames(i); +0218 end +0219 +0220 if isfield(model,'unconstrained') +0221 if model.unconstrained(i)~=0 +0222 metSheet{i,4}=true; +0223 end +0224 end +0225 +0226 if isfield(model,'metMiriams') +0227 if ~isempty(model.metMiriams{i}) +0228 toPrint=[]; +0229 for j=1:numel(model.metMiriams{i}.name) +0230 toPrint=[toPrint strtrim(model.metMiriams{i}.name{j}) '/' strtrim(model.metMiriams{i}.value{j}) ';']; +0231 end +0232 metSheet{i,5}=toPrint(1:end-1); +0233 end +0234 end +0235 +0236 % Making sure that only these metFormulas are exported, which don't +0237 % have InChI strings +0238 if isfield(model,'metFormulas') +0239 if isfield(model,'inchis') +0240 if isempty(model.inchis{i}) +0241 metSheet(i,6)=model.metFormulas(i); +0242 end +0243 else +0244 metSheet(i,6)=model.metFormulas(i); +0245 end +0246 end +0247 +0248 if isfield(model,'inchis') +0249 metSheet(i,7)=model.inchis(i); +0250 end +0251 +0252 if isfield(model,'metComps') +0253 metSheet(i,8)=model.comps(model.metComps(i)); +0254 end +0255 +0256 metSheet(i,9)=model.mets(i); +0257 +0258 if isfield(model,'metCharges') +0259 metSheet{i,10}=model.metCharges(i); +0260 end +0261 end 0262 -0263 for 
i=1:numel(model.comps) -0264 compSheet(i,2)=model.comps(i); -0265 -0266 if isfield(model,'compNames') -0267 compSheet(i,3)=model.compNames(i); -0268 end -0269 -0270 if isfield(model,'compOutside') -0271 compSheet(i,4)=model.compOutside(i); -0272 end -0273 -0274 if isfield(model,'compMiriams') -0275 if ~isempty(model.compMiriams{i}) -0276 toPrint=[]; -0277 for j=1:numel(model.compMiriams{i}.name) -0278 toPrint=[toPrint strtrim(model.compMiriams{i}.name{j}) '/' strtrim(model.compMiriams{i}.value{j}) ';']; -0279 end -0280 compSheet{i,5}=toPrint(1:end-1); -0281 end -0282 end -0283 end -0284 -0285 wb=writeSheet(wb,'COMPS',2,headers,[],compSheet); -0286 -0287 %Add the GENES sheet -0288 if isfield(model,'genes') -0289 %Create the header row -0290 headers={'#';'NAME';'MIRIAM';'SHORT NAME';'COMPARTMENT'}; -0291 -0292 geneSheet=cell(numel(model.genes),numel(headers)); -0293 -0294 for i=1:numel(model.genes) -0295 geneSheet(i,2)=model.genes(i); -0296 -0297 if isfield(model,'geneMiriams') -0298 if ~isempty(model.geneMiriams{i}) -0299 toPrint=[]; -0300 for j=1:numel(model.geneMiriams{i}.name) -0301 toPrint=[toPrint strtrim(model.geneMiriams{i}.name{j}) '/' strtrim(model.geneMiriams{i}.value{j}) ';']; -0302 end -0303 geneSheet{i,3}=toPrint(1:end-1); -0304 end -0305 end -0306 if isfield(model,'geneShortNames') -0307 geneSheet(i,4)=model.geneShortNames(i); -0308 end -0309 if isfield(model,'geneComps') -0310 geneSheet(i,5)=model.comps(model.geneComps(i)); -0311 end -0312 end -0313 -0314 wb=writeSheet(wb,'GENES',3,headers,[],geneSheet); -0315 end -0316 -0317 %Add the MODEL sheet -0318 -0319 %Create the header row -0320 headers={'#';'ID';'NAME';'TAXONOMY';'DEFAULT LOWER';'DEFAULT UPPER';'CONTACT GIVEN NAME';'CONTACT FAMILY NAME';'CONTACT EMAIL';'ORGANIZATION';'NOTES'}; -0321 -0322 modelSheet=cell(1,numel(headers)); -0323 -0324 if ~isfield(model,'annotation') -0325 model.annotation = []; -0326 end +0263 wb=writeSheet(wb,'METS',1,headers,[],metSheet); +0264 +0265 %Add the COMPS sheet 
+0266 +0267 %Create the header row +0268 headers={'#';'ABBREVIATION';'NAME';'INSIDE';'MIRIAM'}; +0269 +0270 compSheet=cell(numel(model.comps),numel(headers)); +0271 +0272 for i=1:numel(model.comps) +0273 compSheet(i,2)=model.comps(i); +0274 +0275 if isfield(model,'compNames') +0276 compSheet(i,3)=model.compNames(i); +0277 end +0278 +0279 if isfield(model,'compOutside') +0280 compSheet(i,4)=model.compOutside(i); +0281 end +0282 +0283 if isfield(model,'compMiriams') +0284 if ~isempty(model.compMiriams{i}) +0285 toPrint=[]; +0286 for j=1:numel(model.compMiriams{i}.name) +0287 toPrint=[toPrint strtrim(model.compMiriams{i}.name{j}) '/' strtrim(model.compMiriams{i}.value{j}) ';']; +0288 end +0289 compSheet{i,5}=toPrint(1:end-1); +0290 end +0291 end +0292 end +0293 +0294 wb=writeSheet(wb,'COMPS',2,headers,[],compSheet); +0295 +0296 %Add the GENES sheet +0297 if isfield(model,'genes') +0298 %Create the header row +0299 headers={'#';'NAME';'MIRIAM';'SHORT NAME';'COMPARTMENT'}; +0300 +0301 geneSheet=cell(numel(model.genes),numel(headers)); +0302 +0303 for i=1:numel(model.genes) +0304 geneSheet(i,2)=model.genes(i); +0305 +0306 if isfield(model,'geneMiriams') +0307 if ~isempty(model.geneMiriams{i}) +0308 toPrint=[]; +0309 for j=1:numel(model.geneMiriams{i}.name) +0310 toPrint=[toPrint strtrim(model.geneMiriams{i}.name{j}) '/' strtrim(model.geneMiriams{i}.value{j}) ';']; +0311 end +0312 geneSheet{i,3}=toPrint(1:end-1); +0313 end +0314 end +0315 if isfield(model,'geneShortNames') +0316 geneSheet(i,4)=model.geneShortNames(i); +0317 end +0318 if isfield(model,'geneComps') +0319 geneSheet(i,5)=model.comps(model.geneComps(i)); +0320 end +0321 end +0322 +0323 wb=writeSheet(wb,'GENES',3,headers,[],geneSheet); +0324 end +0325 +0326 %Add the MODEL sheet 0327 -0328 if isfield(model,'id') -0329 modelSheet{1,2}=model.id; -0330 else -0331 modelSheet{1,2}='blankID'; -0332 end -0333 if isfield(model,'name') -0334 modelSheet{1,3}=model.name; -0335 else -0336 modelSheet{1,3}='blankName'; -0337 
end -0338 if isfield(model.annotation,'taxonomy') -0339 modelSheet{1,4}=model.annotation.taxonomy; -0340 end -0341 if isfield(model.annotation,'defaultLB') -0342 modelSheet{1,5}=model.annotation.defaultLB; -0343 end -0344 if isfield(model.annotation,'defaultUB') -0345 modelSheet{1,6}=model.annotation.defaultUB; +0328 %Create the header row +0329 headers={'#';'ID';'NAME';'TAXONOMY';'DEFAULT LOWER';'DEFAULT UPPER';'CONTACT GIVEN NAME';'CONTACT FAMILY NAME';'CONTACT EMAIL';'ORGANIZATION';'NOTES'}; +0330 +0331 modelSheet=cell(1,numel(headers)); +0332 +0333 if ~isfield(model,'annotation') +0334 model.annotation = []; +0335 end +0336 +0337 if isfield(model,'id') +0338 modelSheet{1,2}=model.id; +0339 else +0340 modelSheet{1,2}='blankID'; +0341 end +0342 if isfield(model,'name') +0343 modelSheet{1,3}=model.name; +0344 else +0345 modelSheet{1,3}='blankName'; 0346 end -0347 if isfield(model.annotation,'givenName') -0348 modelSheet{1,7}=model.annotation.givenName; +0347 if isfield(model.annotation,'taxonomy') +0348 modelSheet{1,4}=model.annotation.taxonomy; 0349 end -0350 if isfield(model.annotation,'familyName') -0351 modelSheet{1,8}=model.annotation.familyName; +0350 if isfield(model.annotation,'defaultLB') +0351 modelSheet{1,5}=model.annotation.defaultLB; 0352 end -0353 if isfield(model.annotation,'email') -0354 modelSheet{1,9}=model.annotation.email; +0353 if isfield(model.annotation,'defaultUB') +0354 modelSheet{1,6}=model.annotation.defaultUB; 0355 end -0356 if isfield(model.annotation,'organization') -0357 modelSheet{1,10}=model.annotation.organization; +0356 if isfield(model.annotation,'givenName') +0357 modelSheet{1,7}=model.annotation.givenName; 0358 end -0359 if isfield(model.annotation,'note') -0360 modelSheet{1,11}=model.annotation.note; +0359 if isfield(model.annotation,'familyName') +0360 modelSheet{1,8}=model.annotation.familyName; 0361 end -0362 -0363 if isfield(model,'genes') -0364 wb=writeSheet(wb,'MODEL',4,headers,[],modelSheet); -0365 else -0366 
wb=writeSheet(wb,'MODEL',3,headers,[],modelSheet); +0362 if isfield(model.annotation,'email') +0363 modelSheet{1,9}=model.annotation.email; +0364 end +0365 if isfield(model.annotation,'organization') +0366 modelSheet{1,10}=model.annotation.organization; 0367 end -0368 -0369 %Open the output stream -0370 out = FileOutputStream(filename); -0371 wb.write(out); -0372 out.close(); -0373 end

    +0368 if isfield(model.annotation,'note') +0369 modelSheet{1,11}=model.annotation.note; +0370 end +0371 +0372 if isfield(model,'genes') +0373 wb=writeSheet(wb,'MODEL',4,headers,[],modelSheet); +0374 else +0375 wb=writeSheet(wb,'MODEL',3,headers,[],modelSheet); +0376 end +0377 +0378 %Open the output stream +0379 out = FileOutputStream(filename); +0380 wb.write(out); +0381 out.close(); +0382 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportToTabDelimited.html b/doc/io/exportToTabDelimited.html index b35d6ca0..d570724b 100644 --- a/doc/io/exportToTabDelimited.html +++ b/doc/io/exportToTabDelimited.html @@ -24,7 +24,7 @@

    PURPOSE ^exportToTabDelimited

    SYNOPSIS ^

    -
    function exportToTabDelimited(model,path)
    +
    function exportToTabDelimited(model,path,sortIds)

    DESCRIPTION ^

     exportToTabDelimited
    @@ -34,6 +34,8 @@ 

    DESCRIPTION ^DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • sortIdentifiers exportModel
  • This function is called by: +
  • exportToExcelFormat exportToExcelFormat
  • SOURCE CODE ^

    -
    0001 function exportToTabDelimited(model,path)
    +
    0001 function exportToTabDelimited(model,path,sortIds)
     0002 % exportToTabDelimited
     0003 %   Exports a model structure to a set of tab-delimited text files
     0004 %
    @@ -65,330 +67,338 @@ 

    SOURCE CODE ^% path the path to export to. The resulting text files will be saved 0007 % under the names excelRxns.txt, excelMets.txt, excelGenes.txt, 0008 % excelModel.txt, and excelComps.txt -0009 % -0010 % NOTE: This functionality was previously a part of exportToExcelFormat. -0011 % The naming of the resulting text files is to preserve backward -0012 % compatibility -0013 % -0014 % NOTE: No checks are made regarding the correctness of the model. Use -0015 % checkModelStruct to identify problems in the model structure -0016 % -0017 % Usage: exportToTabDelimited(model,path) -0018 -0019 if nargin<2 -0020 path='./'; -0021 end -0022 -0023 %If the folder doesn't exist then create it -0024 if ~exist(path,'dir') -0025 mkdir(path); +0009 % sortIds logical whether metabolites, reactions and genes should be +0010 % sorted alphabetically by their identifiers (opt, default false) +0011 % +0012 % NOTE: This functionality was previously a part of exportToExcelFormat. +0013 % The naming of the resulting text files is to preserve backward +0014 % compatibility +0015 % +0016 % NOTE: No checks are made regarding the correctness of the model. 
Use +0017 % checkModelStruct to identify problems in the model structure +0018 % +0019 % Usage: exportToTabDelimited(model,path,sortIds) +0020 +0021 if nargin<2 +0022 path='./'; +0023 end +0024 if nargin<3 +0025 sortIds=false; 0026 end -0027 -0028 %Remove the files if they already exist -0029 if exist(fullfile(path,'excelRxns.txt'),'file') -0030 delete(fullfile(path,'excelRxns.txt')); -0031 end -0032 if exist(fullfile(path,'excelMets.txt'),'file') -0033 delete(fullfile(path,'excelMets.txt')); +0027 if sortIds==true +0028 model=sortIdentifiers(model); +0029 end +0030 +0031 %If the folder doesn't exist then create it +0032 if ~exist(path,'dir') +0033 mkdir(path); 0034 end -0035 if exist(fullfile(path,'excelGenes.txt'),'file') -0036 delete(fullfile(path,'excelGenes.txt')); -0037 end -0038 if exist(fullfile(path,'excelModel.txt'),'file') -0039 delete(fullfile(path,'excelModel.txt')); -0040 end -0041 if exist(fullfile(path,'excelComps.txt'),'file') -0042 delete(fullfile(path,'excelComps.txt')); -0043 end -0044 -0045 %Construct equations -0046 model.equations=constructEquations(model,model.rxns,true); -0047 -0048 %Open for printing the rxn sheet -0049 rxnFile=fopen(fullfile(path,'excelRxns.txt'),'wt'); -0050 -0051 %Print header -0052 fprintf(rxnFile,'#\tID\tNAME\tEQUATION\tEC-NUMBER\tGENE ASSOCIATION\tLOWER BOUND\tUPPER BOUND\tOBJECTIVE\tCOMPARTMENT\tMIRIAM\tSUBSYSTEM\tREPLACEMENT ID\tNOTE\tREFERENCE\tCONFIDENCE SCORE\n'); -0053 -0054 %Loop through the reactions -0055 for i=1:numel(model.rxns) -0056 fprintf(rxnFile,['\t' model.rxns{i} '\t' model.rxnNames{i} '\t' model.equations{i} '\t']); -0057 -0058 if isfield(model,'eccodes') -0059 fprintf(rxnFile,[model.eccodes{i} '\t']); -0060 else -0061 fprintf(rxnFile,'\t'); -0062 end -0063 -0064 if isfield(model,'grRules') -0065 fprintf(rxnFile,[model.grRules{i} '\t']); -0066 else -0067 fprintf(rxnFile,'\t'); -0068 end -0069 -0070 %Print bounds and objectives -0071 fprintf(rxnFile,[num2str(model.lb(i)) '\t' num2str(model.ub(i)) 
'\t']); -0072 -0073 if model.c(i)~=0 -0074 fprintf(rxnFile,[num2str(model.c(i)) '\t' ]); -0075 else -0076 fprintf(rxnFile,'\t'); -0077 end -0078 -0079 if isfield(model,'rxnComps') -0080 fprintf(rxnFile,[model.comps{model.rxnComps(i)} '\t']); -0081 else -0082 fprintf(rxnFile,'\t'); -0083 end -0084 -0085 if isfield(model,'rxnMiriams') -0086 if ~isempty(model.rxnMiriams{i}) -0087 toPrint=[]; -0088 for j=1:numel(model.rxnMiriams{i}.name) -0089 toPrint=[toPrint strtrim(model.rxnMiriams{i}.name{j}) '/' strtrim(model.rxnMiriams{i}.value{j}) ';']; -0090 end -0091 fprintf(rxnFile,[toPrint(1:end-1) '\t']); -0092 else -0093 fprintf(rxnFile,'\t'); -0094 end -0095 else -0096 fprintf(rxnFile,'\t'); -0097 end -0098 -0099 if isfield(model,'subSystems') -0100 if ~isempty(model.subSystems{i}) -0101 fprintf(rxnFile,[strjoin(model.subSystems{i,1},';') '\t']); -0102 else -0103 fprintf(rxnFile,'\t'); -0104 end +0035 +0036 %Remove the files if they already exist +0037 if exist(fullfile(path,'excelRxns.txt'),'file') +0038 delete(fullfile(path,'excelRxns.txt')); +0039 end +0040 if exist(fullfile(path,'excelMets.txt'),'file') +0041 delete(fullfile(path,'excelMets.txt')); +0042 end +0043 if exist(fullfile(path,'excelGenes.txt'),'file') +0044 delete(fullfile(path,'excelGenes.txt')); +0045 end +0046 if exist(fullfile(path,'excelModel.txt'),'file') +0047 delete(fullfile(path,'excelModel.txt')); +0048 end +0049 if exist(fullfile(path,'excelComps.txt'),'file') +0050 delete(fullfile(path,'excelComps.txt')); +0051 end +0052 +0053 %Construct equations +0054 model.equations=constructEquations(model,model.rxns,true); +0055 +0056 %Open for printing the rxn sheet +0057 rxnFile=fopen(fullfile(path,'excelRxns.txt'),'wt'); +0058 +0059 %Print header +0060 fprintf(rxnFile,'#\tID\tNAME\tEQUATION\tEC-NUMBER\tGENE ASSOCIATION\tLOWER BOUND\tUPPER BOUND\tOBJECTIVE\tCOMPARTMENT\tMIRIAM\tSUBSYSTEM\tREPLACEMENT ID\tNOTE\tREFERENCE\tCONFIDENCE SCORE\n'); +0061 +0062 %Loop through the reactions +0063 for 
i=1:numel(model.rxns) +0064 fprintf(rxnFile,['\t' model.rxns{i} '\t' model.rxnNames{i} '\t' model.equations{i} '\t']); +0065 +0066 if isfield(model,'eccodes') +0067 fprintf(rxnFile,[model.eccodes{i} '\t']); +0068 else +0069 fprintf(rxnFile,'\t'); +0070 end +0071 +0072 if isfield(model,'grRules') +0073 fprintf(rxnFile,[model.grRules{i} '\t']); +0074 else +0075 fprintf(rxnFile,'\t'); +0076 end +0077 +0078 %Print bounds and objectives +0079 fprintf(rxnFile,[num2str(model.lb(i)) '\t' num2str(model.ub(i)) '\t']); +0080 +0081 if model.c(i)~=0 +0082 fprintf(rxnFile,[num2str(model.c(i)) '\t' ]); +0083 else +0084 fprintf(rxnFile,'\t'); +0085 end +0086 +0087 if isfield(model,'rxnComps') +0088 fprintf(rxnFile,[model.comps{model.rxnComps(i)} '\t']); +0089 else +0090 fprintf(rxnFile,'\t'); +0091 end +0092 +0093 if isfield(model,'rxnMiriams') +0094 if ~isempty(model.rxnMiriams{i}) +0095 toPrint=[]; +0096 for j=1:numel(model.rxnMiriams{i}.name) +0097 toPrint=[toPrint strtrim(model.rxnMiriams{i}.name{j}) '/' strtrim(model.rxnMiriams{i}.value{j}) ';']; +0098 end +0099 fprintf(rxnFile,[toPrint(1:end-1) '\t']); +0100 else +0101 fprintf(rxnFile,'\t'); +0102 end +0103 else +0104 fprintf(rxnFile,'\t'); 0105 end 0106 -0107 %Print replacement IDs -0108 fprintf(rxnFile,'\t'); -0109 -0110 if isfield(model,'rxnNotes') -0111 fprintf(rxnFile,[model.rxnNotes{i} '\t']); -0112 else -0113 fprintf(rxnFile,'\t'); -0114 end -0115 -0116 if isfield(model,'rxnReferences') -0117 fprintf(rxnFile,[model.rxnReferences{i} '\t']); -0118 else -0119 fprintf(rxnFile,'\t'); -0120 end -0121 -0122 if isfield(model,'rxnConfidenceScores') -0123 fprintf(rxnFile,[num2str(model.rxnConfidenceScores(i)) '\t' ]); -0124 else -0125 fprintf(rxnFile,'\t'); -0126 end -0127 -0128 fprintf(rxnFile,'\n'); -0129 end -0130 -0131 fclose(rxnFile); -0132 -0133 %Open for printing the metabolites sheet -0134 metFile=fopen(fullfile(path,'excelMets.txt'),'wt'); -0135 -0136 %Print header -0137 
fprintf(metFile,'#\tID\tNAME\tUNCONSTRAINED\tMIRIAM\tCOMPOSITION\tInChI\tCOMPARTMENT\tREPLACEMENT ID\tMETS FIELD\tCHARGE\n'); +0107 if isfield(model,'subSystems') +0108 if ~isempty(model.subSystems{i}) +0109 fprintf(rxnFile,[strjoin(model.subSystems{i,1},';') '\t']); +0110 else +0111 fprintf(rxnFile,'\t'); +0112 end +0113 end +0114 +0115 %Print replacement IDs +0116 fprintf(rxnFile,'\t'); +0117 +0118 if isfield(model,'rxnNotes') +0119 fprintf(rxnFile,[model.rxnNotes{i} '\t']); +0120 else +0121 fprintf(rxnFile,'\t'); +0122 end +0123 +0124 if isfield(model,'rxnReferences') +0125 fprintf(rxnFile,[model.rxnReferences{i} '\t']); +0126 else +0127 fprintf(rxnFile,'\t'); +0128 end +0129 +0130 if isfield(model,'rxnConfidenceScores') +0131 fprintf(rxnFile,[num2str(model.rxnConfidenceScores(i)) '\t' ]); +0132 else +0133 fprintf(rxnFile,'\t'); +0134 end +0135 +0136 fprintf(rxnFile,'\n'); +0137 end 0138 -0139 %Loop through the metabolites -0140 for i=1:numel(model.mets) -0141 fprintf(metFile,['\t' model.metNames{i} '[' model.comps{model.metComps(i)} ']\t' model.metNames{i} '\t']); -0142 -0143 if isfield(model,'unconstrained') -0144 if model.unconstrained(i)~=0 -0145 fprintf(metFile,'TRUE\t'); -0146 else -0147 fprintf(metFile,'\t'); -0148 end -0149 else -0150 fprintf(metFile,'\t'); -0151 end -0152 -0153 if isfield(model,'metMiriams') -0154 if ~isempty(model.metMiriams{i}) -0155 toPrint=[]; -0156 for j=1:numel(model.metMiriams{i}.name) -0157 toPrint=[toPrint strtrim(model.metMiriams{i}.name{j}) '/' strtrim(model.metMiriams{i}.value{j}) ';']; -0158 end -0159 fprintf(rxnFile,[toPrint(1:end-1) '\t']); -0160 else -0161 fprintf(metFile,'\t'); -0162 end -0163 else -0164 fprintf(metFile,'\t'); -0165 end -0166 -0167 if isfield(model,'metFormulas') -0168 fprintf(metFile,[model.metFormulas{i} '\t']); -0169 else -0170 fprintf(metFile,'\t'); -0171 end -0172 -0173 if isfield(model,'inchis') -0174 fprintf(metFile,[model.inchis{i} '\t']); -0175 else -0176 fprintf(metFile,'\t'); -0177 end -0178 
-0179 fprintf(metFile,[model.comps{model.metComps(i)} '\t']); +0139 fclose(rxnFile); +0140 +0141 %Open for printing the metabolites sheet +0142 metFile=fopen(fullfile(path,'excelMets.txt'),'wt'); +0143 +0144 %Print header +0145 fprintf(metFile,'#\tID\tNAME\tUNCONSTRAINED\tMIRIAM\tCOMPOSITION\tInChI\tCOMPARTMENT\tREPLACEMENT ID\tMETS FIELD\tCHARGE\n'); +0146 +0147 %Loop through the metabolites +0148 for i=1:numel(model.mets) +0149 fprintf(metFile,['\t' model.metNames{i} '[' model.comps{model.metComps(i)} ']\t' model.metNames{i} '\t']); +0150 +0151 if isfield(model,'unconstrained') +0152 if model.unconstrained(i)~=0 +0153 fprintf(metFile,'TRUE\t'); +0154 else +0155 fprintf(metFile,'\t'); +0156 end +0157 else +0158 fprintf(metFile,'\t'); +0159 end +0160 +0161 if isfield(model,'metMiriams') +0162 if ~isempty(model.metMiriams{i}) +0163 toPrint=[]; +0164 for j=1:numel(model.metMiriams{i}.name) +0165 toPrint=[toPrint strtrim(model.metMiriams{i}.name{j}) '/' strtrim(model.metMiriams{i}.value{j}) ';']; +0166 end +0167 fprintf(rxnFile,[toPrint(1:end-1) '\t']); +0168 else +0169 fprintf(metFile,'\t'); +0170 end +0171 else +0172 fprintf(metFile,'\t'); +0173 end +0174 +0175 if isfield(model,'metFormulas') +0176 fprintf(metFile,[model.metFormulas{i} '\t']); +0177 else +0178 fprintf(metFile,'\t'); +0179 end 0180 -0181 %There can be no replacement IDs in the structure, but it has to be -0182 %something to give working met IDs. -0183 fprintf(metFile,['m' int2str(i) '\t']); -0184 -0185 %Print the model.mets field. The reason for not putting this as -0186 %replacement ID is that it's not guaranteed to be a valid SBML id. 
-0187 fprintf(metFile,[model.mets{i} '\t']); +0181 if isfield(model,'inchis') +0182 fprintf(metFile,[model.inchis{i} '\t']); +0183 else +0184 fprintf(metFile,'\t'); +0185 end +0186 +0187 fprintf(metFile,[model.comps{model.metComps(i)} '\t']); 0188 -0189 if isfield(model,'metCharges') -0190 fprintf(metFile,[num2str(model.metCharges(i)) '\t']); -0191 else -0192 fprintf(metFile,'\t'); -0193 end -0194 -0195 fprintf(metFile,'\n'); -0196 end -0197 -0198 fclose(metFile); -0199 -0200 if isfield(model,'genes') -0201 %Open for printing the genes sheet -0202 geneFile=fopen(fullfile(path,'excelGenes.txt'),'wt'); -0203 -0204 %Print header -0205 fprintf(geneFile,'#\tNAME\tMIRIAM\tSHORT NAME\tCOMPARTMENT\n'); -0206 -0207 %Loop through the genes -0208 for i=1:numel(model.genes) -0209 fprintf(geneFile,['\t' model.genes{i} '\t']); -0210 -0211 if isfield(model,'geneMiriams') -0212 if ~isempty(model.geneMiriams{i}) -0213 toPrint=[]; -0214 for j=1:numel(model.geneMiriams{i}.name) -0215 toPrint=[toPrint strtrim(model.geneMiriams{i}.name{j}) '/' strtrim(model.geneMiriams{i}.value{j}) ';']; -0216 end -0217 fprintf(geneFile,[toPrint(1:end-1) '\t']); -0218 else -0219 fprintf(geneFile,'\t'); -0220 end -0221 else -0222 fprintf(geneFile,'\t'); -0223 end -0224 -0225 if isfield(model,'geneShortNames') -0226 fprintf(geneFile,[model.geneShortNames{i} '\t']); -0227 else -0228 fprintf(geneFile,'\t'); -0229 end -0230 -0231 if isfield(model,'geneComps') -0232 fprintf(geneFile,[model.comps{model.geneComps(i)} '\t']); -0233 else -0234 fprintf(geneFile,'\t'); -0235 end -0236 -0237 fprintf(geneFile,'\n'); -0238 end -0239 fclose(geneFile); -0240 end -0241 -0242 if isfield(model,'id') -0243 %Open for printing the model sheet -0244 modelFile=fopen(fullfile(path,'excelModel.txt'),'wt'); -0245 -0246 %Print header -0247 fprintf(geneFile,'#\tID\tNAME\tDEFAULT LOWER\tDEFAULT UPPER\tCONTACT GIVEN NAME\tCONTACT FAMILY NAME\tCONTACT EMAIL\tORGANIZATION\tTAXONOMY\tNOTES\n'); -0248 -0249 %Print model ID and name. 
It is assumed that the default lower/upper -0250 %bound correspond to min/max of the bounds -0251 toPrint=['\t' model.id '\t' model.name '\t']; -0252 if isfield(model,'annotation') -0253 if isfield(model.annotation,'defaultLB') -0254 toPrint=[toPrint num2str(model.annotation.defaultLB) '\t']; -0255 else -0256 toPrint=[toPrint num2str(min(model.lb)) '\t']; -0257 end -0258 if isfield(model.annotation,'defaultUB') -0259 toPrint=[toPrint num2str(model.annotation.defaultUB) '\t']; -0260 else -0261 toPrint=[toPrint num2str(max(model.ub)) '\t']; -0262 end -0263 if isfield(model.annotation,'givenName') -0264 toPrint=[toPrint model.annotation.givenName '\t']; -0265 else -0266 toPrint=[toPrint '\t']; -0267 end -0268 if isfield(model.annotation,'familyName') -0269 toPrint=[toPrint model.annotation.familyName '\t']; -0270 else -0271 toPrint=[toPrint '\t']; -0272 end -0273 if isfield(model.annotation,'email') -0274 toPrint=[toPrint model.annotation.email '\t']; -0275 else -0276 toPrint=[toPrint '\t']; -0277 end -0278 if isfield(model.annotation,'organization') -0279 toPrint=[toPrint model.annotation.organization '\t']; -0280 else -0281 toPrint=[toPrint '\t']; -0282 end -0283 if isfield(model.annotation,'taxonomy') -0284 toPrint=[toPrint model.annotation.taxonomy '\t']; -0285 else -0286 toPrint=[toPrint '\t']; -0287 end -0288 if isfield(model.annotation,'note') -0289 toPrint=[toPrint model.annotation.note '\t']; -0290 else -0291 toPrint=[toPrint '\t']; -0292 end -0293 else -0294 toPrint=[toPrint num2str(min(model.lb)) '\t' num2str(max(model.ub)) '\t\t\t\t\t\t\n']; -0295 end -0296 fprintf(modelFile,toPrint); -0297 fclose(modelFile); -0298 end -0299 -0300 if isfield(model,'comps') -0301 %Open for printing the model sheet -0302 compsFile=fopen(fullfile(path,'excelComps.txt'),'wt'); -0303 -0304 %Print header -0305 fprintf(compsFile,'#\tABBREVIATION\tNAME\tINSIDE\tMIRIAM\n'); -0306 -0307 for i=1:numel(model.comps) -0308 toPrint=['\t' model.comps{i} '\t' model.compNames{i} '\t']; 
-0309 if isfield(model,'compOutside') -0310 toPrint=[toPrint model.compOutside{i} '\t']; -0311 else -0312 toPrint=[toPrint '\t']; -0313 end -0314 if isfield(model,'compMiriams') -0315 if ~isempty(model.compMiriams{i}) -0316 for j=1:numel(model.compMiriams{i}.name) -0317 toPrint=[toPrint strtrim(model.compMiriams{i}.name{j}) '/' strtrim(model.compMiriams{i}.value{j}) ';']; -0318 end -0319 toPrint(end)=[]; -0320 toPrint=[toPrint '\t']; -0321 else -0322 toPrint=[toPrint '\t']; -0323 end -0324 else -0325 toPrint=[toPrint '\t']; -0326 end -0327 toPrint=[toPrint '\n']; -0328 fprintf(compsFile,toPrint); -0329 end -0330 fclose(compsFile); -0331 end -0332 end

    +0189 %There can be no replacement IDs in the structure, but it has to be +0190 %something to give working met IDs. +0191 fprintf(metFile,['m' int2str(i) '\t']); +0192 +0193 %Print the model.mets field. The reason for not putting this as +0194 %replacement ID is that it's not guaranteed to be a valid SBML id. +0195 fprintf(metFile,[model.mets{i} '\t']); +0196 +0197 if isfield(model,'metCharges') +0198 fprintf(metFile,[num2str(model.metCharges(i)) '\t']); +0199 else +0200 fprintf(metFile,'\t'); +0201 end +0202 +0203 fprintf(metFile,'\n'); +0204 end +0205 +0206 fclose(metFile); +0207 +0208 if isfield(model,'genes') +0209 %Open for printing the genes sheet +0210 geneFile=fopen(fullfile(path,'excelGenes.txt'),'wt'); +0211 +0212 %Print header +0213 fprintf(geneFile,'#\tNAME\tMIRIAM\tSHORT NAME\tCOMPARTMENT\n'); +0214 +0215 %Loop through the genes +0216 for i=1:numel(model.genes) +0217 fprintf(geneFile,['\t' model.genes{i} '\t']); +0218 +0219 if isfield(model,'geneMiriams') +0220 if ~isempty(model.geneMiriams{i}) +0221 toPrint=[]; +0222 for j=1:numel(model.geneMiriams{i}.name) +0223 toPrint=[toPrint strtrim(model.geneMiriams{i}.name{j}) '/' strtrim(model.geneMiriams{i}.value{j}) ';']; +0224 end +0225 fprintf(geneFile,[toPrint(1:end-1) '\t']); +0226 else +0227 fprintf(geneFile,'\t'); +0228 end +0229 else +0230 fprintf(geneFile,'\t'); +0231 end +0232 +0233 if isfield(model,'geneShortNames') +0234 fprintf(geneFile,[model.geneShortNames{i} '\t']); +0235 else +0236 fprintf(geneFile,'\t'); +0237 end +0238 +0239 if isfield(model,'geneComps') +0240 fprintf(geneFile,[model.comps{model.geneComps(i)} '\t']); +0241 else +0242 fprintf(geneFile,'\t'); +0243 end +0244 +0245 fprintf(geneFile,'\n'); +0246 end +0247 fclose(geneFile); +0248 end +0249 +0250 if isfield(model,'id') +0251 %Open for printing the model sheet +0252 modelFile=fopen(fullfile(path,'excelModel.txt'),'wt'); +0253 +0254 %Print header +0255 fprintf(geneFile,'#\tID\tNAME\tDEFAULT LOWER\tDEFAULT UPPER\tCONTACT GIVEN 
NAME\tCONTACT FAMILY NAME\tCONTACT EMAIL\tORGANIZATION\tTAXONOMY\tNOTES\n'); +0256 +0257 %Print model ID and name. It is assumed that the default lower/upper +0258 %bound correspond to min/max of the bounds +0259 toPrint=['\t' model.id '\t' model.name '\t']; +0260 if isfield(model,'annotation') +0261 if isfield(model.annotation,'defaultLB') +0262 toPrint=[toPrint num2str(model.annotation.defaultLB) '\t']; +0263 else +0264 toPrint=[toPrint num2str(min(model.lb)) '\t']; +0265 end +0266 if isfield(model.annotation,'defaultUB') +0267 toPrint=[toPrint num2str(model.annotation.defaultUB) '\t']; +0268 else +0269 toPrint=[toPrint num2str(max(model.ub)) '\t']; +0270 end +0271 if isfield(model.annotation,'givenName') +0272 toPrint=[toPrint model.annotation.givenName '\t']; +0273 else +0274 toPrint=[toPrint '\t']; +0275 end +0276 if isfield(model.annotation,'familyName') +0277 toPrint=[toPrint model.annotation.familyName '\t']; +0278 else +0279 toPrint=[toPrint '\t']; +0280 end +0281 if isfield(model.annotation,'email') +0282 toPrint=[toPrint model.annotation.email '\t']; +0283 else +0284 toPrint=[toPrint '\t']; +0285 end +0286 if isfield(model.annotation,'organization') +0287 toPrint=[toPrint model.annotation.organization '\t']; +0288 else +0289 toPrint=[toPrint '\t']; +0290 end +0291 if isfield(model.annotation,'taxonomy') +0292 toPrint=[toPrint model.annotation.taxonomy '\t']; +0293 else +0294 toPrint=[toPrint '\t']; +0295 end +0296 if isfield(model.annotation,'note') +0297 toPrint=[toPrint model.annotation.note '\t']; +0298 else +0299 toPrint=[toPrint '\t']; +0300 end +0301 else +0302 toPrint=[toPrint num2str(min(model.lb)) '\t' num2str(max(model.ub)) '\t\t\t\t\t\t\n']; +0303 end +0304 fprintf(modelFile,toPrint); +0305 fclose(modelFile); +0306 end +0307 +0308 if isfield(model,'comps') +0309 %Open for printing the model sheet +0310 compsFile=fopen(fullfile(path,'excelComps.txt'),'wt'); +0311 +0312 %Print header +0313 
fprintf(compsFile,'#\tABBREVIATION\tNAME\tINSIDE\tMIRIAM\n'); +0314 +0315 for i=1:numel(model.comps) +0316 toPrint=['\t' model.comps{i} '\t' model.compNames{i} '\t']; +0317 if isfield(model,'compOutside') +0318 toPrint=[toPrint model.compOutside{i} '\t']; +0319 else +0320 toPrint=[toPrint '\t']; +0321 end +0322 if isfield(model,'compMiriams') +0323 if ~isempty(model.compMiriams{i}) +0324 for j=1:numel(model.compMiriams{i}.name) +0325 toPrint=[toPrint strtrim(model.compMiriams{i}.name{j}) '/' strtrim(model.compMiriams{i}.value{j}) ';']; +0326 end +0327 toPrint(end)=[]; +0328 toPrint=[toPrint '\t']; +0329 else +0330 toPrint=[toPrint '\t']; +0331 end +0332 else +0333 toPrint=[toPrint '\t']; +0334 end +0335 toPrint=[toPrint '\n']; +0336 fprintf(compsFile,toPrint); +0337 end +0338 fclose(compsFile); +0339 end +0340 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/getMD5Hash.html b/doc/io/getMD5Hash.html new file mode 100644 index 00000000..6dbdb0ba --- /dev/null +++ b/doc/io/getMD5Hash.html @@ -0,0 +1,117 @@ + + + + Description of getMD5Hash + + + + + + + + + +
    Home > io > getMD5Hash.m
    + + + +

    getMD5Hash +

    + +

    PURPOSE ^

    +
    getMD5Hash
    + +

    SYNOPSIS ^

    +
    function md5Hash=getMD5Hash(inputFile,binEnd)
    + +

    DESCRIPTION ^

    +
     getMD5Hash
    +   Calculates MD5 hash for a file
    +
    +   Input:
    +   inputFile       string with the path to file for which MD5 hash should
    +                   be calculated
    +   binEnd          string that shows the operating system running in the
    +                   client's computer. Use ".exe" for Windows, ".mac" for
    +                   macOS or leave it blank for Linux (""). (opt, by
    +                   default the function automatically detects the client's
    +                   operating system)
    +
    +   Output:
    +   md5Hash         string containing an MD5 hash for inputFile
    +   
    +   Usage: md5Hash=getMD5Hash(inputFile,binEnd)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function md5Hash=getMD5Hash(inputFile,binEnd)
    +0002 % getMD5Hash
    +0003 %   Calculates MD5 hash for a file
    +0004 %
    +0005 %   Input:
    +0006 %   inputFile       string with the path to file for which MD5 hash should
    +0007 %                   be calculated
    +0008 %   binEnd          string that shows the operating system running in the
    +0009 %                   client's computer. Use ".exe" for Windows, ".mac" for
    +0010 %                   macOS or leave it blank for Linux (""). (opt, by
    +0011 %                   default the function automatically detects the client's
    +0012 %                   operating system)
    +0013 %
    +0014 %   Output:
    +0015 %   md5Hash         string containing an MD5 hash for inputFile
    +0016 %
    +0017 %   Usage: md5Hash=getMD5Hash(inputFile,binEnd)
    +0018 
    +0019 if nargin<2 %binEnd not supplied: detect the operating system
    +0020     if isunix
    +0021         if ismac
    +0022             binEnd='.mac';
    +0023         else
    +0024             binEnd='';
    +0025         end
    +0026     elseif ispc
    +0027         binEnd='.exe';
    +0028     else
    +0029         dispEM('Unknown OS, exiting.')
    +0030         return
    +0031     end
    +0032 end
    +0033 
    +0034 %Check if binEnd is one of the three supported values
    +0035 if ~strcmp(binEnd,'.mac') && ~strcmp(binEnd,'') && ~strcmp(binEnd,'.exe')
    +0036    dispEM('Unknown OS, exiting.')
    +0037    return
    +0038 end
    +0039 
    +0040 %Check file existence
    +0041 inputFile=checkFileExistence(inputFile);
    +0042 
    +0043 %Run the platform-specific hashing command and capture its text output
    +0044 switch binEnd
    +0045     case '.mac'
    +0046         [~, md5HashMessage]=system(['md5 "' inputFile '"']);
    +0047     case ''
    +0048         [~, md5HashMessage]=system(['md5sum "' inputFile '"']);
    +0049     case '.exe'
    +0050         [~, md5HashMessage]=system(['certutil -hashfile "' inputFile '" MD5']); %only the path is quoted; no trailing quote after MD5
    +0051 end
    +0052 
    +0053 %Extract the 32-character lowercase hexadecimal hash from the command output
    +0054 md5Hash = char(regexp(md5HashMessage,'[a-f0-9]{32}','match'));
    +0055 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/io/getToolboxVersion.html b/doc/io/getToolboxVersion.html index ef1b173f..64d98f58 100644 --- a/doc/io/getToolboxVersion.html +++ b/doc/io/getToolboxVersion.html @@ -24,7 +24,7 @@

    PURPOSE ^getToolboxVersion

    SYNOPSIS ^

    -
    function version = getToolboxVersion(toolbox,fileID,masterFlag)
    +
    function version = getToolboxVersion(toolbox,fileID,mainBranchFlag)

    DESCRIPTION ^

     getToolboxVersion
    @@ -34,13 +34,13 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -49,7 +49,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • exportForGit exportForGit
  • +
  • exportForGit exportForGit
  • SUBFUNCTIONS ^

    @@ -57,7 +57,7 @@

    SUBFUNCTIONS ^function slashPos = getSlashPos(path)

    SOURCE CODE ^

    -
    0001 function version = getToolboxVersion(toolbox,fileID,masterFlag)
    +
    0001 function version = getToolboxVersion(toolbox,fileID,mainBranchFlag)
     0002 % getToolboxVersion
     0003 %   Returns the version of a given toolbox, or if not available the latest
     0004 %   commit hash (7 characters).
    @@ -65,16 +65,16 @@ 

    SOURCE CODE ^% toolbox string with the toolbox name (e.g. "RAVEN") 0007 % fileID string with the name of a file that is only found in 0008 % the corresponding toolbox (e.g. "ravenCobraWrapper.m"). -0009 % masterFlag logical, if true, function will error if the toolbox is -0010 % not on the master branch (opt, default false). +0009 % mainBranchFlag logical, if true, function will error if the toolbox is +0010 % not on the main branch (opt, default false). 0011 % 0012 % version string containing either the toolbox version or latest 0013 % commit hash (7 characters). 0014 % -0015 % Usage: version = getToolboxVersion(toolbox,fileID,masterFlag) +0015 % Usage: version = getToolboxVersion(toolbox,fileID,mainBranchFlag) 0016 0017 if nargin<3 -0018 masterFlag = false; +0018 mainBranchFlag = false; 0019 end 0020 0021 currentPath = pwd; @@ -97,12 +97,12 @@

    SOURCE CODE ^' toolbox cannot be found']) 0039 version = 'unknown'; 0040 end -0041 %Check if in master: -0042 if masterFlag +0041 %Check if in main: +0042 if mainBranchFlag 0043 currentBranch = git('rev-parse --abbrev-ref HEAD'); -0044 if ~strcmp(currentBranch,'master') +0044 if any([strcmp(currentBranch, "main"), strcmp(currentBranch, "master")]) 0045 cd(currentPath); -0046 error(['ERROR: ' toolbox ' not in master. Check-out the master branch of ' toolbox ' before submitting model for Git.']) +0046 error(['ERROR: ' toolbox ' not in main (or master) branch. Check-out this branch of ' toolbox ' before submitting model for Git.']) 0047 end 0048 end 0049 %Try to find version file of the toolbox: diff --git a/doc/io/index.html b/doc/io/index.html index f17f6cf2..c944a6d6 100644 --- a/doc/io/index.html +++ b/doc/io/index.html @@ -19,7 +19,7 @@

    Index for io

    Matlab files in this directory:

    -
     SBMLFromExcelSBMLFromExcel
     addJavaPathsaddJavaPaths
     checkFileExistencecheckFileExistence
     cleanSheetcleanSheet
     closeModelcloseModel
     exportForGitexportForGit
     exportModelexportModel
     exportModelToSIFexportModelToSIF
     exportToExcelFormatexportToExcelFormat
     exportToTabDelimitedexportToTabDelimited
     getFullPathgetFullPath - Get absolute canonical path of a file or folder
     getToolboxVersiongetToolboxVersion
     importExcelModelimportExcelModel
     importModelimportModel
     loadSheetloadSheet
     loadWorkbookloadWorkbook
     startupAdd the Java Paths at startup in order to be ready for use if the Toolbox is on the path
     writeSheetwriteSheet
     writeYamlwriteYaml
    + 
    SBMLFromExcelSBMLFromExcel  addJavaPathsaddJavaPaths  checkFileExistencecheckFileExistence  cleanSheetcleanSheet  closeModelcloseModel  exportForGitexportForGit  exportModelexportModel  exportModelToSIFexportModelToSIF  exportToExcelFormatexportToExcelFormat  exportToTabDelimitedexportToTabDelimited  getFullPathgetFullPath - Get absolute canonical path of a file or folder  getMD5HashgetMD5Hash  getToolboxVersiongetToolboxVersion  importExcelModelimportExcelModel  importModelimportModel  loadSheetloadSheet  loadWorkbookloadWorkbook  sortIdentifiersexportModel  startupAdd the Java Paths at startup in order to be ready for use if the Toolbox is on the path  writeSheetwriteSheet  writeYamlwriteYaml diff --git a/doc/io/loadWorkbook.html b/doc/io/loadWorkbook.html index 7c06b146..ebca57da 100644 --- a/doc/io/loadWorkbook.html +++ b/doc/io/loadWorkbook.html @@ -46,7 +46,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=addJavaPaths addJavaPaths
  • getFullPath getFullPath - Get absolute canonical path of a file or folder
  • This function is called by: +
  • exportToExcelFormat exportToExcelFormat
  • importExcelModel importExcelModel
  • diff --git a/doc/io/sortIdentifiers.html b/doc/io/sortIdentifiers.html new file mode 100644 index 00000000..a65291c6 --- /dev/null +++ b/doc/io/sortIdentifiers.html @@ -0,0 +1,83 @@ + + + + Description of sortIdentifiers + + + + + + + + + +
    Home > io > sortIdentifiers.m
    + + + +

    sortIdentifiers +

    + +

    PURPOSE ^

    +
    sortIdentifiers
    + +

    SYNOPSIS ^

    +
    function newModel = sortIdentifiers(model)
    + +

    DESCRIPTION ^

    +
     sortIdentifiers
    +   Sort reactions, metabolites, genes and compartments alphabetically by
    +   their identifier.
    +
    +   Input:
    +   model       a model structure
    +
    +   Output:
    +   newModel    an updated model structure with alphabetically sorted
    +               identifiers
    +
    +   Usage: newModel=sortIdentifiers(model)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: +
      +
    +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function newModel = sortIdentifiers(model)
    +0002 % sortIdentifiers
    +0003 %   Sort reactions, metabolites, genes and compartments alphabetically by
    +0004 %   their identifier.
    +0005 %
    +0006 %   Input:
    +0007 %   model       a model structure
    +0008 %
    +0009 %   Output:
    +0010 %   newModel    an updated model structure with alphabetically sorted
    +0011 %               identifiers
    +0012 %
    +0013 %   Usage: newModel=sortIdentifiers(model)
    +0014 
    +0015 [~,I]=sort(model.rxns); %I is the sort order of the reaction identifiers
    +0016 newModel=permuteModel(model,I,'rxns'); %presumably reorders every reaction-related field - verify in permuteModel
    +0017 [~,I]=sort(newModel.mets);
    +0018 newModel=permuteModel(newModel,I,'mets');
    +0019 if isfield(newModel,'genes') %genes is the only identifier field checked for existence here
    +0020     [~,I]=sort(newModel.genes);
    +0021     newModel=permuteModel(newModel,I,'genes');
    +0022 end
    +0023 [~,I]=sort(newModel.comps);
    +0024 newModel=permuteModel(newModel,I,'comps');
    +0025 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/io/writeSheet.html b/doc/io/writeSheet.html index 10b1aa8c..04270560 100644 --- a/doc/io/writeSheet.html +++ b/doc/io/writeSheet.html @@ -48,7 +48,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=addJavaPaths addJavaPaths This function is called by: +
  • exportToExcelFormat exportToExcelFormat
  • SUBFUNCTIONS ^

    diff --git a/doc/io/writeYaml.html b/doc/io/writeYaml.html index c5a19c29..27868d2f 100644 --- a/doc/io/writeYaml.html +++ b/doc/io/writeYaml.html @@ -45,10 +45,10 @@

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls:
      -
    +
  • sortIdentifiers sortIdentifiers
  • This function is called by: +
  • exportForGit exportForGit
  • SUBFUNCTIONS ^

    @@ -77,298 +77,282 @@

    SOURCE CODE ^if nargin < 4 0020 sortIds = false; 0021 end -0022 -0023 if ~endsWith(name,{'.yml','.yaml'}) -0024 name = strcat(name,'.yml'); -0025 end -0026 -0027 %Check that model is in RAVEN format: -0028 if isfield(model,'rules') -0029 model = ravenCobraWrapper(model); -0030 end -0031 -0032 %Simplify Miriam fields: -0033 if isfield(model,'metMiriams') -0034 [model.newMetMiriams,model.newMetMiriamNames] = extractMiriam(model.metMiriams); -0035 model.newMetMiriams = regexprep(model.newMetMiriams,'^.+/',''); -0036 end -0037 if isfield(model,'rxnMiriams') -0038 [model.newRxnMiriams,model.newRxnMiriamNames] = extractMiriam(model.rxnMiriams); -0039 model.newRxnMiriams = regexprep(model.newRxnMiriams,'^.+/',''); +0022 if ~endsWith(name,{'.yml','.yaml'}) +0023 name = strcat(name,'.yml'); +0024 end +0025 +0026 %Check that model is in RAVEN format: +0027 if isfield(model,'rules') +0028 model = ravenCobraWrapper(model); +0029 end +0030 +0031 %Sort identifiers alphabetically +0032 if sortIds == true +0033 model = sortIdentifiers(model); +0034 end +0035 +0036 %Simplify Miriam fields: +0037 if isfield(model,'metMiriams') +0038 [model.newMetMiriams,model.newMetMiriamNames] = extractMiriam(model.metMiriams); +0039 model.newMetMiriams = regexprep(model.newMetMiriams,'^.+/',''); 0040 end -0041 if isfield(model,'geneMiriams') -0042 [model.newGeneMiriams,model.newGeneMiriamNames] = extractMiriam(model.geneMiriams); -0043 model.newGeneMiriams = regexprep(model.newGeneMiriams,'^.+/',''); +0041 if isfield(model,'rxnMiriams') +0042 [model.newRxnMiriams,model.newRxnMiriamNames] = extractMiriam(model.rxnMiriams); +0043 model.newRxnMiriams = regexprep(model.newRxnMiriams,'^.+/',''); 0044 end -0045 if isfield(model,'compMiriams') -0046 [model.newCompMiriams,model.newCompMiriamNames] = extractMiriam(model.compMiriams); -0047 model.newCompMiriams = regexprep(model.newCompMiriams,'^.+/',''); +0045 if isfield(model,'geneMiriams') +0046 [model.newGeneMiriams,model.newGeneMiriamNames] = 
extractMiriam(model.geneMiriams); +0047 model.newGeneMiriams = regexprep(model.newGeneMiriams,'^.+/',''); 0048 end -0049 -0050 %Open file: -0051 fid = fopen(name,'wt'); -0052 fprintf(fid,'---\n!!omap\n'); +0049 if isfield(model,'compMiriams') +0050 [model.newCompMiriams,model.newCompMiriamNames] = extractMiriam(model.compMiriams); +0051 model.newCompMiriams = regexprep(model.newCompMiriams,'^.+/',''); +0052 end 0053 -0054 %Insert file header (metadata) -0055 writeMetadata(model,fid); -0056 -0057 %Metabolites: -0058 fprintf(fid,'- metabolites:\n'); -0059 if sortIds==true -0060 [~,pos] = sort(model.mets); -0061 else -0062 pos = 1:numel(model.mets); -0063 end -0064 for i = 1:length(model.mets) -0065 fprintf(fid,' - !!omap\n'); -0066 writeField(model, fid, 'mets', 'txt', pos(i), ' - id', preserveQuotes) -0067 writeField(model, fid, 'metNames', 'txt', pos(i), ' - name', preserveQuotes) -0068 writeField(model, fid, 'metComps', 'txt', pos(i), ' - compartment', preserveQuotes) -0069 writeField(model, fid, 'metFormulas', 'txt', pos(i), ' - formula', preserveQuotes) -0070 writeField(model, fid, 'metCharges', 'num', pos(i), ' - charge', preserveQuotes) -0071 writeField(model, fid, 'inchis', 'txt', pos(i), ' - inchis', preserveQuotes) -0072 writeField(model, fid, 'metMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) -0073 writeField(model, fid, 'metFrom', 'txt', pos(i), ' - metFrom', preserveQuotes) -0074 end -0075 -0076 %Reactions: -0077 fprintf(fid,'- reactions:\n'); -0078 if sortIds==true -0079 [~,pos] = sort(model.rxns); -0080 else -0081 pos = 1:numel(model.rxns); -0082 end -0083 for i = 1:length(model.rxns) -0084 fprintf(fid,' - !!omap\n'); -0085 writeField(model, fid, 'rxns', 'txt', pos(i), ' - id', preserveQuotes) -0086 writeField(model, fid, 'rxnNames', 'txt', pos(i), ' - name', preserveQuotes) -0087 writeField(model, fid, 'S', 'txt', pos(i), ' - metabolites', preserveQuotes) -0088 writeField(model, fid, 'lb', 'num', pos(i), ' - lower_bound', preserveQuotes) 
-0089 writeField(model, fid, 'ub', 'num', pos(i), ' - upper_bound', preserveQuotes) -0090 writeField(model, fid, 'grRules', 'txt', pos(i), ' - gene_reaction_rule', preserveQuotes) -0091 writeField(model, fid, 'rxnFrom', 'txt', pos(i), ' - rxnFrom', preserveQuotes) -0092 if model.c(i)~=0 -0093 writeField(model, fid, 'c', 'num', pos(i), ' - objective_coefficient', preserveQuotes) -0094 end -0095 writeField(model, fid, 'eccodes', 'txt', pos(i), ' - eccodes', preserveQuotes) -0096 writeField(model, fid, 'rxnReferences', 'txt', pos(i), ' - references', preserveQuotes) -0097 writeField(model, fid, 'subSystems', 'txt', pos(i), ' - subsystem', preserveQuotes) -0098 writeField(model, fid, 'rxnMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) -0099 writeField(model, fid, 'rxnConfidenceScores', 'num', pos(i), ' - confidence_score', preserveQuotes) -0100 end -0101 -0102 %Genes: -0103 fprintf(fid,'- genes:\n'); -0104 if sortIds==true -0105 [~,pos] = sort(model.genes); -0106 else -0107 pos = 1:numel(model.genes); -0108 end -0109 for i = 1:length(model.genes) -0110 fprintf(fid,' - !!omap\n'); -0111 writeField(model, fid, 'genes', 'txt', pos(i), ' - id', preserveQuotes) -0112 writeField(model, fid, 'geneShortNames', 'txt', pos(i), ' - name', preserveQuotes) -0113 writeField(model, fid, 'geneMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) -0114 end -0115 -0116 %Compartments: -0117 fprintf(fid,'- compartments: !!omap\n'); -0118 if sortIds==true -0119 [~,pos] = sort(model.comps); -0120 else -0121 pos = 1:numel(model.comps); -0122 end -0123 for i = 1:length(model.comps) -0124 writeField(model, fid, 'compNames', 'txt', pos(i), ['- ' model.comps{pos(i)}], preserveQuotes) -0125 writeField(model, fid, 'compMiriams', 'txt', pos(i), '- annotation', preserveQuotes) -0126 end -0127 -0128 %Close file: -0129 fclose(fid); -0130 -0131 end -0132 -0133 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) -0134 %Writes a new line in the yaml file if the field 
exists and the field is -0135 %not empty at the correspoinding position. It's recursive for some fields -0136 %(metMiriams, rxnMiriams, and S) -0137 -0138 if isfield(model,fieldName) -0139 if strcmp(fieldName,'metComps') -0140 %metComps: write full name -0141 fieldName = 'comps'; -0142 pos = model.metComps(pos); -0143 end -0144 -0145 field = eval(['model.' fieldName]); -0146 -0147 if strcmp(fieldName,'metMiriams') -0148 if ~isempty(model.metMiriams{pos}) -0149 fprintf(fid,[' ' name ': !!omap\n']); -0150 for i=1:size(model.newMetMiriams,2) -0151 %'i' represents the different miriam names, e.g. -0152 %kegg.compound or chebi -0153 if ~isempty(model.newMetMiriams{pos,i}) -0154 %As during the following writeField call the value of -0155 %'i' would be lost, it is temporarily concatenated to -0156 %'name' parameter, which will be edited later -0157 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' num2str(i)], preserveQuotes) -0158 end -0159 end -0160 end -0161 -0162 elseif strcmp(fieldName,'rxnMiriams') -0163 if ~isempty(model.rxnMiriams{pos}) -0164 fprintf(fid,[' ' name ': !!omap\n']); -0165 for i=1:size(model.newRxnMiriams,2) -0166 if ~isempty(model.newRxnMiriams{pos,i}) -0167 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' num2str(i)], preserveQuotes) -0168 end -0169 end -0170 end -0171 -0172 elseif strcmp(fieldName,'geneMiriams') -0173 if ~isempty(model.geneMiriams{pos}) -0174 fprintf(fid,[' ' name ': !!omap\n']); -0175 for i=1:size(model.newGeneMiriams,2) -0176 if ~isempty(model.newGeneMiriams{pos,i}) -0177 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' num2str(i)], preserveQuotes) -0178 end -0179 end -0180 end -0181 -0182 elseif strcmp(fieldName,'compMiriams') -0183 if ~isempty(model.compMiriams{pos}) -0184 fprintf(fid,[' ' name ': !!omap\n']); -0185 for i=1:size(model.newCompMiriams,2) -0186 if ~isempty(model.newCompMiriams{pos,i}) -0187 
writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' num2str(i)], preserveQuotes) -0188 end -0189 end -0190 end -0191 -0192 elseif strcmp(fieldName,'S') -0193 %S: create header & write each metabolite in a new line -0194 fprintf(fid,[' ' name ': !!omap\n']); -0195 if sum(field(:,pos) ~= 0) > 0 -0196 model.mets = model.mets(field(:,pos) ~= 0); -0197 model.coeffs = field(field(:,pos) ~= 0,pos); -0198 %Sort metabolites: -0199 [model.mets,order] = sort(model.mets); -0200 model.coeffs = model.coeffs(order); -0201 for i = 1:length(model.mets) -0202 writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) -0203 end -0204 end -0205 -0206 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 -0207 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list -0208 if strcmp(fieldName,'subSystems') -0209 list = field{pos}; %subSystems already comes in a cell array -0210 elseif strcmp(fieldName,'newMetMiriams') -0211 index = str2double(regexprep(name,'^.+_','')); -0212 name = regexprep(name,'_\d+$',''); -0213 list = strsplit(model.newMetMiriams{pos,index},'; '); -0214 elseif strcmp(fieldName,'newRxnMiriams') -0215 index = str2double(regexprep(name,'^.+_','')); -0216 name = regexprep(name,'_\d+$',''); -0217 list = strsplit(model.newRxnMiriams{pos,index},'; '); -0218 elseif strcmp(fieldName,'newGeneMiriams') -0219 index = str2double(regexprep(name,'^.+_','')); -0220 name = regexprep(name,'_\d+$',''); -0221 list = strsplit(model.newGeneMiriams{pos,index},'; '); -0222 elseif strcmp(fieldName,'newCompMiriams') -0223 index = str2double(regexprep(name,'^.+_','')); -0224 name = regexprep(name,'_\d+$',''); -0225 list = strsplit(model.newCompMiriams{pos,index},'; '); -0226 else -0227 list = strrep(field{pos},' ',''); %Exception for eccodes -0228 list = strsplit(list,';'); -0229 end -0230 -0231 if length(list) == 1 && ~strcmp(list{1},'') && 
~strcmp(fieldName,'subSystems') -0232 if preserveQuotes -0233 list = strcat('"',list,'"'); -0234 end -0235 fprintf(fid,[' ' name ': ' list{1} '\n']); -0236 elseif length(list) > 1 || strcmp(fieldName,'subSystems') -0237 if preserveQuotes -0238 list = strcat('"',list,'"'); -0239 end -0240 fprintf(fid,[' ' name ':\n']); -0241 for i = 1:length(list) -0242 fprintf(fid,[regexprep(name,'(^\s*).*','$1') ' - ' list{i} '\n']); -0243 end -0244 end -0245 -0246 elseif sum(pos) > 0 -0247 %All other fields: -0248 if strcmp(type,'txt') -0249 value = field{pos}; -0250 if preserveQuotes && ~isempty(value) -0251 value = strcat('"',value,'"'); -0252 end -0253 elseif strcmp(type,'num') -0254 if isnan(field(pos)) -0255 value = []; -0256 else -0257 value = num2str(field(pos),12); -0258 end -0259 end -0260 if ~isempty(value) -0261 fprintf(fid,[' ' name ': ' value '\n']); -0262 end -0263 end -0264 end -0265 -0266 -0267 end -0268 -0269 function writeMetadata(model,fid) -0270 % Writes model metadata to the yaml file. This information will eventually -0271 % be extracted entirely from the model, but for now, many of the entries -0272 % are hard-coded defaults for HumanGEM. 
-0273 -0274 fprintf(fid, '- metaData:\n'); -0275 fprintf(fid, [' id: "', model.id, '"\n']); -0276 fprintf(fid, [' name: "', model.name, '"\n']); -0277 if isfield(model,'version') -0278 fprintf(fid, [' version: "', model.version, '"\n']); -0279 end -0280 fprintf(fid, [' date: "', datestr(now,29), '"\n']); % 29=YYYY-MM-DD -0281 if isfield(model,'annotation') -0282 if isfield(model.annotation,'defaultLB') -0283 fprintf(fid, [' defaultLB: "', num2str(model.annotation.defaultLB), '"\n']); -0284 end -0285 if isfield(model.annotation,'defaultUB') -0286 fprintf(fid, [' defaultUB: "', num2str(model.annotation.defaultUB), '"\n']); -0287 end -0288 if isfield(model.annotation,'givenName') -0289 fprintf(fid, [' givenName: "', model.annotation.givenName, '"\n']); -0290 end -0291 if isfield(model.annotation,'familyName') -0292 fprintf(fid, [' familyName: "', model.annotation.familyName, '"\n']); -0293 end -0294 if isfield(model.annotation,'authors') -0295 fprintf(fid, [' authors: "', model.annotation.authors, '"\n']); -0296 end -0297 if isfield(model.annotation,'email') -0298 fprintf(fid, [' email: "', model.annotation.email, '"\n']); -0299 end -0300 if isfield(model.annotation,'organization') -0301 fprintf(fid, [' organization: "', model.annotation.organization, '"\n']); -0302 end -0303 if isfield(model.annotation,'taxonomy') -0304 fprintf(fid, [' taxonomy: "', model.annotation.taxonomy, '"\n']); -0305 end -0306 if isfield(model.annotation,'note') -0307 fprintf(fid, [' note: "', model.annotation.note, '"\n']); -0308 end -0309 if isfield(model.annotation,'sourceUrl') -0310 fprintf(fid, [' sourceUrl: "', model.annotation.sourceUrl, '"\n']); -0311 end -0312 end -0313 end

    +0054 %Open file: +0055 fid = fopen(name,'wt'); +0056 fprintf(fid,'---\n!!omap\n'); +0057 +0058 %Insert file header (metadata) +0059 writeMetadata(model,fid); +0060 +0061 %Metabolites: +0062 fprintf(fid,'- metabolites:\n'); +0063 for i = 1:length(model.mets) +0064 fprintf(fid,' - !!omap\n'); +0065 writeField(model, fid, 'mets', 'txt', pos(i), ' - id', preserveQuotes) +0066 writeField(model, fid, 'metNames', 'txt', pos(i), ' - name', preserveQuotes) +0067 writeField(model, fid, 'metComps', 'txt', pos(i), ' - compartment', preserveQuotes) +0068 writeField(model, fid, 'metFormulas', 'txt', pos(i), ' - formula', preserveQuotes) +0069 writeField(model, fid, 'metCharges', 'num', pos(i), ' - charge', preserveQuotes) +0070 writeField(model, fid, 'inchis', 'txt', pos(i), ' - inchis', preserveQuotes) +0071 writeField(model, fid, 'metMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) +0072 writeField(model, fid, 'metFrom', 'txt', pos(i), ' - metFrom', preserveQuotes) +0073 end +0074 +0075 %Reactions: +0076 fprintf(fid,'- reactions:\n'); +0077 for i = 1:length(model.rxns) +0078 fprintf(fid,' - !!omap\n'); +0079 writeField(model, fid, 'rxns', 'txt', pos(i), ' - id', preserveQuotes) +0080 writeField(model, fid, 'rxnNames', 'txt', pos(i), ' - name', preserveQuotes) +0081 writeField(model, fid, 'S', 'txt', pos(i), ' - metabolites', preserveQuotes) +0082 writeField(model, fid, 'lb', 'num', pos(i), ' - lower_bound', preserveQuotes) +0083 writeField(model, fid, 'ub', 'num', pos(i), ' - upper_bound', preserveQuotes) +0084 writeField(model, fid, 'grRules', 'txt', pos(i), ' - gene_reaction_rule', preserveQuotes) +0085 writeField(model, fid, 'rxnFrom', 'txt', pos(i), ' - rxnFrom', preserveQuotes) +0086 if model.c(i)~=0 +0087 writeField(model, fid, 'c', 'num', pos(i), ' - objective_coefficient', preserveQuotes) +0088 end +0089 writeField(model, fid, 'eccodes', 'txt', pos(i), ' - eccodes', preserveQuotes) +0090 writeField(model, fid, 'rxnReferences', 'txt', pos(i), ' - 
references', preserveQuotes) +0091 writeField(model, fid, 'subSystems', 'txt', pos(i), ' - subsystem', preserveQuotes) +0092 writeField(model, fid, 'rxnMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) +0093 writeField(model, fid, 'rxnConfidenceScores', 'num', pos(i), ' - confidence_score', preserveQuotes) +0094 end +0095 +0096 %Genes: +0097 fprintf(fid,'- genes:\n'); +0098 for i = 1:length(model.genes) +0099 fprintf(fid,' - !!omap\n'); +0100 writeField(model, fid, 'genes', 'txt', pos(i), ' - id', preserveQuotes) +0101 writeField(model, fid, 'geneShortNames', 'txt', pos(i), ' - name', preserveQuotes) +0102 writeField(model, fid, 'geneMiriams', 'txt', pos(i), ' - annotation', preserveQuotes) +0103 end +0104 +0105 %Compartments: +0106 fprintf(fid,'- compartments: !!omap\n'); +0107 for i = 1:length(model.comps) +0108 writeField(model, fid, 'compNames', 'txt', pos(i), ['- ' model.comps{pos(i)}], preserveQuotes) +0109 writeField(model, fid, 'compMiriams', 'txt', pos(i), '- annotation', preserveQuotes) +0110 end +0111 +0112 %Close file: +0113 fclose(fid); +0114 +0115 end +0116 +0117 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) +0118 %Writes a new line in the yaml file if the field exists and the field is +0119 %not empty at the correspoinding position. It's recursive for some fields +0120 %(metMiriams, rxnMiriams, and S) +0121 +0122 if isfield(model,fieldName) +0123 if strcmp(fieldName,'metComps') +0124 %metComps: write full name +0125 fieldName = 'comps'; +0126 pos = model.metComps(pos); +0127 end +0128 +0129 field = eval(['model.' fieldName]); +0130 +0131 if strcmp(fieldName,'metMiriams') +0132 if ~isempty(model.metMiriams{pos}) +0133 fprintf(fid,[' ' name ': !!omap\n']); +0134 for i=1:size(model.newMetMiriams,2) +0135 %'i' represents the different miriam names, e.g. 
+0136 %kegg.compound or chebi +0137 if ~isempty(model.newMetMiriams{pos,i}) +0138 %As during the following writeField call the value of +0139 %'i' would be lost, it is temporarily concatenated to +0140 %'name' parameter, which will be edited later +0141 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' num2str(i)], preserveQuotes) +0142 end +0143 end +0144 end +0145 +0146 elseif strcmp(fieldName,'rxnMiriams') +0147 if ~isempty(model.rxnMiriams{pos}) +0148 fprintf(fid,[' ' name ': !!omap\n']); +0149 for i=1:size(model.newRxnMiriams,2) +0150 if ~isempty(model.newRxnMiriams{pos,i}) +0151 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' num2str(i)], preserveQuotes) +0152 end +0153 end +0154 end +0155 +0156 elseif strcmp(fieldName,'geneMiriams') +0157 if ~isempty(model.geneMiriams{pos}) +0158 fprintf(fid,[' ' name ': !!omap\n']); +0159 for i=1:size(model.newGeneMiriams,2) +0160 if ~isempty(model.newGeneMiriams{pos,i}) +0161 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' num2str(i)], preserveQuotes) +0162 end +0163 end +0164 end +0165 +0166 elseif strcmp(fieldName,'compMiriams') +0167 if ~isempty(model.compMiriams{pos}) +0168 fprintf(fid,[' ' name ': !!omap\n']); +0169 for i=1:size(model.newCompMiriams,2) +0170 if ~isempty(model.newCompMiriams{pos,i}) +0171 writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' num2str(i)], preserveQuotes) +0172 end +0173 end +0174 end +0175 +0176 elseif strcmp(fieldName,'S') +0177 %S: create header & write each metabolite in a new line +0178 fprintf(fid,[' ' name ': !!omap\n']); +0179 if sum(field(:,pos) ~= 0) > 0 +0180 model.mets = model.mets(field(:,pos) ~= 0); +0181 model.coeffs = field(field(:,pos) ~= 0,pos); +0182 %Sort metabolites: +0183 [model.mets,order] = sort(model.mets); +0184 model.coeffs = model.coeffs(order); +0185 for i = 1:length(model.mets) +0186 
writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) +0187 end +0188 end +0189 +0190 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 +0191 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list +0192 if strcmp(fieldName,'subSystems') +0193 list = field{pos}; %subSystems already comes in a cell array +0194 elseif strcmp(fieldName,'newMetMiriams') +0195 index = str2double(regexprep(name,'^.+_','')); +0196 name = regexprep(name,'_\d+$',''); +0197 list = strsplit(model.newMetMiriams{pos,index},'; '); +0198 elseif strcmp(fieldName,'newRxnMiriams') +0199 index = str2double(regexprep(name,'^.+_','')); +0200 name = regexprep(name,'_\d+$',''); +0201 list = strsplit(model.newRxnMiriams{pos,index},'; '); +0202 elseif strcmp(fieldName,'newGeneMiriams') +0203 index = str2double(regexprep(name,'^.+_','')); +0204 name = regexprep(name,'_\d+$',''); +0205 list = strsplit(model.newGeneMiriams{pos,index},'; '); +0206 elseif strcmp(fieldName,'newCompMiriams') +0207 index = str2double(regexprep(name,'^.+_','')); +0208 name = regexprep(name,'_\d+$',''); +0209 list = strsplit(model.newCompMiriams{pos,index},'; '); +0210 else +0211 list = strrep(field{pos},' ',''); %Exception for eccodes +0212 list = strsplit(list,';'); +0213 end +0214 +0215 if length(list) == 1 && ~strcmp(list{1},'') && ~strcmp(fieldName,'subSystems') +0216 if preserveQuotes +0217 list = strcat('"',list,'"'); +0218 end +0219 fprintf(fid,[' ' name ': ' list{1} '\n']); +0220 elseif length(list) > 1 || strcmp(fieldName,'subSystems') +0221 if preserveQuotes +0222 list = strcat('"',list,'"'); +0223 end +0224 fprintf(fid,[' ' name ':\n']); +0225 for i = 1:length(list) +0226 fprintf(fid,[regexprep(name,'(^\s*).*','$1') ' - ' list{i} '\n']); +0227 end +0228 end +0229 +0230 elseif sum(pos) > 0 +0231 %All other fields: +0232 if strcmp(type,'txt') +0233 value = field{pos}; +0234 if preserveQuotes && 
~isempty(value) +0235 value = strcat('"',value,'"'); +0236 end +0237 elseif strcmp(type,'num') +0238 if isnan(field(pos)) +0239 value = []; +0240 else +0241 value = num2str(field(pos),12); +0242 end +0243 end +0244 if ~isempty(value) +0245 fprintf(fid,[' ' name ': ' value '\n']); +0246 end +0247 end +0248 end +0249 +0250 +0251 end +0252 +0253 function writeMetadata(model,fid) +0254 % Writes model metadata to the yaml file. This information will eventually +0255 % be extracted entirely from the model, but for now, many of the entries +0256 % are hard-coded defaults for HumanGEM. +0257 +0258 fprintf(fid, '- metaData:\n'); +0259 fprintf(fid, [' id: "', model.id, '"\n']); +0260 fprintf(fid, [' name: "', model.name, '"\n']); +0261 if isfield(model,'version') +0262 fprintf(fid, [' version: "', model.version, '"\n']); +0263 end +0264 fprintf(fid, [' date: "', datestr(now,29), '"\n']); % 29=YYYY-MM-DD +0265 if isfield(model,'annotation') +0266 if isfield(model.annotation,'defaultLB') +0267 fprintf(fid, [' defaultLB: "', num2str(model.annotation.defaultLB), '"\n']); +0268 end +0269 if isfield(model.annotation,'defaultUB') +0270 fprintf(fid, [' defaultUB: "', num2str(model.annotation.defaultUB), '"\n']); +0271 end +0272 if isfield(model.annotation,'givenName') +0273 fprintf(fid, [' givenName: "', model.annotation.givenName, '"\n']); +0274 end +0275 if isfield(model.annotation,'familyName') +0276 fprintf(fid, [' familyName: "', model.annotation.familyName, '"\n']); +0277 end +0278 if isfield(model.annotation,'authors') +0279 fprintf(fid, [' authors: "', model.annotation.authors, '"\n']); +0280 end +0281 if isfield(model.annotation,'email') +0282 fprintf(fid, [' email: "', model.annotation.email, '"\n']); +0283 end +0284 if isfield(model.annotation,'organization') +0285 fprintf(fid, [' organization: "', model.annotation.organization, '"\n']); +0286 end +0287 if isfield(model.annotation,'taxonomy') +0288 fprintf(fid, [' taxonomy: "', model.annotation.taxonomy, '"\n']); +0289 
end +0290 if isfield(model.annotation,'note') +0291 fprintf(fid, [' note: "', model.annotation.note, '"\n']); +0292 end +0293 if isfield(model.annotation,'sourceUrl') +0294 fprintf(fid, [' sourceUrl: "', model.annotation.sourceUrl, '"\n']); +0295 end +0296 end +0297 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/testing/index.html b/doc/testing/index.html deleted file mode 100644 index e9dab614..00000000 --- a/doc/testing/index.html +++ /dev/null @@ -1,31 +0,0 @@ - - - - Index for Directory testing - - - - - - - - - - -
    < Master indexIndex for testing >
    - -

    Index for testing

    - -

    Matlab files in this directory:

    - -
     testBlasttestBlast
     testDiamondtestDiamond
    - - -

    Subsequent directories:

    - - -
    Generated by m2html © 2005
    - - \ No newline at end of file diff --git a/doc/testing/testBlast.html b/doc/testing/testBlast.html deleted file mode 100644 index e0b78733..00000000 --- a/doc/testing/testBlast.html +++ /dev/null @@ -1,189 +0,0 @@ - - - - Description of testBlast - - - - - - - - - -
    Home > testing > testBlast.m
    - - - -

    testBlast -

    - -

    PURPOSE ^

    -
    testBlast
    - -

    SYNOPSIS ^

    -
    function [success,blastStructure]=testBlast(fullCheck)
    - -

    DESCRIPTION ^

    -
     testBlast
    -   Performs a check for BLAST+ functionality in RAVEN. Depending on the
    -   parameter settings the user can choose between a quick check for
    -   binaries or the thorough testing while building BLAST database and
    -   running homology search with BLASTP
    -
    -   Input:
    -   fullCheck       true if the thorough BLAST+ testing should be performed
    -                   (opt, default true)
    -
    -   Output: 
    -   success         true if the test was successful, otherwise equal to
    -                   zero
    -   blastStructure    blastStructure resulting from the thorough BLAST+ check
    -
    -   NOTE: The purpose of the thorough check is to assess whether the
    -   homology search can be successfully performed using existing BLAST+
    -   binaries. This testing function is completely standalone, only
    -   requiring BLAST+ binaries and multi-FASTA file sce.fa from tutorials
    -   directory
    -
    -   Usage: [success,blastStructure]=testBlast(fullCheck)
    - - -

    CROSS-REFERENCE INFORMATION ^

    -This function calls: - -This function is called by: - - - - - -

    SOURCE CODE ^

    -
    0001 function [success,blastStructure]=testBlast(fullCheck)
    -0002 % testBlast
    -0003 %   Performs a check for BLAST+ functionality in RAVEN. Depending on the
    -0004 %   parameter settings the user can choose between a quick check for
    -0005 %   binaries or the thorough testing while building BLAST database and
    -0006 %   running homology search with BLASTP
    -0007 %
    -0008 %   Input:
    -0009 %   fullCheck       true if the thorough BLAST+ testing should be performed
    -0010 %                   (opt, default true)
    -0011 %
    -0012 %   Output:
    -0013 %   success         true if the test was successful, otherwise equal to
    -0014 %                   zero
    -0015 %   blastStructure    blastStructure resulting from the thorough BLAST+ check
    -0016 %
    -0017 %   NOTE: The purpose of the thorough check is to assess whether the
    -0018 %   homology search can be successfully performed using existing BLAST+
    -0019 %   binaries. This testing function is completely standalone, only
    -0020 %   requiring BLAST+ binaries and multi-FASTA file sce.fa from tutorials
    -0021 %   directory
    -0022 %
    -0023 %   Usage: [success,blastStructure]=testBlast(fullCheck)
    -0024 
    -0025 if nargin<1
    -0026     fullCheck=true;
    -0027 end
    -0028 
    -0029 %Get the directory for RAVEN Toolbox
    -0030 [ST, I]=dbstack('-completenames');
    -0031 ravenPath=fileparts(fileparts(ST(I).file));
    -0032 
    -0033 if isunix
    -0034     if ismac
    -0035         binEnd='.mac';
    -0036     else
    -0037         binEnd='';
    -0038     end
    -0039 elseif ispc
    -0040     binEnd='';
    -0041     setenv('BLASTDB_LMDB_MAP_SIZE','1000000');
    -0042 else
    -0043     dispEM('Unknown OS, exiting.')
    -0044     return
    -0045 end
    -0046 
    -0047 %Create an empty blastStructure. Even if a quick BLAST+ evaluation is
    -0048 %considered, blastStructure should still be in the output
    -0049 blastStructure=[];
    -0050 
    -0051 if ~fullCheck
    -0052     fprintf(['Checking blastp' binEnd '... ']);
    -0053     [res,~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '"']);
    -0054     if res==1
    -0055         fprintf('OK\n');
    -0056     else
    -0057         fprintf('Not OK! The binary must be recompiled from source before running RAVEN\n');
    -0058     end
    -0059     fprintf(['Checking makeblastdb' binEnd '... ']);
    -0060     [res,~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '"']);
    -0061     if res==1
    -0062         fprintf('OK\n');
    -0063     else
    -0064         fprintf('Not OK! The binary must be recompiled from source before running RAVEN\n');
    -0065     end
    -0066 else    
    -0067     %Generate temporary names for working directory and outFile
    -0068     tmpDB=tempname;
    -0069     outFile=tempname;
    -0070     
    -0071     %Run BLAST multi-threaded to use all logical cores assigned to MATLAB
    -0072     cores = evalc('feature(''numcores'')');
    -0073     cores = strsplit(cores, 'MATLAB was assigned: ');
    -0074     cores = regexp(cores{2},'^\d*','match');
    -0075     cores = cores{1};
    -0076     
    -0077     %Create a temporary folder and copy multi-FASTA file there
    -0078     [~, ~]=system(['mkdir "' tmpDB '"']);
    -0079     copyfile(fullfile(ravenPath,'tutorial','sce.fa'),tmpDB);
    -0080     
    -0081     %Construct a BLAST database
    -0082     fprintf('Testing makeblastdb... ');
    -0083     [res, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' fullfile(tmpDB,'sce.fa') '" -out "' tmpDB '" -dbtype prot']);
    -0084     if res~=0
    -0085         fprintf('Not OK\n');
    -0086         EM=['makeblastdb did not run successfully, error: ', num2str(res)];
    -0087         dispEM(EM,true);
    -0088     end
    -0089     fprintf('OK\n');
    -0090     
    -0091     %Run a homology search
    -0092     fprintf('Testing blastp... ');
    -0093     [res, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' fullfile(tmpDB,'sce.fa') '" -out "' outFile '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
    -0094     if res~=0
    -0095         fprintf('Not OK\n');
    -0096         EM=['blastp did not run successfully, error: ', num2str(res)];
    -0097         dispEM(EM,true);
    -0098     end
    -0099     fprintf('OK\n');
    -0100     
    -0101     %Remove temporary folder, since homology search is finished
    -0102     [~, ~]=system(['rm "' tmpDB '" -r']);
    -0103     
    -0104     %Done with the BLAST, do the parsing of the text file
    -0105     blastStructure.fromId='sce';
    -0106     blastStructure.toId='sco';
    -0107     A=readtable(outFile,'Delimiter',',','Format','%s%s%f%f%f%f%f');
    -0108     blastStructure.fromGenes=A{:,1};
    -0109     blastStructure.toGenes=A{:,2};
    -0110     blastStructure.evalue=table2array(A(:,3));
    -0111     blastStructure.identity=table2array(A(:,4));
    -0112     blastStructure.aligLen=table2array(A(:,5));
    -0113     blastStructure.bitscore=table2array(A(:,6));
    -0114     blastStructure.ppos=table2array(A(:,7));
    -0115     
    -0116     %Remove the old tempfiles
    -0117     delete([outFile '*']);
    -0118 end
    -0119 
    -0120 success=1;
    -0121 end
    -
    Generated by m2html © 2005
    - - \ No newline at end of file diff --git a/doc/testing/testDiamond.html b/doc/testing/testDiamond.html deleted file mode 100644 index 3e9aa29e..00000000 --- a/doc/testing/testDiamond.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - Description of testDiamond - - - - - - - - - -
    Home > testing > testDiamond.m
    - - - -

    testDiamond -

    - -

    PURPOSE ^

    -
    testDiamond
    - -

    SYNOPSIS ^

    -
    function [success,blastStructure]=testDiamond(fullCheck)
    - -

    DESCRIPTION ^

    -
     testDiamond
    -   Performs a check for DIAMOND functionality in RAVEN. Depending on the
    -   parameter settings the user can choose between a quick check for
    -   binaries or the thorough testing while building DIAMOND database and
    -   running homology search with DIAMOND
    -
    -   Input:
    -   fullCheck       true if the thorough DIAMOND testing should be
    -                   performed (opt, default true)
    -
    -   Output: 
    -   success         true if the test was successful, otherwise equal to
    -                   zero
    -   blastStructure    blastStructure resulting from the thorough BLAST+ check
    -
    -   NOTE: The purpose of the thorough check is to assess whether the
    -   homology search can be successfully performed using existing BLAST+
    -   binaries. This testing function is completely standalone, only
    -   requiring DIAMOND binary and multi-FASTA file sce.fa from tutorials
    -   directory
    -
    -   Usage: [success,blastStructure]=testDiamond(fullCheck)
    - - -

    CROSS-REFERENCE INFORMATION ^

    -This function calls: - -This function is called by: - - - - - -

    SOURCE CODE ^

    -
    0001 function [success,blastStructure]=testDiamond(fullCheck)
    -0002 % testDiamond
    -0003 %   Performs a check for DIAMOND functionality in RAVEN. Depending on the
    -0004 %   parameter settings the user can choose between a quick check for
    -0005 %   binaries or the thorough testing while building DIAMOND database and
    -0006 %   running homology search with DIAMOND
    -0007 %
    -0008 %   Input:
    -0009 %   fullCheck       true if the thorough DIAMOND testing should be
    -0010 %                   performed (opt, default true)
    -0011 %
    -0012 %   Output:
    -0013 %   success         true if the test was successful, otherwise equal to
    -0014 %                   zero
    -0015 %   blastStructure    blastStructure resulting from the thorough BLAST+ check
    -0016 %
    -0017 %   NOTE: The purpose of the thorough check is to assess whether the
    -0018 %   homology search can be successfully performed using existing BLAST+
    -0019 %   binaries. This testing function is completely standalone, only
    -0020 %   requiring DIAMOND binary and multi-FASTA file sce.fa from tutorials
    -0021 %   directory
    -0022 %
    -0023 %   Usage: [success,blastStructure]=testDiamond(fullCheck)
    -0024 
    -0025 if nargin<1
    -0026     fullCheck=true;
    -0027 end
    -0028 
    -0029 %Get the directory for RAVEN Toolbox
    -0030 [ST, I]=dbstack('-completenames');
    -0031 ravenPath=fileparts(fileparts(ST(I).file));
    -0032 
    -0033 if isunix
    -0034     if ismac
    -0035         binEnd='.mac';
    -0036     else
    -0037         binEnd='';
    -0038     end
    -0039 elseif ispc
    -0040     binEnd='';
    -0041 else
    -0042     dispEM('Unknown OS, exiting.')
    -0043     return
    -0044 end
    -0045 
    -0046 %Create an empty blastStructure. Even if a quick DIAMOND evaluation is
    -0047 %considered, blastStructure should still be in the output
    -0048 blastStructure=[];
    -0049 
    -0050 if ~fullCheck
    -0051     fprintf(['Checking diamond' binEnd '... ']);
    -0052     [res,~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '"']);
    -0053     if res==1
    -0054         fprintf('OK\n');
    -0055     else
    -0056         fprintf('Not OK! The binary must be recompiled from source before running RAVEN\n');
    -0057     end
    -0058 else    
    -0059     %Generate temporary names for working directory and outFile
    -0060     tmpDB=tempname;
    -0061     outFile=tempname;
    -0062     
    -0063     %Run DIAMOND multi-threaded to use all logical cores assigned to MATLAB
    -0064     cores = evalc('feature(''numcores'')');
    -0065     cores = strsplit(cores, 'MATLAB was assigned: ');
    -0066     cores = regexp(cores{2},'^\d*','match');
    -0067     cores = cores{1};
    -0068     
    -0069     %Create a temporary folder and copy multi-FASTA file there
    -0070     [~, ~]=system(['mkdir "' tmpDB '"']);
    -0071     copyfile(fullfile(ravenPath,'tutorial','sce.fa'),tmpDB);
    -0072     
    -0073     %Construct a DIAMOND database
    -0074     fprintf('Testing DIAMOND makedb... ');
    -0075     [res, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fullfile(tmpDB,'sce.fa') '" --db "' tmpDB '"']);
    -0076     if res~=0
    -0077         fprintf('Not OK\n');
    -0078         EM=['DIAMOND makedb did not run successfully, error: ', num2str(res)];
    -0079         dispEM(EM,true);
    -0080     end
    -0081     fprintf('OK\n');
    -0082     
    -0083     %Run a homology search
    -0084     fprintf('Testing DIAMOND blastp... ');
    -0085     [res, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fullfile(tmpDB,'sce.fa') '" --out "' outFile '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]);
    -0086     if res~=0
    -0087         fprintf('Not OK\n');
    -0088         EM=['DIAMOND blastp did not run successfully, error: ', num2str(res)];
    -0089         dispEM(EM,true);
    -0090     end
    -0091     fprintf('OK\n');
    -0092     
    -0093     %Remove temporary folder, since homology search is finished
    -0094     [~, ~]=system(['rm "' tmpDB '" -r']);
    -0095     
    -0096     %Done with the DIAMOND, do the parsing of the text file
    -0097     blastStructure.fromId='sce';
    -0098     blastStructure.toId='sco';
    -0099     A=readtable(outFile,'Delimiter','\t','Format','%s%s%f%f%f%f%f');
    -0100     blastStructure.fromGenes=A{:,1};
    -0101     blastStructure.toGenes=A{:,2};
    -0102     blastStructure.evalue=table2array(A(:,3));
    -0103     blastStructure.identity=table2array(A(:,4));
    -0104     blastStructure.aligLen=table2array(A(:,5));
    -0105     blastStructure.bitscore=table2array(A(:,6));
    -0106     blastStructure.ppos=table2array(A(:,7));
    -0107     
    -0108     %Remove the old tempfiles
    -0109     delete([outFile '*']);
    -0110 end
    -0111 
    -0112 success=1;
    -0113 end
    -
    Generated by m2html © 2005
    - - \ No newline at end of file diff --git a/doc/testing/unit_tests/blastPlusTests.html b/doc/testing/unit_tests/blastPlusTests.html new file mode 100644 index 00000000..2b0ba05b --- /dev/null +++ b/doc/testing/unit_tests/blastPlusTests.html @@ -0,0 +1,108 @@ + + + + Description of blastPlusTests + + + + + + + + + +
    Home > testing > unit_tests > blastPlusTests.m
    + + + +

    blastPlusTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = blastPlusTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('blastPlusTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %run this test case with the command
    +0002 %results = runtests('blastPlusTests.m')
    +0003 function tests = blastPlusTests
    +0004 tests = functiontests(localfunctions);
    +0005 end
    +0006 
    +0007 function testBlastPlus(testCase)
    +0008 %This unit test comprises several functionality tests for BLAST+ in RAVEN:
    +0009 % 1. MD5 checksum check for BLAST database files. This check is applied for
    +0010 %    "phr", "pot", "psq" and "pto" files. The remaining files (i.e. "pdb",
    +0011 %    "pin" and "ptf") are not compared as these seem to be
    +0012 %    machine-specific.
    +0013 % 2. Non-parsed text check for BLAST result files. Although the content of
    +0014 %    the files is exactly the same, their MD5 hashes are somehow different
    +0015 %    between the operating systems.
    +0016 % 3. Check of resulting blastStructure against the expected one. This is
    +0017 %    done to test BLAST results parsing in RAVEN.
    +0018 
    +0019 %%
    +0020 %Get the directory for RAVEN Toolbox
    +0021 [ST, I]=dbstack('-completenames');
    +0022 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
    +0023 
    +0024 %Import structures that contain expected MD5 hashes and BLAST results
    +0025 sourceDir = fileparts(which(mfilename));
    +0026 load([sourceDir,'/test_data/expBlastResults.mat'],'expBlastStructure','expBlastReport');
    +0027 
    +0028 organismID='sce';
    +0029 fastaFile=fullfile(ravenPath,'testing','unit_tests','test_data','yeast_galactosidases.fa');
    +0030 modelIDs={'hsa' 'afv'};
    +0031 refFastaFiles={fullfile(ravenPath,'testing','unit_tests','test_data','human_galactosidases.fa') fullfile(ravenPath,'testing','unit_tests','test_data','aflavus_galactosidases.fa')};
    +0032 
    +0033 %%
    +0034 %Run BLAST
    +0035 [actBlastStructure,actBlastReport]=getBlast(organismID,fastaFile,modelIDs,refFastaFiles,true,true);
    +0036 
    +0037 %%
    +0038 %Test 1a: Check if MD5 checksums for BLAST database files are the same
    +0039 verifyEqual(testCase,actBlastReport.dbHashes,expBlastReport.dbHashes);
    +0040 
    +0041 %Test 1b: Change one of the MD5 checksums and check if test fails
    +0042 actBlastReport.dbHashes.phr{1,1}=actBlastReport.dbHashes.phr{1,2};
    +0043 verifyNotEqual(testCase,actBlastReport.dbHashes,expBlastReport.dbHashes);
    +0044 
    +0045 %Test 2a: Check if BLAST result files are the same
    +0046 verifyEqual(testCase,actBlastReport.blastTxtOutput,expBlastReport.blastTxtOutput);
    +0047 
    +0048 %Test 2b: Change actual BLAST result file and check if test fails
    +0049 actBlastReport.blastTxtOutput='empty';
    +0050 verifyNotEqual(testCase,actBlastReport.blastTxtOutput,expBlastReport.blastTxtOutput);
    +0051 
    +0052 %Test 3a: Check if BLAST structures are the same
    +0053 verifyEqual(testCase,actBlastStructure,expBlastStructure);
    +0054 
    +0055 %Test 3b: Modify actual BLAST structure and check if test fails
    +0056 actBlastStructure(1,1).toId=actBlastStructure(1,1).fromId;
    +0057 verifyNotEqual(testCase,actBlastStructure,expBlastStructure);
    +0058 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/cdhitTests.html b/doc/testing/unit_tests/cdhitTests.html new file mode 100644 index 00000000..c2a87baa --- /dev/null +++ b/doc/testing/unit_tests/cdhitTests.html @@ -0,0 +1,130 @@ + + + + Description of cdhitTests + + + + + + + + + +
    Home > testing > unit_tests > cdhitTests.m
    + + + +

    cdhitTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = cdhitTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('cdhitTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %run this test case with the command
    +0002 %results = runtests('cdhitTests.m')
    +0003 function tests = cdhitTests
    +0004 tests = functiontests(localfunctions);
    +0005 end
    +0006 
    +0007 function testCdhit(testCase)
    +0008 %This unit test comprises the functionality test for CD-HIT in RAVEN:
    +0009 % 1. Check for resulting file against the expected one.
    +0010 
    +0011 %%
    +0012 %Get the directory for RAVEN Toolbox
    +0013 [ST, I]=dbstack('-completenames');
    +0014 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
    +0015 
    +0016 %Identify the operating system
    +0017 if isunix
    +0018     if ismac
    +0019         binEnd='.mac';
    +0020     else
    +0021         binEnd='';
    +0022     end
    +0023 elseif ispc
    +0024     binEnd='.exe';
    +0025 else
    +0026     dispEM('Unknown OS, exiting.')
    +0027     return
    +0028 end
    +0029 
    +0030 %Import structure that contains expected CD-HIT results
    +0031 sourceDir = fileparts(which(mfilename));
    +0032 load([sourceDir,'/test_data/expCdhitMafftOutput.mat'],'expCdhitMafftOutput');
    +0033 
    +0034 %Generate temporary names for working directory and outFile
    +0035 tmpDIR=tempname;
    +0036 outFile=tempname;
    +0037 
    +0038 %Run CD-HIT multi-threaded to use all logical cores assigned to MATLAB
    +0039 cores = evalc('feature(''numcores'')');
    +0040 cores = strsplit(cores, 'MATLAB was assigned: ');
    +0041 cores = regexp(cores{2},'^\d*','match');
    +0042 cores = cores{1};
    +0043 
    +0044 %Create a temporary folder and copy multi-FASTA file there
    +0045 [~, ~]=system(['mkdir "' tmpDIR '"']);
    +0046 
    +0047 sourceDir = fileparts(which(mfilename));
    +0048 copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR);
    +0049 
    +0050 % Define WSL paths
    +0051 wslPath.fastaFile=getWSLpath([tmpDIR filesep 'yeast_galactosidases.fa']);
    +0052 wslPath.outFile=getWSLpath(outFile);
    +0053 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit'));
    +0054 
    +0055 %%
    +0056 %Run protein clustering with CD-HIT
    +0057 if ispc
    +0058     [~, ~]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.fastaFile '" -o "' wslPath.outFile '" -c 1.0 -n 5 -M 2000']);
    +0059 else
    +0060     [~, ~]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' fullfile(tmpDIR, 'yeast_galactosidases.fa') '" -o "' outFile '" -c 1.0 -n 5 -M 2000']);
    +0061 end
    +0062 
    +0063 %%
    +0064 %Open actual CD-HIT results file
    +0065 actCdhitOutput=importdata(fullfile(outFile));
    +0066 
    +0067 %Remove the old tempfiles
    +0068 delete([outFile '*']);
    +0069 
    +0070 %Remove temporary folder, since testing is finished
    +0071 [~, ~]=system(['rm "' tmpDIR '" -r']);
    +0072 
    +0073 %%
    +0074 %Check 1a: Check if files for CD-HIT results are the same
    +0075 verifyEqual(testCase,actCdhitOutput,expCdhitMafftOutput);
    +0076 
    +0077 %Check 1b: Change actual CD-HIT results file and check if test fails
    +0078 actCdhitOutput='abc';
    +0079 verifyNotEqual(testCase,actCdhitOutput,expCdhitMafftOutput);
    +0080 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/diamondTests.html b/doc/testing/unit_tests/diamondTests.html new file mode 100644 index 00000000..5af96de4 --- /dev/null +++ b/doc/testing/unit_tests/diamondTests.html @@ -0,0 +1,107 @@ + + + + Description of diamondTests + + + + + + + + + +
    Home > testing > unit_tests > diamondTests.m
    + + + +

    diamondTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = diamondTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('diamondTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %run this test case with the command
    +0002 %results = runtests('diamondTests.m')
    +0003 function tests = diamondTests
    +0004 tests = functiontests(localfunctions);
    +0005 end
    +0006 
    +0007 function testDiamond(testCase)
    +0008 %This unit test comprises several functionality tests for DIAMOND blastp in
    +0009 %RAVEN:
    +0010 % 1. MD5 checksum check for DIAMOND database files.
    +0011 % 2. Non-parsed text check for DIAMOND result files. Although the content
    +0012 %    of the files is exactly the same, their MD5 hashes are somehow
    +0013 %    different between the operating systems.
    +0014 % 3. Check of resulting blastStructure against the expected one. This is
    +0015 %    done to test DIAMOND blastp results parsing in RAVEN.
    +0016 
    +0017 %%
    +0018 %Get the directory for RAVEN Toolbox
    +0019 [ST, I]=dbstack('-completenames');
    +0020 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
    +0021 
    +0022 %Import structures that contain expected MD5 hashes and DIAMOND blastp
    +0023 %results
    +0024 sourceDir = fileparts(which(mfilename));
    +0025 load([sourceDir,'/test_data/expDiamondResults.mat'],'expBlastStructure','expDiamondReport');
    +0026 
    +0027 organismID='sce';
    +0028 fastaFile=fullfile(ravenPath,'testing','unit_tests','test_data','yeast_galactosidases.fa');
    +0029 modelIDs={'hsa' 'afv'};
    +0030 refFastaFiles={fullfile(ravenPath,'testing','unit_tests','test_data','human_galactosidases.fa') fullfile(ravenPath,'testing','unit_tests','test_data','aflavus_galactosidases.fa')};
    +0031 
    +0032 %%
    +0033 %Run DIAMOND blastp
    +0034 [actBlastStructure,actDiamondReport]=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles,true,true);
    +0035 
    +0036 %%
    +0037 %Test 1a: Check if MD5 checksums for DIAMOND blastp database files are the same
    +0038 verifyEqual(testCase,actDiamondReport.dbHashes,expDiamondReport.dbHashes);
    +0039 
    +0040 %Test 1b: Change one of the MD5 checksums and check if test fails
    +0041 actDiamondReport.dbHashes{1,1}=actDiamondReport.dbHashes{1,2};
    +0042 verifyNotEqual(testCase,actDiamondReport.dbHashes,expDiamondReport.dbHashes);
    +0043 
    +0044 %Test 2a: Check if DIAMOND blastp result files are the same
    +0045 verifyEqual(testCase,actDiamondReport.diamondTxtOutput,expDiamondReport.diamondTxtOutput);
    +0046 
    +0047 %Test 2b: Change actual DIAMOND blastp result file and check if test fails
    +0048 actDiamondReport.diamondTxtOutput='empty';
    +0049 verifyNotEqual(testCase,actDiamondReport.diamondTxtOutput,expDiamondReport.diamondTxtOutput);
    +0050 
    +0051 %Test 3a: Check if DIAMOND blastp structures are the same
    +0052 verifyEqual(testCase,actBlastStructure,expBlastStructure);
    +0053 
    +0054 %Test 3b: Modify actual DIAMOND blastp structure and check if test fails
    +0055 actBlastStructure(1,1).toId=actBlastStructure(1,1).fromId;
    +0056 verifyNotEqual(testCase,actBlastStructure,expBlastStructure);
    +0057 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/hmmerTests.html b/doc/testing/unit_tests/hmmerTests.html new file mode 100644 index 00000000..dec29c58 --- /dev/null +++ b/doc/testing/unit_tests/hmmerTests.html @@ -0,0 +1,191 @@ + + + + Description of hmmerTests + + + + + + + + + +
    Home > testing > unit_tests > hmmerTests.m
    + + + +

    hmmerTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = hmmerTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('hmmerTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %run this test case with the command
    +0002 %results = runtests('hmmerTests.m')
    +0003 function tests = hmmerTests
    +0004 tests = functiontests(localfunctions);
    +0005 end
    +0006 
    +0007 function testHmmer(testCase)
    +0008 %This unit test comprises the functionality test for HMMER in RAVEN:
    +0009 % 1. Check of parsed HMMER results against the expected.
    +0010 %
    +0011 % NOTE: as hmm and HMMER results files are time-specific, no checks for
    +0012 % the existence of these files are done. Also, due to the way HMMER is utilized in
    +0013 % getKEGGModelForOrganism (HMMER result files can be parsed only once all
    +0014 % required hmm files are generated), the code segment involving HMMER
    +0015 % results parsing is pasted in this test function. Should the parsing problems
    +0016 % occur in the results processing, the code modifications shall be done in
    +0017 % this function and getKEGGModelForOrganism respectively.
    +0018 
    +0019 %%
    +0020 %Get the directory for RAVEN Toolbox
    +0021 [ST, I]=dbstack('-completenames');
    +0022 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
    +0023 
    +0024 %Identify the operating system
    +0025 if isunix
    +0026     if ismac
    +0027         binEnd='.mac';
    +0028     else
    +0029         binEnd='';
    +0030     end
    +0031 elseif ispc
    +0032     binEnd='.exe';
    +0033 else
    +0034     dispEM('Unknown OS, exiting.')
    +0035     return
    +0036 end
    +0037 
    +0038 %Create empty structures needed for HMMER results
    +0039 actHmmResult.genes = {};
    +0040 actHmmResult.scores = [];
    +0041 
    +0042 %Create structures that contain expected HMMER results
    +0043 expHmmResult.genes = {'sp|P41947|MEL6_YEASX','sp|P41946|MEL5_YEASX', 'sp|P41945|MEL2_YEASX', 'sp|P04824|MEL1_YEASX'};
    +0044 expHmmResult.scores = [10^-250, 10^-250, 10^-250, 10^-250];
    +0045 
    +0046 %Generate temporary names for working directory and outFile
    +0047 tmpDIR=tempname;
    +0048 outFile=tempname;
    +0049 
    +0050 %Run HMMER multi-threaded to use all logical cores assigned to MATLAB
    +0051 cores = evalc('feature(''numcores'')');
    +0052 cores = strsplit(cores, 'MATLAB was assigned: ');
    +0053 cores = regexp(cores{2},'^\d*','match');
    +0054 cores = cores{1};
    +0055 
    +0056 %Create a temporary folder and copy multi-FASTA file there
    +0057 [~, ~]=system(['mkdir "' tmpDIR '"']);
    +0058 
    +0059 sourceDir = fileparts(which(mfilename));
    +0060 copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR);
    +0061 copyfile(fullfile(sourceDir,'test_data','human_galactosidases.fa'),tmpDIR);
    +0062 
    +0063 %%
    +0064 %Train a hidden Markov model
    +0065 [~, ~]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
    +0066 
    +0067 %Run a homology search against the newly-trained HMM
    +0068 [~, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
    +0069 
    +0070 %Save the output to a file
    +0071 fid=fopen(outFile,'w');
    +0072 fwrite(fid,output);
    +0073 fclose(fid);
    +0074 
    +0075 %%
    +0076 %Parse the results
    +0077 geneCounter=0;
    +0078 fid=fopen(outFile,'r');
    +0079 beginMatches=false;
    +0080 while 1
    +0081     %Get the next line
    +0082     tline = fgetl(fid);
    +0083     
    +0084     %Abort at end of file
    +0085     if ~ischar(tline)
    +0086         break;
    +0087     end
    +0088     
    +0089     if and(beginMatches,strcmp(tline,'  ------ inclusion threshold ------'))
    +0090         break;
    +0091     end
    +0092     
    +0093     if beginMatches==false
    +0094         %This is how the listing of matches begins
    +0095         if any(strfind(tline,'E-value '))
    +0096             %Read one more line that is only padding
    +0097             tline = fgetl(fid);
    +0098             beginMatches=true;
    +0099         end
    +0100     else
    +0101         %If matches should be read
    +0102         if ~strcmp(tline,'   [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
    +0103             elements=regexp(tline,' ','split');
    +0104             elements=elements(cellfun(@any,elements));
    +0105             
    +0106             %Check if the match is below the threshold
    +0107             score=str2double(elements{1});
    +0108             gene=elements{9};
    +0109             if score<=10^-50
    +0110                 %If the score is exactly 0, change it to a very
    +0111                 %small value to avoid NaN
    +0112                 if score==0
    +0113                     score=10^-250;
    +0114                 end
    +0115                 %Check if the gene is added already and, if so, get
    +0116                 %the best score for it
    +0117                 geneCounter=geneCounter+1;
    +0118                 actHmmResult.genes{geneCounter}=gene;
    +0119                 actHmmResult.scores(geneCounter)=score;
    +0120             end
    +0121         else
    +0122             break;
    +0123         end
    +0124     end
    +0125 end
    +0126 fclose(fid);
    +0127 
    +0128 %Remove the old tempfiles
    +0129 delete([outFile '*']);
    +0130 
    +0131 %Remove temporary folder, since testing is finished
    +0132 [~, ~]=system(['rm "' tmpDIR '" -r']);
    +0133 
    +0134 %%
    +0135 %Test 1a: Check if HMMER results match the expected ones
    +0136 verifyEqual(testCase,actHmmResult,expHmmResult);
    +0137 
    +0138 %Test 1b: Modify actual HMMER results structure and check if test fails
    +0139 actHmmResult.score(2)=1;
    +0140 verifyNotEqual(testCase,actHmmResult,expHmmResult);
    +0141 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/index.html b/doc/testing/unit_tests/index.html index 3e1a88a1..92bfb81c 100644 --- a/doc/testing/unit_tests/index.html +++ b/doc/testing/unit_tests/index.html @@ -19,7 +19,7 @@

    Index for testing\unit_tests

    Matlab files in this directory:

    -
     tinitTestsrun this test case with the command
    + blastPlusTestsrun this test case with the command  cdhitTestsrun this test case with the command  diamondTestsrun this test case with the command  hmmerTestsrun this test case with the command  mafftTestsrun this test case with the command  sortIdentifiers_and_permuteModelTestsrun this test case with the command  tinitTestsrun this test case with the command

    Subsequent directories:

    diff --git a/doc/testing/unit_tests/mafftTests.html b/doc/testing/unit_tests/mafftTests.html new file mode 100644 index 00000000..4a31ad61 --- /dev/null +++ b/doc/testing/unit_tests/mafftTests.html @@ -0,0 +1,118 @@ + + + + Description of mafftTests + + + + + + + + + +
    Home > testing > unit_tests > mafftTests.m
    + + + +

    mafftTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = mafftTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('mafftTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %Run this test case with the command:
    +0002 %results = runtests('mafftTests.m')
    +0003 function tests = mafftTests
    +0004 tests = functiontests(localfunctions); %build the test array from the local functions of this file
    +0005 end
    +0006 
    +0007 function testMafft(testCase)
    +0008 %This unit test comprises the functionality test for MAFFT in RAVEN:
    +0009 % 1. Check for resulting file against the expected one.
    +0010 %Temporary files are removed by teardowns, so they are cleaned up even if
    +0011 %MAFFT fails, and with MATLAB built-ins instead of the shell "mkdir"/"rm".
    +0012 
    +0013 %%
    +0014 %Get the directory for RAVEN Toolbox
    +0015 [ST, I]=dbstack('-completenames');
    +0016 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
    +0017 
    +0018 %Import structure that contains expected MAFFT results
    +0019 sourceDir = fileparts(which(mfilename));
    +0020 load([sourceDir,'/test_data/expCdhitMafftOutput.mat'],'expCdhitMafftOutput');
    +0021 
    +0022 %Generate temporary names for working directory and outFile
    +0023 tmpDIR=tempname;
    +0024 outFile=tempname;
    +0025 
    +0026 %Run MAFFT multi-threaded to use all logical cores assigned to MATLAB
    +0027 cores = evalc('feature(''numcores'')');
    +0028 cores = strsplit(cores, 'MATLAB was assigned: ');
    +0029 cores = regexp(cores{2},'^\d*','match');
    +0030 cores = cores{1}; %already a character vector, so no num2str is needed
    +0031 
    +0032 %Create a temporary folder and copy multi-FASTA file there
    +0033 mkdir(tmpDIR);
    +0034 addTeardown(testCase,@rmdir,tmpDIR,'s');
    +0035 fastaFile=fullfile(tmpDIR,'yeast_galactosidases.fa');
    +0036 copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR);
    +0037 
    +0038 %Remove the output file(s) once the test has finished
    +0039 addTeardown(testCase,@delete,[outFile '*']);
    +0040 
    +0041 %%
    +0042 %Run protein multi-sequence alignment with MAFFT
    +0043 mafftDir=fullfile(ravenPath,'software','mafft');
    +0044 if ismac
    +0045     [status, ~]=system(['"' fullfile(mafftDir,'mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' cores '" "' fastaFile '" > "' outFile '"']);
    +0046 elseif isunix
    +0047     [status, ~]=system(['"' fullfile(mafftDir,'mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' cores '" "' fastaFile '" > "' outFile '"']);
    +0048 elseif ispc
    +0049     %MAFFT is run through WSL on Windows, which requires WSL paths
    +0050     wslPath.fastaFile=getWSLpath(fastaFile);
    +0051     wslPath.outFile=getWSLpath(outFile);
    +0052     wslPath.mafft=getWSLpath(fullfile(mafftDir,'mafft-linux64','mafft.bat'));
    +0053     [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --quiet --thread "' cores '" --out "' wslPath.outFile '" "' wslPath.fastaFile '"']);
    +0054 end
    +0055 assertEqual(testCase,status,0,'MAFFT did not run successfully');
    +0056 
    +0057 %%
    +0058 %Open actual MAFFT results file
    +0059 actMafftOutput=importdata(fullfile(outFile));
    +0060 
    +0061 %%
    +0062 %Check 1a: Check if files for MAFFT results are the same
    +0063 verifyEqual(testCase,actMafftOutput,expCdhitMafftOutput);
    +0064 
    +0065 %Check 1b: Change actual MAFFT results file and check if test fails
    +0066 actMafftOutput='abc';
    +0067 verifyNotEqual(testCase,actMafftOutput,expCdhitMafftOutput);
    +0068 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/sortIdentifiers_and_permuteModelTests.html b/doc/testing/unit_tests/sortIdentifiers_and_permuteModelTests.html new file mode 100644 index 00000000..7584e7e1 --- /dev/null +++ b/doc/testing/unit_tests/sortIdentifiers_and_permuteModelTests.html @@ -0,0 +1,104 @@ + + + + Description of sortIdentifiers_and_permuteModelTests + + + + + + + + + +
    Home > testing > unit_tests > sortIdentifiers_and_permuteModelTests.m
    + + + +

    sortIdentifiers_and_permuteModelTests +

    + +

    PURPOSE ^

    +
    run this test case with the command
    + +

    SYNOPSIS ^

    +
    function tests = sortIdentifiers_and_permuteModelTests
    + +

    DESCRIPTION ^

    +
    run this test case with the command
    +results = runtests('sortIdentifiers_and_permuteModelTests.m')
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + +

    SUBFUNCTIONS ^

    + + +

    SOURCE CODE ^

    +
    0001 %Run this test case with the command:
    +0002 %results = runtests('sortIdentifiers_and_permuteModelTests.m')
    +0003 function tests = sortIdentifiers_and_permuteModelTests
    +0004 tests = functiontests(localfunctions); %build the test array from the local functions of this file
    +0005 end
    +0006 
    +0007 function sortRandomizedModelTest(testCase)
    +0008 %Shuffle a sorted reference model by hand (without RAVEN functions) and
    +0009 %verify that sortIdentifiers restores the original order.
    +0010 
    +0011 %The model stored in the test data is the expected (i.e. sorted) result
    +0012 sourceDir = fileparts(which(mfilename));
    +0013 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model');
    +0014 expModel = model;
    +0015 
    +0016 %The actual model starts as a copy and is shuffled below
    +0017 actModel = expModel;
    +0018 
    +0019 %Draw one random order per identifier type
    +0020 rxnOrder  = randperm(numel(actModel.rxns));
    +0021 metOrder  = randperm(numel(actModel.mets));
    +0022 geneOrder = randperm(numel(actModel.genes));
    +0023 compOrder = randperm(numel(actModel.comps));
    +0024 
    +0025 %Shuffle the vector fields that belong to each identifier type
    +0026 for field = {'rxns','lb','ub','rev','c','rxnNames','grRules','eccodes'}
    +0027     actModel.(field{1}) = actModel.(field{1})(rxnOrder);
    +0028 end
    +0029 for field = {'mets','metNames','metComps','metFormulas'}
    +0030     actModel.(field{1}) = actModel.(field{1})(metOrder);
    +0031 end
    +0032 for field = {'genes','geneShortNames'}
    +0033     actModel.(field{1}) = actModel.(field{1})(geneOrder);
    +0034 end
    +0035 for field = {'comps','compNames'}
    +0036     actModel.(field{1}) = actModel.(field{1})(compOrder);
    +0037 end
    +0038 
    +0039 %Shuffle the rows and columns of the matrices accordingly
    +0040 actModel.S          = actModel.S(metOrder,rxnOrder);
    +0041 actModel.rxnGeneMat = actModel.rxnGeneMat(rxnOrder,geneOrder);
    +0042 
    +0043 %Let metComps point to the new positions of the compartments
    +0044 [~,newPos]=sort(compOrder);
    +0045 [isValid, oldPos] = ismember(actModel.metComps,1:length(newPos));
    +0046 actModel.metComps(isValid) = newPos(oldPos(isValid));
    +0047 
    +0048 %Sort the shuffled model
    +0049 actModel = sortIdentifiers(actModel);
    +0050 
    +0051 %The sorted model should be identical to the expected model
    +0052 verifyEqual(testCase,actModel,expModel)
    +0053 end
    +0054 
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/tinitTests.html b/doc/testing/unit_tests/tinitTests.html index 2ad5a268..247f542f 100644 --- a/doc/testing/unit_tests/tinitTests.html +++ b/doc/testing/unit_tests/tinitTests.html @@ -12,7 +12,7 @@ -
    Home > testing > unit_tests > tinitTests.m
    +
    Home > testing > unit_tests > tinitTests.m
    diff --git a/doc/tutorial/tutorial5.html b/doc/tutorial/tutorial5.html index 177ff49e..6aa64dc0 100644 --- a/doc/tutorial/tutorial5.html +++ b/doc/tutorial/tutorial5.html @@ -70,10 +70,10 @@

    SOURCE CODE ^%are for. This process takes up to 20-35 minutes in macOS, Unix systems and 0020 %40-55 minutes in Windows, depending on your hardware and the size of 0021 %target organism proteome -0022 model=getKEGGModelForOrganism('sce','sce.fa','euk100_kegg94','output',false,false,false,false,10^-30,0.8,0.3,-1); +0022 model=getKEGGModelForOrganism('sce','sce.fa','euk90_kegg100','output',false,false,false,false,10^-30,0.8,0.3,-1); 0023 -0024 %The resulting model should contain around 1669 reactions, 1677 -0025 %metabolites and 838 genes. Small variations are possible since it is an +0024 %The resulting model should contain around 1590 reactions, 1571 +0025 %metabolites and 837 genes. Small variations are possible since it is an 0026 %heuristic algorithm and different KEGG versions will give slightly 0027 %different results. 0028 disp(model); diff --git a/external/getBlast.m b/external/getBlast.m index 44e3d11f..f9567527 100755 --- a/external/getBlast.m +++ b/external/getBlast.m @@ -1,7 +1,8 @@ -function blastStructure=getBlast(organismID,fastaFile,modelIDs,refFastaFiles) +function [blastStructure,blastReport]=getBlast(organismID,fastaFile,... + modelIDs,refFastaFiles,develMode,hideVerbose) % getBlast -% Performs a bidirectional BLASTP between the organism of interest and a -% set of template organisms. +% Performs a bidirectional BLAST between the organism of interest and a +% set of template organisms % % Input: % organismID the id of the organism of interest. 
This should also @@ -13,19 +14,34 @@ % output is to be used with getModelFromHomology % refFastaFiles a cell array with the paths to the corresponding FASTA % files -% -% Output: +% develMode true if blastReport should be generated that is used +% in the unit testing function for BLAST+ (opt, default +% false) +% hideVerbose true if no status messages should be printed (opt, +% default false) +% +% Output: % blastStructure structure containing the bidirectional homology -% measurements which are used by getModelFromHomology +% measurements that can be used by getModelFromHomology +% blastReport structure containing MD5 hashes for FASTA database +% files and non-parsed BLAST output data. Will be blank +% if develMode is false. % -% NOTE: This function calls BLASTP to perform a bidirectional homology +% NOTE: This function calls BLAST+ to perform a bidirectional homology % test between the organism of interest and a set of other organisms -% using standard settings. The only filtering this functions does is the -% removal of hits with E value higher than 10e-5. If you would like to -% use other homology measurements, please see getBlastFromExcel. +% using standard settings. The only filtering this function does is the +% removal of hits with an E-value higher than 10e-5. The other homology +% measurements can be implemented using getBlastFromExcel. % -% Usage: blastStructure=getBlast(organismID,fastaFile,modelIDs,... -% refFastaFiles) +% Usage: [blastStructure,blastReport]=getBlast(organismID,fastaFile,... 
+% modelIDs,refFastaFiles,develMode,hideVerbose) + +if nargin<5 + develMode = false; +end +if nargin<6 + hideVerbose = false; +end %Everything should be cell arrays organismID=cellstr(organismID); @@ -33,14 +49,20 @@ modelIDs=cellstr(modelIDs); refFastaFiles=cellstr(refFastaFiles); +%Create blank structures for results blastStructure=[]; +blastReport.dbHashes.phr={}; +blastReport.dbHashes.pot={}; +blastReport.dbHashes.psq={}; +blastReport.dbHashes.pto={}; +blastReport.blastTxtOutput={}; %Get the directory for RAVEN Toolbox. This may not be the easiest or best %way to do this [ST, I]=dbstack('-completenames'); ravenPath=fileparts(fileparts(ST(I).file)); -%Construct databases and output file +%Generate temporary names for BLAST databases and output files tmpDB=tempname; outFile=tempname; @@ -52,12 +74,11 @@ files=vertcat(fastaFile,refFastaFiles); end -files=checkFileExistence(files,true,false); %No whitespace allowed +files=checkFileExistence(files,2); %Copy files to temp dir fastaFile = files(1); refFastaFiles = files(2:end); -%Create a database for the new organism and blast each of the refFastaFiles -%against it +%Identify the operating system if isunix if ismac binEnd='.mac'; @@ -65,50 +86,72 @@ binEnd=''; end elseif ispc - binEnd=''; + binEnd='.exe'; setenv('BLASTDB_LMDB_MAP_SIZE','1000000'); else dispEM('Unknown OS, exiting.') return end -% Run BLAST multi-threaded to use all logical cores assigned to MATLAB. 
+%Run BLAST multi-threaded to use all logical cores assigned to MATLAB cores = evalc('feature(''numcores'')'); cores = strsplit(cores, 'MATLAB was assigned: '); cores = regexp(cores{2},'^\d*','match'); cores = cores{1}; -[status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' fastaFile{1} '" -out "' tmpDB '" -dbtype prot']); +%Create a database for the new organism and blast each of the refFastaFiles +%against it +[status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' fastaFile{1} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']); +if develMode + blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr')); + blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot')); + blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq')); + blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto')); +end if status~=0 EM=['makeblastdb did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end for i=1:numel(refFastaFiles) - fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']); - [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' refFastaFiles{i} '" -out "' outFile '_' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); + if ~hideVerbose + fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']); + end + [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' refFastaFiles{i} ' -out "' outFile '_' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); + if develMode + 
blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_' num2str(i)]); + end if status~=0 EM=['blastp did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end end -delete([tmpDB '*']); +delete([tmpDB filesep 'tmpDB*']); %Then create a database for each of the reference organisms and blast the %new organism against them for i=1:numel(refFastaFiles) - fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']); - [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' refFastaFiles{i} '" -out "' tmpDB '" -dbtype prot']); + if ~hideVerbose + fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']); + end + [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' refFastaFiles{i} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']); if status~=0 EM=['makeblastdb did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end - [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' fastaFile{1} '" -out "' outFile '_r' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); - delete([tmpDB '*']); + [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' fastaFile{1} ' -out "' outFile '_r' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); + if develMode + blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr')); + blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot')); + blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq')); + blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 
'tmpDB.pto')); + blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_r' num2str(i)]); + end if status~=0 EM=['blastp did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end + delete([tmpDB filesep 'tmpDB*']); end %Done with the BLAST, do the parsing of the text files @@ -135,4 +178,6 @@ %Remove the old tempfiles delete([outFile '*']); +%Remove the temp fasta files +delete(files{:}) end diff --git a/external/getDiamond.m b/external/getDiamond.m index 57820799..92018543 100755 --- a/external/getDiamond.m +++ b/external/getDiamond.m @@ -1,7 +1,8 @@ -function blastStructure=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles) +function [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,... + modelIDs,refFastaFiles,develMode,hideVerbose) % getDiamond -% Uses DIAMOND to performs a bidirectional BLASTP between the organism -% of interest and a set of template organisms. +% Uses DIAMOND to perform a bidirectional BLAST between the organism +% of interest and a set of template organisms % % Input: % organismID the id of the organism of interest. This should also @@ -13,20 +14,35 @@ % output is to be used with getModelFromHomology % refFastaFiles a cell array with the paths to the corresponding FASTA % files +% develMode true if blastReport should be generated that is used +% in the unit testing function for DIAMOND (opt, default +% false) +% hideVerbose true if no status messages should be printed (opt, +% default false) % % Output: % blastStructure structure containing the bidirectional homology % measurements which are used by getModelFromHomology +% diamondReport structure containing MD5 hashes for FASTA database +% files and non-parsed BLAST output data. Will be blank +% if develMode is false. 
% % NOTE: This function calls DIAMOND to perform a bidirectional homology -% test between the organism of interest and a set of other organisms +% search between the organism of interest and a set of other organisms % using the '--more-sensitive' setting from DIAMOND. For the most % sensitive results, the use of getBlast() is adviced, however, % getDiamond() is a fast alternative (>15x faster). The blastStructure % generated is in the same format as those obtained from getBlast(). % -% Usage: blastStructure=getDiamond(organismID,fastaFile,modelIDs,... -% refFastaFiles) +% Usage: [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,... +% modelIDs,refFastaFiles,develMode,hideVerbose) + +if nargin<5 + develMode = false; +end +if nargin<6 + hideVerbose = false; +end %Everything should be cell arrays organismID=cellstr(organismID); @@ -34,14 +50,17 @@ modelIDs=cellstr(modelIDs); refFastaFiles=cellstr(refFastaFiles); +%Create blank structures for results blastStructure=[]; +diamondReport.dbHashes={}; +diamondReport.diamondTxtOutput={}; %Get the directory for RAVEN Toolbox. This may not be the easiest or best %way to do this [ST, I]=dbstack('-completenames'); ravenPath=fileparts(fileparts(ST(I).file)); -%Construct databases and output file +%Generate temporary names for DIAMOND databases and output files tmpDB=tempname; outFile=tempname; @@ -53,13 +72,11 @@ files=vertcat(fastaFile,refFastaFiles); end -files=checkFileExistence(files,true,false); %No whitespace allowed +files=checkFileExistence(files,2); %Copy files to temp dir fastaFile = files(1); refFastaFiles = files(2:end); -%Create a database for the new organism and blast each of the refFastaFiles -%against it - +%Identify the operating system if isunix if ismac binEnd='.mac'; @@ -67,49 +84,65 @@ binEnd=''; end elseif ispc - binEnd=''; + binEnd='.exe'; else dispEM('Unknown OS, exiting.') return end -% Run BLAST multi-threaded to use all logical cores assigned to MATLAB. 
+%Run DIAMOND multi-threaded to use all logical cores assigned to MATLAB. cores = evalc('feature(''numcores'')'); cores = strsplit(cores, 'MATLAB was assigned: '); cores = regexp(cores{2},'^\d*','match'); cores = cores{1}; -[status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' tmpDB '"']); +%Create a database for the new organism and blast each of the refFastaFiles +%against it +[status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' fullfile(tmpDB) '"']); +if develMode + diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match')); +end if status~=0 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end for i=1:numel(refFastaFiles) - fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']); - [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]); + if ~hideVerbose + fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']); + end + [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]); + if develMode + diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_' num2str(i)]); + end if status~=0 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end end -delete([tmpDB '*']); +delete([tmpDB filesep 'tmpDB*']); %Then create a database for each of the 
reference organisms and blast the %new organism against them for i=1:numel(refFastaFiles) - fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']); - [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' tmpDB '"']); + if ~hideVerbose + fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']); + end + [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' fullfile(tmpDB) '"']); if status~=0 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end - [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]); - delete([tmpDB '*']); + [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]); + if develMode + diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match')); + diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_r' num2str(i)]); + end if status~=0 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)]; dispEM(EM,true); end + delete([tmpDB filesep 'tmpDB*']); end %Done with the DIAMOND blastp, do the parsing of the text files @@ -136,4 +169,6 @@ %Remove the old tempfiles delete([outFile '*']); +%Remove the temp fasta files +delete(files{:}) end diff --git a/external/kegg/constructMultiFasta.m b/external/kegg/constructMultiFasta.m index 83e97663..daaec2aa 100755 --- 
a/external/kegg/constructMultiFasta.m +++ b/external/kegg/constructMultiFasta.m @@ -90,11 +90,10 @@ function constructMultiFasta(model,sourceFile,outputDir) end end end -fprintf('COMPLETE\n'); +fprintf('COMPLETE\n'); -fprintf('Generating the KEGG Orthology specific multi-FASTA files... '); +fprintf('Generating the KEGG Orthology specific multi-FASTA files... 0%% complete'); %Loop through the reactions and print the corresponding sequences -progressFlag=0; for i=1:numel(model.rxns) %Do not overwrite existing files @@ -141,17 +140,14 @@ function constructMultiFasta(model,sourceFile,outputDir) end fclose(rxnfid); end - %Print the progress: no need to update this for every - %iteration, just report once 25%, 50% and 75% are done - if progressFlag==0 && i>numel(model.rxns)*0.25 - fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outputDir,'*.fa')))/numel(model.rxns))*100); - progressFlag=progressFlag+1; - elseif (progressFlag==1 && i>=numel(model.rxns)*0.5) || (progressFlag==2 && i>=numel(model.rxns)*0.75) - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outputDir,'*.fa')))/numel(model.rxns))*100); - progressFlag=progressFlag+1; + %Print the progress + if rem(i-1,50) == 0 + progress=num2str(i/numel(model.rxns)); + progress=pad(progress,3,'left'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); end end -fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); %Close the source file fclose(fid); diff --git a/external/kegg/getKEGGModelForOrganism.m b/external/kegg/getKEGGModelForOrganism.m index 8d4fd8c6..efdaed86 100755 --- a/external/kegg/getKEGGModelForOrganism.m +++ b/external/kegg/getKEGGModelForOrganism.m @@ -1,7 +1,7 @@ function model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... 
- nSequences,seqIdentity) + nSequences,seqIdentity,globalModel) % getKEGGModelForOrganism % Reconstructs a genome-scale metabolic model based on protein homology % to the orthologies in KEGG. If the target species is not available in @@ -42,7 +42,7 @@ % the HMMs were trained on pro- or eukaryotic % sequences, using a sequence similarity threshold of % XXX %, fitting the KEGG version YY. E.g. -% euk100_kegg82. (opt, see note about fastaFile. Note +% euk90_kegg100. (opt, see note about fastaFile. Note % that in order to rebuild the KEGG model from a % database dump, as opposed to using the version % supplied with RAVEN, you would still need to supply @@ -99,10 +99,16 @@ % running CD-HIT (opt, default inf) % seqIdentity sequence identity threshold in CD-HIT, referred as % "global sequence identity" in CD-HIT User's Guide. -% The only possible options are 1 (100 %), 0.9 (90 %) -% and 0.5 (50 %). If other values are provided, -% CD-HIT is skipped (opt, default -1, i.e. CD-HIT is -% skipped) +% If -1 is provided, CD-HIT is skipped (opt, default 0.9) +% globalModel structure containing both model and KOModel +% structures as generated by getModelFromKEGG. These +% will otherwise be loaded by via getModelFromKEGG. +% Providing globalKEGGmodel can speed up model +% generation if getKEGGModelForOrganism is run +% multiple times for different strains. Example: +% [globalModel.model,globalModel.KOModel] = getModelFromKEGG; +% (opt, default empty, global model is loaded by +% getModelFromKEGG) % % Output: % model the reconstructed model @@ -144,30 +150,8 @@ % 2b. KO-specific protein FASTA files are re-organised into % non-redundant protein sets with CD-HIT. The user can only set % seqIdentity parameter, which corresponds to '-c' parameter in -% CD-HIT, described as "sequence identity threshold". 
The following -% non-default parameter settings are used depending on seqIdentity -% value: -% __________________________________________________________________ -% | | seqIdentity value | -% | -------------------------------------- -% | | 1.0 | 0.9 | 0.5 | x | -% | CD-HIT parameters -------------------------------------| -% -----------------------------------------------------------------| -% | Input Dataset (-i) | raw | cdh100 | cdh90 | raw | -% | Output Dataset (-o) | cdh100 | cdh90 | cdh50 | cdhOth | -% | Sequence identity (-c) | 1.0 | 0.9 | 0.5 | x | -% | word_length (-n) | 5 | 5 | 4 | 2-5* | -% | Max available memory (-M) | 2000 | -% ------------------------------------------------------------------ -% * - word length depends from sequence identity value (see CD-HIT -% manual for more details) -% -% The table reads as follows: if seqIdentity is equal to 1, then -% "cdh100" set is produced from raw set of proteins. If seqIdentity -% is equal to 0.9, then "cdh90" is produced from "cdh100" proteins -% set. When seqIdentity is equal to 0.5, "cdh50" is obtained from -% "cdh90" protein set. Finally, if other seqIdentity value is used, -% it is obtained directly from the raw set of proteins. +% CD-HIT, described as "sequence identity threshold". CD-HIT suggsted +% sequence identity specific word_length (-n) parameters are used. % 2c. Does a multi sequence alignment for multi-FASTA files obtained in % Step 2b for future use. MAFFT software with automatic selection of % alignment algorithm is used in this step ('--auto'). 
@@ -311,8 +295,7 @@ %Include all sequences for each reaction end if nargin<14 - seqIdentity=-1; - %CD-HIT is not used in the pipeline + seqIdentity=0.9; end if isempty(fastaFile) @@ -320,7 +303,7 @@ else fprintf('\n\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); %Check if query fasta exists - fastaFile=checkFileExistence(fastaFile,true,false); + fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir end %Run the external binaries multi-threaded to use all logical cores assigned @@ -340,27 +323,14 @@ %required zip file already in working directory or have it extracted. If %the zip file and directory is not here, it is downloaded from the cloud if ~isempty(dataDir) - hmmOptions={'euk100_kegg94', ... - 'euk90_kegg94', ... - 'euk50_kegg94', ... - 'prok100_kegg94', ... - 'prok90_kegg94', ... - 'prok50_kegg94'}; - hmmLinks={'wbnghgtpgftb6pcw572bhkl9a8ekh32d', ... - '754bdz0965261fktzlwc77rcv7me87i6', ... - '5xwgv17cn099xn7bxo2dq5h1dsdxrhn7', ... - 'azpn5lwrb4gind2mn5hnbmux0lao5vt5', ... - 'j19ybilr7js34uisnss92gvq5g6lljkk', ... - 'b5vn631jrwdzcj4uwmvbshe2ws3zoalm'}; - if all(cellfun(@isempty,regexp(dataDir,strcat(hmmOptions,'$')))) %Check if dataDir ends with any of the hmmOptions + hmmOptions={'euk90_kegg100','prok90_kegg100'}; + if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. + %If not, then check whether the required folders exist anyway. if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... ~exist(fullfile(dataDir,'fasta'),'dir') && ... ~exist(fullfile(dataDir,'aligned'),'dir') && ... ~exist(fullfile(dataDir,'hmms'),'dir') - EM='Pre-trained HMMs set is not recognised. It should match any of the following sets:'; - disp(EM); - disp(hmmOptions); - error('Fatal error occured. See the details above'); + error(['Pre-trained HMMs set is not recognised. 
If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) end else if exist(dataDir,'dir') && exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') @@ -370,14 +340,18 @@ unzip([dataDir,'.zip']); fprintf('COMPLETE\n'); else - hmmIndex=regexp(dataDir,hmmOptions); - hmmIndex=~cellfun(@isempty,hmmIndex); - fprintf('Downloading the HMMs archive file... '); - try - websave([dataDir,'.zip'],['https://chalmersuniversity.box.com/shared/static/',hmmLinks{hmmIndex},'.zip']); - catch ME - if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') - error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') + hmmIndex=strcmp(dataDir,hmmOptions); + if ~any(hmmIndex) + error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg100 and euk90_kegg100). ' ... + 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) + else + fprintf('Downloading the HMMs archive file... '); + try + websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.6.0/',hmmOptions{hmmIndex},'.zip']); + catch ME + if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') + error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') + end end end @@ -388,9 +362,7 @@ end %Check if HMMs are extracted if ~exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') - EM=['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']; - disp(EM); - error('Fatal error occured. 
See the details above'); + error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); end end end @@ -419,9 +391,13 @@ end end -%First generate the full KEGG model. The dataDir must not be supplied as +%First generate the full global KEGG model. Can be provided as input. +%Otherwise, getModelFromKEGG is run. The dataDir must not be supplied as %there is also an internal RAVEN version available -if any(dataDir) +if nargin==15 + model=globalModel.model; + KOModel=globalModel.KOModel; +elseif any(dataDir) [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); else [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); @@ -432,6 +408,13 @@ %If no FASTA file is supplied, then just remove all genes which are not for %the given organism ID if isempty(fastaFile) + %Check if organismID can be found in KEGG species list or is + %set to "eukaryotes" or "prokaryotes" + phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); + if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) + error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); + end + fprintf(['Pruning the model from non-' organismID ' genes... ']); if ismember(organismID,{'eukaryotes','prokaryotes'}) phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); @@ -477,15 +460,8 @@ %Clean gene names fprintf('Fixing gene names in the model... 
'); -for i=1:numel(model.genes) - %First get rid of the prefix organism id - model.genes{i}=model.genes{i}(strfind(model.genes{i},':')+1:end); - %Find and remove the description in parentheses if any - s=strfind(model.genes{i},'('); - if any(s) - model.genes{i}=model.genes{i}(1:s-1); - end -end +%Get rid of the prefix organism id +model.genes=regexprep(model.genes,'^\w+?:',''); fprintf('COMPLETE\n'); %If no FASTA file is supplied, then we are done here @@ -588,16 +564,23 @@ missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); if ~isempty(missingAligned) if seqIdentity==-1 - fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... '); + fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); else - fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... '); + fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); end missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); - progressFlag=0; - %Update fastaFiles. 
This is needed once rebuilding KEGG from FTP dump - %files for more accurate progress reporting - fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); - %Align all sequences using MAFFT + tmpFile=tempname; + %On Windows, paths need to be translated to Unix before parsing it to WSL + if ispc + wslPath.tmpFile=getWSLpath(tmpFile); + %mafft has problems writing to terminal (/dev/stderr) when running + %on WSL via MATLAB, instead write and read progress file + mafftOutput = tempname; + wslPath.mafftOutput=getWSLpath(mafftOutput); + wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); + wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); + end + for i=1:numel(missingAligned) %This is checked here because it could be that it is created by a %parallel process. The faw-files are saved as temporary files to @@ -662,78 +645,27 @@ %Do the clustering and alignment if there are more than one %sequences, otherwise just save the sequence (or an empty file) if numel(fastaStruct)>1 - if seqIdentity==0.9 - cdhitInp100=tempname; - fastawrite(cdhitInp100,fastaStruct); - cdhitInp90=tempname; - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']); - if status~=0 - EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; - dispEM(EM); - end - %Remove the old tempfile - if exist(cdhitInp100, 'file') - delete([cdhitInp100 '*']); - end - tmpFile=tempname; - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' tmpFile '" -c 0.9 -n 5 -M 2000 -aL 0.8']); - if status~=0 - EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; - dispEM(EM); - end - %Remove the old tempfile - if exist(cdhitInp90, 'file') - delete([cdhitInp90 '*']); - end - elseif seqIdentity==0.5 - 
cdhitInp100=tempname; - fastawrite(cdhitInp100,fastaStruct); - cdhitInp90=tempname; - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']); - if status~=0 - EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; - dispEM(EM); - end - %Remove the old tempfile - if exist(cdhitInp100, 'file') - delete([cdhitInp100 '*']); - end - cdhitInp50=tempname; - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' cdhitInp50 '" -c 0.9 -n 5 -M 2000 -aL 0.8']); - if status~=0 - EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; - dispEM(EM); - end - %Remove the old tempfile - if exist(cdhitInp90, 'file') - delete([cdhitInp90 '*']); - end - tmpFile=tempname; - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp50 '" -o "' tmpFile '" -c 0.5 -n 3 -M 2000 -aL 0.8']); - if status~=0 - EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; - dispEM(EM); - end - %Remove the old tempfile - if exist(cdhitInp50, 'file') - delete([cdhitInp50 '*']); - end - elseif seqIdentity~=-1 + if seqIdentity~=-1 cdhitInpCustom=tempname; fastawrite(cdhitInpCustom,fastaStruct); - tmpFile=tempname; if seqIdentity<=1 && seqIdentity>0.7 - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 5 -M 2000']); + nparam='5'; elseif seqIdentity>0.6 - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 4 -M 2000']); - elseif seqidentity>0.5 - [status, output]=system(['"' 
fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 3 -M 2000']); - elseif seqidentity>0.4 - [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 2 -M 2000']); + nparam='4'; + elseif seqIdentity>0.5 + nparam='3'; + elseif seqIdentity>0.4 + nparam='2'; else EM='The provided seqIdentity must be between 0 and 1\n'; dispEM(EM); end + if ispc + wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); + [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); + elseif ismac || isunix + [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); + end if status~=0 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; dispEM(EM); @@ -745,7 +677,6 @@ else %This means that CD-HIT should be skipped since %seqIdentity is equal to -1 - tmpFile=tempname; fastawrite(tmpFile,fastaStruct); end %Do the alignment for this file @@ -754,7 +685,10 @@ elseif isunix [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); elseif ispc - [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-win','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); + wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); + [status, ~]=system(['wsl "' wslPath.mafft '" 
--auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); + output=fileread(mafftOutput); + delete(mafftOutput); end if status~=0 %It could be that alignment failed because only one @@ -778,24 +712,24 @@ %empty file was written previously so that doesn't have to %be dealt with if numel(fastaStruct)==1 + warnState = warning; %Save the current warning state + warning('off','Bioinfo:fastawrite:AppendToFile'); fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); + warning(warnState) %Reset warning state to previous settings end end %Move the temporary file to the real one movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); - %Print the progress: no need to update this for every - %iteration, just report once 25%, 50% and 75% are done - if progressFlag==0 && i>numel(missingAligned)*0.25 - fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100); - progressFlag=progressFlag+1; - elseif (progressFlag==1 && i>=numel(missingAligned)*0.5) || (progressFlag==2 && i>=numel(missingAligned)*0.75) - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100); - progressFlag=progressFlag+1; + %Print the progress every 25 files + if rem(i-1,25) == 0 + progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); + progress=pad(progress,3,'left'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); end end end - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); else if seqIdentity==-1 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 
COMPLETE\n'); @@ -807,12 +741,8 @@ %Check if training of Hidden Markov models should be performed missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); if ~isempty(missingHMMs) - fprintf('Training the KEGG Orthology specific HMMs... '); + fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); - progressFlag=0; - %Update alignedFiles. This is needed once rebuilding KEGG from FTP dump - %files for more accurate progress reporting - alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); %Train models for all missing KOs for i=1:numel(missingHMMs) %This is checked here because it could be that it is created by a @@ -851,19 +781,16 @@ %Delete the temporary file delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); - - %Print the progress: no need to update this for every - %iteration, just report once 25%, 50% and 75% are done - if progressFlag==0 && i>numel(missingHMMs)*0.25 - fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100); - progressFlag=progressFlag+1; - elseif (progressFlag==1 && i>=numel(missingHMMs)*0.5) || (progressFlag==2 && i>=numel(missingHMMs)*0.75) - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100); - progressFlag=progressFlag+1; + + %Print the progress every 25 files + if rem(i-1,25) == 0 + progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); + progress=pad(progress,3,'left'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); end end end - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); else fprintf('Training the KEGG Orthology specific HMMs... 
COMPLETE\n'); end @@ -872,12 +799,8 @@ %Hidden Markov models should be performed missingOUT=setdiff(KOModel.rxns,outFiles); if ~isempty(missingOUT) - fprintf(['Querying ' strrep(fastaFile,'\','/') ' against the KEGG Orthology specific HMMs... ']); + fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); - progressFlag=0; - %Update hmmFiles. This is needed once rebuilding KEGG from FTP dump - %files for more accurate progress reporting - hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); for i=1:numel(missingOUT) %This is checked here because it could be that it is created by a %parallel process @@ -914,20 +837,17 @@ fwrite(fid,output); fclose(fid); - %Print the progress: no need to update this for every - %iteration, just report once 25%, 50% and 75% are done - if progressFlag==0 && i>numel(missingOUT)*0.25 - fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100); - progressFlag=progressFlag+1; - elseif (progressFlag==1 && i>=numel(missingOUT)*0.5) || (progressFlag==2 && i>=numel(missingOUT)*0.75) - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100); - progressFlag=progressFlag+1; + %Print the progress every 25 files + if rem(i-1,25) == 0 + progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); + progress=pad(progress,3,'left'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); end end end - fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); else - fprintf(['Querying ' fastaFile ' against the KEGG Orthology specific HMMs... COMPLETE\n']); + fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 
COMPLETE\n'); end @@ -1108,6 +1028,8 @@ model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; end end +%Remove the temp fasta file +delete(fastaFile) fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); end diff --git a/external/kegg/getWSLpath.m b/external/kegg/getWSLpath.m new file mode 100644 index 00000000..b989caa4 --- /dev/null +++ b/external/kegg/getWSLpath.m @@ -0,0 +1,19 @@ +function path=getWSLpath(path) +% getWSLpath +% Translate Windows-style path to its Unix WSL (Windows Subsystem for +% Linux) equivalent. +% +% Input: +% path string with directory of file path, in Windows-style (e.g. +% 'C:\Directory\') +% +% Output: +% path string with directory of file path, in Unix style (e.g. +% '/mnt/c/Directory/') +% +% Uses the WSL function 'wslpath' to translate the path. +% +% Usage: path=getWSLpath(path) +[~,path]=system(['wsl wslpath ''' path '''']); +path=path(1:end-1);% Remove final character (line-break) +end diff --git a/external/kegg/keggGenes.mat b/external/kegg/keggGenes.mat index 33a349d3..809cdc14 100644 Binary files a/external/kegg/keggGenes.mat and b/external/kegg/keggGenes.mat differ diff --git a/external/kegg/keggMets.mat b/external/kegg/keggMets.mat index e415d3b0..1f86d788 100644 Binary files a/external/kegg/keggMets.mat and b/external/kegg/keggMets.mat differ diff --git a/external/kegg/keggPhylDist.mat b/external/kegg/keggPhylDist.mat index f190cf85..f170ef86 100644 Binary files a/external/kegg/keggPhylDist.mat and b/external/kegg/keggPhylDist.mat differ diff --git a/external/kegg/keggRxns.mat b/external/kegg/keggRxns.mat index 5a587f67..4e19b048 100644 Binary files a/external/kegg/keggRxns.mat and b/external/kegg/keggRxns.mat differ diff --git a/external/metacyc/getMetaCycModelForOrganism.m b/external/metacyc/getMetaCycModelForOrganism.m index fcc3b600..87be4cf0 100755 --- a/external/metacyc/getMetaCycModelForOrganism.m +++ b/external/metacyc/getMetaCycModelForOrganism.m @@ -58,7 +58,7 @@ else 
fprintf('\nChecking existence of query FASTA file... '); %Check if query fasta exists - fastaFile=checkFileExistence(fastaFile,true,false); + fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir fprintf('done\n'); end @@ -247,4 +247,6 @@ [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Get detailed output model.grRules = grRules; model.rxnGeneMat = rxnGeneMat; +%Remove the temp fasta file +delete(fastaFile) end diff --git a/external/updateDocumentation.m b/external/updateDocumentation.m index 885e9c59..beef0bf5 100755 --- a/external/updateDocumentation.m +++ b/external/updateDocumentation.m @@ -19,6 +19,11 @@ function updateDocumentation() %Get rid of MATLAB functions from external software ravenDirs(:,contains(ravenDirs(1,:),'software'))=[]; +%Remove keggModel.mat if it exists +if exist(fullfile(ravenDir,'external','kegg','keggModel.mat'), 'file') == 2 + delete(fullfile(ravenDir,'external','kegg','keggModel.mat')); +end + %Remove existing "doc" directory from RAVEN rmdir(fullfile(ravenDir,'doc'),'s'); diff --git a/installation/checkInstallation.m b/installation/checkInstallation.m index 6c58bd74..6619546d 100755 --- a/installation/checkInstallation.m +++ b/installation/checkInstallation.m @@ -1,11 +1,20 @@ -function checkInstallation() +function checkInstallation(develMode) % checkInstallation % The purpose of this function is to check if all necessary functions are % installed and working. 
It also checks whether there are any functions % with overlapping names between RAVEN and other toolboxes or % user-defined functions, which are accessible from MATLAB pathlist % -% Usage: checkInstallation() +% Input: +% develMode logical indicating development mode, which includes +% testing of binaries that are required to update KEGG +% HMMs (opt, default false) +% +% Usage: checkInstallation(develMode) + +if nargin<1 + develMode=false; +end %Check if RAVEN is in the MATLAB path list paths=textscan(path,'%s','delimiter', pathsep); @@ -22,6 +31,8 @@ function checkInstallation() fprintf('\n*** THE RAVEN TOOLBOX - DEVELOPMENT VERSION ***\n\n'); end +fprintf(['MATLAB R' version('-release') ' detected\n\n']); + fprintf('Checking if RAVEN is on the MATLAB path...\t\t\t\t\t\t\t\t\t'); if ismember(ravenDir,paths) fprintf('OK\n'); @@ -118,74 +129,29 @@ function checkInstallation() setRavenSolver(curSolv); fprintf(['WARNING: No working solver was found!\n'... 'Install the solver, set it using setRavenSolver(''solverName'') and run checkInstallation again\n'... - 'Available solverName options are ''mosek'', ''gurobi'' and ''cobra''\n\n']); + 'Available solverName options are ''gurobi'' and ''cobra''\n\n']); end -if ismac - binEnd='.mac'; -elseif isunix - binEnd=''; -elseif ispc - binEnd='.exe'; -end fprintf('Checking essential binary executables:\n'); -fprintf('NOTE: Broken binary executables must be fixed before running RAVEN\n'); -fprintf(['\tmakeblastdb' binEnd '...\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','blast+',['makeblastdb' binEnd]) '"']); -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! Download/compile the binary and run checkInstallation again\n'); -end -fprintf(['\tblastp' binEnd '...\t\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','blast+',['blastp' binEnd]) '"']); -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! 
Download/compile the binary and run checkInstallation again\n'); -end -fprintf(['\tdiamond' binEnd '...\t\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','diamond',['diamond' binEnd]) '"']); -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! Download/compile the binary and run checkInstallation again\n'); -end -fprintf(['\thmmsearch' binEnd '...\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','hmmer',['hmmsearch' binEnd]) '"']); -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! Download/compile the binary and run checkInstallation again\n'); -end -fprintf('Checking non-essential/development binary executables:\n'); -fprintf('NOTE: Only fix these binaries if planning to use KEGG FTP dump files in getKEGGModelForOrganism\n'); -fprintf(['\tcd-hit' binEnd '...\t\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','cd-hit',['cd-hit' binEnd]) '"']); -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n'); -end -fprintf('\tmafft.bat...\t\t\t\t\t\t\t\t'); -if ismac - [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-mac','mafft.bat') '" --help ']); -elseif isunix - [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-linux64','mafft.bat') '" --help ']); -elseif ispc - [res,~]=system(['"' fullfile(ravenDir,'software','mafft','mafft-win','mafft.bat') '" --help ']); -end -if res==1 - fprintf('OK\n'); -else - fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n'); -end -fprintf(['\thmmbuild' binEnd '...\t\t\t\t\t\t\t\t']); -[res,~]=system(['"' fullfile(ravenDir,'software','hmmer',['hmmbuild' binEnd]) '"']); -if res==1 - fprintf('OK\n\n'); -else - fprintf('Not OK! If necessary, download/compile the binary and run checkInstallation again\n'); + +fprintf('\tBLAST+... '); +res=runtests('blastPlusTests.m','OutputDetail',0); +interpretResults(res); +fprintf('\tDIAMOND... 
'); +res=runtests('diamondTests.m','OutputDetail',0); +interpretResults(res); +fprintf('\tHMMER... '); +res=runtests('hmmerTests.m','OutputDetail',0); +interpretResults(res); + +if develMode + fprintf('NOTE: Only fix these binaries if planning to use KEGG FTP dump files in getKEGGModelForOrganism\n'); + fprintf('\tCD-HIT... '); + res=runtests('cdhitTests.m','OutputDetail',0); + interpretResults(res); + fprintf('\tMAFFT... '); + res=runtests('mafftTests.m','OutputDetail',0); + interpretResults(res); end fprintf('Checking whether RAVEN functions are non-redundant across MATLAB path...\t'); @@ -193,3 +159,11 @@ function checkInstallation() fprintf('\n*** checkInstallation complete ***\n\n'); end + +function interpretResults(results) +if results.Failed==0 && results.Incomplete==0 + fprintf('OK\n'); +else + fprintf('Not OK! Download/compile the binary and rerun checkInstallation\n'); +end +end diff --git a/io/checkFileExistence.m b/io/checkFileExistence.m index 351af939..35152388 100755 --- a/io/checkFileExistence.m +++ b/io/checkFileExistence.m @@ -1,4 +1,4 @@ -function files=checkFileExistence(files,makeFullPath,allowSpace) +function files=checkFileExistence(files,fullOrTemp,allowSpace) % checkFileExistence % Check whether files exist. 
If no full path is given a file should be % located in the current folder, which by default is appended to the @@ -7,16 +7,23 @@ % Input: % files string or cell array of strings with path to file(s) or % path or filename(s) -% makeFullPath logical, whether files located in the current folder -% should be provided with the full path (opt, default -% true) +% fullOrTemp 0: do not change path to file(s) +% 1: return full path to file(s) +% 2: copy file(s) to system default temporary folder and +% return full path +% (opt, default 0) % allowSpace logical, whether 'space' character is allowed in the % path (opt, default true) +% +% Output: +% files string or cell array of strings with updated paths if +% fullOrTemp was set as 1 or 2, otherwise original paths +% are returned % -% Usage: files=checkFileExistence(files,makeFullPath,allowSpace) +% Usage: files=checkFileExistence(files,fullOrTemp,allowSpace) if nargin<2 - makeFullPath = true; + fullOrTemp = 0; end if nargin<3 allowSpace = true; @@ -30,20 +37,36 @@ end filesOriginal = files; -inCurrDir = ~contains(files,'\') & ~contains(files,'/'); +%Make all full paths before check of file existence +if ispc % full path starts like "C:\" + inCurrDir = cellfun(@isempty,regexpi(files,'^[a-z]\:\\')); +else %isunix full path starts like "/" + inCurrDir = cellfun(@isempty,regexpi(files,'\/')); +end files(inCurrDir) = fullfile(cd,files(inCurrDir)); +%Check existence for i=1:numel(files) if ~exist(files{i},'file') error('File "%s" cannot be found\n',files{i}); - elseif allowSpace == true & strfind(files{i},' ') + elseif allowSpace == false & strfind(files{i},' ') error('File "%s" has an invalid space in the filename or path, please remove this before running this function\n',files{i}); end end -if makeFullPath == false - files = filesOriginal; +switch fullOrTemp + case 0 + files = filesOriginal; + case 1 + % files already contains full path + case 2 + for i=1:numel(files) + tmpFile=tempname; + copyfile(files{i},tmpFile); + 
files{i}=tmpFile; + end end + if oneFile == true files = files{1}; end \ No newline at end of file diff --git a/io/exportForGit.m b/io/exportForGit.m index e7da5112..c273929d 100755 --- a/io/exportForGit.m +++ b/io/exportForGit.m @@ -1,4 +1,4 @@ -function out=exportForGit(model,prefix,path,formats,masterFlag,subDirs) +function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs) % exportForGit % Generates a directory structure and populates this with model files, ready % to be commited to a Git(Hub) maintained model repository. Writes the model @@ -13,8 +13,8 @@ % formats cell array of strings specifying in what file formats % the model should be exported (opt, default to all % formats as {'mat', 'txt', 'xlsx', 'xml', 'yml'}) -% masterFlag logical, if true, function will error if RAVEN (and -% COBRA if detected) is/are not on the master branch. +% mainBranchFlag logical, if true, function will error if RAVEN (and +% COBRA if detected) is/are not on the main branch. % (opt, default false) % subDirs logical, whether model files for each file format % should be written in its own subdirectory, with @@ -22,12 +22,12 @@ % standard-GEM repository format. If false, all files % are stored in the same folder. 
(opt, default true) % -% Usage: exportForGit(model,prefix,path,formats,masterFlag) +% Usage: exportForGit(model,prefix,path,formats,mainBranchFlag) if nargin<6 subDirs=true; end if nargin<5 - masterFlag=false; + mainBranchFlag=false; end if nargin<4 || isempty(formats) formats={'mat', 'txt', 'xlsx', 'xml', 'yml'}; @@ -46,9 +46,12 @@ prefix='model'; end +%Sort reactions, metabolites and genes alphabetically +model=sortIdentifiers(model); + %Get versions or commits of toolboxes: -RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',masterFlag); -COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',masterFlag); +RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',mainBranchFlag); +COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',mainBranchFlag); %Retrieve libSBML version: try % 5.17.0 and newer diff --git a/io/exportModel.m b/io/exportModel.m index fa9c64ed..be5b64ea 100755 --- a/io/exportModel.m +++ b/io/exportModel.m @@ -1,4 +1,4 @@ -function exportModel(model,fileName,exportGeneComplexes,supressWarnings) +function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) % exportModel % Exports a constraint-based model to an SBML file (L3V1 FBCv2) % @@ -10,15 +10,23 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings) % (opt, default false) % supressWarnings true if warnings should be supressed (opt, default % false) +% sortIds logical whether metabolites, reactions and genes +% should be sorted alphabetically by their +% identifiers (opt, default false) % -% -% Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings) +% Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) if nargin<3 exportGeneComplexes=false; end if nargin<4 supressWarnings=false; end +if nargin<5 + sortIds=false; +end +if sortIds==true + model=sortIdentifiers(model); +end %If no subSystems are defined, then no need to use groups package if isfield(model,'subSystems') @@ -222,7 +230,7 @@ 
function exportModel(model,fileName,exportGeneComplexes,supressWarnings) end if isfield(modelSBML.compartment,'metaid') - if ~isnan(str2double(model.comps(i))) + if regexp(model.comps{i},'^[^a-zA-Z_]') EM='The compartment IDs are in numeric format. For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; dispEM(EM,false); model.comps(i)=strcat('c_',model.comps(i)); diff --git a/io/exportToExcelFormat.m b/io/exportToExcelFormat.m index d15a006b..75a6cd34 100755 --- a/io/exportToExcelFormat.m +++ b/io/exportToExcelFormat.m @@ -1,4 +1,4 @@ -function exportToExcelFormat(model,filename) +function exportToExcelFormat(model,filename,sortIds) % exportToExcelFormat % Exports a model structure to the Microsoft Excel model format % @@ -8,6 +8,9 @@ function exportToExcelFormat(model,filename) % be only a path, in which case the model is exported to a set % of tab-delimited text files instead. See exportToTabDelimited % for details regarding that functionality +% sortIds logical whether metabolites, reactions and genes should be +% sorted alphabetically by their identifiers (opt, default +% false) % % The resulting Excel file can be used with importExcelModel/SBMLFromExcel % for modelling or to generate a SBML file. @@ -15,7 +18,13 @@ function exportToExcelFormat(model,filename) % NOTE: No checks are made regarding the correctness of the model. 
Use % checkModelStruct to identify problems in the model structure % -% Usage: exportToExcelFormat(model,filename) +% Usage: exportToExcelFormat(model,filename,sortIds) +if nargin<3 + sortIds=false; +end +if sortIds==true + model=sortIdentifiers(model); +end [~, A, B]=fileparts(filename); diff --git a/io/exportToTabDelimited.m b/io/exportToTabDelimited.m index b015bad1..137f7601 100755 --- a/io/exportToTabDelimited.m +++ b/io/exportToTabDelimited.m @@ -1,4 +1,4 @@ -function exportToTabDelimited(model,path) +function exportToTabDelimited(model,path,sortIds) % exportToTabDelimited % Exports a model structure to a set of tab-delimited text files % @@ -6,6 +6,8 @@ function exportToTabDelimited(model,path) % path the path to export to. The resulting text files will be saved % under the names excelRxns.txt, excelMets.txt, excelGenes.txt, % excelModel.txt, and excelComps.txt +% sortIds logical whether metabolites, reactions and genes should be +% sorted alphabetically by their identifiers (opt, default false) % % NOTE: This functionality was previously a part of exportToExcelFormat. % The naming of the resulting text files is to preserve backward @@ -14,11 +16,17 @@ function exportToTabDelimited(model,path) % NOTE: No checks are made regarding the correctness of the model. 
Use % checkModelStruct to identify problems in the model structure % -% Usage: exportToTabDelimited(model,path) +% Usage: exportToTabDelimited(model,path,sortIds) if nargin<2 path='./'; end +if nargin<3 + sortIds=false; +end +if sortIds==true + model=sortIdentifiers(model); +end %If the folder doesn't exist then create it if ~exist(path,'dir') diff --git a/io/getMD5Hash.m b/io/getMD5Hash.m new file mode 100755 index 00000000..2499b089 --- /dev/null +++ b/io/getMD5Hash.m @@ -0,0 +1,55 @@ +function md5Hash=getMD5Hash(inputFile,binEnd) +% getMD5Hash +% Calculates MD5 hash for a file +% +% Input: +% inputFile string with the path to file for which MD5 hash should +% be calculated +% binEnd string that shows the operating system running in the +% client's computer. Use ".exe" for Windows, ".mac" for +% macOS or leave it blank for Linux (""). (opt, by +% default the function automatically detects the client's +% operating system) +% +% Output: +% md5Hash string containing an MD5 hash for inputFile +% +% Usage: md5Hash=getMD5Hash(inputFile,binEnd) + +if nargin<2 + if isunix + if ismac + binEnd='.mac'; + else + binEnd=''; + end + elseif ispc + binEnd='.exe'; + else + dispEM('Unknown OS, exiting.') + return + end +end + +%Check if binEnd is valid +if ~strcmp(binEnd,'.mac') && ~strcmp(binEnd,'') && ~strcmp(binEnd,'.exe') + dispEM('Unknown OS, exiting.') + return +end + +%Check file existence +inputFile=checkFileExistence(inputFile); + +%Run the OS-specific MD5 tool and capture its console output +switch binEnd + case '.mac' + [~, md5HashMessage]=system(['md5 "' inputFile '"']); + case '' + [~, md5HashMessage]=system(['md5sum "' inputFile '"']); + case '.exe' + [~, md5HashMessage]=system(['certutil -hashfile "' inputFile '" MD5']); +end + +%Extract the 32-character lowercase hex MD5 hash from the tool output +md5Hash = char(regexp(md5HashMessage,'[a-f0-9]{32}','match')); +end diff --git a/io/getToolboxVersion.m b/io/getToolboxVersion.m index 729e31b6..4c9b4b69 100755 --- a/io/getToolboxVersion.m +++ b/io/getToolboxVersion.m @@ -1,4 +1,4
@@ -function version = getToolboxVersion(toolbox,fileID,masterFlag) +function version = getToolboxVersion(toolbox,fileID,mainBranchFlag) % getToolboxVersion % Returns the version of a given toolbox, or if not available the latest % commit hash (7 characters). @@ -6,16 +6,16 @@ % toolbox string with the toolbox name (e.g. "RAVEN") % fileID string with the name of a file that is only found in % the corresponding toolbox (e.g. "ravenCobraWrapper.m"). -% masterFlag logical, if true, function will error if the toolbox is -% not on the master branch (opt, default false). +% mainBranchFlag logical, if true, function will error if the toolbox is +% not on the main branch (opt, default false). % % version string containing either the toolbox version or latest % commit hash (7 characters). % -% Usage: version = getToolboxVersion(toolbox,fileID,masterFlag) +% Usage: version = getToolboxVersion(toolbox,fileID,mainBranchFlag) if nargin<3 - masterFlag = false; + mainBranchFlag = false; end currentPath = pwd; @@ -38,12 +38,12 @@ disp([toolbox ' toolbox cannot be found']) version = 'unknown'; end -%Check if in master: -if masterFlag +%Check if in main: +if mainBranchFlag currentBranch = git('rev-parse --abbrev-ref HEAD'); - if ~strcmp(currentBranch,'master') + if any([strcmp(currentBranch, "main"), strcmp(currentBranch, "master")]) cd(currentPath); - error(['ERROR: ' toolbox ' not in master. Check-out the master branch of ' toolbox ' before submitting model for Git.']) + error(['ERROR: ' toolbox ' not in main (or master) branch. Check-out this branch of ' toolbox ' before submitting model for Git.']) end end %Try to find version file of the toolbox: diff --git a/io/sortIdentifiers.m b/io/sortIdentifiers.m new file mode 100644 index 00000000..2974a9eb --- /dev/null +++ b/io/sortIdentifiers.m @@ -0,0 +1,25 @@ +function newModel = sortIdentifiers(model) +% exportModel +% Sort reactions, metabolites, genes and compartments alphabetically by +% their identifier. 
+% +% Input: +% model a model structure +% +% Output: +% newModel an updated model structure with alphabetically sorted +% identifiers +% +% Usage: newModel=sortIdentifiers(model) + +[~,I]=sort(model.rxns); +newModel=permuteModel(model,I,'rxns'); +[~,I]=sort(newModel.mets); +newModel=permuteModel(newModel,I,'mets'); +if isfield(newModel,'genes') + [~,I]=sort(newModel.genes); + newModel=permuteModel(newModel,I,'genes'); +end +[~,I]=sort(newModel.comps); +newModel=permuteModel(newModel,I,'comps'); +end \ No newline at end of file diff --git a/io/writeYaml.m b/io/writeYaml.m index 1a29ccdd..0f5199d6 100755 --- a/io/writeYaml.m +++ b/io/writeYaml.m @@ -19,7 +19,6 @@ function writeYaml(model,name,preserveQuotes,sortIds) if nargin < 4 sortIds = false; end - if ~endsWith(name,{'.yml','.yaml'}) name = strcat(name,'.yml'); end @@ -29,6 +28,11 @@ function writeYaml(model,name,preserveQuotes,sortIds) model = ravenCobraWrapper(model); end +%Sort identifiers alphabetically +if sortIds == true + model = sortIdentifiers(model); +end + %Simplify Miriam fields: if isfield(model,'metMiriams') [model.newMetMiriams,model.newMetMiriamNames] = extractMiriam(model.metMiriams); @@ -56,11 +60,6 @@ function writeYaml(model,name,preserveQuotes,sortIds) %Metabolites: fprintf(fid,'- metabolites:\n'); -if sortIds==true - [~,pos] = sort(model.mets); -else - pos = 1:numel(model.mets); -end for i = 1:length(model.mets) fprintf(fid,' - !!omap\n'); writeField(model, fid, 'mets', 'txt', pos(i), ' - id', preserveQuotes) @@ -75,11 +74,6 @@ function writeYaml(model,name,preserveQuotes,sortIds) %Reactions: fprintf(fid,'- reactions:\n'); -if sortIds==true - [~,pos] = sort(model.rxns); -else - pos = 1:numel(model.rxns); -end for i = 1:length(model.rxns) fprintf(fid,' - !!omap\n'); writeField(model, fid, 'rxns', 'txt', pos(i), ' - id', preserveQuotes) @@ -101,11 +95,6 @@ function writeYaml(model,name,preserveQuotes,sortIds) %Genes: fprintf(fid,'- genes:\n'); -if sortIds==true - [~,pos] = sort(model.genes); 
-else - pos = 1:numel(model.genes); -end for i = 1:length(model.genes) fprintf(fid,' - !!omap\n'); writeField(model, fid, 'genes', 'txt', pos(i), ' - id', preserveQuotes) @@ -115,11 +104,6 @@ function writeYaml(model,name,preserveQuotes,sortIds) %Compartments: fprintf(fid,'- compartments: !!omap\n'); -if sortIds==true - [~,pos] = sort(model.comps); -else - pos = 1:numel(model.comps); -end for i = 1:length(model.comps) writeField(model, fid, 'compNames', 'txt', pos(i), ['- ' model.comps{pos(i)}], preserveQuotes) writeField(model, fid, 'compMiriams', 'txt', pos(i), '- annotation', preserveQuotes) diff --git a/software/blast+/blastp b/software/blast+/blastp index 56cc7479..807f90c2 100755 Binary files a/software/blast+/blastp and b/software/blast+/blastp differ diff --git a/software/blast+/blastp.exe b/software/blast+/blastp.exe old mode 100755 new mode 100644 index e7fa489a..3927fb36 Binary files a/software/blast+/blastp.exe and b/software/blast+/blastp.exe differ diff --git a/software/blast+/blastp.mac b/software/blast+/blastp.mac index b82607f6..c94f53aa 100755 Binary files a/software/blast+/blastp.mac and b/software/blast+/blastp.mac differ diff --git a/software/blast+/makeblastdb b/software/blast+/makeblastdb index 5817d20c..d191a43d 100755 Binary files a/software/blast+/makeblastdb and b/software/blast+/makeblastdb differ diff --git a/software/blast+/makeblastdb.exe b/software/blast+/makeblastdb.exe old mode 100755 new mode 100644 index d0ee5c16..331a2bfb Binary files a/software/blast+/makeblastdb.exe and b/software/blast+/makeblastdb.exe differ diff --git a/software/blast+/makeblastdb.mac b/software/blast+/makeblastdb.mac index f76fedcb..578acabf 100755 Binary files a/software/blast+/makeblastdb.mac and b/software/blast+/makeblastdb.mac differ diff --git a/software/cd-hit/cd-hit b/software/cd-hit/cd-hit index d4a6408d..618ad929 100755 Binary files a/software/cd-hit/cd-hit and b/software/cd-hit/cd-hit differ diff --git a/software/cd-hit/cd-hit.exe 
b/software/cd-hit/cd-hit.exe deleted file mode 100755 index 715d4888..00000000 Binary files a/software/cd-hit/cd-hit.exe and /dev/null differ diff --git a/software/cd-hit/cd-hit.mac b/software/cd-hit/cd-hit.mac index 3147efe5..6e7f77e9 100755 Binary files a/software/cd-hit/cd-hit.mac and b/software/cd-hit/cd-hit.mac differ diff --git a/software/cd-hit/cyggcc_s-seh-1.dll b/software/cd-hit/cyggcc_s-seh-1.dll deleted file mode 100755 index 1ab74c94..00000000 Binary files a/software/cd-hit/cyggcc_s-seh-1.dll and /dev/null differ diff --git a/software/cd-hit/cyggomp-1.dll b/software/cd-hit/cyggomp-1.dll deleted file mode 100755 index f2914549..00000000 Binary files a/software/cd-hit/cyggomp-1.dll and /dev/null differ diff --git a/software/cd-hit/cygstdc++-6.dll b/software/cd-hit/cygstdc++-6.dll deleted file mode 100755 index 75235e90..00000000 Binary files a/software/cd-hit/cygstdc++-6.dll and /dev/null differ diff --git a/software/cd-hit/cygwin1.dll b/software/cd-hit/cygwin1.dll deleted file mode 100755 index 3a196edb..00000000 Binary files a/software/cd-hit/cygwin1.dll and /dev/null differ diff --git a/software/cd-hit/cygz.dll b/software/cd-hit/cygz.dll deleted file mode 100755 index 07392898..00000000 Binary files a/software/cd-hit/cygz.dll and /dev/null differ diff --git a/software/diamond/LICENSE b/software/diamond/LICENSE index f288702d..ed990f8c 100644 --- a/software/diamond/LICENSE +++ b/software/diamond/LICENSE @@ -618,57 +618,4 @@ an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. 
It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. 
If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. + END OF TERMS AND CONDITIONS \ No newline at end of file diff --git a/software/diamond/diamond b/software/diamond/diamond index 36736af9..546b8e06 100755 Binary files a/software/diamond/diamond and b/software/diamond/diamond differ diff --git a/software/diamond/diamond.exe b/software/diamond/diamond.exe old mode 100755 new mode 100644 index 0051e585..ad36d58b Binary files a/software/diamond/diamond.exe and b/software/diamond/diamond.exe differ diff --git a/software/diamond/diamond.mac b/software/diamond/diamond.mac index ac4a0742..b5e1c833 100755 Binary files a/software/diamond/diamond.mac and b/software/diamond/diamond.mac differ diff --git a/software/hmmer/LICENSE b/software/hmmer/LICENSE index 4a2b1745..663d4f55 100644 --- a/software/hmmer/LICENSE +++ b/software/hmmer/LICENSE @@ -1,7 +1,7 @@ HMMER - Biological sequence analysis with profile hidden Markov models -Copyright (C) 1992-2019 Sean R. Eddy -Copyright (C) 2000-2019 Howard Hughes Medical Institute -Copyright (C) 2015-2019 President and Fellows of Harvard College +Copyright (C) 1992-2020 Sean R. 
Eddy +Copyright (C) 2000-2020 Howard Hughes Medical Institute +Copyright (C) 2015-2020 President and Fellows of Harvard College Copyright (C) 1992-2004 Washington University School of Medicine Copyright (C) 1992-1994 MRC Laboratory of Molecular Biology ----------------------------------------------------------------------- @@ -90,3 +90,4 @@ BSD three-clause license: + diff --git a/software/hmmer/cygwin1.dll b/software/hmmer/cygwin1.dll index dce2caef..350a74ba 100644 Binary files a/software/hmmer/cygwin1.dll and b/software/hmmer/cygwin1.dll differ diff --git a/software/hmmer/hmmbuild b/software/hmmer/hmmbuild index cc51cda0..a6f81a95 100755 Binary files a/software/hmmer/hmmbuild and b/software/hmmer/hmmbuild differ diff --git a/software/hmmer/hmmbuild.exe b/software/hmmer/hmmbuild.exe index 00c9d6bf..950a8394 100644 Binary files a/software/hmmer/hmmbuild.exe and b/software/hmmer/hmmbuild.exe differ diff --git a/software/hmmer/hmmbuild.mac b/software/hmmer/hmmbuild.mac index 25c71efe..1f457757 100755 Binary files a/software/hmmer/hmmbuild.mac and b/software/hmmer/hmmbuild.mac differ diff --git a/software/hmmer/hmmsearch b/software/hmmer/hmmsearch index 85330817..ab090fdc 100755 Binary files a/software/hmmer/hmmsearch and b/software/hmmer/hmmsearch differ diff --git a/software/hmmer/hmmsearch.exe b/software/hmmer/hmmsearch.exe index 15998cf2..66856d57 100644 Binary files a/software/hmmer/hmmsearch.exe and b/software/hmmer/hmmsearch.exe differ diff --git a/software/hmmer/hmmsearch.mac b/software/hmmer/hmmsearch.mac index 4fbe585d..0f393cfd 100755 Binary files a/software/hmmer/hmmsearch.mac and b/software/hmmer/hmmsearch.mac differ diff --git a/software/mafft/mafft-linux64/mafftdir/bin/mafft b/software/mafft/mafft-linux64/mafftdir/bin/mafft index 2c86f603..cc0af8e6 100755 --- a/software/mafft/mafft-linux64/mafftdir/bin/mafft +++ b/software/mafft/mafft-linux64/mafftdir/bin/mafft @@ -1,7 +1,7 @@ #! 
/bin/bash er=0; myself=`dirname "$0"`/`basename "$0"`; export myself -version="v7.467 (2020/May/14)"; export version +version="v7.490 (2021/Oct/30)"; export version LANG=C; export LANG os=`uname` progname=`basename "$0"` @@ -257,6 +257,7 @@ laraparams="/dev/null" foldalignopt=" " treealg=" -X 0.1 " sueff="1.0" +maxambiguous="1.0" scoreoutarg=" " numthreads=0 numthreadsit=-1 @@ -293,6 +294,7 @@ enrich=0 # ato de kezuru enrichseq=0 # ato de kezuru enrichstr=0 # ato de kezuru seektarget="" # ato de kezuru +dashserver="https://sysimm.org/dash/REST1.0/" newdash=0 newdash_originalsequenceonly=0 exclude_ho=0 @@ -405,6 +407,9 @@ if [ $# -gt 0 ]; then shift sueff="$1" treealg=" -X $1" + elif [ "$1" = "--maxambiguous" ]; then + shift + maxambiguous="$1" elif [ "$1" = "--noscore" ]; then scorecalcopt=" -Z " elif [ "$1" = "--6mermultipair" ]; then @@ -946,8 +951,15 @@ if [ $# -gt 0 ]; then # seektarget="-str" elif [ "$1" = "--dash" ]; then newdash=1 - distance="global" # can be override - iterate=3 # can be override + if [ "$distance" != "local" -a "$distance" != "localgenaf" ]; then # 2021/Oct + distance="global" + fi + if [ "$iterate" -eq 0 ]; then # 2021/Oct + iterate=3 + fi + elif [ "$1" = "--dashserver" ]; then + shift + dashserver="$1" elif [ "$1" = "--originalseqonly" ]; then newdash_originalsequenceonly=1 elif [ "$1" = "--excludehomologs" ]; then # works with --dash only @@ -1030,7 +1042,7 @@ if [ $# -gt 0 ]; then TMPFILE=`cygpath -w "$TMPFILE"` # necessary to pass path to f2cl on cyswin, somehow unnecessary in msys. 
fi - umask 077 +# umask 077 # 2021/Jan # mkdir "$TMPFILE" || er=1 @@ -1046,7 +1058,7 @@ function removetmpfile() { # for MPI if [ $debug -eq 1 ]; then # trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys # trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0 15 - trap "tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 + trap "popd > /dev/null 2>&1; tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 else # trap "rm -rf $TMPFILE" 0 15 trap "removetmpfile" 0 15 @@ -1065,8 +1077,16 @@ function removetmpfile() { # for MPI cat "$1" | tr "\r" "\n" > "$TMPFILE/infile" echo "" >> "$TMPFILE/infile" - cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> "$TMPFILE/infile" cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile" + + if [ $maxambiguous != "1.0" ]; then + mv "$TMPFILE/infile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1 + mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1 + fi + cat "$TMPFILE/_addfile" >> "$TMPFILE/infile" + cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx" cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable" cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree" @@ -1077,11 +1097,19 @@ function removetmpfile() { # for MPI cat "$anchorfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_externalanchors" SAVEIFS=$IFS - IFS='\n' - filelist="$1\n$addfile\n$scorematrix\n$mergetable\n$treeinfile\n$seedtablefile\n$laraparams\n$pdblist\n$ownlist" +# Fixed a bug: 'n' was misinterpreted as delimiter, 2020/Jun/19 + IFS=$'\n' + filelist="$1 +$addfile +$scorematrix +$mergetable +$treeinfile +$seedtablefile +$laraparams +$pdblist +$ownlist" for f in $filelist; do - # $f="" demo mudani file ga hashiru. 
- file "$f" 2>/dev/null | grep "UTF-[13][62]" >& /dev/null && printf "$f: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; + file "$f" 2>/dev/null | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf "$f: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; done IFS=$SAVEIFS @@ -1095,7 +1123,7 @@ function removetmpfile() { # for MPI shift if [ -r "$1" ]; then cat "$1" | tr "\r" "\n" > "$TMPFILE/seed$#" - file "$1" | grep "UTF-[13][62]" >& /dev/null && printf "$1: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; + file "$1" | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf "$1: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; else echo "$0": Cannot open "$1". 1>&2 echo "" 1>&2 @@ -1182,7 +1210,9 @@ function removetmpfile() { # for MPI exit 1; fi + if [ "$addarg0" != " " ]; then + # iterate=0 # 2013/03/23 -> commented out, 2017/12 "$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile" nadd=`awk '{print $1}' "$TMPFILE/addsize"` @@ -1297,6 +1327,13 @@ function removetmpfile() { # for MPI echo "npair = " $npair 1>>"$progressfile" echo "nseq = " $nseq 1>>"$progressfile" echo "nlen = " $nlen 1>>"$progressfile" + + if [ $norg -eq 0 ]; then + echo "" 1>>"$progressfile" + echo "The reference sequence was removed because of ambiguous letters?" 1>>"$progressfile" + echo "" 1>>"$progressfile" + exit 1; + fi # nagasa check! 
# if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct @@ -1319,6 +1356,11 @@ function removetmpfile() { # for MPI exit 1; fi + if [ `awk "BEGIN {print( 0.0+\"$maxambiguous\" < 0.0 || 0.0+\"$maxambiguous\" > 1.0 )}"` -gt 0 ]; then + printf "\n%s\n\n" "The argument of --maxambiguous must be between 0.0 and 1.0" 1>>"$progressfile" + exit 1; + fi + if [ $allowshift -eq 1 ]; then if [ $unalignspecified -ne 1 ]; then unalignlevel="0.8" @@ -1937,6 +1979,7 @@ function removetmpfile() { # for MPI fi + if [ $nadd -gt "0" ]; then if [ $fragment -eq "1" ]; then addarg="$addarg0 $nadd -g -0.01" @@ -1964,12 +2007,13 @@ function removetmpfile() { # for MPI bunkatsuopt=" -B " # fftnsi demo bunktasu shinai if [ "$add2ndhalfarg" != " " ]; then if [ $auto -eq 1 -o $iterate -gt 0 ]; then - echo '' 1>>"$progressfile" - echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" - echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" - echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 +# echo '' 1>>"$progressfile" +# echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" +# echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" +# echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" +# echo '' 1>>"$progressfile" +# exit 1 + iterate=0 fi fi fi @@ -2171,6 +2215,7 @@ function removetmpfile() { # for MPI # echo "iterate = " $iterate 1>>"$progressfile" # echo "cycle = " $cycle 1>>"$progressfile" + if [ $anysymbol -eq 1 ]; then mv infile orig "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1 @@ -2317,7 +2362,8 @@ function removetmpfile() { # for MPI if [ $anysymbol -eq 1 ]; then mv orig infile # replaceu wo mukouka fi - sed 's/-//g' infile > dashin # gap nozoku +# sed 's/-//g' infile > dashin # gap nozoku + awk '{if(/^>/)print; else {gsub( /-/,"" ); print;}}' infile > dashin if [ ! 
-x "$prefix/dash_client" -o ! -x "$prefix/dash_client" ]; then echo "" 1>&2 @@ -2332,11 +2378,11 @@ function removetmpfile() { # for MPI fi echo "Calling DASH (https://sysimm.org/dash/)" 1>>"$progressfile" - "$prefix/dash_client" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" + "$prefix/dash_client" -url "$dashserver" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" dashres="$?" if [ $dashres -ne "0" ]; then echo "Error in DASH" 1>>"$progressfile" - echo "To use this feature, compiled with" 1>>"$progressfile" + echo "To enable this feature, compile with DASH_CLIENT=dash_client. Go compiler is necessary." 1>>"$progressfile" exit 1; fi @@ -2908,8 +2954,8 @@ BEGIN { while( 1 ) { go = 0; - printf( "OK?\n" ) > "/dev/tty"; - printf( "@ [Y] " ) > "/dev/tty"; + printf( "Type Y or just enter to run this command.\n" ) > "/dev/tty"; + printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 ) diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/addsingle b/software/mafft/mafft-linux64/mafftdir/libexec/addsingle index a1de066d..11a61d6d 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/addsingle and b/software/mafft/mafft-linux64/mafftdir/libexec/addsingle differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/contrafoldwrap b/software/mafft/mafft-linux64/mafftdir/libexec/contrafoldwrap index 80e14330..0e1beac9 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/contrafoldwrap and b/software/mafft/mafft-linux64/mafftdir/libexec/contrafoldwrap differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/countlen b/software/mafft/mafft-linux64/mafftdir/libexec/countlen index 5bb872d9..0efbbab1 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/countlen and b/software/mafft/mafft-linux64/mafftdir/libexec/countlen differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dash_alignments 
b/software/mafft/mafft-linux64/mafftdir/libexec/dash_alignments new file mode 100644 index 00000000..6047f59e --- /dev/null +++ b/software/mafft/mafft-linux64/mafftdir/libexec/dash_alignments @@ -0,0 +1,483 @@ +Query 3J5P_A||281||579 Template 3J5P_A||281||579 +QUERY EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +QUERY +TEMPL EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 3J5P_A||283||585 Template 3J5P_A||283||585 +QUERY NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +QUERY +TEMPL NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +TEMPL +Equivalence 
999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 3J9P_A||1089||1399 Template 3J9P_A||1089||1399 +QUERY YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKL +QUERY +TEMPL YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKL +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 3J9P_A||1091||1408 Template 3J9P_A||1091||1408 +QUERY PLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVD +QUERY +TEMPL 
PLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVD +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 3J9P_A||1092||1402 Template 3J9P_A||1092||1402 +QUERY LTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLW +QUERY +TEMPL LTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLW +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 3J9P_A||1096||1410 Template 3J9P_A||1096||1410 +QUERY 
NAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQK +QUERY +TEMPL NAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQK +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5AN8_A||277||586 Template 5AN8_A||277||586 +QUERY NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +QUERY +TEMPL NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5IRX_A||286||584 Template 5IRX_A||286||584 +QUERY EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +QUERY +TEMPL EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5IRX_A||288||590 Template 5IRX_A||288||590 +QUERY NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +QUERY +TEMPL NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +TEMPL 
+Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5IWK_A||287||613 Template 5IWK_A||287||613 +QUERY DDQSLLELIVTTKKREARQILDQTPVKELVSLKWKRYGRPYFCVLGAIYVLYIICFTMCCVYRPLKPRITNRTNPRDNTLLQQKLLQEAYVTPKDDLRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLPRCLWP +QUERY +TEMPL DDQSLLELIVTTKKREARQILDQTPVKELVSLKWKRYGRPYFCVLGAIYVLYIICFTMCCVYRPLKPRITNRTNPRDNTLLQQKLLQEAYVTPKDDLRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLPRCLWP +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5IWK_A||383||608 Template 5IWK_A||383||608 +QUERY LRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLP +QUERY +TEMPL LRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLP +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5K47_A||19||514 Template 5K47_A||19||514 +QUERY GTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAE +QUERY +TEMPL GTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAE +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5K47_A||20||521 Template 5K47_A||20||521 +QUERY 
TRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +QUERY +TEMPL TRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5K47_A||247||526 Template 5K47_A||247||526 +QUERY NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +QUERY +TEMPL 
NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5K47_A||2||537 Template 5K47_A||2||537 +QUERY PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +QUERY +TEMPL PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5K47_A||30||525 Template 5K47_A||30||525 +QUERY REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +QUERY +TEMPL REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5MKF_A||158||704 Template 5MKF_A||158||704 +QUERY EDQGPPCPSPVGGGDPLHRHLPLEGQPPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +QUERY +TEMPL EDQGPPCPSPVGGGDPLHRHLPLEGQPPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5MKF_A||185||720 Template 5MKF_A||185||720 +QUERY PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +QUERY +TEMPL PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5MKF_A||430||709 Template 5MKF_A||430||709 +QUERY NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +QUERY +TEMPL NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5TJA_A||3||217 Template 5TJA_A||3||217 +QUERY GLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSLE +QUERY +TEMPL GLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSLE +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5VKQ_A||1228||1596 Template 5VKQ_A||1228||1596 +QUERY DKRNVEFLDVLIENEQKEVIAHTVVQRYLQELWHGSLTWASWKILLLLVAFIVCPPVWIGFTFPMGHKFNKVPIIKFMSYLTSHIYLMIHLSIVGITPIYPVLRLSLVPYWYEVGLLIWLSGLLLFELTNPSDKSGLGSIKVLVLLLGMAGVGVHVSAFLFVSKEYWPTLVYCRNQCFALAFLLACVQILDFLSFHHLFGPWAIIIGDLLKDLARFLAVLAIFVFGFSMHIVALNQSFANFSPEDLRSFEKKNRNRGYFSDVRMHPINSFELLFFAVFGQTTTEQTQVDKIKNVATPTQPYWVEYLFKIVFGIYMLVSVVVLIQLLIAMMSDTYQRIQAQSDIEWKFGLSKLIRNMHRTTTAPSPLNLV +QUERY +TEMPL DKRNVEFLDVLIENEQKEVIAHTVVQRYLQELWHGSLTWASWKILLLLVAFIVCPPVWIGFTFPMGHKFNKVPIIKFMSYLTSHIYLMIHLSIVGITPIYPVLRLSLVPYWYEVGLLIWLSGLLLFELTNPSDKSGLGSIKVLVLLLGMAGVGVHVSAFLFVSKEYWPTLVYCRNQCFALAFLLACVQILDFLSFHHLFGPWAIIIGDLLKDLARFLAVLAIFVFGFSMHIVALNQSFANFSPEDLRSFEKKNRNRGYFSDVRMHPINSFELLFFAVFGQTTTEQTQVDKIKNVATPTQPYWVEYLFKIVFGIYMLVSVVVLIQLLIAMMSDTYQRIQAQSDIEWKFGLSKLIRNMHRTTTAPSPLNLV +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5W3S_A||14||541 Template 5W3S_A||14||541 +QUERY 
HEEENRCNFNQHTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLED +QUERY +TEMPL HEEENRCNFNQHTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLED +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5W3S_A||28||546 Template 5W3S_A||28||546 +QUERY 
PSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLEDDPPVS +QUERY +TEMPL PSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLEDDPPVS +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5WJ9_A||16||557 Template 5WJ9_A||16||557 +QUERY 
LTPNPGYGTQAGPSPAPPTPPEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVGFMWRQRGRVISLWERLEFVNGWYILLVTSDVLTISGTIMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHNYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGRSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGAGAEESELQAYIAQCQDSPTSGKFRRGS +QUERY +TEMPL LTPNPGYGTQAGPSPAPPTPPEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVGFMWRQRGRVISLWERLEFVNGWYILLVTSDVLTISGTIMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHNYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGRSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGAGAEESELQAYIAQCQDSPTSGKFRRGS +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5WPV_A||16||557 Template 5WPV_A||16||557 +QUERY 
LTPNPGYGTQVGTSPAPTTPTEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLVVTFREENTIAFRHLFLLGYSDGSDDTFAAYTQEQLYQAIFYAVDQYLILPEISLGRYAYVRGGGGPWANGSALALCQRYYHRGHVDPANDTFDIDPRVVTDCIQVDPPDRPPDIPSEDLDFLDGSASYKNLTLKFHKLINVTIHFQLKTINLQSLINNEIPDCYTFSILITFDNKAHSGRIPIRLETKTHIQECKHPSVSRHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVVFMWRRRGREISLWERLEFVNGWYILLVTSDVLTISGTVMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHKYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGHSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGTGTEKSELQAYIEQCQDSPTSGKFRRGS +QUERY +TEMPL LTPNPGYGTQVGTSPAPTTPTEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLVVTFREENTIAFRHLFLLGYSDGSDDTFAAYTQEQLYQAIFYAVDQYLILPEISLGRYAYVRGGGGPWANGSALALCQRYYHRGHVDPANDTFDIDPRVVTDCIQVDPPDRPPDIPSEDLDFLDGSASYKNLTLKFHKLINVTIHFQLKTINLQSLINNEIPDCYTFSILITFDNKAHSGRIPIRLETKTHIQECKHPSVSRHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVVFMWRRRGREISLWERLEFVNGWYILLVTSDVLTISGTVMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHKYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGHSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGTGTEKSELQAYIEQCQDSPTSGKFRRGS +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z1W_A||127||515 Template 5Z1W_A||127||515 +QUERY 
YENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQL +QUERY +TEMPL YENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQL +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z1W_A||13||506 Template 5Z1W_A||13||506 +QUERY SIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKE +QUERY +TEMPL 
SIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKE +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z1W_A||246||537 Template 5Z1W_A||246||537 +QUERY NANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKTLLRLRLRKE +QUERY +TEMPL NANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKTLLRLRLRKE +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 
5Z1W_A||29||447 Template 5Z1W_A||29||447 +QUERY NRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFV +QUERY +TEMPL NRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFV +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z1W_A||41||508 Template 5Z1W_A||41||508 +QUERY LVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEEL +QUERY +TEMPL 
LVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEEL +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z1W_A||9||528 Template 5Z1W_A||9||528 +QUERY HICRSIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKT +QUERY +TEMPL 
HICRSIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKT +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z96_A||286||659 Template 5Z96_A||286||659 +QUERY LARLKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNV +QUERY +TEMPL LARLKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNV +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z96_A||289||660 Template 5Z96_A||289||660 +QUERY LKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVI +QUERY +TEMPL LKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVI +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5Z96_A||290||663 Template 5Z96_A||290||663 +QUERY 
KLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVIPSP +QUERY +TEMPL KLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVIPSP +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5ZX5_A||814||1056 Template 5ZX5_A||814||1056 +QUERY FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA +QUERY +TEMPL FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 
5ZX5_A||820||1131 Template 5ZX5_A||820||1131 +QUERY LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +QUERY +TEMPL LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5ZX5_A||821||1134 Template 5ZX5_A||821||1134 +QUERY DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +QUERY +TEMPL DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 5ZX5_A||827||1228 Template 5ZX5_A||827||1228 +QUERY NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLD +QUERY +TEMPL NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLD +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6A70_A||27||559 Template 6A70_A||27||559 +QUERY 
SHPQFEKGSAAAPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIR +QUERY +TEMPL SHPQFEKGSAAAPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIR +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6A70_A||284||563 Template 6A70_A||284||563 +QUERY NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +QUERY +TEMPL 
NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6A70_A||39||574 Template 6A70_A||39||574 +QUERY PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +QUERY +TEMPL PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6A70_A||67||562 Template 6A70_A||67||562 +QUERY REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +QUERY +TEMPL REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6A70_B||227||1129 Template 6A70_B||227||1129 +QUERY LRFRRLLVAELQRGFFDKHIWLSIWDRPPRSRFTRIQRATCCVLLICLFLGANAVWYGAVGDSAYSTGHVSRLSPLSVDTVAVGLVSSVVVYPVYLAILFLFRMSRSKVAGSPSPTPAGQQVLDIDSCLDSSVLDSSFLTFSGLHAEQAFVGQMKSDLFLDDSKSLVCWPSGEGTLSWPDLLSDPSIVGSNLRQLARGQAGHGLGPEEDGFSLASPYSPAKSFSASDEDLIQQVLAEGVSSPAPTQDTHMETDLLSSLSSTPGEKTETLALQRLGELGPPSPGLNWEQPQAARLSRTGLVEGLRKRLLPAWCASLAHGLSLLLVAVAVAVSGWVGASFPPGVSVAWLLSSSASFLASFLGWEPLKVLLEALYFSLVAKRLHPDEDDTLVESPAVTPVSARVPRVRPPHGFALFLAKEEARKVKRLHGMLRSLLVYMLFLLVTLLASYGDASCHGHAYRLQSAIKQELHSRAFLAITRSEELWPWMAHVLLPYVHGNQSSPELGPPRLRQVRLQEALYPDPPGPRVHTCSAAGGFSTSDYDVGWESPHNGSGTWAYSAPDLLGAWSWGSCAVYDSGGYVQELGLSLEESRDRLRFLQLHNWLDNRSRAVFLELTRYSPAVGLHAAVTLRLEFPAAGRALAALSVRPFALRRLSAGLSLPLLTSVCLLLFAVHFAVAEARTWHREGRWRVLRLGAWARWLLVALTAATALVRLAQLGAADRQWTRFVRGRPRRFTSFDQVAQLSSAARGLAASLLFLLLVKAAQQLRFVRQWSVFGKTLCRALPELLGVTLGLVVLGVAYAQLAILLVSSCVDSLWSVAQALLVLCPGTGLSTLCPAESWHLSPLLCVGLWALRLWGALRLGAVILRWRYHALRGELYRPAWEPQDYEMVELFLRRLRLWMGLSK +QUERY +TEMPL 
LRFRRLLVAELQRGFFDKHIWLSIWDRPPRSRFTRIQRATCCVLLICLFLGANAVWYGAVGDSAYSTGHVSRLSPLSVDTVAVGLVSSVVVYPVYLAILFLFRMSRSKVAGSPSPTPAGQQVLDIDSCLDSSVLDSSFLTFSGLHAEQAFVGQMKSDLFLDDSKSLVCWPSGEGTLSWPDLLSDPSIVGSNLRQLARGQAGHGLGPEEDGFSLASPYSPAKSFSASDEDLIQQVLAEGVSSPAPTQDTHMETDLLSSLSSTPGEKTETLALQRLGELGPPSPGLNWEQPQAARLSRTGLVEGLRKRLLPAWCASLAHGLSLLLVAVAVAVSGWVGASFPPGVSVAWLLSSSASFLASFLGWEPLKVLLEALYFSLVAKRLHPDEDDTLVESPAVTPVSARVPRVRPPHGFALFLAKEEARKVKRLHGMLRSLLVYMLFLLVTLLASYGDASCHGHAYRLQSAIKQELHSRAFLAITRSEELWPWMAHVLLPYVHGNQSSPELGPPRLRQVRLQEALYPDPPGPRVHTCSAAGGFSTSDYDVGWESPHNGSGTWAYSAPDLLGAWSWGSCAVYDSGGYVQELGLSLEESRDRLRFLQLHNWLDNRSRAVFLELTRYSPAVGLHAAVTLRLEFPAAGRALAALSVRPFALRRLSAGLSLPLLTSVCLLLFAVHFAVAEARTWHREGRWRVLRLGAWARWLLVALTAATALVRLAQLGAADRQWTRFVRGRPRRFTSFDQVAQLSSAARGLAASLLFLLLVKAAQQLRFVRQWSVFGKTLCRALPELLGVTLGLVVLGVAYAQLAILLVSSCVDSLWSVAQALLVLCPGTGLSTLCPAESWHLSPLLCVGLWALRLWGALRLGAVILRWRYHALRGELYRPAWEPQDYEMVELFLRRLRLWMGLSK +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6AEI_A||287||663 Template 6AEI_A||287||663 +QUERY 
LAKLKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNI +QUERY +TEMPL LAKLKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNI +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6AEI_A||290||664 Template 6AEI_A||290||664 +QUERY LKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNII +QUERY +TEMPL LKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNII +TEMPL 
+Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6AEI_A||291||667 Template 6AEI_A||291||667 +QUERY KVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNIIPSP +QUERY +TEMPL KVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNIIPSP +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6AYF_A||19||546 Template 6AYF_A||19||546 +QUERY 
HEEENRCNFNQQTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTIAFKHLFLKGYMDRMDDTYAVYTQSDVYDQLIFAVNQYLQLYNVSVGNHAYENKGTKQSAMAICQHFYKRGNIYPGNDTFDIDPEIETECFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHYMMIFDAFVILTCLVSLILCIRSVIRGLQLQQEFVNFFLLHYKKEVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHDKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKQYQQDGFPETELRTFISECKDLPNSGKYRLED +QUERY +TEMPL HEEENRCNFNQQTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTIAFKHLFLKGYMDRMDDTYAVYTQSDVYDQLIFAVNQYLQLYNVSVGNHAYENKGTKQSAMAICQHFYKRGNIYPGNDTFDIDPEIETECFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHYMMIFDAFVILTCLVSLILCIRSVIRGLQLQQEFVNFFLLHYKKEVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHDKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKQYQQDGFPETELRTFISECKDLPNSGKYRLED +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BBJ_A||424||746 Template 6BBJ_A||424||746 +QUERY 
GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +QUERY +TEMPL GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BBJ_A||428||750 Template 6BBJ_A||428||750 +QUERY SVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +QUERY +TEMPL SVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BCO_A||735||1081 Template 6BCO_A||735||1081 +QUERY PPGTVEPSAKVALERRQRRRPGRALCCGKFSKRWSDFWGAPVTAFLGNVVSYLLFLLLFAHVLLVDFQPTKPSVSELLLYFWAFTLLCEELRQGLGGGWGSLASGGRGPDRAPLRHRLHLYLSDTWNQCDLLALTCFLLGVGCRLTPGLFDLGRTVLCLDFMIFTLRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLCVWLVAYGVATEGILRPQDRSLPSILRRVFYRPYLQIFGQIPQEEMDVALMIPGNCSMERGSWAHPEGPVAGSCVSQYANWLVVLLLIVFLLVANILLLNLLIAMFSYTFSKVHGNSDLYWKAQRYSLIREFHSRPALAPPLIIISHV +QUERY +TEMPL PPGTVEPSAKVALERRQRRRPGRALCCGKFSKRWSDFWGAPVTAFLGNVVSYLLFLLLFAHVLLVDFQPTKPSVSELLLYFWAFTLLCEELRQGLGGGWGSLASGGRGPDRAPLRHRLHLYLSDTWNQCDLLALTCFLLGVGCRLTPGLFDLGRTVLCLDFMIFTLRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLCVWLVAYGVATEGILRPQDRSLPSILRRVFYRPYLQIFGQIPQEEMDVALMIPGNCSMERGSWAHPEGPVAGSCVSQYANWLVVLLLIVFLLVANILLLNLLIAMFSYTFSKVHGNSDLYWKAQRYSLIREFHSRPALAPPLIIISHV +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BO5_A||314||642 Template 6BO5_A||314||642 +QUERY NSVLEIIAFHCKSPNRHRMVVLEPLNKLLQEKWDRLVSRFFFNFACYLVYMFIFTVVAYHQPSLDQPAIPSSKATFGESMLLLGHILILLGGIYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLRFMETEWYLPLLVLSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREARSPKAPEDNNSTVTEQPTVGQEEEPAPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNHVADNSWSIWKLQKAISVLEMENGYWWCRRKK +QUERY +TEMPL 
NSVLEIIAFHCKSPNRHRMVVLEPLNKLLQEKWDRLVSRFFFNFACYLVYMFIFTVVAYHQPSLDQPAIPSSKATFGESMLLLGHILILLGGIYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLRFMETEWYLPLLVLSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREARSPKAPEDNNSTVTEQPTVGQEEEPAPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNHVADNSWSIWKLQKAISVLEMENGYWWCRRKK +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BO8_A||288||614 Template 6BO8_A||288||614 +QUERY DEQSLLELIITTKKREARQILDQTPVKELVSLKWKRYGRPYFCMLGAIYLLYIICFTMCCIYRPLKPRTNNRTSPRDNTLLQQKLLQEAYMTPKDDIRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLPRCLWP +QUERY +TEMPL DEQSLLELIITTKKREARQILDQTPVKELVSLKWKRYGRPYFCMLGAIYLLYIICFTMCCIYRPLKPRTNNRTSPRDNTLLQQKLLQEAYMTPKDDIRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLPRCLWP +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BO8_A||384||609 Template 6BO8_A||384||609 +QUERY 
IRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLP +QUERY +TEMPL IRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLP +TEMPL +Equivalence 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BPQ_A||570||883 Template 6BPQ_A||570||883 +QUERY IILCLFFFPLIGCGFISFRKKPVEKTKKLFLYYVSFFTSPFVVFSWNVIFYIAFLLLFAYVLLMDFQKEPTALEIILYVLVFILLCDEVRQWYMNGSKYFSDLWNVMDTLAIFYFIAGIVFRLHSDESSWYSGRVIFCLDYIVFTLRLIHIFTVSRNLGPKIIMLQRMMIDVFFFLFLFAVWMVAFGVARQGILRKNEHRWEWIFRSVIYEPYLAMFGQYPDDIDGTTYNFDHCTFSGNESKPLCVELDANNQPRFPEWITIPLVCIYMLSTNILLVNLLVAMFGYTVGSVQENNDQVWKFQRFFLVQEYCSRL +QUERY +TEMPL IILCLFFFPLIGCGFISFRKKPVEKTKKLFLYYVSFFTSPFVVFSWNVIFYIAFLLLFAYVLLMDFQKEPTALEIILYVLVFILLCDEVRQWYMNGSKYFSDLWNVMDTLAIFYFIAGIVFRLHSDESSWYSGRVIFCLDYIVFTLRLIHIFTVSRNLGPKIIMLQRMMIDVFFFLFLFAVWMVAFGVARQGILRKNEHRWEWIFRSVIYEPYLAMFGQYPDDIDGTTYNFDHCTFSGNESKPLCVELDANNQPRFPEWITIPLVCIYMLSTNILLVNLLVAMFGYTVGSVQENNDQVWKFQRFFLVQEYCSRL +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BQR_A||661||1011 Template 6BQR_A||661||1011 +QUERY 
PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +QUERY +TEMPL PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BQV_A||739||1089 Template 6BQV_A||739||1089 +QUERY PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +QUERY +TEMPL PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +TEMPL +Equivalence 
999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWD_A||470||712 Template 6BWD_A||470||712 +QUERY FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA +QUERY +TEMPL FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWD_A||476||787 Template 6BWD_A||476||787 +QUERY LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +QUERY +TEMPL LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +TEMPL +Equivalence 
999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWD_A||477||790 Template 6BWD_A||477||790 +QUERY DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +QUERY +TEMPL DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWD_A||480||793 Template 6BWD_A||480||793 +QUERY DGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHI +QUERY +TEMPL 
DGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHI +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWD_A||483||904 Template 6BWD_A||483||904 +QUERY NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLDSQIGHLQDLSALTVDTLKTL +QUERY +TEMPL NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLDSQIGHLQDLSALTVDTLKTL +TEMPL +Equivalence 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWI_A||536||886 Template 6BWI_A||536||886 +QUERY PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +QUERY +TEMPL PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6BWJ_A||351||680 Template 6BWJ_A||351||680 +QUERY NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYSEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVMGWTNLLYYTRGFQHTGIYSVMIEKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQNSRTPAGPNATEVGQPGAGQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +QUERY +TEMPL 
NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYSEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVMGWTNLLYYTRGFQHTGIYSVMIEKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQNSRTPAGPNATEVGQPGAGQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6C8G_A||293||615 Template 6C8G_A||293||615 +QUERY GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +QUERY +TEMPL GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6C8G_A||294||617 Template 6C8G_A||294||617 +QUERY 
EEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCM +QUERY +TEMPL EEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCM +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6C8G_A||295||619 Template 6C8G_A||295||619 +QUERY EVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +QUERY +TEMPL EVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +TEMPL +Equivalence 
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6C8G_A||419||661 Template 6C8G_A||419||661 +QUERY FIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRKAFRSGEMVTVGKNLDGTPDRRWCFRVDEVNWSHWNQNLGIIN +QUERY +TEMPL FIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRKAFRSGEMVTVGKNLDGTPDRRWCFRVDEVNWSHWNQNLGIIN +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6CO7_A||808||1043 Template 6CO7_A||808||1043 +QUERY EIREDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEI +QUERY +TEMPL EIREDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEI +TEMPL +Equivalence 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + +Query 6CO7_A||810||1124 Template 
6CO7_A||810||1124 +QUERY REDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEIQGEKPKEFGEVDPDGRWLSPLLLAIYMVFTNILLLNLLIAIFNYTFERVQEDSDKVWKFQRYDLVQEYHSRPVFAPPLVLL +QUERY +TEMPL REDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEIQGEKPKEFGEVDPDGRWLSPLLLAIYMVFTNILLLNLLIAIFNYTFERVQEDSDKVWKFQRYDLVQEYHSRPVFAPPLVLL +TEMPL +Equivalence 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dash_client b/software/mafft/mafft-linux64/mafftdir/libexec/dash_client index dd4ff391..e30f683e 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/dash_client and b/software/mafft/mafft-linux64/mafftdir/libexec/dash_client differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dash_sequences.fa b/software/mafft/mafft-linux64/mafftdir/libexec/dash_sequences.fa new file mode 100644 index 00000000..bec30a1e --- /dev/null +++ b/software/mafft/mafft-linux64/mafftdir/libexec/dash_sequences.fa @@ -0,0 +1,138 @@ +>DASH_3J5P_A||281||579 +EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +>DASH_3J5P_A||283||585 
+NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +>DASH_3J9P_A||1089||1399 +YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKL +>DASH_3J9P_A||1091||1408 +PLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVD +>DASH_3J9P_A||1092||1402 +LTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLW +>DASH_3J9P_A||1096||1410 +NAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQK +>DASH_5AN8_A||277||586 
+NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +>DASH_5IRX_A||286||584 +EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF +>DASH_5IRX_A||288||590 +NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK +>DASH_5IWK_A||287||613 +DDQSLLELIVTTKKREARQILDQTPVKELVSLKWKRYGRPYFCVLGAIYVLYIICFTMCCVYRPLKPRITNRTNPRDNTLLQQKLLQEAYVTPKDDLRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLPRCLWP +>DASH_5IWK_A||383||608 +LRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLP +>DASH_5K47_A||19||514 
+GTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAE +>DASH_5K47_A||20||521 +TRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +>DASH_5K47_A||247||526 +NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +>DASH_5K47_A||2||537 +PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +>DASH_5K47_A||30||525 
+REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +>DASH_5MKF_A||158||704 +EDQGPPCPSPVGGGDPLHRHLPLEGQPPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI +>DASH_5MKF_A||185||720 +PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +>DASH_5MKF_A||430||709 +NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +>DASH_5TJA_A||3||217 
+GLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSLE +>DASH_5VKQ_A||1228||1596 +DKRNVEFLDVLIENEQKEVIAHTVVQRYLQELWHGSLTWASWKILLLLVAFIVCPPVWIGFTFPMGHKFNKVPIIKFMSYLTSHIYLMIHLSIVGITPIYPVLRLSLVPYWYEVGLLIWLSGLLLFELTNPSDKSGLGSIKVLVLLLGMAGVGVHVSAFLFVSKEYWPTLVYCRNQCFALAFLLACVQILDFLSFHHLFGPWAIIIGDLLKDLARFLAVLAIFVFGFSMHIVALNQSFANFSPEDLRSFEKKNRNRGYFSDVRMHPINSFELLFFAVFGQTTTEQTQVDKIKNVATPTQPYWVEYLFKIVFGIYMLVSVVVLIQLLIAMMSDTYQRIQAQSDIEWKFGLSKLIRNMHRTTTAPSPLNLV +>DASH_5W3S_A||14||541 +HEEENRCNFNQHTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLED +>DASH_5W3S_A||28||546 +PSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLEDDPPVS +>DASH_5WJ9_A||16||557 
+LTPNPGYGTQAGPSPAPPTPPEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVGFMWRQRGRVISLWERLEFVNGWYILLVTSDVLTISGTIMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHNYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGRSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGAGAEESELQAYIAQCQDSPTSGKFRRGS +>DASH_5WPV_A||16||557 +LTPNPGYGTQVGTSPAPTTPTEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLVVTFREENTIAFRHLFLLGYSDGSDDTFAAYTQEQLYQAIFYAVDQYLILPEISLGRYAYVRGGGGPWANGSALALCQRYYHRGHVDPANDTFDIDPRVVTDCIQVDPPDRPPDIPSEDLDFLDGSASYKNLTLKFHKLINVTIHFQLKTINLQSLINNEIPDCYTFSILITFDNKAHSGRIPIRLETKTHIQECKHPSVSRHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVVFMWRRRGREISLWERLEFVNGWYILLVTSDVLTISGTVMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHKYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGHSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGTGTEKSELQAYIEQCQDSPTSGKFRRGS +>DASH_5Z1W_A||127||515 +YENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQL +>DASH_5Z1W_A||13||506 
+SIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKE +>DASH_5Z1W_A||246||537 +NANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKTLLRLRLRKE +>DASH_5Z1W_A||29||447 +NRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFV +>DASH_5Z1W_A||41||508 +LVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEEL +>DASH_5Z1W_A||9||528 
+HICRSIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKT +>DASH_5Z96_A||286||659 +LARLKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNV +>DASH_5Z96_A||289||660 +LKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVI +>DASH_5Z96_A||290||663 +KLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVIPSP +>DASH_5ZX5_A||814||1056 +FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA 
+>DASH_5ZX5_A||820||1131 +LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +>DASH_5ZX5_A||821||1134 +DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +>DASH_5ZX5_A||827||1228 +NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLD +>DASH_6A70_A||27||559 +SHPQFEKGSAAAPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIR +>DASH_6A70_A||284||563 +NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH +>DASH_6A70_A||39||574 
+PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN +>DASH_6A70_A||67||562 +REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY +>DASH_6A70_B||227||1129 +LRFRRLLVAELQRGFFDKHIWLSIWDRPPRSRFTRIQRATCCVLLICLFLGANAVWYGAVGDSAYSTGHVSRLSPLSVDTVAVGLVSSVVVYPVYLAILFLFRMSRSKVAGSPSPTPAGQQVLDIDSCLDSSVLDSSFLTFSGLHAEQAFVGQMKSDLFLDDSKSLVCWPSGEGTLSWPDLLSDPSIVGSNLRQLARGQAGHGLGPEEDGFSLASPYSPAKSFSASDEDLIQQVLAEGVSSPAPTQDTHMETDLLSSLSSTPGEKTETLALQRLGELGPPSPGLNWEQPQAARLSRTGLVEGLRKRLLPAWCASLAHGLSLLLVAVAVAVSGWVGASFPPGVSVAWLLSSSASFLASFLGWEPLKVLLEALYFSLVAKRLHPDEDDTLVESPAVTPVSARVPRVRPPHGFALFLAKEEARKVKRLHGMLRSLLVYMLFLLVTLLASYGDASCHGHAYRLQSAIKQELHSRAFLAITRSEELWPWMAHVLLPYVHGNQSSPELGPPRLRQVRLQEALYPDPPGPRVHTCSAAGGFSTSDYDVGWESPHNGSGTWAYSAPDLLGAWSWGSCAVYDSGGYVQELGLSLEESRDRLRFLQLHNWLDNRSRAVFLELTRYSPAVGLHAAVTLRLEFPAAGRALAALSVRPFALRRLSAGLSLPLLTSVCLLLFAVHFAVAEARTWHREGRWRVLRLGAWARWLLVALTAATALVRLAQLGAADRQWTRFVRGRPRRFTSFDQVAQLSSAARGLAASLLFLLLVKAAQQLRFVRQWSVFGKTLCRALPELLGVTLGLVVLGVAYAQLAILLVSSCVDSLWSVAQALLVLCPGTGLSTLCPAESWHLSPLLCVGLWALRLWGALRLGAVILRWRYHALRGELYRPAWEPQDYEMVELFLRRLRLWMGLSK 
+>DASH_6AEI_A||287||663 +LAKLKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNI +>DASH_6AEI_A||290||664 +LKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNII +>DASH_6AEI_A||291||667 +KVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNIIPSP +>DASH_6AYF_A||19||546 +HEEENRCNFNQQTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTIAFKHLFLKGYMDRMDDTYAVYTQSDVYDQLIFAVNQYLQLYNVSVGNHAYENKGTKQSAMAICQHFYKRGNIYPGNDTFDIDPEIETECFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHYMMIFDAFVILTCLVSLILCIRSVIRGLQLQQEFVNFFLLHYKKEVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHDKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKQYQQDGFPETELRTFISECKDLPNSGKYRLED +>DASH_6BBJ_A||424||746 
+GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +>DASH_6BBJ_A||428||750 +SVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +>DASH_6BCO_A||735||1081 +PPGTVEPSAKVALERRQRRRPGRALCCGKFSKRWSDFWGAPVTAFLGNVVSYLLFLLLFAHVLLVDFQPTKPSVSELLLYFWAFTLLCEELRQGLGGGWGSLASGGRGPDRAPLRHRLHLYLSDTWNQCDLLALTCFLLGVGCRLTPGLFDLGRTVLCLDFMIFTLRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLCVWLVAYGVATEGILRPQDRSLPSILRRVFYRPYLQIFGQIPQEEMDVALMIPGNCSMERGSWAHPEGPVAGSCVSQYANWLVVLLLIVFLLVANILLLNLLIAMFSYTFSKVHGNSDLYWKAQRYSLIREFHSRPALAPPLIIISHV +>DASH_6BO5_A||314||642 +NSVLEIIAFHCKSPNRHRMVVLEPLNKLLQEKWDRLVSRFFFNFACYLVYMFIFTVVAYHQPSLDQPAIPSSKATFGESMLLLGHILILLGGIYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLRFMETEWYLPLLVLSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREARSPKAPEDNNSTVTEQPTVGQEEEPAPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNHVADNSWSIWKLQKAISVLEMENGYWWCRRKK +>DASH_6BO8_A||288||614 +DEQSLLELIITTKKREARQILDQTPVKELVSLKWKRYGRPYFCMLGAIYLLYIICFTMCCIYRPLKPRTNNRTSPRDNTLLQQKLLQEAYMTPKDDIRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLPRCLWP +>DASH_6BO8_A||384||609 
+IRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLP +>DASH_6BPQ_A||570||883 +IILCLFFFPLIGCGFISFRKKPVEKTKKLFLYYVSFFTSPFVVFSWNVIFYIAFLLLFAYVLLMDFQKEPTALEIILYVLVFILLCDEVRQWYMNGSKYFSDLWNVMDTLAIFYFIAGIVFRLHSDESSWYSGRVIFCLDYIVFTLRLIHIFTVSRNLGPKIIMLQRMMIDVFFFLFLFAVWMVAFGVARQGILRKNEHRWEWIFRSVIYEPYLAMFGQYPDDIDGTTYNFDHCTFSGNESKPLCVELDANNQPRFPEWITIPLVCIYMLSTNILLVNLLVAMFGYTVGSVQENNDQVWKFQRFFLVQEYCSRL +>DASH_6BQR_A||661||1011 +PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +>DASH_6BQV_A||739||1089 +PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +>DASH_6BWD_A||470||712 +FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA +>DASH_6BWD_A||476||787 +LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL +>DASH_6BWD_A||477||790 
+DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL +>DASH_6BWD_A||480||793 +DGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHI +>DASH_6BWD_A||483||904 +NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLDSQIGHLQDLSALTVDTLKTL +>DASH_6BWI_A||536||886 +PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL +>DASH_6BWJ_A||351||680 +NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYSEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVMGWTNLLYYTRGFQHTGIYSVMIEKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQNSRTPAGPNATEVGQPGAGQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ +>DASH_6C8G_A||293||615 
+GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV +>DASH_6C8G_A||294||617 +EEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCM +>DASH_6C8G_A||295||619 +EVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK +>DASH_6C8G_A||419||661 +FIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRKAFRSGEMVTVGKNLDGTPDRRWCFRVDEVNWSHWNQNLGIIN +>DASH_6CO7_A||808||1043 +EIREDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEI +>DASH_6CO7_A||810||1124 +REDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEIQGEKPKEFGEVDPDGRWLSPLLLAIYMVFTNILLLNLLIAIFNYTFERVQEDSDKVWKFQRYDLVQEYHSRPVFAPPLVLL diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/disttbfast 
b/software/mafft/mafft-linux64/mafftdir/libexec/disttbfast index 95d397f0..4d950cf7 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/disttbfast and b/software/mafft/mafft-linux64/mafftdir/libexec/disttbfast differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dndblast b/software/mafft/mafft-linux64/mafftdir/libexec/dndblast index 2dc2c6e0..b4b112d0 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/dndblast and b/software/mafft/mafft-linux64/mafftdir/libexec/dndblast differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dndfast7 b/software/mafft/mafft-linux64/mafftdir/libexec/dndfast7 index 51faee70..0ee51697 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/dndfast7 and b/software/mafft/mafft-linux64/mafftdir/libexec/dndfast7 differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dndpre b/software/mafft/mafft-linux64/mafftdir/libexec/dndpre index d2ff7554..4b27abed 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/dndpre and b/software/mafft/mafft-linux64/mafftdir/libexec/dndpre differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/dvtditr b/software/mafft/mafft-linux64/mafftdir/libexec/dvtditr index 370e90a1..115b2ec7 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/dvtditr and b/software/mafft/mafft-linux64/mafftdir/libexec/dvtditr differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/f2cl b/software/mafft/mafft-linux64/mafftdir/libexec/f2cl index 286dad87..d740559a 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/f2cl and b/software/mafft/mafft-linux64/mafftdir/libexec/f2cl differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/filter b/software/mafft/mafft-linux64/mafftdir/libexec/filter new file mode 100755 index 00000000..f0ad2ead Binary files /dev/null and b/software/mafft/mafft-linux64/mafftdir/libexec/filter differ diff --git 
a/software/mafft/mafft-linux64/mafftdir/libexec/getlag b/software/mafft/mafft-linux64/mafftdir/libexec/getlag index 863dc5e7..7c4ea918 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/getlag and b/software/mafft/mafft-linux64/mafftdir/libexec/getlag differ diff --git a/software/mafft/mafft-win/usr/share/misc/magic b/software/mafft/mafft-linux64/mafftdir/libexec/hat3 similarity index 100% rename from software/mafft/mafft-win/usr/share/misc/magic rename to software/mafft/mafft-linux64/mafftdir/libexec/hat3 diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/mafft-distance b/software/mafft/mafft-linux64/mafftdir/libexec/mafft-distance index 32b8d77a..be7f77a2 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/mafft-distance and b/software/mafft/mafft-linux64/mafftdir/libexec/mafft-distance differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/mafft-profile b/software/mafft/mafft-linux64/mafftdir/libexec/mafft-profile index 00dc0d19..73936551 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/mafft-profile and b/software/mafft/mafft-linux64/mafftdir/libexec/mafft-profile differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/makedirectionlist b/software/mafft/mafft-linux64/mafftdir/libexec/makedirectionlist index a03893f6..89f3ccb3 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/makedirectionlist and b/software/mafft/mafft-linux64/mafftdir/libexec/makedirectionlist differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/mccaskillwrap b/software/mafft/mafft-linux64/mafftdir/libexec/mccaskillwrap index 69bb00a1..85bd1665 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/mccaskillwrap and b/software/mafft/mafft-linux64/mafftdir/libexec/mccaskillwrap differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/multi2hat3s b/software/mafft/mafft-linux64/mafftdir/libexec/multi2hat3s index 11aab741..4b06fcf4 100755 Binary files 
a/software/mafft/mafft-linux64/mafftdir/libexec/multi2hat3s and b/software/mafft/mafft-linux64/mafftdir/libexec/multi2hat3s differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/nodepair b/software/mafft/mafft-linux64/mafftdir/libexec/nodepair index bd38ad41..581fefea 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/nodepair and b/software/mafft/mafft-linux64/mafftdir/libexec/nodepair differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/pairash b/software/mafft/mafft-linux64/mafftdir/libexec/pairash index e46be817..307a0534 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/pairash and b/software/mafft/mafft-linux64/mafftdir/libexec/pairash differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/pairlocalalign b/software/mafft/mafft-linux64/mafftdir/libexec/pairlocalalign index d303df10..475cba76 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/pairlocalalign and b/software/mafft/mafft-linux64/mafftdir/libexec/pairlocalalign differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/regtable2seq b/software/mafft/mafft-linux64/mafftdir/libexec/regtable2seq index 5a2b5082..45e90d17 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/regtable2seq and b/software/mafft/mafft-linux64/mafftdir/libexec/regtable2seq differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/replaceu b/software/mafft/mafft-linux64/mafftdir/libexec/replaceu index b3ed3503..fcd6ab7e 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/replaceu and b/software/mafft/mafft-linux64/mafftdir/libexec/replaceu differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/restoreu b/software/mafft/mafft-linux64/mafftdir/libexec/restoreu index 40271f91..ff6bbb5f 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/restoreu and b/software/mafft/mafft-linux64/mafftdir/libexec/restoreu differ diff --git 
a/software/mafft/mafft-linux64/mafftdir/libexec/score b/software/mafft/mafft-linux64/mafftdir/libexec/score index c0ec883b..c815992f 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/score and b/software/mafft/mafft-linux64/mafftdir/libexec/score differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/seq2regtable b/software/mafft/mafft-linux64/mafftdir/libexec/seq2regtable index 46df62a8..4b2b440e 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/seq2regtable and b/software/mafft/mafft-linux64/mafftdir/libexec/seq2regtable differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/setcore b/software/mafft/mafft-linux64/mafftdir/libexec/setcore index 866273b2..30472e54 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/setcore and b/software/mafft/mafft-linux64/mafftdir/libexec/setcore differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/setdirection b/software/mafft/mafft-linux64/mafftdir/libexec/setdirection index 529e48cb..95b15080 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/setdirection and b/software/mafft/mafft-linux64/mafftdir/libexec/setdirection differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/sextet5 b/software/mafft/mafft-linux64/mafftdir/libexec/sextet5 index 4fd249ea..0df7bbc5 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/sextet5 and b/software/mafft/mafft-linux64/mafftdir/libexec/sextet5 differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/splittbfast b/software/mafft/mafft-linux64/mafftdir/libexec/splittbfast index 9428a0ce..c35e2881 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/splittbfast and b/software/mafft/mafft-linux64/mafftdir/libexec/splittbfast differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/tbfast b/software/mafft/mafft-linux64/mafftdir/libexec/tbfast index a96ad5bb..1ab0e605 100755 Binary files 
a/software/mafft/mafft-linux64/mafftdir/libexec/tbfast and b/software/mafft/mafft-linux64/mafftdir/libexec/tbfast differ diff --git a/software/mafft/mafft-linux64/mafftdir/libexec/version b/software/mafft/mafft-linux64/mafftdir/libexec/version index 53cc6716..e30b3a86 100755 Binary files a/software/mafft/mafft-linux64/mafftdir/libexec/version and b/software/mafft/mafft-linux64/mafftdir/libexec/version differ diff --git a/software/mafft/mafft-mac/mafftdir/bin/mafft b/software/mafft/mafft-mac/mafftdir/bin/mafft index 3bd3a2b1..91b5fc97 100755 --- a/software/mafft/mafft-mac/mafftdir/bin/mafft +++ b/software/mafft/mafft-mac/mafftdir/bin/mafft @@ -1,7 +1,7 @@ #! /bin/bash er=0; myself=`dirname "$0"`/`basename "$0"`; export myself -version="v7.467 (2020/May/14)"; export version +version="v7.490 (2021/Oct/30)"; export version LANG=C; export LANG os=`uname` progname=`basename "$0"` @@ -257,6 +257,7 @@ laraparams="/dev/null" foldalignopt=" " treealg=" -X 0.1 " sueff="1.0" +maxambiguous="1.0" scoreoutarg=" " numthreads=0 numthreadsit=-1 @@ -293,6 +294,7 @@ enrich=0 # ato de kezuru enrichseq=0 # ato de kezuru enrichstr=0 # ato de kezuru seektarget="" # ato de kezuru +dashserver="https://sysimm.org/dash/REST1.0/" newdash=0 newdash_originalsequenceonly=0 exclude_ho=0 @@ -405,6 +407,9 @@ if [ $# -gt 0 ]; then shift sueff="$1" treealg=" -X $1" + elif [ "$1" = "--maxambiguous" ]; then + shift + maxambiguous="$1" elif [ "$1" = "--noscore" ]; then scorecalcopt=" -Z " elif [ "$1" = "--6mermultipair" ]; then @@ -946,8 +951,15 @@ if [ $# -gt 0 ]; then # seektarget="-str" elif [ "$1" = "--dash" ]; then newdash=1 - distance="global" # can be override - iterate=3 # can be override + if [ "$distance" != "local" -a "$distance" != "localgenaf" ]; then # 2021/Oct + distance="global" + fi + if [ "$iterate" -eq 0 ]; then # 2021/Oct + iterate=3 + fi + elif [ "$1" = "--dashserver" ]; then + shift + dashserver="$1" elif [ "$1" = "--originalseqonly" ]; then newdash_originalsequenceonly=1 elif [ 
"$1" = "--excludehomologs" ]; then # works with --dash only @@ -1030,7 +1042,7 @@ if [ $# -gt 0 ]; then TMPFILE=`cygpath -w "$TMPFILE"` # necessary to pass path to f2cl on cyswin, somehow unnecessary in msys. fi - umask 077 +# umask 077 # 2021/Jan # mkdir "$TMPFILE" || er=1 @@ -1046,7 +1058,7 @@ function removetmpfile() { # for MPI if [ $debug -eq 1 ]; then # trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys # trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0 15 - trap "tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 + trap "popd > /dev/null 2>&1; tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 else # trap "rm -rf $TMPFILE" 0 15 trap "removetmpfile" 0 15 @@ -1065,8 +1077,16 @@ function removetmpfile() { # for MPI cat "$1" | tr "\r" "\n" > "$TMPFILE/infile" echo "" >> "$TMPFILE/infile" - cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> "$TMPFILE/infile" cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile" + + if [ $maxambiguous != "1.0" ]; then + mv "$TMPFILE/infile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1 + mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1 + fi + cat "$TMPFILE/_addfile" >> "$TMPFILE/infile" + cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx" cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable" cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree" @@ -1077,11 +1097,19 @@ function removetmpfile() { # for MPI cat "$anchorfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_externalanchors" SAVEIFS=$IFS - IFS='\n' - filelist="$1\n$addfile\n$scorematrix\n$mergetable\n$treeinfile\n$seedtablefile\n$laraparams\n$pdblist\n$ownlist" +# Fixed a bug: 'n' was misinterpreted as 
delimiter, 2020/Jun/19 + IFS=$'\n' + filelist="$1 +$addfile +$scorematrix +$mergetable +$treeinfile +$seedtablefile +$laraparams +$pdblist +$ownlist" for f in $filelist; do - # $f="" demo mudani file ga hashiru. - file "$f" 2>/dev/null | grep "UTF-[13][62]" >& /dev/null && printf "$f: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; + file "$f" 2>/dev/null | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf "$f: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; done IFS=$SAVEIFS @@ -1095,7 +1123,7 @@ function removetmpfile() { # for MPI shift if [ -r "$1" ]; then cat "$1" | tr "\r" "\n" > "$TMPFILE/seed$#" - file "$1" | grep "UTF-[13][62]" >& /dev/null && printf "$1: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; + file "$1" | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf "$1: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; else echo "$0": Cannot open "$1". 1>&2 echo "" 1>&2 @@ -1182,7 +1210,9 @@ function removetmpfile() { # for MPI exit 1; fi + if [ "$addarg0" != " " ]; then + # iterate=0 # 2013/03/23 -> commented out, 2017/12 "$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile" nadd=`awk '{print $1}' "$TMPFILE/addsize"` @@ -1297,6 +1327,13 @@ function removetmpfile() { # for MPI echo "npair = " $npair 1>>"$progressfile" echo "nseq = " $nseq 1>>"$progressfile" echo "nlen = " $nlen 1>>"$progressfile" + + if [ $norg -eq 0 ]; then + echo "" 1>>"$progressfile" + echo "The reference sequence was removed because of ambiguous letters?" 1>>"$progressfile" + echo "" 1>>"$progressfile" + exit 1; + fi # nagasa check! 
# if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct @@ -1319,6 +1356,11 @@ function removetmpfile() { # for MPI exit 1; fi + if [ `awk "BEGIN {print( 0.0+\"$maxambiguous\" < 0.0 || 0.0+\"$maxambiguous\" > 1.0 )}"` -gt 0 ]; then + printf "\n%s\n\n" "The argument of --maxambiguous must be between 0.0 and 1.0" 1>>"$progressfile" + exit 1; + fi + if [ $allowshift -eq 1 ]; then if [ $unalignspecified -ne 1 ]; then unalignlevel="0.8" @@ -1937,6 +1979,7 @@ function removetmpfile() { # for MPI fi + if [ $nadd -gt "0" ]; then if [ $fragment -eq "1" ]; then addarg="$addarg0 $nadd -g -0.01" @@ -1964,12 +2007,13 @@ function removetmpfile() { # for MPI bunkatsuopt=" -B " # fftnsi demo bunktasu shinai if [ "$add2ndhalfarg" != " " ]; then if [ $auto -eq 1 -o $iterate -gt 0 ]; then - echo '' 1>>"$progressfile" - echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" - echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" - echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 +# echo '' 1>>"$progressfile" +# echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" +# echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" +# echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" +# echo '' 1>>"$progressfile" +# exit 1 + iterate=0 fi fi fi @@ -2171,6 +2215,7 @@ function removetmpfile() { # for MPI # echo "iterate = " $iterate 1>>"$progressfile" # echo "cycle = " $cycle 1>>"$progressfile" + if [ $anysymbol -eq 1 ]; then mv infile orig "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1 @@ -2317,7 +2362,8 @@ function removetmpfile() { # for MPI if [ $anysymbol -eq 1 ]; then mv orig infile # replaceu wo mukouka fi - sed 's/-//g' infile > dashin # gap nozoku +# sed 's/-//g' infile > dashin # gap nozoku + awk '{if(/^>/)print; else {gsub( /-/,"" ); print;}}' infile > dashin if [ ! 
-x "$prefix/dash_client" -o ! -x "$prefix/dash_client" ]; then echo "" 1>&2 @@ -2332,11 +2378,11 @@ function removetmpfile() { # for MPI fi echo "Calling DASH (https://sysimm.org/dash/)" 1>>"$progressfile" - "$prefix/dash_client" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" + "$prefix/dash_client" -url "$dashserver" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" dashres="$?" if [ $dashres -ne "0" ]; then echo "Error in DASH" 1>>"$progressfile" - echo "To use this feature, compiled with" 1>>"$progressfile" + echo "To enable this feature, compile with DASH_CLIENT=dash_client. Go compiler is necessary." 1>>"$progressfile" exit 1; fi @@ -2908,8 +2954,8 @@ BEGIN { while( 1 ) { go = 0; - printf( "OK?\n" ) > "/dev/tty"; - printf( "@ [Y] " ) > "/dev/tty"; + printf( "Type Y or just enter to run this command.\n" ) > "/dev/tty"; + printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 ) diff --git a/software/mafft/mafft-mac/mafftdir/libexec/addsingle b/software/mafft/mafft-mac/mafftdir/libexec/addsingle index 57b73aca..1ff0f8f6 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/addsingle and b/software/mafft/mafft-mac/mafftdir/libexec/addsingle differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/contrafoldwrap b/software/mafft/mafft-mac/mafftdir/libexec/contrafoldwrap index f0f8cf14..1402942a 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/contrafoldwrap and b/software/mafft/mafft-mac/mafftdir/libexec/contrafoldwrap differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/countlen b/software/mafft/mafft-mac/mafftdir/libexec/countlen index b0c91349..84789f2a 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/countlen and b/software/mafft/mafft-mac/mafftdir/libexec/countlen differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/dash_client b/software/mafft/mafft-mac/mafftdir/libexec/dash_client index 63e661f6..86d8de74 100755 Binary 
files a/software/mafft/mafft-mac/mafftdir/libexec/dash_client and b/software/mafft/mafft-mac/mafftdir/libexec/dash_client differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/disttbfast b/software/mafft/mafft-mac/mafftdir/libexec/disttbfast index 1f60f3d9..8a1c0147 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/disttbfast and b/software/mafft/mafft-mac/mafftdir/libexec/disttbfast differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/dndblast b/software/mafft/mafft-mac/mafftdir/libexec/dndblast index 4ea56bd8..dafe7731 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/dndblast and b/software/mafft/mafft-mac/mafftdir/libexec/dndblast differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/dndfast7 b/software/mafft/mafft-mac/mafftdir/libexec/dndfast7 index 996680a4..66ebce1c 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/dndfast7 and b/software/mafft/mafft-mac/mafftdir/libexec/dndfast7 differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/dndpre b/software/mafft/mafft-mac/mafftdir/libexec/dndpre index 8d46f8b8..4fe0bc7b 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/dndpre and b/software/mafft/mafft-mac/mafftdir/libexec/dndpre differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/dvtditr b/software/mafft/mafft-mac/mafftdir/libexec/dvtditr index 2403a287..d72a3d38 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/dvtditr and b/software/mafft/mafft-mac/mafftdir/libexec/dvtditr differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/f2cl b/software/mafft/mafft-mac/mafftdir/libexec/f2cl index 323a4c0a..461549c1 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/f2cl and b/software/mafft/mafft-mac/mafftdir/libexec/f2cl differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/filter b/software/mafft/mafft-mac/mafftdir/libexec/filter new file mode 100755 index 00000000..f5c6dd2e Binary files /dev/null and 
b/software/mafft/mafft-mac/mafftdir/libexec/filter differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/getlag b/software/mafft/mafft-mac/mafftdir/libexec/getlag index c6e6dd77..71cdce71 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/getlag and b/software/mafft/mafft-mac/mafftdir/libexec/getlag differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/hex2maffttext b/software/mafft/mafft-mac/mafftdir/libexec/hex2maffttext index 1caa9eb4..d5a9601f 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/hex2maffttext and b/software/mafft/mafft-mac/mafftdir/libexec/hex2maffttext differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/mafft-distance b/software/mafft/mafft-mac/mafftdir/libexec/mafft-distance index c2bbd579..fcd8f50e 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/mafft-distance and b/software/mafft/mafft-mac/mafftdir/libexec/mafft-distance differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/mafft-profile b/software/mafft/mafft-mac/mafftdir/libexec/mafft-profile index c096fc9e..ecb9e93c 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/mafft-profile and b/software/mafft/mafft-mac/mafftdir/libexec/mafft-profile differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/maffttext2hex b/software/mafft/mafft-mac/mafftdir/libexec/maffttext2hex index ca59092d..f33f5b60 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/maffttext2hex and b/software/mafft/mafft-mac/mafftdir/libexec/maffttext2hex differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/makedirectionlist b/software/mafft/mafft-mac/mafftdir/libexec/makedirectionlist index 2fe6d014..53db353a 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/makedirectionlist and b/software/mafft/mafft-mac/mafftdir/libexec/makedirectionlist differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/mccaskillwrap b/software/mafft/mafft-mac/mafftdir/libexec/mccaskillwrap index 5feaf984..ddf86a43 
100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/mccaskillwrap and b/software/mafft/mafft-mac/mafftdir/libexec/mccaskillwrap differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/multi2hat3s b/software/mafft/mafft-mac/mafftdir/libexec/multi2hat3s index ad9c4944..5599e514 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/multi2hat3s and b/software/mafft/mafft-mac/mafftdir/libexec/multi2hat3s differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/nodepair b/software/mafft/mafft-mac/mafftdir/libexec/nodepair index 68a7de9a..acf55903 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/nodepair and b/software/mafft/mafft-mac/mafftdir/libexec/nodepair differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/pairash b/software/mafft/mafft-mac/mafftdir/libexec/pairash index ccb35c20..b64df32f 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/pairash and b/software/mafft/mafft-mac/mafftdir/libexec/pairash differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/pairlocalalign b/software/mafft/mafft-mac/mafftdir/libexec/pairlocalalign index c9ba3921..80e7b591 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/pairlocalalign and b/software/mafft/mafft-mac/mafftdir/libexec/pairlocalalign differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/regtable2seq b/software/mafft/mafft-mac/mafftdir/libexec/regtable2seq index ec8760d3..b5c4af47 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/regtable2seq and b/software/mafft/mafft-mac/mafftdir/libexec/regtable2seq differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/replaceu b/software/mafft/mafft-mac/mafftdir/libexec/replaceu index ddaf3095..4e7bbbf8 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/replaceu and b/software/mafft/mafft-mac/mafftdir/libexec/replaceu differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/restoreu b/software/mafft/mafft-mac/mafftdir/libexec/restoreu index 
ce5759fa..a8eeac63 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/restoreu and b/software/mafft/mafft-mac/mafftdir/libexec/restoreu differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/score b/software/mafft/mafft-mac/mafftdir/libexec/score index d1be07fb..66977105 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/score and b/software/mafft/mafft-mac/mafftdir/libexec/score differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/seq2regtable b/software/mafft/mafft-mac/mafftdir/libexec/seq2regtable index 37f6b675..f283d554 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/seq2regtable and b/software/mafft/mafft-mac/mafftdir/libexec/seq2regtable differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/setcore b/software/mafft/mafft-mac/mafftdir/libexec/setcore index b1080b89..ba798fea 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/setcore and b/software/mafft/mafft-mac/mafftdir/libexec/setcore differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/setdirection b/software/mafft/mafft-mac/mafftdir/libexec/setdirection index 144e1fdb..1f06ecf4 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/setdirection and b/software/mafft/mafft-mac/mafftdir/libexec/setdirection differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/sextet5 b/software/mafft/mafft-mac/mafftdir/libexec/sextet5 index 20cdebcd..25d8d165 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/sextet5 and b/software/mafft/mafft-mac/mafftdir/libexec/sextet5 differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/splittbfast b/software/mafft/mafft-mac/mafftdir/libexec/splittbfast index 9fc1b85f..14741baf 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/splittbfast and b/software/mafft/mafft-mac/mafftdir/libexec/splittbfast differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/tbfast b/software/mafft/mafft-mac/mafftdir/libexec/tbfast index 98ce5aca..174b5772 100755 
Binary files a/software/mafft/mafft-mac/mafftdir/libexec/tbfast and b/software/mafft/mafft-mac/mafftdir/libexec/tbfast differ diff --git a/software/mafft/mafft-mac/mafftdir/libexec/version b/software/mafft/mafft-mac/mafftdir/libexec/version index cd9f5c91..de766d27 100755 Binary files a/software/mafft/mafft-mac/mafftdir/libexec/version and b/software/mafft/mafft-mac/mafftdir/libexec/version differ diff --git a/software/mafft/mafft-win/mafft-signed.ps1 b/software/mafft/mafft-win/mafft-signed.ps1 deleted file mode 100755 index f09c3c70..00000000 --- a/software/mafft/mafft-win/mafft-signed.ps1 +++ /dev/null @@ -1,176 +0,0 @@ -Write-Host; -Write-Host Preparing environment to run MAFFT on Windows. -Write-Host This may take a while, if real-time scanning by anti-virus software is on. - -Set-Item Env:Path "/usr/bin;$Env:Path" -Set-Item Env:MAFFT_BINARIES "/usr/lib/mafft" -Set-Item Env:TMPDIR "$Env:TMP" -Set-Item Env:MAFFT_TMPDIR "$Env:TMP" -Set-Item Env:mafft_working_dir "$PWD" - -#Set-Item Env:TMPDIR "/tmp" -#Set-Item Env:MAFFT_TMPDIR "/tmp" -# If you do not have write permission for standard temporary folder -# (typically C:\Users\username\AppData\Local\Temp\), then -# uncomment (remove #) the above two lines to use an alternative -# temporary folder. 
- -#$ROOTDIR=$PSScriptRoot # not supported by powershell versions <= 2 -$ROOTDIR=Split-Path -Parent $MyInvocation.MyCommand.Path -$proc = Start-Process -Wait -NoNewWindow -PassThru -FilePath "$ROOTDIR\usr\bin\bash.exe" -ArgumentList "'/usr/bin/mafft' $args" -exit $proc.ExitCode - -# SIG # Begin signature block -# MIIcVQYJKoZIhvcNAQcCoIIcRjCCHEICAQExCzAJBgUrDgMCGgUAMGkGCisGAQQB -# gjcCAQSgWzBZMDQGCisGAQQBgjcCAR4wJgIDAQAABBAfzDtgWUsITrck0sYpfvNR -# AgEAAgEAAgEAAgEAAgEAMCEwCQYFKw4DAhoFAAQUqI6m13VqWCFoXSfDfv8r86zz -# q4KgggmVMIIElDCCA3ygAwIBAgIOSBtqBybS6D8mAtSCWs0wDQYJKoZIhvcNAQEL -# BQAwTDEgMB4GA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoT -# Ckdsb2JhbFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTYwNjE1MDAwMDAw -# WhcNMjQwNjE1MDAwMDAwWjBaMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFs -# U2lnbiBudi1zYTEwMC4GA1UEAxMnR2xvYmFsU2lnbiBDb2RlU2lnbmluZyBDQSAt -# IFNIQTI1NiAtIEczMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAjYVV -# I6kfU6/J7TbCKbVu2PlC9SGLh/BDoS/AP5fjGEfUlk6Iq8Zj6bZJFYXx2Zt7G/3Y -# SsxtToZAF817ukcotdYUQAyG7h5LM/MsVe4hjNq2wf6wTjquUZ+lFOMQ5pPK+vld -# sZCH7/g1LfyiXCbuexWLH9nDoZc1QbMw/XITrZGXOs5ynQYKdTwfmOPLGC+MnwhK -# kQrZ2TXZg5J2Yl7fg67k1gFOzPM8cGFYNx8U42qgr2v02dJsLBkwXaBvUt/RnMng -# Ddl1EWWW2UO0p5A5rkccVMuxlW4l3o7xEhzw127nFE2zGmXWhEpX7gSvYjjFEJtD -# jlK4PrauniyX/4507wIDAQABo4IBZDCCAWAwDgYDVR0PAQH/BAQDAgEGMB0GA1Ud -# JQQWMBQGCCsGAQUFBwMDBggrBgEFBQcDCTASBgNVHRMBAf8ECDAGAQH/AgEAMB0G -# A1UdDgQWBBQPOueslJF0LZYCc4OtnC5JPxmqVDAfBgNVHSMEGDAWgBSP8Et/qC5F -# JK5NUPpjmove4t0bvDA+BggrBgEFBQcBAQQyMDAwLgYIKwYBBQUHMAGGImh0dHA6 -# Ly9vY3NwMi5nbG9iYWxzaWduLmNvbS9yb290cjMwNgYDVR0fBC8wLTAroCmgJ4Yl -# aHR0cDovL2NybC5nbG9iYWxzaWduLmNvbS9yb290LXIzLmNybDBjBgNVHSAEXDBa -# MAsGCSsGAQQBoDIBMjAIBgZngQwBBAEwQQYJKwYBBAGgMgFfMDQwMgYIKwYBBQUH -# AgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29tL3JlcG9zaXRvcnkvMA0GCSqG -# SIb3DQEBCwUAA4IBAQAVhCgM7aHDGYLbYydB18xjfda8zzabz9JdTAKLWBoWCHqx -# mJl/2DOKXJ5iCprqkMLFYwQL6IdYBgAHglnDqJQy2eAUTaDVI+DH3brwaeJKRWUt -# 
TUmQeGYyDrBowLCIsI7tXAb4XBBIPyNzujtThFKAzfCzFcgRCosFeEZZCNS+t/9L -# 9ZxqTJx2ohGFRYzUN+5Q3eEzNKmhHzoL8VZEim+zM9CxjtEMYAfuMsLwJG+/r/uB -# AXZnxKPo4KvcM1Uo42dHPOtqpN+U6fSmwIHRUphRptYCtzzqSu/QumXSN4NTS35n -# fIxA9gccsK8EBtz4bEaIcpzrTp3DsLlUo7lOl8oUMIIE+TCCA+GgAwIBAgIMFc8a -# jh5+hrY2+89QMA0GCSqGSIb3DQEBCwUAMFoxCzAJBgNVBAYTAkJFMRkwFwYDVQQK -# ExBHbG9iYWxTaWduIG52LXNhMTAwLgYDVQQDEydHbG9iYWxTaWduIENvZGVTaWdu -# aW5nIENBIC0gU0hBMjU2IC0gRzMwHhcNMTgwNjEyMDcyOTM2WhcNMTkwNzE2MDU0 -# MDA3WjBzMQswCQYDVQQGEwJKUDEOMAwGA1UECBMFT3Nha2ExDjAMBgNVBAcTBVN1 -# aXRhMRkwFwYDVQQKExBPc2FrYSBVbml2ZXJzaXR5MQ4wDAYDVQQLEwVJRlJlQzEZ -# MBcGA1UEAxMQT3Nha2EgVW5pdmVyc2l0eTCCASIwDQYJKoZIhvcNAQEBBQADggEP -# ADCCAQoCggEBALewrK03sLqADU9LkvAwx8r28L2zo+qUROEz9oNZW6xYPylSzEYp -# 7RRAYbwQA6rlIHtW8A8/+SjA4fNwcy9qGBiu1OZJmeX4sLZzr48rUFRKyZqfraIo -# ILT8GVNZBMW39K78ZIP+1eTXjcBXyX2Ri2BIEH0fgtYO/q9Hz3NLZkWv2InxZvkk -# Xh8khFAPwDSI6IEy5K972kyfp5liilDzBVBQmYve3O8lMNTpg6PseWxruBkr0kQs -# FXzHLJ+tNVQGRHPF81xMGD8YPhI1IySUgDZXqU98u+EB5uBfXNyMgv9PuTy/8zx/ -# mlAMbVMvwWBL9y+fcZIfYHuKI/swraWHt7kCAwEAAaOCAaQwggGgMA4GA1UdDwEB -# /wQEAwIHgDCBlAYIKwYBBQUHAQEEgYcwgYQwSAYIKwYBBQUHMAKGPGh0dHA6Ly9z -# ZWN1cmUuZ2xvYmFsc2lnbi5jb20vY2FjZXJ0L2dzY29kZXNpZ25zaGEyZzNvY3Nw -# LmNydDA4BggrBgEFBQcwAYYsaHR0cDovL29jc3AyLmdsb2JhbHNpZ24uY29tL2dz -# Y29kZXNpZ25zaGEyZzMwVgYDVR0gBE8wTTBBBgkrBgEEAaAyATIwNDAyBggrBgEF -# BQcCARYmaHR0cHM6Ly93d3cuZ2xvYmFsc2lnbi5jb20vcmVwb3NpdG9yeS8wCAYG -# Z4EMAQQBMAkGA1UdEwQCMAAwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NybC5n -# bG9iYWxzaWduLmNvbS9nc2NvZGVzaWduc2hhMmczLmNybDATBgNVHSUEDDAKBggr -# BgEFBQcDAzAdBgNVHQ4EFgQUz880T9hEerWAjdohhwXpJTk1A2swHwYDVR0jBBgw -# FoAUDzrnrJSRdC2WAnODrZwuST8ZqlQwDQYJKoZIhvcNAQELBQADggEBAE5mwpcK -# Xp/0Ira6wn74jx3CK70qtlC0q++Dfmc8TJvkfzWZQgnbAeLFwsDVcGzDAQ+LUl/Y -# TzgBWR4loyv2L4NWG02z9jpwoKnKYvw4+3y2oqSWqUprUqSO0cl1WO7YRfgFLQ8Q -# udFWHvdrsFcCowDKlHAvN0q5nfLR7imclVhHwTAn0ydXpvDYcsqZSu4WSSTi0VBk -# Li32Ch7xANr6kTKoP3kSc4qo/GWDpRuQ7kTym2B17p9kdylwsAYTtlYAL3cqd5a8 -# 
Z8jFHECC2n4yNN7H/ZkyG/g/wpbeM9Gh1plPed+8CtzFB3tdYXghRezmCOILLlb5 -# T4SfajNQvchmuGAxghIqMIISJgIBATBqMFoxCzAJBgNVBAYTAkJFMRkwFwYDVQQK -# ExBHbG9iYWxTaWduIG52LXNhMTAwLgYDVQQDEydHbG9iYWxTaWduIENvZGVTaWdu -# aW5nIENBIC0gU0hBMjU2IC0gRzMCDBXPGo4efoa2NvvPUDAJBgUrDgMCGgUAoHAw -# EAYKKwYBBAGCNwIBDDECMAAwGQYJKoZIhvcNAQkDMQwGCisGAQQBgjcCAQQwHAYK -# KwYBBAGCNwIBCzEOMAwGCisGAQQBgjcCARUwIwYJKoZIhvcNAQkEMRYEFBRU4nWu -# yEz++xp0jM+X0oCnjMBWMA0GCSqGSIb3DQEBAQUABIIBABMR+aFvaFANzFRPfG38 -# R/AplM4WYdMxh/3uDWNMV1mcVYVBMK3+zB0RXzSPIf1m9VoVDYHmjMnn9WEZukJC -# PH3v97H1KZdWj5QiuTB2+8EEzhQq3YojeUgQ7t11sC86IOIo93bXfbhWrt2C3ZJH -# NeEgdRLx6VBgG6BgwS8xbdRyrr5j+6p1rrMPlsMTdc4UsqU/AFbiBFKAD42OJ55x -# HFgOMnm7MVYIAn2zY7yopqHoNYMd9HdPs7YsRSsNp6lBZxfTyHxg5WfTML9xlw1j -# M/Dd8K5jdjUjKyeSHLvWnSsK52umbrhfr9fB5r3gVnAihEdJA7c9SSqftdc46mD5 -# 0KyhghAjMIIQHwYKKwYBBAGCNwMDATGCEA8wghALBgkqhkiG9w0BBwKggg/8MIIP -# +AIBAzEPMA0GCWCGSAFlAwQCAQUAMIHmBgsqhkiG9w0BCRABBKCB1gSB0zCB0AIB -# AQYJKwYBBAGgMgIDMDEwDQYJYIZIAWUDBAIBBQAEIBzVNuZO6bYKQYkG2U2bXfv6 -# xRgjuqIqX83afNZJ3kMYAg4BZYmsk78AAAAAALHeJhgTMjAxOTA1MjEwODQyMzAu -# NjU0WjADAgEBoGOkYTBfMQswCQYDVQQGEwJKUDEcMBoGA1UEChMTR01PIEdsb2Jh -# bFNpZ24gSy5LLjEyMDAGA1UEAxMpR2xvYmFsU2lnbiBUU0EgZm9yIEFkdmFuY2Vk -# IC0gRzMgLSAwMDMtMDKgggxqMIIE6jCCA9KgAwIBAgIMUGf6Rs5s/pUVpp6yMA0G -# CSqGSIb3DQEBCwUAMFsxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWdu -# IG52LXNhMTEwLwYDVQQDEyhHbG9iYWxTaWduIFRpbWVzdGFtcGluZyBDQSAtIFNI -# QTI1NiAtIEcyMB4XDTE4MDYxNDEwMDAwMFoXDTI5MDMxODEwMDAwMFowXzELMAkG -# A1UEBhMCSlAxHDAaBgNVBAoTE0dNTyBHbG9iYWxTaWduIEsuSy4xMjAwBgNVBAMT -# KUdsb2JhbFNpZ24gVFNBIGZvciBBZHZhbmNlZCAtIEczIC0gMDAzLTAyMIIBIjAN -# BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyPnKCDjJ248NqgMvZFZv5OK9o365 -# HS6YQtac8NSD3Dz+MstX8Zkx3I45s0JISWN/SzDV1ESFJ0GSOfpQNVM3wvuGI43T -# 6SGOtZYeCFioA85bseoZslrJnGe7gLcxG9qGx7t4KTQfE4xqFZHLsgx/jfkLvIa8 -# FcfxUNSAvs2RmmiQfPzmXQ3h6K7JL7ghe2TI26IHA/mN9ItnV43C8EHY34VEHkZ2 -# SJOy3B1IlsIKRixt4QDIGmVJep8L6sKvMnD8HrfV+NeAnwqPQmalyXQoZEsq+ls4 -# 
aIEtFAUgzhVhDxuXJTgWOU59bSAvrxxGwd9AlnwZvzlFuW5cHel2F3dHEwIDAQAB -# o4IBqDCCAaQwDgYDVR0PAQH/BAQDAgeAMEwGA1UdIARFMEMwQQYJKwYBBAGgMgEe -# MDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29tL3JlcG9z -# aXRvcnkvMAkGA1UdEwQCMAAwFgYDVR0lAQH/BAwwCgYIKwYBBQUHAwgwRgYDVR0f -# BD8wPTA7oDmgN4Y1aHR0cDovL2NybC5nbG9iYWxzaWduLmNvbS9ncy9nc3RpbWVz -# dGFtcGluZ3NoYTJnMi5jcmwwgZgGCCsGAQUFBwEBBIGLMIGIMEgGCCsGAQUFBzAC -# hjxodHRwOi8vc2VjdXJlLmdsb2JhbHNpZ24uY29tL2NhY2VydC9nc3RpbWVzdGFt -# cGluZ3NoYTJnMi5jcnQwPAYIKwYBBQUHMAGGMGh0dHA6Ly9vY3NwMi5nbG9iYWxz -# aWduLmNvbS9nc3RpbWVzdGFtcGluZ3NoYTJnMjAdBgNVHQ4EFgQUXS7r1FaeHBn5 -# WJwbtS/oyNt5HucwHwYDVR0jBBgwFoAUkiGnSpVdZLCbtB7mADdH5p1BK0wwDQYJ -# KoZIhvcNAQELBQADggEBACu34JGiT64w8FccYtF0BAJBjFxugoVSZC3JmEa6lp+j -# Q+EyzKAnguXZB6DOfm5vfFv4QdM2DnVWh4FdRBLj/Pf7ooZlINqgel5vcQsAkqmj -# ILQiohGOyA2cewUT5Fre6cPacgzJgZwXnnwFpG2wnMqkLnYmlmS8ZLJtIo/CsKnI -# kfKDNv8YUiwmrhuKM+bSoHXZMT2jQorYSassv9qGn3z5Gvey8tWTT7S8CR9LgCje -# sC/dsblwzY6w1lE/UdnffQ30iYc8IGFPfnfcXhl/0XAWBkDxHwdJLPIHoOPHJGBw -# h9cpuYf0KakLookYrSslTm/7m4uLrvmYlh1ajtzePrAwggQVMIIC/aADAgECAgsE -# AAAAAAExicZQBDANBgkqhkiG9w0BAQsFADBMMSAwHgYDVQQLExdHbG9iYWxTaWdu -# IFJvb3QgQ0EgLSBSMzETMBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xv -# YmFsU2lnbjAeFw0xMTA4MDIxMDAwMDBaFw0yOTAzMjkxMDAwMDBaMFsxCzAJBgNV -# BAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMTEwLwYDVQQDEyhHbG9i -# YWxTaWduIFRpbWVzdGFtcGluZyBDQSAtIFNIQTI1NiAtIEcyMIIBIjANBgkqhkiG -# 9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqpuOw6sRUSUBtpaU4k/YwQj2RiPZRcWVl1ur -# Gr/SbFfJMwYfoA/GPH5TSHq/nYeer+7DjEfhQuzj46FKbAwXxKbBuc1b8R5EiY7+ -# C94hWBPuTcjFZwscsrPxNHaRossHbTfFoEcmAhWkkJGpeZ7X61edK3wi2BTX8Qce -# eCI2a3d5r6/5f45O4bUIMf3q7UtxYowj8QM5j0R5tnYDV56tLwhG3NKMvPSOdM7I -# aGlRdhGLD10kWxlUPSbMQI2CJxtZIH1Z9pOAjvgqOP1roEBlH1d2zFuOBE8sqNuE -# UBNPxtyLufjdaUyI65x7MCb8eli7WbwUcpKBV7d2ydiACoBuCQIDAQABo4HoMIHl -# MA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEAMB0GA1UdDgQWBBSS -# IadKlV1ksJu0HuYAN0fmnUErTDBHBgNVHSAEQDA+MDwGBFUdIAAwNDAyBggrBgEF -# 
BQcCARYmaHR0cHM6Ly93d3cuZ2xvYmFsc2lnbi5jb20vcmVwb3NpdG9yeS8wNgYD -# VR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5nbG9iYWxzaWduLm5ldC9yb290LXIz -# LmNybDAfBgNVHSMEGDAWgBSP8Et/qC5FJK5NUPpjmove4t0bvDANBgkqhkiG9w0B -# AQsFAAOCAQEABFaCSnzQzsm/NmbRvjWek2yX6AbOMRhZ+WxBX4AuwEIluBjH/NSx -# N8RooM8oagN0S2OXhXdhO9cv4/W9M6KSfREfnops7yyw9GKNNnPRFjbxvF7stICY -# ePzSdnno4SGU4B/EouGqZ9uznHPlQCLPOc7b5neVp7uyy/YZhp2fyNSYBbJxb051 -# rvE9ZGo7Xk5GpipdCJLxo/MddL9iDSOMXCo4ldLA1c3PiNofKLW6gWlkKrWmotVz -# r9xG2wSukdduxZi61EfEVnSAR3hYjL7vK/3sbL/RlPe/UOB74JD9IBh4GCJdCC6M -# HKCX8x2ZfaOdkdMGRE4EbnocIOM28LZQuTCCA18wggJHoAMCAQICCwQAAAAAASFY -# UwiiMA0GCSqGSIb3DQEBCwUAMEwxIDAeBgNVBAsTF0dsb2JhbFNpZ24gUm9vdCBD -# QSAtIFIzMRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu -# MB4XDTA5MDMxODEwMDAwMFoXDTI5MDMxODEwMDAwMFowTDEgMB4GA1UECxMXR2xv -# YmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoTCkdsb2JhbFNpZ24xEzARBgNV -# BAMTCkdsb2JhbFNpZ24wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDM -# JXaQeQZ4Ihb1wIO2hMoonv0FdhHFrYhy/EYCQ8eyip0EXyTLLkvhYIJG4VKrDIFH -# cGzdZNHr9SyjD4I9DCuul9e2FIYQebs7E4B3jAjhSdJqYi8fXvqWaN+JJ5U4nwbX -# PsnLJlkNc96wyOkmDoMVxu9bi9IEYMpJpij2aTv2y8gokeWdimFXN6x0FNx04Dru -# ci8unPvQu7/1PQDhBjPogiuuU6Y6FnOM3UEOIDrAtKeh6bJPkC4yYOlXy7kEkmho -# 5TgmYHWyn3f/kRTvriBJ/K1AFUjRAjFhGV64l++td7dkmnq/X8ET75ti+w1s4FRp -# FqkD2m7pg5NxdsZphYIXAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMB -# Af8EBTADAQH/MB0GA1UdDgQWBBSP8Et/qC5FJK5NUPpjmove4t0bvDANBgkqhkiG -# 9w0BAQsFAAOCAQEAS0DbwFCq/sgM7/eWVEVJu5YACUGssxOGhigHM8pr5nS5ugAt -# rqQK0/Xx8Q+Kv3NnSoPHRHt44K9ubG8DKY4zOUXDjuS5V2yq/BKW7FPGLeQkbLmU -# Y/vcU2hnVj6DuM81IcPJaP7O2sJTqsyQiunwXUaMld16WCgaLx3ezQA3QY/tRG3X -# UyiXfvNnBB4V14qWtNPeTCekTBtzc3b0F5nCH3oO4y0IrQocLP88q1UOD5F+NuvD -# V0m+4S4tfGCLw0FREyOdzvcya5QBqJnnLDMfOjsl0oZAzjsshnjJYS8Uuu7bVW/f -# hO4FCU29KNhyztNiUGUe65KXgzHZs7XKR1g/XzGCAokwggKFAgEBMGswWzELMAkG -# A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExMTAvBgNVBAMTKEds -# b2JhbFNpZ24gVGltZXN0YW1waW5nIENBIC0gU0hBMjU2IC0gRzICDFBn+kbObP6V -# 
FaaesjANBglghkgBZQMEAgEFAKCB8DAaBgkqhkiG9w0BCQMxDQYLKoZIhvcNAQkQ -# AQQwLwYJKoZIhvcNAQkEMSIEIPIVPJdBTy7cKZA+XrSqloyPlzW3UATmJ02NxLxE -# HU5zMIGgBgsqhkiG9w0BCRACDDGBkDCBjTCBijCBhwQUe190nJDud78pQ3Xomb4K -# ZtCFB0wwbzBfpF0wWzELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24g -# bnYtc2ExMTAvBgNVBAMTKEdsb2JhbFNpZ24gVGltZXN0YW1waW5nIENBIC0gU0hB -# MjU2IC0gRzICDFBn+kbObP6VFaaesjANBgkqhkiG9w0BAQEFAASCAQCiGLQOdvUl -# LoRipx/QB0JGJ3xYrRQ5O+HaX4B3GyCa4QLjRWrgmvp/5Es68txxwHLtBZLwOYck -# E8NfCtziQAe2xhRXBN1TkcY8488L7RDfnTOpTbpT6ZghgzLyonBcp1NpCUrL9/Ft -# qSNNhDg2w9kPgPWrrhDCqtzsMKVF6+ZR1GCT1SSxJpD7VUv0g5bBfmB1PHbl6oNS -# 4XQELjp32JQeKYu/GuqH2kDfYRbHUjZqhuMFmZChvMUuRJ9mhGL3SbsIMeN6yRRu -# I+Eu7jO2gYeVYz7SPqoiiG3XNU8haUMVIjRIX/10d5nV1m1uuhfGucnxPpSe3D2e -# pjzY5hjHLZ6r -# SIG # End signature block diff --git a/software/mafft/mafft-win/mafft.bat b/software/mafft/mafft-win/mafft.bat deleted file mode 100755 index c674ca60..00000000 --- a/software/mafft/mafft-win/mafft.bat +++ /dev/null @@ -1,33 +0,0 @@ -@echo off -setlocal enabledelayedexpansion -cls; 1>&2 -chcp 65001 1>&2 -for /f "usebackq tokens=*" %%i IN (`cd`) DO @set current_dir=%%i -if /i "%current_dir%" == "%systemroot%" ( - set mafft_working_dir="%~dp0" -) else ( - set mafft_working_dir="%current_dir%" -) -pushd "%mafft_working_dir%" - -echo; 1>&2 -echo Preparing environment to run MAFFT on Windows. 1>&2 -echo This may take a while, if real-time scanning by anti-virus software is on. 1>&2 - -set ROOTDIR=%~d0%~p0 -set PATH=/usr/bin/:%PATH% -set MAFFT_BINARIES=/usr/lib/mafft -set TMPDIR=%TMP% -set MAFFT_TMPDIR=%TMPDIR% - -REM set TMPDIR=%ROOTDIR%/tmp -REM set MAFFT_TMPDIR=%TMPDIR% -REM If you do not have write permission for Windows temporary folder -REM (typically C:\Users\username\AppData\Local\Temp\), then -REM uncomment (remove REM) the above two lines to use an alternative -REM temporary folder. 
- -"%ROOTDIR%\usr\bin\bash" "/usr/bin/mafft" %* - -popd -exit /b diff --git a/software/mafft/mafft-win/testdata.txt b/software/mafft/mafft-win/testdata.txt deleted file mode 100755 index bbba8ae0..00000000 --- a/software/mafft/mafft-win/testdata.txt +++ /dev/null @@ -1,4 +0,0 @@ ->1 -acdefghijklm ->2 -ghijkm diff --git a/software/mafft/mafft-win/usr/bin/awk.exe b/software/mafft/mafft-win/usr/bin/awk.exe deleted file mode 100755 index ba7929a7..00000000 Binary files a/software/mafft/mafft-win/usr/bin/awk.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/basename.exe b/software/mafft/mafft-win/usr/bin/basename.exe deleted file mode 100755 index 3e63ef79..00000000 Binary files a/software/mafft/mafft-win/usr/bin/basename.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/bash.exe b/software/mafft/mafft-win/usr/bin/bash.exe deleted file mode 100755 index 32149dab..00000000 Binary files a/software/mafft/mafft-win/usr/bin/bash.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/cat.exe b/software/mafft/mafft-win/usr/bin/cat.exe deleted file mode 100755 index c98a8d70..00000000 Binary files a/software/mafft/mafft-win/usr/bin/cat.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/chmod.exe b/software/mafft/mafft-win/usr/bin/chmod.exe deleted file mode 100755 index e026d85c..00000000 Binary files a/software/mafft/mafft-win/usr/bin/chmod.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/comm.exe b/software/mafft/mafft-win/usr/bin/comm.exe deleted file mode 100755 index 759879ef..00000000 Binary files a/software/mafft/mafft-win/usr/bin/comm.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/cp.exe b/software/mafft/mafft-win/usr/bin/cp.exe deleted file mode 100755 index 269ce23f..00000000 Binary files a/software/mafft/mafft-win/usr/bin/cp.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/cut.exe b/software/mafft/mafft-win/usr/bin/cut.exe 
deleted file mode 100755 index 55496345..00000000 Binary files a/software/mafft/mafft-win/usr/bin/cut.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/date.exe b/software/mafft/mafft-win/usr/bin/date.exe deleted file mode 100755 index be06d12e..00000000 Binary files a/software/mafft/mafft-win/usr/bin/date.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/dirname.exe b/software/mafft/mafft-win/usr/bin/dirname.exe deleted file mode 100755 index 10c35f3b..00000000 Binary files a/software/mafft/mafft-win/usr/bin/dirname.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/echo.exe b/software/mafft/mafft-win/usr/bin/echo.exe deleted file mode 100755 index a6263790..00000000 Binary files a/software/mafft/mafft-win/usr/bin/echo.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/env.exe b/software/mafft/mafft-win/usr/bin/env.exe deleted file mode 100755 index 1a194a21..00000000 Binary files a/software/mafft/mafft-win/usr/bin/env.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/expr.exe b/software/mafft/mafft-win/usr/bin/expr.exe deleted file mode 100755 index d26179ab..00000000 Binary files a/software/mafft/mafft-win/usr/bin/expr.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/false.exe b/software/mafft/mafft-win/usr/bin/false.exe deleted file mode 100755 index 6a5a4ae0..00000000 Binary files a/software/mafft/mafft-win/usr/bin/false.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/file.exe b/software/mafft/mafft-win/usr/bin/file.exe deleted file mode 100755 index af6dd183..00000000 Binary files a/software/mafft/mafft-win/usr/bin/file.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/fold.exe b/software/mafft/mafft-win/usr/bin/fold.exe deleted file mode 100755 index 967a451a..00000000 Binary files a/software/mafft/mafft-win/usr/bin/fold.exe and /dev/null differ diff --git 
a/software/mafft/mafft-win/usr/bin/grep.exe b/software/mafft/mafft-win/usr/bin/grep.exe deleted file mode 100755 index a31c8871..00000000 Binary files a/software/mafft/mafft-win/usr/bin/grep.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/gzip.exe b/software/mafft/mafft-win/usr/bin/gzip.exe deleted file mode 100755 index 766dcfb3..00000000 Binary files a/software/mafft/mafft-win/usr/bin/gzip.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/head.exe b/software/mafft/mafft-win/usr/bin/head.exe deleted file mode 100755 index efdeb1d4..00000000 Binary files a/software/mafft/mafft-win/usr/bin/head.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/id.exe b/software/mafft/mafft-win/usr/bin/id.exe deleted file mode 100755 index 8fef3ca5..00000000 Binary files a/software/mafft/mafft-win/usr/bin/id.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/info.exe b/software/mafft/mafft-win/usr/bin/info.exe deleted file mode 100755 index f53a2187..00000000 Binary files a/software/mafft/mafft-win/usr/bin/info.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/join.exe b/software/mafft/mafft-win/usr/bin/join.exe deleted file mode 100755 index 854de6fa..00000000 Binary files a/software/mafft/mafft-win/usr/bin/join.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/ln.exe b/software/mafft/mafft-win/usr/bin/ln.exe deleted file mode 100755 index 109ba1a0..00000000 Binary files a/software/mafft/mafft-win/usr/bin/ln.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/ls.exe b/software/mafft/mafft-win/usr/bin/ls.exe deleted file mode 100755 index 2e5e4f87..00000000 Binary files a/software/mafft/mafft-win/usr/bin/ls.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/mafft b/software/mafft/mafft-win/usr/bin/mafft deleted file mode 100755 index 3bd3a2b1..00000000 --- a/software/mafft/mafft-win/usr/bin/mafft +++ /dev/null @@ -1,2937 
+0,0 @@ -#! /bin/bash -er=0; -myself=`dirname "$0"`/`basename "$0"`; export myself -version="v7.467 (2020/May/14)"; export version -LANG=C; export LANG -os=`uname` -progname=`basename "$0"` -windows="no" -if [ `echo $os | grep -i cygwin` ]; then - os="cygwin" - windows="yes" -elif [ `echo $os | grep -i msys` ]; then - os="msys" - windows="yes" -elif [ `echo $os | grep -i mingw` ]; then - os="mingw" - windows="yes" -elif [ `echo $os | grep -i darwin` ]; then - os="darwin" -elif [ `echo $os | grep -i sunos` ]; then - os="sunos" -elif [ `echo $os | grep -i linux` ]; then - os="linux" -else - os="unix" -fi -#export os # iranai -if [ "$windows" = "yes" ]; then - echo "" 1>&2 - echo "It may take a while before the calculation starts" 1>&2 - echo "if being scanned by anti-virus software." 1>&2 - echo "Also consider using a faster version for Windows 10:" 1>&2 - echo "https://mafft.cbrc.jp/alignment/software/wsl.html" 1>&2 -fi - -if [ "$MAFFT_BINARIES" ]; then - prefix="$MAFFT_BINARIES" -else - prefix=/usr/local/libexec/mafft -fi -export prefix # iranai - -if [ $# -gt 0 ]; then - if [ "$1" = "--man" ]; then - man "$prefix/mafft.1" - exit 0; - fi -fi - -if [ -x "$prefix/version" ]; then -# versionbin=`"$prefix/version" | awk '{print $1}'` # for cygwin - versionbin=`"$prefix/version"` # for cygwin 2.7 - else - versionbin="0.000" -fi - -if ! expr "$version" : v"$versionbin" > /dev/null ; then - echo "" 1>&2 - echo "v$versionbin != $version" 1>&2 - echo "" 1>&2 - echo "There is a problem in the configuration of your shell." 1>&2 - echo "Check the MAFFT_BINARIES environmental variable by" 1>&2 - echo "$ echo \$MAFFT_BINARIES" 1>&2 - echo "" 1>&2 - echo "This variable must be *unset*, unless you have installed MAFFT" 1>&2 - echo "with a special configuration. 
To unset this variable, type" 1>&2 - echo "$ unset MAFFT_BINARIES" 1>&2 - echo "or" 1>&2 - echo "% unsetenv MAFFT_BINARIES" 1>&2 - echo "Then retry" 1>&2 - echo "$ mafft input > output" 1>&2 - echo "" 1>&2 - echo "To keep this change permanently, edit setting files" 1>&2 - echo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2 - echo "to delete the MAFFT_BINARIES line." 1>&2 - echo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2 - echo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 1>&2 - echo "" 1>&2 - echo "Please send a problem report to katoh@ifrec.osaka-u.ac.jp," 1>&2 - echo "if this problem remains." 1>&2 - echo "" 1>&2 - exit 1 - er=1 -fi - -defaultiterate=0 -defaultcycle=2 -defaultgop="1.53" -#defaultaof="0.123" -defaultaof="0.000" -defaultlaof="0.100" -defaultlgop="-2.00" -defaultfft=1 -defaultrough=0 -defaultdistance="ktuples" -#defaultdistance="local" -defaultweighti="2.7" -defaultweightr="0.0" -defaultweightm="1.0" -defaultdafs=0 -defaultmccaskill=0 -defaultcontrafold=0 -defaultalgopt=" " -defaultalgoptit=" " -defaultsbstmodel=" -b 62 " -defaultfmodel=" " -defaultkappa=" " -if [ $progname = "xinsi" -o $progname = "mafft-xinsi" ]; then - defaultfft=1 - defaultcycle=1 - defaultiterate=1000 - defaultdistance="scarna" - defaultweighti="3.2" - defaultweightr="8.0" - defaultweightm="2.0" - defaultmccaskill=1 - defaultcontrafold=0 - defaultdafs=0 - defaultalgopt=" -A " - defaultalgoptit=" -AB " ## chui - defaultaof="0.0" - defaultsbstmodel=" -b 62 " - defaultkappa=" " - defaultfmodel=" " # 2013/06/18 -elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then - defaultfft=1 - defaultcycle=1 - defaultiterate=1000 - defaultdistance="global" - defaultweighti="3.2" - defaultweightr="8.0" - defaultweightm="2.0" - defaultmccaskill=1 - defaultcontrafold=0 - defaultdafs=0 - defaultalgopt=" -A " - defaultalgoptit=" -AB " ## chui - defaultaof="0.0" - defaultsbstmodel=" -b 62 " - defaultkappa=" " - 
defaultfmodel=" " # 2013/06/18 -elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then - defaultfft=0 - defaultcycle=1 - defaultiterate=1000 - defaultdistance="local" -elif [ $progname = "ginsi" -o $progname = "mafft-ginsi" ]; then - defaultfft=1 - defaultcycle=1 - defaultiterate=1000 - defaultdistance="global" -elif [ $progname = "einsi" -o $progname = "mafft-einsi" ]; then - defaultfft=0 - defaultcycle=1 - defaultiterate=1000 - defaultdistance="localgenaf" -elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then - defaultfft=1 - defaultcycle=2 - defaultdistance="ktuples" -elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then - defaultfft=1 - defaultcycle=2 - defaultiterate=2 - defaultdistance="ktuples" -elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then - defaultfft=0 - defaultcycle=2 - defaultdistance="ktuples" -elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then - defaultfft=0 - defaultcycle=2 - defaultiterate=2 - defaultdistance="ktuples" -fi -outputfile="" -namelength=-1 -anysymbol=0 -parallelizationstrategy="BAATARI2" -kappa=$defaultkappa -sbstmodel=$defaultsbstmodel -fmodel=$defaultfmodel -nmodel=" " -gexp=0 -gop=$defaultgop -gopdist=$defaultgop -aof=$defaultaof -cycle=$defaultcycle -iterate=$defaultiterate -fft=$defaultfft -rough=$defaultrough -distance=$defaultdistance -forcefft=0 -memopt=" " -weightopt=" " -GGOP="-6.00" -LGOP="-6.00" -LEXP="-0.000" -GEXP="-0.000" -lgop=$defaultlgop -lexp="-0.100" -laof=$defaultlaof -pggop="-2.00" -pgexp="-0.10" -pgaof="0.10" -rgop="-1.530" -rgep="-0.000" -seqtype=" " -weighti=$defaultweighti -weightr=$defaultweightr -weightm=$defaultweightm -rnaalifold=0 -dafs=$defaultdafs -mccaskill=$defaultmccaskill -contrafold=$defaultcontrafold -progressfile="/dev/stderr" -anchorfile="/dev/null" -anchoropt="" -maxanchorseparation=1000 -debug=0 -sw=0 -algopt=$defaultalgopt -algoptit=$defaultalgoptit -#algspecified=0 -pairspecified=0 -scorecalcopt=" " -coreout=0 -corethr="0.5" 
-corewin="100" -coreext=" " -outputformat="pir" -f2clext="-N" -outorder="input" -seed="x" -seedtable="x" -auto=0 -groupsize=-1 -partsize=50 -partdist="ktuples" -partorderopt=" -x " -treeout=0 -nodeout=0 -distout=0 -treein=0 -topin=0 -treeinopt=" " -seedfiles="/dev/null" -seedtablefile="/dev/null" -pdblist="/dev/null" -ownlist="/dev/null" -strdir="$PWD" -scorematrix="/dev/null" -textmatrix="/dev/null" -treeinfile="/dev/null" -rnascoremtx=" " -laraparams="/dev/null" -foldalignopt=" " -treealg=" -X 0.1 " -sueff="1.0" -scoreoutarg=" " -numthreads=0 -numthreadsit=-1 -numthreadstb=-1 -randomseed=0 -addfile="/dev/null" -addarg0=" " -addarg=" " -addsinglearg=" " -add2ndhalfarg=" " -mapoutfile="/dev/null" -fragment=0 -legacygapopt=" " -mergetable="/dev/null" -mergearg=" " -seedoffset=0 -outnum=" " -last_e=5000 -last_m=3 -last_subopt=" " -last_once=" " -adjustdirection=0 -tuplesize=6 -termgapopt=" -O " -#termgapopt=" " # gap/gap ga kakenai node -similarityoffset="0.0" -unalignlevel="0.0" -unalignspecified=0 -spfactor="100.0" -shiftpenaltyspecified=0 -opdistspecified=0 -allowshift=0 -enrich=0 # ato de kezuru -enrichseq=0 # ato de kezuru -enrichstr=0 # ato de kezuru -seektarget="" # ato de kezuru -newdash=0 -newdash_originalsequenceonly=0 -exclude_ho=0 -fixthreshold="0.0" -bunkatsuopt=" " -npickup=0 -minimumweight="0.00001" # 2016/Mar -usenaivepairscore=" " -oldgenafparam=0 -sprigorous=0 -treeext="none" -initialramusage="20GB" -focusarg=" " -lhlimit=" " -mpiscript="/dev/null" -if [ $# -gt 0 ]; then - if [ "$1" = "--version" ]; then - echo "$version" 1>&2 - exit 0; - elif [ "$1" = "--help" -o "$1" = "--info" ]; then - shift - er=1; - fi - while [ $# -gt 1 ]; - do - if [ "$1" = "--auto" ]; then - auto=1 - elif [ "$1" = "--anysymbol" ]; then - anysymbol=1 - elif [ "$1" = "--preservecase" ]; then - anysymbol=1 - elif [ "$1" = "--clustalout" ]; then - outputformat="clustal" - elif [ "$1" = "--phylipout" ]; then - outputformat="phylip" - elif [ "$1" = "--reorder" ]; then - 
outorder="aligned" - partorderopt=" " - elif [ "$1" = "--inputorder" ]; then - outorder="input" - partorderopt=" -x " - elif [ "$1" = "--unweight" ]; then - weightopt=" -u " - elif [ "$1" = "--termgappenalty" ]; then - termgapopt=" " - elif [ "$1" = "--alga" ]; then - algopt=" " - algoptit=" " -# algspecified=1 - elif [ "$1" = "--algq" ]; then - algopt=" -Q " - algoptit=" " - echo "" 1>&2 - echo "--algq is no longer supported!" 1>&2 - echo "" 1>&2 - exit 1; -# algspecified=1 - elif [ "$1" = "--namelength" ]; then - shift - namelength=`expr "$1" - 0` - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify the length of name in clustal format output!" 1>&2 - exit - fi - elif [ "$1" = "--groupsize" ]; then - shift - groupsize=`expr "$1" - 0` - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify groupsize!" 1>&2 - exit - fi - elif [ "$1" = "--partsize" ]; then - shift - partsize=`expr "$1" - 0` - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify partsize!" 1>&2 - exit - fi - elif [ "$1" = "--parttree" ]; then - distance="parttree" - partdist="ktuples" - elif [ "$1" = "--dpparttree" ]; then - distance="parttree" - partdist="localalign" - elif [ "$1" = "--fastaparttree" ]; then - distance="parttree" - partdist="fasta" - elif [ "$1" = "--treeout" ]; then - treeout=1 - elif [ "$1" = "--nodeout" ]; then - nodeout=1 - treeout=1 - elif [ "$1" = "--distout" ]; then - distout=1 - elif [ "$1" = "--fastswpair" ]; then - distance="fasta" - pairspecified=1 - sw=1 - elif [ "$1" = "--fastapair" ]; then - distance="fasta" - pairspecified=1 - sw=0 - elif [ "$1" = "--averagelinkage" ]; then - treealg=" -X 1.0 " - sueff="1.0" - elif [ "$1" = "--minimumlinkage" ]; then - treealg=" -X 0.0 " - sueff="0.0" - elif [ "$1" = "--mixedlinkage" ]; then - shift - sueff="$1" - treealg=" -X $1" - elif [ "$1" = "--noscore" ]; then - scorecalcopt=" -Z " - elif [ "$1" = "--6mermultipair" ]; then - distance="ktuplesmulti" - tuplesize=6 - pairspecified=1 - elif [ "$1" = 
"--10mermultipair" ]; then - distance="ktuplesmulti" - tuplesize=10 - pairspecified=1 - elif [ "$1" = "--6merpair" ]; then - distance="ktuples" - tuplesize=6 - pairspecified=1 - elif [ "$1" = "--10merpair" ]; then - distance="ktuples" - tuplesize=10 - pairspecified=1 - elif [ "$1" = "--blastpair" ]; then - distance="blast" - pairspecified=1 - elif [ "$1" = "--lastmultipair" ]; then - distance="lastmulti" - pairspecified=1 - elif [ "$1" = "--globalpair" ]; then - distance="global" - pairspecified=1 - elif [ "$1" = "--shortlongpair" ]; then - distance="local" - usenaivepairscore="-Z" - laof=0.0 # addfull no tokini tsukawareru. - lexp=0.0 # addfull no tokini tsukawareru. - pgaof=0.0 # local nara iranai - pgexp=0.0 # local nara iranai - pairspecified=1 - elif [ "$1" = "--longshortpair" ]; then - distance="local" - usenaivepairscore="-Z" - laof=0.0 # addfull no tokini tsukawareru. - lexp=0.0 # addfull no tokini tsukawareru. - pgaof=0.0 # local nara iranai - pgexp=0.0 # local nara iranai - pairspecified=1 - elif [ "$1" = "--localpair" ]; then - distance="local" - pairspecified=1 - elif [ "$1" = "--lastpair" ]; then - distance="last" - pairspecified=1 - elif [ "$1" = "--multipair" ]; then - distance="multi" - pairspecified=1 - elif [ "$1" = "--hybridpair" ]; then - distance="hybrid" - pairspecified=1 - elif [ "$1" = "--scarnapair" ]; then - distance="scarna" - pairspecified=1 - elif [ "$1" = "--dafspair" ]; then - distance="dafs" - pairspecified=1 - elif [ "$1" = "--larapair" ]; then - distance="lara" - pairspecified=1 - elif [ "$1" = "--slarapair" ]; then - distance="slara" - pairspecified=1 - elif [ "$1" = "--foldalignpair" ]; then - distance="foldalignlocal" - pairspecified=1 - elif [ "$1" = "--foldalignlocalpair" ]; then - distance="foldalignlocal" - pairspecified=1 - elif [ "$1" = "--foldalignglobalpair" ]; then - distance="foldalignglobal" - pairspecified=1 - elif [ "$1" = "--globalgenafpair" ]; then - distance="globalgenaf" - pairspecified=1 - echo "" 1>&2 - echo 
"--globalgenaf is no longer supported!" 1>&2 - echo "" 1>&2 - exit 1; - elif [ "$1" = "--localgenafpair" ]; then - distance="localgenaf" - pairspecified=1 - elif [ "$1" = "--genafpair" ]; then - distance="localgenaf" - pairspecified=1 - elif [ "$1" = "--oldgenafpair" ]; then - distance="localgenaf" - pairspecified=1 - oldgenafparam=1 - elif [ "$1" = "--memsave" ]; then - memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai) - elif [ "$1" = "--nomemsave" ]; then - memopt=" -N " - elif [ "$1" = "--nuc" ]; then - seqtype="-D" # Deleted space, 2018/Dec - elif [ "$1" = "--amino" ]; then - seqtype="-P" # Deleted space, 2018/Dec - elif [ "$1" = "--fft" ]; then - fft=1 - forcefft=1 - elif [ "$1" = "--nofft" ]; then - fft=0 - elif [ "$1" = "--quiet" ]; then -# if [ $os = "msys" ]; then -# progressfile="nul" -# else - progressfile="/dev/null" -# fi - elif [ "$1" = "--debug" ]; then - debug=1 - elif [ "$1" = "--coreext" ]; then - coreext=" -c " - elif [ "$1" = "--core" ]; then - coreout=1 - elif [ "$1" = "--adjustdirection" ]; then - adjustdirection=1 - elif [ "$1" = "--adjustdirectionaccurately" ]; then - adjustdirection=2 - elif [ "$1" = "--oneiteration" ]; then - oneiterationopt=" -r " - elif [ "$1" = "--progress" ]; then - shift - progressfile="$1" - if ! ( expr "$progressfile" : "\/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then - echo "Specify a progress file name with the absolute path!" 1>&2 - exit - fi - elif [ "$1" = "--out" ]; then - shift - outputfile="$1" - elif [ "$1" = "--skipanchorsremoterthan" ]; then - shift - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify maximum gap length between anchors." 1>&2 - exit - fi - maxanchorseparation=`expr "$1" - 0` - elif [ "$1" = "--anchors" ]; then - shift - anchorfile="$1" - anchoropt=" -l " -# memopt=" -M -B " # ato de kentou -# memopt=" -N " # no memsave - if [ ! 
-e "$anchorfile" ]; then - echo "Cannot open $anchorfile" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--thread" ]; then - shift - if ! expr "$1" : "[0-9\-]" > /dev/null ; then - echo "Specify the number of threads. Or, use --thread -1" 1>&2 - exit - fi - numthreads=`expr "$1" - 0` - elif [ "$1" = "--threadtb" ]; then - shift - if ! expr "$1" : "[0-9\-]" > /dev/null ; then - echo "Check the argument after --threadtb, the number of threads for the progressive step." 1>&2 - exit - fi - numthreadstb=`expr "$1" - 0` - elif [ "$1" = "--threadit" ]; then - shift - if ! expr "$1" : "[0-9\-]" > /dev/null ; then - echo "Check the argument after --threadit, the number of threads for the iterative step." 1>&2 - exit - fi - numthreadsit=`expr "$1" - 0` - elif [ "$1" = "--last_subopt" ]; then - last_subopt="-S" - elif [ "$1" = "--last_once" ]; then - last_once="-U" - elif [ "$1" = "--last_m" ]; then - shift - last_m=`expr "$1" - 0` - elif [ "$1" = "--last_e" ]; then - shift - last_e=`expr "$1" - 0` - elif [ "$1" = "--randomseed" ]; then - shift - randomseed=`expr "$1" - 0` - elif [ "$1" = "--bestfirst" ]; then - parallelizationstrategy="BESTFIRST" - elif [ "$1" = "--adhoc0" ]; then - parallelizationstrategy="BAATARI0" - elif [ "$1" = "--adhoc1" ]; then - parallelizationstrategy="BAATARI1" - elif [ "$1" = "--adhoc2" ]; then - parallelizationstrategy="BAATARI2" - elif [ "$1" = "--simplehillclimbing" ]; then - parallelizationstrategy="BAATARI2" - elif [ "$1" = "--scoreout" ]; then - scoreoutarg="-S -B" - elif [ "$1" = "--outnum" ]; then - outnum="-n" - elif [ "$1" = "--leavegappyregion" ]; then - legacygapopt="-L" - elif [ "$1" = "--legacygappenalty" ]; then - legacygapopt="-L" - elif [ "$1" = "--merge" ]; then - shift - mergetable="$1" - if [ ! 
-e "$mergetable" ]; then - echo "Cannot open $mergetable" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--addprofile" ]; then - shift - addarg0="-I" - addfile="$1" - elif [ "$1" = "--add" ]; then - shift - addarg0="-K -I" - addfile="$1" - elif [ "$1" = "--addfragments" ]; then - shift - addarg0="-K -I" - addfile="$1" - fragment=1 - elif [ "$1" = "--addfull" ]; then - shift - addarg0="-K -I" - addfile="$1" - fragment=-1 - elif [ "$1" = "--addlong" ]; then - shift - addarg0="-K -I" - addfile="$1" - fragment=-2 - elif [ "$1" = "--smoothing" ]; then - add2ndhalfarg=$add2ndhalfarg" -p " - elif [ "$1" = "--keeplength" ]; then - add2ndhalfarg=$add2ndhalfarg" -Y " - elif [ "$1" = "--mapout" ]; then - add2ndhalfarg=$add2ndhalfarg" -Z -Y " - elif [ "$1" = "--mapoutfile" ]; then - shift - add2ndhalfarg=$add2ndhalfarg" -Z -Y " - mapoutfile="$1" - elif [ "$1" = "--maxiterate" ]; then - shift - iterate=`expr "$1" - 0` - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify the number of iterations!" 1>&2 - exit - fi - elif [ "$1" = "--retree" ]; then - shift - cycle=`expr "$1" - 0` - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify the number of tree rebuilding!" 1>&2 - exit - fi - elif [ "$1" = "--aamatrix" ]; then - shift - f2clext="-N" - sbstmodel=" -b -1 " - scorematrix="$1" - if [ ! -e "$scorematrix" ]; then - echo "Cannot open $scorematrix" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--textmatrix" ]; then - shift - f2clext="-E" - seqtype="-P" - fft=0 - sbstmodel=" -b -2 -a " - scorematrix="$1" - if [ ! -e "$scorematrix" ]; then - echo "Cannot open $scorematrix" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--text" ]; then - f2clext="-E" - seqtype="-P" - fft=0 - sbstmodel=" -b -2 -a " - elif [ "$1" = "--treein" ]; then - shift - treeinopt=" -U " - treein=1 - treeinfile="$1" - if [ ! 
-e "$treeinfile" ]; then - echo "Cannot open $treeinfile" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--pileup" ]; then -# treeinopt=" -U " -# treein=1 - treeext="pileup" - elif [ "$1" = "--randomchain" ]; then -# treeinopt=" -U " -# treein=1 -# pileuporshuffle="s" - treeext="randomchain" - elif [ "$1" = "--topin" ]; then - shift - treeinopt=" -V " - treein=1 - treeinfile="$1" - echo "The --topin option has been disabled." 1>&2 - echo "There was a bug in version < 6.530." 1>&2 - echo "This bug has not yet been fixed." 1>&2 - exit 1 - elif [ "$1" = "--mpi" ]; then - mpiscript="$prefix/mpiscript" - elif [ "$1" = "--large" ]; then - treeext="memsavetree" - elif [ "$1" = "--memsavetree" ]; then - treeext="memsavetree" - elif [ "$1" = "--memsavetreex" ]; then - treeext="memsavetreex" - elif [ "$1" = "--stepadd" ]; then - treeext="stepadd" - elif [ "$1" = "--youngestlinkage" ]; then - treeext="youngestlinkage" - elif [ "$1" = "--kappa" ]; then - shift - kappa=" -k $1 " - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify kappa value!" 1>&2 - exit - fi - elif [ "$1" = "--fmodel" ]; then - fmodel=" -a " - elif [ "$1" = "--nwildcard" ]; then - nmodel=" -: " - elif [ "$1" = "--nzero" ]; then - nmodel=" " - elif [ "$1" = "--jtt" ]; then - shift - f2clext="-N" - sbstmodel=" -j $1" -# if ! expr "$1" : "[0-9]" > /dev/null ; then -# echo "Specify pam value!" 1>&2 -# exit -# fi - elif [ "$1" = "--kimura" ]; then - shift - f2clext="-N" - sbstmodel=" -j $1" -# if ! expr "$1" : "[0-9]" > /dev/null ; then -# echo "Specify pam value!" 1>&2 -# exit -# fi - elif [ "$1" = "--tm" ]; then - shift - f2clext="-N" - sbstmodel=" -m $1" -# if ! expr "$1" : "[0-9]" > /dev/null ; then -# echo "Specify pam value!" 1>&2 -# exit -# fi - elif [ "$1" = "--bl" ]; then - shift - f2clext="-N" - sbstmodel=" -b $1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "blosum $1?" 1>&2 - exit - fi - elif [ "$1" = "--weighti" ]; then - shift - weighti="$1" - if ! 
expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify weighti value!" 1>&2 - exit - fi - elif [ "$1" = "--weightr" ]; then - shift - weightr="$1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify weightr value!" 1>&2 - exit - fi - elif [ "$1" = "--weightm" ]; then - shift - weightm="$1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify weightm value!" 1>&2 - exit - fi - elif [ "$1" = "--rnaalifold" ]; then - rnaalifold=1 - elif [ "$1" = "--mccaskill" ]; then - mccaskill=1 - contrafold=0 - dafs=0 - elif [ "$1" = "--contrafold" ]; then - mccaskill=0 - contrafold=1 - dafs=0 - elif [ "$1" = "--dafs" ]; then - mccaskill=0 - contrafold=0 - dafs=1 - elif [ "$1" = "--ribosum" ]; then - rnascoremtx=" -s " - elif [ "$1" = "--op" ]; then - shift - gop="$1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify op!" 1>&2 - exit - fi - elif [ "$1" = "--opdist" ]; then - shift - gopdist="$1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify opdist!" 1>&2 - exit - fi - opdistspecified=1 - elif [ "$1" = "--allowshift" ]; then - allowshift=1 - elif [ "$1" = "--shiftpenalty" ]; then - shift - spfactor="$1" - if ! expr "$1" : "[0-9]" > /dev/null ; then - echo "Specify sf!" 1>&2 - exit - fi - shiftpenaltyspecified=1 - elif [ "$1" = "--exp" ]; then - shift -# gexp="$1" - tmpval="$1" - gexp=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"` - if ! expr "$gexp" : "[0-9\-]" > /dev/null ; then - printf "\nSpecify a number for exp, like --exp 0.1\n" 1>&2 - printf "'$1' cannot be interpreted as a number..\n\n" 1>&2 - exit - fi - elif [ "$1" = "--ep" ]; then - shift -# aof="$1" - tmpval="$1" - aof=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"` - if ! 
expr "$aof" : "[0-9\-]" > /dev/null ; then - printf "\nSpecify a number for ep, like --ep 0.1\n" 1>&2 - printf "'$1' cannot be interpreted as a number..\n\n" 1>&2 - exit - fi - elif [ "$1" = "--rop" ]; then - shift - rgop="$1" -# Atode check - elif [ "$1" = "--rep" ]; then - shift - rgep="$1" - elif [ "$1" = "--lop" ]; then - shift - lgop="$1" - elif [ "$1" = "--LOP" ]; then - shift - LGOP="$1" - elif [ "$1" = "--lep" ]; then - shift - laof="$1" - elif [ "$1" = "--lexp" ]; then - shift - lexp="$1" - elif [ "$1" = "--LEXP" ]; then - shift - LEXP="$1" - elif [ "$1" = "--GEXP" ]; then - shift - GEXP="$1" - elif [ "$1" = "--GOP" ]; then - shift - GGOP="$1" - elif [ "$1" = "--gop" ]; then - shift - pggop="$1" - elif [ "$1" = "--gep" ]; then - shift - pgaof="$1" - elif [ "$1" = "--gexp" ]; then - shift - pgexp="$1" - elif [ "$1" = "--laraparams" ]; then - shift - laraparams="$1" - elif [ "$1" = "--corethr" ]; then - shift - corethr="$1" - elif [ "$1" = "--corewin" ]; then - shift - corewin="$1" - elif [ "$1" = "--strdir" ]; then - shift - strdir="$1" - elif [ "$1" = "--pdbidlist" ]; then - echo "--pdbidlist is temporarily unavailable, 2018/Dec." 1>&2 - echo "" 1>&2 - exit - shift - pdblist="$1" - if [ ! -e "$pdblist" ]; then - echo "Cannot open $pdblist" 1>&2 - echo "" 1>&2 - exit - fi - elif [ "$1" = "--pdbfilelist" ]; then - echo "--pdbfilelist is temporarily unavailable, 2018/Dec." 1>&2 - echo "" 1>&2 - exit - shift - ownlist="$1" - if [ ! 
-e "$ownlist" ]; then - echo "Cannot open $ownlist" 1>&2 - echo "" 1>&2 - exit - fi -# elif [ "$1" = "--enrich" ]; then -# enrich=1 -# enrichseq=1 -# enrichstr=1 -# seektarget="" -# elif [ "$1" = "--enrichseq" ]; then -# enrich=1 -# enrichseq=1 -# enrichstr=0 -# seektarget="-seq" -# elif [ "$1" = "--enrichstr" ]; then -# enrich=1 -# enrichseq=0 -# enrichstr=1 -# seektarget="-str" - elif [ "$1" = "--dash" ]; then - newdash=1 - distance="global" # can be override - iterate=3 # can be override - elif [ "$1" = "--originalseqonly" ]; then - newdash_originalsequenceonly=1 - elif [ "$1" = "--excludehomologs" ]; then # works with --dash only - exclude_ho=1 - elif [ "$1" = "--seedtable" ]; then - shift - seedtable="y" - seedtablefile="$1" - elif [ "$1" = "--seed" ]; then - shift - seed="m" - seedfiles="$seedfiles $1" - elif [ "$1" = "--minimumweight" ]; then - shift - minimumweight="$1" - elif [ "$1" = "--similaritylevel" ]; then - shift - similarityoffset="$1" - elif [ "$1" = "--unalignlevel" ]; then - shift - unalignlevel="$1" - unalignspecified=1 - elif [ "$1" = "--skipiterate" ]; then - shift - fixthreshold="$1" - elif [ "$1" = "--bunkatsunashi" ]; then - bunkatsuopt=" -B " - elif [ "$1" = "--sp" ]; then - sprigorous=1 - elif [ "$1" = "--focus" ]; then - focusarg=" -= " - elif [ "$1" = "--lhlimit" ]; then # atode namae henkou - shift - lhlimit=" -q $1 " - elif [ "$1" = "--sparsepickup" ]; then - shift - npickup="$1" - elif [ $progname = "fftns" -o $progname = "nwns" ]; then - if [ "$1" -gt 0 ]; then - cycle=`expr "$1" - 0` - fi - else - echo "Unknown option: $1" 1>&2 - er=1; -# exit 1; - fi - shift - done; - - -# echo "" 1>"$progressfile" - - if [ $treeext = "memsavetree" ] || [ $treeext = "stepadd" ]; then - if [ $distance != "ktuples" ]; then -# auto -> memsave && globalpair ha erabarenai node, -# ~/maffttmp wo tsukau noha meijiteki ni shitei saretatoki dake. - if [ ! 
"$MAFFT_TMPDIR" ]; then # space is acceptable, 2018/Mar/17 - MAFFT_TMPDIR="$HOME/maffttmp" - mkdir -p "$MAFFT_TMPDIR" || exit - fi - fi - fi - - - if [ ! "$MAFFT_TMPDIR" ]; then - MAFFT_TMPDIR="$TMPDIR" - fi - - TMPFILE=`env TMPDIR="$MAFFT_TMPDIR" mktemp -dt "$progname.XXXXXXXXXX"` - if [ $? -ne 0 ]; then - echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2 - mkdir -p "$MAFFT_TMPDIR/tmp" 1>&2 - TMPFILE=`mktemp -d "$MAFFT_TMPDIR/tmp/$progname.XXXXXXXXXX"` - fi - -# lfs getstripe $TMPFILE 2>/dev/null && lfs setstripe -c 1 $TMPFILE # 2017/Oct - lfs getstripe "$TMPFILE" > /dev/null 2>&1 && lfs setstripe -c 1 "$TMPFILE" > /dev/null 2>&1 # 2018/Feb - - if [ $os = "cygwin" ]; then - TMPFILE=`cygpath -w "$TMPFILE"` # necessary to pass path to f2cl on cyswin, somehow unnecessary in msys. - fi - - umask 077 -# mkdir "$TMPFILE" || er=1 - - -function removetmpfile() { # for MPI - while true - do - rm -rf "$TMPFILE" && break - echo Retrying to remove "$TMPFILE". It may take several seconds. 1>&2 - sleep 2 - done -} - - if [ $debug -eq 1 ]; then -# trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys -# trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0 15 - trap "tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 - else -# trap "rm -rf $TMPFILE" 0 15 - trap "removetmpfile" 0 15 - fi - if [ $# -eq 1 ]; then - if [ -r "$1" -o "$1" = - ]; then - - if [ -r "$addfile" ]; then - printf ''; - else - echo "$0": Cannot open "$addfile". 
1>&2 - echo "" 1>&2 - exit 1; - fi - - - cat "$1" | tr "\r" "\n" > "$TMPFILE/infile" - echo "" >> "$TMPFILE/infile" - cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> "$TMPFILE/infile" - cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile" - cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx" - cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable" - cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree" - cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_seedtablefile" - cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_lara.params" - cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/pdblist" - cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/ownlist" - cat "$anchorfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_externalanchors" - - SAVEIFS=$IFS - IFS='\n' - filelist="$1\n$addfile\n$scorematrix\n$mergetable\n$treeinfile\n$seedtablefile\n$laraparams\n$pdblist\n$ownlist" - for f in $filelist; do - # $f="" demo mudani file ga hashiru. - file "$f" 2>/dev/null | grep "UTF-[13][62]" >& /dev/null && printf "$f: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; - done - IFS=$SAVEIFS - -# echo $seedfiles - infilename="$1" - seedfilesintmp="/dev/null" - seednseq="0" - set $seedfiles > /dev/null - while [ $# -gt 1 ]; - do - shift - if [ -r "$1" ]; then - cat "$1" | tr "\r" "\n" > "$TMPFILE/seed$#" - file "$1" | grep "UTF-[13][62]" >& /dev/null && printf "$1: UTF-16 or UTF-32? Convert this file to ASCII\n\n" 1>&2 && exit 1; - else - echo "$0": Cannot open "$1". 1>&2 - echo "" 1>&2 - exit 1; - fi - seednseq=$seednseq" "`grep -c '^[>|=]' "$TMPFILE/seed$#"` - seedfilesintmp=$seedfilesintmp" "seed$# - done -# ls $TMPFILE -# echo $seedfilesintmp -# echo $seednseq - - - else - echo "$0": Cannot open "$1". 
1>&2 - echo "" 1>&2 - er=1 -# exit 1; - fi - else -# echo '$#'"=$#" 1>&2 - er=1 - fi - - - if [ $numthreads -lt 0 ]; then - if [ $os = "linux" ]; then - nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l` - ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'` - nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l` - if [ $nlogicalcore -eq 0 ]; then - echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile" - exit 1 - fi - if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then - numthreads=`expr $ncoresinacpu '*' $nphysicalcpu` -# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading -# numthreads=`expr $numthreads '+' 1` -# fi - else - numthreads=$nlogicalcore - fi - elif [ $os = "darwin" ]; then - numthreads=`sysctl -n hw.physicalcpu` - if [ -z $numthreads ]; then - echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile" - exit 1 - fi -# nlogicalcore=`sysctl -n hw.logicalcpu` -# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading -# numthreads=`expr $numthreads '+' 1` -# fi - elif [ "$windows" = "yes" ]; then -# numthreads=`wmic cpu get NumberOfCores | head -2 | tail -1 | awk '{print $1}'` - numthreads=`wmic cpu get NumberOfCores | awk 'BEGIN{n=0} {n+=$1} END{print n}'` - - else - echo "Cannot count the number of physical cores." 
1>>"$progressfile" - exit 1 - fi - echo "OS = "$os 1>>"$progressfile" - echo "The number of physical cores = " $numthreads 1>>"$progressfile" - fi - - if [ $numthreadstb -lt 0 ]; then - if [ $numthreads -lt 16 -o "$mpiscript" != "/dev/null" ]; then # mpi: museigen, multithread: 16 made -# if [ $numthreads -lt 31 ]; then - numthreadstb=$numthreads - else - numthreadstb=16 - fi - fi - - if [ $numthreadsit -lt 0 ]; then - if [ $numthreads -lt 8 ]; then - numthreadsit=$numthreads - else - numthreadsit=8 - fi - fi - - if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then - echo 'Impossible' 1>&2; - exit 1; - fi - - if [ "$addarg0" != " " ]; then -# iterate=0 # 2013/03/23 -> commented out, 2017/12 - "$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile" - nadd=`awk '{print $1}' "$TMPFILE/addsize"` - if [ $nadd -eq "0" ]; then - echo Check $addfile 1>&2 - exit 1; - fi - if [ $seed != "x" -o $seedtable != "x" ]; then - echo 'Impossible' 1>&2; - echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 
1>&2 - exit 1; - fi - else - nadd="0" - fi - - if [ $auto -eq 1 ]; then - "$prefix/countlen" < "$TMPFILE/infile" > "$TMPFILE/size" 2>>"$progressfile" - nseq=`awk '{print $1}' "$TMPFILE/size"` - nlen=`awk '{print $3}' "$TMPFILE/size"` - - if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then - distance="local" - iterate=1000 - cycle=1 - treeext="none" - elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then - distance="local" - iterate=2 - cycle=1 - treeext="none" - elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then - distance="ktuples" - iterate=2 - cycle=2 - treeext="none" - elif [ $nseq -lt 20000 ]; then # changed from 10000 2014/Oct/4 - distance="ktuples" - iterate=0 - cycle=2 - treeext="none" - elif [ $nseq -lt 100000 ]; then # changed from 50000 2017/Nov/24 - distance="ktuples" - iterate=0 - cycle=2 - if [ $fragment -eq 0 -a "$mergetable" = "/dev/null" ]; then - treeext="memsavetree" - fi - elif [ $nseq -lt 200000 ]; then # changed from 90000 2017/Nov/24 - distance="ktuples" - iterate=0 - cycle=1 - if [ $fragment -eq 0 -a "$mergetable" = "/dev/null" ]; then - treeext="memsavetree" - fi - elif [ $nlen -lt 3000 ]; then - distance="parttree" - partdist="localalign" - algopt=" " - algoptit=" " -# algspecified=1 - cycle=1 - else - distance="parttree" - partdist="ktuples" - algopt=" " - algoptit=" " -# algspecified=1 - cycle=1 - fi - - -# if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then -# distance="local" -# iterate=1000 -# cycle=1 -# elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then -# distance="local" -# iterate=2 -# cycle=1 -# elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then -# distance="ktuples" -# iterate=2 -# cycle=2 -# elif [ $nseq -lt 200000 ]; then -# distance="ktuples" -# iterate=0 -# treeinopt=" -U " -# treein=1 -# pileuporshuffle="a" -# elif [ $nlen -lt 3000 ]; then -# distance="parttree" -# partdist="localalign" -# algopt=" " -# algoptit=" " -## algspecified=1 -# cycle=1 -# else -# distance="parttree" -# partdist="ktuples" -# algopt=" " -# algoptit=" " -## algspecified=1 -# 
cycle=1 -# fi - - - if [ $fragment -ne 0 ]; then - norg=`expr $nseq '-' $nadd` - npair=`expr $norg '*' $nadd` - echo "nadd = " $nadd 1>>"$progressfile" - echo "npair = " $npair 1>>"$progressfile" - echo "nseq = " $nseq 1>>"$progressfile" - echo "nlen = " $nlen 1>>"$progressfile" -# nagasa check! -# - if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct - distance="ktuples" - echo "use ktuples, size=$tuplesize!" 1>>"$progressfile" - elif [ $npair -gt 3000000 -o $nlen -gt 5000 ]; then # 2017/Oct - distance="multi" - weighti="0.0" - echo "use multipair, weighti=0.0!" 1>>"$progressfile" - else - distance="multi" - echo "use multipair, weighti=$weighti!" 1>>"$progressfile" - fi - pairspecified=1 - fi - fi - - if [ `awk "BEGIN {print( 0.0+\"$sueff\" < 0.0 || 0.0+\"$sueff\" > 1.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "The argument of --mixedlinkage must be between 0.0 and 1.0" 1>>"$progressfile" - exit 1; - fi - - if [ $allowshift -eq 1 ]; then - if [ $unalignspecified -ne 1 ]; then - unalignlevel="0.8" - fi - if [ $shiftpenaltyspecified -ne 1 ]; then - spfactor="2.00" - fi - fi - - if [ $opdistspecified -ne 1 ]; then - gopdist=$gop - fi - - if [ $unalignlevel != "0.0" -o `awk "BEGIN {print( 0.0+\"$spfactor\" < 100.0 )}"` -gt 0 ]; then - nmodel=" -: " - termgapopt=" " - if [ $distance = "localgenaf" ]; then - printf "\n%s\n" "The combination of --allowshift and --genafpair (E-INS-i/-1) is not supported." 1>>"$progressfile" - printf "%s\n" "Instead, please try --allowshift --globalpair (G-INS-i/-1 in the web version)," 1>>"$progressfile" - printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i/-1), too." 1>>"$progressfile" - exit 1; - fi - if [ $distance != "global" -o `awk "BEGIN {print( 0.0+\"$weighti\" < 1.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is supported only with the --globalpair option." 
1>>"$progressfile" - exit 1; - fi - if [ $fragment -ne 0 ]; then - printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is not supported with the --addfragments option." 1>>"$progressfile" - exit 1; - fi - fi - - if [ `awk "BEGIN {print( 0.0+\"$spfactor\" < 1.0 )}"` -gt 0 ]; then - printf "\n%s\n" "shiftpenalty must be >1." 1>>"$progressfile" - exit 1; - fi - - if [ `awk "BEGIN {print( 0.0+\"$fixthreshold\" < 0.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "The 'fix' parameter must be >= 0.0" 1>>"$progressfile" - exit 1; - fi - - if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" < 0.0 || 0.0+\"$unalignlevel\" > 1.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "The 'unalignlevel' parameter must be between 0.0 and 1.0" 1>>"$progressfile" - exit 1; - fi - if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" > 0.0 )}"` -gt 0 ]; then - laof="0" - lexp="0" - pgaof="0" - pgexp="0" - LEXP="0" - GEXP="0" - termgapopt=" " -# if [ $auto -eq 1 -o $fragment -ne 0 -o $iterate -gt 0 ]; then - if [ $fragment -ne 0 ]; then - printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported with the --addfragments option." 1>>"$progressfile" - exit 1; - fi - if [ $distance = "parttree" ]; then - printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported in the (dp)parttree option." 1>>"$progressfile" - exit 1; - fi - if [ $distance = "localgenaf" ]; then - printf "\n%s\n" "The --genafpair is not supported in the 'unalignlevel > 0' mode." 1>>"$progressfile" - printf "%s\n" "Instead, please try --unalignlevel xx --globalpair," 1>>"$progressfile" - printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i), too." 1>>"$progressfile" - exit 1; - fi -# if [ $distance != "ktuples" -a `awk "BEGIN {print( 0.0+\"$weighti\" > 0.0 )}"` -gt 0 -a $iterate -gt 0 ]; then -# printf "\n%s\n\n" "Please add --weighti 0.0, for now." 
1>>"$progressfile" -# exit 1; -# fi - fi - - if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" != 0.0 && 0.0+\"$unalignlevel\" != 0.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "Do not simultaneously specify --similaritylevel and --unalignlevel" 1>>"$progressfile" - exit 1; - fi - - if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" < -1.0 || 0.0+\"$similarityoffset\" > 1.0 )}"` -gt 0 ]; then - printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile" - exit 1; - fi - aof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $aof}"` - laof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $laof}"` - pgaof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $pgaof}"` - - - if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then - iteratelimit=254 - else - iteratelimit=16 - fi - if [ $iterate -gt $iteratelimit ]; then #?? - iterate=$iteratelimit - fi - - if [ $rnaalifold -eq 1 ]; then - rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx " -# rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R " - rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R " - elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then - rnaopt=" -o $rgop -c $weightm -r $weightr " -# rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx " - rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx " - else - rnaopt=" " - rnaoptit=" -F " - fi - -# if [ $algspecified -eq 0 ]; then -# if [ $distance = "parttree" ]; then -# algopt=" -Q " -# algoptit=" " -# else -# algopt=" " -# algoptit=" " -# fi -# fi - - if [ $sprigorous -eq 1 ]; then - algopt=" -@ " - if [ $iterate -gt 0 ]; then - if [ $numthreadsit -eq 0 ]; then - algoptit=" -@ -B -Z -z 1000 " - else - echo "" 1>>"$progressfile" - echo "At present, the combination of --sp and iterative refinement is supported only in a single thread." 
1>>"$progressfile" - echo "Please try \"--thread -1 --threadit 0\", which runs the iterative refinment calculation on a single thread." 1>>"$progressfile" - echo "" 1>>"$progressfile" - exit 1; -# algoptit=" -@ -B -z 1000 " - fi - fi - termgapopt=" " - fft=0 - memopt=" -N " - fi - - model="$sbstmodel $kappa $fmodel $nmodel" - - if [ $er -eq 1 ]; then - echo "------------------------------------------------------------------------------" 1>&2 - echo " MAFFT" $version 1>&2 -# echo "" 1>&2 -# echo " Input format: fasta" 1>&2 -# echo "" 1>&2 -# echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2 - echo " https://mafft.cbrc.jp/alignment/software/" 1>&2 - echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2 -# echo "------------------------------------------------------------------------------" 1>&2 -# echo " % mafft in > out" 1>&2 - echo "------------------------------------------------------------------------------" 1>&2 -# echo "" 1>&2 - echo "High speed:" 1>&2 - echo " % mafft in > out" 1>&2 - echo " % mafft --retree 1 in > out (fast)" 1>&2 - echo "" 1>&2 - echo "High accuracy (for <~200 sequences x <~2,000 aa/nt):" 1>&2 - echo " % mafft --maxiterate 1000 --localpair in > out (% linsi in > out is also ok)" 1>&2 - echo " % mafft --maxiterate 1000 --genafpair in > out (% einsi in > out)" 1>&2 - echo " % mafft --maxiterate 1000 --globalpair in > out (% ginsi in > out)" 1>&2 - echo "" 1>&2 - echo "If unsure which option to use:" 1>&2 - echo " % mafft --auto in > out" 1>&2 - echo "" 1>&2 -# echo "Other options:" 1>&2 - echo "--op # : Gap opening penalty, default: 1.53" 1>&2 - echo "--ep # : Offset (works like gap extension penalty), default: 0.0" 1>&2 - echo "--maxiterate # : Maximum number of iterative refinement, default: 0" 1>&2 - echo "--clustalout : Output: clustal format, default: fasta" 1>&2 - echo "--reorder : Outorder: aligned, default: input order" 1>&2 - echo "--quiet : Do not report progress" 1>&2 - echo "--thread # : Number of threads 
(if unsure, --thread -1)" 1>&2 - echo "--dash : Add structural information (Rozewicki et al, submitted)" 1>&2 -# echo "" 1>&2 -# echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2 -# echo " most accurate in many cases, assumes only one alignable domain" 1>&2 -# echo "" 1>&2 -# echo " % mafft --maxiterate 1000 --genafpair in > out (E-INS-i)" 1>&2 -# echo " works well if many unalignable residues exist between alignable domains" 1>&2 -# echo "" 1>&2 -# echo " % mafft --maxiterate 1000 --globalpair in > out (G-INS-i)" 1>&2 -# echo " suitable for globally alignable sequences " 1>&2 -# echo "" 1>&2 -# echo " % mafft --maxiterate 1000 in > out (FFT-NS-i)" 1>&2 -# echo " accurate and slow, iterative refinement method " 1>&2 -# echo "" 1>&2 -# echo "If the input sequences are long (~1,000,000nt)," 1>&2 -# echo " % mafft --retree 1 --memsave --fft in > out (FFT-NS-1-memsave, new in v5.8)" 1>&2 -# echo "" 1>&2 -# echo "If many (~5,000) sequences are to be aligned," 1>&2 -# echo "" 1>&2 -# echo " % mafft --retree 1 [--memsave] --nofft in > out (NW-NS-1, new in v5.8)" 1>&2 -# echo "" 1>&2 -# echo " --localpair : All pairwise local alignment information is included" 1>&2 -# echo " to the objective function, default: off" 1>&2 -# echo " --globalpair : All pairwise global alignment information is included" 1>&2 -# echo " to the objective function, default: off" 1>&2 -# echo " --op # : Gap opening penalty, default: $defaultgop " 1>&2 -# echo " --ep # : Offset (works like gap extension penalty), default: $defaultaof " 1>&2 -# echo " --bl #, --jtt # : Scoring matrix, default: BLOSUM62" 1>&2 -# echo " Alternatives are BLOSUM (--bl) 30, 45, 62, 80, " 1>&2 -# echo " or JTT (--jtt) # PAM. 
" 1>&2 -# echo " --nuc or --amino : Sequence type, default: auto" 1>&2 -# echo " --retree # : The number of tree building in progressive method " 1>&2 -# echo " (see the paper for detail), default: $defaultcycle " 1>&2 -# echo " --maxiterate # : Maximum number of iterative refinement, default: $defaultiterate " 1>&2 -# if [ $defaultfft -eq 1 ]; then -# echo " --fft or --nofft: FFT is enabled or disabled, default: enabled" 1>&2 -# else -# echo " --fft or --nofft: FFT is enabled or disabled, default: disabled" 1>&2 -# fi -# echo " --memsave: Memory saving mode" 1>&2 -# echo " (for long genomic sequences), default: off" 1>&2 -# echo " --clustalout : Output: clustal format, default: fasta" 1>&2 -# echo " --reorder : Outorder: aligned, default: input order" 1>&2 -# echo " --quiet : Do not report progress" 1>&2 -# echo "-----------------------------------------------------------------------------" 1>&2 - exit 1; - fi - if [ $sw -eq 1 ]; then - swopt=" -A " - else - swopt=" " - fi - - if [ $distance = "fasta" -o $partdist = "fasta" ]; then - if [ ! "$FASTA_4_MAFFT" ]; then - FASTA_4_MAFFT=`which fasta34` - fi - - if [ ! -x "$FASTA_4_MAFFT" ]; then - echo "" 1>&2 - echo "== Install FASTA ========================================================" 1>&2 - echo "This option requires the fasta34 program (FASTA version x.xx or higher)" 1>&2 - echo "installed in your PATH. If you have the fasta34 program but have renamed" 1>&2 - echo "(like /usr/local/bin/myfasta), set the FASTA_4_MAFFT environment variable" 1>&2 - echo "to point your fasta34 (like setenv FASTA_4_MAFFT /usr/local/bin/myfasta)." 1>&2 - echo "=========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $distance = "last" -o $distance = "lastmulti" ]; then - if [ ! -x "$prefix/lastal" -o ! 
-x "$prefix/lastdb" ]; then - echo "" 1>&2 - echo "== Install LAST ============================================================" 1>&2 - echo "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." 1>&2 - echo "http://last.cbrc.jp/" 1>&2 - echo "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " 1>&2 - echo "============================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $distance = "lara" -o $distance = "slara" ]; then - if [ ! -x "$prefix/mafft_lara" ]; then - echo "" 1>&2 - echo "== Install LaRA =========================================================" 1>&2 - echo "This option requires LaRA (Bauer et al. http://www.planet-lisa.net/)." 1>&2 - echo "The executable have to be renamed to 'mafft_lara' and installed into " 1>&2 - echo "the $prefix directory. " 1>&2 - echo "A configuration file of LaRA also have to be given" 1>&2 - echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 - echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 - echo "=========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - if [ ! -s "$laraparams" ]; then - echo "" 1>&2 - echo "== Configure LaRA =======================================================" 1>&2 - echo "A configuration file of LaRA have to be given" 1>&2 - echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 - echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 - echo "=========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then - if [ ! -x "$prefix/foldalign210" ]; then - echo "" 1>&2 - echo "== Install FOLDALIGN ====================================================" 1>&2 - echo "This option requires FOLDALIGN (Havgaard et al. http://foldalign.ku.dk/)." 
1>&2 - echo "The executable have to be renamed to 'foldalign210' and installed into " 1>&2 - echo "the $prefix directory. " 1>&2 - echo "=========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then - if [ ! -x "$prefix/mxscarnamod" ]; then - echo "" 1>&2 - echo "== Install MXSCARNA ======================================================" 1>&2 - echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2 - echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2 - echo "which contains the modified version of MXSCARNA." 1>&2 - echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2 - echo "==========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $distance = "dafs" -o $dafs -eq 1 ]; then - if [ ! -x "$prefix/dafs" ]; then - echo "" 1>&2 - echo "== Install DAFS===========================================================" 1>&2 - echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2 - echo "http://www.ncrna.org/ " 1>&2 - echo "==========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - if [ $contrafold -eq 1 ]; then - if [ ! -x "$prefix/contrafold" ]; then - echo "" 1>&2 - echo "== Install CONTRAfold ===================================================" 1>&2 - echo "This option requires CONTRAfold" 1>&2 - echo "(Do et al. http://contra.stanford.edu/contrafold/)." 1>&2 - echo "The executable 'contrafold' have to be installed into " 1>&2 - echo "the $prefix directory. 
" 1>&2 - echo "=========================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - fi - -#old -# if [ $treeout -eq 1 ]; then -# parttreeoutopt="-t" -# if [ $cycle -eq 0 ]; then -# treeoutopt="-t -T" -# groupsize=1 -# iterate=0 -# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then -# distance="distonly" -# fi -# else -# treeoutopt="-t" -# fi -# else -# parttreeoutopt=" " -# if [ $cycle -eq 0 ]; then -# treeoutopt="-t -T" -# iterate=0 -# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then -# distance="distonly" -# fi -# else -# treeoutopt=" " -# fi -# fi - -#new - if [ $cycle -eq 0 ]; then - if [ $nodeout -eq 1 ]; then - treeoutopt="-^ -T" - else - treeoutopt="-t -T" - fi - iterate=0 - weighti="0.0" # 2016Jul31, tbfast.c kara idou -# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance -# if [ $distance = "global" ]; then -# distance="distonly" -# fi - if [ $treeout -eq 1 ]; then - parttreeoutopt="-t" - groupsize=1 - else - parttreeoutopt=" " - fi - if [ $distout -eq 1 ]; then - distoutopt="-y -T" - if [ $treeout -eq 0 ]; then - treeoutopt="" - fi - fi - else - if [ $nodeout -eq 1 ]; then - if [ $iterate -gt 0 ]; then - echo "The --nodeout option supports only progressive method (--maxiterate 0) for now." 1>&2 - exit 1 - fi - parttreeoutopt="-t" - treeoutopt="-^" - elif [ $treeout -eq 1 ]; then - parttreeoutopt="-t" - treeoutopt="-t" - else - parttreeoutopt=" " - treeoutopt=" " - fi - if [ $distout -eq 1 ]; then - distoutopt="-y" - fi - fi -# - - formatcheck=`grep -c '^[[:blank:]]\+>' "$TMPFILE/infile" | head -1 ` - if [ $formatcheck -gt 0 ]; then - echo "The first character of a description line must be " 1>&2 - echo "the greater-than (>) symbol, not a blank." 
1>&2 - echo "Please check the format around the following line(s):" 1>&2 - grep -n '^[[:blank:]]\+>' "$TMPFILE/infile" 1>&2 - exit 1 - fi - - nseq=`grep -c '^[>|=]' "$TMPFILE/infile" | head -1 ` - if [ $nseq -eq 2 ]; then - cycle=1 - fi - if [ $cycle -gt 3 ]; then - cycle=3 - fi - - if [ $nseq -gt 60000 -a $iterate -gt 1 ]; then # 2014/Oct/22, test - echo "Too many sequences to perform iterative refinement!" 1>&2 - echo "Please use a progressive method." 1>&2 - exit 1 - fi - if [ $distance = "lastmulti" -o $distance = "multi" ]; then - if [ $fragment -eq 0 ]; then - echo 'Specify --addfragments too' 1>&2 - exit 1 - fi - fi - - if [ $fragment -ne 0 ]; then - if [ $pairspecified -eq 0 ]; then - distance="multi" - fi - if [ $distance != "multi" -a $distance != "hybrid" -a $distance != "lastmulti" -a $distance != "local" -a $distance != "last" -a $distance != "ktuples" -a $distance != "ktuplesmulti" ]; then - echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' 1>&2 - exit 1 - fi - fi - - if [ "$memopt" = " -M -B " -a "$distance" != "ktuples" ]; then - echo "Impossible" 1>&2 - exit 1 - fi - - if [ $distance = "parttree" ]; then - if [ "$mergetable" != "/dev/null" ]; then - echo "The combination of (dp)parttree and merge is Impossible. " 1>&2 - exit 1 - fi - if [ $addfile != "/dev/null" ]; then - echo "The combination of (dp)parttree and add(fragments) is Impossible. 
" 1>&2 - exit 1 - fi - if [ $seed != "x" -o $seedtable != "x" ]; then - echo "Impossible" 1>&2 - exit 1 - fi - if [ $iterate -gt 1 ]; then - echo "Impossible" 1>&2 - exit 1 - fi - if [ $outorder = "aligned" ]; then - outorder="input" - fi - outorder="input" # partorder ga kiku - if [ $partdist = "localalign" ]; then - splitopt=" -U " # -U -l -> fast - cycle=1 - elif [ $partdist = "fasta" ]; then - splitopt=" -S " - cycle=1 - else - splitopt=" " - fi - fi - - - if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" -a $ownlist = "/dev/null" -a $pdblist = "/dev/null" -a $enrichstr -eq 0 -a $newdash -eq 0 \) ]; then - localparam="" - weighti="0.0" - elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" -o $ownlist != "/dev/null" -o $pdblist != "/dev/null" -o $enrichstr -eq 1 -o $newdash -eq 1 \) ]; then - if [ $cycle -lt 2 ]; then - cycle=2 # disttbfast ha seed hi-taiou # chuui 2014Aug21 - fi - if [ $iterate -lt 2 ]; then - echo "############################################################################" 1>&2 - echo "# Warning:" 1>&2 - echo "# Progressive alignment method is incompatible with the --seed option." 1>&2 - echo "# Automatically switched to the iterative refinement method." 1>&2 - echo "# " 1>&2 - echo "# Also consider using the '--add' option, which is compatible with" 1>&2 - echo "# the progressive method and FASTER than the '--seed' option." 
1>&2 - echo "# Usage is:" 1>&2 - echo "# % mafft --add newSequences existingAlignment > output" 1>&2 - echo "############################################################################" 1>&2 - iterate=2 - fi - localparam="-l "$weighti - elif [ $distance = "parttree" ]; then - localparam="" - weighti="0.0" - if [ $groupsize -gt -1 ]; then - cycle=1 - fi - else -# localparam="-B -l "$weighti # weighti=0 demo bunkatsu nashi - localparam="-l "$weighti # -B (bunkatsunashi) ha dvtditr.c de taiou (17/Jan/15) - if [ $cycle -gt 1 ]; then # 09/01/08 - cycle=1 - fi - fi - - - if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then - aof="0.000" - if [ $oldgenafparam -ne 1 ]; then - laof="0.0" - lexp="0.0" -# LEXP="0.0" # default = 0.0 - usenaivepairscore="-Z" - fi - fi - - -# if [ $nseq -gt 5000 ]; then -# fft=0 -# fi - if [ $forcefft -eq 1 ]; then - param_fft=" -G " - fft=1 - elif [ $fft -eq 1 ]; then - param_fft=" -F " - else - param_fft=" " - fi - - if [ $seed != "x" -a $seedtable != "x" ]; then - echo 'Use either one of seedtable and seed. Not both.' 1>&2 - exit 1 - fi - if [ $f2clext = "-E" -a $anysymbol -gt 0 ]; then - echo '' 1>&2 - echo 'The combination of --text and ( --anysymbol or --preservecase ) is impossible.' 1>&2 - echo '' 1>&2 - exit 1 - fi - -# if [ $f2clext = "-E" -a $scorematrix != "/dev/null" ]; then -# echo '' 1>&2 -# echo 'At present, the combination of --text and (--aamatrix) is impossible.' 1>&2 -# echo '' 1>&2 -# exit 1 -# fi - - memsavetree=0 - if [ $treeext != "none" ]; then - if [ $distance == "ktuples" ]; then - treein=1 - treeinopt=" -U " - if [ $treeext == "randomchain" ]; then - echo "shuffle $randomseed" > "$TMPFILE/_guidetree" - cycle=1 # disttbfast.c dem shitei - elif [ $treeext == "pileup" ]; then - echo "pileup" > "$TMPFILE/_guidetree" - cycle=1 # disttbfast. 
shitei - elif [ $treeext == "memsavetree" ]; then - echo "very compact" > "$TMPFILE/_guidetree" - memsavetree=1 - elif [ $treeext == "memsavetreex" ]; then - echo "compact " "$initialramusage" > "$TMPFILE/_guidetree" - memsavetree=1 - elif [ $treeext == "stepadd" ]; then - echo "stepadd" > "$TMPFILE/_guidetree" - memsavetree=1 - elif [ $treeext == "youngestlinkage" ]; then - echo "youngestlinkage" > "$TMPFILE/_guidetree" - memsavetree=1 - else - echo "error in mafft.tmpl" - exit - fi - else # globalpair, localpair, genafpair, oldgenafpair -# treein, treeinopt ha kimaranai - if [ $treeext == "memsavetree" -o $treeext == "stepadd" ]; then - memsavetree=1 - else - echo "With globalpair, localpair or genafpair," 1>>"$progressfile" - echo "Use --large, --minimumlinkage, --averagelinkage or --mixedlinkage." 1>>"$progressfile" - echo "--$treeext is supported only with --6merpair." 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit - fi - fi - if [ $iterate -gt 0 ]; then - echo 'Iterative refinment is not supported for --large or --'$treeext 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 - fi - if [ $fragment -ne 0 ]; then - echo '--addfragments, --addfull or --addlong is not yet supported for --large or --'$treeext 1>>"$progressfile" - echo "Use --add newsequences --$treeext" 1>>"$progressfile" - echo "Or, --addfragments (long, full) newsequences, without --"$treeext 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 - fi - if [ "$mergetable" != "/dev/null" ]; then # 2018/Mar/2 - echo '--merge is not yet supported for --large or --'$treeext 1>>"$progressfile" - echo "Use --merge without --"$treeext 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 - fi - fi - - - if [ $nadd -gt "0" ]; then - if [ $fragment -eq "1" ]; then - addarg="$addarg0 $nadd -g -0.01" - addsinglearg="" - cycle=1 # chuui 2014Aug25 - iterate=0 - elif [ $fragment -eq "-1" ]; then - addarg="$addarg0 $nadd" - addsinglearg="-V" # allowlongadds, 2014/04/02 - cycle=1 # chuui 
2014Aug25 - iterate=0 - elif [ $fragment -eq "-2" ]; then - addarg="$addarg0 $nadd" - addsinglearg="-V" # allowlongadds + smoothing - add2ndhalfarg=$add2ndhalfarg" -p " - cycle=1 # chuui 2014Aug25 - usenaivepairscore="-Z" # 2015Jun01 - laof=0.0 # 2015Jun01 - lexp=0.0 # 2015Jun01 - iterate=0 - else - addarg="$addarg0 $nadd" - addsinglearg="" -# iterate=1 # iterate ha shitei dori - bunkatsuopt=" -B " # fftnsi demo bunktasu shinai - if [ "$add2ndhalfarg" != " " ]; then - if [ $auto -eq 1 -o $iterate -gt 0 ]; then - echo '' 1>>"$progressfile" - echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" - echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" - echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 - fi - fi - fi - - -# cycle=1 # chuui 2014Aug19 -# iterate=0 -# treealg=" -q " ## 2012/01/24 ## removed 2012/02/06 - else - if [ "$add2ndhalfarg" != " " ]; then - echo '' 1>>"$progressfile" - echo 'The --keeplength and --mapout options are supported' 1>>"$progressfile" - echo 'only with --add, --addfragments or --addlong.' 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 - fi - fi - - - if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then -# echo "use disttbfast" -# echo cycle = $cycle - cycletbfast=1 # tbfast wo jikkou shinai - cycledisttbfast=$cycle # disttbfast ni -E cycle wo watasu - if [ $cycledisttbfast -eq 0 ]; then # --treeout de tsukau - cycledisttbfast=1 - fi - else -# echo "use tbfast" -# echo cycle = $cycle - cycletbfast=$cycle # 1 ijou nara jikkou - cycledisttbfast=1 # disttbfast ha ikkai dake - fi - -# echo localparam= -# echo $localparam -# echo cycletbfast= -# echo $cycletbfast -# echo cycledisttbfast= -# echo $cycledisttbfast - -#exit - - if [ $adjustdirection -gt 0 -a $seed != "x" ]; then - echo '' 1>&2 - echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 
1>&2 - echo '' 1>&2 - exit 1 - fi - - - if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then - if [ $distance = "ktuples" ]; then - echo 'Not supported.' 1>&2 - echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2 - echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2 - exit 1 - fi - if [ $f2clext = "-E" ]; then - echo '' 1>&2 - echo 'For RNA alignment, the --text mode is impossible.' 1>&2 - echo '' 1>&2 - exit 1 - fi - fi - -# cycle ga atode henkou sareru node koko de strategy no namae wo kimeru. -# kokokara - if [ $treeext = "pileup" ]; then - strategy="Pileup-" - elif [ $treeext = "randomchain" ]; then - strategy="Randomchain-" - elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then - if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then - strategy="X-" - elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o "globalgenaf" ]; then - strategy="Q-" - fi - elif [ $distance = "fasta" -a $sw -eq 0 ]; then - strategy="F-" - elif [ $distance = "fasta" -a $sw -eq 1 ]; then - strategy="H-" - elif [ $distance = "blast" ]; then - strategy="B-" - elif [ $distance = "global" -o $distance = "distonly" ]; then - strategy="G-" - elif [ $distance = "local" ]; then - strategy="L-" - elif [ $distance = "last" ]; then - strategy="Last-" - elif [ $distance = "hybrid" ]; then - strategy="Hybrid-" - elif [ $distance = "multi" ]; then - strategy="Multi-" - elif [ $distance = "lastmulti" ]; then - strategy="LastMulti-" - elif [ $distance = "localgenaf" ]; then - strategy="E-" - elif [ $distance = "globalgenaf" ]; then - strategy="K-" - elif [ $fft -eq 1 ]; then - strategy="FFT-" - else - strategy="NW-" - fi - if [ $memsavetree -eq 1 ]; then - strategy=$strategy"large-" - fi -# if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then - if [ 
`awk "BEGIN {print(0.0+\"$weighti\">0.0)}"` -gt 0 ]; then - strategy=$strategy"I" - fi - strategy=$strategy"NS-" - if [ $iterate -gt 0 ]; then - strategy=$strategy"i" - elif [ $distance = "parttree" ]; then - if [ $partdist = "fasta" ]; then - strategy=$strategy"FastaPartTree-"$cycle - elif [ $partdist = "localalign" ]; then - strategy=$strategy"DPPartTree-"$cycle - else - strategy=$strategy"PartTree-"$cycle - fi - elif [ $fragment -eq 1 ]; then - strategy=$strategy"fragment" - elif [ $fragment -eq -1 ]; then - strategy=$strategy"full" - elif [ $fragment -eq -2 ]; then - strategy=$strategy"long" - else - strategy=$strategy$cycle - fi - - explanation='?' - performance='Not tested.' - if [ $strategy = "F-INS-i" ]; then - explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' - performance='Most accurate, but very slow' - elif [ $strategy = "L-INS-i" ]; then - explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' - performance='Probably most accurate, very slow' - elif [ $strategy = "E-INS-i" ]; then - explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment with generalized affine gap costs (Altschul 1998)' - performance='Suitable for sequences with long unalignable regions, very slow' - elif [ $strategy = "G-INS-i" ]; then - explanation='Iterative refinement method (<'$iterate') with GLOBAL pairwise alignment information' - performance='Suitable for sequences of similar lengths, very slow' - elif [ $strategy = "X-INS-i" ]; then - explanation='RNA secondary structure information is taken into account.' 
- performance='For short RNA sequences only, extremely slow' - elif [ $strategy = "F-INS-1" ]; then - explanation='Progressive method incorporating LOCAL pairwise alignment information' - elif [ $strategy = "L-INS-1" ]; then - explanation='Progressive method incorporating LOCAL pairwise alignment information' - elif [ $strategy = "G-INS-1" ]; then - explanation='Progressive method incorporating GLOBAL pairwise alignment information' - elif [ $strategy = "FFT-NS-i" -o $strategy = "NW-NS-i" ]; then - explanation='Iterative refinement method (max. '$iterate' iterations)' - if [ $iterate -gt 2 ]; then - performance='Accurate but slow' - else - performance='Standard' - fi - elif [ $strategy = "FFT-NS-2" -o $strategy = "NW-NS-2" ]; then - explanation='Progressive method (guide trees were built '$cycle' times.)' - performance='Fast but rough' - elif [ $strategy = "FFT-NS-1" -o $strategy = "NW-NS-1" ]; then - explanation='Progressive method (rough guide tree was used.)' - performance='Very fast but very rough' - fi - - if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then - outputopt=" -c $strategy -r $TMPFILE/order $f2clext " - elif [ $outputformat = "clustal" -a $outorder = "input" ]; then - outputopt=" -c $strategy $f2clext " - elif [ $outputformat = "phylip" -a $outorder = "aligned" ]; then - outputopt=" -y -r $TMPFILE/order " - elif [ $outputformat = "phylip" -a $outorder = "input" ]; then - outputopt=" -y " - elif [ $outputformat = "pir" -a $outorder = "aligned" ]; then - outputopt=" -f -r $TMPFILE/order " - else - outputopt="-f" - fi - - if [ $newdash_originalsequenceonly -eq 1 ]; then - outputopt="$outputopt -d " - fi -# kokomade - - - - pushd "$TMPFILE" > /dev/null - - cat /dev/null > pre - -# echo "nseq = " $nseq 1>>"$progressfile" -# echo "distance = " $distance 1>>"$progressfile" -# echo "iterate = " $iterate 1>>"$progressfile" -# echo "cycle = " $cycle 1>>"$progressfile" - - if [ $anysymbol -eq 1 ]; then - mv infile orig - "$prefix/replaceu" $seqtype 
-i orig > infile 2>>"$progressfile" || exit 1 - fi - - if [ $mergetable != "/dev/null" ]; then - if [ $nadd -gt "0" ]; then - echo "Impossible" 1>&2 - exit 1 - fi -# if [ $seed != "x" -o $seedtable != "x" ]; then -# echo "This version does not support the combination of merge and seed." 1>&2 -# exit 1 -# fi -# iterate=0 # 2013/04/16 - mergearg="-H $seedoffset" - fi - - if [ $adjustdirection -gt 0 ]; then - if [ $fragment -ne 0 ]; then - fragarg="-F" # - else - fragarg="-F" # 2014/02/06, do not consider other additional sequences, even in the case of --add - fi - if [ $adjustdirection -eq 1 ]; then - "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 5000 -o a > _direction 2>>"$progressfile" - elif [ $adjustdirection -eq 2 ]; then - "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 100 -o a -d > _direction 2>>"$progressfile" - fi - "$prefix/setdirection" $mergearg -d _direction -i infile > infiled 2>>"$progressfile" || exit - mv infiled infile - if [ $anysymbol -eq 1 ]; then - "$prefix/setdirection" $mergearg -d _direction -i orig -r > origd 2>>"$progressfile" || exit - mv origd orig - fi - fi - - if [ $seed != "x" -o $seedtable != "x" ]; then - if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then - echo "The combination of --seed and (--pdbidlist or --pdbfilelist) is impossible." 1>>"$progressfile" - exit 1 - fi -# if [ $enrich -eq 1 ]; then -# echo "The combination of --seed and (--enrich, --enrichseq or --enrichstr) is impossible at present." 1>>"$progressfile" -# exit 1 -# fi - - if [ $newdash -eq 1 ]; then - echo "The combination of --seed and --dash is impossible at present." 
1>>"$progressfile" - exit 1 - fi - fi - - -# if [ $enrich -eq 1 ]; then -# if [ $ownlist != "/dev/null" ]; then -# echo "Warning: Sequence homologs of the structures given with the --pdbfilelist option cannot be collected.\n" 1>>"$progressfile" -# fi -# echo "SEEKQUENCER (http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/) is" 1>>"$progressfile" -# if [ $pdblist != "/dev/null" ]; then -# echo "collecting homoplogs of the input sequences and the structures given with the --pdbidlist option." 1>>"$progressfile" -# perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -idf pdblist -out seekout -mod mafftash-split 2>>"seekerr" -# seekres="$?" -# else -# echo "collecting homologs of the input sequences." 1>>"$progressfile" -# perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -out seekout -mod mafftash-split 2>>"seekerr" -# seekres="$?" -# fi -# cat seekerr 1>>"$progressfile" -# -# if [ $seekres -ne "0" ]; then -# echo "Error in SEEKQUENCER" 1>>"$progressfile" -# exit 1; -# fi -# echo "Done." 
1>>"$progressfile" -# -# if [ $enrichseq -eq 1 ]; then -## cat seekout.seq >> infile -# if [ $anysymbol -eq 1 ]; then -# "$prefix/replaceu" $seqtype -i seekout.seq -o $nseq >> infile -# cat seekout.seq >> orig -# else -# "$prefix/replaceu" $seqtype -i seekout.seq | sed 's/_os_[0-9]*_oe_//' >> infile -# fi -# -# fi -# if [ $enrichstr -eq 1 ]; then -# nseekstr=`wc -l < seekout.str` -# if [ $nseekstr -gt 1 ]; then -# cat seekout.str >> pdblist -# pdblist="tsukaimasu" -# fi -# fi -# fi - - if [ $seed != "x" ]; then - mv infile infile2 - if [ $anysymbol -eq 1 ]; then - mv orig orig2 - cat /dev/null > orig - fi - cat /dev/null > infile - cat /dev/null > hat3.seed - seedoffset=0 -# echo "seednseq="$seednseq -# echo "seedoffset="$seedoffset - set $seednseq >> "$progressfile" -# echo $# - while [ $# -gt 1 ] - do - shift -# echo "num="$# - - if [ $anysymbol -eq 1 ]; then - cat seed$# >> orig - "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1 - mv clean seed$# - fi - "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1 - cat hat3 >> hat3.seed -# echo "$1" - seedoffset=`expr $seedoffset + $1` -# echo "$1" -# echo "seedoffset="$seedoffset - done; -# echo "seedoffset="$seedoffset - if [ $anysymbol -eq 1 ]; then - "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi - cat orig2 >> orig - else - cat infile2 >> infile - fi - elif [ $seedtable != "x" ]; then - cat _seedtablefile > hat3.seed - elif [ $newdash -eq 1 ]; then - seemstobe=`"$prefix/countlen" -i infile | awk '{print $6}'` - if [ $seemstobe = "d" -a "x$seqtype" != "x-P" ]; then - echo "" 1>>"$progressfile" - echo "Error: This data seems to be nucleotide sequences." 1>>"$progressfile" - echo "Add the --amino flag if this is surely protein." 
1>>"$progressfile" - echo "" 1>>"$progressfile" - exit 1; - fi - if [ $anysymbol -eq 1 ]; then - mv orig infile # replaceu wo mukouka - fi - sed 's/-//g' infile > dashin # gap nozoku - - if [ ! -x "$prefix/dash_client" -o ! -x "$prefix/dash_client" ]; then - echo "" 1>&2 - echo "== Install DASH client =====================================================" 1>&2 - echo "To use this feature, uncomment the following line in Makefile" 1>&2 - echo "DASH_CLIENT = dash_client" 1>&2 - echo "and re-compile the source." 1>&2 - echo "Note that it requires the 'Go' compiler." 1>&2 - echo "============================================================================" 1>&2 - echo "" 1>&2 - exit 1 - fi - - echo "Calling DASH (https://sysimm.org/dash/)" 1>>"$progressfile" - "$prefix/dash_client" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" - dashres="$?" - if [ $dashres -ne "0" ]; then - echo "Error in DASH" 1>>"$progressfile" - echo "To use this feature, compiled with" 1>>"$progressfile" - exit 1; - fi - - if [ $exclude_ho -eq 1 ]; then # amari yokunai - awk 'BEGIN{out=1} !/^>_addedbymaffte_/{if(out) print; out=1} /^>_addedbymaffte_/{out=0}' dashsequences | sed 's/>DASH_/>DASH|/' > ho_excluded - mv ho_excluded dashsequences - fi - - if [ "$mergetable" != "/dev/null" ]; then # 2020/Apr/30 - ndash=`grep -c '>DASH_' dashsequences | head -1` -# echo "ndash = " $ndash - awk "{for( i=1;i<=NF;i++){if(0+\$i==0)break; printf( \"%d \", $ndash+\$i); } print \"\" }" _subalignmentstable > _subalignmentstableshifted - mv _subalignmentstableshifted _subalignmentstable - cp dashsequences dashsequences.bk - awk "BEGIN{nout=0} {if(\$1~/^>/) nout++; if( nout <= $ndash ) print;}" dashsequences > infile2 - cat infile >> infile2 - cp infile2 dashsequences - fi - - sed 's/>DASH_/>DASH|/' dashsequences > renamed - mv renamed dashsequences - echo "Done." 
1>>"$progressfile" -# cat hat3.seed - seedoffset=`grep -c '^[>|=]' dashsequences | head -1 ` - echo "# of structures = " 1>>"$progressfile" - echo $seedoffset 1>>"$progressfile" - if [ $anysymbol -eq 1 ]; then - cat dashsequences >> orig - "$prefix/replaceu" $seqtype -i dashsequences -o 0 > clean 2>>"$progressfile" || exit 1 - mv clean infile - -# "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi -# cat orig2 >> orig - else - cat dashsequences > infile -# cat infile2 >> infile - fi - else - cat /dev/null > hat3.seed - fi -# cat hat3.seed - - if [ $mccaskill -eq 1 ]; then - "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 - elif [ $dafs -eq 1 ]; then - "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 - elif [ $contrafold -eq 1 ]; then - "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 - fi - if [ $distance = "fasta" ]; then - "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "blast" ]; then - "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif 
[ $distance = "foldalignlocal" ]; then - "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -Q $spfactor -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "foldalignglobal" ]; then - "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "slara" ]; then - "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "lara" ]; then - "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -B -d "$prefix" < infile > /dev/null 
2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "scarna" ]; then -# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 -# cat hat3.seed hat3 > hatx -# mv hatx hat3 -# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - "$prefix/tbfast" _ -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "dafs" ]; then - "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 
- elif [ $distance = "global" -a $memsavetree -eq 1 ]; then - if [ "$mpiscript" != "/dev/null" ]; then - sh $mpiscript "$prefix/nodepair_mpi" $lhlimit -u $unalignlevel $localparam $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 - fi - echo 'nodepair' > _guidetree - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "local" -a $memsavetree -eq 1 ]; then - if [ "$mpiscript" != "/dev/null" ]; then - sh $mpiscript "$prefix/nodepair_mpi" $lhlimit -u $unalignlevel $localparam $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 - fi - - echo 'nodepair' > _guidetree - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "localgenaf" -a 
$memsavetree -eq 1 ]; then - "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 - echo 'nodepair' > _guidetree - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "global" -a $memsavetree -eq 0 ]; then - "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - - elif [ $distance = "local" -a $memsavetree -eq 0 ]; then - if [ $fragment -ne 0 ]; then - "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L 
$usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - fi - elif [ $distance = "globalgenaf" ]; then - "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -O $GGOP -E $GEXP -K $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "localgenaf" -a $memsavetree -eq 0 ]; then - "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "last" ]; then - if [ $fragment -ne 0 ]; then - "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/addsingle" -Q 
100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 -# addarg wo watasanai - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - fi - elif [ $distance = "lastmulti" ]; then - "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 - mv hat2 hat2i - "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hat2 hat2n - mv hatx hat3 - if [ $fragment -ne 0 ]; then - "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - echo "Impossible" 1>&2 - exit 1 - fi - elif [ $distance = "multi" ]; then - "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 - mv 
hat2 hat2i - "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hat2 hat2n - mv hatx hat3 - if [ $fragment -ne 0 ]; then - "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - echo "Impossible" 1>&2 - exit 1 - fi - elif [ $distance = "hybrid" ]; then - "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1 - cat hat3.seed hat3 > hatx - mv hatx hat3 - "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - if [ $fragment -ne 0 ]; then - "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - fi -# elif [ $distance = "distonly" ]; then -# "$prefix/pairlocalalign" -C $numthreads 
$seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1 -# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - elif [ $distance = "parttree" ]; then - "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1 - mv hat3.seed hat3 - elif [ $distance = "ktuplesmulti" ]; then -# "$prefix/dndpre" $model -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 -# mv hat2 hat2i -# "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 -# mv hat2 hat2n - if [ $fragment -ne 0 ]; then - "$prefix/addsingle" -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 -# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - echo "Impossible" 1>&2 - exit 1 - fi - else 
- if [ $fragment -ne 0 ]; then - "$prefix/addsingle" -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 - else - "$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -g $gexp -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg $anchoropt -x $maxanchorseparation $oneiterationopt < infile > pre 2>>"$progressfile" || exit 1 - mv hat3.seed hat3 - fi - fi - while [ $cycletbfast -gt 1 ] - do - if [ $distance = "parttree" ]; then - mv pre infile - "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1 - else - "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum -C $numthreadstb $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1 -# fragment>0 no baai, nanimoshinai -# seed youchuui!! 
- fi - cycletbfast=`expr $cycletbfast - 1` - done - if [ $iterate -gt 0 ]; then - if [ $distance = "ktuples" ]; then - "$prefix/dndpre" $seqtype $model -M 2 -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1 - fi - "$prefix/dvtditr" -W $minimumweight $bunkatsuopt -E $fixthreshold -s $unalignlevel $legacygapopt $mergearg $outnum -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -Q $spfactor -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg -K $nadd < pre > /dev/null 2>>"$progressfile" || exit 1 - fi - if [ $coreout -eq 1 ]; then - "$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2 - mv pre2 pre - elif [ $anysymbol -eq 1 ]; then - "$prefix/restoreu" $add2ndhalfarg -a pre -i orig > restored || exit 1 - mv restored pre - fi - - - - - echo '' 1>>"$progressfile" - if [ $mccaskill -eq 1 ]; then - echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>>"$progressfile" - echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile" - echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile" - echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile" - echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile" - echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile" - echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" - echo "" 1>>"$progressfile" - elif [ $contrafold -eq 1 ]; then - echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile" - echo "and then incorporated in the iterative alignment process (4)." 
1>>"$progressfile" - echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile" - echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" - echo "" 1>>"$progressfile" - fi - if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then - echo "Input structures are decomposed into structural domains using" 1>>"$progressfile" - echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile" - echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile" - echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile" - fi - if [ $pdblist != "/dev/null" ]; then - echo "Pre-computed alignments stored in " 1>>"$progressfile" - echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile" - fi - if [ $distance = "fasta" -o $partdist = "fasta" ]; then - echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile" - echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile" - fi - if [ $distance = "blast" ]; then - echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile" - echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile" - fi - if [ $distance = "last" -o $distance = "lastmulti" ]; then - echo "Pairwise alignments were computed by LAST" 1>>"$progressfile" - echo "http://last.cbrc.jp/" 1>>"$progressfile" - echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile" - fi - if [ $distance = "scarna" ]; then - echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile" - echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile" - fi - if [ $distance = "dafs" ]; then - echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile" - echo "(Sato et al., 2012,,,,)." 
1>>"$progressfile" - fi - if [ $distance = "lara" -o $distance = "slara" ]; then - echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile" - echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>>"$progressfile" - fi - if [ $distance = "foldalignlocal" ]; then - echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile" - echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" - fi - if [ $distance = "foldalignglobal" ]; then - echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile" - echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" - fi -# printf "\n" 1>>"$progressfile" - echo 'Strategy:' 1>>"$progressfile" - printf ' '$strategy 1>>"$progressfile" - echo ' ('$performance')' 1>>"$progressfile" - echo ' '$explanation 1>>"$progressfile" - echo '' 1>>"$progressfile" - echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile" - echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile" - echo "" 1>>"$progressfile" - echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile" - echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile" - echo "To disable this change, add the --leavegappyregion option." 1>>"$progressfile" -# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile" -# echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>>"$progressfile" - if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then - echo "" 1>>"$progressfile" - if [ $oldgenafparam -eq 1 ]; then - echo "Obsolete parameters used for this calculation." 1>>"$progressfile" - echo "Also try the new parameters for E-INS-i, by not specifying --oldgenafpair." 
1>>"$progressfile" - else - echo "Parameters for the E-INS-i option have been changed in version 7.243 (2015 Jun)." 1>>"$progressfile" - echo "To switch to the old parameters, use --oldgenafpair, instead of --genafpair." 1>>"$progressfile" - fi - fi - echo '' 1>>"$progressfile" - - - if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then -# cat dasherr >>"$progressfile" - echo '' >>"$progressfile" - fi - - popd > /dev/null - - if [ "$outputopt" != "-f" -o "$windows" = "yes" ]; then # Windows deha kaigyo code wo f2cl de modosu. -# ln -s "$TMPFILE/order" _order$$ # f2cl ga space ari filename ni taiou shiteinainode -# cp "$TMPFILE/order" _order$$ # ln -s no error wo sakeru - if [ "$outputfile" = "" ]; then - "$prefix/f2cl" -n $namelength $outputopt < "$TMPFILE/pre" 2>"/dev/null" || exit 1 - else - "$prefix/f2cl" -n $namelength $outputopt < "$TMPFILE/pre" > "$outputfile" 2>"/dev/null" || exit 1 - fi -# rm _order$$ - else - if [ "$outputfile" = "" ]; then - cat < "$TMPFILE/pre" || exit 1 - else - cat < "$TMPFILE/pre" > "$outputfile" || exit 1 - fi - fi - - if [ $treeout -eq 1 ]; then - cp "$TMPFILE/infile.tree" "$infilename.tree" - fi - - if [ -s "$TMPFILE/GuideTree" ]; then # --merge no toki dake - cp "$TMPFILE/GuideTree" . 
- fi - - if [ $distout -eq 1 ]; then - cp "$TMPFILE/hat2" "$infilename.hat2" - fi - - if [ $npickup -ne 0 ]; then - cp "$TMPFILE/notused" "$infilename.notused" - fi - - if [ -s "$TMPFILE/_deletemap" ]; then - if [ "$mapoutfile" = "/dev/null" ]; then - cp "$TMPFILE/_deletemap" "$addfile.map" - else - cp "$TMPFILE/_deletemap" "$mapoutfile" - fi - fi - - exit 0; -fi - -prog="awk" - -#tmpawk=`which nawk 2>/dev/null | awk '{print $1}'` -#if [ -x "$tmpawk" ]; then -# prog="$tmpawk" -#fi -# -#tmpawk=`which gawk 2>/dev/null | awk '{print $1}'` -#if [ -x "$tmpawk" ]; then -# prog="$tmpawk" -#fi -# 2017/May/12, Windows no gawk wo sakeru tame - -#echo "prog="$prog 1>&2 - -umask 077 -( -$prog ' -BEGIN { - prefix = ENVIRON["prefix"]; - version = ENVIRON["version"]; - myself = ENVIRON["myself"]; - pwd = ENVIRON["mafft_working_dir"]; # from mafft.bat on windows - if( pwd == "" ) pwd = ENVIRON["PWD"]; - while( 1 ) - { - options = "" - printf( "\n" ) > "/dev/tty"; - printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; - printf( "\n" ) > "/dev/tty"; - printf( " MAFFT %s\n", version ) > "/dev/tty"; - printf( "\n" ) > "/dev/tty"; -# printf( " Copyright (c) 2002- Kazutaka Katoh\n" ) > "/dev/tty"; - printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty"; - printf( " https://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty"; - printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; - printf( "\n" ) > "/dev/tty"; - - while( 1 ) - { - printf( "\n" ) > "/dev/tty"; - printf( "Input file? 
(FASTA format; Folder=%s)\n@ ", pwd ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ) - if( res == 0 || NF == 0 ) - continue; - infile = sprintf( "%s", $0 ); - - res = getline < infile; - close( infile ); - if( res == -1 ) - { - printf( "%s: No such file.\n\n", infile ) > "/dev/tty"; - printf( "Filename extension (eg., .txt) must be typed, if any.\n\n" ) > "/dev/tty"; - } - else if( res == 0 ) - printf( "%s: Empty.\n", infile ) > "/dev/tty"; - else - { - printf( "OK. infile = %s\n\n", infile ) > "/dev/tty"; - break; - } - } - nseq = 0; - - while( 1 ) - { - printf( "\n" ) > "/dev/tty"; - printf( "Output file?\n" ) > "/dev/tty"; - printf( "@ " ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ); - if( res == 0 || NF == 0 ) - continue; - else - { - outfile = sprintf( "%s", $0 ); - printf( "OK. outfile = %s\n\n", outfile ) > "/dev/tty"; - break; - } - } - - while( 1 ) - { - outargs = ""; - printf( "\n" ) > "/dev/tty"; - printf( "Output format?\n" ) > "/dev/tty"; - printf( " 1. Clustal format / Sorted\n" ) > "/dev/tty"; - printf( " 2. Clustal format / Input order\n" ) > "/dev/tty"; - printf( " 3. Fasta format / Sorted\n" ) > "/dev/tty"; - printf( " 4. Fasta format / Input order\n" ) > "/dev/tty"; - printf( " 5. Phylip format / Sorted\n" ) > "/dev/tty"; - printf( " 6. Phylip format / Input order\n" ) > "/dev/tty"; - printf( "@ " ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ); -# printf( "res=%d, NF=%d\n", res, NF ); - - resnum = 0 + $1; -# printf( "resnum=%d\n", resnum ); - - if( resnum < 1 || 6 < resnum ) - continue; - else - { - if( resnum == 1 ) - outargs = "--clustalout --reorder"; - else if( resnum == 2 ) - outargs = "--clustalout --inputorder"; - else if( resnum == 3 ) - outargs = "--reorder"; - else if( resnum == 4 ) - outargs = "--inputorder"; - else if( resnum == 5 ) - outargs = "--phylipout --reorder"; - else if( resnum == 6 ) - outargs = "--phylipout --inputorder"; - else - continue; - printf( "OK. 
arguments = %s\n\n", outargs ) > "/dev/tty"; - break; - } - } - - while( 1 ) - { - arguments = ""; - printf( "\n" ) > "/dev/tty"; - printf( "Strategy?\n" ) > "/dev/tty"; - printf( " 1. --auto\n" ) > "/dev/tty"; - printf( " 2. FFT-NS-1 (fast)\n" ) > "/dev/tty"; - printf( " 3. FFT-NS-2 (default)\n" ) > "/dev/tty"; - printf( " 4. G-INS-i (accurate)\n" ) > "/dev/tty"; - printf( " 5. L-INS-i (accurate)\n" ) > "/dev/tty"; - printf( " 6. E-INS-i (accurate)\n" ) > "/dev/tty"; - printf( "@ " ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ); -# printf( "res=%d, NF=%d\n", res, NF ); - - resnum = 0 + $1; -# printf( "resnum=%d\n", resnum ); - - if( resnum < 1 || 6 < resnum ) - continue; - else - { - if( resnum == 1 ) - arguments = "--auto"; - else if( resnum == 2 ) - arguments = "--retree 1"; - else if( resnum == 3 ) - arguments = "--retree 2"; - else if( resnum == 4 ) - arguments = "--globalpair --maxiterate 16"; - else if( resnum == 5 ) - arguments = "--localpair --maxiterate 16"; - else if( resnum == 6 ) - arguments = "--genafpair --maxiterate 16"; - else - arguments = sprintf( "%s", $0 ); - printf( "OK. arguments = %s %s\n\n", arguments, outargs ) > "/dev/tty"; - break; - } - } - - - while( 1 ) - { - printf( "\n" ) > "/dev/tty"; - printf( "Additional arguments? (--ep # --op # --kappa # etc)\n" ) > "/dev/tty"; - printf( "@ " ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ); - if( res == 0 || NF == 0 ) - { - break; - } - else - { - addargs = sprintf( "%s", $0 ); - printf( "OK. 
arguments = %s %s %s\n\n", addargs, arguments, outargs ) > "/dev/tty"; - break; - } - } - - arguments = sprintf( "%s %s %s", addargs, arguments, outargs ); - - print "" - command = sprintf( "\"%s\" %s \"%s\" > \"%s\"", myself, arguments, infile, outfile ); - gsub( /\\/, "/", command ); - - - printf( "command=\n%s\n", command ) > "/dev/tty"; - - - while( 1 ) - { - go = 0; - printf( "OK?\n" ) > "/dev/tty"; - printf( "@ [Y] " ) > "/dev/tty"; - res = getline < "/dev/tty"; - close( "/dev/tty" ); - if( res == 0 ) - continue; - else if( NF == 0 || $0 ~ /^[Yy]/ ) - { - go=1; - break; - } - else - break; - } - if( go ) break; - printf( "\n" ) > "/dev/tty"; - printf( "\n" ) > "/dev/tty"; - } - system( command ); - command = sprintf( "more \"%s\"", outfile ); - system( command ); - printf( "Press Enter to exit." ) > "/dev/tty"; - res = getline < "/dev/tty"; -} -' -) -exit 0; diff --git a/software/mafft/mafft-win/usr/bin/mafft-homologs.rb b/software/mafft/mafft-win/usr/bin/mafft-homologs.rb deleted file mode 100755 index 3a62cc4d..00000000 --- a/software/mafft/mafft-win/usr/bin/mafft-homologs.rb +++ /dev/null @@ -1,505 +0,0 @@ -#!/usr/bin/env ruby - -localdb = "sp" -# database name from which homologues are collected -# by locally installed blast. Leave this if you do -# not use the '-l' option. - -mafftpath = "/usr/local/bin/mafft" -# path of mafft. "/usr/local/bin/mafft" -# if mafft is in your command path, "mafft" is ok. - -blastpath = "psiblast" -# path of blastall. -# if blastall is in your command path, "blastall" is ok. - -# mafft-homologs.rb v. 2.1 aligns sequences together with homologues -# automatically collected from SwissProt via NCBI BLAST. 
-# -# mafft > 5.58 is required -# -# Usage: -# mafft-homologs.rb [options] input > output -# Options: -# -a # the number of collected sequences (default: 50) -# -e # threshold value (default: 1e-10) -# -o "xxx" options for mafft -# (default: " --op 1.53 --ep 0.123 --maxiterate 1000") -# -l locally carries out blast searches instead of NCBI blast -# (requires locally installed blast and a database) -# -f outputs collected homologues also (default: off) -# -w entire sequences are subjected to BLAST search -# (default: well-aligned region only) - -#require 'getopts' -require 'optparse' -require 'tempfile' - -if ENV["MAFFT_BLAST"] && ENV["MAFFT_BLAST"] != "" then - blastpath = ENV["MAFFT_BLAST"] -end - -if ENV["MAFFT_HOMOLOGS_MAFFT"] && ENV["MAFFT_HOMOLOGS_MAFFT"] != "" then - mafftpath = ENV["MAFFT_HOMOLOGS_MAFFT"] -end - -# mktemp -GC.disable -temp_vf = Tempfile.new("_vf").path -temp_if = Tempfile.new("_if").path -temp_pf = Tempfile.new("_pf").path -temp_af = Tempfile.new("_af").path -temp_qf = Tempfile.new("_qf").path -temp_bf = Tempfile.new("_bf").path -temp_rid = Tempfile.new("_rid").path -temp_res = Tempfile.new("_res").path - - -system( mafftpath + " --help > #{temp_vf} 2>&1" ) -pfp = File.open( "#{temp_vf}", 'r' ) -while pfp.gets - break if $_ =~ /MAFFT v/ -end -pfp.close - -if( $_ ) then - mafftversion = $_.sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s -else - mafftversion = "0" -end -if( mafftversion < "5.58" ) then - STDERR.puts "" - STDERR.puts "======================================================" - STDERR.puts "Install new mafft (v. 
>= 5.58)" - STDERR.puts "======================================================" - STDERR.puts "" - exit -end - -srand ( 0 ) - -def readfasta( fp, name, seq ) - nseq = 0 - tmpseq = "" - while fp.gets - if $_ =~ /^>/ then - name.push( $_.sub(/>/,"").strip ) - seq.push( tmpseq ) if nseq > 0 - nseq += 1 - tmpseq = "" - else - tmpseq += $_.strip - end - end - seq.push( tmpseq ) - return nseq -end - -nadd = 600 -num_alignments = 600 -num_threads_blast = 4 -eval = 1e-1 -local = 0 -fullout = 0 -entiresearch = 1 -corewin = 50 -corethr = 0.3 -#mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder " -mafftopt = " --op 1.53 --ep 0.0 --globalpair --maxiterate 1000 --reorder " - - -#if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then -# puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file" -# exit -#end -params = ARGV.getopts( "sfwlhe:a:o:c:d:n:N:" ) - - -#if $OPT_c then -if params["c"] != nil then - corewin = params["c"].to_i -end - -#if $OPT_d then -#if params["d"] != nil then -# corethr = params["d"].to_f -#end -# -if params["d"] != nil then - localdb = params["d"].to_s -end - -if params["n"] != nil then - num_alignments = params["n"].to_s -end - -if params["N"] != nil then - num_threads_blast = params["N"].to_s -end - -#if $OPT_w -if params["w"] == true then - entiresearch = 1 -end - -#if $OPT_f -if params["f"] == true then - fullout = 1 -end - -#if $OPT_s -if params["s"] == true then - fullout = 0 -end - -#if $OPT_l -if params["l"] == true then - local = 1 -end - -#if $OPT_e then -if params["e"] != nil then -# eval = $OPT_e.to_f - eval = params["e"].to_f -end - -#if $OPT_a then -if params["a"] != nil then - nadd = params["a"].to_i -end - -#if $OPT_o then -if params["o"] != nil then - mafftopt += " " + params["o"] + " " -end - -infn = ARGV[0].to_s.strip - -system "cat " + infn + " > #{temp_if}" -ar = mafftopt.split(" ") -nar = ar.length -for i in 0..(nar-1) - if ar[i] == 
"--seed" then - system "cat #{ar[i+1]} >> #{temp_if}" - end -end - -if fullout == 0 then - mafftopt += " --excludehomologs " -end - -nseq = 0 -ifp = File.open( "#{temp_if}", 'r' ) - while ifp.gets - nseq += 1 if $_ =~ /^>/ - end -ifp.close - -if nseq >= 10000 then - STDERR.puts "The number of input sequences must be <10000." - exit -elsif nseq == 1 then - system( "cp #{temp_if}" + " #{temp_pf}" ) -else - STDERR.puts "Performing preliminary alignment .. " - if entiresearch == 1 then -# system( mafftpath + " --maxiterate 1000 --localpair #{temp_if} > #{temp_pf}" ) - system( mafftpath + " --maxiterate 0 --retree 2 #{temp_if} > #{temp_pf}" ) - else - system( mafftpath + " --maxiterate 1000 --localpair --core --coreext --corethr #{corethr.to_s} --corewin #{corewin.to_s} #{temp_if} > #{temp_pf}" ) - end -end - -pfp = File.open( "#{temp_pf}", 'r' ) -inname = [] -inseq = [] -slen = [] -act = [] -nin = 0 -nin = readfasta( pfp, inname, inseq ) -for i in 0..(nin-1) - slen.push( inseq[i].gsub(/-/,"").length ) - act.push( 1 ) -end -pfp.close - -pfp = File.open( "#{temp_if}", 'r' ) -orname = [] -orseq = [] -nin = 0 -nin = readfasta( pfp, orname, orseq ) -pfp.close - -allen = inseq[0].length -for i in 0..(nin-2) - for j in (i+1)..(nin-1) - next if act[i] == 0 - next if act[j] == 0 - pid = 0.0 - total = 0 - for a in 0..(allen-1) - next if inseq[i][a,1] == "-" || inseq[j][a,1] == "-" - total += 1 - pid += 1.0 if inseq[i][a,1] == inseq[j][a,1] - end - pid /= total -# puts "#{i.to_s}, #{j.to_s}, #{pid.to_s}" - if pid > 0.5 then - if slen[i] < slen[j] - act[i] = 0 - else - act[j] = 0 - end - end - end -end -#p act - - -afp = File.open( "#{temp_af}", 'w' ) - -STDERR.puts "Searching .. 
\n" -ids = [] -add = [] -sco = [] -nblast = 0 # ato de tsukau kamo -for i in 0..(nin-1) - singleids = [] - singleadd = [] - - inseq[i].gsub!(/-/,"") - afp.puts ">" + orname[i] - afp.puts orseq[i] - -# afp.puts ">" + inname[i] -# afp.puts inseq[i] - - STDERR.puts "Query (#{i+1}/#{nin})\n" + inname[i] - if act[i] == 0 then - STDERR.puts "Skip.\n\n" - next - end - - if local == 0 then - command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}" - system command - - ridp = File.open( "#{temp_rid}", 'r' ) - while ridp.gets - break if $_ =~ / RID = (.*)/ - end - ridp.close - rid = $1.strip - STDERR.puts "Submitted to NCBI. rid = " + rid - - STDERR.printf "Waiting " - while 1 - STDERR.printf "." - sleep 10 - command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}" - system command - resp = File.open( "#{temp_res}", 'r' ) -# resp.gets -# if $_ =~ /WAITING/ then -# resp.close -# next -# end - while( resp.gets ) - break if $_ =~ /QBlastInfoBegin/ - end - resp.gets - if $_ =~ /WAITING/ then - resp.close - next - else - resp.close - break - end - end - else -# puts "Not supported" -# exit - qfp = File.open( "#{temp_qf}", 'w' ) - qfp.puts "> " - qfp.puts inseq[i] - qfp.close - command = blastpath + " -num_iterations 2 -num_threads #{num_threads_blast} -evalue #{eval} -num_alignments #{num_alignments} -outfmt 5 -query #{temp_qf} -db #{localdb} > #{temp_res}" - system command -# system "cp #{temp_res} _res" - end - STDERR.puts " Done.\n\n" - - resp = File.open( "#{temp_res}", 'r' ) - hitnum = 0 - lasteval = "nohit" - - while resp.gets - break if $_ =~ /2<\/Iteration_iter-num>/ - end - - if $_ == nil then - STDERR.puts 
"no hit" - else - while 1 - while resp.gets - break if $_ =~ /(.*)<\/Hit_id>/ || $_ =~ /()/ - end - id = $1 - break if $_ =~ // - # p id - - starthit = 9999999 - endhit = -1 - startquery = 9999999 - endquery = -1 - target = "" - score = 0.0 - - while line = resp.gets - if line =~ /(.*)<\/Hsp_hit-from>/ - starthitcand=$1.to_i - elsif line =~ /(.*)<\/Hsp_hit-to>/ - endhitcand=$1.to_i - elsif line =~ /(.*)<\/Hsp_query-from>/ - startquerycand=$1.to_i - elsif line =~ /(.*)<\/Hsp_query-to>/ - endquerycand=$1.to_i - elsif $_ =~ /(.*)<\/Hsp_hseq>/ - targetcand = $1.sub( /-/, "" ).sub( /U/, "X" ) - elsif line =~ /(.*)<\/Hsp_bit-score>/ - scorecand=$1.to_f - elsif line =~ /(.*)<\/Hsp_evalue>/ - evalcand=$1.to_s - elsif line =~ /<\/Hsp>/ - if endhit == -1 then - starthit = starthitcand - endhit= endhitcand - startquery = startquerycand - endquery= endquerycand - target = targetcand - score = scorecand - lasteval = evalcand - else - # if endhit <= endhitcand && endquery <= endquerycand then - if endhit <= starthitcand && endquery <= startquerycand then - endhit = endhitcand - endquery = endquerycand - target = target + "XX" + targetcand - score = score + scorecand - end - # if starthitcand <= starthit && startquerycand <= startquery then - if endhitcand <= starthit && endquerycand <= startquery then - starthit = starthitcand - startquery = startquerycand - target = targetcand + "XX" + target - score = score + scorecand - end - end - elsif line =~ /<\/Hit>/ - hitnum = hitnum + 1 - break; - end - end - - singleids.push( id ) - singleadd.push( target ) - - known = ids.index( id ) - if known != nil then - if sco[known] >= score then - next - else - ids.delete_at( known ) - add.delete_at( known ) - sco.delete_at( known ) - end - end - ids.push( id ) - sco.push( score ) - add.push( target ) - - end - resp.close - end - - n = singleids.length - outnum = 0 - - totalprob = 0 - prob = [] - for m in 0..(n-1) -# prob[m] = 1.0 / population[eclass[m]] - prob[m] = 1.0 - totalprob += prob[m] 
- end -# puts "" - for m in 0..(n-1) - prob[m] /= (totalprob) - prob[m] *= (nadd.to_f / nin.to_f) - prob[m] = 1 if prob[m] > 1 - end - - - for m in 0..(n-1) - if rand( 1000000 ).to_f/1000000 < prob[m] then -# STDERR.puts "hit in " + m.to_s - afp.puts ">_addedbymaffte_" + singleids[m] - afp.puts singleadd[m] - end - end -end -afp.close - -STDERR.puts "Aligning .. " -system( mafftpath + mafftopt + "#{temp_af} > #{temp_bf}" ) -STDERR.puts "done." - -bfp = File.open( "#{temp_bf}", 'r' ) -outseq = [] -outnam = [] -readfasta( bfp, outnam, outseq ) -bfp.close - -outseq2 = [] -outnam2 = [] - -len = outseq.length -for i in 0..(len-1) -# p outnam[i] - if fullout == 0 && outnam[i] =~ /_addedbymaffte_/ then - next - end - outseq2.push( outseq[i] ) - outnam2.push( outnam[i].sub( /_addedbymaffte_/, "_ho_" ) ) -end - -nout = outseq2.length -len = outseq[0].length -p = len -while p>0 - p -= 1 - allgap = 1 - for j in 0..(nout-1) - if outseq2[j][p,1] != "-" then - allgap = 0 - break - end - end - if allgap == 1 then - for j in 0..(nout-1) - outseq2[j][p,1] = "" - end - end -end -for i in 0..(nout-1) - puts ">" + outnam2[i] - puts outseq2[i].gsub( /.{1,60}/, "\\0\n" ) -end - - -system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" ) -#system( "cp #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid} ." 
) -if File.exist?( "#{temp_af}.tree" ) then - system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" ) - system( "rm #{temp_af}.tree" ) -end diff --git a/software/mafft/mafft-win/usr/bin/md5sum.exe b/software/mafft/mafft-win/usr/bin/md5sum.exe deleted file mode 100755 index 7026e212..00000000 Binary files a/software/mafft/mafft-win/usr/bin/md5sum.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/mkdir.exe b/software/mafft/mafft-win/usr/bin/mkdir.exe deleted file mode 100755 index e1805fd5..00000000 Binary files a/software/mafft/mafft-win/usr/bin/mkdir.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/mktemp.exe b/software/mafft/mafft-win/usr/bin/mktemp.exe deleted file mode 100755 index b0bbacbf..00000000 Binary files a/software/mafft/mafft-win/usr/bin/mktemp.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/more.exe b/software/mafft/mafft-win/usr/bin/more.exe deleted file mode 100755 index f12f754a..00000000 Binary files a/software/mafft/mafft-win/usr/bin/more.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-2.0.dll b/software/mafft/mafft-win/usr/bin/msys-2.0.dll deleted file mode 100755 index 479748a8..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-2.0.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-gcc_s-seh-1.dll b/software/mafft/mafft-win/usr/bin/msys-gcc_s-seh-1.dll deleted file mode 100755 index af4ced68..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-gcc_s-seh-1.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-gmp-10.dll b/software/mafft/mafft-win/usr/bin/msys-gmp-10.dll deleted file mode 100755 index ac4ec265..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-gmp-10.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-iconv-2.dll b/software/mafft/mafft-win/usr/bin/msys-iconv-2.dll deleted file mode 100755 index 
e80a674d..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-iconv-2.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-intl-8.dll b/software/mafft/mafft-win/usr/bin/msys-intl-8.dll deleted file mode 100755 index 5359e5f7..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-intl-8.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-magic-1.dll b/software/mafft/mafft-win/usr/bin/msys-magic-1.dll deleted file mode 100755 index befd0978..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-magic-1.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-mpfr-4.dll b/software/mafft/mafft-win/usr/bin/msys-mpfr-4.dll deleted file mode 100755 index e084a721..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-mpfr-4.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-ncursesw6.dll b/software/mafft/mafft-win/usr/bin/msys-ncursesw6.dll deleted file mode 100755 index c1c17295..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-ncursesw6.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-pcre-1.dll b/software/mafft/mafft-win/usr/bin/msys-pcre-1.dll deleted file mode 100755 index 7dace263..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-pcre-1.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-readline6.dll b/software/mafft/mafft-win/usr/bin/msys-readline6.dll deleted file mode 100755 index e7e08d76..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-readline6.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/msys-z.dll b/software/mafft/mafft-win/usr/bin/msys-z.dll deleted file mode 100755 index f529060f..00000000 Binary files a/software/mafft/mafft-win/usr/bin/msys-z.dll and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/mv.exe b/software/mafft/mafft-win/usr/bin/mv.exe deleted file mode 100755 index 60fa5a9c..00000000 Binary 
files a/software/mafft/mafft-win/usr/bin/mv.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/od.exe b/software/mafft/mafft-win/usr/bin/od.exe deleted file mode 100755 index 13c56022..00000000 Binary files a/software/mafft/mafft-win/usr/bin/od.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/paste.exe b/software/mafft/mafft-win/usr/bin/paste.exe deleted file mode 100755 index 8892c40a..00000000 Binary files a/software/mafft/mafft-win/usr/bin/paste.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/printf.exe b/software/mafft/mafft-win/usr/bin/printf.exe deleted file mode 100755 index 8809c217..00000000 Binary files a/software/mafft/mafft-win/usr/bin/printf.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/ps.exe b/software/mafft/mafft-win/usr/bin/ps.exe deleted file mode 100755 index 9bce829b..00000000 Binary files a/software/mafft/mafft-win/usr/bin/ps.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/pwd.exe b/software/mafft/mafft-win/usr/bin/pwd.exe deleted file mode 100755 index 7144845c..00000000 Binary files a/software/mafft/mafft-win/usr/bin/pwd.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/rm.exe b/software/mafft/mafft-win/usr/bin/rm.exe deleted file mode 100755 index b389367e..00000000 Binary files a/software/mafft/mafft-win/usr/bin/rm.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/rmdir.exe b/software/mafft/mafft-win/usr/bin/rmdir.exe deleted file mode 100755 index 3503aadf..00000000 Binary files a/software/mafft/mafft-win/usr/bin/rmdir.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/sed.exe b/software/mafft/mafft-win/usr/bin/sed.exe deleted file mode 100755 index 04ebb79c..00000000 Binary files a/software/mafft/mafft-win/usr/bin/sed.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/sh.exe b/software/mafft/mafft-win/usr/bin/sh.exe deleted file mode 100755 index 
508ddda5..00000000 Binary files a/software/mafft/mafft-win/usr/bin/sh.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/sleep.exe b/software/mafft/mafft-win/usr/bin/sleep.exe deleted file mode 100755 index 7294da35..00000000 Binary files a/software/mafft/mafft-win/usr/bin/sleep.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/sort.exe b/software/mafft/mafft-win/usr/bin/sort.exe deleted file mode 100755 index fa5209af..00000000 Binary files a/software/mafft/mafft-win/usr/bin/sort.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/split.exe b/software/mafft/mafft-win/usr/bin/split.exe deleted file mode 100755 index 4636beac..00000000 Binary files a/software/mafft/mafft-win/usr/bin/split.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/stty.exe b/software/mafft/mafft-win/usr/bin/stty.exe deleted file mode 100755 index 2768ab7f..00000000 Binary files a/software/mafft/mafft-win/usr/bin/stty.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/tail.exe b/software/mafft/mafft-win/usr/bin/tail.exe deleted file mode 100755 index dab174e9..00000000 Binary files a/software/mafft/mafft-win/usr/bin/tail.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/tar.exe b/software/mafft/mafft-win/usr/bin/tar.exe deleted file mode 100755 index ec23afee..00000000 Binary files a/software/mafft/mafft-win/usr/bin/tar.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/touch.exe b/software/mafft/mafft-win/usr/bin/touch.exe deleted file mode 100755 index d7f345fb..00000000 Binary files a/software/mafft/mafft-win/usr/bin/touch.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/tr.exe b/software/mafft/mafft-win/usr/bin/tr.exe deleted file mode 100755 index 0a12ceb8..00000000 Binary files a/software/mafft/mafft-win/usr/bin/tr.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/true.exe 
b/software/mafft/mafft-win/usr/bin/true.exe deleted file mode 100755 index be85bce9..00000000 Binary files a/software/mafft/mafft-win/usr/bin/true.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/uname.exe b/software/mafft/mafft-win/usr/bin/uname.exe deleted file mode 100755 index e800ba50..00000000 Binary files a/software/mafft/mafft-win/usr/bin/uname.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/uniq.exe b/software/mafft/mafft-win/usr/bin/uniq.exe deleted file mode 100755 index b1fe65eb..00000000 Binary files a/software/mafft/mafft-win/usr/bin/uniq.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/wc.exe b/software/mafft/mafft-win/usr/bin/wc.exe deleted file mode 100755 index 1be2d5d4..00000000 Binary files a/software/mafft/mafft-win/usr/bin/wc.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/which.exe b/software/mafft/mafft-win/usr/bin/which.exe deleted file mode 100755 index 8d20acc1..00000000 Binary files a/software/mafft/mafft-win/usr/bin/which.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/bin/xargs.exe b/software/mafft/mafft-win/usr/bin/xargs.exe deleted file mode 100755 index c1ec324e..00000000 Binary files a/software/mafft/mafft-win/usr/bin/xargs.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/addsingle.exe b/software/mafft/mafft-win/usr/lib/mafft/addsingle.exe deleted file mode 100755 index 90223008..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/addsingle.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/contrafoldwrap.exe b/software/mafft/mafft-win/usr/lib/mafft/contrafoldwrap.exe deleted file mode 100755 index d27aa853..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/contrafoldwrap.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/countlen.exe b/software/mafft/mafft-win/usr/lib/mafft/countlen.exe deleted file mode 100755 index 
74dbddd7..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/countlen.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/dash_client.exe b/software/mafft/mafft-win/usr/lib/mafft/dash_client.exe deleted file mode 100755 index bb15107c..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/dash_client.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/disttbfast.exe b/software/mafft/mafft-win/usr/lib/mafft/disttbfast.exe deleted file mode 100755 index 123d3ea0..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/disttbfast.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/dndblast.exe b/software/mafft/mafft-win/usr/lib/mafft/dndblast.exe deleted file mode 100755 index ec1985cf..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/dndblast.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/dndfast7.exe b/software/mafft/mafft-win/usr/lib/mafft/dndfast7.exe deleted file mode 100755 index 5487386a..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/dndfast7.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/dndpre.exe b/software/mafft/mafft-win/usr/lib/mafft/dndpre.exe deleted file mode 100755 index 3c4aac73..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/dndpre.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/dvtditr.exe b/software/mafft/mafft-win/usr/lib/mafft/dvtditr.exe deleted file mode 100755 index 71aa596a..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/dvtditr.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/f2cl.exe b/software/mafft/mafft-win/usr/lib/mafft/f2cl.exe deleted file mode 100755 index 5a9283d4..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/f2cl.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/getlag.exe 
b/software/mafft/mafft-win/usr/lib/mafft/getlag.exe deleted file mode 100755 index abb5ce9a..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/getlag.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/hex2maffttext.exe b/software/mafft/mafft-win/usr/lib/mafft/hex2maffttext.exe deleted file mode 100755 index b7bcbfcf..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/hex2maffttext.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/mafft-distance.exe b/software/mafft/mafft-win/usr/lib/mafft/mafft-distance.exe deleted file mode 100755 index 4e93bbaf..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/mafft-distance.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/mafft-homologs.1 b/software/mafft/mafft-win/usr/lib/mafft/mafft-homologs.1 deleted file mode 100755 index a9c96825..00000000 --- a/software/mafft/mafft-win/usr/lib/mafft/mafft-homologs.1 +++ /dev/null @@ -1,131 +0,0 @@ -.\" Title: MAFFT-HOMOLOGS -.\" Author: Kazutaka Katoh -.\" Generator: DocBook XSL Stylesheets v1.72.0 -.\" Date: 2007-08-14 -.\" Manual: Mafft Manual -.\" Source: mafft-homologs 2.1 -.\" -.TH "MAFFT\-HOMOLOGS" "1" "2007\-06\-09" "mafft\-homologs 2.1" "Mafft Manual" -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l -.SH "NAME" -.RS 0 -mafft\-homologs \- aligns sequences together with homologues automatically collected from SwissProt via NCBI BLAST -.RE -.SH "SYNOPSIS" -.RS 0 -\fBmafft\-homologs\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] -.RE -.SH "DESCRIPTION" -.RS 0 -The accuracy of an alignment of a few distantly related sequences is considerably improved when being aligned together with their close homologs. The reason for the improvement is probably the same as that for PSI\-BLAST. That is, the positions of highly conserved residues, those with many gaps and other additional information is brought by close homologs. 
According to Katoh et al. (2005), the improvement by adding close homologs is 10% or so, which is comparable to the improvement by incorporating structural information of a pair of sequences. Mafft\-homologs in a mafft server works like this: -.sp -.RS 4 -\h'-04' 1.\h'+02'Collect a number (50 by default) of close homologs (E=1e\-10 by default) of the input sequences. -.RE -.sp -.RS 4 -\h'-04' 2.\h'+02'Align the input sequences and homologs all together using the L\-INS\-i strategy. -.RE -.sp -.RS 4 -\h'-04' 3.\h'+02'Remove the homologs. -.RE -.RE -.SH "OPTIONS" -.RS 0 -.PP -\fB\-a\fR \fI\fIn\fR\fR -.RS 4 -The number of collected sequences (default: 50). -.RE -.PP -\fB\-e\fR \fI\fIn\fR\fR -.RS 4 -Threshold value (default: 1e\-10). -.RE -.PP -\fB\-o\fR \fI\fIxxx\fR\fR -.RS 4 -Options for mafft (default: " \-\-op 1.53 \-\-ep 0.123 \-\-maxiterate 1000 --localpair --reorder"). -.RE -.PP -\fB\-l\fR -.RS 4 -Locally carries out BLAST searches instead of NCBI BLAST (requires locally installed BLAST and a database). -.RE -.PP -\fB\-f\fR -.RS 4 -Outputs collected homologues also (default: off). -.RE -.PP -\fB\-w\fR -.RS 4 -entire sequences are subjected to BLAST search (default: well\-aligned region only) -.RE -.RE -.SH "REQUIREMENTS" -.RS 0 -.PP -MAFFT version > 5.58. -.PP -Either of -.RS 4 -.PP -lynx (when remote BLAST server is used) -.PP -BLAST and a protein sequence database (when local BLAST is used) -.RE -.RE -.SH "REFERENCES" -.RS 0 -.PP -Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment. -.RE -.SH "SEE ALSO" -.RS 0 -.PP -\fBmafft\fR(1) -.RE -.SH "AUTHORS" -.RS 0 -.PP -\fBKazutaka Katoh\fR <\&katoh_at_bioreg.kyushu\-u.ac.jp.\&> -.sp -1n -.IP "" 4 -Wrote Mafft. -.PP -\fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy.org\&> -.sp -1n -.IP "" 4 -Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. 
-.RE -.SH "COPYRIGHT" -.RS 0 -Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) -.br -Copyright \(co 2007 Charles Plessy (this manpage) -.br -.PP -Mafft and its manpage are offered under the following conditions: -.PP -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -.sp -.RS 4 -\h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -.RE -.sp -.RS 4 -\h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -.RE -.sp -.RS 4 -\h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. -.RE -.PP -THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-.br -.RE diff --git a/software/mafft/mafft-win/usr/lib/mafft/mafft-profile.exe b/software/mafft/mafft-win/usr/lib/mafft/mafft-profile.exe deleted file mode 100755 index d2652adc..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/mafft-profile.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/mafft.1 b/software/mafft/mafft-win/usr/lib/mafft/mafft.1 deleted file mode 100755 index b5344c57..00000000 --- a/software/mafft/mafft-win/usr/lib/mafft/mafft.1 +++ /dev/null @@ -1,479 +0,0 @@ -.\" Title: MAFFT -.\" Author: Kazutaka Katoh -.\" Generator: DocBook XSL Stylesheets v1.72.0 -.\" Date: 2007-08-14 -.\" Manual: Mafft Manual -.\" Source: mafft 6.240 -.\" -.TH "MAFFT" "1" "2007\-06\-09" "mafft 6.240" "Mafft Manual" -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l -.SH "THIS MANUAL IS FOR V6.2XX (2007)" -Recent versions (v7.1xx; 2013 Jan.) have more features than those described below. -See also the tips page at -http://mafft.cbrc.jp/alignment/software/tips0.html -.SH "NAME" -.RS 0 -.sp -mafft \- Multiple alignment program for amino acid or nucleotide sequences -.RE -.SH "SYNOPSIS" -.RS 0 -.HP 6 -\fBmafft\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 7 -\fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 5 -\fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 14 -\fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] -.HP -.sp -\fIinput\fR, \fIgroup1\fR and \fIgroup2\fR must be in FASTA format. -.RE -.SH "DESCRIPTION" -.RS 0 -\fBMAFFT\fR is a multiple sequence alignment program for unix\-like operating systems. It offers a range of multiple alignment methods. 
-.SS "Accuracy\-oriented methods:" -.sp -.RS 4 -\h'-04'\(bu\h'+03'L\-INS\-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): -.HP 6 -\fBmafft\fR \fB\-\-localpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'G\-INS\-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): -.HP 6 -\fBmafft\fR \fB\-\-globalpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'E\-INS\-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): -.HP 6 -\fBmafft\fR \fB\-\-ep\fR\ \fI0\fR \fB\-\-genafpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] -.br - -For E\-INS\-i, the -\fB\-\-ep\fR -\fI0\fR -option is recommended to allow large gaps. -.RE -.SS "Speed\-oriented methods:" -.sp -.RS 4 -\h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; two cycles only): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 7 -\fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; max. 
1000 iterations): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'FFT\-NS\-2 (fast; progressive method): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'FFT\-NS\-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'NW\-NS\-i (iterative refinement method without FFT approximation; two cycles only): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] -.HP 7 -\fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'NW\-NS\-2 (fast; progressive method without the FFT approximation): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] -.HP 6 -\fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'NW\-NS\-PartTree\-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): -.HP 6 -\fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fB\-\-parttree\fR \fIinput\fR [>\ \fIoutput\fR] -.RE -.SS "Group\-to\-group alignments" -.HP 6 -.RS 4 -\fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] -.sp -or: -.sp -\fBmafft\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fB\-\-seed\fR\ \fIgroup1\fR \fB\-\-seed\fR\ \fIgroup2\fR /dev/null [>\ \fIoutput\fR] -.RE -.RE -.RE -.SH "OPTIONS" -.SS "Algorithm" -.RS 0 -.PP -\fB\-\-auto\fR -.RS 4 -Automatically selects an appropriate strategy from L\-INS\-i, FFT\-NS\-i and FFT\-NS\-2, according to data -size. 
Default: off (always FFT\-NS\-2) -.RE -.PP -\fB\-\-6merpair\fR -.RS 4 -Distance is calculated based on the number of shared 6mers. Default: on -.RE -.PP -\fB\-\-globalpair\fR -.RS 4 -All pairwise alignments are computed with the Needleman\-Wunsch -algorithm. More accurate but slower -than \-\-6merpair. Suitable for a set of -globally alignable sequences. Applicable to -up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (G\-INS\-i). Default: off (6mer distance is used) -.RE -.PP -\fB\-\-localpair\fR -.RS 4 -All pairwise alignments are computed with the Smith\-Waterman -algorithm. More accurate but slower -than \-\-6merpair. Suitable for a set of -locally alignable sequences. Applicable to -up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (L\-INS\-i). Default: off (6mer distance is used) -.RE -.PP -\fB\-\-genafpair\fR -.RS 4 -All pairwise alignments are computed with a local -algorithm with the generalized affine gap cost -(Altschul 1998). More accurate but slower -than \-\-6merpair. Suitable when large internal gaps -are expected. Applicable to -up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (E\-INS\-i). Default: off (6mer distance is used) -.RE -.\".PP -.\"\fB\-\-fastswpair\fR -.\".RS 4 -.\"Distance is calculated based on a FASTA alignment. -.\"FASTA is required. Default: off (6mer distance is used) -.\".RE -.PP -\fB\-\-fastapair\fR -.RS 4 -All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). -FASTA is required. Default: off (6mer distance is used) -.RE -.\".PP -.\"\fB\-\-blastpair\fR -.\".RS 4 -.\"Distance is calculated based on a BLAST alignment. BLAST is -.\"required. Default: off (6mer distance is used) -.\".RE -.PP -\fB\-\-weighti\fR \fInumber\fR -.RS 4 -Weighting factor for the consistency term calculated from pairwise alignments. Valid when -either of \-\-globalpair, \-\-localpair, \-\-genafpair, \-\-fastapair or -\-\-blastpair is selected. 
Default: 2.7 -.RE -.PP -\fB\-\-retree\fR \fInumber\fR -.RS 4 -Guide tree is built \fInumber\fR times in the -progressive stage. Valid with 6mer distance. Default: 2 -.RE -.PP -\fB\-\-maxiterate\fR \fInumber\fR -.RS 4 -\fInumber\fR cycles of iterative refinement are performed. Default: 0 -.RE -.PP -\fB\-\-fft\fR -.RS 4 -Use FFT approximation in group\-to\-group alignment. Default: on -.RE -.PP -\fB\-\-nofft\fR -.RS 4 -Do not use FFT approximation in group\-to\-group alignment. Default: off -.RE -.PP -\fB\-\-noscore\fR -.RS 4 -Alignment score is not checked in the iterative refinement stage. Default: off (score is checked) -.RE -.PP -\fB\-\-memsave\fR -.RS 4 -Use the Myers\-Miller (1988) algorithm. Default: automatically turned on when the alignment length exceeds 10,000 (aa/nt). -.RE -.PP -\fB\-\-parttree\fR -.RS 4 -Use a fast tree\-building method (PartTree, Katoh and Toh 2007) with -the 6mer distance. Recommended for a large number (> ~10,000) -of sequences are input. Default: off -.RE -.PP -\fB\-\-dpparttree\fR -.RS 4 -The PartTree algorithm is used with distances based on DP. Slightly -more accurate and slower than \-\-parttree. Recommended for a large -number (> ~10,000) of sequences are input. Default: off -.RE -.PP -\fB\-\-fastaparttree\fR -.RS 4 -The PartTree algorithm is used with distances based on FASTA. Slightly more accurate and slower than \-\-parttree. Recommended for a large number (> ~10,000) of sequences are input. FASTA is required. Default: off -.RE -.PP -\fB\-\-partsize\fR \fInumber\fR -.RS 4 -The number of partitions in the PartTree algorithm. Default: 50 -.RE -.PP -\fB\-\-groupsize\fR \fInumber\fR -.RS 4 -Do not make alignment larger than \fInumber\fR sequences. Valid only with the \-\-*parttree options. Default: the number of input sequences -.RE -.RE -.SS "Parameter" -.RS 0 -.PP -\fB\-\-op\fR \fInumber\fR -.RS 4 -Gap opening penalty at group\-to\-group alignment. 
Default: 1.53 -.RE -.PP -\fB\-\-ep\fR \fInumber\fR -.RS 4 -Offset value, which works like gap extension penalty, for -group\-to\-group alignment. Default: 0.123 -.RE -.PP -\fB\-\-lop\fR \fInumber\fR -.RS 4 -Gap opening penalty at local pairwise -alignment. Valid when -the \-\-localpair or \-\-genafpair option is selected. Default: \-2.00 -.RE -.PP -\fB\-\-lep\fR \fInumber\fR -.RS 4 -Offset value at local pairwise alignment. Valid when -the \-\-localpair or \-\-genafpair option is selected. Default: 0.1 -.RE -.PP -\fB\-\-lexp\fR \fInumber\fR -.RS 4 -Gap extension penalty at local pairwise alignment. Valid when -the \-\-localpair or \-\-genafpair option is selected. Default: \-0.1 -.RE -.PP -\fB\-\-LOP\fR \fInumber\fR -.RS 4 -Gap opening penalty to skip the alignment. Valid when the -\-\-genafpair option is selected. Default: \-6.00 -.RE -.PP -\fB\-\-LEXP\fR \fInumber\fR -.RS 4 -Gap extension penalty to skip the alignment. Valid when the -\-\-genafpair option is selected. Default: 0.00 -.RE -.PP -\fB\-\-bl\fR \fInumber\fR -.RS 4 -BLOSUM \fInumber\fR matrix (Henikoff and Henikoff 1992) is used. \fInumber\fR=30, 45, 62 or 80. Default: 62 -.RE -.PP -\fB\-\-jtt\fR \fInumber\fR -.RS 4 -JTT PAM \fInumber\fR (Jones et al. 1992) matrix is used. \fInumber\fR>0. Default: BLOSUM62 -.RE -.PP -\fB\-\-tm\fR \fInumber\fR -.RS 4 -Transmembrane PAM \fInumber\fR (Jones et al. 1994) matrix is used. \fInumber\fR>0. Default: BLOSUM62 -.RE -.PP -\fB\-\-aamatrix\fR \fImatrixfile\fR -.RS 4 -Use a user\-defined AA scoring matrix. The format of \fImatrixfile\fR is -the same to that of BLAST. Ignored when nucleotide sequences are input. Default: BLOSUM62 -.RE -.PP -\fB\-\-fmodel\fR -.RS 4 -Incorporate the AA/nuc composition information into -the scoring matrix. Default: off -.RE -.RE -.SS "Output" -.RS 0 -.PP -\fB\-\-clustalout\fR -.RS 4 -Output format: clustal format. Default: off (fasta format) -.RE -.PP -\fB\-\-inputorder\fR -.RS 4 -Output order: same as input. 
Default: on -.RE -.PP -\fB\-\-reorder\fR -.RS 4 -Output order: aligned. Default: off (inputorder) -.RE -.PP -\fB\-\-treeout\fR -.RS 4 -Guide tree is output to the \fIinput\fR.tree file. Default: off -.RE -.PP -\fB\-\-quiet\fR -.RS 4 -Do not report progress. Default: off -.RE -.RE -.SS "Input" -.RS 0 -.PP -\fB\-\-nuc\fR -.RS 4 -Assume the sequences are nucleotide. Default: auto -.RE -.PP -\fB\-\-amino\fR -.RS 4 -Assume the sequences are amino acid. Default: auto -.RE -.PP -\fB\-\-seed\fR \fIalignment1\fR [\fB--seed\fR \fIalignment2\fR \fB--seed\fR \fIalignment3\fR ...] -.RS 4 -Seed alignments given in \fIalignment_n\fR (fasta format) are aligned with -sequences in \fIinput\fR. The alignment within every seed is preserved. -.RE -.RE -.SH "FILES" -.RS 0 -.PP -Mafft stores the input sequences and other files in a temporary directory, which by default is located in -\fI/tmp\fR. -.RE -.SH "ENVIONMENT" -.RS 0 -.PP -\fBMAFFT_BINARIES\fR -.RS 4 -Indicates the location of the binary files used by mafft. By default, they are searched in -\fI/usr/local/lib/mafft\fR, but on Debian systems, they are searched in -\fI/usr/lib/mafft\fR. -.RE -.PP -\fBFASTA_4_MAFFT\fR -.RS 4 -This variable can be set to indicate to mafft the location to the fasta34 program if it is not in the PATH. -.RE -.RE -.SH "SEE ALSO" -.RS 0 -.PP - -\fBmafft\-homologs\fR(1) -.RE -.SH "REFERENCES" -.RS 0 -.SS "In English" -.sp -.RS 4 -\h'-04'\(bu\h'+03'Katoh and Toh (Bioinformatics 23:372\-374, 2007) PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences (describes the PartTree algorithm). -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment (describes [ancestral versions of] the G\-INS\-i, L\-INS\-i and E\-INS\-i strategies) -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'Katoh, Misawa, Kuma and Miyata (Nucleic Acids Res. 
30:3059\-3066, 2002) MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform (describes the FFT\-NS\-1, FFT\-NS\-2 and FFT\-NS\-i strategies) -.RE -.SS "In Japanese" -.sp -.RS 4 -\h'-04'\(bu\h'+03'Katoh and Misawa (Seibutsubutsuri 46:312\-317, 2006) Multiple Sequence Alignments: the Next Generation -.RE -.sp -.RS 4 -\h'-04'\(bu\h'+03'Katoh and Kuma (Kagaku to Seibutsu 44:102\-108, 2006) Jissen\-teki Multiple Alignment -.RE -.RE -.SH "AUTHORS" -.RS 0 -.PP -\fBKazutaka Katoh\fR <\&kazutaka.katoh_at_aist.go.jp\&> -.sp -1n -.IP "" 4 -Wrote Mafft. -.PP -\fBCharles Plessy\fR <\&charles\-debian\-nospam_at_plessy.org\&> -.sp -1n -.IP "" 4 -Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. -.RE -.SH "COPYRIGHT" -.RS 0 -Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) -.br -Copyright \(co 2007 Charles Plessy (this manpage) -.br -.PP -Mafft and its manpage are offered under the following conditions: -.PP -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -.sp -.RS 4 -\h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -.RE -.sp -.RS 4 -\h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -.RE -.sp -.RS 4 -\h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. -.RE -.PP -THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -.br -.RE diff --git a/software/mafft/mafft-win/usr/lib/mafft/mafftash_premafft.pl b/software/mafft/mafft-win/usr/lib/mafft/mafftash_premafft.pl deleted file mode 100755 index 6dc3b9c4..00000000 --- a/software/mafft/mafft-win/usr/lib/mafft/mafftash_premafft.pl +++ /dev/null @@ -1,464 +0,0 @@ -#!/usr/bin/perl - -##################################################################### -# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) -# -# Ver. Date Changelog -##################################################################### -# 1.0 07.26.13 Initial release -# 2.0 09.03.13 Added extensive warnings and error messages -# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs -# 3.1 11.08.13 Added LWP failsafe. 
Made hat3 not a required output -# 3.2 12.08.14 Removed 5-char restriction for own structure files -# -##################################################################### - -use strict; -use Getopt::Long; -use File::Path qw(make_path remove_tree); -use LWP::Simple; -use LWP::UserAgent; - -# to prevent error 'Header line too long (limit is 8192)' [v3.1] -use LWP::Protocol::http; -push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); - - - -my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft"; - -my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE ); - -GetOptions -( - 'd=s' => \$WORKDIR, - 'p=s' => \$PDBLIST, - 'o=s' => \$OWNLIST, - 'h=s' => \$HAT3FILE, - 'i=s' => \$INSTRFILE, -); - -print STDERR "[MAFFTash-premafft]\n"; - -# set temp directory -my $TMP = "/tmp/mapremafft$$"; -make_path($TMP) unless -d $TMP; - - - -###### -# validation -&help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST); -&help("Required parameter : '-d'") if defined $OWNLIST && ! 
defined $WORKDIR; - -$HAT3FILE = "hat3" unless defined $HAT3FILE; -$INSTRFILE = "instr" unless defined $INSTRFILE; -chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g; - - -###### -# prepare inputs -print STDERR "Preparing inputs for service request...\n"; - -my @files = (); -push(@files, "strweight" => "0.5"); -push(@files, "premafft" => "1"); - - -# pdb entries -if ( defined $PDBLIST ) -{ - print STDERR "PDB List defined!\n"; - &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST; - my $listfile = "$TMP/pdblist.inp"; - - - open(INPF,"<$PDBLIST") or &bail("Error: Cannot open file $PDBLIST for reading!"); - open(OUTF,">$listfile") or &bail("Error: Cannot open temporary file $listfile for writing!"); - - while() - { - chomp; - if ( /^(\w{5})$/ ) - { - print OUTF ">PDBID\n$1\n"; - } - } - - close OUTF; - close INPF; - - push(@files, "inputfile" => ["$listfile"]); -} - - - -# upload own structures -my %ownids = (); - -if ( defined $OWNLIST ) -{ - print STDERR "OWN List defined!\n"; - &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST; - - - open(OWNINPF,"<$OWNLIST") or &bail("Error: Cannot open file $OWNLIST for reading!"); - - while() - { - chomp; - - if ( /^(\S+)$/ ) - { - my $fileref = "$WORKDIR/$1.pdb"; - - unless (-e $fileref) - { - close OWNINPF; - &bail("Error: File $fileref does not exists!"); - } - - push(@files, "inputownfile[]" => ["$fileref"]); - $ownids{$1} = 1; - } - } - - close OWNINPF; -} - - - -###### -# start rest service -print STDERR "Sending service request...\n"; - -my $browser = LWP::UserAgent->new; -$browser->timeout(0); - - -# post: running a mafftash job -my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' ); -&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success); - - -# get response from post request -my ($status, $mafftashid) = &parseResponse($postResponse->content); - - - -my $MAXTRIES = 3; -my 
$STIMER = 4; -my $longtimer = 0; - -print STDERR "Request sent! Waiting for response...[$mafftashid]\n"; - - -# wait for results until it becomes available -while(1) -{ - $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER; - sleep $longtimer; - - - # get: get results for mafftash job - my $getResponse = $browser->get("$BASEURL/$mafftashid"); - - if ( $getResponse->is_success ) - { - - # get response from get request - ($status, $mafftashid) = &parseResponse($getResponse->content); - next unless ( $status eq "done" ); - - - # if job is finished and ready - print STDERR "Results found!\n"; - my $csfile = "$TMP/checksum.tar.gz"; - my $try1 = 1; - - - while(1) - { - print STDERR "Fetching Results... [Trial $try1]\n"; - - if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile ) - { - # get response from get request - my $checklist = &extractchecksum($csfile); - &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 ); - - - foreach my $id ( keys %$checklist ) - { - my $checkfile = "$TMP/$id"; - my $checkid = $checklist->{$id}; - my $try2 = 1; - - while(1) - { - unlink $checkfile if -e $checkfile; - - if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile ) - { - my $hashid = &getchecksum($checkfile); - #print STDERR "[hashid]$hashid [checkid]$checkid\n"; - - if ($hashid ne "" && $hashid ne $checkid ) - { - unlink $checkfile if -e $checkfile; - &bail("Error retrieving compressed file from server! 
[Checksum Failed]") if $try2 >= $MAXTRIES; - $try2++; - sleep $STIMER; - } - else - { - last; - } - } - else - { - &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES; - $try2++; - sleep $STIMER; - } - } - } - - last; - } - else - { - &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES; - $try1++; - sleep $STIMER; - } - } - - last; - - } - else - { - &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content))); - } - -} - - -# make sure outputs were generated -# decompress -print STDERR "Assembling final results...\n"; - -&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/"); -&backticks("mv -f $TMP/instr $INSTRFILE") if -e "$TMP/instr"; -&backticks("mv -f $TMP/hat3 $HAT3FILE") if -e "$TMP/hat3"; - -# sometimes no hat3 file is generated [v3.1] -#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE; -&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE; - - -# warn if some ownids were ommitted -if ( scalar keys(%ownids) > 0 ) -{ - my %instrids = (); - - open(INSTRF,"<$INSTRFILE") or &bail("Error: Cannot open file $INSTRFILE for reading!"); - - while() - { - chomp; - - if ( /^>\d+_(\S+)$/ ) - { - $instrids{$1} = 1; - } - } - - close INSTRF; - - foreach my $id ( keys %ownids ) - { - warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id}; - } - -} - - - -&cleanup(); - - - -#################### -#################### - - - -sub parseResponse -{ - my $response = shift; - - #"status":"wait","mafftashid":"Ma8211432R" - - my $status = ""; - my $mafftashid = ""; - - if ( $response =~ /^([^\s:]+):([^\s:]+)$/ ) - { - $mafftashid = $1; - $status = $2; - } - - return ($status, $mafftashid); - -} - - -sub extractchecksum -{ - my $infile = shift; - my %dataset = (); - - open CSUM, "tar -zxf $infile -O|" or return \%dataset; - - while() - { - chomp; - if ( /^(\S+)\s+(\S+)$/ ) - { - $dataset{$2} = $1; - } - - } - - close CSUM; - - 
return \%dataset; - -} - - -sub parseError -{ - my $response = shift; - - #"error":"Invalid number of inputs found." - my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : ""; - return $errorstr; -} - - -sub getchecksum -{ - my $infile = shift; - - # md5 binary check - my $MD5BIN = ""; - - if ( -x "/usr/bin/md5sum" ) - { - $MD5BIN = "/usr/bin/md5sum"; - } - elsif ( -x "/sbin/md5" ) - { - $MD5BIN = "/sbin/md5 -q"; - } - - return "" if $MD5BIN eq ""; - - - my $checksum = ""; - open MD5EXE, "$MD5BIN $infile|" or return ""; - - while() - { - if (/^(\S+)\s+(\S+)$/) - { - $checksum = $1; - last; - } - elsif (/^(\S+)$/) - { - $checksum = $1; - last; - } - } - - close MD5EXE; - - return $checksum; - -} - - -sub backticks -{ - my $command = shift; - - `$command`; - return ($? == -1) ? 0 : 1; -} - - -sub bail -{ - my $str = shift; - print STDERR "$str\n" if defined $str; - - &cleanup(); - exit(1); -} - - -sub cleanup -{ - return if ($TMP eq "" || !-d $TMP); - - opendir(MAINDIR, $TMP); - my @files = readdir(MAINDIR); - closedir(MAINDIR); - - foreach my $file (@files) - { - unlink "$TMP/$file" if -e "$TMP/$file"; - } - - remove_tree($TMP); - -} - - -sub help -{ - my $str = shift; - - print <<'HELPME'; - -USAGE - ./mafftash_premafft.pl -p [FILE] - ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY] - ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY] - - -PARAMETERS - -p [FILE] - FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format - - -o [FILE] -d [DIRECTORY] - FILE contains a list of IDs from your own structure/pdb files (one entry per line) - for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY - - -h [HATFILE] - save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory - - -i [INSTRFILE] - save the output instr file in INSTRFILE; if not set, 
the output is written to a file named 'instr' in your current directory - -HELPME - - &bail($str); -} - - - diff --git a/software/mafft/mafft-win/usr/lib/mafft/maffttext2hex.exe b/software/mafft/mafft-win/usr/lib/mafft/maffttext2hex.exe deleted file mode 100755 index 32309e33..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/maffttext2hex.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/makedirectionlist.exe b/software/mafft/mafft-win/usr/lib/mafft/makedirectionlist.exe deleted file mode 100755 index bdf916cd..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/makedirectionlist.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/mccaskillwrap.exe b/software/mafft/mafft-win/usr/lib/mafft/mccaskillwrap.exe deleted file mode 100755 index f7a43b7a..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/mccaskillwrap.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/multi2hat3s.exe b/software/mafft/mafft-win/usr/lib/mafft/multi2hat3s.exe deleted file mode 100755 index 8084eee3..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/multi2hat3s.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/nodepair.exe b/software/mafft/mafft-win/usr/lib/mafft/nodepair.exe deleted file mode 100755 index e162841d..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/nodepair.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/pairash.exe b/software/mafft/mafft-win/usr/lib/mafft/pairash.exe deleted file mode 100755 index 4a2f905f..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/pairash.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/pairlocalalign.exe b/software/mafft/mafft-win/usr/lib/mafft/pairlocalalign.exe deleted file mode 100755 index f47e1879..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/pairlocalalign.exe and /dev/null differ diff --git 
a/software/mafft/mafft-win/usr/lib/mafft/regtable2seq.exe b/software/mafft/mafft-win/usr/lib/mafft/regtable2seq.exe deleted file mode 100755 index 7abd1dc8..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/regtable2seq.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/replaceu.exe b/software/mafft/mafft-win/usr/lib/mafft/replaceu.exe deleted file mode 100755 index d8548c9e..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/replaceu.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/restoreu.exe b/software/mafft/mafft-win/usr/lib/mafft/restoreu.exe deleted file mode 100755 index 9adaa482..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/restoreu.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/score.exe b/software/mafft/mafft-win/usr/lib/mafft/score.exe deleted file mode 100755 index 2b5c7d61..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/score.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/seekquencer_premafft.pl b/software/mafft/mafft-win/usr/lib/mafft/seekquencer_premafft.pl deleted file mode 100755 index a9040fdd..00000000 --- a/software/mafft/mafft-win/usr/lib/mafft/seekquencer_premafft.pl +++ /dev/null @@ -1,600 +0,0 @@ -#!/usr/bin/perl - -#################################################################################### -# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) -# -# Ver. 
Date Changelog -#################################################################################### -# 1.0 11.01.13 Initial release -# -# **Skipped version 2 to standardise version numbers to seekquencer.pl script** -# -# 3.0 04.24.14 Added split option -mod for output -# Uses seekquencer_v3 backend -# -# 4.0 05.12.14 Added new options: -run -trd -noin -# Sets -seqa fast in seekquencer.pl -# Uses seekquencer_v4 backend -# -# 4.1 05.19.14 Added a check on running REST requests before proceeding -# to avoid server load problems -# -# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script -# to avoid server load problems -# -# 4.3 07.22.14 Added new option: -seqd -# Blast limit changed from factor of 10 to -blim option -# Timing on sleep changed; added srand() for making seed -# Moved the job limit processing to server side -# -# 4.4 08.05.14 Modified to work in multiple OS -# -# -#################################################################################### - -use strict; -use Getopt::Long; -use File::Path qw(make_path remove_tree); -use Cwd; -use LWP::Simple; -use LWP::UserAgent; - -# to prevent error: Header line too long (limit is 8192) -use LWP::Protocol::http; -push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); - - - -my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft"; -my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG ); -my $OUTTYPE = "mafftash"; - -my $SEQDATABASE = "uniref100"; -my $SEQLIMIT = 100; -my $SEQBLASTLIMIT = 100; - -my $RUNMODE = "normal"; # thread|normal -my $THREADCOUNT = 3; - - -GetOptions -( - 'inp=s' => \$INPUTFILE, - 'idf=s' => \$IDLISTFILE, - 'seqf=s' => \$SEQFASTAFILE, - 'out=s' => \$OUTPUTFILE, - 'str' => \$STRFLAG, - 'seq' => \$SEQFLAG, - 'seqd=s' => \$SEQDATABASE, - 'lim=i' => \$SEQLIMIT, - 'blim=i' => \$SEQBLASTLIMIT, - 'pre' => \$EVALFLAG, - 'noin' => \$NOINFLAG, - 'mod=s' => \$OUTTYPE, - 'run=s' => \$RUNMODE, - 'trd=i' 
=> \$THREADCOUNT, - - -); - -my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0; -print STDERR "[Seekquencer-premafft 4.4 on $^O]\n"; - - -# set temp directory -my $CWD = getcwd; -my $TMP = "$CWD/seekpremafft$$"; -make_path($TMP) unless -d $TMP; - - - -###### -# validation -help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE ); -help("'-inp' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) ); -help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) ); -help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) ); -help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! -e $SEQFASTAFILE || !-s $SEQFASTAFILE) ); -help("Required parameter: output file '-out'") unless ( defined $OUTPUTFILE ); -help("Set either '-str' or '-seq' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG ); - -help("Invalid value for '-seqd '") if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot"); -help("Invalid value for '-mod '") if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" ); -help("Invalid value for '-run '") if ( $RUNMODE ne "thread" && $RUNMODE ne "normal" ); -help("Invalid value for '-trd '; count should be between 1 and 5 (inclusive)") if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) ); - - -###### -# check existing requests -print STDERR "Checking server status...\n"; - -# generate seed -srand($$); - -# sleep a bit to give time for lsf response -sleep(int(rand(6))+1); - - -my $browser = LWP::UserAgent->new; -$browser->timeout(0); - -# get: check if you can send a new request this time -my $jobsResponse = 
$browser->get("$BASEURL/isAllowed"); - -if ( $jobsResponse->is_success ) -{ - my $status = parseJobQueryResponse($jobsResponse->content); - bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) unless $status > 0; -} -else -{ - bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content))); -} - - -###### -# make a temporary input if lists were provided -unless ( defined $INPUTFILE ) -{ - $INPUTFILE = "$TMP/input.homemade"; - open INPF, ">$INPUTFILE" or bail("Error writing to input file."); - - if ( defined $IDLISTFILE ) - { - open IDLIST, "<$IDLISTFILE" or bail("Error reading input file."); - while( ) - { - chomp; - if ( /(\w{5})/ ) - { - print INPF ">PDBID\n$1\n"; - } - } - close IDLIST; - } - - - if ( defined $SEQFASTAFILE ) - { - open FASTA, "<$SEQFASTAFILE" or bail("Error reading input file."); - while( ) - { - chomp; - print INPF "$_\n"; - } - close FASTA; - } - - close INPF; -} - - -###### -# prepare parameters -print STDERR "Preparing parameters for service request...\n"; - -my @parameters = (); -push(@parameters, "fileinput" => ["$INPUTFILE"]); -push(@parameters, "out_type" => $OUTTYPE); - -push(@parameters, "rest_flag" => "1"); -push(@parameters, "cls_flag" => "1"); -push(@parameters, "pre_flag" => "1") if defined $EVALFLAG; -push(@parameters, "noin_flag" => "1") if defined $NOINFLAG; - -push(@parameters, "run_mode" => $RUNMODE); -push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread"; - - -if ( defined $STRFLAG ) -{ - push(@parameters, "str_flag" => "1"); - push(@parameters, "ash_flag" => "1"); -} -elsif ( defined $SEQFLAG ) -{ - push(@parameters, "seq_flag" => "1"); - push(@parameters, "seq_algorithm" => "fast"); - push(@parameters, "seq_database" => $SEQDATABASE); - push(@parameters, "seq_blastlimit" => $SEQBLASTLIMIT); - push(@parameters, "seq_outputlimit" => $SEQLIMIT); -} -else -{ - push(@parameters, "str_flag" => "1"); - push(@parameters, "ash_flag" => "1"); - 
push(@parameters, "seq_flag" => "1"); - push(@parameters, "seq_algorithm" => "fast"); - push(@parameters, "seq_database" => $SEQDATABASE); - push(@parameters, "seq_blastlimit" => $SEQBLASTLIMIT); - push(@parameters, "seq_outputlimit" => $SEQLIMIT); -} - - - -###### -# start rest service -print STDERR "Sending service request...\n"; - -# post: running a mafftash job -my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' ); -bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success); - - -# get response from post request -my ($status, $seekid) = parseResponse($postResponse->content); - -my $MAXTRIES = 3; -my $STIMER = 5; -my $timer = 0; - -print STDERR "Request sent! Waiting for response...[$seekid]\n"; - -my $checklist = {}; - -# wait for results until it becomes available -while(1) -{ - # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,, - $timer = $timer >= 60 ? 60 : $timer+$STIMER; - sleep($timer+int(rand(4))); - - # get: get results for mafftash job - my $getResponse = $browser->get("$BASEURL/$seekid"); - - if ( $getResponse->is_success ) - { - - # get response from get request - ($status, $seekid) = parseResponse($getResponse->content); - next unless ( $status eq "done" ); - - - # if job is finished and ready - print STDERR "Results found!\n"; - my $csfile = "$TMP/checksum"; - my $try1 = 1; - - - while(1) - { - print STDERR "Fetching Results... 
[Trial $try1]\n"; - - if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile ) - { - # get response from get request - $checklist = extractchecksum($csfile); - bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 ); - - - foreach my $id ( sort keys %$checklist ) - { - sleep 1; - my $checkfile = "$TMP/$id"; - my $checkid = $checklist->{$id}; - my $try2 = 1; - - while(1) - { - unlink $checkfile if -e $checkfile; - - if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile ) - { - last if $ISWINDOWS; - - my $hashid = getchecksum($checkfile); - #print STDERR "[hashid]$hashid [checkid]$checkid\n"; - - if ($hashid ne "" && $hashid ne $checkid ) - { - #unlink $checkfile if -e $checkfile; - bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES; - $try2++; - sleep $STIMER; - } - else - { - last; - } - } - else - { - bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES; - $try2++; - sleep $STIMER; - } - } - } - - last; - } - else - { - bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES; - $try1++; - sleep $STIMER; - } - } - - last; - - } - else - { - bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content))); - } - -} - - -# make sure outputs were generated -# decompress -print STDERR "Assembling final results...\n"; - -foreach my $id ( sort keys %$checklist ) -{ - if ( $id =~ /^$seekid\.out(\.str|\.seq)?/ ) - { - bail("Error: Output file corrupted!") unless -e "$TMP/$id"; - appendToFile("$TMP/$id","$OUTPUTFILE".$1); - } -} - -cleanup(); - - - -#################### -#################### - - -sub parseResponse -{ - my $response = shift; - my $status = ""; - my $seekid = ""; - - if ( $response =~ /^([^\s:]+):([^\s:]+)$/ ) - { - $seekid = $1; - $status = $2; - } - - return ($status, $seekid); -} - - -sub parseJobQueryResponse -{ - my $response = 
shift; - my $jobs = 100; - - if ( $response =~ /^(\d+)$/ ) - { - $jobs = $1; - } - - return $jobs; -} - - -sub extractchecksum -{ - my $infile = shift; - my %dataset = (); - - #open CSUM, "tar -zxf $infile -O|" or return \%dataset; - open CSUM, "<$infile" or return \%dataset; - - while() - { - chomp; - if ( /^(\S+)\s+(\S+)$/ ) - { - $dataset{$2} = $1; - } - } - - close CSUM; - - return \%dataset; -} - - -sub parseError -{ - my $response = shift; - - #"error":"Invalid number of inputs found." - my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : $response; - return $errorstr; -} - - -sub getchecksum -{ - my $infile = shift; - - # md5 binary check - my $MD5BIN = ""; - - if ( -x "/usr/bin/md5sum" ) - { - $MD5BIN = "/usr/bin/md5sum"; - } - elsif ( -x "/sbin/md5" ) - { - $MD5BIN = "/sbin/md5 -q"; - } - - return "" if $MD5BIN eq ""; - - - my $checksum = ""; - open MD5EXE, "$MD5BIN $infile|" or return ""; - - while() - { - if (/^(\S+)\s+(\S+)$/) - { - $checksum = $1; - last; - } - elsif (/^(\S+)$/) - { - $checksum = $1; - last; - } - } - - close MD5EXE; - - return $checksum; - -} - - -sub backticks -{ - my $command = shift; - - `$command`; - return ($? == -1) ? 
0 : 1; -} - - -sub bail -{ - my $str = shift; - my $status = shift; - - #0 for success and 1 for error - $status = 1 unless defined; - - print STDERR "$str\n" if defined $str; - - cleanup(); - - exit($status); -} - - -sub cleanup -{ - return if ($TMP eq "" || !-d $TMP); - - opendir(MAINDIR, $TMP); - my @files = readdir(MAINDIR); - closedir(MAINDIR); - - foreach my $file (@files) - { - unlink "$TMP/$file" if -e "$TMP/$file"; - } - - remove_tree($TMP); - -} - - -sub appendToFile -{ - my $inpfile = shift; - my $outfile = shift; - - open INPF, "<$inpfile" or bail("Server Error: Error in reading file."); - open OUTF, ">>$outfile" or bail("Server Error: Error in writing to file."); - - while() - { - print OUTF $_; - } - - close OUTF; - close INPF; -} - - - -sub help -{ - my $str = shift; - - print <<'HELPME'; - -USAGE - ./seekquencer_premafft.pl -inp -out [-str|-seq] - ./seekquencer_premafft.pl -idf -seqf -out [-str|-seq] - - -PARAMETERS - -inp - INFILE is a FASTA-formatted file - PDB entries are written as: - >PDBID - [5-character pdbid+chain] - - While sequence entries are written as: - >[id] - [sequence] - - -idf - IDLISTFILE is a file containing a list of pdbids - pdbids should be a 5-character pdbid + chain - - -seqf - SEQFASTA is a fasta file - entries are written as: - >[id] - [sequence] - - -out - Results are writen to a file named OUTFILE - - -str - Only structures will be collected by Seekquencer - If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer - - -seq - Only sequences will be collected by Seekquencer - If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer - - -OPTIONAL PARAMETERS: - -seqd - Search Database for sequence homologs. Default value: uniref100 - - -lim - this sets the maximum number of sequence homologs collected. Default value: 100 - - -blim - this sets the -b and -v value when running blastall. 
Default value: 100 - - -pre - When -str is set, this will compare all structures against all using pdp-ash - This would ensure that all structures collected are matching - All structures that do not match will be removed - - -noin - When set, inputs will not be included in the output - - -mod - Defines the output format - mafftash (default) will print a mafftash-formatted fasta file - mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq) - fasta will print a regular fasta file - - -run - thread will run simultaneous jobs during blast queries (faster but takes more nodes) - normal will run sequential blast queries (slower but takes less nodes) - Default value: normal - - -trd - if -run is defined, this sets the number of parallel jobs to run. Default value: 3 - - -HELPME - - bail($str); -} - diff --git a/software/mafft/mafft-win/usr/lib/mafft/seq2regtable.exe b/software/mafft/mafft-win/usr/lib/mafft/seq2regtable.exe deleted file mode 100755 index 3b830829..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/seq2regtable.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/setcore.exe b/software/mafft/mafft-win/usr/lib/mafft/setcore.exe deleted file mode 100755 index 55d547df..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/setcore.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/setdirection.exe b/software/mafft/mafft-win/usr/lib/mafft/setdirection.exe deleted file mode 100755 index 30a43ec1..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/setdirection.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/sextet5.exe b/software/mafft/mafft-win/usr/lib/mafft/sextet5.exe deleted file mode 100755 index 9f03a81a..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/sextet5.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/splittbfast.exe 
b/software/mafft/mafft-win/usr/lib/mafft/splittbfast.exe deleted file mode 100755 index 687845f5..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/splittbfast.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/tbfast.exe b/software/mafft/mafft-win/usr/lib/mafft/tbfast.exe deleted file mode 100755 index f9ee9049..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/tbfast.exe and /dev/null differ diff --git a/software/mafft/mafft-win/usr/lib/mafft/version.exe b/software/mafft/mafft-win/usr/lib/mafft/version.exe deleted file mode 100755 index c1b7ee50..00000000 Binary files a/software/mafft/mafft-win/usr/lib/mafft/version.exe and /dev/null differ diff --git a/software/versions.txt b/software/versions.txt index 8e8ca7b8..afe86e47 100644 --- a/software/versions.txt +++ b/software/versions.txt @@ -1,9 +1,9 @@ Apache-POI: 3.8 -BLAST+: 2.10.0 +BLAST+: 2.12.0 CD-HIT: 4.8.1 -DIAMOND: 0.9.32 +DIAMOND: 2.0.13 GLPK: 4.40 M2HTML: 1.5 -HMMER: 3.3 -MAFFT: 7.467 +HMMER: 3.3.2 +MAFFT: 7.490 WoLFPSORT: v0.2 diff --git a/testing/testBlast.m b/testing/testBlast.m deleted file mode 100755 index e0c84cd9..00000000 --- a/testing/testBlast.m +++ /dev/null @@ -1,121 +0,0 @@ -function [success,blastStructure]=testBlast(fullCheck) -% testBlast -% Performs a check for BLAST+ functionality in RAVEN. Depending on the -% parameter settings the user can choose between a quick check for -% binaries or the thorough testing while building BLAST database and -% running homology search with BLASTP -% -% Input: -% fullCheck true if the thorough BLAST+ testing should be performed -% (opt, default true) -% -% Output: -% success true if the test was successful, otherwise equal to -% zero -% blastStructure blastStructure resulting from the thorough BLAST+ check -% -% NOTE: The purpose of the thorough check is to assess whether the -% homology search can be successfully performed using existing BLAST+ -% binaries. 
This testing function is completely standalone, only -% requiring BLAST+ binaries and multi-FASTA file sce.fa from tutorials -% directory -% -% Usage: [success,blastStructure]=testBlast(fullCheck) - -if nargin<1 - fullCheck=true; -end - -%Get the directory for RAVEN Toolbox -[ST, I]=dbstack('-completenames'); -ravenPath=fileparts(fileparts(ST(I).file)); - -if isunix - if ismac - binEnd='.mac'; - else - binEnd=''; - end -elseif ispc - binEnd=''; - setenv('BLASTDB_LMDB_MAP_SIZE','1000000'); -else - dispEM('Unknown OS, exiting.') - return -end - -%Create an empty blastStructure. Even if a quick BLAST+ evaluation is -%considered, blastStructure should still be in the output -blastStructure=[]; - -if ~fullCheck - fprintf(['Checking blastp' binEnd '... ']); - [res,~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '"']); - if res==1 - fprintf('OK\n'); - else - fprintf('Not OK! The binary must be recompiled from source before running RAVEN\n'); - end - fprintf(['Checking makeblastdb' binEnd '... ']); - [res,~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '"']); - if res==1 - fprintf('OK\n'); - else - fprintf('Not OK! The binary must be recompiled from source before running RAVEN\n'); - end -else - %Generate temporary names for working directory and outFile - tmpDB=tempname; - outFile=tempname; - - %Run BLAST multi-threaded to use all logical cores assigned to MATLAB - cores = evalc('feature(''numcores'')'); - cores = strsplit(cores, 'MATLAB was assigned: '); - cores = regexp(cores{2},'^\d*','match'); - cores = cores{1}; - - %Create a temporary folder and copy multi-FASTA file there - [~, ~]=system(['mkdir "' tmpDB '"']); - copyfile(fullfile(ravenPath,'tutorial','sce.fa'),tmpDB); - - %Construct a BLAST database - fprintf('Testing makeblastdb... 
'); - [res, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' fullfile(tmpDB,'sce.fa') '" -out "' tmpDB '" -dbtype prot']); - if res~=0 - fprintf('Not OK\n'); - EM=['makeblastdb did not run successfully, error: ', num2str(res)]; - dispEM(EM,true); - end - fprintf('OK\n'); - - %Run a homology search - fprintf('Testing blastp... '); - [res, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' fullfile(tmpDB,'sce.fa') '" -out "' outFile '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']); - if res~=0 - fprintf('Not OK\n'); - EM=['blastp did not run successfully, error: ', num2str(res)]; - dispEM(EM,true); - end - fprintf('OK\n'); - - %Remove temporary folder, since homology search is finished - [~, ~]=system(['rm "' tmpDB '" -r']); - - %Done with the BLAST, do the parsing of the text file - blastStructure.fromId='sce'; - blastStructure.toId='sco'; - A=readtable(outFile,'Delimiter',',','Format','%s%s%f%f%f%f%f'); - blastStructure.fromGenes=A{:,1}; - blastStructure.toGenes=A{:,2}; - blastStructure.evalue=table2array(A(:,3)); - blastStructure.identity=table2array(A(:,4)); - blastStructure.aligLen=table2array(A(:,5)); - blastStructure.bitscore=table2array(A(:,6)); - blastStructure.ppos=table2array(A(:,7)); - - %Remove the old tempfiles - delete([outFile '*']); -end - -success=1; -end diff --git a/testing/testDiamond.m b/testing/testDiamond.m deleted file mode 100755 index b7435bc6..00000000 --- a/testing/testDiamond.m +++ /dev/null @@ -1,113 +0,0 @@ -function [success,blastStructure]=testDiamond(fullCheck) -% testDiamond -% Performs a check for DIAMOND functionality in RAVEN. 
Depending on the -% parameter settings the user can choose between a quick check for -% binaries or the thorough testing while building DIAMOND database and -% running homology search with DIAMOND -% -% Input: -% fullCheck true if the thorough DIAMOND testing should be -% performed (opt, default true) -% -% Output: -% success true if the test was successful, otherwise equal to -% zero -% blastStructure blastStructure resulting from the thorough BLAST+ check -% -% NOTE: The purpose of the thorough check is to assess whether the -% homology search can be successfully performed using existing BLAST+ -% binaries. This testing function is completely standalone, only -% requiring DIAMOND binary and multi-FASTA file sce.fa from tutorials -% directory -% -% Usage: [success,blastStructure]=testDiamond(fullCheck) - -if nargin<1 - fullCheck=true; -end - -%Get the directory for RAVEN Toolbox -[ST, I]=dbstack('-completenames'); -ravenPath=fileparts(fileparts(ST(I).file)); - -if isunix - if ismac - binEnd='.mac'; - else - binEnd=''; - end -elseif ispc - binEnd=''; -else - dispEM('Unknown OS, exiting.') - return -end - -%Create an empty blastStructure. Even if a quick DIAMOND evaluation is -%considered, blastStructure should still be in the output -blastStructure=[]; - -if ~fullCheck - fprintf(['Checking diamond' binEnd '... ']); - [res,~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '"']); - if res==1 - fprintf('OK\n'); - else - fprintf('Not OK! 
The binary must be recompiled from source before running RAVEN\n'); - end -else - %Generate temporary names for working directory and outFile - tmpDB=tempname; - outFile=tempname; - - %Run DIAMOND multi-threaded to use all logical cores assigned to MATLAB - cores = evalc('feature(''numcores'')'); - cores = strsplit(cores, 'MATLAB was assigned: '); - cores = regexp(cores{2},'^\d*','match'); - cores = cores{1}; - - %Create a temporary folder and copy multi-FASTA file there - [~, ~]=system(['mkdir "' tmpDB '"']); - copyfile(fullfile(ravenPath,'tutorial','sce.fa'),tmpDB); - - %Construct a DIAMOND database - fprintf('Testing DIAMOND makedb... '); - [res, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fullfile(tmpDB,'sce.fa') '" --db "' tmpDB '"']); - if res~=0 - fprintf('Not OK\n'); - EM=['DIAMOND makedb did not run successfully, error: ', num2str(res)]; - dispEM(EM,true); - end - fprintf('OK\n'); - - %Run a homology search - fprintf('Testing DIAMOND blastp... 
'); - [res, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fullfile(tmpDB,'sce.fa') '" --out "' outFile '" --db "' tmpDB '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]); - if res~=0 - fprintf('Not OK\n'); - EM=['DIAMOND blastp did not run successfully, error: ', num2str(res)]; - dispEM(EM,true); - end - fprintf('OK\n'); - - %Remove temporary folder, since homology search is finished - [~, ~]=system(['rm "' tmpDB '" -r']); - - %Done with the DIAMOND, do the parsing of the text file - blastStructure.fromId='sce'; - blastStructure.toId='sco'; - A=readtable(outFile,'Delimiter','\t','Format','%s%s%f%f%f%f%f'); - blastStructure.fromGenes=A{:,1}; - blastStructure.toGenes=A{:,2}; - blastStructure.evalue=table2array(A(:,3)); - blastStructure.identity=table2array(A(:,4)); - blastStructure.aligLen=table2array(A(:,5)); - blastStructure.bitscore=table2array(A(:,6)); - blastStructure.ppos=table2array(A(:,7)); - - %Remove the old tempfiles - delete([outFile '*']); -end - -success=1; -end diff --git a/testing/unit_tests/blastPlusTests.m b/testing/unit_tests/blastPlusTests.m new file mode 100755 index 00000000..b47c8099 --- /dev/null +++ b/testing/unit_tests/blastPlusTests.m @@ -0,0 +1,58 @@ +%run this test case with the command +%results = runtests('blastPlusTests.m') +function tests = blastPlusTests +tests = functiontests(localfunctions); +end + +function testBlastPlus(testCase) +%This unit test comprises several functionality tests for BLAST+ in RAVEN: +% 1. MD5 checksum check for BLAST database files. This check is applied for +% "phr", "pot", "psq" and "pto" files. The remaining files (i.e. "pdb", +% "pin" and "ptf") are not compared as these seem to be +% machine-specific. +% 2. Non-parsed text check for BLAST result files. Although the content of +% the files is exactly the same, their MD5 hashes are somehow different +% between the operating systems. +% 3. 
Check of resulting blastStructure against the expected one. This is +% done to test BLAST results parsing in RAVEN. + +%% +%Get the directory for RAVEN Toolbox +[ST, I]=dbstack('-completenames'); +ravenPath=fileparts(fileparts(fileparts(ST(I).file))); + +%Import structures that contain expected MD5 hashes and BLAST results +sourceDir = fileparts(which(mfilename)); +load([sourceDir,'/test_data/expBlastResults.mat'],'expBlastStructure','expBlastReport'); + +organismID='sce'; +fastaFile=fullfile(ravenPath,'testing','unit_tests','test_data','yeast_galactosidases.fa'); +modelIDs={'hsa' 'afv'}; +refFastaFiles={fullfile(ravenPath,'testing','unit_tests','test_data','human_galactosidases.fa') fullfile(ravenPath,'testing','unit_tests','test_data','aflavus_galactosidases.fa')}; + +%% +%Run BLAST +[actBlastStructure,actBlastReport]=getBlast(organismID,fastaFile,modelIDs,refFastaFiles,true,true); + +%% +%Test 1a: Check if MD5 checksums for BLAST database files are the same +verifyEqual(testCase,actBlastReport.dbHashes,expBlastReport.dbHashes); + +%Test 1b: Change one of the MD5 checksums and check if test fails +actBlastReport.dbHashes.phr{1,1}=actBlastReport.dbHashes.phr{1,2}; +verifyNotEqual(testCase,actBlastReport.dbHashes,expBlastReport.dbHashes); + +%Test 2a: Check if BLAST result files are the same +verifyEqual(testCase,actBlastReport.blastTxtOutput,expBlastReport.blastTxtOutput); + +%Test 2b: Change actual BLAST result file and check if test fails +actBlastReport.blastTxtOutput='empty'; +verifyNotEqual(testCase,actBlastReport.blastTxtOutput,expBlastReport.blastTxtOutput); + +%Test 3a: Check if BLAST structures are the same +verifyEqual(testCase,actBlastStructure,expBlastStructure); + +%Test 3b: Modify actual BLAST structure and check if test fails +actBlastStructure(1,1).toId=actBlastStructure(1,1).fromId; +verifyNotEqual(testCase,actBlastStructure,expBlastStructure); +end diff --git a/testing/unit_tests/cdhitTests.m b/testing/unit_tests/cdhitTests.m new file mode 100755 
index 00000000..d3d3e82b --- /dev/null +++ b/testing/unit_tests/cdhitTests.m @@ -0,0 +1,80 @@ +%run this test case with the command +%results = runtests('cdhitTests.m') +function tests = cdhitTests +tests = functiontests(localfunctions); +end + +function testCdhit(testCase) +%This unit test comprises the functionality test for CD-HIT in RAVEN: +% 1. Check for resulting file against the expected one. + +%% +%Get the directory for RAVEN Toolbox +[ST, I]=dbstack('-completenames'); +ravenPath=fileparts(fileparts(fileparts(ST(I).file))); + +%Identify the operating system +if isunix + if ismac + binEnd='.mac'; + else + binEnd=''; + end +elseif ispc + binEnd='.exe'; +else + dispEM('Unknown OS, exiting.') + return +end + +%Import structure that contains expected CD-HIT results +sourceDir = fileparts(which(mfilename)); +load([sourceDir,'/test_data/expCdhitMafftOutput.mat'],'expCdhitMafftOutput'); + +%Generate temporary names for working directory and outFile +tmpDIR=tempname; +outFile=tempname; + +%Run CD-HIT multi-threaded to use all logical cores assigned to MATLAB +cores = evalc('feature(''numcores'')'); +cores = strsplit(cores, 'MATLAB was assigned: '); +cores = regexp(cores{2},'^\d*','match'); +cores = cores{1}; + +%Create a temporary folder and copy multi-FASTA file there +[~, ~]=system(['mkdir "' tmpDIR '"']); + +sourceDir = fileparts(which(mfilename)); +copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR); + +% Define WSL paths +wslPath.fastaFile=getWSLpath([tmpDIR filesep 'yeast_galactosidases.fa']); +wslPath.outFile=getWSLpath(outFile); +wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); + +%% +%Run protein clustering with CD-HIT +if ispc + [~, ~]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.fastaFile '" -o "' wslPath.outFile '" -c 1.0 -n 5 -M 2000']); +else + [~, ~]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' fullfile(tmpDIR, 
'yeast_galactosidases.fa') '" -o "' outFile '" -c 1.0 -n 5 -M 2000']); +end + +%% +%Open actual CD-HIT results file +actCdhitOutput=importdata(fullfile(outFile)); + +%Remove the old tempfiles +delete([outFile '*']); + +%Remove temporary folder, since testing is finished (rmdir is portable, unlike the shell rm call) +[~, ~]=rmdir(tmpDIR,'s'); + +%% +%Check 1a: Check if files for CD-HIT results are the same +verifyEqual(testCase,actCdhitOutput,expCdhitMafftOutput); + +%Check 1b: Change actual CD-HIT results file and check if test fails +actCdhitOutput='abc'; +verifyNotEqual(testCase,actCdhitOutput,expCdhitMafftOutput); +end diff --git a/testing/unit_tests/diamondTests.m b/testing/unit_tests/diamondTests.m new file mode 100755 index 00000000..3138d89f --- /dev/null +++ b/testing/unit_tests/diamondTests.m @@ -0,0 +1,57 @@ +%run this test case with the command +%results = runtests('diamondTests.m') +function tests = diamondTests +tests = functiontests(localfunctions); +end + +function testDiamond(testCase) +%This unit test comprises several functionality tests for DIAMOND blastp in +%RAVEN: +% 1. MD5 checksum check for DIAMOND database files. +% 2. Non-parsed text check for DIAMOND result files. Although the content +% of the files is exactly the same, their MD5 hashes are somehow +% different between the operating systems. +% 3. Check of resulting blastStructure against the expected one. This is +% done to test DIAMOND blastp results parsing in RAVEN. 
+ +%% +%Get the directory for RAVEN Toolbox +[ST, I]=dbstack('-completenames'); +ravenPath=fileparts(fileparts(fileparts(ST(I).file))); + +%Import structures that contain expected MD5 hashes and DIAMOND blastp +%results +sourceDir = fileparts(which(mfilename)); +load([sourceDir,'/test_data/expDiamondResults.mat'],'expBlastStructure','expDiamondReport'); + +organismID='sce'; +fastaFile=fullfile(ravenPath,'testing','unit_tests','test_data','yeast_galactosidases.fa'); +modelIDs={'hsa' 'afv'}; +refFastaFiles={fullfile(ravenPath,'testing','unit_tests','test_data','human_galactosidases.fa') fullfile(ravenPath,'testing','unit_tests','test_data','aflavus_galactosidases.fa')}; + +%% +%Run DIAMOND blastp +[actBlastStructure,actDiamondReport]=getDiamond(organismID,fastaFile,modelIDs,refFastaFiles,true,true); + +%% +%Test 1a: Check if MD5 checksums for DIAMOND blastp database files are the same +verifyEqual(testCase,actDiamondReport.dbHashes,expDiamondReport.dbHashes); + +%Test 1b: Change one of the MD5 checksums and check if test fails +actDiamondReport.dbHashes{1,1}=actDiamondReport.dbHashes{1,2}; +verifyNotEqual(testCase,actDiamondReport.dbHashes,expDiamondReport.dbHashes); + +%Test 2a: Check if DIAMOND blastp result files are the same +verifyEqual(testCase,actDiamondReport.diamondTxtOutput,expDiamondReport.diamondTxtOutput); + +%Test 2b: Change actual DIAMOND blastp result file and check if test fails +actDiamondReport.diamondTxtOutput='empty'; +verifyNotEqual(testCase,actDiamondReport.diamondTxtOutput,expDiamondReport.diamondTxtOutput); + +%Test 3a: Check if DIAMOND blastp structures are the same +verifyEqual(testCase,actBlastStructure,expBlastStructure); + +%Test 3b: Modify actual DIAMOND blastp structure and check if test fails +actBlastStructure(1,1).toId=actBlastStructure(1,1).fromId; +verifyNotEqual(testCase,actBlastStructure,expBlastStructure); +end diff --git a/testing/unit_tests/hmmerTests.m b/testing/unit_tests/hmmerTests.m new file mode 100755 index 
00000000..d17b8e8d --- /dev/null +++ b/testing/unit_tests/hmmerTests.m @@ -0,0 +1,141 @@ +%run this test case with the command +%results = runtests('hmmerTests.m') +function tests = hmmerTests +tests = functiontests(localfunctions); +end + +function testHmmer(testCase) +%This unit test comprises the functionality test for HMMER in RAVEN: +% 1. Check of parsed HMMER results against the expected. +% +% NOTE: as hmm and HMMER results files are time-specific, no checks for +% these files existence are done. Also, due to the way HMMER is utilized in +% getKEGGModelForOrganism (HMMER result files can be parsed only once all +% required hmm files are generated), the code segment involving HMMER +% results parsing is pasted in this test function. Should the parsing problems +% occur in the results processing, the code modifications shall be done in +% this function and getKEGGModelForOrganism respectively. + +%% +%Get the directory for RAVEN Toolbox +[ST, I]=dbstack('-completenames'); +ravenPath=fileparts(fileparts(fileparts(ST(I).file))); + +%Identify the operating system +if isunix + if ismac + binEnd='.mac'; + else + binEnd=''; + end +elseif ispc + binEnd='.exe'; +else + dispEM('Unknown OS, exiting.') + return +end + +%Create empty structures needed for HMMER results +actHmmResult.genes = {}; +actHmmResult.scores = []; + +%Create structures that contain expected HMMER results +expHmmResult.genes = {'sp|P41947|MEL6_YEASX','sp|P41946|MEL5_YEASX', 'sp|P41945|MEL2_YEASX', 'sp|P04824|MEL1_YEASX'}; +expHmmResult.scores = [10^-250, 10^-250, 10^-250, 10^-250]; + +%Generate temporary names for working directory and outFile +tmpDIR=tempname; +outFile=tempname; + +%Run HMMER multi-threaded to use all logical cores assigned to MATLAB +cores = evalc('feature(''numcores'')'); +cores = strsplit(cores, 'MATLAB was assigned: '); +cores = regexp(cores{2},'^\d*','match'); +cores = cores{1}; + +%Create a temporary folder and copy multi-FASTA file there +[~, ~]=system(['mkdir "' tmpDIR 
'"']); + +sourceDir = fileparts(which(mfilename)); +copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR); +copyfile(fullfile(sourceDir,'test_data','human_galactosidases.fa'),tmpDIR); + +%% +%Train a hidden Markov model +[~, ~]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']); + +%Run a homology search against the newly-trained HMM +[~, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']); + +%Save the output to a file +fid=fopen(outFile,'w'); +fwrite(fid,output); +fclose(fid); + +%% +%Parse the results +geneCounter=0; +fid=fopen(outFile,'r'); +beginMatches=false; +while 1 + %Get the next line + tline = fgetl(fid); + + %Abort at end of file + if ~ischar(tline) + break; + end + + if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) + break; + end + + if beginMatches==false + %This is how the listing of matches begins + if any(strfind(tline,'E-value ')) + %Read one more line that is only padding + tline = fgetl(fid); + beginMatches=true; + end + else + %If matches should be read + if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) + elements=regexp(tline,' ','split'); + elements=elements(cellfun(@any,elements)); + + %Check if the match is below the threshold + score=str2double(elements{1}); + gene=elements{9}; + if score<=10^-50 + %If the score is exactly 0, change it to a very + %small value to avoid NaN + if score==0 + score=10^-250; + end + %Check if the gene is added already and, if so, get + %the best score for it + geneCounter=geneCounter+1; + actHmmResult.genes{geneCounter}=gene; + actHmmResult.scores(geneCounter)=score; + end + else + break; + end + end +end +fclose(fid); 
+ +%Remove the old tempfiles +delete([outFile '*']); + +%Remove temporary folder, since testing is finished (rmdir is portable, unlike the shell rm call) +[~, ~]=rmdir(tmpDIR,'s'); + +%% +%Test 1a: Check if HMMER results match the expected ones +verifyEqual(testCase,actHmmResult,expHmmResult); + +%Test 1b: Modify actual HMMER results structure and check if test fails +actHmmResult.scores(2)=1; +verifyNotEqual(testCase,actHmmResult,expHmmResult); +end diff --git a/testing/unit_tests/mafftTests.m b/testing/unit_tests/mafftTests.m new file mode 100755 index 00000000..b4ffdfd4 --- /dev/null +++ b/testing/unit_tests/mafftTests.m @@ -0,0 +1,68 @@ +%run this test case with the command +%results = runtests('mafftTests.m') +function tests = mafftTests +tests = functiontests(localfunctions); +end + +function testMafft(testCase) +%This unit test comprises the functionality test for MAFFT in RAVEN: +% 1. Check for resulting file against the expected one. + +%% +%Get the directory for RAVEN Toolbox +[ST, I]=dbstack('-completenames'); +ravenPath=fileparts(fileparts(fileparts(ST(I).file))); + +%Import structure that contains expected MAFFT results +sourceDir = fileparts(which(mfilename)); +load([sourceDir,'/test_data/expCdhitMafftOutput.mat'],'expCdhitMafftOutput'); + +%Generate temporary names for working directory and outFile +tmpDIR=tempname; +outFile=tempname; + +%Run MAFFT multi-threaded to use all logical cores assigned to MATLAB +cores = evalc('feature(''numcores'')'); +cores = strsplit(cores, 'MATLAB was assigned: '); +cores = regexp(cores{2},'^\d*','match'); +cores = cores{1}; + +%Create a temporary folder and copy multi-FASTA file there +[~, ~]=system(['mkdir "' tmpDIR '"']); + +sourceDir = fileparts(which(mfilename)); +copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR); + +% Define WSL paths +wslPath.fastaFile=getWSLpath([tmpDIR filesep 'yeast_galactosidases.fa']); +wslPath.outFile=getWSLpath(outFile); 
+wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); + +%% +%Run protein multi-sequence alignment with MAFFT +if ismac + [~, ~]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' fullfile(tmpDIR, 'yeast_galactosidases.fa') '" > "' outFile '"']); +elseif isunix + [~, ~]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' fullfile(tmpDIR, 'yeast_galactosidases.fa') '" > "' outFile '"']); +elseif ispc + [~, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --quiet --thread "' num2str(cores) '" --out "' wslPath.outFile '" "' wslPath.fastaFile '"']); +end + +%% +%Open actual MAFFT results file +actMafftOutput=importdata(fullfile(outFile)); + +%Remove the old tempfiles +delete([outFile '*']); + +%Remove temporary folder, since testing is finished (rmdir is portable, unlike the shell rm call) +[~, ~]=rmdir(tmpDIR,'s'); + +%% +%Check 1a: Check if files for MAFFT results are the same +verifyEqual(testCase,actMafftOutput,expCdhitMafftOutput); + +%Check 1b: Change actual MAFFT results file and check if test fails +actMafftOutput='abc'; +verifyNotEqual(testCase,actMafftOutput,expCdhitMafftOutput); +end diff --git a/testing/unit_tests/sortIdentifiers_and_permuteModelTests.m b/testing/unit_tests/sortIdentifiers_and_permuteModelTests.m new file mode 100644 index 00000000..26cb61a4 --- /dev/null +++ b/testing/unit_tests/sortIdentifiers_and_permuteModelTests.m @@ -0,0 +1,54 @@ +%run this test case with the command +%results = runtests('sortIdentifiers_and_permuteModelTests.m') +function tests = sortIdentifiers_and_permuteModelTests +tests = functiontests(localfunctions); +end + +function sortRandomizedModelTest(testCase) + +%Load the expected (i.e. 
sorted) model +sourceDir = fileparts(which(mfilename)); +load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +expModel = model; + +%Create the actual model that will be permuted and sorted +actModel = expModel; + +%Randomly permutate model, do not use RAVEN functions +rndIdx = randperm(numel(actModel.rxns)); +fieldsToChange = {'rxns','lb','ub','rev','c','rxnNames','grRules','eccodes'}; +for i=1:numel(fieldsToChange) + actModel.(fieldsToChange{i}) = actModel.(fieldsToChange{i})(rndIdx); +end +actModel.S = actModel.S(:,rndIdx); +actModel.rxnGeneMat = actModel.rxnGeneMat(rndIdx,:); + +rndIdx = randperm(numel(actModel.mets)); +fieldsToChange = {'mets','metNames','metComps','metFormulas'}; +for i=1:numel(fieldsToChange) + actModel.(fieldsToChange{i}) = actModel.(fieldsToChange{i})(rndIdx); +end +actModel.S = actModel.S(rndIdx,:); + +rndIdx = randperm(numel(actModel.genes)); +fieldsToChange = {'genes','geneShortNames'}; +for i=1:numel(fieldsToChange) + actModel.(fieldsToChange{i}) = actModel.(fieldsToChange{i})(rndIdx); +end +actModel.rxnGeneMat = actModel.rxnGeneMat(:,rndIdx); + +rndIdx = randperm(numel(actModel.comps)); +fieldsToChange = {'comps','compNames'}; +for i=1:numel(fieldsToChange) + actModel.(fieldsToChange{i}) = actModel.(fieldsToChange{i})(rndIdx); +end +[~,J]=sort(rndIdx); +[toreplace, bywhat] = ismember(actModel.metComps,1:length(J)); +actModel.metComps(toreplace) = J(bywhat(toreplace)); + +%Sort randomly permutated model +actModel = sortIdentifiers(actModel); + +%Check that the actual model is the same as the expected model +verifyEqual(testCase,actModel,expModel) +end diff --git a/testing/unit_tests/test_data/aflavus_galactosidases.fa b/testing/unit_tests/test_data/aflavus_galactosidases.fa new file mode 100644 index 00000000..20c8730b --- /dev/null +++ b/testing/unit_tests/test_data/aflavus_galactosidases.fa @@ -0,0 +1,45 @@ +>sp|B8NWY6|AGALC_ASPFN Probable alpha-galactosidase C OS=Aspergillus flavus (strain ATCC 200026 / FGSC A1120 / NRRL 3357 
/ JCM 12722 / SRRC 167) OX=332952 GN=aglC PE=3 SV=2 +MFGSPKRAALAAASLLAIFGNGPSVMAQETSSNNAVVADGKTFALNGENVSYRFRVNETT +GDLVSDHFGGSITGDLFPGFGAEALGGWVGLAGRFRREFPDHGRGDFRIPAVRIRQEAGY +TVTDLQYQSYSVIPGKPALPGLPSTFGSEEDVTTLVVHLYDNYSSIAVDLSYSIFPKYDA +IVRSANVTNKGTQNITVEALSSFSFDFPYEDLEMISLRGDWAREAHRQRRKVEYGLQGFG +SSTGFSSHLHNPFLAIVHPSTTESQGEAWGFNLVYTGSFSVDVEKGSQGLTRALLGFNPS +QLSWQLGAGETLTSPECVSVYSSDGIGGMSRSFHRLYRNHLIKSKFATSDRPPLLNSWEG +LYFDYNESTIYRLAEESAALGVKLFVMDDGWFGDKYPRVSDNAGLGDWVPNPDRFPDGLT +PLVEDVTKLKAGNSSTDLRFGLWVEPEMANPNSTLYHEHPDWVLHAGQYPRTLQRNQLVL +NLALPEVQDYIIDEITNILNSSAISYVKWDFNRAMHETPSPSNDHEYILGMYRVFDTLTT +RFPDVLWEGCASGGGRFDPGVLEYFPQIWTSDNTDALMRITIQLGTSLAYPPSAMGAHLS +AVPNAQTGRTIPVKFRGHVAMMGGSFGLELDPAELQEDEKAEVPGLIALAEKVNPIILTG +DMWRLRLPEESNWPAVLFISEDGNQAVLFYFQLGPNVNHATPWLRLQGLDPKATYSVDGN +GSYSGATLMNMGLQYKFESDYDSKVVFLQKQ +>sp|B8MWJ5|AGALA_ASPFN Probable alpha-galactosidase A OS=Aspergillus flavus (strain ATCC 200026 / FGSC A1120 / NRRL 3357 / JCM 12722 / SRRC 167) OX=332952 GN=aglA PE=3 SV=1 +MRLITRWIPLANALASTMPVQVVASIENPSLLPTPPMGFNNWARFMCDLNETLFVETTDA +MASNGLLEAGYNRINLDDCWMNYDRAENGSLEWNVTKFPRGLPWLGQYVKSKGFNFGIYE +DSGNLTCGGYPGSEGYEEIDAETFAAWGIDYLKLDGCNVYPKEGRTLQEEYKYLYGNWHE +ILSKMQQPLIFSESAPAYFSMTDNLTDWHTVMDWVPEYGELARHSVDILVYSGEGSAWDS +IMTNYKFNTLVARYQRPGYYNDPDFLIADHPGLSLDEKRSQFALWASFSAPLIISAHIPD +LSSEDLEYLTNQALIAVDQDPLAQQATLASRDGSLDVLTRNLADGSRLVTILNHGSESIE +TDISLDILGLSTDCTYKAQDLWGGSTQTIKDAIRIKLNTHATAVYKIDTDEKCSQVIPTG +LIFNTASGKCLTGTSSSVGSESCNGSKSQIWQIDASGVIRTLSEQSKCLTADGKAISLQE +CSENNGQKWSYAITGNLKNADTGYCLTNGGGVSACGFETNSQVFGLPAGVHVAL +>sp|B8N7Z0|AGALD_ASPFN Probable alpha-galactosidase D OS=Aspergillus flavus (strain ATCC 200026 / FGSC A1120 / NRRL 3357 / JCM 12722 / SRRC 167) OX=332952 GN=aglD PE=3 SV=1 +MLPKIFYLSLLPAALGHPHLQPRLDNGLARTPQMGWNTYNHYSCSPNETIVRSNAQALVD +LGLASLGYRYVTTDCGWTVADRLSDGSLTWNETLFPEGFPALGKYLHDLDLLFGVYQDSG +IKLCGSPPDNVGNYEDQDARTFASWEVDSLKYDNCYSDAATGYPNVNYEPSTSPQPRFAN +MSRALAAQNRSMVFQVCEWGIDFPARWAPALGHSWRIGNDIIPHWRAIYRTLNQAVPQTS 
+FAGPGQWPDLDMLFVGNDILSIPEEQTHFSLWAILKSPLTIGAALKDDETSINDESLQIL +KQADIIGYNQDSLGVSASLRRRWTEEGYEVWSGPLSGGRTVAALINWRNESRDLTLDLPD +IGLQYAGTVKNIWDGTTAQNVKTSYTAKVQGHGTILLELQDTTASGQYPGDTFATSTGSS +TTFESIYGVTTSFRYNITVKLSEASSSSDVKIQSTASNKTITAQVSASGTEASAQIPLLA +GSSNSITIVSPQSVDAITITPPNGTYFPNTAFTTIGDADTVSCGAGYCQPVGSKIGNIST +NGTARAVIPATAGTKYLAIDYINNDVAFDSAWDWGSNSRNLTVSVNGNKPVRIEVPLSGQ +HSELFGPGKGWWDTATIGVLTEGWKDGDNDVVIGNEGGESGFTSYGPDFVGLRVL +>sp|B8N306|AGALB_ASPFN Probable alpha-galactosidase B OS=Aspergillus flavus (strain ATCC 200026 / FGSC A1120 / NRRL 3357 / JCM 12722 / SRRC 167) OX=332952 GN=aglB PE=3 SV=1 +MQRYISLSVSLSLLSGANALVRPDGVGRLPALGWNTWNAFGCDIDASKVLTAAEETINLG +LKDAGYEYINIDDCWSVKSGRDPNTKRIIPDSAKFPDGISGVASKIHDLGLKVGIYSSAG +TETCAGYPASLGYEKIDAESFAEWGIDYLKYDNCGVPTNWTDTYTHCVPDNSNGSKFPNG +TCPDISNPAPTAYDWSSSNTAQRYNAMRDALLGVNRTILYSLCEWGQADVNTWGNGTGNS +WRTTGDITPDWSRIVEIANENSFLMNYADFWGYPDPDMLEVGNGNLTLEENRAHFALWAA +MKSPLIIGTALDSINEEHLAILKNKPLLSFHQDPVIGRPAYPYKWGYNPDWTFDPAHPAE +YWSGPSSTLGGTLVLMFNSEDSAKHRTAVWSEIPELKDSAEKGSGYRVTDIWTGEDLGCV +KDQYDVELQSHDIAALVVGESC diff --git a/testing/unit_tests/test_data/ecoli_textbook.mat b/testing/unit_tests/test_data/ecoli_textbook.mat new file mode 100644 index 00000000..546abbae Binary files /dev/null and b/testing/unit_tests/test_data/ecoli_textbook.mat differ diff --git a/testing/unit_tests/test_data/expBlastResults.mat b/testing/unit_tests/test_data/expBlastResults.mat new file mode 100644 index 00000000..051bfa72 Binary files /dev/null and b/testing/unit_tests/test_data/expBlastResults.mat differ diff --git a/testing/unit_tests/test_data/expCdhitMafftOutput.mat b/testing/unit_tests/test_data/expCdhitMafftOutput.mat new file mode 100644 index 00000000..6d80662d Binary files /dev/null and b/testing/unit_tests/test_data/expCdhitMafftOutput.mat differ diff --git a/testing/unit_tests/test_data/expDiamondResults.mat b/testing/unit_tests/test_data/expDiamondResults.mat new file mode 100644 index 00000000..d064d42c Binary files /dev/null and 
b/testing/unit_tests/test_data/expDiamondResults.mat differ diff --git a/testing/unit_tests/test_data/human_galactosidases.fa b/testing/unit_tests/test_data/human_galactosidases.fa new file mode 100644 index 00000000..13b2b0a0 --- /dev/null +++ b/testing/unit_tests/test_data/human_galactosidases.fa @@ -0,0 +1,39 @@ +>sp|P07602|SAP_HUMAN Prosaposin OS=Homo sapiens OX=9606 GN=PSAP PE=1 SV=2 +MYALFLLASLLGAALAGPVLGLKECTRGSAVWCQNVKTASDCGAVKHCLQTVWNKPTVKS +LPCDICKDVVTAAGDMLKDNATEEEILVYLEKTCDWLPKPNMSASCKEIVDSYLPVILDI +IKGEMSRPGEVCSALNLCESLQKHLAELNHQKQLESNKIPELDMTEVVAPFMANIPLLLY +PQDGPRSKPQPKDNGDVCQDCIQMVTDIQTAVRTNSTFVQALVEHVKEECDRLGPGMADI +CKNYISQYSEIAIQMMMHMQPKEICALVGFCDEVKEMPMQTLVPAKVASKNVIPALELVE +PIKKHEVPAKSDVYCEVCEFLVKEVTKLIDNNKTEKEILDAFDKMCSKLPKSLSEECQEV +VDTYGSSILSILLEEVSPELVCSMLHLCSGTRLPALTVHVTQPKDGGFCEVCKKLVGYLD +RNLEKNSTKQEILAALEKGCSFLPDPYQKQCDQFVAEYEPVLIEILVEVMDPSFVCLKIG +ACPSAHKPLLGTEKCIWGPSYWCQNTETAAQCNAVEHCKRHVWN +>sp|P17050|NAGAB_HUMAN Alpha-N-acetylgalactosaminidase OS=Homo sapiens OX=9606 GN=NAGA PE=1 SV=2 +MLLKTVLLLGHVAQVLMLDNGLLQTPPMGWLAWERFRCNINCDEDPKNCISEQLFMEMAD +RMAQDGWRDMGYTYLNIDDCWIGGRDASGRLMPDPKRFPHGIPFLADYVHSLGLKLGIYA +DMGNFTCMGYPGTTLDKVVQDAQTFAEWKVDMLKLDGCFSTPEERAQGYPKMAAALNATG +RPIAFSCSWPAYEGGLPPRVNYSLLADICNLWRNYDDIQDSWWSVLSILNWFVEHQDILQ +PVAGPGHWNDPDMLLIGNFGLSLEQSRAQMALWTVLAAPLLMSTDLRTISAQNMDILQNP +LMIKINQDPLGIQGRRIHKEKSLIEVYMRPLSNKASALVFFSCRTDMPYRYHSSLGQLNF +TGSVIYEAQDVYSGDIISGLRDETNFTVIINPSGVVMWYLYPIKNLEMSQQ +>sp|Q06187|BTK_HUMAN Tyrosine-protein kinase BTK OS=Homo sapiens OX=9606 GN=BTK PE=1 SV=3 +MAAVILESIFLKRSQQKKKTSPLNFKKRLFLLTVHKLSYYEYDFERGRRGSKKGSIDVEK +ITCVETVVPEKNPPPERQIPRRGEESSEMEQISIIERFPYPFQVVYDEGPLYVFSPTEEL +RKRWIHQLKNVIRYNSDLVQKYHPCFWIDGQYLCCSQTAKNAMGCQILENRNGSLKPGSS +HRKTKKPLPPTPEEDQILKKPLPPEPAAAPVSTSELKKVVALYDYMPMNANDLQLRKGDE +YFILEESNLPWWRARDKNGQEGYIPSNYVTEAEDSIEMYEWYSKHMTRSQAEQLLKQEGK +EGGFIVRDSSKAGKYTVSVFAKSTGDPQGVIRHYVVCSTPQSQYYLAEKHLFSTIPELIN +YHQHNSAGLISRLKYPVSQQNKNAPSTAGLGYGSWEIDPKDLTFLKELGTGQFGVVKYGK 
+WRGQYDVAIKMIKEGSMSEDEFIEEAKVMMNLSHEKLVQLYGVCTKQRPIFIITEYMANG +CLLNYLREMRHRFQTQQLLEMCKDVCEAMEYLESKQFLHRDLAARNCLVNDQGVVKVSDF +GLSRYVLDDEYTSSVGSKFPVRWSPPEVLMYSKFSSKSDIWAFGVLMWEIYSLGKMPYER +FTNSETAEHIAQGLRLYRPHLASEKVYTIMYSCWHEKADERPTFKILLSNILDVMDEES +>sp|P06280|AGAL_HUMAN Alpha-galactosidase A OS=Homo sapiens OX=9606 GN=GLA PE=1 SV=1 +MQLRNPELHLGCALALRFLALVSWDIPGARALDNGLARTPTMGWLHWERFMCNLDCQEEP +DSCISEKLFMEMAELMVSEGWKDAGYEYLCIDDCWMAPQRDSEGRLQADPQRFPHGIRQL +ANYVHSKGLKLGIYADVGNKTCAGFPGSFGYYDIDAQTFADWGVDLLKFDGCYCDSLENL +ADGYKHMSLALNRTGRSIVYSCEWPLYMWPFQKPNYTEIRQYCNHWRNFADIDDSWKSIK +SILDWTSFNQERIVDVAGPGGWNDPDMLVIGNFGLSWNQQVTQMALWAIMAAPLFMSNDL +RHISPQAKALLQDKDVIAINQDPLGKQGYQLRQGDNFEVWERPLSGLAWAVAMINRQEIG +GPRSYTIAVASLGKGVACNPACFITQLLPVKRKLGFYEWTSRLRSHINPTGTVLLQLENT +MQMSLKDLL diff --git a/testing/unit_tests/test_data/yeast_galactosidases.fa b/testing/unit_tests/test_data/yeast_galactosidases.fa new file mode 100644 index 00000000..894ec4d0 --- /dev/null +++ b/testing/unit_tests/test_data/yeast_galactosidases.fa @@ -0,0 +1,36 @@ +>sp|P04824|MEL1_YEASX Alpha-galactosidase 1 OS=Saccharomyces cerevisiae OX=4932 GN=MEL1 PE=1 SV=1 +MFAFYFLTACISLKGVFGVSPSYNGLGLTPQMGWDNWNTFACDVSEQLLLDTADRISDLG +LKDMGYKYIILDDCWSSGRDSDGFLVADEQKFPNGMGHVADHLHNNSFLFGMYSSAGEYT +CAGYPGSLGREEEDAQFFANNRVDYLKYDNCYNKGQFGTPEISYHRYKAMSDALNKTGRP +IFYSLCNWGQDLTFYWGSGIANSWRMSGDVTAEFTRPDSRCPCDGDEYDCKYAGFHCSIM +NILNKAAPMGQNAGVGGWNDLDNLEVGVGNLTDDEEKAHFSMWAMVKSPLIIGANVNNLK +ASSYSIYSQASVIAINQDSNGIPATRVWRYYVSDTDEYGQGEIQMWSGPLDNGDQVVALL +NGGSVSRPMNTTLEEIFFDSNLGSKKLTSTWDIYDLWANRVDNSTASAILGRNKTATGIL +YNATEQSYKDGLSKNDTRLFGQKIGSLSPNAILNTTVPAHGIAFYRLRPSS +>sp|P41947|MEL6_YEASX Alpha-galactosidase 6 OS=Saccharomyces cerevisiae OX=4932 GN=MEL6 PE=3 SV=1 +MFAFYFLTACISLKGVFGVSPSYNGLGLTPQMGWDNWNTFACDVSEQLLLDTADRISDLG +LKDMGYKYVILDDCWSSGRDSDGFLVADKHKFPNGMGHVADHLHNNSFLFGMYSSAGEYT +CAGYPGSLGREEEDAQFFANNRVDYLKYDNCYNKGQFGTPDVSYHRYKAMSDALNKTGRP +IFYSLCNWGQDLTFYWGSGIANSWRMSGDITAEFTRPDSRCPCDGDEYDCKYAGFHCSIM 
+NILNKAAPMGQNAGVGGWNDLDNLEVGVGNLTDDEEKAHFSMWAMVKSPLIIGADVNHLK +ASSYSIYSQASVIAINQDPKGIPATRVWRYYVSDTDEYGQGEIQMWSGPLDNGDQVVALL +NGGSVARPMNTTLEEIFFDSNLGSKELTSTWDIYDLWANRVDNSTASAILEQNKAATGIL +YNATEQSYKDGLSKNDTRLFGQKIGSLSPNAILNTTVPAHGIAFYRLRPSA +>sp|P41945|MEL2_YEASX Alpha-galactosidase 2 OS=Saccharomyces cerevisiae OX=4932 GN=MEL2 PE=3 SV=1 +MFAFYFLTACISLKGVFGVSPSYNGLGLTPQMGWDNWNTFACDVSEQLLLDTADRISDLG +LKDMGYKYIILDDCWSSGRDSDGFLVADEQKFPNGMGHVADHLHNNSFLFGMYSSAGEYT +CAGYPGSLGREEEDAQFFANNRVDYLKYDNCYNKGQFGTPEISYHRYKAMSDALNKTGRP +IFYSLCNWGQDLTFYWGSGIANSWRMSGDITAEFTRPDSRCPCDGDEYDCKYAGFHCSIM +NILNKAAPMGQNAGVGGWNDLDNLEVRVGNLTDDEEKAHFPMWAMVKSPLIIGADVNTLK +PSSYSIYSQASVIAINQDPKGIPATRVWRYYVSDTDEYGQGEIQMWSGPLDNGDQVVALL +NGGSVPRPMNTTLEEIFFDSNLGSKELTSTWDIYDLWANRVDNSTASAILGQNKTATGIL +YNATEQSYKDGLSKNDTRLFGQKIGSLSPNAILNTTVPAHGIAFYRLRPSA +>sp|P41946|MEL5_YEASX Alpha-galactosidase 5 OS=Saccharomyces cerevisiae OX=4932 GN=MEL5 PE=3 SV=1 +MFAFYFLTACTTLKGVFGVSPSYNGLGLTPQMGWDSWNTFACDVSEQLLLDTADRISDLG +LKDMGYKYVILDDCWSSGRDSDGFLVADKHKFPNGMGHVADHLHNNSFLFGMYSSAGEYT +CAGYPGSLGREEEDAQFFANNRVDYLKYDNCYNKGQFGTPDVSYHRYKAMSDALNKTGRP +IFYSLCNWGQDLTFYWGSGIANSWRMSGDITAEFTRPDSRCPCDGDEYDCKYAGFHCSIM +NILNKAAPMGQNAGVGGWNDLDNLEVGVGNLTDDEEKAHFSMWAMVKSPLIIGADVNHLK +ASSYSIYSQASVIAINQDPKGIPATRVWRYYVSDTDEYGQGEIQMWSGPLDNGDQVVALL +NGGSVARPMNTTLEEIFFDSNLGSKELTSTWDIYDLWANRVDNSTASAILEQNKAATGIL +YNATEQSYKDGLSKNDTRLFGQKIGSLSPNAILNTTVPAHGIAFYRLRPSA diff --git a/testing/unit_tests/tinitTests.m b/testing/unit_tests/tinitTests.m old mode 100644 new mode 100755 diff --git a/tutorial/tutorial5.m b/tutorial/tutorial5.m index e47bcd78..f408339b 100755 --- a/tutorial/tutorial5.m +++ b/tutorial/tutorial5.m @@ -19,10 +19,10 @@ %are for. 
This process takes up to 20-35 minutes in macOS, Unix systems and %40-55 minutes in Windows, depending on your hardware and the size of %target organism proteome -model=getKEGGModelForOrganism('sce','sce.fa','euk100_kegg94','output',false,false,false,false,10^-30,0.8,0.3,-1); +model=getKEGGModelForOrganism('sce','sce.fa','euk90_kegg100','output',false,false,false,false,10^-30,0.8,0.3,-1); -%The resulting model should contain around 1669 reactions, 1677 -%metabolites and 838 genes. Small variations are possible since it is an +%The resulting model should contain around 1590 reactions, 1571 +%metabolites and 837 genes. Small variations are possible since it is an %heuristic algorithm and different KEGG versions will give slightly %different results. disp(model);