Skip to content

Commit

Permalink
fix: getBlast if temppath has space char
Browse files Browse the repository at this point in the history
  • Loading branch information
edkerk committed Jul 17, 2024
1 parent 49d21be commit 9a2dcc1
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 111 deletions.
231 changes: 120 additions & 111 deletions doc/external/getBlast.html
Original file line number Diff line number Diff line change
Expand Up @@ -141,118 +141,127 @@ <h2><a name="_source"></a>SOURCE CODE <a href="#_top"><img alt="^" border="0" sr
0063 <span class="comment">%Generate temporary names for BLAST databases and output files</span>
0064 tmpDB=tempname;
0065 outFile=tempname;
0066
0067 <span class="comment">%Check for existence of files. If no full path is specified for a file,</span>
0068 <span class="comment">%assume that it is in the current folder</span>
0069 <span class="keyword">if</span> isrow(refFastaFiles)
0070 files=horzcat(fastaFile,refFastaFiles);
0071 <span class="keyword">else</span>
0072 files=vertcat(fastaFile,refFastaFiles);
0073 <span class="keyword">end</span>
0074
0075 files=checkFileExistence(files,2); <span class="comment">%Copy files to temp dir</span>
0076 fastaFile = files(1);
0077 refFastaFiles = files(2:end);
0078
0079 <span class="comment">%Identify the operating system</span>
0080 <span class="keyword">if</span> isunix
0081 <span class="keyword">if</span> ismac
0082 binEnd=<span class="string">'.mac'</span>;
0083 <span class="keyword">else</span>
0084 binEnd=<span class="string">''</span>;
0085 <span class="keyword">end</span>
0086 <span class="keyword">elseif</span> ispc
0087 binEnd=<span class="string">'.exe'</span>;
0088 setenv(<span class="string">'BLASTDB_LMDB_MAP_SIZE'</span>,<span class="string">'1000000'</span>);
0089 <span class="keyword">else</span>
0090 dispEM(<span class="string">'Unknown OS, exiting.'</span>)
0091 <span class="keyword">return</span>
0092 <span class="keyword">end</span>
0093
0094 <span class="comment">%Run BLAST multi-threaded to use all logical cores assigned to MATLAB</span>
0095 cores = evalc(<span class="string">'feature(''numcores'')'</span>);
0096 cores = strsplit(cores, <span class="string">'MATLAB was assigned: '</span>);
0097 cores = regexp(cores{2},<span class="string">'^\d*'</span>,<span class="string">'match'</span>);
0098 cores = cores{1};
0099
0100 <span class="comment">%Create a database for the new organism and blast each of the refFastaFiles</span>
0101 <span class="comment">%against it</span>
0102 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'makeblastdb'</span> binEnd]) <span class="string">'&quot; -in &quot;'</span> fastaFile{1} <span class="string">'&quot; -out &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -dbtype prot'</span>]);
0103 <span class="keyword">if</span> developMode
0104 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.phr'</span>));
0105 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pot'</span>));
0106 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.psq'</span>));
0107 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pto'</span>));
0108 <span class="keyword">end</span>
0109 <span class="keyword">if</span> status~=0
0110 error(<span class="string">'makeblastdb did not run successfully, error:\n%s'</span>,strip(message))
0111 <span class="keyword">end</span>
0112
0113 <span class="keyword">for</span> i=1:numel(refFastaFiles)
0114 <span class="keyword">if</span> ~hideVerbose
0115 fprintf([<span class="string">'BLASTing &quot;'</span> modelIDs{i} <span class="string">'&quot; against &quot;'</span> organismID{1} <span class="string">'&quot;..\n'</span>]);
0116 <span class="keyword">end</span>
0117 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'blastp'</span> binEnd]) <span class="string">'&quot; -query &quot;'</span> refFastaFiles{i} <span class="string">'&quot; -out &quot;'</span> outFile <span class="string">'_'</span> num2str(i) <span class="string">'&quot; -db &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -evalue 10e-5 -outfmt &quot;10 qseqid sseqid evalue pident length bitscore ppos&quot; -num_threads &quot;'</span> cores <span class="string">'&quot;'</span>]);
0118 <span class="keyword">if</span> developMode
0119 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile <span class="string">'_'</span> num2str(i)]);
0120 <span class="keyword">end</span>
0121 <span class="keyword">if</span> status~=0
0122 error(<span class="string">'blastp did not run successfully, error:\n%s'</span>,strip(message))
0123 <span class="keyword">end</span>
0124 <span class="keyword">end</span>
0125 delete([tmpDB filesep <span class="string">'tmpDB*'</span>]);
0126
0127 <span class="comment">%Then create a database for each of the reference organisms and blast the</span>
0128 <span class="comment">%new organism against them</span>
0129 <span class="keyword">for</span> i=1:numel(refFastaFiles)
0130 <span class="keyword">if</span> ~hideVerbose
0131 fprintf([<span class="string">'BLASTing &quot;'</span> organismID{1} <span class="string">'&quot; against &quot;'</span> modelIDs{i} <span class="string">'&quot;..\n'</span>]);
0066 <span class="keyword">if</span> ispc &amp;&amp; contains(tmpDB,<span class="string">' '</span>)
0067 warning([<span class="string">'MATLAB assigned ''%s'' as temporary file path, but it '</span><span class="keyword">...</span>
0068 <span class="string">'contains a space character, which is not compatible with '</span><span class="keyword">...</span>
0069 <span class="string">'BLAST. Instead, a temporary folder will be initiated at '</span><span class="keyword">...</span>
0070 <span class="string">'''C:\\tempRAVEN\\'' which should manually be removed when '</span><span class="keyword">...</span>
0071 <span class="string">'finished.'</span>],tmpDB)
0072 tmpDB=tempname(<span class="string">'C:\tempRAVEN'</span>);
0073 outFile=tempname(<span class="string">'C:\tempRAVEN'</span>);
0074 <span class="keyword">end</span>
0075
0076 <span class="comment">%Check for existence of files. If no full path is specified for a file,</span>
0077 <span class="comment">%assume that it is in the current folder</span>
0078 <span class="keyword">if</span> isrow(refFastaFiles)
0079 files=horzcat(fastaFile,refFastaFiles);
0080 <span class="keyword">else</span>
0081 files=vertcat(fastaFile,refFastaFiles);
0082 <span class="keyword">end</span>
0083
0084 files=checkFileExistence(files,2); <span class="comment">%Copy files to temp dir</span>
0085 fastaFile = files(1);
0086 refFastaFiles = files(2:end);
0087
0088 <span class="comment">%Identify the operating system</span>
0089 <span class="keyword">if</span> isunix
0090 <span class="keyword">if</span> ismac
0091 binEnd=<span class="string">'.mac'</span>;
0092 <span class="keyword">else</span>
0093 binEnd=<span class="string">''</span>;
0094 <span class="keyword">end</span>
0095 <span class="keyword">elseif</span> ispc
0096 binEnd=<span class="string">'.exe'</span>;
0097 setenv(<span class="string">'BLASTDB_LMDB_MAP_SIZE'</span>,<span class="string">'1000000'</span>);
0098 <span class="keyword">else</span>
0099 dispEM(<span class="string">'Unknown OS, exiting.'</span>)
0100 <span class="keyword">return</span>
0101 <span class="keyword">end</span>
0102
0103 <span class="comment">%Run BLAST multi-threaded to use all logical cores assigned to MATLAB</span>
0104 cores = evalc(<span class="string">'feature(''numcores'')'</span>);
0105 cores = strsplit(cores, <span class="string">'MATLAB was assigned: '</span>);
0106 cores = regexp(cores{2},<span class="string">'^\d*'</span>,<span class="string">'match'</span>);
0107 cores = cores{1};
0108
0109 <span class="comment">%Create a database for the new organism and blast each of the refFastaFiles</span>
0110 <span class="comment">%against it</span>
0111 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'makeblastdb'</span> binEnd]) <span class="string">'&quot; -in &quot;'</span> fastaFile{1} <span class="string">'&quot; -out &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -dbtype prot'</span>]);
0112 <span class="keyword">if</span> developMode
0113 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.phr'</span>));
0114 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pot'</span>));
0115 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.psq'</span>));
0116 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pto'</span>));
0117 <span class="keyword">end</span>
0118 <span class="keyword">if</span> status~=0
0119 error(<span class="string">'makeblastdb did not run successfully, error:\n%s'</span>,strip(message))
0120 <span class="keyword">end</span>
0121
0122 <span class="keyword">for</span> i=1:numel(refFastaFiles)
0123 <span class="keyword">if</span> ~hideVerbose
0124 fprintf([<span class="string">'BLASTing &quot;'</span> modelIDs{i} <span class="string">'&quot; against &quot;'</span> organismID{1} <span class="string">'&quot;..\n'</span>]);
0125 <span class="keyword">end</span>
0126 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'blastp'</span> binEnd]) <span class="string">'&quot; -query &quot;'</span> refFastaFiles{i} <span class="string">'&quot; -out &quot;'</span> outFile <span class="string">'_'</span> num2str(i) <span class="string">'&quot; -db &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -evalue 10e-5 -outfmt &quot;10 qseqid sseqid evalue pident length bitscore ppos&quot; -num_threads &quot;'</span> cores <span class="string">'&quot;'</span>]);
0127 <span class="keyword">if</span> developMode
0128 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile <span class="string">'_'</span> num2str(i)]);
0129 <span class="keyword">end</span>
0130 <span class="keyword">if</span> status~=0
0131 error(<span class="string">'blastp did not run successfully, error:\n%s'</span>,strip(message))
0132 <span class="keyword">end</span>
0133 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'makeblastdb'</span> binEnd]) <span class="string">'&quot; -in &quot;'</span> refFastaFiles{i} <span class="string">'&quot; -out &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -dbtype prot'</span>]);
0134 <span class="keyword">if</span> status~=0
0135 error(<span class="string">'makeblastdb did not run successfully, error:\n%s'</span>,strip(message))
0136 <span class="keyword">end</span>
0137 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'blastp'</span> binEnd]) <span class="string">'&quot; -query &quot;'</span> fastaFile{1} <span class="string">'&quot; -out &quot;'</span> outFile <span class="string">'_r'</span> num2str(i) <span class="string">'&quot; -db &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -evalue 10e-5 -outfmt &quot;10 qseqid sseqid evalue pident length bitscore ppos&quot; -num_threads &quot;'</span> cores <span class="string">'&quot;'</span>]);
0138 <span class="keyword">if</span> developMode
0139 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.phr'</span>));
0140 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pot'</span>));
0141 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.psq'</span>));
0142 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pto'</span>));
0143 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile <span class="string">'_r'</span> num2str(i)]);
0144 <span class="keyword">end</span>
0145 <span class="keyword">if</span> status~=0
0146 error(<span class="string">'blastp did not run successfully, error:\n%s'</span>,strip(message))
0147 <span class="keyword">end</span>
0148 delete([tmpDB filesep <span class="string">'tmpDB*'</span>]);
0149 <span class="keyword">end</span>
0150
0151 <span class="comment">%Done with the BLAST, do the parsing of the text files</span>
0152 <span class="keyword">for</span> i=1:numel(refFastaFiles)*2
0153 tempStruct=[];
0154 <span class="keyword">if</span> i&lt;=numel(refFastaFiles)
0155 tempStruct.fromId=modelIDs{i};
0156 tempStruct.toId=organismID{1};
0157 A=readtable([outFile <span class="string">'_'</span> num2str(i)],<span class="string">'Delimiter'</span>,<span class="string">','</span>,<span class="string">'Format'</span>,<span class="string">'%s%s%f%f%f%f%f'</span>);
0158 <span class="keyword">else</span>
0159 tempStruct.fromId=organismID{1};
0160 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0161 A=readtable([outFile <span class="string">'_r'</span> num2str(i-numel(refFastaFiles))],<span class="string">'Delimiter'</span>,<span class="string">','</span>,<span class="string">'Format'</span>,<span class="string">'%s%s%f%f%f%f%f'</span>);
0162 <span class="keyword">end</span>
0163 tempStruct.fromGenes=A{:,1};
0164 tempStruct.toGenes=A{:,2};
0165 tempStruct.evalue=table2array(A(:,3));
0166 tempStruct.identity=table2array(A(:,4));
0167 tempStruct.aligLen=table2array(A(:,5));
0168 tempStruct.bitscore=table2array(A(:,6));
0169 tempStruct.ppos=table2array(A(:,7));
0170 blastStructure=[blastStructure tempStruct];
0171 <span class="keyword">end</span>
0172
0173 <span class="comment">%Remove the old tempfiles</span>
0174 delete([outFile <span class="string">'*'</span>]);
0175 <span class="comment">%Remove the temp fasta files</span>
0176 delete(files{:})
0177 <span class="keyword">end</span></pre></div>
0133 <span class="keyword">end</span>
0134 delete([tmpDB filesep <span class="string">'tmpDB*'</span>]);
0135
0136 <span class="comment">%Then create a database for each of the reference organisms and blast the</span>
0137 <span class="comment">%new organism against them</span>
0138 <span class="keyword">for</span> i=1:numel(refFastaFiles)
0139 <span class="keyword">if</span> ~hideVerbose
0140 fprintf([<span class="string">'BLASTing &quot;'</span> organismID{1} <span class="string">'&quot; against &quot;'</span> modelIDs{i} <span class="string">'&quot;..\n'</span>]);
0141 <span class="keyword">end</span>
0142 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'makeblastdb'</span> binEnd]) <span class="string">'&quot; -in &quot;'</span> refFastaFiles{i} <span class="string">'&quot; -out &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -dbtype prot'</span>]);
0143 <span class="keyword">if</span> status~=0
0144 error(<span class="string">'makeblastdb did not run successfully, error:\n%s'</span>,strip(message))
0145 <span class="keyword">end</span>
0146 [status, message]=system([<span class="string">'&quot;'</span> fullfile(ravenPath,<span class="string">'software'</span>,<span class="string">'blast+'</span>,[<span class="string">'blastp'</span> binEnd]) <span class="string">'&quot; -query &quot;'</span> fastaFile{1} <span class="string">'&quot; -out &quot;'</span> outFile <span class="string">'_r'</span> num2str(i) <span class="string">'&quot; -db &quot;'</span> fullfile(tmpDB, <span class="string">'tmpDB'</span>) <span class="string">'&quot; -evalue 10e-5 -outfmt &quot;10 qseqid sseqid evalue pident length bitscore ppos&quot; -num_threads &quot;'</span> cores <span class="string">'&quot;'</span>]);
0147 <span class="keyword">if</span> developMode
0148 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.phr'</span>));
0149 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pot'</span>));
0150 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.psq'</span>));
0151 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, <span class="string">'tmpDB.pto'</span>));
0152 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile <span class="string">'_r'</span> num2str(i)]);
0153 <span class="keyword">end</span>
0154 <span class="keyword">if</span> status~=0
0155 error(<span class="string">'blastp did not run successfully, error:\n%s'</span>,strip(message))
0156 <span class="keyword">end</span>
0157 delete([tmpDB filesep <span class="string">'tmpDB*'</span>]);
0158 <span class="keyword">end</span>
0159
0160 <span class="comment">%Done with the BLAST, do the parsing of the text files</span>
0161 <span class="keyword">for</span> i=1:numel(refFastaFiles)*2
0162 tempStruct=[];
0163 <span class="keyword">if</span> i&lt;=numel(refFastaFiles)
0164 tempStruct.fromId=modelIDs{i};
0165 tempStruct.toId=organismID{1};
0166 A=readtable([outFile <span class="string">'_'</span> num2str(i)],<span class="string">'Delimiter'</span>,<span class="string">','</span>,<span class="string">'Format'</span>,<span class="string">'%s%s%f%f%f%f%f'</span>);
0167 <span class="keyword">else</span>
0168 tempStruct.fromId=organismID{1};
0169 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0170 A=readtable([outFile <span class="string">'_r'</span> num2str(i-numel(refFastaFiles))],<span class="string">'Delimiter'</span>,<span class="string">','</span>,<span class="string">'Format'</span>,<span class="string">'%s%s%f%f%f%f%f'</span>);
0171 <span class="keyword">end</span>
0172 tempStruct.fromGenes=A{:,1};
0173 tempStruct.toGenes=A{:,2};
0174 tempStruct.evalue=table2array(A(:,3));
0175 tempStruct.identity=table2array(A(:,4));
0176 tempStruct.aligLen=table2array(A(:,5));
0177 tempStruct.bitscore=table2array(A(:,6));
0178 tempStruct.ppos=table2array(A(:,7));
0179 blastStructure=[blastStructure tempStruct];
0180 <span class="keyword">end</span>
0181
0182 <span class="comment">%Remove the old tempfiles</span>
0183 delete([outFile <span class="string">'*'</span>]);
0184 <span class="comment">%Remove the temp fasta files</span>
0185 delete(files{:})
0186 <span class="keyword">end</span></pre></div>
<hr><address>Generated by <strong><a href="http://www.artefact.tk/software/matlab/m2html/" title="Matlab Documentation in HTML">m2html</a></strong> &copy; 2005</address>
</body>
</html>
Loading

0 comments on commit 9a2dcc1

Please sign in to comment.