Skip to content

Commit

Permalink
Should fix issue #3.
Browse files Browse the repository at this point in the history
	modified:   ../common/algorithms/alignment/printers/SAMPrinter.h
  • Loading branch information
Mark Chaisson committed Jul 7, 2015
1 parent a282fa9 commit 680a4a8
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 95 deletions.
65 changes: 29 additions & 36 deletions alignment/Blasr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,13 +1196,10 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
intervalContigStartPos = 0;
intervalContigEndPos = genome.length;
}

alignment->qName = read.title;

int alignScore;
alignScore = 0;



alignment->tAlignedSeqPos = matchIntervalStart;
alignment->tAlignedSeqLength = matchIntervalEnd - matchIntervalStart;

Expand All @@ -1211,17 +1208,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
// target interval because of an N in the query sequence. If the match interval is truncated,
// it is possible that the matches index past the interval length. This trims it back.
//
/*
if (trimMatches == true) {
int m;
int lastMatch = (*intvIt).matches.size()-1;
if ((*intvIt).matches[lastMatch].t + (*intvIt).matches[lastMatch].l > matchIntervalEnd) {
ChainedMatchPos mp((const ChainedMatchPos&)(*intvIt).matches[lastMatch]);
mp.l = matchIntervalEnd - (*intvIt).matches[lastMatch].t;
(*intvIt).matches[lastMatch] = mp;
}
}
*/

if ((*intvIt).GetStrandIndex() == Forward) {
alignment->tAlignedSeq.Copy(genome, alignment->tAlignedSeqPos, alignment->tAlignedSeqLength);
alignment->tStrand = Forward;
Expand Down Expand Up @@ -1311,7 +1298,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
//
// The first step to refining between anchors only is to make
// the anchors relative to the tAlignedSeq.

//


matches = (vector<ChainedMatchPos>*) &(*intvIt).matches;
Expand All @@ -1325,16 +1312,16 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
}
}
else {
//
// Flip the entire alignment if it is on the reverse strand.
//
// Flip the entire alignment if it is on the reverse strand.
//

DNALength rcAlignedSeqPos = genome.MakeRCCoordinate(alignment->tAlignedSeqPos + alignment->tAlignedSeqLength - 1);
for (m = 0; m < matches->size(); m++) {
(*matches)[m].t -= rcAlignedSeqPos;
(*matches)[m].q -= alignment->qAlignedSeqPos;
}

// alignment->tAlignedSeq.CopyAsRC(tAlignedSeq);
rcMatches.resize((*intvIt).matches.size());
//
// Make the reverse complement of the match list.
Expand Down Expand Up @@ -1523,7 +1510,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
anchorsOnly.qPos = alignment->qPos;

//
// Adjacent blocks without gaps are not merged here. Do the merging now.
// Adjacent blocks without gaps (e.g. 50M50M50M) are not yet merged. Do the merging now.
//
int cur = 0, next = 1;
while (next < alignment->blocks.size()) {
Expand Down Expand Up @@ -4651,6 +4638,7 @@ int main(int argc, char* argv[]) {
*outFilePtr << hdString << endl;
seqdb.MakeSAMSQString(sqString);
*outFilePtr << sqString; // this already outputs endl
set<string> readGroups;
for (readsFileIndex = 0; readsFileIndex < params.readsFileNames.size()-1; readsFileIndex++ ) {
reader->SetReadFileName(params.readsFileNames[readsFileIndex]);
reader->Initialize();
Expand All @@ -4660,24 +4648,29 @@ int main(int argc, char* argv[]) {
MakeMD5(scanData.movieName, movieNameMD5, 10);
string chipId;
ParseChipIdFromMovieName(scanData.movieName, chipId);

*outFilePtr << "@RG\t"
<< "ID:" << movieNameMD5 << "\t"
<< "PU:"<< scanData.movieName << "\t"
<< "SM:"<< chipId << "\t"
<< "PL:PACBIO" << "\t"
<< "DS:READTYPE=SUBREAD;"
<< "BINDINGKIT=" << scanData.bindingKit << ";"
<< "SEQUENCINGKIT=" << scanData.sequencingKit << ";"
<< "BASECALLERVERSION=" << "2.1";

int q;
for (q = 0; q < params.samQVList.nTags; q++){
if (params.samQVList.useqv & params.samQVList.qvFlagIndex[q]) {
*outFilePtr << ";" << params.samQVList.qvNames[q] << "=" << params.samQVList.qvTags[q];
//
// Each movie may only be represented once in the header.
//
if (readGroups.find(scanData.movieName) == readGroups.end()) {
*outFilePtr << "@RG\t"
<< "ID:" << movieNameMD5 << "\t"
<< "PU:"<< scanData.movieName << "\t"
<< "SM:"<< chipId << "\t"
<< "PL:PACBIO" << "\t"
<< "DS:READTYPE=SUBREAD;"
<< "BINDINGKIT=" << scanData.bindingKit << ";"
<< "SEQUENCINGKIT=" << scanData.sequencingKit << ";"
<< "BASECALLERVERSION=" << "2.1";

int q;
for (q = 0; q < params.samQVList.nTags; q++){
if (params.samQVList.useqv & params.samQVList.qvFlagIndex[q]) {
*outFilePtr << ";" << params.samQVList.qvNames[q] << "=" << params.samQVList.qvTags[q];
}
}
*outFilePtr << endl;
}
*outFilePtr << endl;
readGroups.insert(scanData.movieName);
reader->Close();
}
string commandLineString;
Expand Down
5 changes: 3 additions & 2 deletions alignment/MappingParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,12 @@ class MappingParameters {
refineBetweenAnchorsOnly = true;
minMatchLength = anchorParameters.minMatchLength = 25;
anchorParameters.advanceExactMatches = advanceExactMatches = 25;
anchorParameters.maxLCPLength = 50;
anchorParameters.maxLCPLength = 40;
affineAlign = true;
affineExtend = 0;
affineOpen = 100;
affineOpen = 20;
anchorParameters.maxAnchorsPerPosition = 30;
indelRate = 0.0001;
}

if (emulateNucmer) {
Expand Down
50 changes: 24 additions & 26 deletions common/algorithms/alignment/printers/SAMPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace SAMOutput {

enum Clipping {hard, soft, subread, none};

static string SAMVersion;
void BuildFlag(T_AlignmentCandidate &alignment, AlignmentContext &context, uint16_t &flag) {

/*
Expand Down Expand Up @@ -97,7 +97,6 @@ namespace SAMOutput {
else if (clipping == soft) {
clippedReadLength = read.length - read.lowQualityPrefix - read.lowQualitySuffix;
clippedStartPos = read.lowQualityPrefix;

}
else if (clipping == subread) {
clippedReadLength = read.subreadEnd - read.subreadStart;
Expand Down Expand Up @@ -396,26 +395,9 @@ namespace SAMOutput {

string rNext;
rNext = "*";
/*
if (context.hasNextSubreadPos == false) {
rNext = "*";
}
else {
if (context.rNext == alignment.tName) {
rNext = "=";
}
else {
rNext = context.rNext;
}
}
*/
samFile << rNext << "\t"; // RNEXT

DNALength nextSubreadPos = 0;
/*
if (context.hasNextSubreadPos) {
nextSubreadPos = context.nextSubreadPos + 1;
}*/
samFile << nextSubreadPos << "\t"; // RNEXT, add 1 for 1 based
// indexing

Expand All @@ -426,6 +408,7 @@ namespace SAMOutput {
// newline (by setting the line length to alignedSequence.length
((DNASequence)alignedSequence).PrintSeq(samFile, 0); // SEQ
samFile << "\t";

if (alignedSequence.qual.data != NULL) {
alignedSequence.PrintAsciiQual(samFile, 0); // QUAL
}
Expand Down Expand Up @@ -524,16 +507,14 @@ namespace SAMOutput {
}

// Unmapped reads have fixed defaults for most fields.
samFile << read.title << "\t" << flag << "\t*\t0\t0\t*\t*\t0\t0\t"; // RNAME, POS, MAP, CIGAR, RNEXT, PNEXT, TLEN
samFile << read.GetName() << "\t" << flag << "\t*\t0\t0\t*\t*\t0\t0\t"; // RNAME, POS, MAP, CIGAR, RNEXT, PNEXT, TLEN

((DNASequence)alignedSequence).PrintSeq(samFile, 0); // SEQ
samFile << "\t";
if (alignedSequence.qual.data != NULL) {
alignedSequence.PrintAsciiQual(samFile, 0); // QUAL
}
else {
samFile <<"*";
}
//
// The quality is largely uninformative, so it is skipped and the null flag is output.
//
samFile <<"*";
samFile << "\t";

//
Expand All @@ -547,6 +528,23 @@ namespace SAMOutput {
// "XQ" query sequence length
// "XT" # of continues reads, always 1 for blasr
//
if (clipping == subread) {
DNALength xs = read.subreadStart;
DNALength xe = read.subreadEnd;
samFile << "XS:i:" << xs + 1 << "\t"; // add 1 for 1-based indexing in sam
samFile << "XE:i:" << xe + 1 << "\t";
samFile << "qs:i:" << xs + 1 << "\t"; // add 1 for 1-based indexing in sam
samFile << "qe:i:" << xe + 1 << "\t";
}
else {
samFile << "XS:i:1\t"
<< "XE:i:"<<alignedSequence.length <<"\t"
<< "qs:i:1\t"
<< "qe:i:"<<alignedSequence.length << "\t";
}
samFile << "zm:i:" << read.holeNumber << "\t";
samFile << "XL:i:" << 0 << "\t";

samFile << "XT:i:1"; // reads are allways continuous reads, not
// referenced based circular consensus when
// output by blasr.
Expand Down
25 changes: 2 additions & 23 deletions common/algorithms/alignment/sdp/SparseDynamicProgramming.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "FragmentSort.h"
#include "algorithms/alignment/AlignmentUtils.h"
#include "defs.h"

#include <algorithm>

/*******************************************************************************
* Sparse dynamic programming implementation of Longest Common Subsequence
Expand Down Expand Up @@ -157,34 +157,14 @@ int SDPLongestCommonSubsequence(DNALength queryLength,
int aboveIndex;
if (fragmentSet[fSweep].GetAbove(aboveIndex)) {

/* if (sweepSet.Successor(fragmentSet[fSweep], succ) and
succ.y < fragmentSet[fSweep].y and
succ.y + fragmentLength >= fragmentSet[fSweep].y) {
*/
// assert(((int)succ.y) - ((int)succ.x) >= ((int)fragmentSet[fSweep].y) - ((int)fragmentSet[fSweep].x));


/*
Baker and Giancarlo LCS cost
*/
/*
ca = fragmentSet[aboveIndex].cost +
(fragmentSet[aboveIndex].y - fragmentSet[aboveIndex].x) -
(fragmentSet[fSweep].y - fragmentSet[fSweep].x);
*/
/*
ca = succ.cost +
(succ.y - succ.x) -
(fragmentSet[fSweep].y - fragmentSet[fSweep].x);
*/

ca = (fragmentSet[aboveIndex].cost +
(fragmentLength - (int)(fragmentSet[fSweep].y - fragmentSet[aboveIndex].y)) * match +
IndelPenalty(fragmentSet[fSweep].x, fragmentSet[fSweep].y, fragmentSet[aboveIndex].x, fragmentSet[aboveIndex].y, insertion, deletion));

// ca = (succ.cost +
// (fragmentLength - (int)(fragmentSet[fSweep].y - succ.y)) * match +
// IndelPenalty(fragmentSet[fSweep].x, fragmentSet[fSweep].y, succ.x, succ.y, insertion, deletion));
foundPrev = 1;
}

Expand Down Expand Up @@ -228,7 +208,7 @@ int SDPLongestCommonSubsequence(DNALength queryLength,
fragmentSet[fSweep].chainLength = 1;
}
}
// cout << fSweep << " " << fragmentSet[fSweep].chainPrev << " " << foundPrev << " " << fragmentSet[fSweep].x << " " << fragmentSet[fSweep].y << " " << cp << " " << cl << " " << ca << endl;

if (minFragmentCost > fragmentSet[fSweep].cost) {
minFragmentCost = fragmentSet[fSweep].cost;
minFragmentIndex = fSweep;
Expand All @@ -250,7 +230,6 @@ int SDPLongestCommonSubsequence(DNALength queryLength,
fSweep = startF;
while (fSweep < fragmentSetSize and
fragmentSet[fSweep].x == sweepRow) {
// cout << "inserting sweep set with index" << fragmentSet[fSweep].index << endl;
sweepSet.Insert(fragmentSet[fSweep]);
++fSweep;
}
Expand Down
1 change: 0 additions & 1 deletion common/algorithms/anchoring/FindMaxInterval.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <semaphore.h>
#include <fstream>
#include <iostream>
#include "LongestIncreasingSubsequence.h"
#include "GlobalChain.h"
#include "BasicEndpoint.h"
#include "datastructures/anchoring/WeightedInterval.h"
Expand Down
13 changes: 7 additions & 6 deletions common/tuples/TupleMatching.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ template<typename Sequence, typename T_TupleList>


template<typename TSequence, typename TMatch, typename T_TupleList>
int StoreMatchingPositions(TSequence &querySeq, TupleMetrics &tm, T_TupleList &targetTupleList, vector<TMatch> &matchSet) {
int StoreMatchingPositions(TSequence &querySeq, TupleMetrics &tm, T_TupleList &targetTupleList, vector<TMatch> &matchSet, int maxMatches=0) {
DNALength s;
// TQueryTuple queryTuple;
typename T_TupleList::Tuple queryTuple;
Expand All @@ -48,11 +48,12 @@ template<typename TSequence, typename TMatch, typename T_TupleList>
int targetListIndex = 0;
typename vector<typename T_TupleList::Tuple>::const_iterator curIt, endIt;
targetTupleList.FindAll(queryTuple, curIt, endIt);

for(; curIt != endIt; curIt++) {
matchSet.push_back(TMatch(s, (*curIt).pos));
++targetListIndex;
}
if (maxMatches == 0 or endIt - curIt <= maxMatches) {
for(; curIt != endIt; curIt++) {
matchSet.push_back(TMatch(s, (*curIt).pos));
++targetListIndex;
}
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion samutils/SamToM0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) {
clp.RegisterStringListOption("sam", &samFileNames, "Alignments.", true);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterStringOption("out", &outFileName, "Output file. Default to stdout", false);
clp.RegisterIntOption("format", &format, "Format (0,1,4,5)", CommandLineParser::NonNegativeInteger, false);
clp.RegisterIntOption("format", &format, "Format (supports 0 only, formats 1,4,5 to be added)", CommandLineParser::NonNegativeInteger, false);
clp.ParseCommandLine(argc, argv);

FASTAReader fastaReader;
Expand Down

0 comments on commit 680a4a8

Please sign in to comment.