Skip to content

Commit

Permalink
Initial implementation for #193
Browse files Browse the repository at this point in the history
  • Loading branch information
douweschulte committed Jan 10, 2023
1 parent a2429f4 commit 76c36f7
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 71 deletions.
2 changes: 1 addition & 1 deletion assets
Submodule assets updated 1 files
+10 −0 styles.css
31 changes: 16 additions & 15 deletions batchfiles/monoclonal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Input ->
Format : X+
Name : 01
CutoffALC: 90
RawDataDirectory: R:\F1\peng0013\201912
<-
<-

Expand All @@ -18,7 +19,7 @@ TemplateMatching ->
AmbiguityThreshold : 0.9

Alphabet ->
Characters : ARNDCQEGHILKMFPSTWYVBZX.*
Characters : ARNDCQEGHILKMFPSTWYVBZXJ.*
Identity : 8
Mismatch : -1
GapStart : -12
Expand All @@ -29,7 +30,7 @@ TemplateMatching ->
Symmetric sets ->
Score: 8
Sets :>
I,L
I,L,J
<:
<-

Expand All @@ -38,27 +39,27 @@ TemplateMatching ->
Sets :>
N,GG
Q,AG
AV,GL,GI
AV,GL,GI,GJ
AN,QG,AGG
LS,IS,TV
LS,IS,JS,TV
AM,CV
NV,AAA,GGV
NT,QS,AGS,GGT
LN,IN,QV,AGV,GGL,GGI
DL,DI,EV
LN,IN,JN,QV,AGV,GGL,GGI,GGJ
DL,DI,DJ,EV
QT,AAS,AGT
AY,FS
LQ,IQ,AAV,AGL,AGI
LQ,IQ,AAV,AGL,AGI,AGJ
NQ,ANG,QGG
KN,GGK
EN,DQ,ADG,EGG
DK,AAT,GSV
MN,AAC,GGM
AS,GT
AAL,AAI,GVV
AAL,AAI,AAJ,GVV
QQ,AAN,AQG
EQ,AAD,AEG
EK,ASV,GLS,GIS,GTV
EK,ASV,GLS,GIS,GJS,GTV
MQ,AGM,CGV
AAQ,NGV
<:
Expand All @@ -67,16 +68,16 @@ TemplateMatching ->
Asymmetric sets ->
Score: 1
Sets :>
X->A,R,N,D,C,Q,E,G,H,I,L,K,M,F,P,S,T,W,Y,V,B,Z -Remove mismatch penalty on single gaps
A,R,N,D,C,Q,E,G,H,I,L,K,M,F,P,S,T,W,Y,V,B,Z->X -Remove mismatch penalty on single gaps
X->A,R,N,D,C,Q,E,G,H,I,L,J,K,M,F,P,S,T,W,Y,V,B,Z -Remove mismatch penalty on single gaps
A,R,N,D,C,Q,E,G,H,I,L,J,K,M,F,P,S,T,W,Y,V,B,Z->X -Remove mismatch penalty on single gaps
<:
<-

Asymmetric sets ->
Score: -4
Sets :>
.->A,R,N,D,C,Q,E,G,H,I,L,K,M,F,P,S,T,W,Y,V,B,Z -Add additional penalty on bigger gaps
A,R,N,D,C,Q,E,G,H,I,L,K,M,F,P,S,T,W,Y,V,B,Z->. -Add additional penalty on bigger gaps
.->A,R,N,D,C,Q,E,G,H,I,L,J,K,M,F,P,S,T,W,Y,V,B,Z -Add additional penalty on bigger gaps
A,R,N,D,C,Q,E,G,H,I,L,J,K,M,F,P,S,T,W,Y,V,B,Z->. -Add additional penalty on bigger gaps
<:
<-

Expand All @@ -89,12 +90,12 @@ TemplateMatching ->
T->D -Methylation
S->T -Methylation
D->E -Methylation
R->AV,GL,GI -Methylation
R->AV,GL,GI,GJ -Methylation
Q->AA -Methylation
W->DS,AM,CV,TT -Oxidation
M->F -Oxidation
S->E -Acetylation
K->AV,GL,GI -Acetylation/Homoarginine
K->AV,GL,GI,GJ -Acetylation/Homoarginine
<:
<-
<-
Expand Down
100 changes: 71 additions & 29 deletions stitch/Fragmentation/Spectrum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ public static class Fragmentation {
/// <param name="peptides">All peptides to find the spectra for.</param>
/// <param name="directory">The directory in which to search for the raw data files.</param>
/// <returns></returns>
public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnumerable<ReadFormat.General> peptides) {
public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnumerable<ReadFormat.General> peptides, bool xle_disambiguation) {
var fragments = new Dictionary<string, List<AnnotatedSpectrumMatch>>(peptides.Count());
var scans = peptides.SelectMany(p => p.ScanNumbers.Select(s => (p.EscapedIdentifier, s.RawFile, s.Scan, s.OriginalTag)));
var scans = peptides.SelectMany(p => p.ScanNumbers.Select(s => (p.EscapedIdentifier, s.RawFile, s.Scan, s.OriginalTag, p)));

foreach (var group in scans.GroupBy(m => m.RawFile)) {
ThermoRawFile raw_file = new ThermoRawFile();
Expand All @@ -39,9 +39,7 @@ public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnume
}

foreach (var scan in group) {
string transformedPeaksPeptide = scan.OriginalTag.Replace("(", "[").Replace(")", "]");
// Get the information to find this peptide

double[] mzs;
float[] intensities;

Expand All @@ -53,10 +51,15 @@ public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnume
if (precursor.MonoIsotopicMass < 100 || precursor.ChargeState < 2)
continue;

var tic = raw_file.GetScanHeaderInfoForScanNum(scan.Scan).Tic;

// determine the type of fragmentation used
var model = PeptideFragment.GetFragmentModel(filter.Fragmentation);
var model = new PeptideFragment.FragmentModel(PeptideFragment.GetFragmentModel(filter.Fragmentation)) {
W = new PeptideFragment.FragmentRange {
MinPos = 1,
MaxPos = PeptideFragment.SEQUENCELENGTH - 1,
HigherCharges = true,
MassShifts = PeptideFragment.MASSSHIFT_WATERLOSS | PeptideFragment.MASSSHIFT_AMMONIALOSS
}
};
raw_file.GetMassListFromScanNum(scan.Scan, false, out mzs, out intensities);

// Default set to 20, should be lower for BU
Expand All @@ -72,30 +75,50 @@ public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnume
}

// Deisotope: not strictly necessary
IsotopePattern[] isotopes = IsotopePatternDetection.Process(spectrum, new IsotopePatternDetection.Settings { MaxCharge = (short)precursor.ChargeState });
var spectrumDeiso = SpectrumUtils.Deisotope(spectrum, isotopes, (short)precursor.ChargeState, true);

var spectrumTopX = SpectrumUtils.TopX(spectrum, 40, 100, out int[] ranks);
var spectrumDeisoTopX = SpectrumUtils.TopX(spectrum, 40, 100, out int[] ranks2);
//IsotopePattern[] isotopes = IsotopePatternDetection.Process(spectrum, new IsotopePatternDetection.Settings { MaxCharge = (short)precursor.ChargeState });
//var spectrumDeiso = SpectrumUtils.Deisotope(spectrum, isotopes, (short)precursor.ChargeState, true);
//var spectrumTopX = SpectrumUtils.TopX(spectrum, 40, 100, out int[] ranks);
//var spectrumDeisoTopX = SpectrumUtils.TopX(spectrum, 40, 100, out int[] ranks2);

string transformedPeaksPeptide = scan.OriginalTag.Replace("(", "[").Replace(")", "]");
string sequence = FastaParser.ParseProForma(transformedPeaksPeptide, Modification.Parse(), out Modification n_term, out Modification c_term, out Modification[] modifications);

Peptide peptide = new Peptide(sequence, n_term, c_term, modifications);

// If not deisotoped, should be charge of the precursor
short maxCharge = 1;
maxCharge = (short)precursor.ChargeState;
PeptideFragment[] peptide_fragments = PeptideFragment.Generate(peptide, maxCharge, model);
var matchedFragments = SpectrumUtils.MatchFragments(peptide, maxCharge, spectrum, peptide_fragments, model.tolerance, model.IsotopeError);

var hl_precursor = new PrecursorInfo {
Mz = filter.ParentMass,
Fragmentation = filter.Fragmentation,
FragmentationEnergy = filter.FragmentationEnergy,
RawFile = raw_file.GetFilename(),
ScanNumber = scan.Scan,
};
var asm = new AnnotatedSpectrumMatch(new SpectrumContainer(spectrum, hl_precursor, toleranceInPpm: 20), peptide, matchedFragments);
if (xle_disambiguation) {
string pureL = new String(sequence.Select(c => c == 'I' ? 'L' : c).ToArray());
string pureI = new String(sequence.Select(c => c == 'L' ? 'I' : c).ToArray());

var asmL = GetASM(pureL, n_term, c_term, modifications, raw_file.GetFilename(), scan.Scan, precursor, model, spectrum, filter);
var asmI = GetASM(pureI, n_term, c_term, modifications, raw_file.GetFilename(), scan.Scan, precursor, model, spectrum, filter);

var builder = new StringBuilder();
for (int i = 0; i < pureI.Length; i++) {
if (pureI[i] == 'I') {
var supportL = asmL.FragmentMatches.FirstOrDefault(f => f != null && f.FragmentType == PeptideFragment.ION_W && f.Position == pureI.Length - i); // w ions always have C terminal position
var supportI = asmI.FragmentMatches.FirstOrDefault(f => f != null && f.FragmentType == PeptideFragment.ION_W && f.Position == pureI.Length - i);
var d = default(PeptideFragment);
if (supportL == d && supportI == d) {
builder.Append('L');
if (scan.p.Sequence.AminoAcids[i].Character != 'J')
scan.p.Sequence.UpdateSequence(i, 1, new AminoAcid[] { new AminoAcid('J') }, "No w ion support for either L or I");
} else if (supportL == d) {
builder.Append('I');
if (scan.p.Sequence.AminoAcids[i].Character != 'I')
scan.p.Sequence.UpdateSequence(i, 1, new AminoAcid[] { new AminoAcid('I') }, $"Support for I based on w ions {supportI}");
} else if (supportI == d) {
builder.Append('L');
if (scan.p.Sequence.AminoAcids[i].Character != 'L')
scan.p.Sequence.UpdateSequence(i, 1, new AminoAcid[] { new AminoAcid('L') }, $"Support for L based on w ions {supportL}");
} else {
builder.Append('L');
if (scan.p.Sequence.AminoAcids[i].Character != 'J')
scan.p.Sequence.UpdateSequence(i, 1, new AminoAcid[] { new AminoAcid('J') }, "w Ion support for both L and I");
}
} else {
builder.Append(sequence[i]);
}
}
sequence = builder.ToString();
}
var asm = GetASM(sequence, n_term, c_term, modifications, raw_file.GetFilename(), scan.Scan, precursor, model, spectrum, filter);

if (fragments.ContainsKey(scan.EscapedIdentifier)) {
fragments[scan.EscapedIdentifier].Add(asm);
Expand All @@ -106,5 +129,24 @@ public static Dictionary<string, List<AnnotatedSpectrumMatch>> GetSpectra(IEnume
}
return fragments;
}

static AnnotatedSpectrumMatch GetASM(string sequence, Modification n_term, Modification c_term, Modification[] modifications, string RawFile, int ScanNumber, ThermoRawFile.PrecursorInfo precursor, PeptideFragment.FragmentModel model, Centroid[] spectrum, ThermoRawFile.FilterLine filter) {
Peptide peptide = new Peptide(sequence, n_term, c_term, modifications);

// If not deisotoped, should be charge of the precursor
short maxCharge = 1;
maxCharge = (short)precursor.ChargeState;
PeptideFragment[] peptide_fragments = PeptideFragment.Generate(peptide, maxCharge, model);
var matchedFragments = SpectrumUtils.MatchFragments(peptide, maxCharge, spectrum, peptide_fragments, model.tolerance, model.IsotopeError);

var hl_precursor = new PrecursorInfo {
Mz = filter.ParentMass,
Fragmentation = filter.Fragmentation,
FragmentationEnergy = filter.FragmentationEnergy,
RawFile = RawFile,
ScanNumber = ScanNumber,
};
return new AnnotatedSpectrumMatch(new SpectrumContainer(spectrum, hl_precursor, toleranceInPpm: (int)model.tolerance.Value), peptide, matchedFragments);
}
}
}
5 changes: 5 additions & 0 deletions stitch/OpenReads/Read.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public override HtmlBuilder ToHTML() {
html.OpenAndClose(HtmlTag.p, "", Identifier);
html.OpenAndClose(HtmlTag.h3, "", "Fasta header");
html.OpenAndClose(HtmlTag.p, "", FastaHeader);
html.Add(Sequence.RenderToHtml());
html.Add(File.ToHTML());
return html;
}
Expand Down Expand Up @@ -452,6 +453,7 @@ public override HtmlBuilder ToHTML() {
html.OpenAndClose(HtmlTag.p, "", FragmentationMode);
}

html.Add(Sequence.RenderToHtml());
html.Add(File.ToHTML());

return html;
Expand Down Expand Up @@ -483,6 +485,7 @@ public override HtmlBuilder ToHTML() {
html.OpenAndClose(HtmlTag.p, "", Intensity.ToString("G4"));
html.OpenAndClose(HtmlTag.h3, "", "TotalArea");
html.OpenAndClose(HtmlTag.p, "", TotalArea.ToString("G4"));
html.Add(Sequence.RenderToHtml());
html.Add(HTMLNameSpace.HTMLGraph.Bargraph(HTMLNameSpace.HTMLGraph.AnnotateDOCData(Sequence.PositionalScore.Select(a => (double)a).ToList()), new HtmlGenerator.HtmlBuilder("Positional Score"), null, null, 1));
foreach (var child in Children) {
html.Add(child.ToHTML());
Expand Down Expand Up @@ -560,6 +563,7 @@ public override HtmlBuilder ToHTML() {
html.OpenAndClose(HtmlTag.p, "", Error.ToString());
html.OpenAndClose(HtmlTag.h3, "", "Original Sequence");
html.OpenAndClose(HtmlTag.p, "", OriginalSequence);
html.Add(Sequence.RenderToHtml());
html.Add(File.ToHTML());
return html;
}
Expand Down Expand Up @@ -620,6 +624,7 @@ public override HtmlBuilder ToHTML() {
html.OpenAndClose(HtmlTag.h2, "", "Meta Information from a structural read");
html.OpenAndClose(HtmlTag.h3, "", "Name");
html.OpenAndClose(HtmlTag.p, "", this.Name);
html.Add(Sequence.RenderToHtml());
html.Add(File.ToHTML());
return html;
}
Expand Down
2 changes: 1 addition & 1 deletion stitch/Reporting/HTMLReport/Asides.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public static HtmlBuilder CreateReadAside(ReadFormat.General MetaData, ReadOnlyC

if (Fragments != null && Fragments.ContainsKey(MetaData.EscapedIdentifier)) {
foreach (var spectrum in Fragments[MetaData.EscapedIdentifier]) {
html.Add(Graph.RenderSpectrum(spectrum, new HtmlBuilder(HtmlTag.p, HTMLHelp.Spectrum)));
html.Add(Graph.RenderSpectrum(spectrum, new HtmlBuilder(HtmlTag.p, HTMLHelp.Spectrum), null, AminoAcid.ArrayToString(MetaData.Sequence.AminoAcids)));
}
}
html.TagWithHelp(HtmlTag.h2, "Reverse Lookup", new HtmlBuilder(HTMLHelp.ReadLookup));
Expand Down
29 changes: 10 additions & 19 deletions stitch/RunParameters/Run.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,33 +53,24 @@ public Run() { }
/// <summary> Runs this run. Runs the assembly, and generates the reports. </summary>
public void Calculate() {
Template.AmbiguityThreshold = TemplateMatching.AmbiguityThreshold;
// Template Matching
var stopWatch = new Stopwatch();
stopWatch.Start();

var segments = RunTemplateMatching();

stopWatch.Stop();
// Raw data
Dictionary<string, List<AnnotatedSpectrumMatch>> fragments = null;
if (this.LoadRawData) {
fragments = Fragmentation.GetSpectra(Input.Data.Cleaned, true);
ProgressBar.Update();
}

var recombined_segment = new List<Segment>();
// Template Matching
var segments = RunTemplateMatching();

// Recombine
var recombined_segment = new List<Segment>();
if (Recombine != null) {
var recombine_sw = new Stopwatch();
recombine_sw.Start();

RunRecombine(segments, recombined_segment);

recombine_sw.Stop();
}

// Raw data
Dictionary<string, List<AnnotatedSpectrumMatch>> fragments = null;
if (this.LoadRawData) {
fragments = Fragmentation.GetSpectra(Input.Data.Cleaned);
ProgressBar.Update();
}

// Generate report parameters
var parameters = new ReportInputParameters(Input.Data.Cleaned, segments, recombined_segment, this.BatchFile, this.extraArguments, this.Runname, fragments);

// If there is an expected outcome present to answers here
Expand Down
Loading

0 comments on commit 76c36f7

Please sign in to comment.