diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/groundtruth/GroundTruthScorer.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/groundtruth/GroundTruthScorer.java
index ff44ac5ca5d..f3191fbaadb 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/groundtruth/GroundTruthScorer.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/groundtruth/GroundTruthScorer.java
@@ -37,6 +37,99 @@
import java.util.*;
import java.util.zip.GZIPOutputStream;
+/**
+ * Converts Ultima reads into flow-based annotation, and provides some general statistics regarding
+ * quality and errors relative to the reference. Ultima data is flow-based, and thus the original computed
+ * quality refers to each flow, rather than each base. In the Ultima cram/bam, there is a per-base representation
+ * of the original flow qualities, where the original quality is distributed along each flow (homopolymer).
+ * In order to reconstitute the original flow information, the tool incorporates the information encoded
+ * in the Ultima cram/bam, and outputs both the read in flow space, as well as a conversion of the aligned
+ * reference portion into flow space, and an alignment score.
+ *
+ * Input
+ *
+ * - Ultima aligned SAM/BAM/CRAM
+ *
+ *
+ * Output
+ *
+ * - Per read ground truth information CSV and a ground truth scoring quality report, in GATK report format
+ *
+ *
+ * CSV Output Description
+ * csv with the read representation in flow space. The csv includes the following columns:
+ * ReadName
+ * ReadKey : The signal of the read at each flow according to the flow order
+ * ReadIsReversed : Whether the read is reversed in the alignment
+ * ReadMQ : The mapping quality of the read
+ * ReadRQ : The read rq value
+ * GroundTruthKey : The aligned reference section, translated into per-flow signals
+ * ReadSequence
+ * Score : A flow-based alignment score. Since the alignment is per-flow, in the case that there is a cycle skip, the read and reference flow signals will not be aligned, and therefore the score will be inaccurate.
+ * NormalizedScore : A flow-based normalized alignment score
+ * ErrorProbability : The error of each flow (corresponds to the signals in ReadKey)
+ * ReadKeyLength
+ * GroundTruthKeyLength
+ * CycleSkipStatus : One of NS (Non Skip), PCS (Possible Cycle Skip), or CS (Cycle Skip)
+ * Cigar
+ * LowestQBaseTP
+ *
+ * GATK Report Description
+ * In the quality report (optional), the following tables are included:
+ *
+ * qualReport:error rate per qual : The error rate for each quality. Columns:
+ *
+ * - qual: The encoded quality
+ * - count: The number of times the quality was observed
+ * - error: The error rate of the flows with this qual
+ * - phred: The error translated into a phred score
+ *
+ *
+ * qual_hmerReport:error rate per qual by hmer. The error rate for each quality and hmer combination. Columns:
+ *
+ * - qual: The encoded quality
+ * - hmer: The hmer length
+ * - count: The number of times the quality was observed
+ * - error: The error rate of the flows with this qual
+ *
+ *
+ * qual_hmer_deviation_base_Report:error rate per qual by hmer and deviation. The count of errors for each qual, hmer, deviation and base. Columns:
+ *
+ * - qual: The encoded quality
+ * - hmer: The hmer length
+ * - deviation: The deviation (difference in signal, relative to the reference)
+ * - base: The base
+ * - count: The number of times the deviation was observed
+ *
+ *
+ * Phred/qual statistics per flow position report. Various statistics for each flow position in relationship to the found quality value. Columns:
+ *
+ * - flow - flow position
+ * - count - count of observations
+ * - min - minimal observed quality
+ * - max - maximal observed quality
+ * - mean - mean observed value
+ * - median - median observed value
+ * - std - standard deviation
+ * - p1...pn - percentile columns, according to the --quality-percentiles parameter
+ *
+ *
+ * Usage examples
+ *
+ * gatk GroundTruthScorer \
+ * -I input.bam \
+ * -R reference.fasta.gz \
+ * -L chr20 \
+ * --output-csv output.csv \
+ * --report-file report.txt \
+ * --omit-zeros-from-report \ (optional)
+ * --features-file dbsnp.chr9.vcf.gz \ (optional)
+ * --genome-prior genome_prior.csv (optional)
+ *
+ *
+ * {@GATK.walkertype ReadWalker}
+ */
+
@CommandLineProgramProperties(
summary = "Ground Truth Scorer",
oneLineSummary = "Score reads against a reference/ground truth",