Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to CompareMetrics #1839

Merged
merged 3 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 134 additions & 23 deletions src/main/java/picard/analysis/CompareMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.StringUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
Expand All @@ -47,6 +48,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -87,11 +89,26 @@ public class CompareMetrics extends CommandLineProgram {
doc = "Output file to write comparison results to.", optional = true)
public File OUTPUT;

@Argument(
doc = "Output file to write table of differences to.",
optional = true
)
public File OUTPUT_TABLE;

@Argument(shortName = "MI",
doc = "Metrics to ignore. Any metrics specified here will be excluded from comparison by the tool.",
doc = "Metrics to ignore. Any metrics specified here will be excluded from comparison by the tool. " +
"Note that while the values of these metrics are not compared, if they are missing from either file that will be considered a difference. " +
"Use METRICS_NOT_REQUIRED to specify metrics which can be missing from either file without being considered a difference.",
optional = true)
public List<String> METRICS_TO_IGNORE;

@Argument(shortName = "MNR",
doc = "Metrics which are not required. Any metrics specified here may be missing from either of the files in the comparison, and this will not affect " +
"the result of the comparison. If metrics specified here are included in both files, their results will not be compared (they will be treated as " +
"METRICS_TO_IGNORE.",
optional = true)
public List<String> METRICS_NOT_REQUIRED;

@Argument(shortName = "MARC",
doc = "Metric Allowable Relative Change. A colon separate pair of metric name and an absolute relative change. For any metric specified here, " +
" when the values are compared between the two files, the program will allow that much relative change between the " +
Expand All @@ -104,7 +121,14 @@ public class CompareMetrics extends CommandLineProgram {
optional = true)
public boolean IGNORE_HISTOGRAM_DIFFERENCES = false;

@Argument(
doc = "Columns to use as keys for matching metrics rows that should agree. If not specified, it is assumed that rows should be in the same order.",
optional = true
)
public List<String> KEY;

private final List<String> differences = new ArrayList<>();
private final List<MetricComparisonDifferences> valueDifferences = new ArrayList<>();

private String metricClassName = "Unknown";

Expand All @@ -129,9 +153,14 @@ protected int doWork() {
+ INPUT.get(0).getAbsolutePath() + " and " + INPUT.get(1).getAbsolutePath() + "\n\nMetrics are " + status;
writeTextToFile(OUTPUT, header, differences);
}
if (OUTPUT_TABLE != null) {
final MetricsFile<MetricComparisonDifferences, ?> metricDifferencesFile = getMetricsFile();
metricDifferencesFile.addAllMetrics(valueDifferences);
metricDifferencesFile.write(OUTPUT_TABLE);
}
return retVal;
} catch (final Exception e) {
throw new PicardException(e.getMessage());
throw new PicardException(e.getMessage(), e);
}
}

Expand Down Expand Up @@ -170,7 +199,7 @@ protected String[] customCommandLineValidation() {
return errs.toArray(new String[0]);
}

private int compareMetricsFiles(final File metricFile1, final File metricFile2) throws IOException, IllegalAccessException {
private int compareMetricsFiles(final File metricFile1, final File metricFile2) throws IOException, IllegalAccessException, NoSuchFieldException {
final MetricsFile<?, ?> mf1 = new MetricsFile<>();
final MetricsFile<?, ?> mf2 = new MetricsFile<>();
mf1.read(new FileReader(metricFile1));
Expand Down Expand Up @@ -203,43 +232,74 @@ else if (!mf2.getMetrics().isEmpty()) {
differences.add("Number of metric rows differ between " + metricFile1.getAbsolutePath() + " and " + metricFile2.getAbsolutePath());
return 1;
}
else if (!mf1.getMetrics().get(0).getClass().equals(mf2.getMetrics().get(0).getClass())) {
if (!mf1.getMetrics().get(0).getClass().equals(mf2.getMetrics().get(0).getClass())) {
throw new PicardException("Metrics are of differing class between " + metricFile1.getAbsolutePath() + " and " + metricFile2.getAbsolutePath());
}
else if (!mf1.getMetricsColumnLabels().equals(mf2.getMetricsColumnLabels())) {
differences.add("Metric columns differ between " + metricFile1.getAbsolutePath() + " and " + metricFile2.getAbsolutePath());

final Set<String> columns1 = new HashSet<>(mf1.getMetricsColumnLabels());
final Set<String> columns2 = new HashSet<>(mf2.getMetricsColumnLabels());
columns1.removeAll(METRICS_NOT_REQUIRED);
columns2.removeAll(METRICS_NOT_REQUIRED);
if (!columns1.equals(columns2)) {
final Set<String> missingColumns = new HashSet<>(columns1);
missingColumns.removeAll(columns2);
final Set<String> inColumns2NotColumns1 = new HashSet<>(columns2);
inColumns2NotColumns1.removeAll(columns1);
missingColumns.addAll(inColumns2NotColumns1);
differences.add("Metric columns differ between " + metricFile1.getAbsolutePath() + " and " + metricFile2.getAbsolutePath() + " (" +
StringUtil.join(",", missingColumns) + ")");

return 1;
}

validateMetricNames(mf1, metricFile1, METRICS_TO_IGNORE);
validateMetricNames(mf1, metricFile1, MetricToAllowableRelativeChange.keySet());

Set<String> metricsToIgnore = new HashSet<>(METRICS_TO_IGNORE);
metricsToIgnore.addAll(METRICS_NOT_REQUIRED);

final Class<? extends MetricBase> metricClass = mf1.getMetrics().get(0).getClass();

int retVal = 0;
int rowNumber = -1;
final Field[] fields = metricClass.getFields();
Iterator<?> mf1Iterator = mf1.getMetrics().iterator();
Iterator<?> mf2Iterator = mf2.getMetrics().iterator();
while (mf1Iterator.hasNext()) {
rowNumber++;
MetricBase metric1 = (MetricBase) mf1Iterator.next();
MetricBase metric2 = (MetricBase) mf2Iterator.next();
for (Field field : fields) {
if (!metricsToIgnore.contains(field.getName())) {
final Object value1 = field.get(metric1);
final Object value2 = field.get(metric2);
SimpleResult result = compareMetricValues(value1, value2, field.getName());
if (!result.equal) {

int retVal = 0;
if (KEY.size() == 0) {
//key by row number
int rowNumber = -1;
Iterator<?> mf1Iterator = mf1.getMetrics().iterator();
Iterator<?> mf2Iterator = mf2.getMetrics().iterator();
while (mf1Iterator.hasNext()) {
rowNumber++;
MetricBase metric1 = (MetricBase) mf1Iterator.next();
MetricBase metric2 = (MetricBase) mf2Iterator.next();
if (compareMetricsForEntry(metric1, metric2, fields, metricsToIgnore, String.valueOf(rowNumber)) == 1) {
retVal = 1;
}
}
} else {
//build each map of metrics
final Map<List<Object>, ? extends MetricBase> metricMap1 = buildMetricsMap(mf1.getMetrics());
final Map<List<Object>, ? extends MetricBase> metricMap2 = buildMetricsMap(mf2.getMetrics());

for (final Map.Entry<List<Object>, ? extends MetricBase> entry1 : metricMap1.entrySet()) {
final List<Object> key = entry1.getKey();
final MetricBase metric1 = entry1.getValue();

final MetricBase metric2 = metricMap2.remove(key);
if (metric2 != null) {
if (compareMetricsForEntry(metric1, metric2, fields, metricsToIgnore, StringUtil.join(",", key)) == 1) {
retVal = 1;
final String diffString = "Row: " + rowNumber + " Metric: " + field.getName() +
" values differ. Value1: " + value1 + " Value2: " + value2 + " " + result.description;
differences.add(diffString);
}
} else {
differences.add("KEY " + StringUtil.join(",", key) + " found in " + metricFile1 + " but not in " + metricFile2);
retVal = 1;
}
}
//check that all entries in metricMap2 have been matched
for (final Map.Entry<List<Object>, ? extends MetricBase> entry : metricMap2.entrySet()) {
differences.add("KEY " + StringUtil.join(",", entry.getKey()) + " found in " + metricFile2 + " but not in " + metricFile1);
retVal = 1;
}
}

if (!IGNORE_HISTOGRAM_DIFFERENCES) {
Expand All @@ -255,6 +315,50 @@ else if (!mf1.getMetricsColumnLabels().equals(mf2.getMetricsColumnLabels())) {
return retVal;
}

protected Map<List<Object>, MetricBase> buildMetricsMap(final List<? extends MetricBase> metrics) throws NoSuchFieldException, IllegalAccessException {
final HashMap<List<Object>, MetricBase> retMap = new LinkedHashMap<>();
final Class<? extends MetricBase> clazz = metrics.get(0).getClass();
final List<Field> keyFields = new ArrayList<>();
for (final String key : KEY) {
final Field keyField = clazz.getField(key);
keyFields.add(keyField);
}

for (final MetricBase metric : metrics) {
final List<Object> mapKey = new ArrayList<>();
for (final Field keyField : keyFields) {
mapKey.add(keyField.get(metric));
}
retMap.put(mapKey, metric);
}

return retMap;
}

protected int compareMetricsForEntry(final MetricBase metric1, final MetricBase metric2, final Field[] fields, final Set<String> metricsToIgnore, final String key) throws IllegalAccessException {
int retVal = 0;
for (Field field : fields) {
if (!metricsToIgnore.contains(field.getName())) {
final Object value1 = field.get(metric1);
final Object value2 = field.get(metric2);
SimpleResult result = compareMetricValues(value1, value2, field.getName());
if (!result.equal) {
retVal = 1;
final String diffString = "Key: " + key + " Metric: " + field.getName() +
" values differ. Value1: " + value1 + " Value2: " + value2 + " " + result.description;
differences.add(diffString);
final MetricComparisonDifferences metricDifferences = new MetricComparisonDifferences();
metricDifferences.KEY = key;
metricDifferences.METRIC = field.getName();
metricDifferences.VALUE1 = value1;
metricDifferences.VALUE2 = value2;
valueDifferences.add(metricDifferences);
}
}
}
return retVal;
}

protected SimpleResult compareMetricValues(final Object value1, final Object value2, final String metricName) {
boolean equal = true;
String description = "";
Expand Down Expand Up @@ -323,4 +427,11 @@ public SimpleResult(boolean equal, String description) {
this.description = description;
}
}

static public class MetricComparisonDifferences extends MetricBase {
public String KEY;
public String METRIC;
public Object VALUE1;
public Object VALUE2;
}
}
48 changes: 29 additions & 19 deletions src/test/java/picard/analysis/CompareMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,35 @@ public class CompareMetricsTest {
@DataProvider(name = "testCompareMetricsDataProvider")
public Object[][] testCompareMetricsDataProvider() {
return new Object[][]{
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), true, 0 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), false, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00001"), null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), true, 0 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), false, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00001"), null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test1.wgs_metrics", null, null, null, 0 },
{ "wgs_test1.raw_wgs_metrics", "wgs_test2.raw_wgs_metrics", null, null, null, 1 },
{ "wgs_test1.2rows.raw_wgs_metrics", "wgs_test1.2rows.raw_wgs_metrics", null, null, null, 0 },
{ "wgs_test1.2rows.raw_wgs_metrics", "wgs_test2.2rows.raw_wgs_metrics", null, null, null, 1 },
{ "wgs_test1.raw_wgs_metrics", "wgs_test2.2rows.raw_wgs_metrics", null, null, null, 1 },
{ "wgs_test1.fingerprinting_summary_metrics", "wgs_test2.fingerprinting_summary_metrics", null, null, null, 1 },
{ "test1.arrays_variant_calling_detail_metrics", "test2.arrays_variant_calling_detail_metrics", null, null, null, 1 },
{ "test1.arrays_variant_calling_detail_metrics", "test2.arrays_variant_calling_detail_metrics", "AUTOCALL_DATE", null, null, 0 },
{ "test1.arrays_variant_calling_summary_metrics", "test2.arrays_variant_calling_summary_metrics", null, null, null, 0 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, null, null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), true, null, null, 0 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), false, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00002"), null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", "GENOME_TERRITORY", Collections.singletonList("SD_COVERAGE:0.00001"), null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), true, null, null, 0 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), false, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00002"), null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test2.wgs_metrics", null, asList("GENOME_TERRITORY:0.0001", "SD_COVERAGE:0.00001"), null, null, null, 1 },
{ "wgs_test1.wgs_metrics", "wgs_test1.wgs_metrics", null, null, null, null, null, 0 },
{ "wgs_test1.raw_wgs_metrics", "wgs_test2.raw_wgs_metrics", null, null, null, null, null, 1 },
{ "wgs_test1.2rows.raw_wgs_metrics", "wgs_test1.2rows.raw_wgs_metrics", null, null, null, null, null, 0 },
{ "wgs_test1.2rows.raw_wgs_metrics", "wgs_test2.2rows.raw_wgs_metrics", null, null, null, null, null, 1 },
{ "wgs_test1.raw_wgs_metrics", "wgs_test2.2rows.raw_wgs_metrics", null, null, null, null, null, 1 },
{ "wgs_test1.fingerprinting_summary_metrics", "wgs_test2.fingerprinting_summary_metrics", null, null, null, null, null, 1 },
{ "test1.arrays_variant_calling_detail_metrics", "test2.arrays_variant_calling_detail_metrics", null, null, null, null, null, 1 },
{ "test1.arrays_variant_calling_detail_metrics", "test2.arrays_variant_calling_detail_metrics", "AUTOCALL_DATE", null, null, null, null, 0 },
{ "test1.arrays_variant_calling_summary_metrics", "test2.arrays_variant_calling_summary_metrics", null, null, null, null, null, 0 },
{ "wgs_test1.2rows.fingerprinting_summary_metrics", "wgs_test2.2rows.fingerprinting_summary_metrics", null, null, false, null, null, 1},
{ "wgs_test1.2rows.fingerprinting_summary_metrics", "wgs_test2.2rows.fingerprinting_summary_metrics", null, null, false, Collections.singletonList("SAMPLE"), null, 0},
{ "wgs_test1.missing_metric.wgs_metrics", "wgs_test1.wgs_metrics", null, null, false, null, null, 1},
{ "wgs_test1.missing_metric.wgs_metrics", "wgs_test1.wgs_metrics", null, null, false, null, Collections.singletonList("HET_SNP_SENSITIVITY"), 0}
};
}

@Test(dataProvider = "testCompareMetricsDataProvider")
public void testCompareMetrics(final String file1, final String file2,
final String metricsToIgnore, final List<String> metricsToAllowableRelativeChange,
final Boolean ignoreHistogramDifferences, final int expectedReturnValue) {
final Boolean ignoreHistogramDifferences, final List<String> keys, final List<String> metricsNotRequired, final int expectedReturnValue) {
final File input1 = new File(TEST_DATA_DIR, file1);
final File input2 = new File(TEST_DATA_DIR, file2);
final CompareMetrics compareMetrics = new CompareMetrics();
Expand All @@ -55,6 +59,12 @@ public void testCompareMetrics(final String file1, final String file2,
if (ignoreHistogramDifferences != null) {
compareMetrics.IGNORE_HISTOGRAM_DIFFERENCES = ignoreHistogramDifferences;
}
if (keys != null) {
compareMetrics.KEY = keys;
}
if (metricsNotRequired != null) {
compareMetrics.METRICS_NOT_REQUIRED = metricsNotRequired;
}
compareMetrics.customCommandLineValidation();
Assert.assertEquals(compareMetrics.instanceMain(new String[0]), expectedReturnValue);
}
Expand Down
Loading