-
Notifications
You must be signed in to change notification settings - Fork 594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrote M2 isActive method as special case of somatic likelihoods model #5814
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package org.broadinstitute.hellbender.utils; | ||
|
||
import org.apache.commons.math3.special.Gamma; | ||
|
||
public final class DigammaCache extends IntToDoubleFunctionCache { | ||
private static final int CACHE_SIZE = 100_000; | ||
|
||
@Override | ||
protected int maxSize() { | ||
return CACHE_SIZE; | ||
} | ||
|
||
@Override | ||
protected double compute(final int n) { | ||
return Gamma.digamma(n); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package org.broadinstitute.hellbender.utils; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
/** | ||
* A helper class to maintain a cache of an int to double function | ||
* The cache expands when a number is not available. | ||
* NOTE: this cache is thread safe and it may be accessed from multiple threads. | ||
*/ | ||
public abstract class IntToDoubleFunctionCache { | ||
private static final Logger logger = LogManager.getLogger(IntToDoubleFunctionCache.class); | ||
|
||
private double[] cache = new double[] { }; | ||
|
||
protected abstract int maxSize(); | ||
|
||
protected abstract double compute(final int n); | ||
|
||
public IntToDoubleFunctionCache() { } | ||
|
||
/** | ||
* Get the value of log10(n), expanding the cache as necessary | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. update this javadoc? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
* @param i operand | ||
* @return log10(n) | ||
*/ | ||
public double get(final int i) { | ||
Utils.validateArg(i >= 0, () -> String.format("Cache doesn't apply to negative number %d", i)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe document this restriction (or even change the class name) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
if (i >= cache.length) { | ||
if (i >= maxSize()) { | ||
return compute(i); | ||
} | ||
final int newCapacity = Math.max(i + 10, 2 * cache.length); | ||
logger.debug("cache miss " + i + " > " + (cache.length-1) + " expanding to " + newCapacity); | ||
expandCache(newCapacity); | ||
} | ||
/* | ||
Array lookups are not atomic. It's possible that the reference to cache could be | ||
changed between the time the reference is loaded and the data is fetched from the correct | ||
offset. However, the value retrieved can't change, and it's guaranteed to be present in the | ||
old reference by the conditional above. | ||
*/ | ||
return cache[i]; | ||
} | ||
|
||
/** | ||
* Ensures that the cache contains a value for n. After completion of expandCache(n), | ||
* get(n) is guaranteed to return without causing a cache expansion | ||
* @param newCapacity desired value to be precomputed | ||
*/ | ||
public synchronized void expandCache(final int newCapacity) { | ||
if (newCapacity < cache.length) { | ||
//prevents a race condition when multiple threads want to expand the cache at the same time. | ||
//in that case, one of them will be first to enter the synchronized method expandCache and | ||
//so the others may end up in this method even if n < cache.length | ||
return; | ||
} | ||
final double[] newCache = new double[newCapacity + 1]; | ||
System.arraycopy(cache, 0, newCache, 0, cache.length); | ||
for (int i = cache.length; i < newCache.length; i++) { | ||
newCache[i] = compute(i); | ||
} | ||
cache = newCache; | ||
} | ||
|
||
public int size() { | ||
return cache.length; | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package org.broadinstitute.hellbender.utils; | ||
|
||
public final class Log10Cache extends IntToDoubleFunctionCache { | ||
@Override | ||
protected int maxSize() { | ||
return Integer.MAX_VALUE; | ||
} | ||
|
||
@Override | ||
protected double compute(final int n) { | ||
return Math.log10(n); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.broadinstitute.hellbender.utils; | ||
|
||
import org.apache.commons.math3.special.Gamma; | ||
|
||
/** | ||
* Wrapper class so that the log10Factorial array is only calculated if it's used | ||
*/ | ||
public class Log10FactorialCache extends IntToDoubleFunctionCache { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make this final There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
private static final int CACHE_SIZE = 10_000; | ||
|
||
@Override | ||
protected int maxSize() { | ||
return CACHE_SIZE; | ||
} | ||
|
||
@Override | ||
protected double compute(final int n) { | ||
return MathUtils.log10Gamma(n + 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without delving into the details of what exactly your branch is computing as of right now, it looks like this isn't actually doing the factorial computation here but rather calling out to the logGamma function. This may be equivalent? If not then you should update the comments on this class to reflect what its doing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the gamma function satisfied Gamma(n + 1) = n! for integers n. The old implementation instead grew the cache incrementally as log((n+1)!) = log(n!) + log(n+1). Since log(n+1) in the old implementation was also cached this was faster, but the one-time cost of computing logGamma a few thousand times to fill the cache is probably a few milliseconds, if that. |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we obtain this equation from the equation about z_bar (Equation 6) right above
\ref{f-tilde}
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed