-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-528 cache the locations in the super blocks #529
Changes from 5 commits
f5bb5ce
5a45912
9b7e6e2
4d56b27
342e3e8
c97d4f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; | ||
import com.the_qa_company.qendpoint.core.util.io.Closer; | ||
import com.the_qa_company.qendpoint.core.util.io.IOUtil; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.Closeable; | ||
import java.io.IOException; | ||
|
@@ -39,14 +41,31 @@ | |
* @author mario.arias | ||
*/ | ||
public class Bitmap375Big extends Bitmap64Big { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(Bitmap375Big.class); | ||
|
||
private static final boolean oldBinarySearch; | ||
|
||
static {
	// The legacy search can be re-enabled with -DuseOldBinarySearch=true.
	// Boolean.getBoolean is true only when the system property exists and
	// equals "true" ignoring case, which is exactly the check that was
	// previously spelled out by hand.
	oldBinarySearch = Boolean.getBoolean("useOldBinarySearch");
	logger.debug(oldBinarySearch ? "Using old binary search" : "Using new binary search");
}
|
||
/** | ||
* create disk version bitmap with in memory super index | ||
* | ||
* @param location location | ||
* @param nbits number of bits | ||
* @return bitmap | ||
*/ | ||
|
||
public static Bitmap375Big disk(Path location, long nbits) { | ||
// Delegates to the three-argument overload, passing false as its third argument.
return disk(location, nbits, false); | ||
} | ||
|
@@ -181,6 +200,7 @@ public void updateIndex() { | |
} | ||
pop = countSuperBlock + countBlock; | ||
indexUpToDate = true; | ||
superBlocks.recalculateEstimatedValueLocation(); | ||
} | ||
|
||
/* | ||
|
@@ -189,8 +209,9 @@ public void updateIndex() { | |
*/ | ||
@Override | ||
public boolean access(long bitIndex) { | ||
if (bitIndex < 0) | ||
if (bitIndex < 0) { | ||
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); | ||
} | ||
|
||
long wordIndex = wordIndex(bitIndex); | ||
if (wordIndex >= words.length()) { | ||
|
@@ -324,15 +345,14 @@ public long select1(long x) { | |
return 0; | ||
} | ||
// Search superblock (binary Search) | ||
long superBlockIndex = binarySearch(superBlocks, x); | ||
long superBlockIndex = oldBinarySearch ? binarySearch(superBlocks, x) : binarySearchNew(superBlocks, x); | ||
|
||
// If there is a run of many zeros, two correlative superblocks may have | ||
// the same value, | ||
// We need to position at the first of them. | ||
|
||
while (superBlockIndex > 0 && (superBlocks.get(superBlockIndex) >= x)) { | ||
superBlockIndex--; | ||
|
||
} | ||
|
||
long countdown = x - superBlocks.get(superBlockIndex); | ||
|
@@ -444,6 +464,7 @@ public static long binarySearch0(LongArray arr, long fromIndex, long toIndex, lo | |
* @param val val | ||
* @return index | ||
*/ | ||
|
||
public static long binarySearch(LongArray arr, long val) { | ||
long min = 0, max = arr.length(), mid; | ||
|
||
|
@@ -460,11 +481,52 @@ public static long binarySearch(LongArray arr, long val) { | |
return min; | ||
} | ||
|
||
public static long binarySearchNew(LongArray arr, long val) { | ||
|
||
long min = arr.getEstimatedLocationLowerBound(val); | ||
long max = arr.getEstimatedLocationUpperBound(val); | ||
long mid = arr.getEstimatedLocation(val, min, max); | ||
|
||
int i = 0; | ||
while (min + 1 < max) { | ||
// After the first iteration, the value that we are looking for is | ||
// typically very close to the min value. Using linear search for | ||
// the next two iterations improves the chances that we find the | ||
// value faster than with binary search. | ||
if (i == 1 || i == 2) { | ||
long v = arr.get(min + 1); | ||
if (v >= val) { | ||
max = min + 1; | ||
} else { | ||
min = min + 1; | ||
} | ||
} else { | ||
long v = arr.get(mid); | ||
if (v >= val) { | ||
max = mid; | ||
} else { | ||
min = mid; | ||
} | ||
} | ||
mid = (min + max) / 2; | ||
i++; | ||
} | ||
|
||
arr.updateEstimatedValueLocation(val, min); | ||
|
||
return min; | ||
} | ||
|
||
/**
 * @return the {@code blocksPath} field of this bitmap
 */
public CloseSuppressPath getBlocksPath() { | ||
return blocksPath; | ||
} | ||
|
||
/**
 * @return the {@code superBlocksPath} field of this bitmap
 */
public CloseSuppressPath getSuperBlocksPath() { | ||
return superBlocksPath; | ||
} | ||
|
||
/**
 * Returns a fixed placeholder rather than a rendering of the bitmap
 * content. Debuggers such as IntelliJ call {@code toString()} to display
 * an object; because a Bitmap375Big is usually very large, a constant
 * string keeps the debugger from analysing and displaying the entire
 * object.
 *
 * @return the constant string {@code "Bitmap375Big{}"}
 */
@Override | ||
public String toString() { | ||
return "Bitmap375Big{}"; | ||
} | ||
Comment on lines
+527
to
+531
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IntelliJ likes to show the objects in the debugger, which is usually a good idea, except that Bitmap375Big is usually a very, very big object, so overriding toString() forces IntelliJ to use the result of that instead of analyzing and displaying the entire object. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,9 +64,10 @@ public static void writeLowerBitsByteAligned(long value, long numbits, OutputStr | |
public static int select1(long value, int rank) { | ||
int bitpos = 0; | ||
while (rank > 0 && value != 0) { | ||
rank -= value & 1; | ||
bitpos++; | ||
value >>>= 1; | ||
int trailingZeros = Long.numberOfTrailingZeros(value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is faster. Not certain why, but I assume that the Long.numberOfTrailingZeros(value) method uses a SIMD instruction to check multiple bits in a single operation. |
||
bitpos += trailingZeros + 1; | ||
value >>>= trailingZeros + 1; | ||
rank--; | ||
} | ||
return bitpos; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package com.the_qa_company.qendpoint.core.util.disk; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public abstract class AbstractLongArray implements LongArray { | ||
|
||
private final Logger logger = LoggerFactory.getLogger(getClass()); | ||
|
||
private static final int ESTIMATED_LOCATION_ARRAY_SIZE; | ||
|
||
static { | ||
// get total amount of memory that this java program is allowed to use | ||
long maxMemory = Runtime.getRuntime().maxMemory(); | ||
|
||
if (maxMemory >= 1024 * 1024 * 512) { | ||
ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128; | ||
} else if (maxMemory >= 1024 * 1024 * 256) { | ||
ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 64; | ||
} else if (maxMemory >= 1024 * 1024 * 128) { | ||
ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 32; | ||
} else { | ||
ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 16; | ||
} | ||
|
||
} | ||
|
||
private final long[] estimatedLocationMax = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; | ||
private final long[] estimatedLocationMin = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; | ||
private final long[] estimatedLocation = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; | ||
|
||
private int estimatedLocationBucketSize; | ||
|
||
long maxValue = 1; | ||
|
||
@Override | ||
public int getEstimatedLocationArrayBucketSize() { | ||
return estimatedLocationBucketSize; | ||
} | ||
|
||
private void updateEstimatedLocationArrayBucketSize() { | ||
int minBucketSize = (int) (maxValue / ESTIMATED_LOCATION_ARRAY_SIZE); | ||
// we want to have the next power of 2 | ||
int next = 1; | ||
while (next < minBucketSize) { | ||
next <<= 1; | ||
} | ||
this.estimatedLocationBucketSize = next; | ||
} | ||
|
||
@Override | ||
public long[] getEstimatedLocationArray() { | ||
return estimatedLocation; | ||
} | ||
|
||
@Override | ||
public long[] getEstimatedLocationArrayMin() { | ||
return estimatedLocationMin; | ||
} | ||
|
||
@Override | ||
public long[] getEstimatedLocationArrayMax() { | ||
return estimatedLocationMax; | ||
} | ||
|
||
@Override | ||
public void recalculateEstimatedValueLocation() { | ||
updateEstimatedLocationArrayBucketSize(); | ||
int estimatedLocationBucketSize = getEstimatedLocationArrayBucketSize(); | ||
long len = length(); | ||
boolean shouldLog = len > 1024 * 1024 * 2; | ||
if (shouldLog) { | ||
logger.info("Recalculating estimated location array 0%"); | ||
} | ||
|
||
for (int i = 0; i < len; i++) { | ||
long val = get(i); | ||
if (val == 0) { | ||
continue; | ||
} | ||
|
||
int index = (int) (val / estimatedLocationBucketSize + 1); | ||
estimatedLocationMax[index] = Math.max(estimatedLocationMax[index], i); | ||
if (estimatedLocationMin[index] == 0) { | ||
estimatedLocationMin[index] = i; | ||
} else { | ||
estimatedLocationMin[index] = Math.min(estimatedLocationMin[index], i); | ||
} | ||
estimatedLocation[index] = (estimatedLocationMax[index] + estimatedLocationMin[index]) / 2; | ||
|
||
if (shouldLog && i % (1024 * 1024) == 0) { | ||
logger.info("Recalculating estimated location array {}%", (int) Math.floor(100.0 / len * i)); | ||
} | ||
} | ||
|
||
if (shouldLog) { | ||
logger.info("Recalculating estimated location array 100%"); | ||
} | ||
} | ||
|
||
@Override | ||
public final void set(long index, long value) { | ||
maxValue = Math.max(maxValue, value); | ||
innerSet(index, value); | ||
} | ||
|
||
abstract protected void innerSet(long index, long value); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,4 +55,5 @@ public void resize(long newSize) throws IOException { | |
public void clear() { | ||
// Forwards the call to array.clear().
array.clear(); | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason for this add? Was this part of a compilation issue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Runtime issue actually. Complained of a method not found when generating the query explanation json. The project inherits two different versions of Jackson, so we apparently need to decide which one to use.
It would be cleaner to use dependency management instead, but I couldn't find that in any of the poms. Maybe I overlooked something?