Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-528 cache the locations in the super blocks #529

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions qendpoint-backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,23 @@
<rdf4j.version>5.0.2</rdf4j.version>
<spring.version>3.4.0</spring.version>
<logback.version>1.5.6</logback.version>
<jackson.version>2.18.1</jackson.version>

<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>

<dependencies>
<dependency>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for this add? Was this part of a compilation issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Runtime issue actually. Complained of a method not found when generating the query explanation json. The project inherits two different versions of Jackson, so we apparently need to decide which one to use.

It would be cleaner to use dependency management instead, but I couldn't find that in any of the poms. Maybe I overlooked something?

<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath;
import com.the_qa_company.qendpoint.core.util.io.Closer;
import com.the_qa_company.qendpoint.core.util.io.IOUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.IOException;
Expand All @@ -39,14 +41,31 @@
* @author mario.arias
*/
public class Bitmap375Big extends Bitmap64Big {

private static final Logger logger = LoggerFactory.getLogger(Bitmap375Big.class);

/**
 * Selects which superblock search {@code select1} uses: {@code true} for
 * {@code binarySearch}, {@code false} for {@code binarySearchNew}.
 */
private static final boolean oldBinarySearch;

static {
	// Boolean.getBoolean is true only when the system property
	// "useOldBinarySearch" exists and equals "true", ignoring case.
	oldBinarySearch = Boolean.getBoolean("useOldBinarySearch");
	logger.debug(oldBinarySearch ? "Using old binary search" : "Using new binary search");
}

/**
 * create disk version bitmap with in memory super index.
 * <p>
 * Convenience overload that forwards to
 * {@code disk(location, nbits, false)}.
 *
 * @param location location
 * @param nbits number of bits
 * @return bitmap
 */

public static Bitmap375Big disk(Path location, long nbits) {
return disk(location, nbits, false);
}
Expand Down Expand Up @@ -181,6 +200,7 @@ public void updateIndex() {
}
pop = countSuperBlock + countBlock;
indexUpToDate = true;
superBlocks.recalculateEstimatedValueLocation();
}

/*
Expand All @@ -189,8 +209,9 @@ public void updateIndex() {
*/
@Override
public boolean access(long bitIndex) {
if (bitIndex < 0)
if (bitIndex < 0) {
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
}

long wordIndex = wordIndex(bitIndex);
if (wordIndex >= words.length()) {
Expand Down Expand Up @@ -324,15 +345,14 @@ public long select1(long x) {
return 0;
}
// Search superblock (binary Search)
long superBlockIndex = binarySearch(superBlocks, x);
long superBlockIndex = oldBinarySearch ? binarySearch(superBlocks, x) : binarySearchNew(superBlocks, x);

// If there is a run of many zeros, two correlative superblocks may have
// the same value,
// We need to position at the first of them.

while (superBlockIndex > 0 && (superBlocks.get(superBlockIndex) >= x)) {
superBlockIndex--;

}

long countdown = x - superBlocks.get(superBlockIndex);
Expand Down Expand Up @@ -444,6 +464,7 @@ public static long binarySearch0(LongArray arr, long fromIndex, long toIndex, lo
* @param val val
* @return index
*/

public static long binarySearch(LongArray arr, long val) {
long min = 0, max = arr.length(), mid;

Expand All @@ -460,11 +481,52 @@ public static long binarySearch(LongArray arr, long val) {
return min;
}

/**
 * Search {@code arr} for {@code val}, starting from the bounds and first
 * probe position supplied by the array's estimated-location methods instead
 * of the full {@code [0, length)} range.
 * <p>
 * The loop keeps narrowing {@code (low, high)} until the two are adjacent:
 * a probed element {@code >= val} becomes the new upper end, anything
 * smaller becomes the new lower end. The final lower end is reported back
 * to the array through {@code updateEstimatedValueLocation}.
 *
 * @param arr array to search
 * @param val value to look for
 * @return the lower end of the final search interval
 */
public static long binarySearchNew(LongArray arr, long val) {
	long low = arr.getEstimatedLocationLowerBound(val);
	long high = arr.getEstimatedLocationUpperBound(val);
	long probe = arr.getEstimatedLocation(val, low, high);

	for (int iteration = 0; low + 1 < high; iteration++) {
		// After the first probe the target is typically right next to the
		// lower end, so the second and third steps look at the immediate
		// neighbour (linear step) before bisection resumes.
		boolean linearStep = iteration == 1 || iteration == 2;
		long candidate = linearStep ? low + 1 : probe;

		if (arr.get(candidate) >= val) {
			high = candidate;
		} else {
			low = candidate;
		}
		probe = (low + high) / 2;
	}

	arr.updateEstimatedValueLocation(val, low);
	return low;
}

/**
 * @return the value of the {@code blocksPath} field
 */
public CloseSuppressPath getBlocksPath() {
return blocksPath;
}

/**
 * @return the value of the {@code superBlocksPath} field
 */
public CloseSuppressPath getSuperBlocksPath() {
return superBlocksPath;
}

/**
 * Deliberately returns a constant string. Debuggers such as IntelliJ show
 * objects through {@code toString()}; this bitmap is usually very large, so
 * a fixed label keeps the debugger from analysing and rendering the whole
 * object.
 *
 * @return the constant label {@code "Bitmap375Big{}"}
 */
@Override
public String toString() {
return "Bitmap375Big{}";
}
Comment on lines +527 to +531
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IntelliJ likes to show the objects in the debugger, which is usually a good idea except that Bitmap375Big is usually a very very big object...so overriding toString() forces IntelliJ to use the results of that instead of analyzing and displaying the entire object.

}
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ public static void writeLowerBitsByteAligned(long value, long numbits, OutputStr
public static int select1(long value, int rank) {
int bitpos = 0;
while (rank > 0 && value != 0) {
rank -= value & 1;
bitpos++;
value >>>= 1;
int trailingZeros = Long.numberOfTrailingZeros(value);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is faster. Not certain why, but I assume that the Long.numberOfTrailingZeros(value) method uses a SIMD instruction to check multiple bits in a single operation.

bitpos += trailingZeros + 1;
value >>>= trailingZeros + 1;
rank--;
}
return bitpos;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package com.the_qa_company.qendpoint.core.util.disk;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for {@link LongArray} implementations that keeps a
 * fixed-size table of estimated locations, indexed by value bucket, so that
 * searches can start from a narrowed range.
 * <p>
 * A value {@code v} belongs to slot {@code v / bucketSize + 1}. For each slot
 * the lowest and highest array index holding a value of that bucket are
 * recorded, together with their midpoint as a first probe position. The
 * tables are rebuilt by {@link #recalculateEstimatedValueLocation()}.
 */
public abstract class AbstractLongArray implements LongArray {

	private final Logger logger = LoggerFactory.getLogger(getClass());

	/** Number of slots in each estimate table; chosen once from the JVM heap limit. */
	private static final int ESTIMATED_LOCATION_ARRAY_SIZE;

	/** Largest bucket size that still fits the {@code int} returned by the getter. */
	private static final long MAX_BUCKET_SIZE = 1L << 30;

	static {
		// get total amount of memory that this java program is allowed to use
		long maxMemory = Runtime.getRuntime().maxMemory();

		if (maxMemory >= 1024 * 1024 * 512) {
			ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128;
		} else if (maxMemory >= 1024 * 1024 * 256) {
			ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 64;
		} else if (maxMemory >= 1024 * 1024 * 128) {
			ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 32;
		} else {
			ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 16;
		}

	}

	private final long[] estimatedLocationMax = new long[ESTIMATED_LOCATION_ARRAY_SIZE];
	private final long[] estimatedLocationMin = new long[ESTIMATED_LOCATION_ARRAY_SIZE];
	private final long[] estimatedLocation = new long[ESTIMATED_LOCATION_ARRAY_SIZE];

	private int estimatedLocationBucketSize;

	/** Largest value passed to {@link #set(long, long)} so far (never below 1). */
	long maxValue = 1;

	@Override
	public int getEstimatedLocationArrayBucketSize() {
		return estimatedLocationBucketSize;
	}

	/**
	 * Pick the smallest power-of-two bucket size for which the slot of
	 * {@code maxValue} ({@code maxValue / bucketSize + 1}) is still a valid
	 * index into the estimate tables.
	 */
	private void updateEstimatedLocationArrayBucketSize() {
		// computed in long so that doubling cannot overflow; capped so the
		// result fits the int field
		long bucketSize = 1;
		while (bucketSize < MAX_BUCKET_SIZE && maxValue / bucketSize + 1 >= ESTIMATED_LOCATION_ARRAY_SIZE) {
			bucketSize <<= 1;
		}
		this.estimatedLocationBucketSize = (int) bucketSize;
	}

	@Override
	public long[] getEstimatedLocationArray() {
		return estimatedLocation;
	}

	@Override
	public long[] getEstimatedLocationArrayMin() {
		return estimatedLocationMin;
	}

	@Override
	public long[] getEstimatedLocationArrayMax() {
		return estimatedLocationMax;
	}

	/**
	 * Rebuild the estimate tables from the current array content.
	 * <p>
	 * Elements equal to 0 are ignored, as are elements whose slot falls
	 * outside the tables (for instance values that were stored without going
	 * through {@link #set(long, long)}, so {@code maxValue} does not cover
	 * them).
	 */
	@Override
	public void recalculateEstimatedValueLocation() {
		updateEstimatedLocationArrayBucketSize();
		int estimatedLocationBucketSize = getEstimatedLocationArrayBucketSize();

		// start from a clean slate: entries left over from an earlier
		// recalculation may describe positions whose values have changed
		java.util.Arrays.fill(estimatedLocationMax, 0);
		java.util.Arrays.fill(estimatedLocationMin, 0);
		java.util.Arrays.fill(estimatedLocation, 0);

		long len = length();
		boolean shouldLog = len > 1024 * 1024 * 2;
		if (shouldLog) {
			logger.info("Recalculating estimated location array 0%");
		}

		// long counter: the array may hold more than Integer.MAX_VALUE entries
		for (long i = 0; i < len; i++) {
			long val = get(i);
			if (val == 0) {
				continue;
			}

			long slot = val / estimatedLocationBucketSize + 1;
			if (slot < 0 || slot >= ESTIMATED_LOCATION_ARRAY_SIZE) {
				// no table entry for this value; leave it untracked
				continue;
			}

			int index = (int) slot;
			estimatedLocationMax[index] = Math.max(estimatedLocationMax[index], i);
			if (estimatedLocationMin[index] == 0) {
				// 0 doubles as the "not set yet" marker
				estimatedLocationMin[index] = i;
			} else {
				estimatedLocationMin[index] = Math.min(estimatedLocationMin[index], i);
			}
			estimatedLocation[index] = (estimatedLocationMax[index] + estimatedLocationMin[index]) / 2;

			if (shouldLog && i % (1024 * 1024) == 0) {
				logger.info("Recalculating estimated location array {}%", (int) Math.floor(100.0 / len * i));
			}
		}

		if (shouldLog) {
			logger.info("Recalculating estimated location array 100%");
		}
	}

	/**
	 * Store {@code value} at {@code index} via {@link #innerSet(long, long)}
	 * and keep track of the largest value written.
	 */
	@Override
	public final void set(long index, long value) {
		maxValue = Math.max(maxValue, value);
		innerSet(index, value);
	}

	/**
	 * Implementation hook invoked by {@link #set(long, long)}.
	 *
	 * @param index index passed to {@code set}
	 * @param value value passed to {@code set}
	 */
	abstract protected void innerSet(long index, long value);

}
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ public void resize(long newSize) throws IOException {
public void clear() {
array.clear();
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.the_qa_company.qendpoint.core.util.disk;

import com.the_qa_company.qendpoint.core.util.io.IOUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Iterator;
Expand All @@ -10,6 +12,10 @@
* Describe a large array of longs
*/
public interface LongArray extends Iterable<Long> {

Logger logger = LoggerFactory.getLogger(LongArray.class);
long[] EMPTY_ARRAY = new long[0];

/**
* create an in memory long array
*
Expand Down Expand Up @@ -208,4 +214,84 @@ public Long next() {
}
};
}

/**
 * By default this is the same array as {@link #getEstimatedLocationArray()}.
 *
 * @return the estimated location array that contains the highest location
 *         for a given value
 */
default long[] getEstimatedLocationArrayMax() {
return getEstimatedLocationArray();
}

/**
 * By default this is the same array as {@link #getEstimatedLocationArray()}.
 *
 * @return the estimated location array that contains the lowest location
 *         for a given value
 */
default long[] getEstimatedLocationArrayMin() {
return getEstimatedLocationArray();
}

/**
 * The default implementation returns the shared zero-length
 * {@code EMPTY_ARRAY}.
 *
 * @return the estimated location array
 */
default long[] getEstimatedLocationArray() {
return EMPTY_ARRAY;
}

/**
 * The estimate methods of this interface map a value to the slot
 * {@code value / bucketSize + 1} of the estimated location arrays.
 *
 * @return the bucket size used for that mapping; 65536 by default
 */
default int getEstimatedLocationArrayBucketSize() {
return 65536;
}

/**
 * Estimate a lower bound for the location of {@code val}: the entry of
 * {@link #getEstimatedLocationArrayMax()} for the slot just before the slot
 * of {@code val}.
 * <p>
 * Falls back to 0 when that entry is not positive or when the slot lies
 * outside the array (which is always the case with the default zero-length
 * array).
 *
 * @param val value being searched for
 * @return a positive cached location, or 0 when no estimate is available
 */
default long getEstimatedLocationLowerBound(long val) {
	long[] estimatedLocationMax = getEstimatedLocationArrayMax();
	// slot of val is (val / bucketSize + 1); the preceding slot is therefore
	// val / bucketSize. Kept as long so a huge val cannot wrap the index.
	long previousSlot = val / getEstimatedLocationArrayBucketSize();
	if (previousSlot >= 0 && previousSlot < estimatedLocationMax.length) {
		long t = estimatedLocationMax[(int) previousSlot];
		if (t > 0) {
			return t;
		}
	}
	return 0;
}

/**
 * Estimate an upper bound for the location of {@code val}: the entry of
 * {@link #getEstimatedLocationArrayMin()} for the slot just after the slot
 * of {@code val}, capped at {@code length()}.
 *
 * @param val value being searched for
 * @return the capped cached location when that slot exists and is positive,
 *         otherwise {@code length()}
 */
default long getEstimatedLocationUpperBound(long val) {
	int nextSlot = (int) (val / getEstimatedLocationArrayBucketSize() + 1) + 1;
	long[] lowestLocations = getEstimatedLocationArrayMin();

	boolean hasEstimate = nextSlot < lowestLocations.length && lowestLocations[nextSlot] > 0;
	return hasEstimate ? Math.min(length(), lowestLocations[nextSlot]) : length();
}

/**
 * Suggest a first probe position for {@code val} inside {@code (min, max)}.
 *
 * @param val value being searched for
 * @param min lower end of the search interval
 * @param max upper end of the search interval
 * @return the cached location for the slot of {@code val} when it lies
 *         strictly between {@code min} and {@code max}; otherwise the
 *         midpoint {@code (min + max) / 2}
 */
default long getEstimatedLocation(long val, long min, long max) {
	int slot = (int) (val / getEstimatedLocationArrayBucketSize() + 1);
	long[] cachedLocations = getEstimatedLocationArray();

	if (slot < cachedLocations.length) {
		long candidate = cachedLocations[slot];
		if (min < candidate && candidate < max) {
			return candidate;
		}
	}
	// no usable estimate: plain bisection
	return (min + max) / 2;
}

/**
 * Default implementation: does no recalculation and only logs, at info
 * level, that the implementing class does not support it.
 */
default void recalculateEstimatedValueLocation() {
	String implementation = this.getClass().getCanonicalName();
	logger.info("Class {} does not support recalculateEstimatedValueLocation()", implementation);
}

/**
 * Record {@code min} as the estimated location for the slot of {@code val}.
 * Nothing is written when that slot is at or beyond the end of
 * {@link #getEstimatedLocationArray()}.
 *
 * @param val value whose slot is updated
 * @param min location to store for that slot
 */
default void updateEstimatedValueLocation(long val, long min) {
	int slot = (int) (val / getEstimatedLocationArrayBucketSize() + 1);
	long[] cachedLocations = getEstimatedLocationArray();
	if (slot < cachedLocations.length) {
		cachedLocations[slot] = min;
	}
}
}
Loading
Loading