-
Notifications
You must be signed in to change notification settings - Fork 674
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SOLR-10255 Add support for docValues to solr.BinaryField #2536
Changes from 4 commits
d4e8907
6eba352
90ed36b
66692af
2aa8b24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,11 @@ | |
import java.lang.invoke.MethodHandles; | ||
import java.nio.ByteBuffer; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.Base64; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import org.apache.lucene.document.BinaryDocValuesField; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.search.SortField; | ||
import org.apache.lucene.util.BytesRef; | ||
|
@@ -42,6 +46,11 @@ public void checkSchemaField(SchemaField field) { | |
SolrException.ErrorCode.SERVER_ERROR, | ||
"Field type " + this + " is 'large'; not supported (yet)"); | ||
} | ||
if (field.hasDocValues() && field.multiValued()) { | ||
throw new SolrException( | ||
SolrException.ErrorCode.SERVER_ERROR, | ||
"Field type " + this + " does not support multiple doc values"); | ||
} | ||
} | ||
|
||
private String toBase64String(ByteBuffer buf) { | ||
|
@@ -94,6 +103,10 @@ public IndexableField createField(SchemaField field, Object val) { | |
log.trace("Ignoring unstored binary field: {}", field); | ||
return null; | ||
} | ||
return new org.apache.lucene.document.StoredField(field.getName(), getBytesRef(val)); | ||
} | ||
|
||
private static BytesRef getBytesRef(Object val) { | ||
byte[] buf = null; | ||
int offset = 0, len = 0; | ||
if (val instanceof byte[]) { | ||
|
@@ -112,7 +125,31 @@ public IndexableField createField(SchemaField field, Object val) { | |
len = buf.length; | ||
} | ||
|
||
return new org.apache.lucene.document.StoredField(field.getName(), buf, offset, len); | ||
return new BytesRef(buf, offset, len); | ||
} | ||
|
||
@Override | ||
public List<IndexableField> createFields(SchemaField field, Object val) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is pretty much borrowed from |
||
IndexableField fval = createField(field, val); | ||
|
||
if (field.hasDocValues() && !field.multiValued()) { | ||
IndexableField docval = new BinaryDocValuesField(field.getName(), getBytesRef(val)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the crux of the changes - we want to use Lucene |
||
|
||
// Only create list if we have 2 values... | ||
if (fval != null) { | ||
List<IndexableField> fields = new ArrayList<>(2); | ||
fields.add(fval); | ||
fields.add(docval); | ||
return fields; | ||
} | ||
|
||
fval = docval; | ||
} | ||
return Collections.singletonList(fval); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could it be simplified as […]
? UPD: However, Streams may add too much overhead. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, my understanding is that this code gets executed every time a field of this type is indexed. I borrowed the code from […]. I guess it makes sense to keep this code consistent across different field types until there's some appetite for rewriting this in |
||
} | ||
|
||
/**
 * Intentionally empty override: per the inline note below, this field type supports docValues,
 * so this check has nothing to reject.
 *
 * <p>NOTE(review): presumably the inherited implementation rejects docValues for field types
 * that do not support them; confirm against the superclass.
 */
@Override | ||
protected void checkSupportsDocValues() { // we support DocValues | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -639,7 +639,7 @@ private Object decodeDVField( | |
case BINARY: | ||
BinaryDocValues bdv = e.getBinaryDocValues(localId, leafReader, readerOrd); | ||
if (bdv != null) { | ||
return BytesRef.deepCopyOf(bdv.binaryValue()); | ||
return BytesRef.deepCopyOf(bdv.binaryValue()).bytes; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Previously this code was returning a `BytesRef`. Now we return a `byte[]`. Note that there is no additional performance overhead, as we were already doing `BytesRef.deepCopyOf` |
||
} | ||
return null; | ||
case SORTED: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure why we should bother with a random field. Shouldn't we just remove this file and test, since we now have binary DV? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this test is not testing binary doc values per se, but exception handling for any field type that does not support docValues. I was going through different field types to find the best candidate now and figured |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,73 +82,78 @@ public void testSimple() throws Exception { | |
doc = new SolrInputDocument(); | ||
doc.addField("id", 1); | ||
doc.addField("data", ByteBuffer.wrap(buf, 2, 5)); | ||
doc.addField("data_dv", ByteBuffer.wrap(buf, 2, 5)); | ||
client.add(doc); | ||
|
||
doc = new SolrInputDocument(); | ||
doc.addField("id", 2); | ||
doc.addField("data", ByteBuffer.wrap(buf, 4, 3)); | ||
doc.addField("data_dv", ByteBuffer.wrap(buf, 4, 3)); | ||
client.add(doc); | ||
|
||
doc = new SolrInputDocument(); | ||
doc.addField("id", 3); | ||
doc.addField("data", buf); | ||
doc.addField("data_dv", buf); | ||
client.add(doc); | ||
|
||
client.commit(); | ||
|
||
QueryResponse resp = client.query(new SolrQuery("*:*")); | ||
QueryResponse resp = client.query(new SolrQuery("*:*").setFields("id", "data", "data_dv")); | ||
SolrDocumentList res = resp.getResults(); | ||
List<Bean> beans = resp.getBeans(Bean.class); | ||
assertEquals(3, res.size()); | ||
assertEquals(3, beans.size()); | ||
for (SolrDocument d : res) { | ||
|
||
Integer id = Integer.parseInt(d.getFieldValue("id").toString()); | ||
byte[] data = (byte[]) d.getFieldValue("data"); | ||
if (id == 1) { | ||
assertEquals(5, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 2), b); | ||
} | ||
|
||
} else if (id == 2) { | ||
assertEquals(3, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 4), b); | ||
} | ||
|
||
} else if (id == 3) { | ||
assertEquals(10, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) i, b); | ||
for (String field : new String[] {"data", "data_dv"}) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
byte[] data = (byte[]) d.getFieldValue(field); | ||
if (id == 1) { | ||
assertEquals(5, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 2), b); | ||
} | ||
|
||
} else if (id == 2) { | ||
assertEquals(3, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 4), b); | ||
} | ||
|
||
} else if (id == 3) { | ||
assertEquals(10, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) i, b); | ||
} | ||
} | ||
} | ||
} | ||
for (Bean d : beans) { | ||
Integer id = Integer.parseInt(d.id); | ||
byte[] data = d.data; | ||
if (id == 1) { | ||
assertEquals(5, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 2), b); | ||
} | ||
|
||
} else if (id == 2) { | ||
assertEquals(3, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 4), b); | ||
} | ||
|
||
} else if (id == 3) { | ||
assertEquals(10, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) i, b); | ||
for (byte[] data : new byte[][] {d.data, d.data_dv}) { | ||
if (id == 1) { | ||
assertEquals(5, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 2), b); | ||
} | ||
|
||
} else if (id == 2) { | ||
assertEquals(3, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) (i + 4), b); | ||
} | ||
|
||
} else if (id == 3) { | ||
assertEquals(10, data.length); | ||
for (int i = 0; i < data.length; i++) { | ||
byte b = data[i]; | ||
assertEquals((byte) i, b); | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -158,5 +163,6 @@ public void testSimple() throws Exception { | |
/** Bean that the test maps query results onto via {@code resp.getBeans(Bean.class)}. */
public static class Bean { | |
// Document id; the test parses it with Integer.parseInt.
@Field String id; | ||
// Binary payload; presumably bound to the "data" field of the same name (verify @Field binding).
@Field byte[] data; | ||
// Binary payload for the docValues-backed "data_dv" field; the test expects the same bytes as data.
@Field byte[] data_dv; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved the code for converting `Object val` to `BytesRef` into a separate method for code reuse (and readability)