From d4e8907b0c2b275bf75479b2ac6195370b479b52 Mon Sep 17 00:00:00 2001 From: Alexey Serba Date: Tue, 25 Jun 2024 22:38:39 -0700 Subject: [PATCH 1/3] SOLR-10255 Add support for docValues to solr.BinaryField --- .../org/apache/solr/schema/BinaryField.java | 40 ++++++++- .../solr/search/SolrDocumentFetcher.java | 2 +- .../conf/bad-schema-unsupported-docValues.xml | 6 +- .../collection1/conf/schema-binaryfield.xml | 1 + .../apache/solr/schema/TestBinaryField.java | 90 ++++++++++--------- 5 files changed, 92 insertions(+), 47 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java index 320daee9dcb..7a355900aaa 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java @@ -20,7 +20,12 @@ import java.lang.invoke.MethodHandles; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Base64; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; @@ -94,6 +99,10 @@ public IndexableField createField(SchemaField field, Object val) { log.trace("Ignoring unstored binary field: {}", field); return null; } + return new org.apache.lucene.document.StoredField(field.getName(), getBytesRef(val)); + } + + private static BytesRef getBytesRef(Object val) { byte[] buf = null; int offset = 0, len = 0; if (val instanceof byte[]) { @@ -112,7 +121,36 @@ public IndexableField createField(SchemaField field, Object val) { len = buf.length; } - return new org.apache.lucene.document.StoredField(field.getName(), buf, offset, len); + return new BytesRef(buf, offset, len); + } + + @Override + public List createFields(SchemaField field, Object val) { + IndexableField fval = createField(field, val); + + if (field.hasDocValues()) { + IndexableField docval; + if (field.multiValued()) { + docval = new SortedSetDocValuesField(field.getName(), getBytesRef(val)); + } else { + docval = new BinaryDocValuesField(field.getName(), getBytesRef(val)); + } + + // Only create list if we have 2 values... + if (fval != null) { + List fields = new ArrayList<>(2); + fields.add(fval); + fields.add(docval); + return fields; + } + + fval = docval; + } + return Collections.singletonList(fval); + } + + @Override + protected void checkSupportsDocValues() { // we support DocValues } @Override diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java index e6ccb9edd18..37a68324e61 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java @@ -609,7 +609,7 @@ private Object decodeDVField( case BINARY: BinaryDocValues bdv = e.getBinaryDocValues(localId, leafReader, readerOrd); if (bdv != null) { - return BytesRef.deepCopyOf(bdv.binaryValue()); + return BytesRef.deepCopyOf(bdv.binaryValue()).bytes; } return null; case SORTED: diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml index 9741a387e24..903531cb3d6 100644 --- a/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml @@ -17,10 +17,10 @@ --> - + - - + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-binaryfield.xml b/solr/core/src/test-files/solr/collection1/conf/schema-binaryfield.xml index f0f216b2b12..faee854ca97 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-binaryfield.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-binaryfield.xml @@ -33,6 +33,7 @@ + id diff --git a/solr/core/src/test/org/apache/solr/schema/TestBinaryField.java b/solr/core/src/test/org/apache/solr/schema/TestBinaryField.java index 87fbe5b2d12..93595656017 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestBinaryField.java +++ b/solr/core/src/test/org/apache/solr/schema/TestBinaryField.java @@ -82,73 +82,78 @@ public void testSimple() throws Exception { doc = new SolrInputDocument(); doc.addField("id", 1); doc.addField("data", ByteBuffer.wrap(buf, 2, 5)); + doc.addField("data_dv", ByteBuffer.wrap(buf, 2, 5)); client.add(doc); doc = new SolrInputDocument(); doc.addField("id", 2); doc.addField("data", ByteBuffer.wrap(buf, 4, 3)); + doc.addField("data_dv", ByteBuffer.wrap(buf, 4, 3)); client.add(doc); doc = new SolrInputDocument(); doc.addField("id", 3); doc.addField("data", buf); + doc.addField("data_dv", buf); client.add(doc); client.commit(); - QueryResponse resp = client.query(new SolrQuery("*:*")); + QueryResponse resp = client.query(new SolrQuery("*:*").setFields("id", "data", "data_dv")); SolrDocumentList res = resp.getResults(); List beans = resp.getBeans(Bean.class); assertEquals(3, res.size()); assertEquals(3, beans.size()); for (SolrDocument d : res) { - Integer id = Integer.parseInt(d.getFieldValue("id").toString()); - byte[] data = (byte[]) d.getFieldValue("data"); - if (id == 1) { - assertEquals(5, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) (i + 2), b); - } - - } else if (id == 2) { - assertEquals(3, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) (i + 4), b); - } - - } else if (id == 3) { - assertEquals(10, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) i, b); + for (String field : new String[] {"data", "data_dv"}) { + byte[] data = (byte[]) d.getFieldValue(field); + if (id == 1) { + assertEquals(5, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) (i + 2), b); + } + + } else if (id == 2) { + assertEquals(3, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) (i + 4), b); + } + + } else if (id == 3) { + assertEquals(10, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) i, b); + } } } } for (Bean d : beans) { Integer id = Integer.parseInt(d.id); - byte[] data = d.data; - if (id == 1) { - assertEquals(5, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) (i + 2), b); - } - - } else if (id == 2) { - assertEquals(3, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) (i + 4), b); - } - - } else if (id == 3) { - assertEquals(10, data.length); - for (int i = 0; i < data.length; i++) { - byte b = data[i]; - assertEquals((byte) i, b); + for (byte[] data : new byte[][] {d.data, d.data_dv}) { + if (id == 1) { + assertEquals(5, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) (i + 2), b); + } + + } else if (id == 2) { + assertEquals(3, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) (i + 4), b); + } + + } else if (id == 3) { + assertEquals(10, data.length); + for (int i = 0; i < data.length; i++) { + byte b = data[i]; + assertEquals((byte) i, b); + } } } } @@ -158,5 +163,6 @@ public void testSimple() throws Exception { public static class Bean { @Field String id; @Field byte[] data; + @Field byte[] data_dv; } } From 90ed36bc1c30202731149a52fe89d5f7fe897c32 Mon Sep 17 00:00:00 2001 From: Alexey Serba Date: Wed, 26 Jun 2024 13:55:24 -0700 Subject: [PATCH 2/3] Remove support for multivalued binary doc values We can add it later if it is needed. I don't see a big value in storing multiple binary payloads where you simply can store a single binary payload with multiple values. --- .../java/org/apache/solr/schema/BinaryField.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java index 7a355900aaa..94bc94fd86f 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java @@ -25,7 +25,6 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.document.BinaryDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; @@ -47,6 +46,11 @@ public void checkSchemaField(SchemaField field) { SolrException.ErrorCode.SERVER_ERROR, "Field type " + this + " is 'large'; not supported (yet)"); } + if (field.hasDocValues() && field.multiValued()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Field type " + this + " does not support multiple doc values"); + } } private String toBase64String(ByteBuffer buf) { @@ -128,13 +132,8 @@ private static BytesRef getBytesRef(Object val) { public List createFields(SchemaField field, Object val) { IndexableField fval = createField(field, val); - if (field.hasDocValues()) { - IndexableField docval; - if (field.multiValued()) { - docval = new SortedSetDocValuesField(field.getName(), getBytesRef(val)); - } else { - docval = new BinaryDocValuesField(field.getName(), getBytesRef(val)); - } + if (field.hasDocValues() && !field.multiValued()) { + IndexableField docval = new BinaryDocValuesField(field.getName(), getBytesRef(val)); // Only create list if we have 2 values... if (fval != null) { From 2aa8b247acc7db5f801f3e2ace7f2c5b2217fcdc Mon Sep 17 00:00:00 2001 From: Alexey Serba Date: Wed, 10 Jul 2024 23:14:28 -0700 Subject: [PATCH 3/3] Add CHANGES.txt entry --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 61af4795afa..c44312c04f5 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -122,6 +122,8 @@ New Features overridden with a property ('solr.query.minPrefixLength'). Users may also override their collection-wide setting for individual queries by providing a `minPrefixQueryTermLength` local-param. (Jason Gerlowski, David Smiley) +* SOLR-10255: Add support for docValues to solr.BinaryField. (Alexey Serba via Mikhail Khludnev, David Smiley) + Improvements --------------------- * SOLR-17137: Enable Prometheus exporter to communicate with SSL protected Solr. (Eivind Bergstøl via Eric Pugh)