Skip to content

Commit

Permalink
[Derived Fields] Add support for emitting multiple values in DerivedF…
Browse files Browse the repository at this point in the history
…ieldScripts (opensearch-project#12837)


---------

Signed-off-by: Mohammad Qureshi <[email protected]>
Signed-off-by: Rishabh Maurya <[email protected]>
Co-authored-by: Rishabh Maurya <[email protected]>
  • Loading branch information
qreshi and rishabhmaurya authored Apr 3, 2024
1 parent 434dc61 commit e8c5daf
Show file tree
Hide file tree
Showing 9 changed files with 381 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.rest.RestController;
import org.opensearch.rest.RestHandler;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.script.IngestScript;
import org.opensearch.script.ScoreScript;
import org.opensearch.script.ScriptContext;
Expand Down Expand Up @@ -108,6 +109,11 @@ public final class PainlessModulePlugin extends Plugin implements ScriptPlugin,
ingest.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.ingest.txt"));
map.put(IngestScript.CONTEXT, ingest);

// Functions available to derived fields
List<Allowlist> derived = new ArrayList<>(Allowlist.BASE_ALLOWLISTS);
derived.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.derived.txt"));
map.put(DerivedFieldScript.CONTEXT, derived);

allowlists = map;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# This file contains an allowlist for functions to be used in derived field context

class org.opensearch.script.DerivedFieldScript @no_import {
}

static_import {
void emit(org.opensearch.script.DerivedFieldScript, Object) bound_to org.opensearch.script.ScriptEmitValues$EmitSingle
void emit(org.opensearch.script.DerivedFieldScript, double, double) bound_to org.opensearch.script.ScriptEmitValues$GeoPoint
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.painless;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.opensearch.common.collect.Tuple;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.fielddata.IndexGeoPointFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
import org.opensearch.index.fielddata.LeafGeoPointFieldData;
import org.opensearch.index.fielddata.LeafNumericFieldData;
import org.opensearch.index.fielddata.MultiGeoPointValues;
import org.opensearch.index.fielddata.SortedNumericDoubleValues;
import org.opensearch.index.fielddata.plain.AbstractLeafGeoPointFieldData;
import org.opensearch.index.fielddata.plain.LeafDoubleFieldData;
import org.opensearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType;
import org.opensearch.index.mapper.NumberFieldMapper.NumberType;
import org.opensearch.painless.spi.Allowlist;
import org.opensearch.painless.spi.AllowlistLoader;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.script.ScriptContext;
import org.opensearch.script.ScriptException;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class DerivedFieldScriptTests extends ScriptTestCase {

private static PainlessScriptEngine SCRIPT_ENGINE;

@Override
public void setUp() throws Exception {
super.setUp();

// Adding derived field script to the contexts for the script engine
Map<ScriptContext<?>, List<Allowlist>> contexts = newDefaultContexts();
List<Allowlist> allowlists = new ArrayList<>(Allowlist.BASE_ALLOWLISTS);
allowlists.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.derived.txt"));
contexts.put(DerivedFieldScript.CONTEXT, allowlists);

SCRIPT_ENGINE = new PainlessScriptEngine(Settings.EMPTY, contexts);
}

@Override
public void tearDown() throws Exception {
super.tearDown();
SCRIPT_ENGINE = null;
}

@Override
protected PainlessScriptEngine getEngine() {
return SCRIPT_ENGINE;
}

private DerivedFieldScript.LeafFactory compile(String expression, SearchLookup lookup) {
DerivedFieldScript.Factory factory = SCRIPT_ENGINE.compile(
"derived_script_test",
expression,
DerivedFieldScript.CONTEXT,
Collections.emptyMap()
);
return factory.newFactory(Collections.emptyMap(), lookup);
}

public void testEmittingDoubleField() throws IOException {
// Mocking field value to be returned
NumberFieldType fieldType = new NumberFieldType("test_double_field", NumberType.DOUBLE);
MapperService mapperService = mock(MapperService.class);
when(mapperService.fieldType("test_double_field")).thenReturn(fieldType);

SortedNumericDoubleValues doubleValues = mock(SortedNumericDoubleValues.class);
when(doubleValues.docValueCount()).thenReturn(1);
when(doubleValues.advanceExact(anyInt())).thenReturn(true);
when(doubleValues.nextValue()).thenReturn(2.718);

LeafNumericFieldData atomicFieldData = mock(LeafDoubleFieldData.class); // SortedNumericDoubleFieldData
when(atomicFieldData.getDoubleValues()).thenReturn(doubleValues);

IndexNumericFieldData fieldData = mock(IndexNumericFieldData.class); // SortedNumericIndexFieldData
when(fieldData.getFieldName()).thenReturn("test_double_field");
when(fieldData.load(any())).thenReturn(atomicFieldData);

SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

// Execute the script
DerivedFieldScript script = compile("emit(doc['test_double_field'].value)", lookup).newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of(2.718), result);
}

public void testEmittingGeoPoint() throws IOException {
// Mocking field value to be returned
GeoPointFieldType fieldType = new GeoPointFieldType("test_geo_field");
MapperService mapperService = mock(MapperService.class);
when(mapperService.fieldType("test_geo_field")).thenReturn(fieldType);

MultiGeoPointValues geoPointValues = mock(MultiGeoPointValues.class);
when(geoPointValues.docValueCount()).thenReturn(1);
when(geoPointValues.advanceExact(anyInt())).thenReturn(true);
when(geoPointValues.nextValue()).thenReturn(new GeoPoint(5, 8));

LeafGeoPointFieldData atomicFieldData = mock(AbstractLeafGeoPointFieldData.class); // LatLonPointDVLeafFieldData
when(atomicFieldData.getGeoPointValues()).thenReturn(geoPointValues);

IndexGeoPointFieldData fieldData = mock(IndexGeoPointFieldData.class);
when(fieldData.getFieldName()).thenReturn("test_geo_field");
when(fieldData.load(any())).thenReturn(atomicFieldData);

SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

// Execute the script
DerivedFieldScript script = compile("emit(doc['test_geo_field'].value.getLat(), doc['test_geo_field'].value.getLon())", lookup)
.newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of(new Tuple<>(5.0, 8.0)), result);
}

public void testEmittingMultipleValues() throws IOException {
SearchLookup lookup = mock(SearchLookup.class);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);

// Execute the script
DerivedFieldScript script = compile(
"def l = new ArrayList(); l.add('test'); l.add('multiple'); l.add('values'); for (String x : l) emit(x)",
lookup
).newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of("test", "multiple", "values"), result);
}

public void testExceedingByteSizeLimit() throws IOException {
SearchLookup lookup = mock(SearchLookup.class);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);

// Emitting a large string to exceed the byte size limit
DerivedFieldScript stringScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit('a' + i);", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, () -> {
stringScript.setDocument(1);
stringScript.execute();
});

// Emitting an integer to check byte size limit
DerivedFieldScript intScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(42)", lookup).newInstance(leafReaderContext);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
intScript.setDocument(1);
intScript.execute();
});

// Emitting a long to check byte size limit
DerivedFieldScript longScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1234567890123456789L)", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
longScript.setDocument(1);
longScript.execute();
});

// Emitting a double to check byte size limit
DerivedFieldScript doubleScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(3.14159)", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
doubleScript.setDocument(1);
doubleScript.execute();
});

// Emitting a GeoPoint to check byte size limit
DerivedFieldScript geoPointScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1.23, 4.56);", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
geoPointScript.setDocument(1);
geoPointScript.execute();
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScrip
@Override
public List<Object> fetchValues(SourceLookup lookup) {
derivedFieldScript.setDocument(lookup.docId());
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
return List.of(derivedFieldScript.execute());
derivedFieldScript.execute();
return derivedFieldScript.getEmittedValues();
}

public void setNextReader(LeafReaderContext context) {
Expand Down
67 changes: 60 additions & 7 deletions server/src/main/java/org/opensearch/script/DerivedFieldScript.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
package org.opensearch.script;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.collect.Tuple;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

Expand All @@ -30,7 +33,7 @@ public abstract class DerivedFieldScript {

public static final String[] PARAMETERS = {};
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);
private static final int MAX_BYTE_SIZE = 1024 * 1024; // Maximum allowed byte size (1 MB)

private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
"doc",
Expand All @@ -49,16 +52,27 @@ public abstract class DerivedFieldScript {
*/
private final LeafSearchLookup leafLookup;

/**
* The field values emitted from the script.
*/
private List<Object> emittedValues;

private int totalByteSize;

public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
Map<String, Object> parameters = new HashMap<>(params);
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
parameters.putAll(leafLookup.asMap());
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;
}

protected DerivedFieldScript() {
params = null;
leafLookup = null;
public DerivedFieldScript() {
this.params = null;
this.leafLookup = null;
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;
}

/**
Expand All @@ -75,14 +89,54 @@ public Map<String, ScriptDocValues<?>> getDoc() {
return leafLookup.doc();
}

/**
* Return the emitted values from the script execution.
*/
public List<Object> getEmittedValues() {
return emittedValues;
}

/**
* Set the current document to run the script on next.
* Clears the emittedValues as well since they should be scoped per document.
*/
public void setDocument(int docid) {
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;
leafLookup.setDocument(docid);
}

public abstract Object execute();
public void addEmittedValue(Object o) {
int byteSize = getObjectByteSize(o);
int newTotalByteSize = totalByteSize + byteSize;
if (newTotalByteSize <= MAX_BYTE_SIZE) {
emittedValues.add(o);
totalByteSize = newTotalByteSize;
} else {
throw new IllegalStateException("Exceeded maximum allowed byte size for emitted values");
}
}

private int getObjectByteSize(Object obj) {
if (obj instanceof String) {
return ((String) obj).getBytes(StandardCharsets.UTF_8).length;
} else if (obj instanceof Integer) {
return Integer.BYTES;
} else if (obj instanceof Long) {
return Long.BYTES;
} else if (obj instanceof Double) {
return Double.BYTES;
} else if (obj instanceof Boolean) {
return Byte.BYTES; // Assuming 1 byte for boolean
} else if (obj instanceof Tuple) {
// Assuming each element in the tuple is a double for GeoPoint case
return Double.BYTES * 2;
} else {
throw new IllegalArgumentException("Unsupported object type passed in emit()");
}
}

public void execute() {}

/**
* A factory to construct {@link DerivedFieldScript} instances.
Expand All @@ -95,7 +149,6 @@ public interface LeafFactory {

/**
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
*
* @opensearch.internal
*/
public interface Factory extends ScriptFactory {
Expand Down
Loading

0 comments on commit e8c5daf

Please sign in to comment.