elastic · mayya-sharipova · Mar 28, 2019 · Mar 18, 2019 · Mar 25, 2019 · Mar 25, 2019
diff --git a/docs/reference/query-dsl/script-score-query.asciidoc b/docs/reference/query-dsl/script-score-query.asciidoc
@@ -182,60 +182,44 @@ different from the query's vector, 0 is used for missing dimensions
 in the calculations of vector functions.
 
 
-[[random-functions]]
-===== Random functions
-There are two predefined ways to produce random values:
-`randomNotReproducible` and `randomReproducible`.
+[[random-score-function]]
+===== Random score function
+The `randomScore` function generates scores that are uniformly distributed
+from 0 up to but not including 1.
 
-`randomNotReproducible()` uses `java.util.Random` class
-to generate a random value of the type `long`.
-The generated values are not reproducible between requests' invocations.
+The `randomScore` function has the following syntax:
+`randomScore(<seed>, <fieldName>)`.
+It has a required parameter, `seed`, which is an integer value,
+and an optional parameter, `fieldName`, which is a string value.
 
 [source,js]
 --------------------------------------------------
 "script" : {
-    "source" : "randomNotReproducible()"
+    "source" : "randomScore(100, '_seq_no')"
 }
 --------------------------------------------------
 // NOTCONSOLE
 
-
-`randomReproducible(String seedValue, int seed)` produces
-reproducible random values of type `long`. This function requires
-more computational time and memory than the non-reproducible version.
-
-A good candidate for the `seedValue` is document field values that
-are unique across documents and already pre-calculated and preloaded
-in the memory. For example, values of the document's `_seq_no` field
-is a good candidate, as documents on the same shard have unique values
-for the `_seq_no` field.
+If the `fieldName` parameter is omitted, the internal Lucene
+document ids will be used as a source of randomness. This is very efficient,
+but unfortunately not reproducible since documents might be renumbered
+by merges.
 
 [source,js]
 --------------------------------------------------
 "script" : {
-    "source" : "randomReproducible(Long.toString(doc['_seq_no'].value), 100)"
+    "source" : "randomScore(100)"
 }
 --------------------------------------------------
 // NOTCONSOLE
 
 
-A drawback of using `_seq_no` is that generated values change if
-documents are updated. Another drawback is not absolute uniqueness, as
-documents from different shards with the same sequence numbers
-generate the same random values.
-
-If you need random values to be distinct across different shards,
-you can use a field with unique values across shards,
-such as  `_id`, but watch out for the memory usage as all
-these unique values need to be loaded into memory.
-
-[source,js]
---------------------------------------------------
-"script" : {
-    "source" : "randomReproducible(doc['_id'].value, 100)"
-}
---------------------------------------------------
-// NOTCONSOLE
+Note that documents that are within the same shard and have the
+same value for the `fieldName` field will get the same score, so it is usually
+desirable to use a field that has unique values for all documents across a shard.
+A good default choice might be to use the `_seq_no`
+field, whose only drawback is that scores will change if the document is
+updated, since update operations also update the value of the `_seq_no` field.
 
 
 [[decay-functions]]
@@ -349,8 +333,8 @@ the following script:
 
 ===== `random_score`
 
-Use `randomReproducible` and `randomNotReproducible` functions
-as described in <<random-functions, random functions>>.
+Use the `randomScore` function
+as described in <<random-score-function, random score function>>.
 
 
 ===== `field_value_factor`

diff --git a/...ng-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.score.txt b/...ng-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.score.txt
@@ -19,11 +19,14 @@
 
 # This file contains a whitelist for functions to be used in Score context
 
+class org.elasticsearch.script.ScoreScript no_import {
+}
+
 static_import {
     double saturation(double, double) from_class org.elasticsearch.script.ScoreScriptUtils
     double sigmoid(double, double, double) from_class org.elasticsearch.script.ScoreScriptUtils
-    double randomReproducible(String, int) from_class org.elasticsearch.script.ScoreScriptUtils
-    double randomNotReproducible()  bound_to org.elasticsearch.script.ScoreScriptUtils$RandomNotReproducible
+    double randomScore(org.elasticsearch.script.ScoreScript, int, String) bound_to org.elasticsearch.script.ScoreScriptUtils$RandomScore
+    double randomScore(org.elasticsearch.script.ScoreScript, int) bound_to org.elasticsearch.script.ScoreScriptUtils$RandomScoreDoc
     double decayGeoLinear(String, String, String, double, GeoPoint)  bound_to org.elasticsearch.script.ScoreScriptUtils$DecayGeoLinear
     double decayGeoExp(String, String, String, double, GeoPoint)  bound_to org.elasticsearch.script.ScoreScriptUtils$DecayGeoExp
     double decayGeoGauss(String, String, String, double, GeoPoint)  bound_to org.elasticsearch.script.ScoreScriptUtils$DecayGeoGauss

diff --git a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/80_script_score.yml b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/80_script_score.yml
@@ -72,61 +72,6 @@ setup:
     - match: { hits.hits.1._id: d2 }
     - match: { hits.hits.2._id: d1 }
 
----
-"Random functions":
-    - do:
-        indices.create:
-            index: test
-            body:
-                settings:
-                    number_of_shards: 2
-                mappings:
-                    properties:
-                        f1:
-                            type: keyword
-    - do:
-        index:
-            index: test
-            id: 1
-            body: {"f1": "v1"}
-    - do:
-        index:
-            index: test
-            id: 2
-            body: {"f1": "v2"}
-    - do:
-        index:
-            index: test
-            id: 3
-            body: {"f1": "v3"}
-
-    - do:
-        indices.refresh: {}
-
-    - do:
-        search:
-            rest_total_hits_as_int: true
-            index: test
-            body:
-                query:
-                    script_score:
-                        query: {match_all: {} }
-                        script:
-                            source: "randomReproducible(Long.toString(doc['_seq_no'].value), 100)"
-    - match: { hits.total: 3 }
-
-    - do:
-        search:
-            rest_total_hits_as_int: true
-            index: test
-            body:
-                query:
-                    script_score:
-                        query: {match_all: {} }
-                        script:
-                            source: "randomNotReproducible()"
-    - match: { hits.total: 3 }
-
 ---
 "Decay geo functions":
     - do:

diff --git a/...-painless/src/test/resources/rest-api-spec/test/painless/85_script_score_random_score.yml b/...-painless/src/test/resources/rest-api-spec/test/painless/85_script_score_random_score.yml
@@ -0,0 +1,146 @@
+# Integration tests for ScriptScoreQuery using Painless
+
+setup:
+- skip:
+    version: " - 7.99.99"  # correct to 7.0.99 after backporting to 7.1
+    reason: "random score function of script score was added in 7.1"
+
+---
+"Random score function with _seq_no field":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    number_of_shards: 2
+                mappings:
+                    properties:
+                        f1:
+                            type: keyword
+
+    - do:
+        bulk:
+            refresh: true
+            body:
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v0"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v1"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v2"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v3"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v4"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v5"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v6"}'
+
+    - do:
+        search:
+            rest_total_hits_as_int: true
+            index: test
+            body:
+                query:
+                    script_score:
+                        query: {match_all: {} }
+                        script:
+                            source: "randomScore(100, '_seq_no')"
+    # stash ids to check for reproducibility of ranking
+    - set: { hits.hits.0._id: id0 }
+    - set: { hits.hits.1._id: id1 }
+    - set: { hits.hits.2._id: id2 }
+    - set: { hits.hits.3._id: id3 }
+    - set: { hits.hits.4._id: id4 }
+    - set: { hits.hits.5._id: id5 }
+    - set: { hits.hits.6._id: id6 }
+
+    # check that ranking is reproducible
+    - do:
+        search:
+            rest_total_hits_as_int: true
+            index: test
+            body:
+                query:
+                    script_score:
+                        query: {match_all: {} }
+                        script:
+                            source: "randomScore(100, '_seq_no')"
+    - match: { hits.hits.0._id: $id0 }
+    - match: { hits.hits.1._id: $id1 }
+    - match: { hits.hits.2._id: $id2 }
+    - match: { hits.hits.3._id: $id3 }
+    - match: { hits.hits.4._id: $id4 }
+    - match: { hits.hits.5._id: $id5 }
+    - match: { hits.hits.6._id: $id6 }
+
+---
+"Random score function with internal doc Ids":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    number_of_shards: 1
+                mappings:
+                    properties:
+                        f1:
+                            type: keyword
+
+    - do:
+        bulk:
+            refresh: true
+            body:
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v0"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v1"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v2"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v3"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v4"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v5"}'
+                - '{"index": {"_index": "test"}}'
+                - '{"f1": "v6"}'
+
+    - do:
+        search:
+            rest_total_hits_as_int: true
+            index: test
+            body:
+                query:
+                    script_score:
+                        query: {match_all: {} }
+                        script:
+                            source: "randomScore(100)"
+    # stash ids to check for reproducibility of ranking
+    - set: { hits.hits.0._id: id0 }
+    - set: { hits.hits.1._id: id1 }
+    - set: { hits.hits.2._id: id2 }
+    - set: { hits.hits.3._id: id3 }
+    - set: { hits.hits.4._id: id4 }
+    - set: { hits.hits.5._id: id5 }
+    - set: { hits.hits.6._id: id6 }
+
+    # check that ranking is reproducible
+    - do:
+        search:
+            rest_total_hits_as_int: true
+            index: test
+            body:
+                query:
+                    script_score:
+                        query: {match_all: {} }
+                        script:
+                            source: "randomScore(100)"
+    - match: { hits.hits.0._id: $id0 }
+    - match: { hits.hits.1._id: $id1 }
+    - match: { hits.hits.2._id: $id2 }
+    - match: { hits.hits.3._id: $id3 }
+    - match: { hits.hits.4._id: $id4 }
+    - match: { hits.hits.5._id: $id5 }
+    - match: { hits.hits.6._id: $id6 }
diff --git a/...er/src/main/java/org/elasticsearch/common/lucene/search/function/ScriptScoreFunction.java b/...er/src/main/java/org/elasticsearch/common/lucene/search/function/ScriptScoreFunction.java
@@ -50,18 +50,33 @@ public float score() {
 
     private final ScoreScript.LeafFactory script;
 
+    private final int shardId;
+    private final String indexName;
+
 
     public ScriptScoreFunction(Script sScript, ScoreScript.LeafFactory script) {
         super(CombineFunction.REPLACE);
         this.sScript = sScript;
         this.script = script;
+        this.indexName = null; // this overload takes no index name, so none is recorded
+        this.shardId = -1; // this overload takes no shard id; -1 is stored in its place
+    }
+
+    public ScriptScoreFunction(Script sScript, ScoreScript.LeafFactory script, String indexName, int shardId) {
+        super(CombineFunction.REPLACE);
+        this.sScript = sScript;
+        this.script = script;
+        this.indexName = indexName; // handed to each leaf script via _setIndexName in getLeafScoreFunction
+        this.shardId = shardId; // handed to each leaf script via _setShard in getLeafScoreFunction
     }
 
     @Override
     public LeafScoreFunction getLeafScoreFunction(LeafReaderContext ctx) throws IOException {
         final ScoreScript leafScript = script.newInstance(ctx);
         final CannedScorer scorer = new CannedScorer();
         leafScript.setScorer(scorer);
+        leafScript._setIndexName(indexName);
+        leafScript._setShard(shardId);
         return new LeafScoreFunction() {
             @Override
             public double score(int docId, float subQueryScore) throws IOException {

diff --git a/...src/main/java/org/elasticsearch/index/query/functionscore/ScriptScoreFunctionBuilder.java b/...src/main/java/org/elasticsearch/index/query/functionscore/ScriptScoreFunctionBuilder.java
@@ -94,7 +94,7 @@ protected ScoreFunction doToFunction(QueryShardContext context) {
         try {
             ScoreScript.Factory factory = context.getScriptService().compile(script, ScoreScript.CONTEXT);
             ScoreScript.LeafFactory searchScript = factory.newFactory(script.getParams(), context.lookup());
-            return new ScriptScoreFunction(script, searchScript);
+            return new ScriptScoreFunction(script, searchScript, context.index().getName(), context.getShardId());
         } catch (Exception e) {
             throw new QueryShardException(context, "script_score: the script could not be loaded", e);
         }