From bf4d90a5dc2fe2ee51ec182a166e636d88bf5d05 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 24 Oct 2018 14:27:22 -0400 Subject: [PATCH] HLRC API for _termvectors (#33447) * HLRC API for _termvectors relates to #27205 --- .../client/RequestConverters.java | 14 + .../client/RestHighLevelClient.java | 32 ++ .../client/core/TermVectorsRequest.java | 228 ++++++++ .../client/core/TermVectorsResponse.java | 486 ++++++++++++++++++ .../java/org/elasticsearch/client/CrudIT.java | 81 +++ .../client/RequestConvertersTests.java | 41 ++ .../client/RestHighLevelClientTests.java | 3 +- .../client/core/TermVectorsResponseTests.java | 203 ++++++++ .../documentation/CRUDDocumentationIT.java | 123 +++++ .../high-level/document/term-vectors.asciidoc | 134 +++++ .../high-level/supported-apis.asciidoc | 4 +- .../index/reindex/DeleteByQueryRequest.java | 2 +- 12 files changed, 1347 insertions(+), 4 deletions(-) create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsRequest.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsResponse.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/core/TermVectorsResponseTests.java create mode 100644 docs/java-rest/high-level/document/term-vectors.asciidoc diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java index f9094c14a9d84..106caea027e27 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java @@ -77,6 +77,7 @@ import org.elasticsearch.script.mustache.SearchTemplateRequest; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.client.core.TermVectorsRequest; import 
java.io.ByteArrayOutputStream; import java.io.IOException; @@ -578,6 +579,19 @@ static Request analyze(AnalyzeRequest request) throws IOException { return req; } + static Request termVectors(TermVectorsRequest tvrequest) throws IOException { + String endpoint = new EndpointBuilder().addPathPart( + tvrequest.getIndex(), tvrequest.getType(), tvrequest.getId()).addPathPartAsIs("_termvectors").build(); + Request request = new Request(HttpGet.METHOD_NAME, endpoint); + Params params = new Params(request); + params.withRouting(tvrequest.getRouting()); + params.withPreference(tvrequest.getPreference()); + params.withFields(tvrequest.getFields()); + params.withRealtime(tvrequest.getRealtime()); + request.setEntity(createEntity(tvrequest, REQUEST_BODY_CONTENT_TYPE)); + return request; + } + static Request getScript(GetStoredScriptRequest getStoredScriptRequest) { String endpoint = new EndpointBuilder().addPathPartAsIs("_scripts").addPathPart(getStoredScriptRequest.id()).build(); Request request = new Request(HttpGet.METHOD_NAME, endpoint); diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java index eb041c77c542a..342e3efbb6a35 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java @@ -56,6 +56,8 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.action.update.UpdateResponse; +import org.elasticsearch.client.core.TermVectorsResponse; +import org.elasticsearch.client.core.TermVectorsRequest; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.ParseField; @@ -1029,6 +1031,36 @@ public final void explainAsync(ExplainRequest 
explainRequest, RequestOptions opt listener, singleton(404)); } + + /** + * Calls the Term Vectors API + * + * See Term Vectors API on + * elastic.co + * + * @param request the request + * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized + */ + public final TermVectorsResponse termvectors(TermVectorsRequest request, RequestOptions options) throws IOException { + return performRequestAndParseEntity(request, RequestConverters::termVectors, options, TermVectorsResponse::fromXContent, + emptySet()); + } + + /** + * Asynchronously calls the Term Vectors API + * + * See Term Vectors API on + * elastic.co + * @param request the request + * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized + * @param listener the listener to be notified upon request completion + */ + public final void termvectorsAsync(TermVectorsRequest request, RequestOptions options, ActionListener listener) { + performRequestAsyncAndParseEntity(request, RequestConverters::termVectors, options, TermVectorsResponse::fromXContent, listener, + emptySet()); + } + + /** * Executes a request using the Ranking Evaluation API. * See Ranking Evaluation API diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsRequest.java new file mode 100644 index 0000000000000..5c94dfd0a3375 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsRequest.java @@ -0,0 +1,228 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.client.core; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; + +public class TermVectorsRequest implements ToXContentObject, Validatable { + + private final String index; + private final String type; + private String id = null; + private String routing = null; + private String preference = null; + private boolean realtime = true; + private String[] fields = null; + private boolean requestPositions = true; + private boolean requestPayloads = true; + private boolean requestOffsets = true; + private boolean requestFieldStatistics = true; + private boolean requestTermStatistics = false; + private Map perFieldAnalyzer = null; + private Map filterSettings = null; + private XContentBuilder docBuilder = null; + + + /** + * Constructs TermVectorRequest for the given document + * @param index - index of the document + * @param type - type of the document + * @param docId - id of the document + */ + public TermVectorsRequest(String index, String type, String docId) { + this(index, type); + this.id = docId; + } + + /** + * Constructs TermVectorRequest for an artificial document + * @param index - index of 
the document + * @param type - type of the document + */ + public TermVectorsRequest(String index, String type) { + this.index = index; + this.type = type; + } + + /** + * Returns the index of the request + */ + public String getIndex() { + return index; + } + + /** + * Returns the type of the request + */ + public String getType() { + return type; + } + + /** + * Returns the id of the request + * can be NULL if there is no document ID + */ + public String getId() { + return id; + } + + /** + * Sets the fields for which term vectors information should be retrieved + */ + public void setFields(String... fields) { + this.fields = fields; + } + + public String[] getFields() { + return fields; + } + + /** + * Sets whether to request term positions + */ + public void setPositions(boolean requestPositions) { + this.requestPositions = requestPositions; + } + + /** + * Sets whether to request term payloads + */ + public void setPayloads(boolean requestPayloads) { + this.requestPayloads = requestPayloads; + } + + /** + * Sets whether to request term offsets + */ + public void setOffsets(boolean requestOffsets) { + this.requestOffsets = requestOffsets; + } + + /** + * Sets whether to request field statistics + */ + public void setFieldStatistics(boolean requestFieldStatistics) { + this.requestFieldStatistics = requestFieldStatistics; + } + + /** + * Sets whether to request term statistics + */ + public void setTermStatistics(boolean requestTermStatistics) { + this.requestTermStatistics = requestTermStatistics; + } + + /** + * Sets different analyzers than the one at the fields + */ + public void setPerFieldAnalyzer(Map perFieldAnalyzer) { + this.perFieldAnalyzer = perFieldAnalyzer; + } + + /** + * Sets an artifical document on what to request _termvectors + */ + public void setDoc(XContentBuilder docBuilder) { + this.docBuilder = docBuilder; + } + + /** + * Sets conditions for terms filtering + */ + public void setFilterSettings(Map filterSettings) { + this.filterSettings = 
filterSettings; + } + + /** + * Sets a routing to route a request to a particular shard + */ + public void setRouting(String routing) { + this.routing = routing; + } + + public String getRouting() { + return routing; + } + + /** + * Set a preference of which shard copies to execute the request + */ + public void setPreference(String preference) { + this.preference = preference; + } + + public String getPreference() { + return preference; + } + + /** + * Sets if the request should be realtime or near-realtime + */ + public void setRealtime(boolean realtime) { + this.realtime = realtime; + } + + /** + * Returns if the request is realtime(true) or near-realtime(false) + */ + public boolean getRealtime() { + return realtime; + } + + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + // set values only when different from defaults + if (requestPositions == false) builder.field("positions", false); + if (requestPayloads == false) builder.field("payloads", false); + if (requestOffsets == false) builder.field("offsets", false); + if (requestFieldStatistics == false) builder.field("field_statistics", false); + if (requestTermStatistics) builder.field("term_statistics", true); + if (perFieldAnalyzer != null) builder.field("per_field_analyzer", perFieldAnalyzer); + + if (docBuilder != null) { + BytesReference doc = BytesReference.bytes(docBuilder); + try (InputStream stream = doc.streamInput()) { + builder.rawField("doc", stream, docBuilder.contentType()); + } + } + + if (filterSettings != null) { + builder.startObject("filter"); + String[] filterSettingNames = + {"max_num_terms", "min_term_freq", "max_term_freq", "min_doc_freq", "max_doc_freq", "min_word_length", "max_word_length"}; + for (String settingName : filterSettingNames) { + if (filterSettings.containsKey(settingName)) builder.field(settingName, filterSettings.get(settingName)); + } + builder.endObject(); + } + builder.endObject(); 
+ return builder; + } + +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsResponse.java new file mode 100644 index 0000000000000..5c57fc11b6fe0 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/core/TermVectorsResponse.java @@ -0,0 +1,486 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.client.core; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentParser; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +import java.util.Collections; +import java.util.List; +import java.util.Comparator; +import java.util.Objects; + +public class TermVectorsResponse { + private final String index; + private final String type; + private final String id; + private final long docVersion; + private final boolean found; + private final long tookInMillis; + private final List termVectorList; + + public TermVectorsResponse( + String index, String type, String id, long version, boolean found, long tookInMillis, List termVectorList) { + this.index = index; + this.type = type; + this.id = id; + this.docVersion = version; + this.found = found; + this.tookInMillis = tookInMillis; + this.termVectorList = termVectorList; + } + + private static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("term_vectors", true, + args -> { + // as the response comes from server, we are sure that args[6] will be a list of TermVector + @SuppressWarnings("unchecked") List termVectorList = (List) args[6]; + if (termVectorList != null) { + Collections.sort(termVectorList, Comparator.comparing(TermVector::getFieldName)); + } + return new TermVectorsResponse( + (String) args[0], + (String) args[1], + (String) args[2], + (long) args[3], + (boolean) args[4], + (long) args[5], + termVectorList + ); + } + ); + + static { + PARSER.declareString(constructorArg(), new ParseField("_index")); + PARSER.declareString(constructorArg(), new ParseField("_type")); + PARSER.declareString(optionalConstructorArg(), new ParseField("_id")); + 
PARSER.declareLong(constructorArg(), new ParseField("_version")); + PARSER.declareBoolean(constructorArg(), new ParseField("found")); + PARSER.declareLong(constructorArg(), new ParseField("took")); + PARSER.declareNamedObjects(optionalConstructorArg(), + (p, c, fieldName) -> TermVector.fromXContent(p, fieldName), new ParseField("term_vectors")); + } + + public static TermVectorsResponse fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + /** + * Returns the index for the response + */ + public String getIndex() { + return index; + } + + /** + * Returns the type for the response + */ + public String getType() { + return type; + } + + /** + * Returns the id of the request + * can be NULL if there is no document ID + */ + public String getId() { + return id; + } + + /** + * Returns if the document is found + * always true for artificial documents + */ + public boolean getFound() { + return found; + } + + /** + * Returns the document version + */ + public long getDocVersion() { + return docVersion; + } + + /** + * Returns the time that a request took in milliseconds + */ + public long getTookInMillis() { + return tookInMillis; + } + + /** + * Returns the list of term vectors + */ + public List getTermVectorsList(){ + return termVectorList; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof TermVectorsResponse)) return false; + TermVectorsResponse other = (TermVectorsResponse) obj; + return index.equals(other.index) + && type.equals(other.type) + && Objects.equals(id, other.id) + && docVersion == other.docVersion + && found == other.found + && tookInMillis == other.tookInMillis + && Objects.equals(termVectorList, other.termVectorList); + } + + @Override + public int hashCode() { + return Objects.hash(index, type, id, docVersion, found, tookInMillis, termVectorList); + } + + + public static final class TermVector { + + private static ConstructingObjectParser PARSER = new 
ConstructingObjectParser<>("term_vector", true, + (args, ctxFieldName) -> { + // as the response comes from server, we are sure that args[1] will be a list of Term + @SuppressWarnings("unchecked") List terms = (List) args[1]; + if (terms != null) { + Collections.sort(terms, Comparator.comparing(Term::getTerm)); + } + return new TermVector(ctxFieldName, (FieldStatistics) args[0], terms); + } + ); + + static { + PARSER.declareObject(optionalConstructorArg(), + (p,c) -> FieldStatistics.fromXContent(p), new ParseField("field_statistics")); + PARSER.declareNamedObjects(optionalConstructorArg(), (p, c, term) -> Term.fromXContent(p, term), new ParseField("terms")); + } + + private final String fieldName; + @Nullable + private final FieldStatistics fieldStatistics; + @Nullable + private final List terms; + + public TermVector(String fieldName, FieldStatistics fieldStatistics, List terms) { + this.fieldName = fieldName; + this.fieldStatistics = fieldStatistics; + this.terms = terms; + } + + public static TermVector fromXContent(XContentParser parser, String fieldName) { + return PARSER.apply(parser, fieldName); + } + + /** + * Returns the field name of the current term vector + */ + public String getFieldName() { + return fieldName; + } + + /** + * Returns the list of terms for the current term vector + */ + public List getTerms() { + return terms; + } + + /** + * Returns the field statistics for the current field + */ + public FieldStatistics getFieldStatistics() { + return fieldStatistics; + } + + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof TermVector)) return false; + TermVector other = (TermVector) obj; + return fieldName.equals(other.fieldName) + && Objects.equals(fieldStatistics, other.fieldStatistics) + && Objects.equals(terms, other.terms); + } + + @Override + public int hashCode() { + return Objects.hash(fieldName, fieldStatistics, terms); + } + + // Class containing a general field statistics for the 
field + public static final class FieldStatistics { + + private static ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "field_statistics", true, + args -> { + return new FieldStatistics((long) args[0], (int) args[1], (long) args[2]); + } + ); + + static { + PARSER.declareLong(constructorArg(), new ParseField("sum_doc_freq")); + PARSER.declareInt(constructorArg(), new ParseField("doc_count")); + PARSER.declareLong(constructorArg(), new ParseField("sum_ttf")); + } + private final long sumDocFreq; + private final int docCount; + private final long sumTotalTermFreq; + + public FieldStatistics(long sumDocFreq, int docCount, long sumTotalTermFreq) { + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + this.sumTotalTermFreq = sumTotalTermFreq; + } + + public static FieldStatistics fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + /* + * Returns how many documents this field contains + */ + public int getDocCount() { + return docCount; + } + + /** + * Returns the sum of document frequencies for all terms in this field + */ + public long getSumDocFreq() { + return sumDocFreq; + } + + /** + * Returns the sum of total term frequencies of all terms in this field + */ + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof FieldStatistics)) return false; + FieldStatistics other = (FieldStatistics) obj; + return docCount == other.docCount + && sumDocFreq == other.sumDocFreq + && sumTotalTermFreq == other.sumTotalTermFreq; + } + + @Override + public int hashCode() { + return Objects.hash(docCount, sumDocFreq, sumTotalTermFreq); + } + } + + + public static final class Term { + private static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("token", true, + (args, ctxTerm) -> { + // as the response comes from server, we are sure that args[4] will be a list of Token + 
@SuppressWarnings("unchecked") List tokens = (List) args[4]; + if (tokens != null) { + Collections.sort( + tokens, + Comparator.comparing(Token::getPosition, Comparator.nullsFirst(Integer::compareTo)) + .thenComparing(Token::getStartOffset, Comparator.nullsFirst(Integer::compareTo)) + .thenComparing(Token::getEndOffset, Comparator.nullsFirst(Integer::compareTo)) + ); + } + return new Term(ctxTerm, (int) args[0], (Integer) args[1], (Long) args[2], (Float) args[3], tokens); + } + ); + static { + PARSER.declareInt(constructorArg(), new ParseField("term_freq")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("doc_freq")); + PARSER.declareLong(optionalConstructorArg(), new ParseField("ttf")); + PARSER.declareFloat(optionalConstructorArg(), new ParseField("score")); + PARSER.declareObjectArray(optionalConstructorArg(), (p,c) -> Token.fromXContent(p), new ParseField("tokens")); + } + + private final String term; + private final int termFreq; + @Nullable + private final Integer docFreq; + @Nullable + private final Long totalTermFreq; + @Nullable + private final Float score; + @Nullable + private final List tokens; + + public Term(String term, int termFreq, Integer docFreq, Long totalTermFreq, Float score, List tokens) { + this.term = term; + this.termFreq = termFreq; + this.docFreq = docFreq; + this.totalTermFreq = totalTermFreq; + this.score = score; + this.tokens = tokens; + } + + public static Term fromXContent(XContentParser parser, String term) { + return PARSER.apply(parser, term); + } + + /** + * Returns the string representation of the term + */ + public String getTerm() { + return term; + } + + /** + * Returns term frequency - the number of times this term occurs in the current document + */ + public int getTermFreq() { + return termFreq; + } + + /** + * Returns document frequency - the number of documents in the index that contain this term + */ + public Integer getDocFreq() { + return docFreq; + } + + /** + * Returns total term frequency - the 
number of times this term occurs across all documents + */ + public Long getTotalTermFreq( ){ + return totalTermFreq; + } + + /** + * Returns tf-idf score, if the request used some form of terms filtering + */ + public Float getScore(){ + return score; + } + + /** + * Returns a list of tokens for the term + */ + public List getTokens() { + return tokens; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof Term)) return false; + Term other = (Term) obj; + return term.equals(other.term) + && termFreq == other.termFreq + && Objects.equals(docFreq, other.docFreq) + && Objects.equals(totalTermFreq, other.totalTermFreq) + && Objects.equals(score, other.score) + && Objects.equals(tokens, other.tokens); + } + + @Override + public int hashCode() { + return Objects.hash(term, termFreq, docFreq, totalTermFreq, score, tokens); + } + } + + + public static final class Token { + + private static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("token", true, + args -> { + return new Token((Integer) args[0], (Integer) args[1], (Integer) args[2], (String) args[3]); + }); + static { + PARSER.declareInt(optionalConstructorArg(), new ParseField("start_offset")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("end_offset")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("position")); + PARSER.declareString(optionalConstructorArg(), new ParseField("payload")); + } + + @Nullable + private final Integer startOffset; + @Nullable + private final Integer endOffset; + @Nullable + private final Integer position; + @Nullable + private final String payload; + + + public Token(Integer startOffset, Integer endOffset, Integer position, String payload) { + this.startOffset = startOffset; + this.endOffset = endOffset; + this.position = position; + this.payload = payload; + } + + public static Token fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + /** + * Returns 
the start offset of the token in the document's field + */ + public Integer getStartOffset() { + return startOffset; + } + + /** + * Returns the end offset of the token in the document's field + */ + public Integer getEndOffset() { + return endOffset; + } + + /** + * Returns the position of the token in the document's field + */ + public Integer getPosition() { + return position; + } + + /** + * Returns the payload of the token or null if the payload doesn't exist + */ + public String getPayload() { + return payload; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof Token)) return false; + Token other = (Token) obj; + return Objects.equals(startOffset, other.startOffset) + && Objects.equals(endOffset,other.endOffset) + && Objects.equals(position, other.position) + && Objects.equals(payload, other.payload); + } + + @Override + public int hashCode() { + return Objects.hash(startOffset, endOffset, position, payload); + } + } + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/CrudIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/CrudIT.java index 3f90552fe9b54..b303d7df90427 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/CrudIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/CrudIT.java @@ -44,12 +44,15 @@ import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.action.update.UpdateResponse; +import org.elasticsearch.client.core.TermVectorsRequest; +import org.elasticsearch.client.core.TermVectorsResponse; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentBuilder; +import 
org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.get.GetResult; @@ -73,6 +76,7 @@ import java.io.IOException; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -80,6 +84,7 @@ import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; @@ -1154,4 +1159,80 @@ public void testParamsEncode() throws IOException { assertEquals(routing, getResponse.getField("_routing").getValue()); } } + + // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here + public void testTermvectors() throws IOException { + final String sourceIndex = "index1"; + { + // prepare : index docs + Settings settings = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .build(); + String mappings = "\"_doc\":{\"properties\":{\"field\":{\"type\":\"text\"}}}"; + createIndex(sourceIndex, settings, mappings); + assertEquals( + RestStatus.OK, + highLevelClient().bulk( + new BulkRequest() + .add(new IndexRequest(sourceIndex, "_doc", "1") + .source(Collections.singletonMap("field", "value1"), XContentType.JSON)) + .add(new IndexRequest(sourceIndex, "_doc", "2") + .source(Collections.singletonMap("field", "value2"), XContentType.JSON)) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE), + RequestOptions.DEFAULT + ).status() + ); + } + { + // test _termvectors on real documents + TermVectorsRequest tvRequest = new TermVectorsRequest(sourceIndex, "_doc", "1"); + tvRequest.setFields("field"); + TermVectorsResponse tvResponse = execute(tvRequest, highLevelClient()::termvectors, 
highLevelClient()::termvectorsAsync); + + TermVectorsResponse.TermVector.Token expectedToken = new TermVectorsResponse.TermVector.Token(0, 6, 0, null); + TermVectorsResponse.TermVector.Term expectedTerm = new TermVectorsResponse.TermVector.Term( + "value1", 1, null, null, null, Collections.singletonList(expectedToken)); + TermVectorsResponse.TermVector.FieldStatistics expectedFieldStats = + new TermVectorsResponse.TermVector.FieldStatistics(2, 2, 2); + TermVectorsResponse.TermVector expectedTV = + new TermVectorsResponse.TermVector("field", expectedFieldStats, Collections.singletonList(expectedTerm)); + List expectedTVlist = Collections.singletonList(expectedTV); + + assertThat(tvResponse.getIndex(), equalTo(sourceIndex)); + assertThat(Integer.valueOf(tvResponse.getId()), equalTo(1)); + assertTrue(tvResponse.getFound()); + assertEquals(expectedTVlist, tvResponse.getTermVectorsList()); + } + { + // test _termvectors on artificial documents + TermVectorsRequest tvRequest = new TermVectorsRequest(sourceIndex, "_doc"); + XContentBuilder docBuilder = XContentFactory.jsonBuilder(); + docBuilder.startObject().field("field", "valuex").endObject(); + tvRequest.setDoc(docBuilder); + TermVectorsResponse tvResponse = execute(tvRequest, highLevelClient()::termvectors, highLevelClient()::termvectorsAsync); + + TermVectorsResponse.TermVector.Token expectedToken = new TermVectorsResponse.TermVector.Token(0, 6, 0, null); + TermVectorsResponse.TermVector.Term expectedTerm = new TermVectorsResponse.TermVector.Term( + "valuex", 1, null, null, null, Collections.singletonList(expectedToken)); + TermVectorsResponse.TermVector.FieldStatistics expectedFieldStats = + new TermVectorsResponse.TermVector.FieldStatistics(2, 2, 2); + TermVectorsResponse.TermVector expectedTV = + new TermVectorsResponse.TermVector("field", expectedFieldStats, Collections.singletonList(expectedTerm)); + List expectedTVlist = Collections.singletonList(expectedTV); + + assertThat(tvResponse.getIndex(), 
equalTo(sourceIndex)); + assertTrue(tvResponse.getFound()); + assertEquals(expectedTVlist, tvResponse.getTermVectorsList()); + } + } + + // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here + public void testTermvectorsWithNonExistentIndex() { + TermVectorsRequest request = new TermVectorsRequest("non-existent", "non-existent", "non-existent"); + + ElasticsearchException exception = expectThrows(ElasticsearchException.class, + () -> execute(request, highLevelClient()::termvectors, highLevelClient()::termvectorsAsync)); + assertEquals(RestStatus.NOT_FOUND, exception.status()); + } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java index 8887bed226ca1..0dc0a67cf7e16 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.action.support.master.MasterNodeReadRequest; import org.elasticsearch.action.support.master.MasterNodeRequest; import org.elasticsearch.action.support.replication.ReplicationRequest; +import org.elasticsearch.client.core.TermVectorsRequest; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.client.RequestConverters.EndpointBuilder; import org.elasticsearch.common.CheckedBiConsumer; @@ -1177,6 +1178,46 @@ public void testExplain() throws IOException { assertToXContentBody(explainRequest, request.getEntity()); } + public void testTermVectors() throws IOException { + String index = randomAlphaOfLengthBetween(3, 10); + String type = randomAlphaOfLengthBetween(3, 10); + String id = randomAlphaOfLengthBetween(3, 10); + TermVectorsRequest tvRequest = new TermVectorsRequest(index, type, id); + Map expectedParams = new HashMap<>(); + String[] fields; 
+ if (randomBoolean()) { + String routing = randomAlphaOfLengthBetween(3, 10); + tvRequest.setRouting(routing); + expectedParams.put("routing", routing); + } + if (randomBoolean()) { + tvRequest.setRealtime(false); + expectedParams.put("realtime", "false"); + } + + boolean hasFields = randomBoolean(); + if (hasFields) { + fields = generateRandomStringArray(10, 5, false, false); + tvRequest.setFields(fields); + } + + Request request = RequestConverters.termVectors(tvRequest); + StringJoiner endpoint = new StringJoiner("/", "/", ""); + endpoint.add(index).add(type).add(id).add("_termvectors"); + + assertEquals(HttpGet.METHOD_NAME, request.getMethod()); + assertEquals(endpoint.toString(), request.getEndpoint()); + if (hasFields) { + assertThat(request.getParameters(), hasKey("fields")); + String[] requestFields = Strings.splitStringByCommaToArray(request.getParameters().get("fields")); + assertArrayEquals(tvRequest.getFields(), requestFields); + } + for (Map.Entry param : expectedParams.entrySet()) { + assertThat(request.getParameters(), hasEntry(param.getKey(), param.getValue())); + } + assertToXContentBody(tvRequest, request.getEntity()); + } + public void testFieldCaps() { // Create a random request. 
String[] indices = randomIndicesNames(0, 5); diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java index fda7ecdd6d6a2..9535043e395d1 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java @@ -661,8 +661,7 @@ public void testApiNamingConventions() throws Exception { "mtermvectors", "render_search_template", "scripts_painless_execute", - "tasks.get", - "termvectors" + "tasks.get" }; //These API are not required for high-level client feature completeness String[] notRequiredApi = new String[] { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/core/TermVectorsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/core/TermVectorsResponseTests.java new file mode 100644 index 0000000000000..67b2704c58d29 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/core/TermVectorsResponseTests.java @@ -0,0 +1,203 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.client.core; + +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.test.ESTestCase; + +import java.util.ArrayList; +import java.util.List; +import java.io.IOException; +import java.util.Collections; +import java.util.Comparator; + +import static org.elasticsearch.test.AbstractXContentTestCase.xContentTester; + +public class TermVectorsResponseTests extends ESTestCase { + + public void testFromXContent() throws IOException { + xContentTester( + this::createParser, + this::createTestInstance, + this::toXContent, + TermVectorsResponse::fromXContent) + .supportsUnknownFields(true) + .randomFieldsExcludeFilter(field -> + field.endsWith("term_vectors") || field.endsWith("terms") || field.endsWith("tokens")) + .test(); + } + + private void toXContent(TermVectorsResponse response, XContentBuilder builder) throws IOException { + builder.startObject(); + builder.field("_index", response.getIndex()); + builder.field("_type", response.getType()); + if (response.getId() != null) { + builder.field("_id", response.getId()); + } + builder.field("_version", response.getDocVersion()); + builder.field("found", response.getFound()); + builder.field("took", response.getTookInMillis()); + List termVectorList = response.getTermVectorsList(); + if (termVectorList != null) { + Collections.sort(termVectorList, Comparator.comparing(TermVectorsResponse.TermVector::getFieldName)); + builder.startObject("term_vectors"); + for (TermVectorsResponse.TermVector tv : termVectorList) { + toXContent(tv, builder); + } + builder.endObject(); + } + builder.endObject(); + } + + private void toXContent(TermVectorsResponse.TermVector tv, XContentBuilder builder) throws IOException { + builder.startObject(tv.getFieldName()); + // build fields_statistics + if (tv.getFieldStatistics() != null) { + builder.startObject("field_statistics"); + builder.field("sum_doc_freq", tv.getFieldStatistics().getSumDocFreq()); + builder.field("doc_count", 
tv.getFieldStatistics().getDocCount()); + builder.field("sum_ttf", tv.getFieldStatistics().getSumTotalTermFreq()); + builder.endObject(); + } + // build terms + List terms = tv.getTerms(); + if (terms != null) { + Collections.sort(terms, Comparator.comparing(TermVectorsResponse.TermVector.Term::getTerm)); + builder.startObject("terms"); + for (TermVectorsResponse.TermVector.Term term : terms) { + builder.startObject(term.getTerm()); + // build term_statistics + if (term.getDocFreq() != null) builder.field("doc_freq", term.getDocFreq()); + if (term.getTotalTermFreq() != null) builder.field("ttf", term.getTotalTermFreq()); + builder.field("term_freq", term.getTermFreq()); + + // build tokens + List tokens = term.getTokens(); + if (tokens != null) { + Collections.sort( + tokens, + Comparator.comparing(TermVectorsResponse.TermVector.Token::getPosition, Comparator.nullsFirst(Integer::compareTo)) + .thenComparing(TermVectorsResponse.TermVector.Token::getStartOffset, Comparator.nullsFirst(Integer::compareTo)) + .thenComparing(TermVectorsResponse.TermVector.Token::getEndOffset, Comparator.nullsFirst(Integer::compareTo)) + ); + builder.startArray("tokens"); + for (TermVectorsResponse.TermVector.Token token : tokens) { + builder.startObject(); + if (token.getPosition() != null) builder.field("position", token.getPosition()); + if (token.getStartOffset()!= null) builder.field("start_offset", token.getStartOffset()); + if (token.getEndOffset() != null) builder.field("end_offset", token.getEndOffset()); + if (token.getPayload() != null) builder.field("payload", token.getPayload()); + builder.endObject(); + } + builder.endArray(); + } + if (term.getScore() != null) builder.field("score", term.getScore()); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + } + + + protected TermVectorsResponse createTestInstance() { + String index = randomAlphaOfLength(5); + String type = randomAlphaOfLength(5); + String id = String.valueOf(randomIntBetween(1,100)); + 
long version = randomNonNegativeLong(); + long tookInMillis = randomNonNegativeLong(); + boolean found = randomBoolean(); + List tvList = null; + if (found == true){ + boolean hasFieldStatistics = randomBoolean(); + boolean hasTermStatistics = randomBoolean(); + boolean hasScores = randomBoolean(); + boolean hasOffsets = randomBoolean(); + boolean hasPositions = randomBoolean(); + boolean hasPayloads = randomBoolean(); + int fieldsCount = randomIntBetween(1, 3); + tvList = new ArrayList<>(fieldsCount); + for (int i = 0; i < fieldsCount; i++) { + tvList.add(randomTermVector(hasFieldStatistics, hasTermStatistics, hasScores, hasOffsets, hasPositions, hasPayloads)); + } + } + TermVectorsResponse tvresponse = new TermVectorsResponse(index, type, id, version, found, tookInMillis, tvList); + return tvresponse; + } + + private TermVectorsResponse.TermVector randomTermVector(boolean hasFieldStatistics, boolean hasTermStatistics, boolean hasScores, + boolean hasOffsets, boolean hasPositions, boolean hasPayloads) { + TermVectorsResponse.TermVector.FieldStatistics fs = null; + if (hasFieldStatistics) { + long sumDocFreq = randomNonNegativeLong(); + int docCount = randomInt(1000); + long sumTotalTermFreq = randomNonNegativeLong(); + fs = new TermVectorsResponse.TermVector.FieldStatistics(sumDocFreq, docCount, sumTotalTermFreq); + } + + int termsCount = randomIntBetween(1, 5); + List terms = new ArrayList<>(termsCount); + for (int i = 0; i < termsCount; i++) { + terms.add(randomTerm(hasTermStatistics, hasScores, hasOffsets, hasPositions, hasPayloads)); + } + + TermVectorsResponse.TermVector tv = new TermVectorsResponse.TermVector("field" + randomAlphaOfLength(2), fs, terms); + return tv; + } + + private TermVectorsResponse.TermVector.Term randomTerm(boolean hasTermStatistics, boolean hasScores, + boolean hasOffsets, boolean hasPositions, boolean hasPayloads) { + + String termTxt = "term" + randomAlphaOfLength(2); + int termFreq = randomInt(10000); + Integer docFreq = null; + 
Long totalTermFreq = null; + Float score = null; + List tokens = null; + if (hasTermStatistics) { + docFreq = randomInt(1000); + totalTermFreq = randomNonNegativeLong(); + } + if (hasScores) score = randomFloat(); + if (hasOffsets || hasPositions || hasPayloads ){ + int tokensCount = randomIntBetween(1, 5); + tokens = new ArrayList<>(tokensCount); + for (int i = 0; i < tokensCount; i++) { + Integer startOffset = null; + Integer endOffset = null; + Integer position = null; + String payload = null; + if (hasOffsets) { + startOffset = randomInt(1000); + endOffset = randomInt(2000); + } + if (hasPositions) position = randomInt(100); + if (hasPayloads) payload = "payload" + randomAlphaOfLength(2); + TermVectorsResponse.TermVector.Token token = + new TermVectorsResponse.TermVector.Token(startOffset, endOffset, position, payload); + tokens.add(token); + } + } + TermVectorsResponse.TermVector.Term term = + new TermVectorsResponse.TermVector.Term(termTxt, termFreq, docFreq, totalTermFreq, score, tokens); + return term; + } + +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java index 4e3f778cd151b..f80b532f5a4b9 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java @@ -25,6 +25,8 @@ import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.LatchedActionListener; import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.bulk.BackoffPolicy; import org.elasticsearch.action.bulk.BulkItemResponse; import 
org.elasticsearch.action.bulk.BulkProcessor; @@ -52,6 +54,8 @@ import org.elasticsearch.client.Response; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.RethrottleRequest; +import org.elasticsearch.client.core.TermVectorsRequest; +import org.elasticsearch.client.core.TermVectorsResponse; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; @@ -1503,6 +1507,125 @@ public void afterBulk(long executionId, BulkRequest request, Throwable failure) } } + // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here + public void testTermVectors() throws Exception { + RestHighLevelClient client = highLevelClient(); + CreateIndexRequest authorsRequest = new CreateIndexRequest("authors").mapping("doc", "user", "type=keyword"); + CreateIndexResponse authorsResponse = client.indices().create(authorsRequest, RequestOptions.DEFAULT); + assertTrue(authorsResponse.isAcknowledged()); + client.index(new IndexRequest("authors", "doc", "1").source("user", "kimchy"), RequestOptions.DEFAULT); + Response refreshResponse = client().performRequest(new Request("POST", "/authors/_refresh")); + assertEquals(200, refreshResponse.getStatusLine().getStatusCode()); + + { + // tag::term-vectors-request + TermVectorsRequest request = new TermVectorsRequest("authors", "doc", "1"); + request.setFields("user"); + // end::term-vectors-request + } + + { + // tag::term-vectors-request-artificial + TermVectorsRequest request = new TermVectorsRequest("authors", "doc"); + XContentBuilder docBuilder = XContentFactory.jsonBuilder(); + docBuilder.startObject().field("user", "guest-user").endObject(); + request.setDoc(docBuilder); // <1> + // end::term-vectors-request-artificial + + // tag::term-vectors-request-optional-arguments + request.setFieldStatistics(false); // <1> + request.setTermStatistics(true); // <2> + 
request.setPositions(false); // <3> + request.setOffsets(false); // <4> + request.setPayloads(false); // <5> + + Map filterSettings = new HashMap<>(); + filterSettings.put("max_num_terms", 3); + filterSettings.put("min_term_freq", 1); + filterSettings.put("max_term_freq", 10); + filterSettings.put("min_doc_freq", 1); + filterSettings.put("max_doc_freq", 100); + filterSettings.put("min_word_length", 1); + filterSettings.put("max_word_length", 10); + + request.setFilterSettings(filterSettings); // <6> + + Map perFieldAnalyzer = new HashMap<>(); + perFieldAnalyzer.put("user", "keyword"); + request.setPerFieldAnalyzer(perFieldAnalyzer); // <7> + + request.setRealtime(false); // <8> + request.setRouting("routing"); // <9> + // end::term-vectors-request-optional-arguments + } + + TermVectorsRequest request = new TermVectorsRequest("authors", "doc", "1"); + request.setFields("user"); + + // tag::term-vectors-execute + TermVectorsResponse response = client.termvectors(request, RequestOptions.DEFAULT); + // end::term-vectors-execute + + + // tag::term-vectors-response + String index = response.getIndex(); // <1> + String type = response.getType(); // <2> + String id = response.getId(); // <3> + boolean found = response.getFound(); // <4> + // end::term-vectors-response + + // tag::term-vectors-term-vectors + if (response.getTermVectorsList() != null) { + List tvList = response.getTermVectorsList(); // <1> + for (TermVectorsResponse.TermVector tv : tvList) { + String fieldname = tv.getFieldName(); // <2> + int docCount = tv.getFieldStatistics().getDocCount(); // <3> + long sumTotalTermFreq = tv.getFieldStatistics().getSumTotalTermFreq(); // <4> + long sumDocFreq = tv.getFieldStatistics().getSumDocFreq(); // <5> + if (tv.getTerms() != null) { + List terms = tv.getTerms(); // <6> + for (TermVectorsResponse.TermVector.Term term : terms) { + String termStr = term.getTerm(); // <7> + int termFreq = term.getTermFreq(); // <8> + int docFreq = term.getDocFreq(); // <9> + long 
totalTermFreq = term.getTotalTermFreq(); // <10> + float score = term.getScore(); // <11> + if (term.getTokens() != null) { + List tokens = term.getTokens(); // <12> + for (TermVectorsResponse.TermVector.Token token : tokens) { + int position = token.getPosition(); // <13> + int startOffset = token.getStartOffset(); // <14> + int endOffset = token.getEndOffset(); // <15> + String payload = token.getPayload(); // <16> + } + } + } + } + } + } + // end::term-vectors-term-vectors + + // tag::term-vectors-execute-listener + ActionListener listener = new ActionListener() { + @Override + public void onResponse(TermVectorsResponse termVectorsResponse) { + // <1> + } + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::term-vectors-execute-listener + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + // tag::term-vectors-execute-async + client.termvectorsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::term-vectors-execute-async + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + + } + @SuppressWarnings("unused") public void testMultiGet() throws Exception { RestHighLevelClient client = highLevelClient(); diff --git a/docs/java-rest/high-level/document/term-vectors.asciidoc b/docs/java-rest/high-level/document/term-vectors.asciidoc new file mode 100644 index 0000000000000..ec24a0ecef413 --- /dev/null +++ b/docs/java-rest/high-level/document/term-vectors.asciidoc @@ -0,0 +1,134 @@ +-- +:api: term-vectors +:request: TermVectorsRequest +:response: TermVectorsResponse +-- + +[id="{upid}-{api}"] +=== Term Vectors API + +Term Vectors API returns information and statistics on terms in the fields +of a particular document. The document could be stored in the index or +artificially provided by the user. 
+ + +[id="{upid}-{api}-request"] +==== Term Vectors Request + +A +{request}+ expects an `index`, a `type` and an `id` to specify +a certain document, and fields for which the information is retrieved. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- + +Term vectors can also be generated for artificial documents, that is for +documents not present in the index: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request-artificial] +-------------------------------------------------- +<1> An artificial document is provided as an `XContentBuilder` object, +the Elasticsearch built-in helper to generate JSON content. + +===== Optional arguments + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request-optional-arguments] +-------------------------------------------------- +<1> Set `fieldStatistics` to `false` (default is `true`) to omit document count, +sum of document frequencies, sum of total term frequencies. +<2> Set `termStatistics` to `true` (default is `false`) to display +total term frequency and document frequency. +<3> Set `positions` to `false` (default is `true`) to omit the output of +positions. +<4> Set `offsets` to `false` (default is `true`) to omit the output of +offsets. +<5> Set `payloads` to `false` (default is `true`) to omit the output of +payloads. +<6> Set `filterSettings` to filter the terms that can be returned based +on their tf-idf scores. +<7> Set `perFieldAnalyzer` to specify a different analyzer than +the one that the field has. +<8> Set `realtime` to `false` (default is `true`) to not retrieve term vectors +near realtime. 
+<9> Set a routing parameter + + +include::../execution.asciidoc[] + + +[id="{upid}-{api}-response"] +==== TermVectorsResponse + +The `TermVectorsResponse` contains the following information: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> The index name of the document. +<2> The type name of the document. +<3> The id of the document. +<4> Indicates whether or not the document is found. + + +===== Inspecting Term Vectors +If `TermVectorsResponse` contains a non-null list of term vectors, +more information about them can be obtained using the following: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-term-vectors] +-------------------------------------------------- +<1> The list of `TermVector` for the document +<2> The name of the current field +<3> Fields statistics for the current field - document count +<4> Fields statistics for the current field - sum of total term frequencies +<5> Fields statistics for the current field - sum of document frequencies +<6> Terms for the current field +<7> The name of the term +<8> Term frequency of the term +<9> Document frequency of the term +<10> Total term frequency of the term +<11> Score of the term +<12> Tokens of the term +<13> Position of the token +<14> Start offset of the token +<15> End offset of the token +<16> Payload of the token + + +[id="{upid}-{api}-response"] +==== TermVectorsResponse + +The `TermVectorsResponse` contains the following information: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> The index name of the document. +<2> The type name of the document. +<3> The id of the document. 
+<4> Indicates whether or not the document is found. +<5> Indicates whether or not there are term vectors for this document. +<6> The list of `TermVector` for the document +<7> The name of the current field +<8> Fields statistics for the current field - document count +<9> Fields statistics for the current field - sum of total term frequencies +<10> Fields statistics for the current field - sum of document frequencies +<11> Terms for the current field +<12> The name of the term +<13> Term frequency of the term +<14> Document frequency of the term +<15> Total term frequency of the term +<16> Score of the term +<17> Tokens of the term +<18> Position of the token +<19> Start offset of the token +<20> End offset of the token +<21> Payload of the token \ No newline at end of file diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 6cde79a22e5c1..5801d79a5b771 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -14,6 +14,7 @@ Single document APIs:: * <<{upid}-exists>> * <<{upid}-delete>> * <<{upid}-update>> +* <<{upid}-term-vectors>> [[multi-doc]] Multi-document APIs:: @@ -29,6 +30,7 @@ include::document/get.asciidoc[] include::document/exists.asciidoc[] include::document/delete.asciidoc[] include::document/update.asciidoc[] +include::document/term-vectors.asciidoc[] include::document/bulk.asciidoc[] include::document/multi-get.asciidoc[] include::document/reindex.asciidoc[] @@ -372,4 +374,4 @@ don't leak into the rest of the documentation. 
:response!: :doc-tests-file!: :upid!: --- +-- \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/index/reindex/DeleteByQueryRequest.java b/server/src/main/java/org/elasticsearch/index/reindex/DeleteByQueryRequest.java index 2713e5e2661da..18307e0a56812 100644 --- a/server/src/main/java/org/elasticsearch/index/reindex/DeleteByQueryRequest.java +++ b/server/src/main/java/org/elasticsearch/index/reindex/DeleteByQueryRequest.java @@ -51,7 +51,7 @@ * */ public class DeleteByQueryRequest extends AbstractBulkByScrollRequest - implements IndicesRequest.Replaceable, ToXContentObject { + implements IndicesRequest.Replaceable, ToXContentObject { public DeleteByQueryRequest() { this(new SearchRequest());