From 11c539d7754baa5ba2bc93e73b5b4797b2b07078 Mon Sep 17 00:00:00 2001 From: Josue Valenzuela Date: Thu, 1 Aug 2024 19:22:37 -0600 Subject: [PATCH] Add similar documents query to index #755 --- .code-samples.meilisearch.yaml | 5 ++ src/main/java/com/meilisearch/sdk/Index.java | 31 +++++------ .../sdk/SimilarDocumentRequest.java | 52 ++++++++++++++++++ .../com/meilisearch/sdk/model/Embedders.java | 18 +++++++ .../com/meilisearch/sdk/model/Settings.java | 1 + .../sdk/model/SimilarDocumentsResults.java | 25 +++++++++ .../meilisearch/integration/SearchTest.java | 53 ++++++++++++++----- .../integration/classes/AbstractIT.java | 2 + .../java/com/meilisearch/sdk/utils/Movie.java | 1 + src/test/resources/vector_movies.json | 32 +++++++++++ 10 files changed, 193 insertions(+), 27 deletions(-) create mode 100644 src/main/java/com/meilisearch/sdk/SimilarDocumentRequest.java create mode 100644 src/main/java/com/meilisearch/sdk/model/Embedders.java create mode 100644 src/main/java/com/meilisearch/sdk/model/SimilarDocumentsResults.java create mode 100644 src/test/resources/vector_movies.json diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index d0fad51a..63fae981 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -822,3 +822,8 @@ multi_search_1: |- multiIndexSearch.addQuery(new IndexSearchRequest("movie_ratings").setQuery("us")); client.multiSearch(multiSearchRequest); +get_similar_post_1: |- + SimilarDocumentRequest query = new SimilarDocumentRequest() + .setId("143") + .setEmbedder("manual"); + client.index("movies").searchSimilarDocuments(query); diff --git a/src/main/java/com/meilisearch/sdk/Index.java b/src/main/java/com/meilisearch/sdk/Index.java index 3d2668f6..91c0439f 100644 --- a/src/main/java/com/meilisearch/sdk/Index.java +++ b/src/main/java/com/meilisearch/sdk/Index.java @@ -1,21 +1,9 @@ package com.meilisearch.sdk; import com.meilisearch.sdk.exceptions.MeilisearchException; -import
com.meilisearch.sdk.model.DocumentQuery; -import com.meilisearch.sdk.model.DocumentsQuery; -import com.meilisearch.sdk.model.FacetSearchable; -import com.meilisearch.sdk.model.Faceting; -import com.meilisearch.sdk.model.IndexStats; -import com.meilisearch.sdk.model.Pagination; -import com.meilisearch.sdk.model.Results; -import com.meilisearch.sdk.model.SearchResult; -import com.meilisearch.sdk.model.Searchable; -import com.meilisearch.sdk.model.Settings; -import com.meilisearch.sdk.model.Task; -import com.meilisearch.sdk.model.TaskInfo; -import com.meilisearch.sdk.model.TasksQuery; -import com.meilisearch.sdk.model.TasksResults; -import com.meilisearch.sdk.model.TypoTolerance; +import com.meilisearch.sdk.http.URLBuilder; +import com.meilisearch.sdk.model.*; + import java.io.Serializable; import java.util.ArrayList; import java.util.List; @@ -1214,4 +1202,17 @@ public TaskInfo updateSearchCutoffMsSettings(Integer milliseconds) throws Meilis public TaskInfo resetSearchCutoffMsSettings() throws MeilisearchException { return this.settingsHandler.resetSearchCutoffMsSettings(this.uid); } + + public SimilarDocumentsResults searchSimilarDocuments(SimilarDocumentRequest query) + throws MeilisearchException { + return this.config.httpClient.post( + new URLBuilder("/indexes") + .addSubroute(this.uid) + .addSubroute("/similar") + .getURL(), + query, + SimilarDocumentsResults.class + ); + } + } diff --git a/src/main/java/com/meilisearch/sdk/SimilarDocumentRequest.java b/src/main/java/com/meilisearch/sdk/SimilarDocumentRequest.java new file mode 100644 index 00000000..52f086e6 --- /dev/null +++ b/src/main/java/com/meilisearch/sdk/SimilarDocumentRequest.java @@ -0,0 +1,52 @@ +package com.meilisearch.sdk; + +import lombok.*; +import lombok.experimental.Accessors; +import org.json.JSONObject; + +@Builder +@AllArgsConstructor(access = AccessLevel.PACKAGE) +@Getter +@Setter +@Accessors(chain = true) +public class SimilarDocumentRequest { + private String id; + private String 
embedder; + private String[] attributesToRetrieve; + private Integer offset; + private Integer limit; + private String filter; + private Boolean showRankingScore; + private Boolean showRankingScoreDetails; + private Double rankingScoreThreshold; + private Boolean retrieveVectors; + + + /** + * Constructor for SimilarDocumentRequest, for building a search request for similar documents + * with the default values: + * id null, embedder "default", attributesToRetrieve ["*"], offset 0, limit 20, + * filter null, showRankingScore false, showRankingScoreDetails false, rankingScoreThreshold null, + * retrieveVectors false + * + */ + public SimilarDocumentRequest() {} + + @Override + public String toString() { + JSONObject jsonObject = + new JSONObject() + .put("id", this.id) + .put("embedder", this.embedder) + .put("attributesToRetrieve", this.attributesToRetrieve) + .put("offset", this.offset) + .put("limit", this.limit) + .put("filter", this.filter) + .put("showRankingScore", this.showRankingScore) + .put("showRankingScoreDetails", this.showRankingScoreDetails) + .put("rankingScoreThreshold", this.rankingScoreThreshold) + .put("retrieveVectors", this.retrieveVectors); + + return jsonObject.toString(); + } +} diff --git a/src/main/java/com/meilisearch/sdk/model/Embedders.java b/src/main/java/com/meilisearch/sdk/model/Embedders.java new file mode 100644 index 00000000..76a16cd1 --- /dev/null +++ b/src/main/java/com/meilisearch/sdk/model/Embedders.java @@ -0,0 +1,18 @@ +package com.meilisearch.sdk.model; + +import lombok.*; +import lombok.experimental.Accessors; + +@Builder +@AllArgsConstructor(access = AccessLevel.PACKAGE) +@Getter +@Setter +@Accessors(chain = true) +public class Embedders { + protected String source; + protected String model; + protected String documentTemplate; + protected Integer dimensions; + + public Embedders() {} +} diff --git a/src/main/java/com/meilisearch/sdk/model/Settings.java b/src/main/java/com/meilisearch/sdk/model/Settings.java index
9f553ee5..c450185f 100644 --- a/src/main/java/com/meilisearch/sdk/model/Settings.java +++ b/src/main/java/com/meilisearch/sdk/model/Settings.java @@ -31,6 +31,7 @@ public class Settings { protected Integer searchCutoffMs; protected String[] separatorTokens; protected String[] nonSeparatorTokens; + protected HashMap<String, Embedders> embedders; public Settings() {} } diff --git a/src/main/java/com/meilisearch/sdk/model/SimilarDocumentsResults.java b/src/main/java/com/meilisearch/sdk/model/SimilarDocumentsResults.java new file mode 100644 index 00000000..b1bc9599 --- /dev/null +++ b/src/main/java/com/meilisearch/sdk/model/SimilarDocumentsResults.java @@ -0,0 +1,25 @@ +package com.meilisearch.sdk.model; + +import lombok.Getter; +import lombok.ToString; + +import java.util.ArrayList; +import java.util.HashMap; + +/** + * Meilisearch similar documents results data structure + * + * @see API + * specification + */ +@Getter +@ToString +public class SimilarDocumentsResults { + ArrayList<HashMap<String, Object>> hits; + String id; + int processingTimeMs; + int offset; + int limit; + int estimatedTotalHits; +} + diff --git a/src/test/java/com/meilisearch/integration/SearchTest.java b/src/test/java/com/meilisearch/integration/SearchTest.java index 81f6b46f..a9526568 100644 --- a/src/test/java/com/meilisearch/integration/SearchTest.java +++ b/src/test/java/com/meilisearch/integration/SearchTest.java @@ -13,20 +13,12 @@ import com.meilisearch.integration.classes.AbstractIT; import com.meilisearch.integration.classes.TestData; -import com.meilisearch.sdk.Index; -import com.meilisearch.sdk.IndexSearchRequest; -import com.meilisearch.sdk.MultiSearchRequest; -import com.meilisearch.sdk.SearchRequest; +import com.meilisearch.sdk.*; import com.meilisearch.sdk.json.GsonJsonHandler; -import com.meilisearch.sdk.model.FacetRating; -import com.meilisearch.sdk.model.MatchingStrategy; -import com.meilisearch.sdk.model.MultiSearchResult; -import com.meilisearch.sdk.model.SearchResult; -import
com.meilisearch.sdk.model.SearchResultPaginated; -import com.meilisearch.sdk.model.Searchable; -import com.meilisearch.sdk.model.Settings; -import com.meilisearch.sdk.model.TaskInfo; +import com.meilisearch.sdk.model.*; import com.meilisearch.sdk.utils.Movie; + +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import org.junit.jupiter.api.AfterAll; @@ -843,4 +835,41 @@ public void testMultiSearchWithRankingScoreThreshold() throws Exception { assertThat(rankingScore, is(greaterThanOrEqualTo(0.98))); } } + + @Test + public void testSimilarDocuments() throws Exception { + String indexUid = "SimilarDocuments"; + Index index = client.index(indexUid); + HashMap<String, Embedders> embedders = new HashMap<>(); + + embedders.put( + "manual", + new Embedders() + .setSource("userProvided") + .setDimensions(3) + ); + + index.updateSettings( + new Settings() + .setEmbedders(embedders) + ); + + TestData testData = this.getTestData(VECTOR_MOVIES, Movie.class); + TaskInfo task = index.addDocuments(testData.getRaw()); + + index.waitForTask(task.getTaskUid()); + + SimilarDocumentsResults results = index.searchSimilarDocuments( + new SimilarDocumentRequest() + .setId("143") + .setEmbedder("manual") + ); + + ArrayList<HashMap<String, Object>> hits = results.getHits(); + assertThat(hits.size(), is(4)); + assertThat(hits.get(0).get("title"), is("Escape Room")); + assertThat(hits.get(1).get("title"), is("Captain Marvel")); + assertThat(hits.get(2).get("title"), is("How to Train Your Dragon: The Hidden World")); + assertThat(hits.get(3).get("title"), is("Shazam!")); + } } diff --git a/src/test/java/com/meilisearch/integration/classes/AbstractIT.java b/src/test/java/com/meilisearch/integration/classes/AbstractIT.java index f23a0b31..a6a211e7 100644 --- a/src/test/java/com/meilisearch/integration/classes/AbstractIT.java +++ b/src/test/java/com/meilisearch/integration/classes/AbstractIT.java @@ -26,11 +26,13 @@ public abstract class AbstractIT { public static final String MOVIES_INDEX = "movies.json";
public static final String NESTED_MOVIES = "nested_movies.json"; + public static final String VECTOR_MOVIES = "vector_movies.json"; public AbstractIT() { try { loadResource(MOVIES_INDEX); loadResource(NESTED_MOVIES); + loadResource(VECTOR_MOVIES); } catch (IOException e) { e.printStackTrace(); } diff --git a/src/test/java/com/meilisearch/sdk/utils/Movie.java b/src/test/java/com/meilisearch/sdk/utils/Movie.java index 2a881d8f..d59926b2 100644 --- a/src/test/java/com/meilisearch/sdk/utils/Movie.java +++ b/src/test/java/com/meilisearch/sdk/utils/Movie.java @@ -15,6 +15,7 @@ public class Movie { private HashMap> _matchesPosition; private Double _rankingScore; private HashMap _rankingScoreDetails; + private HashMap _vectors; public class Match { public int start; diff --git a/src/test/resources/vector_movies.json b/src/test/resources/vector_movies.json new file mode 100644 index 00000000..344f04d7 --- /dev/null +++ b/src/test/resources/vector_movies.json @@ -0,0 +1,32 @@ +[ + { + "title": "Shazam!", + "release_year": 2019, + "id": "287947", + "_vectors": { "manual": [0.8, 0.4, -0.5]} + }, + { + "title": "Captain Marvel", + "release_year": 2019, + "id": "299537", + "_vectors": { "manual": [0.6, 0.8, -0.2] } + }, + { + "title": "Escape Room", + "release_year": 2019, + "id": "522681", + "_vectors": { "manual": [0.1, 0.6, 0.8] } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "release_year": 2019, + "id": "166428", + "_vectors": { "manual": [0.7, 0.7, -0.4] } + }, + { + "title": "All Quiet on the Western Front", + "release_year": 1930, + "id": "143", + "_vectors": { "manual": [-0.5, 0.3, 0.85] } + } +]