diff --git a/solr/api/src/java/org/apache/solr/client/api/endpoint/CollectionStatusApi.java b/solr/api/src/java/org/apache/solr/client/api/endpoint/CollectionStatusApi.java index 5929820490a..5db36b2e1d3 100644 --- a/solr/api/src/java/org/apache/solr/client/api/endpoint/CollectionStatusApi.java +++ b/solr/api/src/java/org/apache/solr/client/api/endpoint/CollectionStatusApi.java @@ -34,8 +34,7 @@ public interface CollectionStatusApi { // TODO Query parameters currently match those offered by the v1 - // /admin/collections?action=COLSTATUS. Should param names be updated/clarified? Are all params - // still relevant? ('segments', 'sizeInfo', and 'fieldInfo' seem to do very little) + // /admin/collections?action=COLSTATUS. Should param names be updated/clarified? @GET @Operation( summary = "Fetches metadata about the specified collection", @@ -46,14 +45,22 @@ CollectionStatusResponse getCollectionStatus( String collectionName, @Parameter( description = - "Boolean flag to include metadata (e.g. index an data directories, IndexWriter configuration, etc.) about the leader cores for each shard") + "Boolean flag to include metadata (e.g. index and data directories, IndexWriter configuration, etc.) about each shard leader's core") @QueryParam("coreInfo") Boolean coreInfo, - @QueryParam("segments") Boolean segments, - @QueryParam("fieldInfo") Boolean fieldInfo, @Parameter( description = - "Boolean flag to include simple estimates of the disk size taken up by each field (e.g. \"id\", \"_version_\") and by each index data structure (e.g. 'storedFields', 'docValues_numeric')") + "Boolean flag to include metadata and statistics about the segments used by each shard leader. Implicitly set to true by 'fieldInfo' and 'sizeInfo'") + @QueryParam("segments") + Boolean segments, + @Parameter( + description = + "Boolean flag to include statistics about the indexed fields present on each shard leader. 
Implicitly sets the 'segments' flag to 'true'") + @QueryParam("fieldInfo") + Boolean fieldInfo, + @Parameter( + description = + "Boolean flag to include simple estimates of the disk size taken up by each field (e.g. \"id\", \"_version_\") and by each index data structure (e.g. 'storedFields', 'docValues_numeric').") @QueryParam("rawSize") Boolean rawSize, @Parameter( @@ -71,6 +78,10 @@ CollectionStatusResponse getCollectionStatus( "Percentage (between 0 and 100) of data to read when estimating index size and statistics. Defaults to 5.0 (i.e. 5%).") @QueryParam("rawSizeSamplingPercent") Float rawSizeSamplingPercent, - @QueryParam("sizeInfo") Boolean sizeInfo) + @Parameter( + description = + "Boolean flag to include information about the largest index files for each Lucene segment. Implicitly sets the 'segments' flag to 'true'") + @QueryParam("sizeInfo") + Boolean sizeInfo) throws Exception; } diff --git a/solr/api/src/java/org/apache/solr/client/api/model/CollectionStatusResponse.java b/solr/api/src/java/org/apache/solr/client/api/model/CollectionStatusResponse.java index 6be2c8cbe21..fafea4e75be 100644 --- a/solr/api/src/java/org/apache/solr/client/api/model/CollectionStatusResponse.java +++ b/solr/api/src/java/org/apache/solr/client/api/model/CollectionStatusResponse.java @@ -16,7 +16,10 @@ */ package org.apache.solr.client.api.model; +import com.fasterxml.jackson.annotation.JsonAnyGetter; +import com.fasterxml.jackson.annotation.JsonAnySetter; import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -41,6 +44,18 @@ public static class CollectionMetadata { @JsonProperty public Integer tlogReplicas; @JsonProperty public Map router; @JsonProperty public Integer replicationFactor; + + private Map unknownFields = new HashMap<>(); + + @JsonAnyGetter + public Map unknownProperties() { + return unknownFields; + } + + @JsonAnySetter + public void setUnknownProperty(String field, Object value) { + 
unknownFields.put(field, value); + } } // Always present in response @@ -51,7 +66,7 @@ public static class ShardMetadata { @JsonProperty public LeaderSummary leader; } - // ALways present in response + // Always present in response public static class ReplicaSummary { @JsonProperty public Integer total; @JsonProperty public Integer active; @@ -84,19 +99,79 @@ public static class LeaderSummary { @JsonProperty public SegmentInfo segInfos; } - // Present with coreInfo=true || sizeInfo=true unless otherwise specified + // Present with segments=true || coreInfo=true || sizeInfo=true || fieldInfo=true unless otherwise + // specified public static class SegmentInfo { - // Present with coreInfo=true || sizeInfo=true unless otherwise specified @JsonProperty public SegmentSummary info; + // Present with segments=true || sizeInfo=true || fieldInfo=true + @JsonProperty public Map segments; + // Present with rawSize=true @JsonProperty public RawSize rawSize; - // Present with fieldInfo=true....this seems pretty useless in isolation? Is it maybe a bad - // param name? 
+ // Present only with fieldInfo=true @JsonProperty public List fieldInfoLegend; } + // Present with segments=true || sizeInfo=true + public static class SingleSegmentData { + @JsonProperty public String name; + @JsonProperty public Integer delCount; + @JsonProperty public Integer softDelCount; + @JsonProperty public Boolean hasFieldUpdates; + @JsonProperty public Long sizeInBytes; + @JsonProperty public Integer size; + // A date string of the form "2024-12-17T17:35:18.275Z" + @JsonProperty public String age; + @JsonProperty public String source; + @JsonProperty public String version; + @JsonProperty public Integer createdVersionMajor; + @JsonProperty public String minVersion; + @JsonProperty public SegmentDiagnosticInfo diagnostics; + @JsonProperty public Map attributes; + + // Present only when fieldInfo=true + @JsonProperty public Map fields; + + // Present only when sizeInfo=true + @JsonProperty("largestFiles") + public Map largestFilesByName; + } + + public static class SegmentSingleFieldInfo { + @JsonProperty public String flags; + @JsonProperty public Integer docCount; + @JsonProperty public Integer termCount; + @JsonProperty public Integer sumDocFreq; + @JsonProperty public Integer sumTotalTermFreq; + @JsonProperty public String schemaType; + @JsonProperty public Map nonCompliant; + } + + // Present with segments=true + public static class SegmentDiagnosticInfo { + @JsonProperty("os.version") + public String osVersion; + + @JsonProperty("lucene.version") + public String luceneVersion; + + @JsonProperty public String source; + @JsonProperty public Long timestamp; + + @JsonProperty("java.runtime.version") + public String javaRuntimeVersion; + + @JsonProperty public String os; + + @JsonProperty("java.vendor") + public String javaVendor; + + @JsonProperty("os.arch") + public String osArchitecture; + } + // Present with rawSize=true unless otherwise specified public static class RawSize { + @JsonProperty public Map fieldsBySize; @@ -146,7 +221,7 @@ public static 
class IndexWriterConfigSummary { @JsonProperty public String similarity; @JsonProperty public String mergeScheduler; @JsonProperty public String codec; - @JsonProperty public String InfoStream; + @JsonProperty public String infoStream; @JsonProperty public String mergePolicy; @JsonProperty public Boolean readerPooling; @JsonProperty public Integer perThreadHardLimitMB; diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java index 8f4a91a76b1..21d82719e38 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java @@ -21,6 +21,9 @@ import static org.apache.solr.common.cloud.ZkStateReader.NUM_SHARDS_PROP; import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION; import static org.apache.solr.common.params.CollectionAdminParams.DEFAULTS; +import static org.hamcrest.Matchers.emptyString; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -39,6 +42,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.Version; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrServerException; @@ -570,14 +574,7 @@ private void checkCollectionProperty(String collection, String propertyName, Str fail("Timed out waiting for cluster property value"); } - @Test - public void testColStatus() throws Exception { - String collectionName = getSaferTestName(); - CollectionAdminRequest.createCollection(collectionName, "conf2", 2, 2) - .process(cluster.getSolrClient()); - - cluster.waitForActiveCollection(collectionName, 2, 4); - + private void indexSomeDocs(String collectionName) throws 
SolrServerException, IOException { SolrClient client = cluster.getSolrClient(); byte[] binData = collectionName.getBytes(StandardCharsets.UTF_8); // index some docs @@ -603,13 +600,113 @@ public void testColStatus() throws Exception { client.add(collectionName, doc); } client.commit(collectionName); + } + + @Test + @SuppressWarnings("unchecked") + public void testColStatus() throws Exception { + String collectionName = getSaferTestName(); + CollectionAdminRequest.createCollection(collectionName, "conf2", 2, 2) + .process(cluster.getSolrClient()); + cluster.waitForActiveCollection(collectionName, 2, 4); + indexSomeDocs(collectionName); + + // Returns basic info if no additional flags are set CollectionAdminRequest.ColStatus req = CollectionAdminRequest.collectionStatus(collectionName); + CollectionAdminResponse rsp = req.process(cluster.getSolrClient()); + assertEquals(0, rsp.getStatus()); + assertNotNull(rsp.getResponse().get(collectionName)); + assertNotNull(rsp.getResponse().findRecursive(collectionName, "properties")); + final var collPropMap = + (Map) rsp.getResponse().findRecursive(collectionName, "properties"); + assertEquals("conf2", collPropMap.get("configName")); + assertEquals(2L, collPropMap.get("nrtReplicas")); + assertEquals("0", collPropMap.get("tlogReplicas")); + assertEquals("0", collPropMap.get("pullReplicas")); + assertEquals( + 2, ((NamedList) rsp.getResponse().findRecursive(collectionName, "shards")).size()); + assertNotNull(rsp.getResponse().findRecursive(collectionName, "shards", "shard1", "leader")); + // Ensure more advanced info is not returned + assertNull( + rsp.getResponse().findRecursive(collectionName, "shards", "shard1", "leader", "segInfos")); + + // Returns segment metadata iff requested + req = CollectionAdminRequest.collectionStatus(collectionName); + req.setWithSegments(true); + rsp = req.process(cluster.getSolrClient()); + assertEquals(0, rsp.getStatus()); + assertNotNull(rsp.getResponse().get(collectionName)); + assertNotNull( 
+ rsp.getResponse() + .findRecursive( + collectionName, "shards", "shard1", "leader", "segInfos", "segments", "_0")); + // Ensure field, size, etc. information isn't returned if only segment data was requested + assertNull( + rsp.getResponse() + .findRecursive( + collectionName, + "shards", + "shard1", + "leader", + "segInfos", + "segments", + "_0", + "fields")); + assertNull( + rsp.getResponse() + .findRecursive( + collectionName, + "shards", + "shard1", + "leader", + "segInfos", + "segments", + "_0", + "largestFiles")); + + // Returns segment metadata and file-size info iff requested + // (Note that 'sizeInfo=true' should implicitly enable segments=true) + req = CollectionAdminRequest.collectionStatus(collectionName); + req.setWithSizeInfo(true); + rsp = req.process(cluster.getSolrClient()); + assertEquals(0, rsp.getStatus()); + assertNotNull(rsp.getResponse().get(collectionName)); + assertNotNull( + rsp.getResponse() + .findRecursive( + collectionName, "shards", "shard1", "leader", "segInfos", "segments", "_0")); + assertNotNull( + rsp.getResponse() + .findRecursive( + collectionName, + "shards", + "shard1", + "leader", + "segInfos", + "segments", + "_0", + "largestFiles")); + // Ensure field, etc. 
information isn't returned if only segment+size data was requested + assertNull( + rsp.getResponse() + .findRecursive( + collectionName, + "shards", + "shard1", + "leader", + "segInfos", + "segments", + "_0", + "fields")); + + // Set all flags and ensure everything is returned as expected + req = CollectionAdminRequest.collectionStatus(collectionName); + req.setWithSegments(true); req.setWithFieldInfo(true); req.setWithCoreInfo(true); - req.setWithSegments(true); req.setWithSizeInfo(true); - CollectionAdminResponse rsp = req.process(cluster.getSolrClient()); + rsp = req.process(cluster.getSolrClient()); assertEquals(0, rsp.getStatus()); @SuppressWarnings({"unchecked"}) List nonCompliant = @@ -625,6 +722,7 @@ public void testColStatus() throws Exception { assertNotNull(Utils.toJSONString(rsp), segInfos.get("fieldInfoLegend")); assertNotNull( Utils.toJSONString(rsp), segInfos.findRecursive("segments", "_0", "fields", "id", "flags")); + // test for replicas not active - SOLR-13882 DocCollection coll = cluster.getSolrClient().getClusterState().getCollection(collectionName); Replica firstReplica = coll.getSlice("shard1").getReplicas().iterator().next(); @@ -709,6 +807,7 @@ public void testV2BasicCollectionStatus() throws Exception { CollectionAdminRequest.createCollection(simpleCollName, "conf2", 2, 1, 1, 1) .process(cluster.getSolrClient()); cluster.waitForActiveCollection(simpleCollName, 2, 6); + indexSomeDocs(simpleCollName); final var simpleResponse = new CollectionsApi.GetCollectionStatus(simpleCollName) @@ -722,6 +821,42 @@ public void testV2BasicCollectionStatus() throws Exception { assertEquals(Integer.valueOf(1), simpleResponse.properties.replicationFactor); assertEquals(Integer.valueOf(1), simpleResponse.properties.pullReplicas); assertEquals(Integer.valueOf(1), simpleResponse.properties.tlogReplicas); + assertNotNull(simpleResponse.shards.get("shard1").leader); + assertNull(simpleResponse.shards.get("shard1").leader.segInfos); + + // Ensure segment data 
present when request sets 'segments=true' flag + final var segmentDataRequest = new CollectionsApi.GetCollectionStatus(simpleCollName); + segmentDataRequest.setSegments(true); + final var segmentDataResponse = segmentDataRequest.process(cluster.getSolrClient()).getParsed(); + var segmentData = segmentDataResponse.shards.get("shard1").leader.segInfos; + assertNotNull(segmentData); + assertTrue(segmentData.info.numSegments > 0); // Expect at least one segment + assertEquals(segmentData.info.numSegments.intValue(), segmentData.segments.size()); + assertEquals(Version.LATEST.toString(), segmentData.info.commitLuceneVersion); + // Ensure field, size, etc. data not provided + assertNull(segmentData.segments.get("_0").fields); + assertNull(segmentData.segments.get("_0").largestFilesByName); + + // Ensure file-size data present when request sets sizeInfo flag + final var segmentFileSizeRequest = new CollectionsApi.GetCollectionStatus(simpleCollName); + segmentFileSizeRequest.setSizeInfo(true); + final var segmentFileSizeResponse = + segmentFileSizeRequest.process(cluster.getSolrClient()).getParsed(); + segmentData = segmentFileSizeResponse.shards.get("shard1").leader.segInfos; + assertNotNull(segmentData); + final var largeFileList = segmentData.segments.get("_0").largestFilesByName; + assertNotNull(largeFileList); + // Hard to assert what the largest index files should be, but: + // - there should be at least 1 entry and... + // - all keys/values should be non-empty + assertTrue(largeFileList.size() > 0); + largeFileList.forEach( + (fileName, size) -> { + assertThat(fileName, is(not(emptyString()))); + assertThat(size, is(not(emptyString()))); + }); + // Ensure field, etc. data not provided + assertNull(segmentData.segments.get("_0").fields); } private static final int NUM_DOCS = 10;