Update CollectionStatusResponse, post COLSTATUS bug-fix
gerlowskija committed Dec 18, 2024
1 parent f7bc066 commit e06c21a
Showing 3 changed files with 244 additions and 23 deletions.
@@ -34,8 +34,7 @@
public interface CollectionStatusApi {

// TODO Query parameters currently match those offered by the v1
// /admin/collections?action=COLSTATUS. Should param names be updated/clarified? Are all params
// still relevant? ('segments', 'sizeInfo', and 'fieldInfo' seem to do very little)
// /admin/collections?action=COLSTATUS. Should param names be updated/clarified?
@GET
@Operation(
summary = "Fetches metadata about the specified collection",
@@ -46,14 +45,22 @@ CollectionStatusResponse getCollectionStatus(
String collectionName,
@Parameter(
description =
"Boolean flag to include metadata (e.g. index an data directories, IndexWriter configuration, etc.) about the leader cores for each shard")
"Boolean flag to include metadata (e.g. index an data directories, IndexWriter configuration, etc.) about each shard leader's core")
@QueryParam("coreInfo")
Boolean coreInfo,
@QueryParam("segments") Boolean segments,
@QueryParam("fieldInfo") Boolean fieldInfo,
@Parameter(
description =
"Boolean flag to include simple estimates of the disk size taken up by each field (e.g. \"id\", \"_version_\") and by each index data structure (e.g. 'storedFields', 'docValues_numeric')")
"Boolean flag to include metadata and statistics about the segments used by each shard leader. Implicitly set to true by 'fieldInfo' and 'sizeInfo'")
@QueryParam("segments")
Boolean segments,
@Parameter(
description =
"Boolean flag to include statistics about the indexed fields present on each shard leader. Implicitly sets the 'segments' flag to 'true'")
@QueryParam("fieldInfo")
Boolean fieldInfo,
@Parameter(
description =
"Boolean flag to include simple estimates of the disk size taken up by each field (e.g. \"id\", \"_version_\") and by each index data structure (e.g. 'storedFields', 'docValues_numeric').")
@QueryParam("rawSize")
Boolean rawSize,
@Parameter(
@@ -71,6 +78,10 @@ CollectionStatusResponse getCollectionStatus(
"Percentage (between 0 and 100) of data to read when estimating index size and statistics. Defaults to 5.0 (i.e. 5%).")
@QueryParam("rawSizeSamplingPercent")
Float rawSizeSamplingPercent,
@QueryParam("sizeInfo") Boolean sizeInfo)
@Parameter(
description =
"Boolean flag to include information about the largest index files for each Lucene segment. Implicitly sets the 'segment' flag to 'true'")
@QueryParam("sizeInfo")
Boolean sizeInfo)
throws Exception;
}
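
For orientation, here is a rough standalone sketch of exercising these flags through the generated SolrJ request class used in the tests below. The base URL, collection name, and the import path for CollectionsApi are assumptions, not something confirmed by this diff; only the setSegments/setSizeInfo setters and the response accessors appear in the tests.

import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.request.CollectionsApi; // assumed package for the generated v2 request classes

public class CollectionStatusExample {
  public static void main(String[] args) throws Exception {
    // Placeholder base URL and collection name
    try (Http2SolrClient client =
        new Http2SolrClient.Builder("http://localhost:8983/solr").build()) {
      var req = new CollectionsApi.GetCollectionStatus("techproducts");
      req.setSizeInfo(true); // per the description above, this implicitly enables 'segments' too
      var rsp = req.process(client).getParsed();
      // Walk each shard leader's per-segment metadata, including the 'largestFiles' map
      rsp.shards.forEach(
          (shardName, shard) ->
              shard.leader.segInfos.segments.forEach(
                  (segmentName, segment) ->
                      System.out.println(
                          shardName + "/" + segmentName + " -> " + segment.largestFilesByName)));
    }
  }
}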
@@ -16,7 +16,10 @@
*/
package org.apache.solr.client.api.model;

import com.fasterxml.jackson.annotation.JsonAnyGetter;
import com.fasterxml.jackson.annotation.JsonAnySetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@@ -41,6 +44,18 @@ public static class CollectionMetadata {
@JsonProperty public Integer tlogReplicas;
@JsonProperty public Map<String, String> router;
@JsonProperty public Integer replicationFactor;

private Map<String, Object> unknownFields = new HashMap<>();

@JsonAnyGetter
public Map<String, Object> unknownProperties() {
return unknownFields;
}

@JsonAnySetter
public void setUnknownProperty(String field, Object value) {
unknownFields.put(field, value);
}
}
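
The @JsonAnyGetter/@JsonAnySetter pair added above presumably lets arbitrary user-defined collection properties survive deserialization even though they have no dedicated field. A minimal sketch of the resulting Jackson behavior, with a made-up JSON body and demo class:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.solr.client.api.model.CollectionStatusResponse;

public class UnknownPropertiesDemo {
  public static void main(String[] args) throws Exception {
    // 'myCustomProp' stands in for a user-defined collection property with no dedicated field
    String json = "{\"nrtReplicas\":2,\"myCustomProp\":\"someValue\"}";
    var metadata =
        new ObjectMapper().readValue(json, CollectionStatusResponse.CollectionMetadata.class);
    System.out.println(metadata.nrtReplicas); // 2, bound to its dedicated field
    System.out.println(metadata.unknownProperties().get("myCustomProp")); // someValue, via @JsonAnySetter
  }
}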

// Always present in response
@@ -51,7 +66,7 @@ public static class ShardMetadata {
@JsonProperty public LeaderSummary leader;
}

// ALways present in response
// Always present in response
public static class ReplicaSummary {
@JsonProperty public Integer total;
@JsonProperty public Integer active;
@@ -84,19 +99,79 @@ public static class LeaderSummary {
@JsonProperty public SegmentInfo segInfos;
}

// Present with coreInfo=true || sizeInfo=true unless otherwise specified
// Present with segments=true || coreInfo=true || sizeInfo=true || fieldInfo=true unless otherwise
// specified
public static class SegmentInfo {
// Present with coreInfo=true || sizeInfo=true unless otherwise specified
@JsonProperty public SegmentSummary info;

// Present with segments=true || sizeInfo=true || fieldInfo=true
@JsonProperty public Map<String, SingleSegmentData> segments;

// Present with rawSize=true
@JsonProperty public RawSize rawSize;

// Present with fieldInfo=true....this seems pretty useless in isolation? Is it maybe a bad
// param name?
// Present only with fieldInfo=true
@JsonProperty public List<String> fieldInfoLegend;
}

// Present with segments=true || sizeInfo=true
public static class SingleSegmentData {
@JsonProperty public String name;
@JsonProperty public Integer delCount;
@JsonProperty public Integer softDelCount;
@JsonProperty public Boolean hasFieldUpdates;
@JsonProperty public Long sizeInBytes;
@JsonProperty public Integer size;
// A date string of the form "2024-12-17T17:35:18.275Z"
@JsonProperty public String age;
@JsonProperty public String source;
@JsonProperty public String version;
@JsonProperty public Integer createdVersionMajor;
@JsonProperty public String minVersion;
@JsonProperty public SegmentDiagnosticInfo diagnostics;
@JsonProperty public Map<String, Object> attributes;

// Present only when fieldInfo=true
@JsonProperty public Map<String, SegmentSingleFieldInfo> fields;

// Present only when sizeInfo=true
@JsonProperty("largestFiles")
public Map<String, String> largestFilesByName;
}

public static class SegmentSingleFieldInfo {
@JsonProperty public String flags;
@JsonProperty public Integer docCount;
@JsonProperty public Integer termCount;
@JsonProperty public Integer sumDocFreq;
@JsonProperty public Integer sumTotalTermFreq;
@JsonProperty public String schemaType;
@JsonProperty public Map<String, String> nonCompliant;
}

// Present with segments=true
public static class SegmentDiagnosticInfo {
@JsonProperty("os.version")
public String osVersion;

@JsonProperty("lucene.version")
public String luceneVersion;

@JsonProperty public String source;
@JsonProperty public Long timestamp;

@JsonProperty("java.runtime.version")
public String javaRuntimeVersion;

@JsonProperty public String os;

@JsonProperty("java.vendor")
public String javaVendor;

@JsonProperty("os.arch")
public String osArchitecture;
}
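
Several of the underlying Lucene diagnostic keys contain dots ('os.version', 'lucene.version', etc.), so they cannot be bound by Java field name alone; hence the explicit @JsonProperty values. A small sketch of the resulting key naming on serialization, with invented values and a NON_NULL inclusion setting used purely to keep the output short:

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.solr.client.api.model.CollectionStatusResponse;

public class DiagnosticKeysDemo {
  public static void main(String[] args) throws Exception {
    var diagnostics = new CollectionStatusResponse.SegmentDiagnosticInfo();
    diagnostics.luceneVersion = "9.12.0"; // invented example values
    diagnostics.osArchitecture = "aarch64";
    var mapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL);
    // Prints the dotted keys used by COLSTATUS: {"lucene.version":"9.12.0","os.arch":"aarch64"}
    System.out.println(mapper.writeValueAsString(diagnostics));
  }
}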

// Present with rawSize=true unless otherwise specified
public static class RawSize {
@JsonProperty public Map<String, String> fieldsBySize;
@@ -146,7 +221,7 @@ public static class IndexWriterConfigSummary {
@JsonProperty public String similarity;
@JsonProperty public String mergeScheduler;
@JsonProperty public String codec;
@JsonProperty public String InfoStream;
@JsonProperty public String infoStream;
@JsonProperty public String mergePolicy;
@JsonProperty public Boolean readerPooling;
@JsonProperty public Integer perThreadHardLimitMB;
155 changes: 145 additions & 10 deletions solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -21,6 +21,9 @@
import static org.apache.solr.common.cloud.ZkStateReader.NUM_SHARDS_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
import static org.apache.solr.common.params.CollectionAdminParams.DEFAULTS;
import static org.hamcrest.Matchers.emptyString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
@@ -39,6 +42,7 @@
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
@@ -570,14 +574,7 @@ private void checkCollectionProperty(String collection, String propertyName, Str
fail("Timed out waiting for cluster property value");
}

@Test
public void testColStatus() throws Exception {
String collectionName = getSaferTestName();
CollectionAdminRequest.createCollection(collectionName, "conf2", 2, 2)
.process(cluster.getSolrClient());

cluster.waitForActiveCollection(collectionName, 2, 4);

private void indexSomeDocs(String collectionName) throws SolrServerException, IOException {
SolrClient client = cluster.getSolrClient();
byte[] binData = collectionName.getBytes(StandardCharsets.UTF_8);
// index some docs
@@ -603,13 +600,113 @@ public void testColStatus() throws Exception {
client.add(collectionName, doc);
}
client.commit(collectionName);
}

@Test
@SuppressWarnings("unchecked")
public void testColStatus() throws Exception {
String collectionName = getSaferTestName();
CollectionAdminRequest.createCollection(collectionName, "conf2", 2, 2)
.process(cluster.getSolrClient());

cluster.waitForActiveCollection(collectionName, 2, 4);
indexSomeDocs(collectionName);

// Returns basic info if no additional flags are set
CollectionAdminRequest.ColStatus req = CollectionAdminRequest.collectionStatus(collectionName);
CollectionAdminResponse rsp = req.process(cluster.getSolrClient());
assertEquals(0, rsp.getStatus());
assertNotNull(rsp.getResponse().get(collectionName));
assertNotNull(rsp.getResponse().findRecursive(collectionName, "properties"));
final var collPropMap =
(Map<String, Object>) rsp.getResponse().findRecursive(collectionName, "properties");
assertEquals("conf2", collPropMap.get("configName"));
assertEquals(2L, collPropMap.get("nrtReplicas"));
assertEquals("0", collPropMap.get("tlogReplicas"));
assertEquals("0", collPropMap.get("pullReplicas"));
assertEquals(
2, ((NamedList<Object>) rsp.getResponse().findRecursive(collectionName, "shards")).size());
assertNotNull(rsp.getResponse().findRecursive(collectionName, "shards", "shard1", "leader"));
// Ensure more advanced info is not returned
assertNull(
rsp.getResponse().findRecursive(collectionName, "shards", "shard1", "leader", "segInfos"));

// Returns segment metadata iff requested
req = CollectionAdminRequest.collectionStatus(collectionName);
req.setWithSegments(true);
rsp = req.process(cluster.getSolrClient());
assertEquals(0, rsp.getStatus());
assertNotNull(rsp.getResponse().get(collectionName));
assertNotNull(
rsp.getResponse()
.findRecursive(
collectionName, "shards", "shard1", "leader", "segInfos", "segments", "_0"));
// Ensure field, size, etc. information isn't returned if only segment data was requested
assertNull(
rsp.getResponse()
.findRecursive(
collectionName,
"shards",
"shard1",
"leader",
"segInfos",
"segments",
"_0",
"fields"));
assertNull(
rsp.getResponse()
.findRecursive(
collectionName,
"shards",
"shard1",
"leader",
"segInfos",
"segments",
"_0",
"largestFiles"));

// Returns segment metadata and file-size info iff requested
// (Note that 'sizeInfo=true' should implicitly enable segments=true)
req = CollectionAdminRequest.collectionStatus(collectionName);
req.setWithSizeInfo(true);
rsp = req.process(cluster.getSolrClient());
assertEquals(0, rsp.getStatus());
assertNotNull(rsp.getResponse().get(collectionName));
assertNotNull(
rsp.getResponse()
.findRecursive(
collectionName, "shards", "shard1", "leader", "segInfos", "segments", "_0"));
assertNotNull(
rsp.getResponse()
.findRecursive(
collectionName,
"shards",
"shard1",
"leader",
"segInfos",
"segments",
"_0",
"largestFiles"));
// Ensure field, etc. information isn't returned if only segment+size data was requested
assertNull(
rsp.getResponse()
.findRecursive(
collectionName,
"shards",
"shard1",
"leader",
"segInfos",
"segments",
"_0",
"fields"));

// Set all flags and ensure everything is returned as expected
req = CollectionAdminRequest.collectionStatus(collectionName);
req.setWithSegments(true);
req.setWithFieldInfo(true);
req.setWithCoreInfo(true);
req.setWithSegments(true);
req.setWithSizeInfo(true);
CollectionAdminResponse rsp = req.process(cluster.getSolrClient());
rsp = req.process(cluster.getSolrClient());
assertEquals(0, rsp.getStatus());
@SuppressWarnings({"unchecked"})
List<Object> nonCompliant =
@@ -625,6 +722,7 @@ public void testColStatus() throws Exception {
assertNotNull(Utils.toJSONString(rsp), segInfos.get("fieldInfoLegend"));
assertNotNull(
Utils.toJSONString(rsp), segInfos.findRecursive("segments", "_0", "fields", "id", "flags"));

// test for replicas not active - SOLR-13882
DocCollection coll = cluster.getSolrClient().getClusterState().getCollection(collectionName);
Replica firstReplica = coll.getSlice("shard1").getReplicas().iterator().next();
@@ -709,6 +807,7 @@ public void testV2BasicCollectionStatus() throws Exception {
CollectionAdminRequest.createCollection(simpleCollName, "conf2", 2, 1, 1, 1)
.process(cluster.getSolrClient());
cluster.waitForActiveCollection(simpleCollName, 2, 6);
indexSomeDocs(simpleCollName);

final var simpleResponse =
new CollectionsApi.GetCollectionStatus(simpleCollName)
@@ -722,6 +821,42 @@ public void testV2BasicCollectionStatus() throws Exception {
assertEquals(Integer.valueOf(1), simpleResponse.properties.replicationFactor);
assertEquals(Integer.valueOf(1), simpleResponse.properties.pullReplicas);
assertEquals(Integer.valueOf(1), simpleResponse.properties.tlogReplicas);
assertNotNull(simpleResponse.shards.get("shard1").leader);
assertNull(simpleResponse.shards.get("shard1").leader.segInfos);

// Ensure segment data present when request sets 'segments=true' flag
final var segmentDataRequest = new CollectionsApi.GetCollectionStatus(simpleCollName);
segmentDataRequest.setSegments(true);
final var segmentDataResponse = segmentDataRequest.process(cluster.getSolrClient()).getParsed();
var segmentData = segmentDataResponse.shards.get("shard1").leader.segInfos;
assertNotNull(segmentData);
assertTrue(segmentData.info.numSegments > 0); // Expect at least one segment
assertEquals(segmentData.info.numSegments.intValue(), segmentData.segments.size());
assertEquals(Version.LATEST.toString(), segmentData.info.commitLuceneVersion);
// Ensure field, size, etc. data not provided
assertNull(segmentData.segments.get("_0").fields);
assertNull(segmentData.segments.get("_0").largestFilesByName);

// Ensure file-size data present when request sets sizeInfo flag
final var segmentFileSizeRequest = new CollectionsApi.GetCollectionStatus(simpleCollName);
segmentFileSizeRequest.setSizeInfo(true);
final var segmentFileSizeResponse =
segmentFileSizeRequest.process(cluster.getSolrClient()).getParsed();
segmentData = segmentFileSizeResponse.shards.get("shard1").leader.segInfos;
assertNotNull(segmentData);
final var largeFileList = segmentData.segments.get("_0").largestFilesByName;
assertNotNull(largeFileList);
// Hard to assert what the largest index files should be, but:
// - there should be at least 1 entry and...
// - all keys/values should be non-empty
assertTrue(largeFileList.size() > 0);
largeFileList.forEach(
(fileName, size) -> {
assertThat(fileName, is(not(emptyString())));
assertThat(size, is(not(emptyString())));
});
// Ensure field, etc. data not provided
assertNull(segmentData.segments.get("_0").fields);
}

private static final int NUM_DOCS = 10;
