Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-17582 Stream CLUSTERSTATUS API response #2916

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion solr/core/src/java/org/apache/solr/cli/StatusTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,8 @@ protected Map<String, String> getCloudStatus(SolrClient solrClient, String zkHos
cloudStatus.put("liveNodes", String.valueOf(liveNodes.size()));

// TODO get this as a metric from the metrics API instead, or something else.
var collections = (NamedList<Object>) json.findRecursive("cluster", "collections");
Map<String, Object> collections =
(Map<String, Object>) json.findRecursive("cluster", "collections");
cloudStatus.put("collections", String.valueOf(collections.size()));

return cloudStatus;
Expand Down
152 changes: 86 additions & 66 deletions solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
package org.apache.solr.handler.admin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -27,6 +27,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Aliases;
import org.apache.solr.common.cloud.ClusterState;
Expand Down Expand Up @@ -180,6 +181,14 @@ private void fetchClusterStatusForCollOrAlias(
String routeKey = solrParams.get(ShardParams._ROUTE_);
String shard = solrParams.get(ZkStateReader.SHARD_ID_PROP);

Set<String> requestedShards;
if (shard != null) {
String[] paramShards = shard.split(",");
requestedShards = Set.of(paramShards);
} else {
requestedShards = Set.of();
}

Stream<DocCollection> collectionStream;
if (collection == null) {
collectionStream = clusterState.collectionStream();
Expand All @@ -205,54 +214,25 @@ private void fetchClusterStatusForCollOrAlias(
}
}

// TODO use an Iterable to stream the data to the client instead of gathering it all in mem

NamedList<Object> collectionProps = new SimpleOrderedMap<>();

collectionStream.forEach(
clusterStateCollection -> {
Map<String, Object> collectionStatus;
String name = clusterStateCollection.getName();

Set<String> requestedShards = new HashSet<>();
if (routeKey != null) {
DocRouter router = clusterStateCollection.getRouter();
Collection<Slice> slices =
router.getSearchSlices(routeKey, null, clusterStateCollection);
for (Slice slice : slices) {
requestedShards.add(slice.getName());
}
}
if (shard != null) {
String[] paramShards = shard.split(",");
requestedShards.addAll(Arrays.asList(paramShards));
}

byte[] bytes = Utils.toJSON(clusterStateCollection);
@SuppressWarnings("unchecked")
Map<String, Object> docCollection = (Map<String, Object>) Utils.fromJSON(bytes);
collectionStatus = getCollectionStatus(docCollection, name, requestedShards);

collectionStatus.put("znodeVersion", clusterStateCollection.getZNodeVersion());
collectionStatus.put(
"creationTimeMillis", clusterStateCollection.getCreationTime().toEpochMilli());

if (collectionVsAliases.containsKey(name) && !collectionVsAliases.get(name).isEmpty()) {
collectionStatus.put("aliases", collectionVsAliases.get(name));
}
String configName = clusterStateCollection.getConfigName();
collectionStatus.put("configName", configName);
if (solrParams.getBool("prs", false) && clusterStateCollection.isPerReplicaState()) {
PerReplicaStates prs = clusterStateCollection.getPerReplicaStates();
collectionStatus.put("PRS", prs);
}
collectionProps.add(name, collectionStatus);
});

// now we need to walk the collectionProps tree to cross-check replica state with live nodes
crossCheckReplicaStateWithLiveNodes(liveNodes, collectionProps);

clusterStatus.add("collections", collectionProps);
MapWriter collectionPropsWriter =
ew -> {
collectionStream.forEach(
(collectionState) -> {
Comment on lines +210 to +211
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

friggin beautiful now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

putNoEx removes the try/catch we can get it cleaner!

try {
ew.put(
collectionState.getName(),
buildResponseForCollection(
collectionState,
collectionVsAliases,
routeKey,
liveNodes,
requestedShards));
} catch (IOException e) {
throw new RuntimeException(e);
Copy link
Contributor

@dsmiley dsmiley Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does buildResponseForCollection throw IOException? That's suspicious. If you must, catch in there and throw a suitable exception like SolrException (which extends RuntimeException and is generally preferred within Solr over RE).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buildResponseForCollection doesn't throw the IOException, the EntryWriter does but looks like there is actually a putNoEx method that catches for us and that throws a generic RuntimeException. I could throw a SolrException there with better logging if you think its better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

putNoEx then

}
});
};
clusterStatus.add("collections", collectionPropsWriter);
}

private void addAliasMap(Aliases aliases, NamedList<Object> clusterStatus) {
Expand Down Expand Up @@ -307,23 +287,20 @@ private Map<String, Object> getCollectionStatus(
*/
@SuppressWarnings("unchecked")
protected void crossCheckReplicaStateWithLiveNodes(
List<String> liveNodes, NamedList<Object> collectionProps) {
for (Map.Entry<String, Object> next : collectionProps) {
Map<String, Object> collMap = (Map<String, Object>) next.getValue();
Map<String, Object> shards = (Map<String, Object>) collMap.get("shards");
for (Object nextShard : shards.values()) {
Map<String, Object> shardMap = (Map<String, Object>) nextShard;
Map<String, Object> replicas = (Map<String, Object>) shardMap.get("replicas");
for (Object nextReplica : replicas.values()) {
Map<String, Object> replicaMap = (Map<String, Object>) nextReplica;
if (Replica.State.getState((String) replicaMap.get(ZkStateReader.STATE_PROP))
!= Replica.State.DOWN) {
// not down, so verify the node is live
String node_name = (String) replicaMap.get(ZkStateReader.NODE_NAME_PROP);
if (!liveNodes.contains(node_name)) {
// node is not live, so this replica is actually down
replicaMap.put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
}
List<String> liveNodes, Map<String, Object> collectionProps) {
Map<String, Object> shards = (Map<String, Object>) collectionProps.get("shards");
for (Object nextShard : shards.values()) {
Map<String, Object> shardMap = (Map<String, Object>) nextShard;
Map<String, Object> replicas = (Map<String, Object>) shardMap.get("replicas");
for (Object nextReplica : replicas.values()) {
Map<String, Object> replicaMap = (Map<String, Object>) nextReplica;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I almost want to cry just glancing at this.
This is the poster-child for why Java introduced "var". And there may be other approaches to improve it but that's the simplest.
(Yeah you didn't write it; I know)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll change those Maps to var but its another reason I think I'll come back to this. I could try to improve on this and I think its possible to remove a bunch of code here like buildResponseForCollection and postProcessCollectionJSON

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

totally understood. In some old Solr code like this, there's always "and one more thing" we could/should do but ultimately snowballs the scope out of control. I leave it to you to do as you wish. Thank you for your contribution here; I didn't mean to get more out of you than you bargained for :-)

if (Replica.State.getState((String) replicaMap.get(ZkStateReader.STATE_PROP))
!= Replica.State.DOWN) {
// not down, so verify the node is live
String node_name = (String) replicaMap.get(ZkStateReader.NODE_NAME_PROP);
if (!liveNodes.contains(node_name)) {
// node is not live, so this replica is actually down
replicaMap.put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
}
}
}
Expand Down Expand Up @@ -368,4 +345,47 @@ public static Map<String, Object> postProcessCollectionJSON(Map<String, Object>
collection.put("health", Health.combine(healthStates).toString());
return collection;
}

private Map<String, Object> buildResponseForCollection(
DocCollection clusterStateCollection,
Map<String, List<String>> collectionVsAliases,
String routeKey,
List<String> liveNodes,
Set<String> requestedShards) {
Map<String, Object> collectionStatus;
Set<String> shards = new HashSet<>(requestedShards);
String name = clusterStateCollection.getName();

if (routeKey != null) {
DocRouter router = clusterStateCollection.getRouter();
Collection<Slice> slices = router.getSearchSlices(routeKey, null, clusterStateCollection);
for (Slice slice : slices) {
shards.add(slice.getName());
}
}

byte[] bytes = Utils.toJSON(clusterStateCollection);
@SuppressWarnings("unchecked")
Map<String, Object> docCollection = (Map<String, Object>) Utils.fromJSON(bytes);
collectionStatus = getCollectionStatus(docCollection, name, shards);

collectionStatus.put("znodeVersion", clusterStateCollection.getZNodeVersion());
collectionStatus.put(
"creationTimeMillis", clusterStateCollection.getCreationTime().toEpochMilli());

if (collectionVsAliases.containsKey(name) && !collectionVsAliases.get(name).isEmpty()) {
collectionStatus.put("aliases", collectionVsAliases.get(name));
}
String configName = clusterStateCollection.getConfigName();
collectionStatus.put("configName", configName);
if (solrParams.getBool("prs", false) && clusterStateCollection.isPerReplicaState()) {
PerReplicaStates prs = clusterStateCollection.getPerReplicaStates();
collectionStatus.put("PRS", prs);
}

// now we need to walk the collectionProps tree to cross-check replica state with live nodes
crossCheckReplicaStateWithLiveNodes(liveNodes, collectionStatus);

return collectionStatus;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,12 @@ private void testModifyCollection() throws Exception {
.getResponse();
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertEquals(1, collections.size());
assertEquals("25", collections._getStr(List.of(COLLECTION_NAME, "replicationFactor"), null));
Map<?, ?> collectionProperties = (Map<?, ?>) collections.get(COLLECTION_NAME);
collectionProperties.get("replicationFactor");
assertEquals("25", collectionProperties.get("replicationFactor"));

params = new ModifiableSolrParams();
params.set("action", CollectionParams.CollectionAction.MODIFYCOLLECTION.toString());
Expand All @@ -151,10 +153,12 @@ private void testModifyCollection() throws Exception {
System.out.println(rsp);
cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
collections = (NamedList<?>) cluster.get("collections");
collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertEquals(1, collections.size());
assertNull(collections._getStr(List.of(COLLECTION_NAME, "replicationFactor"), null));
collectionProperties = (Map<?, ?>) collections.get(COLLECTION_NAME);
collectionProperties.get("replicationFactor");
assertNull(collectionProperties.get("replicationFactor"));

params = new ModifiableSolrParams();
params.set("action", CollectionParams.CollectionAction.MODIFYCOLLECTION.toString());
Expand Down Expand Up @@ -253,7 +257,7 @@ private void testNoConfigset() throws Exception {
NamedList<?> rsp = client.request(req);
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(
"Testing to insure collections are returned", collections.get(COLLECTION_NAME1));
Expand All @@ -280,7 +284,7 @@ private void assertCountsForRepFactorAndNrtReplicas(CloudSolrClient client, Stri
NamedList<Object> rsp = client.request(request);
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertEquals(1, collections.size());
@SuppressWarnings({"unchecked"})
Expand All @@ -302,7 +306,7 @@ private void clusterStatusWithCollectionAndShard() throws IOException, SolrServe
NamedList<Object> rsp = client.request(request);
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(COLLECTION_NAME));
assertEquals(1, collections.size());
Expand All @@ -328,7 +332,7 @@ private void clusterStatusWithCollectionAndMultipleShards()
NamedList<Object> rsp = request.process(client).getResponse();
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(COLLECTION_NAME));
assertEquals(1, collections.size());
Expand Down Expand Up @@ -463,7 +467,7 @@ private void clusterStatusNoCollection() throws Exception {
NamedList<Object> rsp = client.request(request);
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(COLLECTION_NAME1));
assertEquals(4, collections.size());
Expand All @@ -485,7 +489,7 @@ private void clusterStatusWithCollection() throws IOException, SolrServerExcepti
NamedList<Object> rsp = client.request(request);
NamedList<?> cluster = (NamedList<?>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<?> collections = (NamedList<?>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertEquals(1, collections.size());
@SuppressWarnings({"unchecked"})
Expand Down Expand Up @@ -515,7 +519,7 @@ private void clusterStatusZNodeVersion() throws Exception {
NamedList<Object> rsp = client.request(request);
NamedList<Object> cluster = (NamedList<Object>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
NamedList<Object> collections = (NamedList<Object>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertEquals(1, collections.size());
Map<String, Object> collection = (Map<String, Object>) collections.get(cname);
Expand All @@ -531,7 +535,7 @@ private void clusterStatusZNodeVersion() throws Exception {

rsp = client.request(request);
cluster = (NamedList<Object>) rsp.get("cluster");
collections = (NamedList<Object>) cluster.get("collections");
collections = (Map<?, ?>) cluster.get("collections");
collection = (Map<String, Object>) collections.get(cname);
Integer newVersion = (Integer) collection.get("znodeVersion");
assertNotNull(newVersion);
Expand All @@ -558,7 +562,7 @@ private void clusterStatusWithRouteKey() throws IOException, SolrServerException
NamedList<Object> cluster = (NamedList<Object>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
@SuppressWarnings({"unchecked"})
NamedList<Object> collections = (NamedList<Object>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(DEFAULT_COLLECTION));
assertEquals(1, collections.size());
Expand Down Expand Up @@ -605,7 +609,7 @@ private void clusterStatusAliasTest() throws Exception {
DEFAULT_COLLECTION + "," + COLLECTION_NAME,
aliases.get("myalias"));

NamedList<Object> collections = (NamedList<Object>) cluster.get("collections");
Map<?, ?> collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(DEFAULT_COLLECTION));
Map<String, Object> collection = (Map<String, Object>) collections.get(DEFAULT_COLLECTION);
Expand All @@ -625,7 +629,7 @@ private void clusterStatusAliasTest() throws Exception {

cluster = (NamedList<Object>) rsp.get("cluster");
assertNotNull("Cluster state should not be null", cluster);
collections = (NamedList<Object>) cluster.get("collections");
collections = (Map<?, ?>) cluster.get("collections");
assertNotNull("Collections should not be null in cluster state", collections);
assertNotNull(collections.get(DEFAULT_COLLECTION));
assertNotNull(collections.get(COLLECTION_NAME));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.CollectionUtil;
import org.apache.solr.common.util.EnvUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils;
import org.slf4j.Logger;
Expand Down Expand Up @@ -140,11 +139,11 @@ private ClusterState fetchClusterState(SolrClient client)
liveNodesTimestamp = System.nanoTime();
}

var collectionsNl = (NamedList<Map<String, Object>>) cluster.get("collections");
var collectionsMap = (Map<String, Map<String, Object>>) cluster.get("collections");

Map<String, DocCollection> collStateByName =
CollectionUtil.newLinkedHashMap(collectionsNl.size());
for (Entry<String, Map<String, Object>> entry : collectionsNl) {
CollectionUtil.newLinkedHashMap(collectionsMap.size());
for (Entry<String, Map<String, Object>> entry : collectionsMap.entrySet()) {
collStateByName.put(
entry.getKey(), getDocCollectionFromObjects(entry.getKey(), entry.getValue()));
}
Expand Down Expand Up @@ -185,7 +184,9 @@ private DocCollection fetchCollectionState(SolrClient client, String collection)
SimpleOrderedMap<?> cluster =
submitClusterStateRequest(client, collection, ClusterStateRequestType.FETCH_COLLECTION);

var collStateMap = (Map<String, Object>) cluster.findRecursive("collections", collection);
var collState = (Map<String, Object>) cluster.findRecursive("collections");
mlbiscoc marked this conversation as resolved.
Show resolved Hide resolved
var collStateMap = (Map<String, Object>) collState.get(collection);

if (collStateMap == null) {
throw new NotACollectionException(); // probably an alias
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ private Instant getCreationTimeFromClusterStatus(String collectionName)
NamedList<Object> response = clusterStatusResponse.getResponse();

NamedList<Object> cluster = (NamedList<Object>) response.get("cluster");
NamedList<Object> collections = (NamedList<Object>) cluster.get("collections");
Map<String, Object> collections = (Map<String, Object>) cluster.get("collections");
Map<String, Object> collection = (Map<String, Object>) collections.get(collectionName);
return Instant.ofEpochMilli((long) collection.get("creationTimeMillis"));
}
Expand Down
Loading