Skip to content

Commit

Permalink
Search coordinator tracing
Browse files Browse the repository at this point in the history
Signed-off-by: David Zane <[email protected]>
  • Loading branch information
dzane17 committed Jan 9, 2024
1 parent 3a3da4f commit 4d3794d
Show file tree
Hide file tree
Showing 17 changed files with 285 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ public final void start() {
null
)
);
searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchRequestContext);
return;
}
executePhase(this);
Expand Down Expand Up @@ -439,18 +440,18 @@ private void onPhaseEnd(SearchRequestContext searchRequestContext) {
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseEnd(this, searchRequestContext);
}

private void onPhaseStart(SearchPhase phase) {
private void onPhaseStart(SearchPhase phase, SearchRequestContext searchRequestContext) {
setCurrentPhase(phase);
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseStart(this);
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseStart(this, searchRequestContext);
}

private void onRequestEnd(SearchRequestContext searchRequestContext) {
this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(this, searchRequestContext);
this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchRequestContext);
}

private void executePhase(SearchPhase phase) {
try {
onPhaseStart(phase);
onPhaseStart(phase, searchRequestContext);
phase.recordAndRun();
} catch (Exception e) {
if (logger.isDebugEnabled()) {
Expand Down Expand Up @@ -703,6 +704,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At
searchContextId = null;
}
}
searchRequestContext.setSearchTask(getTask());
searchRequestContext.setTotalHits(internalSearchResponse.hits().getTotalHits());
searchRequestContext.setShardStats(results.getNumShards(), successfulOps.get(), skippedOps.get(), failures.length);
onPhaseEnd(searchRequestContext);
Expand All @@ -714,7 +716,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At

@Override
public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) {
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this);
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this, searchRequestContext);
raisePhaseFailure(new SearchPhaseExecutionException(phase.getName(), msg, cause, buildShardFailures()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.search.TotalHits;
import org.opensearch.common.annotation.InternalApi;
import org.opensearch.telemetry.tracing.Span;

import java.util.EnumMap;
import java.util.HashMap;
Expand All @@ -25,26 +26,50 @@
*/
@InternalApi
class SearchRequestContext {
private final SearchRequest searchRequest;
private SearchTask searchTask;
private final SearchRequestOperationsListener searchRequestOperationsListener;
private long absoluteStartNanos;
private final Map<String, Long> phaseTookMap;
private TotalHits totalHits;
private final EnumMap<ShardStatsFieldNames, Integer> shardStats;
private Span requestSpan;
private Span phaseSpan;

/**
* This constructor is for testing only
*/
SearchRequestContext() {
this(new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()));
this(new SearchRequest());
}

SearchRequestContext(SearchRequestOperationsListener searchRequestOperationsListener) {
/**
* This constructor is for testing only
*/
SearchRequestContext(SearchRequest searchRequest) {
this(searchRequest, new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()));
}

SearchRequestContext(SearchRequest searchRequest, SearchRequestOperationsListener searchRequestOperationsListener) {
this.searchRequest = searchRequest;
this.searchRequestOperationsListener = searchRequestOperationsListener;
this.absoluteStartNanos = System.nanoTime();
this.phaseTookMap = new HashMap<>();
this.shardStats = new EnumMap<>(ShardStatsFieldNames.class);
}

public SearchRequest getSearchRequest() {
return searchRequest;
}

public void setSearchTask(SearchTask searchTask) {
this.searchTask = searchTask;
}

public SearchTask getSearchTask() {
return searchTask;
}

SearchRequestOperationsListener getSearchRequestOperationsListener() {
return searchRequestOperationsListener;
}
Expand Down Expand Up @@ -76,7 +101,7 @@ void setTotalHits(TotalHits totalHits) {
this.totalHits = totalHits;
}

TotalHits totalHits() {
public TotalHits totalHits() {
return totalHits;
}

Expand All @@ -87,7 +112,7 @@ void setShardStats(int total, int successful, int skipped, int failed) {
this.shardStats.put(ShardStatsFieldNames.SEARCH_REQUEST_SLOWLOG_SHARD_FAILED, failed);
}

String formattedShardStats() {
public String formattedShardStats() {
if (shardStats.isEmpty()) {
return "";
} else {
Expand All @@ -105,6 +130,22 @@ String formattedShardStats() {
);
}
}

public void setRequestSpan(Span requestSpan) {
this.requestSpan = requestSpan;
}

public Span getRequestSpan() {
return requestSpan;
}

public void setPhaseSpan(Span phaseSpan) {
this.phaseSpan = phaseSpan;
}

public Span getPhaseSpan() {
return phaseSpan;
}
}

enum ShardStatsFieldNames {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.action.search;

import org.opensearch.telemetry.tracing.AttributeNames;
import org.opensearch.telemetry.tracing.Span;
import org.opensearch.telemetry.tracing.SpanBuilder;
import org.opensearch.telemetry.tracing.SpanContext;
import org.opensearch.telemetry.tracing.SpanScope;
import org.opensearch.telemetry.tracing.Tracer;

import static org.opensearch.core.common.Strings.capitalize;

/**
* Listener for search request tracing on the coordinator node
*
* @opensearch.internal
*/
public final class SearchRequestCoordinatorTrace extends SearchRequestOperationsListener {
private final Tracer tracer;

public SearchRequestCoordinatorTrace(Tracer tracer) {
this.tracer = tracer;
}

@Override
void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
if (searchRequestContext.getPhaseSpan() != null) {
searchRequestContext.getPhaseSpan().endSpan();
}
searchRequestContext.setPhaseSpan(
tracer.startSpan(
SpanBuilder.from(
"coordinator" + capitalize(context.getCurrentPhase().getName()),
new SpanContext(searchRequestContext.getRequestSpan())
)
)
);
SpanScope spanScope = tracer.withSpanInScope(searchRequestContext.getPhaseSpan());
}

@Override
void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
searchRequestContext.getPhaseSpan().endSpan();
searchRequestContext.setPhaseSpan(null);
}

@Override
void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
searchRequestContext.getPhaseSpan().endSpan();
searchRequestContext.setPhaseSpan(null);
}

@Override
void onRequestEnd(SearchRequestContext searchRequestContext) {
Span requestSpan = searchRequestContext.getRequestSpan();

// add response-related attributes on request end
requestSpan.addAttribute(
AttributeNames.TOTAL_HITS,
searchRequestContext.totalHits() == null ? "0" : searchRequestContext.totalHits().toString()
);
requestSpan.addAttribute(
AttributeNames.SHARDS,
searchRequestContext.formattedShardStats().isEmpty() ? "no data" : searchRequestContext.formattedShardStats()
);
requestSpan.addAttribute(
AttributeNames.SOURCE,
searchRequestContext.getSearchRequest().source() == null
? "no source"
: searchRequestContext.getSearchRequest().source().toString()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@
@InternalApi
abstract class SearchRequestOperationsListener {

abstract void onPhaseStart(SearchPhaseContext context);
abstract void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext);

abstract void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext);

abstract void onPhaseFailure(SearchPhaseContext context);
abstract void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext);

void onRequestStart(SearchRequestContext searchRequestContext) {}

void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {}
void onRequestEnd(SearchRequestContext searchRequestContext) {}

/**
* Holder of Composite Listeners
Expand All @@ -48,10 +48,10 @@ static final class CompositeListener extends SearchRequestOperationsListener {
}

@Override
void onPhaseStart(SearchPhaseContext context) {
void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
for (SearchRequestOperationsListener listener : listeners) {
try {
listener.onPhaseStart(context);
listener.onPhaseStart(context, searchRequestContext);
} catch (Exception e) {
logger.warn(() -> new ParameterizedMessage("onPhaseStart listener [{}] failed", listener), e);
}
Expand All @@ -70,10 +70,10 @@ void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestCo
}

@Override
void onPhaseFailure(SearchPhaseContext context) {
void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
for (SearchRequestOperationsListener listener : listeners) {
try {
listener.onPhaseFailure(context);
listener.onPhaseFailure(context, searchRequestContext);
} catch (Exception e) {
logger.warn(() -> new ParameterizedMessage("onPhaseFailure listener [{}] failed", listener), e);
}
Expand All @@ -92,10 +92,10 @@ void onRequestStart(SearchRequestContext searchRequestContext) {
}

@Override
public void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
public void onRequestEnd(SearchRequestContext searchRequestContext) {
for (SearchRequestOperationsListener listener : listeners) {
try {
listener.onRequestEnd(context, searchRequestContext);
listener.onRequestEnd(searchRequestContext);
} catch (Exception e) {
logger.warn(() -> new ParameterizedMessage("onRequestEnd listener [{}] failed", listener), e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,29 +134,29 @@ public SearchRequestSlowLog(ClusterService clusterService) {
}

@Override
void onPhaseStart(SearchPhaseContext context) {}
void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) {}

@Override
void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {}

@Override
void onPhaseFailure(SearchPhaseContext context) {}
void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {}

@Override
void onRequestStart(SearchRequestContext searchRequestContext) {}

@Override
void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {
void onRequestEnd(SearchRequestContext searchRequestContext) {
long tookInNanos = System.nanoTime() - searchRequestContext.getAbsoluteStartNanos();

if (warnThreshold >= 0 && tookInNanos > warnThreshold && level.isLevelEnabledFor(SlowLogLevel.WARN)) {
logger.warn(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext));
logger.warn(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext));
} else if (infoThreshold >= 0 && tookInNanos > infoThreshold && level.isLevelEnabledFor(SlowLogLevel.INFO)) {
logger.info(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext));
logger.info(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext));
} else if (debugThreshold >= 0 && tookInNanos > debugThreshold && level.isLevelEnabledFor(SlowLogLevel.DEBUG)) {
logger.debug(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext));
logger.debug(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext));
} else if (traceThreshold >= 0 && tookInNanos > traceThreshold && level.isLevelEnabledFor(SlowLogLevel.TRACE)) {
logger.trace(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext));
logger.trace(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext));
}
}

Expand All @@ -167,15 +167,11 @@ void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequest
*/
static final class SearchRequestSlowLogMessage extends OpenSearchLogMessage {

SearchRequestSlowLogMessage(SearchPhaseContext context, long tookInNanos, SearchRequestContext searchRequestContext) {
super(prepareMap(context, tookInNanos, searchRequestContext), message(context, tookInNanos, searchRequestContext));
SearchRequestSlowLogMessage(long tookInNanos, SearchRequestContext searchRequestContext) {
super(prepareMap(tookInNanos, searchRequestContext), message(tookInNanos, searchRequestContext));
}

private static Map<String, Object> prepareMap(
SearchPhaseContext context,
long tookInNanos,
SearchRequestContext searchRequestContext
) {
private static Map<String, Object> prepareMap(long tookInNanos, SearchRequestContext searchRequestContext) {
final Map<String, Object> messageFields = new HashMap<>();
messageFields.put("took", TimeValue.timeValueNanos(tookInNanos));
messageFields.put("took_millis", TimeUnit.NANOSECONDS.toMillis(tookInNanos));
Expand All @@ -185,22 +181,24 @@ private static Map<String, Object> prepareMap(
} else {
messageFields.put("total_hits", "-1");
}
messageFields.put("search_type", context.getRequest().searchType());
messageFields.put("search_type", searchRequestContext.getSearchRequest().searchType());
messageFields.put("shards", searchRequestContext.formattedShardStats());

if (context.getRequest().source() != null) {
String source = escapeJson(context.getRequest().source().toString(FORMAT_PARAMS));
if (searchRequestContext.getSearchRequest().source() != null) {
String source = escapeJson(searchRequestContext.getSearchRequest().source().toString(FORMAT_PARAMS));
messageFields.put("source", source);
} else {
messageFields.put("source", "{}");
}

messageFields.put("id", context.getTask().getHeader(Task.X_OPAQUE_ID));
if (searchRequestContext.getSearchTask() != null) {
messageFields.put("id", searchRequestContext.getSearchTask().getHeader(Task.X_OPAQUE_ID));
} else {
messageFields.put("id", "");
}
return messageFields;
}

// Message will be used in plaintext logs
private static String message(SearchPhaseContext context, long tookInNanos, SearchRequestContext searchRequestContext) {
private static String message(long tookInNanos, SearchRequestContext searchRequestContext) {
final StringBuilder sb = new StringBuilder();
sb.append("took[").append(TimeValue.timeValueNanos(tookInNanos)).append("], ");
sb.append("took_millis[").append(TimeUnit.NANOSECONDS.toMillis(tookInNanos)).append("], ");
Expand All @@ -210,15 +208,15 @@ private static String message(SearchPhaseContext context, long tookInNanos, Sear
} else {
sb.append("total_hits[-1]");
}
sb.append("search_type[").append(context.getRequest().searchType()).append("], ");
sb.append("search_type[").append(searchRequestContext.getSearchRequest().searchType()).append("], ");
sb.append("shards[").append(searchRequestContext.formattedShardStats()).append("], ");
if (context.getRequest().source() != null) {
sb.append("source[").append(context.getRequest().source().toString(FORMAT_PARAMS)).append("], ");
if (searchRequestContext.getSearchRequest().source() != null) {
sb.append("source[").append(searchRequestContext.getSearchRequest().source().toString(FORMAT_PARAMS)).append("], ");
} else {
sb.append("source[], ");
}
if (context.getTask().getHeader(Task.X_OPAQUE_ID) != null) {
sb.append("id[").append(context.getTask().getHeader(Task.X_OPAQUE_ID)).append("]");
if (searchRequestContext.getSearchTask() != null) {
sb.append("id[").append(searchRequestContext.getSearchTask().getHeader(Task.X_OPAQUE_ID)).append("]");
} else {
sb.append("id[]");
}
Expand Down
Loading

0 comments on commit 4d3794d

Please sign in to comment.