Skip to content

Commit

Permalink
Crash reporting enhancements (#323)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mateusz Rzeszutek authored Aug 10, 2022
1 parent 0864aed commit 957d99f
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import java.io.IOException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
Expand Down Expand Up @@ -77,10 +79,7 @@ public void onViewCreated(@NonNull View view, Bundle savedInstanceState) {
NavHostFragment.findNavController(FirstFragment.this)
.navigate(R.id.action_FirstFragment_to_SecondFragment));

binding.crash.setOnClickListener(
v -> {
throw new IllegalStateException("Crashing due to a bug!");
});
binding.crash.setOnClickListener(v -> multiThreadCrashing());

binding.httpMe.setOnClickListener(
v -> {
Expand All @@ -105,6 +104,35 @@ public void onViewCreated(@NonNull View view, Bundle savedInstanceState) {
sessionId.postValue(splunkRum.getRumSessionId());
}

private void multiThreadCrashing() {
CountDownLatch latch = new CountDownLatch(1);
int numThreads = 4;
for (int i = 0; i < numThreads; ++i) {
Thread t =
new Thread(
() -> {
try {
if (latch.await(10, TimeUnit.SECONDS)) {
throw new IllegalStateException(
"Failure from thread "
+ Thread.currentThread().getName());
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
});
t.setName("crash-thread-" + i);
t.start();
}
try {
Thread.sleep(100);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
}
latch.countDown();
}

private void makeCall(String url, Span workflow) {
// make sure the span is in the current context so it can be propagated into the async call.
try (Scope scope = workflow.makeCurrent()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import io.opentelemetry.sdk.OpenTelemetrySdk;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.concurrent.atomic.AtomicBoolean;

class CrashReporter {

Expand All @@ -38,6 +39,7 @@ static class CrashReportingExceptionHandler implements Thread.UncaughtExceptionH
private final Tracer tracer;
private final Thread.UncaughtExceptionHandler existingHandler;
private final SdkTracerProvider sdkTracerProvider;
private final AtomicBoolean crashHappened = new AtomicBoolean(false);

CrashReportingExceptionHandler(
Tracer tracer,
Expand All @@ -50,12 +52,22 @@ static class CrashReportingExceptionHandler implements Thread.UncaughtExceptionH

@Override
public void uncaughtException(@NonNull Thread t, @NonNull Throwable e) {
// the idea here is to set component=crash only for the first error that arrives here
// when multiple threads fail at roughly the same time (e.g. because of an OOM error),
// the first error to arrive here is actually responsible for crashing the app; and all
// the others that are captured before OS actually kills the process are just additional
// info (component=error)
String component =
crashHappened.compareAndSet(false, true)
? SplunkRum.COMPONENT_CRASH
: SplunkRum.COMPONENT_ERROR;

String exceptionType = e.getClass().getSimpleName();
tracer.spanBuilder(exceptionType)
.setAttribute(SemanticAttributes.THREAD_ID, t.getId())
.setAttribute(SemanticAttributes.THREAD_NAME, t.getName())
.setAttribute(SemanticAttributes.EXCEPTION_ESCAPED, true)
.setAttribute(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_CRASH)
.setAttribute(SplunkRum.COMPONENT_KEY, component)
.startSpan()
.recordException(e)
.setStatus(StatusCode.ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@

class RumInitializer {

// we're setting a fairly large length limit to capture long stack traces; ~256 lines,
// assuming 128 chars per line
static final int MAX_ATTRIBUTE_LENGTH = 256 * 128;

private final Config config;
private final Application application;
private final AppStartupTimer startupTimer;
Expand Down Expand Up @@ -295,7 +299,9 @@ private SdkTracerProvider buildTracerProvider(
.addSpanProcessor(batchSpanProcessor)
.addSpanProcessor(attributeAppender)
.setSpanLimits(
SpanLimits.builder().setMaxAttributeValueLength(2048).build())
SpanLimits.builder()
.setMaxAttributeValueLength(MAX_ATTRIBUTE_LENGTH)
.build())
.setResource(resource);
initializationEvents.add(
new RumInitializer.InitializationEvent(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import androidx.annotation.NonNull;
Expand Down Expand Up @@ -75,6 +76,47 @@ public void crashReportingSpan() {
verify(sdkTracerProvider).forceFlush();
}

@Test
public void multipleErrorsDuringACrash() {
SdkTracerProvider sdkTracerProvider = mock(SdkTracerProvider.class);
CrashReporter.CrashReportingExceptionHandler crashReporter =
new CrashReporter.CrashReportingExceptionHandler(tracer, sdkTracerProvider, null);

Exception firstError = new NullPointerException("boom!");
Thread crashThread = new Thread("crashThread");

Exception secondError = new IllegalArgumentException("boom again!");
Thread anotherThread = new Thread("someOtherThread");

crashReporter.uncaughtException(crashThread, firstError);
crashReporter.uncaughtException(anotherThread, secondError);

List<SpanData> spans = otelTesting.getSpans();
assertEquals(2, spans.size());

assertThat(spans.get(0))
.hasName("NullPointerException")
.hasAttributesSatisfyingExactly(
equalTo(SemanticAttributes.THREAD_ID, crashThread.getId()),
equalTo(SemanticAttributes.THREAD_NAME, "crashThread"),
equalTo(SemanticAttributes.EXCEPTION_ESCAPED, true),
equalTo(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_CRASH))
.hasException(firstError)
.hasStatus(StatusData.error());

assertThat(spans.get(1))
.hasName("IllegalArgumentException")
.hasAttributesSatisfyingExactly(
equalTo(SemanticAttributes.THREAD_ID, anotherThread.getId()),
equalTo(SemanticAttributes.THREAD_NAME, "someOtherThread"),
equalTo(SemanticAttributes.EXCEPTION_ESCAPED, true),
equalTo(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_ERROR))
.hasException(secondError)
.hasStatus(StatusData.error());

verify(sdkTracerProvider, times(2)).forceFlush();
}

private static class TestDelegateHandler implements Thread.UncaughtExceptionHandler {
final AtomicBoolean wasDelegatedTo = new AtomicBoolean(false);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ SpanExporter buildFilteringExporter(ConnectionUtil connectionUtil) {

AttributeKey<String> longAttributeKey = stringKey("longAttribute");
splunkRum.addRumEvent(
"testEvent", Attributes.of(longAttributeKey, Strings.repeat("a", 3000)));
"testEvent",
Attributes.of(
longAttributeKey,
Strings.repeat("a", RumInitializer.MAX_ATTRIBUTE_LENGTH + 1)));

splunkRum.flushSpans();
List<SpanData> spans = testExporter.getFinishedSpanItems();
Expand All @@ -137,7 +140,7 @@ SpanExporter buildFilteringExporter(ConnectionUtil connectionUtil) {
SpanData eventSpan = spans.get(0);
assertEquals("testEvent", eventSpan.getName());
String truncatedValue = eventSpan.getAttributes().get(longAttributeKey);
assertEquals(Strings.repeat("a", 2048), truncatedValue);
assertEquals(Strings.repeat("a", RumInitializer.MAX_ATTRIBUTE_LENGTH), truncatedValue);
}

/** Verify that we have buffering in place in our exporter implementation. */
Expand Down

0 comments on commit 957d99f

Please sign in to comment.