From 957d99f675f4ad7fe5eedd4733fd303450ebbbe3 Mon Sep 17 00:00:00 2001 From: Mateusz Rzeszutek Date: Thu, 11 Aug 2022 00:14:04 +0200 Subject: [PATCH] Crash reporting enhancements (#323) --- .../splunk/android/sample/FirstFragment.java | 36 ++++++++++++++-- .../java/com/splunk/rum/CrashReporter.java | 14 ++++++- .../java/com/splunk/rum/RumInitializer.java | 8 +++- .../com/splunk/rum/CrashReporterTest.java | 42 +++++++++++++++++++ .../com/splunk/rum/RumInitializerTest.java | 7 +++- 5 files changed, 99 insertions(+), 8 deletions(-) diff --git a/sample-app/src/main/java/com/splunk/android/sample/FirstFragment.java b/sample-app/src/main/java/com/splunk/android/sample/FirstFragment.java index 2c32a72ae..2d72c21d9 100644 --- a/sample-app/src/main/java/com/splunk/android/sample/FirstFragment.java +++ b/sample-app/src/main/java/com/splunk/android/sample/FirstFragment.java @@ -36,6 +36,8 @@ import java.io.IOException; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.TrustManager; @@ -77,10 +79,7 @@ public void onViewCreated(@NonNull View view, Bundle savedInstanceState) { NavHostFragment.findNavController(FirstFragment.this) .navigate(R.id.action_FirstFragment_to_SecondFragment)); - binding.crash.setOnClickListener( - v -> { - throw new IllegalStateException("Crashing due to a bug!"); - }); + binding.crash.setOnClickListener(v -> multiThreadCrashing()); binding.httpMe.setOnClickListener( v -> { @@ -105,6 +104,35 @@ public void onViewCreated(@NonNull View view, Bundle savedInstanceState) { sessionId.postValue(splunkRum.getRumSessionId()); } + private void multiThreadCrashing() { + CountDownLatch latch = new CountDownLatch(1); + int numThreads = 4; + for (int i = 0; i < numThreads; ++i) { + Thread t = + new Thread( + () -> { + try { + if (latch.await(10, TimeUnit.SECONDS)) { + throw new IllegalStateException( + "Failure from thread " + + Thread.currentThread().getName()); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + t.setName("crash-thread-" + i); + t.start(); + } + try { + Thread.sleep(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + latch.countDown(); + } + private void makeCall(String url, Span workflow) { // make sure the span is in the current context so it can be propagated into the async call. try (Scope scope = workflow.makeCurrent()) { diff --git a/splunk-otel-android/src/main/java/com/splunk/rum/CrashReporter.java b/splunk-otel-android/src/main/java/com/splunk/rum/CrashReporter.java index 08d37a72d..6a526cab4 100644 --- a/splunk-otel-android/src/main/java/com/splunk/rum/CrashReporter.java +++ b/splunk-otel-android/src/main/java/com/splunk/rum/CrashReporter.java @@ -22,6 +22,7 @@ import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.trace.SdkTracerProvider; import io.opentelemetry.semconv.trace.attributes.SemanticAttributes; +import java.util.concurrent.atomic.AtomicBoolean; class CrashReporter { @@ -38,6 +39,7 @@ static class CrashReportingExceptionHandler implements Thread.UncaughtExceptionH private final Tracer tracer; private final Thread.UncaughtExceptionHandler existingHandler; private final SdkTracerProvider sdkTracerProvider; + private final AtomicBoolean crashHappened = new AtomicBoolean(false); CrashReportingExceptionHandler( Tracer tracer, @@ -50,12 +52,22 @@ static class CrashReportingExceptionHandler implements Thread.UncaughtExceptionH @Override public void uncaughtException(@NonNull Thread t, @NonNull Throwable e) { + // the idea here is to set component=crash only for the first error that arrives here + // when multiple threads fail at roughly the same time (e.g. because of an OOM error), + // the first error to arrive here is actually responsible for crashing the app; and all + // the others that are captured before OS actually kills the process are just additional + // info (component=error) + String component = + crashHappened.compareAndSet(false, true) + ? SplunkRum.COMPONENT_CRASH + : SplunkRum.COMPONENT_ERROR; + String exceptionType = e.getClass().getSimpleName(); tracer.spanBuilder(exceptionType) .setAttribute(SemanticAttributes.THREAD_ID, t.getId()) .setAttribute(SemanticAttributes.THREAD_NAME, t.getName()) .setAttribute(SemanticAttributes.EXCEPTION_ESCAPED, true) - .setAttribute(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_CRASH) + .setAttribute(SplunkRum.COMPONENT_KEY, component) .startSpan() .recordException(e) .setStatus(StatusCode.ERROR) diff --git a/splunk-otel-android/src/main/java/com/splunk/rum/RumInitializer.java b/splunk-otel-android/src/main/java/com/splunk/rum/RumInitializer.java index d5f905630..9849296b3 100644 --- a/splunk-otel-android/src/main/java/com/splunk/rum/RumInitializer.java +++ b/splunk-otel-android/src/main/java/com/splunk/rum/RumInitializer.java @@ -57,6 +57,10 @@ class RumInitializer { + // we're setting a fairly large length limit to capture long stack traces; ~256 lines, + // assuming 128 chars per line + static final int MAX_ATTRIBUTE_LENGTH = 256 * 128; + private final Config config; private final Application application; private final AppStartupTimer startupTimer; @@ -295,7 +299,9 @@ private SdkTracerProvider buildTracerProvider( .addSpanProcessor(batchSpanProcessor) .addSpanProcessor(attributeAppender) .setSpanLimits( - SpanLimits.builder().setMaxAttributeValueLength(2048).build()) + SpanLimits.builder() + .setMaxAttributeValueLength(MAX_ATTRIBUTE_LENGTH) + .build()) .setResource(resource); initializationEvents.add( new RumInitializer.InitializationEvent( diff --git a/splunk-otel-android/src/test/java/com/splunk/rum/CrashReporterTest.java b/splunk-otel-android/src/test/java/com/splunk/rum/CrashReporterTest.java index 0f830669a..ffa7de97b 100644 --- a/splunk-otel-android/src/test/java/com/splunk/rum/CrashReporterTest.java +++ b/splunk-otel-android/src/test/java/com/splunk/rum/CrashReporterTest.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import androidx.annotation.NonNull; @@ -75,6 +76,47 @@ public void crashReportingSpan() { verify(sdkTracerProvider).forceFlush(); } + @Test + public void multipleErrorsDuringACrash() { + SdkTracerProvider sdkTracerProvider = mock(SdkTracerProvider.class); + CrashReporter.CrashReportingExceptionHandler crashReporter = + new CrashReporter.CrashReportingExceptionHandler(tracer, sdkTracerProvider, null); + + Exception firstError = new NullPointerException("boom!"); + Thread crashThread = new Thread("crashThread"); + + Exception secondError = new IllegalArgumentException("boom again!"); + Thread anotherThread = new Thread("someOtherThread"); + + crashReporter.uncaughtException(crashThread, firstError); + crashReporter.uncaughtException(anotherThread, secondError); + + List spans = otelTesting.getSpans(); + assertEquals(2, spans.size()); + + assertThat(spans.get(0)) + .hasName("NullPointerException") + .hasAttributesSatisfyingExactly( + equalTo(SemanticAttributes.THREAD_ID, crashThread.getId()), + equalTo(SemanticAttributes.THREAD_NAME, "crashThread"), + equalTo(SemanticAttributes.EXCEPTION_ESCAPED, true), + equalTo(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_CRASH)) + .hasException(firstError) + .hasStatus(StatusData.error()); + + assertThat(spans.get(1)) + .hasName("IllegalArgumentException") + .hasAttributesSatisfyingExactly( + equalTo(SemanticAttributes.THREAD_ID, anotherThread.getId()), + equalTo(SemanticAttributes.THREAD_NAME, "someOtherThread"), + equalTo(SemanticAttributes.EXCEPTION_ESCAPED, true), + equalTo(SplunkRum.COMPONENT_KEY, SplunkRum.COMPONENT_ERROR)) + .hasException(secondError) + .hasStatus(StatusData.error()); + + verify(sdkTracerProvider, times(2)).forceFlush(); + } + private static class TestDelegateHandler implements Thread.UncaughtExceptionHandler { final AtomicBoolean wasDelegatedTo = new AtomicBoolean(false); diff --git a/splunk-otel-android/src/test/java/com/splunk/rum/RumInitializerTest.java b/splunk-otel-android/src/test/java/com/splunk/rum/RumInitializerTest.java index 950994383..f2963983e 100644 --- a/splunk-otel-android/src/test/java/com/splunk/rum/RumInitializerTest.java +++ b/splunk-otel-android/src/test/java/com/splunk/rum/RumInitializerTest.java @@ -128,7 +128,10 @@ SpanExporter buildFilteringExporter(ConnectionUtil connectionUtil) { AttributeKey longAttributeKey = stringKey("longAttribute"); splunkRum.addRumEvent( - "testEvent", Attributes.of(longAttributeKey, Strings.repeat("a", 3000))); + "testEvent", + Attributes.of( + longAttributeKey, + Strings.repeat("a", RumInitializer.MAX_ATTRIBUTE_LENGTH + 1))); splunkRum.flushSpans(); List spans = testExporter.getFinishedSpanItems(); @@ -137,7 +140,7 @@ SpanExporter buildFilteringExporter(ConnectionUtil connectionUtil) { SpanData eventSpan = spans.get(0); assertEquals("testEvent", eventSpan.getName()); String truncatedValue = eventSpan.getAttributes().get(longAttributeKey); - assertEquals(Strings.repeat("a", 2048), truncatedValue); + assertEquals(Strings.repeat("a", RumInitializer.MAX_ATTRIBUTE_LENGTH), truncatedValue); } /** Verify that we have buffering in place in our exporter implementation. */