From abfcd5466902b1eb21b4a3b4e8519fec10d5f0ab Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 31 Oct 2024 12:17:44 -0700 Subject: [PATCH] feat: Add log output for send_bytes failure cases --- .../exporter/otlp/logs_exporter.rb | 47 +++++++----- .../exporter/otlp/logs_exporter_test.rb | 72 +++++++++++++++++-- 2 files changed, 98 insertions(+), 21 deletions(-) diff --git a/exporter/otlp-logs/lib/opentelemetry/exporter/otlp/logs_exporter.rb b/exporter/otlp-logs/lib/opentelemetry/exporter/otlp/logs_exporter.rb index fe6cfd2fd..a0cb83042 100644 --- a/exporter/otlp-logs/lib/opentelemetry/exporter/otlp/logs_exporter.rb +++ b/exporter/otlp-logs/lib/opentelemetry/exporter/otlp/logs_exporter.rb @@ -109,6 +109,10 @@ def shutdown(timeout: nil) private + def handle_http_error(response) + OpenTelemetry.handle_error(message: "OTLP logs exporter received #{response.class.name}, http.code=#{response.code}, for uri: '#{@path}'") + end + def http_connection(uri, ssl_verify_mode, certificate_file, client_certificate_file, client_key_file) http = Net::HTTP.new(uri.host, uri.port) http.use_ssl = uri.scheme == 'https' @@ -166,14 +170,16 @@ def send_bytes(bytes, timeout:) # rubocop:disable Metrics/CyclomaticComplexity, SUCCESS when Net::HTTPServiceUnavailable, Net::HTTPTooManyRequests response.body # Read and discard body - redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1, reason: response.code) + handle_http_error(response) + redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1) FAILURE when Net::HTTPRequestTimeOut, Net::HTTPGatewayTimeOut, Net::HTTPBadGateway response.body # Read and discard body - redo if backoff?(retry_count: retry_count += 1, reason: response.code) + handle_http_error(response) + redo if backoff?(retry_count: retry_count += 1) FAILURE when Net::HTTPNotFound - OpenTelemetry.handle_error(message: "OTLP exporter received http.code=404 for uri: '#{@path}'") + handle_http_error(response) FAILURE when Net::HTTPBadRequest, Net::HTTPClientError, Net::HTTPServerError log_status(response.body) @@ -181,28 +187,35 @@ def send_bytes(bytes, timeout:) # rubocop:disable Metrics/CyclomaticComplexity, when Net::HTTPRedirection @http.finish handle_redirect(response['location']) - redo if backoff?(retry_after: 0, retry_count: retry_count += 1, reason: response.code) + redo if backoff?(retry_after: 0, retry_count: retry_count += 1) else @http.finish + handle_http_error(response) FAILURE end - rescue Net::OpenTimeout, Net::ReadTimeout - retry if backoff?(retry_count: retry_count += 1, reason: 'timeout') + rescue Net::OpenTimeout, Net::ReadTimeout => e + OpenTelemetry.handle_error(exception: e) + retry if backoff?(retry_count: retry_count += 1, exception: e) return FAILURE - rescue OpenSSL::SSL::SSLError - retry if backoff?(retry_count: retry_count += 1, reason: 'openssl_error') + rescue OpenSSL::SSL::SSLError => e + OpenTelemetry.handle_error(exception: e) + retry if backoff?(retry_count: retry_count += 1) return FAILURE - rescue SocketError - retry if backoff?(retry_count: retry_count += 1, reason: 'socket_error') + rescue SocketError => e + OpenTelemetry.handle_error(exception: e) + retry if backoff?(retry_count: retry_count += 1) return FAILURE rescue SystemCallError => e - retry if backoff?(retry_count: retry_count += 1, reason: e.class.name) + retry if backoff?(retry_count: retry_count += 1) + OpenTelemetry.handle_error(exception: e) return FAILURE - rescue EOFError - retry if backoff?(retry_count: retry_count += 1, reason: 'eof_error') + rescue EOFError => e + OpenTelemetry.handle_error(exception: e) + retry if backoff?(retry_count: retry_count += 1) return FAILURE - rescue Zlib::DataError - retry if backoff?(retry_count: retry_count += 1, reason: 'zlib_error') + rescue Zlib::DataError => e + OpenTelemetry.handle_error(exception: e) + retry if backoff?(retry_count: retry_count += 1) return FAILURE rescue StandardError => e OpenTelemetry.handle_error(exception: e, message: 'unexpected error in OTLP::Exporter#send_bytes') @@ -225,12 +238,12 @@ def log_status(body) klass_or_nil = ::Google::Protobuf::DescriptorPool.generated_pool.lookup(detail.type_name).msgclass detail.unpack(klass_or_nil) if klass_or_nil end.compact - OpenTelemetry.handle_error(message: "OTLP exporter received rpc.Status{message=#{status.message}, details=#{details}}") + OpenTelemetry.handle_error(message: "OTLP logs exporter received rpc.Status{message=#{status.message}, details=#{details}}") rescue StandardError => e OpenTelemetry.handle_error(exception: e, message: 'unexpected error decoding rpc.Status in OTLP::Exporter#log_status') end - def backoff?(retry_count:, reason:, retry_after: nil) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity + def backoff?(retry_count:, retry_after: nil) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity return false if retry_count > RETRY_COUNT sleep_interval = nil diff --git a/exporter/otlp-logs/test/opentelemetry/exporter/otlp/logs_exporter_test.rb b/exporter/otlp-logs/test/opentelemetry/exporter/otlp/logs_exporter_test.rb index b77843abf..a1f59c696 100644 --- a/exporter/otlp-logs/test/opentelemetry/exporter/otlp/logs_exporter_test.rb +++ b/exporter/otlp-logs/test/opentelemetry/exporter/otlp/logs_exporter_test.rb @@ -358,7 +358,7 @@ _(result).must_equal(SUCCESS) end - it 'returns TIMEOUT on timeout' do + it 'returns FAILURE on timeout' do stub_request(:post, 'http://localhost:4318/v1/logs').to_return(status: 200) log_record_data = OpenTelemetry::TestHelpers.create_log_record_data result = exporter.export([log_record_data], timeout: 0) @@ -402,7 +402,64 @@ OpenTelemetry.logger = logger end - it 'returns TIMEOUT on timeout after retrying' do + { 'Net::HTTPServiceUnavailable' => 503, + 'Net::HTTPTooManyRequests' => 429, + 'Net::HTTPRequestTimeout' => 408, + 'Net::HTTPGatewayTimeout' => 504, + 'Net::HTTPBadGateway' => 502, + 'Net::HTTPNotFound' => 404 }.each do |klass, code| + it "logs an error and returns FAILURE with #{code}s" do + OpenTelemetry::Exporter::OTLP::LogsExporter.stub_const(:RETRY_COUNT, 0) do + log_stream = StringIO.new + OpenTelemetry.logger = ::Logger.new(log_stream) + + stub_request(:post, 'http://localhost:4318/v1/logs').to_return(status: code) + log_record_data = OpenTelemetry::TestHelpers.create_log_record_data + _(exporter.export([log_record_data])).must_equal(FAILURE) + _(log_stream.string).must_match( + %r{ERROR -- : OpenTelemetry error: OTLP logs exporter received #{klass}, http.code=#{code}, for uri: '/v1/logs'} + ) + end + end + end + + [ + Net::OpenTimeout, + Net::ReadTimeout, + OpenSSL::SSL::SSLError, + SocketError, + EOFError, + Zlib::DataError + ].each do |error| + it "logs error and returns FAILURE when #{error} is raised" do + OpenTelemetry::Exporter::OTLP::LogsExporter.stub_const(:RETRY_COUNT, 0) do + log_stream = StringIO.new + OpenTelemetry.logger = ::Logger.new(log_stream) + + stub_request(:post, 'http://localhost:4318/v1/logs').to_raise(error.send(:new)) + log_record_data = OpenTelemetry::TestHelpers.create_log_record_data + _(exporter.export([log_record_data])).must_equal(FAILURE) + _(log_stream.string).must_match( + /ERROR -- : OpenTelemetry error: #{error}/ + ) + end + end + end + + it 'works with a SystemCallError' do + OpenTelemetry::Exporter::OTLP::LogsExporter.stub_const(:RETRY_COUNT, 0) do + log_stream = StringIO.new + OpenTelemetry.logger = ::Logger.new(log_stream) + stub_request(:post, 'http://localhost:4318/v1/logs').to_raise(SystemCallError.new('Failed to open TCP connection', 61)) + log_record_data = OpenTelemetry::TestHelpers.create_log_record_data + _(exporter.export([log_record_data])).must_equal(FAILURE) + _(log_stream.string).must_match( + /ERROR -- : OpenTelemetry error: Connection refused - Failed to open TCP connection/ + ) + end + end + + it 'returns FAILURE on timeout after retrying' do stub_request(:post, 'http://localhost:4318/v1/logs').to_timeout.then.to_raise('this should not be reached') log_record_data = OpenTelemetry::TestHelpers.create_log_record_data @@ -427,11 +484,18 @@ end it 'returns FAILURE when encryption to receiver endpoint fails' do + log_stream = StringIO.new + OpenTelemetry.logger = ::Logger.new(log_stream) + exporter = OpenTelemetry::Exporter::OTLP::LogsExporter.new(endpoint: 'https://localhost:4318/v1/logs') stub_request(:post, 'https://localhost:4318/v1/logs').to_raise(OpenSSL::SSL::SSLError.new('enigma wedged')) log_record_data = OpenTelemetry::TestHelpers.create_log_record_data exporter.stub(:backoff?, ->(**_) { false }) do _(exporter.export([log_record_data])).must_equal(FAILURE) + + _(log_stream.string).must_match( + /ERROR -- : OpenTelemetry error: enigma wedged/ + ) end end @@ -474,7 +538,7 @@ result = exporter.export([log_record_data]) _(log_stream.string).must_match( - /ERROR -- : OpenTelemetry error: OTLP exporter received rpc.Status{message=bad request, details=\[.*you are a bad request.*\]}/ + /ERROR -- : OpenTelemetry error: OTLP logs exporter received rpc.Status{message=bad request, details=\[.*you are a bad request.*\]}/ ) _(result).must_equal(FAILURE) @@ -493,7 +557,7 @@ result = exporter.export([log_record_data]) _(log_stream.string).must_match( - %r{ERROR -- : OpenTelemetry error: OTLP exporter received http\.code=404 for uri: '/v1/logs'} + %r{ERROR -- : OpenTelemetry error: OTLP logs exporter received Net::HTTPNotFound, http.code=404, for uri: '/v1/logs'\n} ) _(result).must_equal(FAILURE)