diff --git a/CHANGELOG.md b/CHANGELOG.md index 00b3640cf13..241c8252d5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#2097](https://github.com/open-telemetry/opentelemetry-python/pull/2097)) - `opentelemetry-test`: Add `HttpTestBase` to allow tests with actual TCP sockets ([#2101](https://github.com/open-telemetry/opentelemetry-python/pull/2101)) +- Fix incorrect headers parsing via environment variables + ([#2103](https://github.com/open-telemetry/opentelemetry-python/pull/2103)) ## [1.5.0-0.24b0](https://github.com/open-telemetry/opentelemetry-python/releases/tag/v1.5.0-0.24b0) - 2021-08-26 diff --git a/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py b/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py index 661d7a96ea6..15596ebc9f8 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py @@ -50,6 +50,7 @@ OTEL_EXPORTER_OTLP_TIMEOUT, ) from opentelemetry.sdk.resources import Resource as SDKResource +from opentelemetry.util.re import parse_headers logger = logging.getLogger(__name__) SDKDataT = TypeVar("SDKDataT") @@ -228,19 +229,8 @@ def __init__( self._headers = headers or environ.get(OTEL_EXPORTER_OTLP_HEADERS) if isinstance(self._headers, str): - temp_headers = [] - for header_pair in self._headers.split(","): - key, value = header_pair.split("=", maxsplit=1) - key = key.strip().lower() - value = value.strip() - temp_headers.append( - ( - key, - value, - ) - ) - - self._headers = tuple(temp_headers) + temp_headers = parse_headers(self._headers) + self._headers = tuple(temp_headers.items()) self._timeout = timeout or int( environ.get(OTEL_EXPORTER_OTLP_TIMEOUT, 10) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index 061484d0e38..211d037999c 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -40,6 +40,7 @@ from opentelemetry.exporter.otlp.proto.http.trace_exporter.encoder import ( _ProtobufEncoder, ) +from opentelemetry.util.re import parse_headers _logger = logging.getLogger(__name__) @@ -70,7 +71,11 @@ def __init__( OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE, environ.get(OTEL_EXPORTER_OTLP_CERTIFICATE, True), ) - self._headers = headers or _headers_from_env() + headers_string = environ.get( + OTEL_EXPORTER_OTLP_TRACES_HEADERS, + environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""), + ) + self._headers = headers or parse_headers(headers_string) self._timeout = timeout or int( environ.get( OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, @@ -155,24 +160,6 @@ def shutdown(self): self._shutdown = True -def _headers_from_env() -> Optional[Dict[str, str]]: - headers_str = environ.get( - OTEL_EXPORTER_OTLP_TRACES_HEADERS, - environ.get(OTEL_EXPORTER_OTLP_HEADERS), - ) - headers = {} - if headers_str: - for header in headers_str.split(","): - try: - header_name, header_value = header.split("=") - headers[header_name.strip()] = header_value.strip() - except ValueError: - _logger.warning( - "Skipped invalid OTLP exporter header: %r", header - ) - return headers - - def _compression_from_env() -> Compression: compression = ( environ.get( diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py index 9b4bb53ad4a..7f91e05984f 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py @@ -64,7 +64,7 @@ def test_constructor_default(self): OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE: "traces/certificate.env", OTEL_EXPORTER_OTLP_TRACES_COMPRESSION: Compression.Deflate.value, OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: "https://traces.endpoint.env", - OTEL_EXPORTER_OTLP_TRACES_HEADERS: "tracesEnv1=val1,tracesEnv2=val2", + OTEL_EXPORTER_OTLP_TRACES_HEADERS: "tracesEnv1=val1,tracesEnv2=val2,traceEnv3===val3==", OTEL_EXPORTER_OTLP_TRACES_TIMEOUT: "40", }, ) @@ -77,7 +77,11 @@ def test_exporter_traces_env_take_priority(self): self.assertIs(exporter._compression, Compression.Deflate) self.assertEqual( exporter._headers, - {"tracesEnv1": "val1", "tracesEnv2": "val2"}, + { + "tracesenv1": "val1", + "tracesenv2": "val2", + "traceenv3": "==val3==", + }, ) @patch.dict( @@ -127,7 +131,7 @@ def test_exporter_env(self): self.assertEqual(exporter._timeout, int(OS_ENV_TIMEOUT)) self.assertIs(exporter._compression, Compression.Gzip) self.assertEqual( - exporter._headers, {"envHeader1": "val1", "envHeader2": "val2"} + exporter._headers, {"envheader1": "val1", "envheader2": "val2"} ) @patch.dict( @@ -143,5 +147,5 @@ def test_headers_parse_from_env(self): self.assertEqual( cm.records[0].message, - "Skipped invalid OTLP exporter header: 'missingValue'", + "Header doesn't match the format: missingValue.", ) diff --git a/opentelemetry-api/src/opentelemetry/util/re.py b/opentelemetry-api/src/opentelemetry/util/re.py new file mode 100644 index 00000000000..b503c4893aa --- /dev/null +++ b/opentelemetry-api/src/opentelemetry/util/re.py @@ -0,0 +1,56 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from re import compile, split +from typing import Mapping + +_logger = logging.getLogger(__name__) + + +# https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#specifying-headers-via-environment-variables +_OWS = r"[ \t]*" +# A key contains one or more US-ASCII character except CTLs or separators. +_KEY_FORMAT = ( + r"[\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7a\x7c\x7e]+" +) +# A value contains a URL encoded UTF-8 string. +_VALUE_FORMAT = r"[\x21\x23-\x2b\x2d-\x3a\x3c-\x5b\x5d-\x7e]*" +_HEADER_FORMAT = _KEY_FORMAT + _OWS + r"=" + _OWS + _VALUE_FORMAT +_HEADER_PATTERN = compile(_HEADER_FORMAT) +_DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*") + + +# pylint: disable=invalid-name +def parse_headers(s: str) -> Mapping[str, str]: + """ + Parse ``s`` (a ``str`` instance containing HTTP headers). Uses W3C Baggage + HTTP header format https://www.w3.org/TR/baggage/#baggage-http-header-format, except that + additional semi-colon delimited metadata is not supported. + """ + headers = {} + for header in split(_DELIMITER_PATTERN, s): + if not header: # empty string + continue + match = _HEADER_PATTERN.fullmatch(header.strip()) + if not match: + _logger.warning("Header doesn't match the format: %s.", header) + continue + # value may contain any number of `=` + name, value = match.string.split("=", 1) + name = name.strip().lower() + value = value.strip() + headers[name] = value + + return headers diff --git a/opentelemetry-api/tests/util/test_re.py b/opentelemetry-api/tests/util/test_re.py new file mode 100644 index 00000000000..9c726a7e574 --- /dev/null +++ b/opentelemetry-api/tests/util/test_re.py @@ -0,0 +1,66 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# type: ignore + +import unittest + +from opentelemetry.util.re import parse_headers + + +class TestParseHeaders(unittest.TestCase): + def test_parse_headers(self): + inp = [ + # invalid header name + ("=value", [], True), + ("}key=value", [], True), + ("@key()=value", [], True), + ("/key=value", [], True), + # invalid header value + ("name=\\", [], True), + ('name=value"', [], True), + ("name=;value", [], True), + # different header values + ("name=", [("name", "")], False), + ("name===value=", [("name", "==value=")], False), + # mix of valid and invalid headers + ( + "name1=value1,invalidName, name2 = value2 , name3=value3==", + [ + ( + "name1", + "value1", + ), + ("name2", "value2"), + ("name3", "value3=="), + ], + True, + ), + ( + "=name=valu3; key1; key2, content = application, red=\tvelvet; cake", + [("content", "application")], + True, + ), + ] + for case in inp: + s, expected, warn = case + if warn: + with self.assertLogs(level="WARNING") as cm: + self.assertEqual(parse_headers(s), dict(expected)) + self.assertTrue( + "Header doesn't match the format:" + in cm.records[0].message, + ) + else: + self.assertEqual(parse_headers(s), dict(expected))