-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[receiver/kafkareceiver] add: text unmarshaler support in kafka receiver (#20857)
Add text unmarshaler, which will decode the kafka message as text and insert it as the body of a log record. Link to tracking Issue: #20734
- Loading branch information
Showing
11 changed files
with
487 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' | ||
change_type: enhancement | ||
|
||
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) | ||
component: kafkareceiver | ||
|
||
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). | ||
note: Add `text` unmarshaler, which will decode the kafka message as text and insert it as the body of a log record. | ||
|
||
# One or more tracking issues related to the change | ||
issues: [20734] | ||
|
||
# (Optional) One or more lines of additional information to render under the primary note. | ||
# These lines will be padded with 2 spaces and then inserted directly into the document. | ||
# Use pipe (|) for multiline entries. | ||
subtext: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package textutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/textutils" | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"strings" | ||
|
||
"golang.org/x/text/encoding" | ||
"golang.org/x/text/encoding/ianaindex" | ||
"golang.org/x/text/encoding/unicode" | ||
"golang.org/x/text/transform" | ||
) | ||
|
||
// NewBasicConfig creates a new Encoding config | ||
func NewEncodingConfig() EncodingConfig { | ||
return EncodingConfig{ | ||
Encoding: "utf-8", | ||
} | ||
} | ||
|
||
// EncodingConfig is the configuration of an Encoding helper.
type EncodingConfig struct {
	// Encoding is the name of the character encoding that incoming bytes are
	// expected to be in. Build resolves it to a concrete encoding.
	Encoding string `mapstructure:"encoding,omitempty"`
}
|
||
// Build will build an Encoding operator. | ||
func (c EncodingConfig) Build() (Encoding, error) { | ||
enc, err := lookupEncoding(c.Encoding) | ||
if err != nil { | ||
return Encoding{}, err | ||
} | ||
|
||
return Encoding{ | ||
Encoding: enc, | ||
decodeBuffer: make([]byte, 1<<12), | ||
decoder: enc.NewDecoder(), | ||
}, nil | ||
} | ||
|
||
type Encoding struct { | ||
Encoding encoding.Encoding | ||
decoder *encoding.Decoder | ||
decodeBuffer []byte | ||
} | ||
|
||
// Decode converts the bytes in msgBuf to utf-8 from the configured encoding | ||
func (e *Encoding) Decode(msgBuf []byte) ([]byte, error) { | ||
for { | ||
e.decoder.Reset() | ||
nDst, _, err := e.decoder.Transform(e.decodeBuffer, msgBuf, true) | ||
if err == nil { | ||
return e.decodeBuffer[:nDst], nil | ||
} | ||
if errors.Is(err, transform.ErrShortDst) { | ||
e.decodeBuffer = make([]byte, len(e.decodeBuffer)*2) | ||
continue | ||
} | ||
return nil, fmt.Errorf("transform encoding: %w", err) | ||
} | ||
} | ||
|
||
var encodingOverrides = map[string]encoding.Encoding{ | ||
"utf-16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), | ||
"utf16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), | ||
"utf-8": unicode.UTF8, | ||
"utf8": unicode.UTF8, | ||
"ascii": unicode.UTF8, | ||
"us-ascii": unicode.UTF8, | ||
"nop": encoding.Nop, | ||
"": unicode.UTF8, | ||
} | ||
|
||
func lookupEncoding(enc string) (encoding.Encoding, error) { | ||
if e, ok := encodingOverrides[strings.ToLower(enc)]; ok { | ||
return e, nil | ||
} | ||
e, err := ianaindex.IANA.Encoding(enc) | ||
if err != nil { | ||
return nil, fmt.Errorf("unsupported encoding '%s'", enc) | ||
} | ||
if e == nil { | ||
return nil, fmt.Errorf("no charmap defined for encoding '%s'", enc) | ||
} | ||
return e, nil | ||
} | ||
|
||
func IsNop(enc string) bool { | ||
e, err := lookupEncoding(enc) | ||
if err != nil { | ||
return false | ||
} | ||
return e == encoding.Nop | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package textutils | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"golang.org/x/text/encoding" | ||
"golang.org/x/text/encoding/japanese" | ||
"golang.org/x/text/encoding/korean" | ||
"golang.org/x/text/encoding/simplifiedchinese" | ||
"golang.org/x/text/encoding/unicode" | ||
) | ||
|
||
func TestUTF8Encoding(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
encoding encoding.Encoding | ||
encodingName string | ||
}{ | ||
{ | ||
name: "UTF8 encoding", | ||
encoding: unicode.UTF8, | ||
encodingName: "utf8", | ||
}, | ||
{ | ||
name: "GBK encoding", | ||
encoding: simplifiedchinese.GBK, | ||
encodingName: "gbk", | ||
}, | ||
{ | ||
name: "SHIFT_JIS encoding", | ||
encoding: japanese.ShiftJIS, | ||
encodingName: "shift_jis", | ||
}, | ||
{ | ||
name: "EUC-KR encoding", | ||
encoding: korean.EUCKR, | ||
encodingName: "euc-kr", | ||
}, | ||
} | ||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
encCfg := NewEncodingConfig() | ||
encCfg.Encoding = test.encodingName | ||
enc, err := encCfg.Build() | ||
assert.NoError(t, err) | ||
assert.Equal(t, test.encoding, enc.Encoding) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.