Skip to content

Commit

Permalink
UTF-8 support in validation, and some parsers and formatters (#537)
Browse files Browse the repository at this point in the history
UTF-8 support in validation, and some parsers and formatters

---------

Signed-off-by: Owen Williams <[email protected]>
  • Loading branch information
ywwg authored Jan 23, 2024
1 parent 7e44242 commit bd0376d
Show file tree
Hide file tree
Showing 13 changed files with 574 additions and 150 deletions.
2 changes: 1 addition & 1 deletion config/http_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
"sync"
"time"

"github.com/mwitkow/go-conntrack"
conntrack "github.com/mwitkow/go-conntrack"
"golang.org/x/net/http/httpproxy"
"golang.org/x/net/http2"
"golang.org/x/oauth2"
Expand Down
52 changes: 49 additions & 3 deletions expfmt/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bufio"
"errors"
"io"
"math"
"net/http"
"reflect"
"sort"
Expand Down Expand Up @@ -104,9 +105,10 @@ func TestProtoDecoder(t *testing.T) {
testTime := model.Now()

scenarios := []struct {
in string
expected model.Vector
fail bool
in string
expected model.Vector
legacyNameFail bool
fail bool
}{
{
in: "",
Expand Down Expand Up @@ -332,6 +334,30 @@ func TestProtoDecoder(t *testing.T) {
},
},
},
{
in: "\xa8\x01\n\ngauge.name\x12\x11gauge\ndoc\nstr\"ing\x18\x01\"T\n\x1b\n\x06name.1\x12\x11val with\nnew line\n*\n\x06name*2\x12 val with \\backslash and \"quotes\"\x12\t\t\x00\x00\x00\x00\x00\x00\xf0\x7f\"/\n\x10\n\x06name.1\x12\x06Björn\n\x10\n\x06name*2\x12\x06佖佥\x12\t\t\xd1\xcfD\xb9\xd0\x05\xc2H",
legacyNameFail: true,
expected: model.Vector{
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "val with\nnew line",
"name*2": "val with \\backslash and \"quotes\"",
},
Value: model.SampleValue(math.Inf(+1)),
Timestamp: testTime,
},
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "Björn",
"name*2": "佖佥",
},
Value: 3.14e42,
Timestamp: testTime,
},
},
},
}

for i, scenario := range scenarios {
Expand All @@ -344,11 +370,31 @@ func TestProtoDecoder(t *testing.T) {

var all model.Vector
for {
model.NameValidationScheme = model.LegacyValidation
var smpls model.Vector
err := dec.Decode(&smpls)
if err != nil && errors.Is(err, io.EOF) {
break
}
if scenario.legacyNameFail {
if err == nil {
t.Fatal("Expected error when decoding without UTF-8 support enabled but got none")
}
model.NameValidationScheme = model.UTF8Validation
dec = &SampleDecoder{
Dec: &protoDecoder{r: strings.NewReader(scenario.in)},
Opts: &DecodeOptions{
Timestamp: testTime,
},
}
err = dec.Decode(&smpls)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
t.Fatalf("Unexpected error when decoding with UTF-8 support: %v", err)
}
}
if scenario.fail {
if err == nil {
t.Fatal("Expected error but got none")
Expand Down
8 changes: 7 additions & 1 deletion expfmt/expfmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ package expfmt
// Format specifies the HTTP content type of the different wire protocols.
type Format string

// Constants to assemble the Content-Type values for the different wire protocols.
// Constants to assemble the Content-Type values for the different wire
// protocols. The Content-Type strings here are all for the legacy exposition
// formats, where valid characters for metric names and label names are limited.
// Support for arbitrary UTF-8 characters in those names is already partially
// implemented in this module (see model.ValidationScheme), but to actually use
// it on the wire, new content-type strings will have to be agreed upon and
// added here.
const (
TextVersion = "0.0.4"
ProtoType = `application/vnd.google.protobuf`
Expand Down
85 changes: 58 additions & 27 deletions expfmt/openmetrics_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,18 @@ import (
// sanity checks. If the input contains duplicate metrics or invalid metric or
// label names, the conversion will result in invalid text format output.
//
// If metric names conform to the legacy validation pattern, they will be placed
// outside the brackets in the traditional way, like `foo{}`. If the metric name
// fails the legacy validation check, it will be placed quoted inside the
// brackets: `{"foo"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// Similar to metric names, if label names conform to the legacy validation
// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label
// name fails the legacy validation check, it will be quoted:
// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// This function fulfills the type 'expfmt.encoder'.
//
// Note that OpenMetrics requires a final `# EOF` line. Since this function acts
Expand Down Expand Up @@ -98,7 +110,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
if err != nil {
return
}
n, err = w.WriteString(shortName)
n, err = writeName(w, shortName)
written += n
if err != nil {
return
Expand All @@ -124,7 +136,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
if err != nil {
return
}
n, err = w.WriteString(shortName)
n, err = writeName(w, shortName)
written += n
if err != nil {
return
Expand Down Expand Up @@ -303,21 +315,9 @@ func writeOpenMetricsSample(
floatValue float64, intValue uint64, useIntValue bool,
exemplar *dto.Exemplar,
) (int, error) {
var written int
n, err := w.WriteString(name)
written += n
if err != nil {
return written, err
}
if suffix != "" {
n, err = w.WriteString(suffix)
written += n
if err != nil {
return written, err
}
}
n, err = writeOpenMetricsLabelPairs(
w, metric.Label, additionalLabelName, additionalLabelValue,
written := 0
n, err := writeOpenMetricsNameAndLabelPairs(
w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue,
)
written += n
if err != nil {
Expand Down Expand Up @@ -365,27 +365,58 @@ func writeOpenMetricsSample(
return written, nil
}

// writeOpenMetricsLabelPairs works like writeOpenMetrics but formats the float
// in OpenMetrics style.
func writeOpenMetricsLabelPairs(
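// writeOpenMetricsNameAndLabelPairs writes the metric name (if one is given)
// followed by the label pairs. A name that fails the legacy validity check is
// written inside the braces rather than in front of them. As in the rest of
// the OpenMetrics writer, the float is formatted in OpenMetrics style.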
func writeOpenMetricsNameAndLabelPairs(
w enhancedWriter,
name string,
in []*dto.LabelPair,
additionalLabelName string, additionalLabelValue float64,
) (int, error) {
if len(in) == 0 && additionalLabelName == "" {
return 0, nil
}
var (
written int
separator byte = '{'
written int
separator byte = '{'
metricInsideBraces = false
)

if name != "" {
// If the name does not pass the legacy validity check, we must put the
// metric name inside the braces, quoted.
if !model.IsValidLegacyMetricName(model.LabelValue(name)) {
metricInsideBraces = true
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
separator = ','
}

n, err := writeName(w, name)
written += n
if err != nil {
return written, err
}
}

if len(in) == 0 && additionalLabelName == "" {
if metricInsideBraces {
err := w.WriteByte('}')
written++
if err != nil {
return written, err
}
}
return written, nil
}

for _, lp := range in {
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
n, err := w.WriteString(lp.GetName())
n, err := writeName(w, lp.GetName())
written += n
if err != nil {
return written, err
Expand Down Expand Up @@ -451,7 +482,7 @@ func writeExemplar(w enhancedWriter, e *dto.Exemplar) (int, error) {
if err != nil {
return written, err
}
n, err = writeOpenMetricsLabelPairs(w, e.Label, "", 0)
n, err = writeOpenMetricsNameAndLabelPairs(w, "", e.Label, "", 0)
written += n
if err != nil {
return written, err
Expand Down
Loading

0 comments on commit bd0376d

Please sign in to comment.