Skip to content

Commit

Permalink
UTF-8: Add support for parsing UTF-8 metric and label names
Browse files Browse the repository at this point in the history
This adds support for the new grammar `{"metric_name", "l1"="val"}` to PromQL and some of the exposition formats.
This grammar will also be valid for non-UTF-8 names.
UTF-8 names will not be considered valid unless model.NameValidationScheme is changed.

This does not update the Go expfmt parser in text_parse.go; that will be addressed by prometheus/common#554.

Part of prometheus#13095

Signed-off-by: Owen Williams <[email protected]>
  • Loading branch information
ywwg committed Feb 15, 2024
1 parent 935d570 commit b6c5ac3
Show file tree
Hide file tree
Showing 14 changed files with 620 additions and 1,562 deletions.
4 changes: 1 addition & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ require (
github.com/prometheus/alertmanager v0.26.0
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/client_model v0.5.0
github.com/prometheus/common v0.46.0
github.com/prometheus/common v0.47.0
github.com/prometheus/common/assets v0.2.0
github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/exporter-toolkit v0.11.0
Expand Down Expand Up @@ -203,8 +203,6 @@ require (
)

replace (
github.com/prometheus/client_golang => /home/owilliams/src/grafana/client_golang
github.com/prometheus/common => /home/owilliams/src/third_party/common
k8s.io/klog => github.com/simonpasquier/klog-gokit v0.3.0
k8s.io/klog/v2 => github.com/simonpasquier/klog-gokit/v3 v3.3.0
)
Expand Down
1,179 changes: 67 additions & 1,112 deletions go.sum

Large diffs are not rendered by default.

73 changes: 34 additions & 39 deletions model/textparse/openmetricsparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (

"github.com/prometheus/common/model"

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
Expand Down Expand Up @@ -82,6 +81,12 @@ type OpenMetricsParser struct {
ts int64
hasTS bool
start int
// offsets is a list of offsets into series that describe the positions
// of the metric name and label names and values for this series.
// p.offsets[0] is the start character of the metric name.
// p.offsets[1] is the end of the metric name.
// Subsequently, p.offsets holds groups of four offsets, one group per label:
// the start and end of the label name, then the start and end of its value.
offsets []int

eOffsets []int
Expand Down Expand Up @@ -154,20 +159,18 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string {
s := string(p.series)

p.builder.Reset()
p.builder.Add(labels.MetricName, s[p.offsets[0]-p.start:p.offsets[1]-p.start])
metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start])
p.builder.Add(labels.MetricName, metricName)

for i := 2; i < len(p.offsets); i += 4 {
a := p.offsets[i] - p.start
b := p.offsets[i+1] - p.start
label := unreplace(s[a:b])
c := p.offsets[i+2] - p.start
d := p.offsets[i+3] - p.start
value := unreplace(s[c:d])

value := s[c:d]
// Replacer causes allocations. Replace only when necessary.
if strings.IndexByte(s[c:d], byte('\\')) >= 0 {
value = lvalReplacer.Replace(value)
}
p.builder.Add(s[a:b], value)
p.builder.Add(label, value)
}

p.builder.Sort()
Expand Down Expand Up @@ -226,12 +229,11 @@ func (p *OpenMetricsParser) nextToken() token {
}

func (p *OpenMetricsParser) parseError(exp string, got token) error {
e := p.l.i + 80
e := p.l.i + 1
if len(p.l.b) < e {
e = len(p.l.b)
}
start := int(math.Max(0, float64(p.start-80)))
return fmt.Errorf("%s, got %q (%q) while parsing: %q", exp, p.l.b[p.l.start:e], got, p.l.b[start:e])
return fmt.Errorf("%s, got %q (%q) while parsing: %q", exp, p.l.b[p.l.start:e], got, p.l.b[p.start:e])
}

// Next advances the parser to the next sample. It returns false if no
Expand All @@ -255,7 +257,6 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
case tEOF:
return EntryInvalid, errors.New("data does not end with # EOF")
case tHelp, tType, tUnit:
tStart := p.l.start
switch t2 := p.nextToken(); t2 {
case tMName:
mStart := p.l.start
Expand All @@ -266,7 +267,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
}
p.offsets = append(p.offsets, mStart, mEnd)
default:
return EntryInvalid, p.parseError("expected metric name after "+t.String()+" "+string(p.l.b), t2)
return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2)
}
switch t2 := p.nextToken(); t2 {
case tText:
Expand All @@ -276,11 +277,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
p.text = []byte{}
}
default:
end := tStart + 40
if end >= len(p.l.b) {
end = len(p.l.b) - 1
}
return EntryInvalid, fmt.Errorf("expected text in %s: got %v (%v)", t.String(), t2.String(), string(p.l.b[tStart:end]))
return EntryInvalid, fmt.Errorf("expected text in %s", t.String())
}
switch t {
case tType:
Expand Down Expand Up @@ -332,22 +329,19 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
if len(p.offsets) == 0 {
p.offsets = []int{-1, -1}
}
if p.offsets, err = p.parseLVals(p.offsets); err != nil {
if p.offsets, err = p.parseLVals(p.offsets, false); err != nil {
return EntryInvalid, err
}

p.series = p.l.b[p.start:p.l.i]
return p.parseMetricSuffix(p.nextToken())
case tMName:
p.offsets = append(p.offsets, p.start, p.l.i)
if err := p.verifyMetricName(p.offsets[0], p.offsets[1]); err != nil {
return EntryInvalid, err
}
p.series = p.l.b[p.start:p.l.i]

t2 := p.nextToken()
if t2 == tBraceOpen {
p.offsets, err = p.parseLVals(p.offsets)
p.offsets, err = p.parseLVals(p.offsets, false)
if err != nil {
return EntryInvalid, err
}
Expand All @@ -365,7 +359,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
func (p *OpenMetricsParser) parseComment() error {
var err error
// Parse the labels.
p.eOffsets, err = p.parseLVals(p.eOffsets)
p.eOffsets, err = p.parseLVals(p.eOffsets, true)
if err != nil {
return err
}
Expand Down Expand Up @@ -406,7 +400,7 @@ func (p *OpenMetricsParser) parseComment() error {
return nil
}

func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) {
func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, error) {
t := p.nextToken()
for {
curTStart := p.l.start
Expand All @@ -422,23 +416,28 @@ func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) {

t = p.nextToken()
// A quoted string followed by a comma or brace is a metric name. Set the
// offsets and continue processing.
// offsets and continue processing. If this is an exemplar, this format
// is not allowed.
if t == tComma || t == tBraceClose {
if isExemplar {
return nil, p.parseError("expected label name", t)
}
if offsets[0] != -1 || offsets[1] != -1 {
return nil, fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i])
}
offsets[0] = curTStart + 1
offsets[1] = curTI - 1
if err := p.verifyMetricName(offsets[0], offsets[1]); err != nil {
return nil, err
}
if t == tBraceClose {
return offsets, nil
}
t = p.nextToken()
continue
}
// We have a label name.
// We have a label name, and it might be quoted.
if p.l.b[curTStart] == '"' {
curTStart++
curTI--
}
offsets = append(offsets, curTStart, curTI)

if t != tEqual {
Expand Down Expand Up @@ -468,6 +467,10 @@ func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) {
// parseMetricSuffix parses the end of the line after the metric name and
// labels. It starts parsing with the provided token.
func (p *OpenMetricsParser) parseMetricSuffix(t token) (Entry, error) {
if p.offsets[0] == -1 {
return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i])
}

var err error
p.val, err = p.getFloatValue(t, "metric")
if err != nil {
Expand All @@ -489,7 +492,7 @@ func (p *OpenMetricsParser) parseMetricSuffix(t token) (Entry, error) {
var ts float64
// A float is enough to hold what we need for millisecond resolution.
if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil {
return EntryInvalid, fmt.Errorf("%v while parsing: %q", err, p.l.b[p.start:p.l.i])
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
}
if math.IsNaN(ts) || math.IsInf(ts, 0) {
return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts)
Expand Down Expand Up @@ -522,11 +525,3 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error
}
return val, nil
}

// verifyMetricName checks the bytes of the lexer buffer in the half-open
// range [start, end) against model.IsValidMetricName. It returns nil when the
// name is accepted, and otherwise an error that quotes the rejected name.
func (p *OpenMetricsParser) verifyMetricName(start, end int) error {
	name := yoloString(p.l.b[start:end])
	if model.IsValidMetricName(model.LabelValue(name)) {
		return nil
	}
	return fmt.Errorf("metric name %q is not valid", name)
}
Loading

0 comments on commit b6c5ac3

Please sign in to comment.