From 7f8831d120d288217059901269ac961f2dfae988 Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Thu, 15 Feb 2024 14:25:12 -0500 Subject: [PATCH] UTF-8: Add support for parsing UTF8 metric and label names This adds support for the new grammar of `{"metric_name", "l1"="val"}` to promql and some of the exposition formats. This grammar will also be valid for non-UTF-8 names. UTF-8 names will not be considered valid unless model.NameValidationScheme is changed. This does not update the go expfmt parser in text_parse.go, which will be addressed by https://github.com/prometheus/common/issues/554/. Part of https://github.com/prometheus/prometheus/issues/13095 Signed-off-by: Owen Williams --- go.mod | 2 +- go.sum | 4 +- model/textparse/openmetricslex.l | 3 + model/textparse/openmetricslex.l.go | 470 ++++++----- model/textparse/openmetricsparse.go | 195 +++-- model/textparse/openmetricsparse_test.go | 165 +++- model/textparse/promlex.l | 3 + model/textparse/promlex.l.go | 302 ++++--- model/textparse/promparse.go | 153 +++- model/textparse/promparse_test.go | 142 +++- promql/parser/generated_parser.y | 22 +- promql/parser/generated_parser.y.go | 959 ++++++++++------------- promql/parser/parse.go | 23 + promql/parser/parse_test.go | 65 ++ scrape/scrape_test.go | 8 +- storage/remote/write_handler.go | 2 +- web/federate.go | 3 +- 17 files changed, 1539 insertions(+), 982 deletions(-) diff --git a/go.mod b/go.mod index 9a22f62c5f..cedb2ad1de 100644 --- a/go.mod +++ b/go.mod @@ -52,7 +52,7 @@ require ( github.com/prometheus/alertmanager v0.26.0 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 - github.com/prometheus/common v0.46.0 + github.com/prometheus/common v0.47.0 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 github.com/prometheus/exporter-toolkit v0.11.0 diff --git a/go.sum b/go.sum index c8115669e4..d5000f4ad7 100644 --- a/go.sum +++ b/go.sum @@ -678,8 +678,8 @@ github.com/prometheus/common 
v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= -github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= +github.com/prometheus/common v0.47.0 h1:p5Cz0FNHo7SnWOmWmoRozVcjEp0bIVU8cV7OShpjL1k= +github.com/prometheus/common v0.47.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= diff --git a/model/textparse/openmetricslex.l b/model/textparse/openmetricslex.l index 91e4439423..9afbbbd8bd 100644 --- a/model/textparse/openmetricslex.l +++ b/model/textparse/openmetricslex.l @@ -50,12 +50,15 @@ S [ ] TYPE{S} l.state = sMeta1; return tType UNIT{S} l.state = sMeta1; return tUnit "EOF"\n? 
l.state = sInit; return tEOFWord +\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName {M}({M}|{D})* l.state = sMeta2; return tMName {S}{C}*\n l.state = sInit; return tText {M}({M}|{D})* l.state = sValue; return tMName \{ l.state = sLabels; return tBraceOpen +\{ l.state = sLabels; return tBraceOpen {L}({L}|{D})* return tLName +\"(\\.|[^\\"])*\" l.state = sLabels; return tQString \} l.state = sValue; return tBraceClose = l.state = sLValue; return tEqual , return tComma diff --git a/model/textparse/openmetricslex.l.go b/model/textparse/openmetricslex.l.go index 9f17cfd436..c8789ef60d 100644 --- a/model/textparse/openmetricslex.l.go +++ b/model/textparse/openmetricslex.l.go @@ -37,25 +37,25 @@ yystate0: case 0: // start condition: INITIAL goto yystart1 case 1: // start condition: sComment - goto yystart5 + goto yystart6 case 2: // start condition: sMeta1 - goto yystart25 + goto yystart26 case 3: // start condition: sMeta2 - goto yystart27 + goto yystart31 case 4: // start condition: sLabels - goto yystart30 + goto yystart34 case 5: // start condition: sLValue - goto yystart35 + goto yystart42 case 6: // start condition: sValue - goto yystart39 + goto yystart46 case 7: // start condition: sTimestamp - goto yystart43 - case 8: // start condition: sExemplar goto yystart50 + case 8: // start condition: sExemplar + goto yystart57 case 9: // start condition: sEValue - goto yystart55 + goto yystart62 case 10: // start condition: sETimestamp - goto yystart61 + goto yystart68 } yystate1: @@ -68,6 +68,8 @@ yystart1: goto yystate2 case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': goto yystate4 + case c == '{': + goto yystate5 } yystate2: @@ -87,514 +89,574 @@ yystate4: c = l.next() switch { default: - goto yyrule8 + goto yyrule9 case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': goto yystate4 } yystate5: c = l.next() -yystart5: + goto yyrule11 + +yystate6: + c = l.next() +yystart6: switch { default: goto yyabort case c == 
'E': - goto yystate6 + goto yystate7 case c == 'H': - goto yystate10 + goto yystate11 case c == 'T': - goto yystate15 + goto yystate16 case c == 'U': - goto yystate20 + goto yystate21 } -yystate6: +yystate7: c = l.next() switch { default: goto yyabort case c == 'O': - goto yystate7 + goto yystate8 } -yystate7: +yystate8: c = l.next() switch { default: goto yyabort case c == 'F': - goto yystate8 + goto yystate9 } -yystate8: +yystate9: c = l.next() switch { default: goto yyrule5 case c == '\n': - goto yystate9 + goto yystate10 } -yystate9: +yystate10: c = l.next() goto yyrule5 -yystate10: +yystate11: c = l.next() switch { default: goto yyabort case c == 'E': - goto yystate11 + goto yystate12 } -yystate11: +yystate12: c = l.next() switch { default: goto yyabort case c == 'L': - goto yystate12 + goto yystate13 } -yystate12: +yystate13: c = l.next() switch { default: goto yyabort case c == 'P': - goto yystate13 + goto yystate14 } -yystate13: +yystate14: c = l.next() switch { default: goto yyabort case c == ' ': - goto yystate14 + goto yystate15 } -yystate14: +yystate15: c = l.next() goto yyrule2 -yystate15: +yystate16: c = l.next() switch { default: goto yyabort case c == 'Y': - goto yystate16 + goto yystate17 } -yystate16: +yystate17: c = l.next() switch { default: goto yyabort case c == 'P': - goto yystate17 + goto yystate18 } -yystate17: +yystate18: c = l.next() switch { default: goto yyabort case c == 'E': - goto yystate18 + goto yystate19 } -yystate18: +yystate19: c = l.next() switch { default: goto yyabort case c == ' ': - goto yystate19 + goto yystate20 } -yystate19: +yystate20: c = l.next() goto yyrule3 -yystate20: +yystate21: c = l.next() switch { default: goto yyabort case c == 'N': - goto yystate21 + goto yystate22 } -yystate21: +yystate22: c = l.next() switch { default: goto yyabort case c == 'I': - goto yystate22 + goto yystate23 } -yystate22: +yystate23: c = l.next() switch { default: goto yyabort case c == 'T': - goto yystate23 + goto yystate24 } 
-yystate23: +yystate24: c = l.next() switch { default: goto yyabort case c == ' ': - goto yystate24 + goto yystate25 } -yystate24: +yystate25: c = l.next() goto yyrule4 -yystate25: +yystate26: c = l.next() -yystart25: +yystart26: switch { default: goto yyabort + case c == '"': + goto yystate27 case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate26 + goto yystate30 } -yystate26: +yystate27: c = l.next() switch { default: - goto yyrule6 + goto yyabort + case c == '"': + goto yystate28 + case c == '\\': + goto yystate29 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate27 + } + +yystate28: + c = l.next() + goto yyrule6 + +yystate29: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate27 + } + +yystate30: + c = l.next() + switch { + default: + goto yyrule7 case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate26 + goto yystate30 } -yystate27: +yystate31: c = l.next() -yystart27: +yystart31: switch { default: goto yyabort case c == ' ': - goto yystate28 + goto yystate32 } -yystate28: +yystate32: c = l.next() switch { default: goto yyabort case c == '\n': - goto yystate29 + goto yystate33 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate28 + goto yystate32 } -yystate29: +yystate33: c = l.next() - goto yyrule7 + goto yyrule8 -yystate30: +yystate34: c = l.next() -yystart30: +yystart34: switch { default: goto yyabort + case c == '"': + goto yystate35 case c == ',': - goto yystate31 + goto yystate38 case c == '=': - goto yystate32 + goto yystate39 case c == '}': - goto yystate34 + goto yystate41 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate33 + goto yystate40 } -yystate31: +yystate35: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate36 + case c == '\\': + goto yystate37 + case c >= '\x01' && c 
<= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate35 + } + +yystate36: c = l.next() goto yyrule13 -yystate32: +yystate37: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate35 + } + +yystate38: c = l.next() - goto yyrule12 + goto yyrule16 -yystate33: +yystate39: + c = l.next() + goto yyrule15 + +yystate40: c = l.next() switch { default: - goto yyrule10 + goto yyrule12 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate33 + goto yystate40 } -yystate34: +yystate41: c = l.next() - goto yyrule11 + goto yyrule14 -yystate35: +yystate42: c = l.next() -yystart35: +yystart42: switch { default: goto yyabort case c == '"': - goto yystate36 + goto yystate43 } -yystate36: +yystate43: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate37 + goto yystate44 case c == '\\': - goto yystate38 + goto yystate45 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate36 + goto yystate43 } -yystate37: +yystate44: c = l.next() - goto yyrule14 + goto yyrule17 -yystate38: +yystate45: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate36 + goto yystate43 } -yystate39: +yystate46: c = l.next() -yystart39: +yystart46: switch { default: goto yyabort case c == ' ': - goto yystate40 + goto yystate47 case c == '{': - goto yystate42 + goto yystate49 } -yystate40: +yystate47: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate41 + goto yystate48 } -yystate41: +yystate48: c = l.next() switch { default: - goto yyrule15 + goto yyrule18 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': - goto yystate41 + goto yystate48 } -yystate42: +yystate49: c = l.next() - goto yyrule9 + goto yyrule10 -yystate43: +yystate50: c = l.next() -yystart43: +yystart50: switch { default: goto yyabort case c == ' ': - goto yystate45 + goto yystate52 case c == '\n': - goto yystate44 + goto yystate51 } -yystate44: +yystate51: c = l.next() - goto yyrule17 + goto yyrule20 -yystate45: +yystate52: c = l.next() switch { default: goto yyabort case c == '#': - goto yystate47 + goto yystate54 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c == '!' || c == '"' || c >= '$' && c <= 'ÿ': - goto yystate46 + goto yystate53 } -yystate46: +yystate53: c = l.next() switch { default: - goto yyrule16 + goto yyrule19 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate46 + goto yystate53 } -yystate47: +yystate54: c = l.next() switch { default: - goto yyrule16 + goto yyrule19 case c == ' ': - goto yystate48 + goto yystate55 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': - goto yystate46 + goto yystate53 } -yystate48: +yystate55: c = l.next() switch { default: goto yyabort case c == '{': - goto yystate49 + goto yystate56 } -yystate49: +yystate56: c = l.next() - goto yyrule18 + goto yyrule21 -yystate50: +yystate57: c = l.next() -yystart50: +yystart57: switch { default: goto yyabort case c == ',': - goto yystate51 + goto yystate58 case c == '=': - goto yystate52 + goto yystate59 case c == '}': - goto yystate54 + goto yystate61 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate53 + goto yystate60 } -yystate51: +yystate58: c = l.next() - goto yyrule23 + goto yyrule26 -yystate52: +yystate59: c = l.next() - goto yyrule21 + goto yyrule24 -yystate53: +yystate60: c = l.next() switch { default: - goto yyrule19 + goto yyrule22 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate53 + goto yystate60 } -yystate54: +yystate61: c = l.next() - goto yyrule20 + goto yyrule23 -yystate55: +yystate62: c = l.next() -yystart55: +yystart62: switch { default: goto yyabort case c == ' ': - goto yystate56 + goto yystate63 case c == '"': - goto yystate58 + goto yystate65 } -yystate56: +yystate63: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate57 + goto yystate64 } -yystate57: +yystate64: c = l.next() switch { default: - goto yyrule24 + goto yyrule27 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate57 + goto yystate64 } -yystate58: +yystate65: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate59 + goto yystate66 case c == '\\': - goto yystate60 + goto yystate67 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate58 + goto yystate65 } -yystate59: +yystate66: c = l.next() - goto yyrule22 + goto yyrule25 -yystate60: +yystate67: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate58 + goto yystate65 } -yystate61: +yystate68: c = l.next() -yystart61: +yystart68: switch { default: goto yyabort case c == ' ': - goto yystate63 + goto yystate70 case c == '\n': - goto yystate62 + goto yystate69 } -yystate62: +yystate69: c = l.next() - goto yyrule26 + goto yyrule29 -yystate63: +yystate70: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 + goto yystate71 } -yystate64: +yystate71: c = l.next() switch { default: - goto yyrule25 + goto yyrule28 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 + goto yystate71 } yyrule1: // #{S} @@ -626,115 +688,133 @@ yyrule5: // "EOF"\n? 
return tEOFWord goto yystate0 } -yyrule6: // {M}({M}|{D})* +yyrule6: // \"(\\.|[^\\"])*\" { l.state = sMeta2 return tMName goto yystate0 } -yyrule7: // {S}{C}*\n +yyrule7: // {M}({M}|{D})* + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule8: // {S}{C}*\n { l.state = sInit return tText goto yystate0 } -yyrule8: // {M}({M}|{D})* +yyrule9: // {M}({M}|{D})* { l.state = sValue return tMName goto yystate0 } -yyrule9: // \{ +yyrule10: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule11: // \{ { l.state = sLabels return tBraceOpen goto yystate0 } -yyrule10: // {L}({L}|{D})* +yyrule12: // {L}({L}|{D})* { return tLName } -yyrule11: // \} +yyrule13: // \"(\\.|[^\\"])*\" + { + l.state = sLabels + return tQString + goto yystate0 + } +yyrule14: // \} { l.state = sValue return tBraceClose goto yystate0 } -yyrule12: // = +yyrule15: // = { l.state = sLValue return tEqual goto yystate0 } -yyrule13: // , +yyrule16: // , { return tComma } -yyrule14: // \"(\\.|[^\\"\n])*\" +yyrule17: // \"(\\.|[^\\"\n])*\" { l.state = sLabels return tLValue goto yystate0 } -yyrule15: // {S}[^ \n]+ +yyrule18: // {S}[^ \n]+ { l.state = sTimestamp return tValue goto yystate0 } -yyrule16: // {S}[^ \n]+ +yyrule19: // {S}[^ \n]+ { return tTimestamp } -yyrule17: // \n +yyrule20: // \n { l.state = sInit return tLinebreak goto yystate0 } -yyrule18: // {S}#{S}\{ +yyrule21: // {S}#{S}\{ { l.state = sExemplar return tComment goto yystate0 } -yyrule19: // {L}({L}|{D})* +yyrule22: // {L}({L}|{D})* { return tLName } -yyrule20: // \} +yyrule23: // \} { l.state = sEValue return tBraceClose goto yystate0 } -yyrule21: // = +yyrule24: // = { l.state = sEValue return tEqual goto yystate0 } -yyrule22: // \"(\\.|[^\\"\n])*\" +yyrule25: // \"(\\.|[^\\"\n])*\" { l.state = sExemplar return tLValue goto yystate0 } -yyrule23: // , +yyrule26: // , { return tComma } -yyrule24: // {S}[^ \n]+ +yyrule27: // {S}[^ \n]+ { l.state = sETimestamp return tValue goto yystate0 } -yyrule25: // {S}[^ 
\n]+ +yyrule28: // {S}[^ \n]+ { return tTimestamp } -yyrule26: // \n +yyrule29: // \n if true { // avoid go vet determining the below panic will not be reached l.state = sInit return tLinebreak @@ -743,9 +823,7 @@ yyrule26: // \n panic("unreachable") yyabort: // no lexem recognized - // // silence unused label errors for build and satisfy go vet reachability analysis - // { if false { goto yyabort @@ -757,34 +835,34 @@ yyabort: // no lexem recognized goto yystate1 } if false { - goto yystate5 + goto yystate6 } if false { - goto yystate25 + goto yystate26 } if false { - goto yystate27 + goto yystate31 } if false { - goto yystate30 + goto yystate34 } if false { - goto yystate35 + goto yystate42 } if false { - goto yystate39 + goto yystate46 } if false { - goto yystate43 + goto yystate50 } if false { - goto yystate50 + goto yystate57 } if false { - goto yystate55 + goto yystate62 } if false { - goto yystate61 + goto yystate68 } } diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 4c15ff5fc0..2a7eae080f 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -81,6 +81,12 @@ type OpenMetricsParser struct { ts int64 hasTS bool start int + // offsets is a list of offsets into series that describe the positions + // of the metric name and label names and values for this series. + // p.offsets[0] is the start character of the metric name. + // p.offsets[1] is the end of the metric name. + // Subsequently, p.offsets holds pairs of pairs of offsets for the positions + // of the label name and value start and end characters.
offsets []int eOffsets []int @@ -153,20 +159,18 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { s := string(p.series) p.builder.Reset() - p.builder.Add(labels.MetricName, s[:p.offsets[0]-p.start]) + metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) + p.builder.Add(labels.MetricName, metricName) - for i := 1; i < len(p.offsets); i += 4 { + for i := 2; i < len(p.offsets); i += 4 { a := p.offsets[i] - p.start b := p.offsets[i+1] - p.start + label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start + value := unreplace(s[c:d]) - value := s[c:d] - // Replacer causes allocations. Replace only when necessary. - if strings.IndexByte(s[c:d], byte('\\')) >= 0 { - value = lvalReplacer.Replace(value) - } - p.builder.Add(s[a:b], value) + p.builder.Add(label, value) } p.builder.Sort() @@ -255,7 +259,13 @@ func (p *OpenMetricsParser) Next() (Entry, error) { case tHelp, tType, tUnit: switch t2 := p.nextToken(); t2 { case tMName: - p.offsets = append(p.offsets, p.l.start, p.l.i) + mStart := p.l.start + mEnd := p.l.i + if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' { + mStart++ + mEnd-- + } + p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) } @@ -312,58 +322,33 @@ func (p *OpenMetricsParser) Next() (Entry, error) { return EntryUnit, nil } + case tBraceOpen: + // We found a brace, so make room for the eventual metric name. If these + // values aren't updated, then the metric name was not set inside the + // braces and we can return an error. 
+ if len(p.offsets) == 0 { + p.offsets = []int{-1, -1} + } + if p.offsets, err = p.parseLVals(p.offsets, false); err != nil { + return EntryInvalid, err + } + + p.series = p.l.b[p.start:p.l.i] + return p.parseMetricSuffix(p.nextToken()) case tMName: - p.offsets = append(p.offsets, p.l.i) + p.offsets = append(p.offsets, p.start, p.l.i) p.series = p.l.b[p.start:p.l.i] t2 := p.nextToken() if t2 == tBraceOpen { - p.offsets, err = p.parseLVals(p.offsets) + p.offsets, err = p.parseLVals(p.offsets, false) if err != nil { return EntryInvalid, err } p.series = p.l.b[p.start:p.l.i] t2 = p.nextToken() } - p.val, err = p.getFloatValue(t2, "metric") - if err != nil { - return EntryInvalid, err - } - - p.hasTS = false - switch t2 := p.nextToken(); t2 { - case tEOF: - return EntryInvalid, errors.New("data does not end with # EOF") - case tLinebreak: - break - case tComment: - if err := p.parseComment(); err != nil { - return EntryInvalid, err - } - case tTimestamp: - p.hasTS = true - var ts float64 - // A float is enough to hold what we need for millisecond resolution. - if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { - return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) - } - if math.IsNaN(ts) || math.IsInf(ts, 0) { - return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts) - } - p.ts = int64(ts * 1000) - switch t3 := p.nextToken(); t3 { - case tLinebreak: - case tComment: - if err := p.parseComment(); err != nil { - return EntryInvalid, err - } - default: - return EntryInvalid, p.parseError("expected next entry after timestamp", t3) - } - default: - return EntryInvalid, p.parseError("expected timestamp or # symbol", t2) - } - return EntrySeries, nil + return p.parseMetricSuffix(t2) default: err = p.parseError("expected a valid start token", t) @@ -374,7 +359,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { func (p *OpenMetricsParser) parseComment() error { var err error // Parse the labels. 
- p.eOffsets, err = p.parseLVals(p.eOffsets) + p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { return err } @@ -415,38 +400,47 @@ func (p *OpenMetricsParser) parseComment() error { return nil } -func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) { - first := true +func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, error) { + t := p.nextToken() for { - t := p.nextToken() + curTStart := p.l.start + curTI := p.l.i switch t { case tBraceClose: return offsets, nil - case tComma: - if first { - return nil, p.parseError("expected label name or left brace", t) - } - t = p.nextToken() - if t != tLName { + case tLName: + case tQString: + default: + return nil, p.parseError("expected label name", t) + } + + t = p.nextToken() + // A quoted string followed by a comma or brace is a metric name. Set the + // offsets and continue processing. If this is an exemplar, this format + // is not allowed. + if t == tComma || t == tBraceClose { + if isExemplar { return nil, p.parseError("expected label name", t) } - case tLName: - if !first { - return nil, p.parseError("expected comma", t) + if offsets[0] != -1 || offsets[1] != -1 { + return nil, fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i]) } - default: - if first { - return nil, p.parseError("expected label name or left brace", t) + offsets[0] = curTStart + 1 + offsets[1] = curTI - 1 + if t == tBraceClose { + return offsets, nil } - return nil, p.parseError("expected comma or left brace", t) - + t = p.nextToken() + continue } - first = false - // t is now a label name. - - offsets = append(offsets, p.l.start, p.l.i) + // We have a label name, and it might be quoted. 
+ if p.l.b[curTStart] == '"' { + curTStart++ + curTI-- + } + offsets = append(offsets, curTStart, curTI) - if t := p.nextToken(); t != tEqual { + if t != tEqual { return nil, p.parseError("expected equal", t) } if t := p.nextToken(); t != tLValue { @@ -459,7 +453,62 @@ func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) { // The openMetricsLexer ensures the value string is quoted. Strip first // and last character. offsets = append(offsets, p.l.start+1, p.l.i-1) + + // Free trailing commas are allowed. + t = p.nextToken() + if t == tComma { + t = p.nextToken() + } else if t != tBraceClose { + return nil, p.parseError("expected comma or brace close", t) + } + } +} + +// parseMetricSuffix parses the end of the line after the metric name and +// labels. It starts parsing with the provided token. +func (p *OpenMetricsParser) parseMetricSuffix(t token) (Entry, error) { + if p.offsets[0] == -1 { + return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i]) + } + + var err error + p.val, err = p.getFloatValue(t, "metric") + if err != nil { + return EntryInvalid, err + } + + p.hasTS = false + switch t2 := p.nextToken(); t2 { + case tEOF: + return EntryInvalid, errors.New("data does not end with # EOF") + case tLinebreak: + break + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + case tTimestamp: + p.hasTS = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. 
+ if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { + return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if math.IsNaN(ts) || math.IsInf(ts, 0) { + return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts) + } + p.ts = int64(ts * 1000) + switch t3 := p.nextToken(); t3 { + case tLinebreak: + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + default: + return EntryInvalid, p.parseError("expected next entry after timestamp", t3) + } } + return EntrySeries, nil } func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error) { diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index 29f31664fe..d128761e39 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -301,6 +301,137 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` require.Len(t, exp, i) } +func TestUTF8OpenMetricsParse(t *testing.T) { + oldValidationScheme := model.NameValidationScheme + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = oldValidationScheme + }() + + input := `# HELP "go.gc_duration_seconds" A summary of the GC invocation durations. 
+# TYPE "go.gc_duration_seconds" summary +# UNIT "go.gc_duration_seconds" seconds +{"go.gc_duration_seconds",quantile="0"} 4.9351e-05 +{"go.gc_duration_seconds",quantile="0.25"} 7.424100000000001e-05 +{"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05 +{"http.status",q="0.9",a="b"} 8.3835e-05 +{"http.status",q="0.9",a="b"} 8.3835e-05 +{q="0.9","http.status",a="b"} 8.3835e-05 +{"go.gc_duration_seconds_sum"} 0.004304266 +{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` + + input += "\n# EOF\n" + + exp := []struct { + lset labels.Labels + m string + t *int64 + v float64 + typ model.MetricType + help string + unit string + comment string + e *exemplar.Exemplar + }{ + { + m: "go.gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go.gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: "go.gc_duration_seconds", + unit: "seconds", + }, { + m: `{"go.gc_duration_seconds",quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + }, { + m: `{"go.gc_duration_seconds",quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"), + }, { + m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: `{"http.status",q="0.9",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"), + }, { + m: `{"http.status",q="0.9",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"), + }, { + m: `{q="0.9","http.status",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"), + }, { + m: `{"go.gc_duration_seconds_sum"}`, + v: 0.004304266, + lset: labels.FromStrings("__name__", 
"go.gc_duration_seconds_sum"), + }, { + m: `{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"}`, + v: 10.0, + lset: labels.FromStrings("__name__", `Heizölrückstoßabdämpfung 10€ metric with "interesting" {character +choices}`, "strange©™\n'quoted' \"name\"", "6"), + }, + } + + p := NewOpenMetricsParser([]byte(input)) + i := 0 + + var res labels.Labels + + for { + et, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + + switch et { + case EntrySeries: + m, ts, v := p.Series() + + var e exemplar.Exemplar + p.Metric(&res) + found := p.Exemplar(&e) + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].t, ts) + require.Equal(t, exp[i].v, v) + require.Equal(t, exp[i].lset, res) + if exp[i].e == nil { + require.False(t, found) + } else { + require.True(t, found) + require.Equal(t, *exp[i].e, e) + } + + case EntryType: + m, typ := p.Type() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].typ, typ) + + case EntryHelp: + m, h := p.Help() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].help, string(h)) + + case EntryUnit: + m, u := p.Unit() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].unit, string(u)) + + case EntryComment: + require.Equal(t, exp[i].comment, string(p.Comment())) + } + + i++ + } + require.Len(t, exp, i) +} + func TestOpenMetricsParseErrors(t *testing.T) { cases := []struct { input string @@ -457,17 +588,13 @@ func TestOpenMetricsParseErrors(t *testing.T) { input: "a{b='c'} 1\n# EOF\n", err: "expected label value, got \"'\" (\"INVALID\") while parsing: \"a{b='\"", }, - { - input: "a{b=\"c\",} 1\n# EOF\n", - err: "expected label name, got \"} \" (\"BCLOSE\") while parsing: \"a{b=\\\"c\\\",} \"", - }, { input: "a{,b=\"c\"} 1\n# EOF\n", - err: "expected label name or left brace, got \",b\" (\"COMMA\") while parsing: \"a{,b\"", + err: "expected label name, got \",b\" (\"COMMA\") while parsing: 
\"a{,b\"", }, { input: "a{b=\"c\"d=\"e\"} 1\n# EOF\n", - err: "expected comma, got \"d=\" (\"LNAME\") while parsing: \"a{b=\\\"c\\\"d=\"", + err: "expected comma or brace close, got \"d=\" (\"LNAME\") while parsing: \"a{b=\\\"c\\\"d=\"", }, { input: "a{b=\"c\",,d=\"e\"} 1\n# EOF\n", @@ -479,12 +606,24 @@ func TestOpenMetricsParseErrors(t *testing.T) { }, { input: "a{\xff=\"foo\"} 1\n# EOF\n", - err: "expected label name or left brace, got \"\\xff\" (\"INVALID\") while parsing: \"a{\\xff\"", + err: "expected label name, got \"\\xff\" (\"INVALID\") while parsing: \"a{\\xff\"", }, { input: "a{b=\"\xff\"} 1\n# EOF\n", err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"", }, + { + input: `{"a","b = "c"} +# EOF +`, + err: "expected equal, got \"c\\\"\" (\"LNAME\") while parsing: \"{\\\"a\\\",\\\"b = \\\"c\\\"\"", + }, + { + input: `{"a",b\nc="d"} 1 +# EOF +`, + err: "expected equal, got \"\\\\\" (\"INVALID\") while parsing: \"{\\\"a\\\",b\\\\\"", + }, { input: "a true\n", err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"", @@ -495,7 +634,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { }, { input: "empty_label_name{=\"\"} 0\n# EOF\n", - err: "expected label name or left brace, got \"=\\\"\" (\"EQUAL\") while parsing: \"empty_label_name{=\\\"\"", + err: "expected label name, got \"=\\\"\" (\"EQUAL\") while parsing: \"empty_label_name{=\\\"\"", }, { input: "foo 1_2\n\n# EOF\n", @@ -525,6 +664,14 @@ func TestOpenMetricsParseErrors(t *testing.T) { input: `custom_metric_total 1 # {aa="bb"}`, err: "expected value after exemplar labels, got \"}\" (\"EOF\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\"}\"", }, + { + input: `custom_metric_total 1 # {bb}`, + err: "expected label name, got \"}\" (\"BCLOSE\") while parsing: \"custom_metric_total 1 # {bb}\"", + }, + { + input: `custom_metric_total 1 # {bb, a="dd"}`, + err: "expected label name, got \", \" (\"COMMA\") while parsing: \"custom_metric_total 1 # {bb, \"", + }, { input: 
`custom_metric_total 1 # {aa="bb",,cc="dd"} 1`, err: "expected label name, got \",c\" (\"COMMA\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\",,c\"", @@ -551,7 +698,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { }, { input: `{b="c",} 1`, - err: "expected a valid start token, got \"{\" (\"INVALID\") while parsing: \"{\"", + err: "metric name not set while parsing: \"{b=\\\"c\\\",} 1\"", }, { input: `a 1 NaN`, diff --git a/model/textparse/promlex.l b/model/textparse/promlex.l index c1bc8e7766..e9fa1fb71c 100644 --- a/model/textparse/promlex.l +++ b/model/textparse/promlex.l @@ -66,12 +66,15 @@ C [^\n] # return l.consumeComment() HELP[\t ]+ l.state = sMeta1; return tHelp TYPE[\t ]+ l.state = sMeta1; return tType +\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName {M}({M}|{D})* l.state = sMeta2; return tMName {C}* l.state = sInit; return tText {M}({M}|{D})* l.state = sValue; return tMName \{ l.state = sLabels; return tBraceOpen +\{ l.state = sLabels; return tBraceOpen {L}({L}|{D})* return tLName +\"(\\.|[^\\"])*\" l.state = sLabels; return tQString \} l.state = sValue; return tBraceClose = l.state = sLValue; return tEqual , return tComma diff --git a/model/textparse/promlex.l.go b/model/textparse/promlex.l.go index 4076aae610..a083e5549b 100644 --- a/model/textparse/promlex.l.go +++ b/model/textparse/promlex.l.go @@ -51,19 +51,19 @@ yystate0: case 0: // start condition: INITIAL goto yystart1 case 1: // start condition: sComment - goto yystart8 + goto yystart9 case 2: // start condition: sMeta1 - goto yystart19 + goto yystart20 case 3: // start condition: sMeta2 - goto yystart21 + goto yystart25 case 4: // start condition: sLabels - goto yystart24 + goto yystart28 case 5: // start condition: sLValue - goto yystart29 + goto yystart36 case 6: // start condition: sValue - goto yystart33 + goto yystart40 case 7: // start condition: sTimestamp - goto yystart36 + goto yystart43 } yystate1: @@ -82,6 +82,8 @@ yystart1: goto yystate3 case c == '\x00': goto 
yystate2 + case c == '{': + goto yystate8 } yystate2: @@ -123,297 +125,357 @@ yystate7: c = l.next() switch { default: - goto yyrule10 + goto yyrule11 case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': goto yystate7 } yystate8: c = l.next() -yystart8: + goto yyrule13 + +yystate9: + c = l.next() +yystart9: switch { default: goto yyabort case c == 'H': - goto yystate9 + goto yystate10 case c == 'T': - goto yystate14 + goto yystate15 case c == '\t' || c == ' ': goto yystate3 } -yystate9: +yystate10: c = l.next() switch { default: goto yyabort case c == 'E': - goto yystate10 + goto yystate11 } -yystate10: +yystate11: c = l.next() switch { default: goto yyabort case c == 'L': - goto yystate11 + goto yystate12 } -yystate11: +yystate12: c = l.next() switch { default: goto yyabort case c == 'P': - goto yystate12 + goto yystate13 } -yystate12: +yystate13: c = l.next() switch { default: goto yyabort case c == '\t' || c == ' ': - goto yystate13 + goto yystate14 } -yystate13: +yystate14: c = l.next() switch { default: goto yyrule6 case c == '\t' || c == ' ': - goto yystate13 + goto yystate14 } -yystate14: +yystate15: c = l.next() switch { default: goto yyabort case c == 'Y': - goto yystate15 + goto yystate16 } -yystate15: +yystate16: c = l.next() switch { default: goto yyabort case c == 'P': - goto yystate16 + goto yystate17 } -yystate16: +yystate17: c = l.next() switch { default: goto yyabort case c == 'E': - goto yystate17 + goto yystate18 } -yystate17: +yystate18: c = l.next() switch { default: goto yyabort case c == '\t' || c == ' ': - goto yystate18 + goto yystate19 } -yystate18: +yystate19: c = l.next() switch { default: goto yyrule7 case c == '\t' || c == ' ': - goto yystate18 + goto yystate19 } -yystate19: +yystate20: c = l.next() -yystart19: +yystart20: switch { default: goto yyabort + case c == '"': + goto yystate21 case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate20 + goto yystate24 case c 
== '\t' || c == ' ': goto yystate3 } -yystate20: +yystate21: c = l.next() switch { default: - goto yyrule8 - case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate20 + goto yyabort + case c == '"': + goto yystate22 + case c == '\\': + goto yystate23 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate21 } -yystate21: +yystate22: + c = l.next() + goto yyrule8 + +yystate23: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate21 + } + +yystate24: c = l.next() -yystart21: switch { default: goto yyrule9 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate24 + } + +yystate25: + c = l.next() +yystart25: + switch { + default: + goto yyrule10 case c == '\t' || c == ' ': - goto yystate23 + goto yystate27 case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate22 + goto yystate26 } -yystate22: +yystate26: c = l.next() switch { default: - goto yyrule9 + goto yyrule10 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate22 + goto yystate26 } -yystate23: +yystate27: c = l.next() switch { default: goto yyrule3 case c == '\t' || c == ' ': - goto yystate23 + goto yystate27 case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': - goto yystate22 + goto yystate26 } -yystate24: +yystate28: c = l.next() -yystart24: +yystart28: switch { default: goto yyabort + case c == '"': + goto yystate29 case c == ',': - goto yystate25 + goto yystate32 case c == '=': - goto yystate26 + goto yystate33 case c == '\t' || c == ' ': goto yystate3 case c == '}': - goto yystate28 + goto yystate35 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate27 + goto yystate34 } -yystate25: +yystate29: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate30 + case c == '\\': + goto yystate31 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate29 + } + +yystate30: c = l.next() goto yyrule15 -yystate26: +yystate31: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate29 + } + +yystate32: c = l.next() - goto yyrule14 + goto yyrule18 -yystate27: +yystate33: + c = l.next() + goto yyrule17 + +yystate34: c = l.next() switch { default: - goto yyrule12 + goto yyrule14 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate27 + goto yystate34 } -yystate28: +yystate35: c = l.next() - goto yyrule13 + goto yyrule16 -yystate29: +yystate36: c = l.next() -yystart29: +yystart36: switch { default: goto yyabort case c == '"': - goto yystate30 + goto yystate37 case c == '\t' || c == ' ': goto yystate3 } -yystate30: +yystate37: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate31 + goto yystate38 case c == '\\': - goto yystate32 + goto yystate39 case c >= '\x01' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate30 + goto yystate37 } -yystate31: +yystate38: c = l.next() - goto yyrule16 + goto yyrule19 -yystate32: +yystate39: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate30 + goto yystate37 } -yystate33: +yystate40: c = l.next() -yystart33: +yystart40: switch { default: goto yyabort case c == '\t' || c == ' ': goto yystate3 case c == '{': - goto yystate35 + goto yystate42 case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ': - goto yystate34 + goto yystate41 } -yystate34: +yystate41: c = l.next() switch { default: - goto yyrule17 + goto yyrule20 case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ': - goto yystate34 + goto yystate41 } -yystate35: +yystate42: c = l.next() - goto yyrule11 + goto yyrule12 -yystate36: +yystate43: c = l.next() -yystart36: +yystart43: switch { default: goto yyabort case c == '\n': - goto yystate37 + goto yystate44 case c == '\t' || c == ' ': goto yystate3 case c >= '0' && c <= '9': - goto yystate38 + goto yystate45 } -yystate37: +yystate44: c = l.next() - goto yyrule19 + goto yyrule22 -yystate38: +yystate45: c = l.next() switch { default: - goto yyrule18 + goto yyrule21 case c >= '0' && c <= '9': - goto yystate38 + goto yystate45 } yyrule1: // \0 @@ -451,67 +513,85 @@ yyrule7: // TYPE[\t ]+ return tType goto yystate0 } -yyrule8: // {M}({M}|{D})* +yyrule8: // \"(\\.|[^\\"])*\" + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule9: // {M}({M}|{D})* { l.state = sMeta2 return tMName goto yystate0 } -yyrule9: // {C}* +yyrule10: // {C}* { l.state = sInit return tText goto yystate0 } -yyrule10: // {M}({M}|{D})* +yyrule11: // {M}({M}|{D})* { l.state = sValue return tMName goto yystate0 } -yyrule11: // \{ +yyrule12: // \{ { l.state = sLabels return tBraceOpen goto yystate0 } -yyrule12: // {L}({L}|{D})* 
+yyrule13: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule14: // {L}({L}|{D})* { return tLName } -yyrule13: // \} +yyrule15: // \"(\\.|[^\\"])*\" + { + l.state = sLabels + return tQString + goto yystate0 + } +yyrule16: // \} { l.state = sValue return tBraceClose goto yystate0 } -yyrule14: // = +yyrule17: // = { l.state = sLValue return tEqual goto yystate0 } -yyrule15: // , +yyrule18: // , { return tComma } -yyrule16: // \"(\\.|[^\\"])*\" +yyrule19: // \"(\\.|[^\\"])*\" { l.state = sLabels return tLValue goto yystate0 } -yyrule17: // [^{ \t\n]+ +yyrule20: // [^{ \t\n]+ { l.state = sTimestamp return tValue goto yystate0 } -yyrule18: // {D}+ +yyrule21: // {D}+ { return tTimestamp } -yyrule19: // \n +yyrule22: // \n if true { // avoid go vet determining the below panic will not be reached l.state = sInit return tLinebreak @@ -520,9 +600,7 @@ yyrule19: // \n panic("unreachable") yyabort: // no lexem recognized - // // silence unused label errors for build and satisfy go vet reachability analysis - // { if false { goto yyabort @@ -534,25 +612,25 @@ yyabort: // no lexem recognized goto yystate1 } if false { - goto yystate8 + goto yystate9 } if false { - goto yystate19 + goto yystate20 } if false { - goto yystate21 + goto yystate25 } if false { - goto yystate24 + goto yystate28 } if false { - goto yystate29 + goto yystate36 } if false { - goto yystate33 + goto yystate40 } if false { - goto yystate36 + goto yystate43 } } diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 7123e52c33..1de783b0d0 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -57,6 +57,7 @@ const ( tComment tBlank tMName + tQString tBraceOpen tBraceClose tLName @@ -93,6 +94,8 @@ func (t token) String() string { return "BLANK" case tMName: return "MNAME" + case tQString: + return "QSTRING" case tBraceOpen: return "BOPEN" case tBraceClose: @@ -153,6 +156,12 @@ type PromParser struct { ts int64 hasTS bool start int + // 
offsets is a list of offsets into series that describe the positions + // of the metric name and label names and values for this series. + // p.offsets[0] is the start character of the metric name. + // p.offsets[1] is the end of the metric name. + // After that, p.offsets holds four offsets per label: the start and end + // of the label name, followed by the start and end of the label value. offsets []int } @@ -218,20 +227,17 @@ func (p *PromParser) Metric(l *labels.Labels) string { s := string(p.series) p.builder.Reset() - p.builder.Add(labels.MetricName, s[:p.offsets[0]-p.start]) + metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) + p.builder.Add(labels.MetricName, metricName) - for i := 1; i < len(p.offsets); i += 4 { + for i := 2; i < len(p.offsets); i += 4 { a := p.offsets[i] - p.start b := p.offsets[i+1] - p.start + label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - - value := s[c:d] - // Replacer causes allocations. Replace only when necessary. 
- if strings.IndexByte(s[c:d], byte('\\')) >= 0 { - value = lvalReplacer.Replace(value) - } - p.builder.Add(s[a:b], value) + value := unreplace(s[c:d]) + p.builder.Add(label, value) } p.builder.Sort() @@ -289,7 +295,13 @@ func (p *PromParser) Next() (Entry, error) { case tHelp, tType: switch t2 := p.nextToken(); t2 { case tMName: - p.offsets = append(p.offsets, p.l.start, p.l.i) + mStart := p.l.start + mEnd := p.l.i + if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' { + mStart++ + mEnd-- + } + p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) } @@ -301,7 +313,7 @@ func (p *PromParser) Next() (Entry, error) { p.text = []byte{} } default: - return EntryInvalid, fmt.Errorf("expected text in %s", t.String()) + return EntryInvalid, fmt.Errorf("expected text in %s, got %v", t.String(), t2.String()) } switch t { case tType: @@ -339,12 +351,24 @@ func (p *PromParser) Next() (Entry, error) { return EntryInvalid, p.parseError("linebreak expected after comment", t) } return EntryComment, nil + case tBraceOpen: + // We found a brace, so make room for the eventual metric name. If these + // values aren't updated, then the metric name was not set inside the + // braces and we can return an error. + if len(p.offsets) == 0 { + p.offsets = []int{-1, -1} + } + if err := p.parseLVals(); err != nil { + return EntryInvalid, err + } + p.series = p.l.b[p.start:p.l.i] + return p.parseMetricSuffix(p.nextToken()) case tMName: - p.offsets = append(p.offsets, p.l.i) + p.offsets = append(p.offsets, p.start, p.l.i) p.series = p.l.b[p.start:p.l.i] - t2 := p.nextToken() + // If there's a brace, consume and parse the label values. 
if t2 == tBraceOpen { if err := p.parseLVals(); err != nil { return EntryInvalid, err @@ -352,32 +376,7 @@ func (p *PromParser) Next() (Entry, error) { p.series = p.l.b[p.start:p.l.i] t2 = p.nextToken() } - if t2 != tValue { - return EntryInvalid, p.parseError("expected value after metric", t2) - } - if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil { - return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) - } - // Ensure canonical NaN value. - if math.IsNaN(p.val) { - p.val = math.Float64frombits(value.NormalNaN) - } - p.hasTS = false - switch t := p.nextToken(); t { - case tLinebreak: - break - case tTimestamp: - p.hasTS = true - if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil { - return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) - } - if t2 := p.nextToken(); t2 != tLinebreak { - return EntryInvalid, p.parseError("expected next entry after timestamp", t2) - } - default: - return EntryInvalid, p.parseError("expected timestamp or new record", t) - } - return EntrySeries, nil + return p.parseMetricSuffix(t2) default: err = p.parseError("expected a valid start token", t) @@ -385,19 +384,43 @@ func (p *PromParser) Next() (Entry, error) { return EntryInvalid, err } +// parseLVals parses the contents inside the braces. func (p *PromParser) parseLVals() error { t := p.nextToken() for { + curTStart := p.l.start + curTI := p.l.i switch t { case tBraceClose: return nil case tLName: + case tQString: default: return p.parseError("expected label name", t) } - p.offsets = append(p.offsets, p.l.start, p.l.i) - if t := p.nextToken(); t != tEqual { + t = p.nextToken() + // A quoted string followed by a comma or brace is a metric name. Set the + // offsets and continue processing. 
+ if t == tComma || t == tBraceClose { + if p.offsets[0] != -1 || p.offsets[1] != -1 { + return fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i]) + } + p.offsets[0] = curTStart + 1 + p.offsets[1] = curTI - 1 + if t == tBraceClose { + return nil + } + t = p.nextToken() + continue + } + // We have a label name, and it might be quoted. + if p.l.b[curTStart] == '"' { + curTStart++ + curTI-- + } + p.offsets = append(p.offsets, curTStart, curTI) + if t != tEqual { return p.parseError("expected equal", t) } if t := p.nextToken(); t != tLValue { @@ -411,13 +434,51 @@ func (p *PromParser) parseLVals() error { // and last character. p.offsets = append(p.offsets, p.l.start+1, p.l.i-1) - // Free trailing commas are allowed. + // Free trailing commas are allowed. NOTE: this allows spaces between label + // names, unlike in OpenMetrics. It is not clear if this is intended or an + // accidental bug. if t = p.nextToken(); t == tComma { t = p.nextToken() } } } +// parseMetricSuffix parses the end of the line after the metric name and +// labels. It starts parsing with the provided token. +func (p *PromParser) parseMetricSuffix(t token) (Entry, error) { + if p.offsets[0] == -1 { + return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i]) + } + if t != tValue { + return EntryInvalid, p.parseError("expected value after metric", t) + } + var err error + if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil { + return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + // Ensure canonical NaN value. 
+ if math.IsNaN(p.val) { + p.val = math.Float64frombits(value.NormalNaN) + } + p.hasTS = false + switch t := p.nextToken(); t { + case tLinebreak: + break + case tTimestamp: + p.hasTS = true + if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil { + return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if t2 := p.nextToken(); t2 != tLinebreak { + return EntryInvalid, p.parseError("expected next entry after timestamp", t2) + } + default: + return EntryInvalid, p.parseError("expected timestamp or new record", t) + } + + return EntrySeries, nil +} + var lvalReplacer = strings.NewReplacer( `\"`, "\"", `\\`, "\\", @@ -429,6 +490,14 @@ var helpReplacer = strings.NewReplacer( `\n`, "\n", ) +func unreplace(s string) string { + // Replacer causes allocations. Replace only when necessary. + if strings.IndexByte(s, byte('\\')) >= 0 { + return lvalReplacer.Replace(s) + } + return s +} + func yoloString(b []byte) string { return *((*string)(unsafe.Pointer(&b))) } diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index ccd7ef9ccc..d82bfe598d 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -48,6 +48,7 @@ go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05 go_gc_duration_seconds { quantile="1.0", a="b" } 8.3835e-05 go_gc_duration_seconds { quantile= "1.0", a= "b", } 8.3835e-05 go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05 +go_gc_duration_seconds { quantile = "2.0" a = "b" } 8.3835e-05 go_gc_duration_seconds_count 99 some:aggregate:rate5m{a_b="c"} 1 # HELP go_goroutines Number of goroutines that currently exist. @@ -130,6 +131,11 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds { quantile = "1.0", a = "b" }`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + // NOTE: Unlike OpenMetrics, Promparse allows spaces between label terms. 
This appears to be unintended and should probably be fixed. + m: `go_gc_duration_seconds { quantile = "2.0" a = "b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "2.0", "a", "b"), }, { m: `go_gc_duration_seconds_count`, v: 99, @@ -213,6 +219,132 @@ testmetric{label="\"bar\""} 1` require.Len(t, exp, i) } +func TestUTF8PromParse(t *testing.T) { + oldValidationScheme := model.NameValidationScheme + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = oldValidationScheme + }() + + input := `# HELP "go.gc_duration_seconds" A summary of the GC invocation durations. +# TYPE "go.gc_duration_seconds" summary +{"go.gc_duration_seconds",quantile="0"} 4.9351e-05 +{"go.gc_duration_seconds",quantile="0.25",} 7.424100000000001e-05 +{"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05 +{"go.gc_duration_seconds",quantile="0.8", a="b"} 8.3835e-05 +{"go.gc_duration_seconds", quantile="0.9", a="b"} 8.3835e-05 +{"go.gc_duration_seconds", quantile="1.0", a="b" } 8.3835e-05 +{ "go.gc_duration_seconds", quantile="1.0", a="b" } 8.3835e-05 +{ "go.gc_duration_seconds", quantile= "1.0", a= "b", } 8.3835e-05 +{ "go.gc_duration_seconds", quantile = "1.0", a = "b" } 8.3835e-05 +{"go.gc_duration_seconds_count"} 99 +{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` + + exp := []struct { + lset labels.Labels + m string + t *int64 + v float64 + typ model.MetricType + help string + comment string + }{ + { + m: "go.gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go.gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: `{"go.gc_duration_seconds",quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + }, { + m: `{"go.gc_duration_seconds",quantile="0.25",}`, + v: 7.424100000000001e-05, + lset: 
labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"), + }, { + m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: `{"go.gc_duration_seconds",quantile="0.8", a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.8", "a", "b"), + }, { + m: `{"go.gc_duration_seconds", quantile="0.9", a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: `{"go.gc_duration_seconds", quantile="1.0", a="b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `{ "go.gc_duration_seconds", quantile="1.0", a="b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `{ "go.gc_duration_seconds", quantile= "1.0", a= "b", }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `{ "go.gc_duration_seconds", quantile = "1.0", a = "b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `{"go.gc_duration_seconds_count"}`, + v: 99, + lset: labels.FromStrings("__name__", "go.gc_duration_seconds_count"), + }, { + m: `{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"}`, + v: 10.0, + lset: labels.FromStrings("__name__", `Heizölrückstoßabdämpfung 10€ metric with "interesting" {character +choices}`, "strange©™\n'quoted' \"name\"", "6"), + }, + } + + p := NewPromParser([]byte(input)) + i := 0 + + var res labels.Labels + + for { + et, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + + switch et { + case EntrySeries: + m, ts, v := p.Series() 
+ + p.Metric(&res) + + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].t, ts) + require.Equal(t, exp[i].v, v) + require.Equal(t, exp[i].lset, res) + + case EntryType: + m, typ := p.Type() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].typ, typ) + + case EntryHelp: + m, h := p.Help() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].help, string(h)) + + case EntryComment: + require.Equal(t, exp[i].comment, string(p.Comment())) + } + + i++ + } + require.Len(t, exp, i) +} + func TestPromParseErrors(t *testing.T) { cases := []struct { input string @@ -238,6 +370,14 @@ func TestPromParseErrors(t *testing.T) { input: "a{b=\"\xff\"} 1\n", err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"", }, + { + input: `{"a", "b = "c"}`, + err: "expected equal, got \"c\\\"\" (\"LNAME\") while parsing: \"{\\\"a\\\", \\\"b = \\\"c\\\"\"", + }, + { + input: `{"a",b\nc="d"} 1`, + err: "expected equal, got \"\\\\\" (\"INVALID\") while parsing: \"{\\\"a\\\",b\\\\\"", + }, { input: "a true\n", err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"", @@ -268,7 +408,7 @@ func TestPromParseErrors(t *testing.T) { }, { input: `{a="ok"} 1`, - err: "expected a valid start token, got \"{\" (\"INVALID\") while parsing: \"{\"", + err: "metric name not set while parsing: \"{a=\\\"ok\\\"} 1\"", }, { input: "# TYPE #\n#EOF\n", diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index dce79f7693..841bd31c19 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -161,7 +161,7 @@ START_METRIC_SELECTOR // Type definitions for grammar rules. %type label_match_list %type label_matcher -%type aggregate_op grouping_label match_op maybe_label metric_identifier unary_op at_modifier_preprocessors +%type aggregate_op grouping_label match_op maybe_label metric_identifier unary_op at_modifier_preprocessors string_identifier %type label_set metric %type label_set_list %type