-
-
Notifications
You must be signed in to change notification settings - Fork 219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
V3/improves parser performance #412
Changes from 5 commits
f358e81
2156019
717e8c0
525e9b3
ba4e4e4
42243da
934e655
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,6 @@ import ( | |
"io/fs" | ||
"os" | ||
"path/filepath" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/corazawaf/coraza/v3" | ||
|
@@ -77,40 +76,50 @@ func (p *Parser) FromFile(profilePath string) error { | |
// or arguments are invalid | ||
func (p *Parser) FromString(data string) error { | ||
scanner := bufio.NewScanner(strings.NewReader(data)) | ||
var linebuffer = "" | ||
pattern := regexp.MustCompile(`\\(\s+)?$`) | ||
var linebuffer strings.Builder | ||
inQuotes := false | ||
for scanner.Scan() { | ||
p.currentLine++ | ||
line := strings.TrimSpace(scanner.Text()) | ||
if !inQuotes && len(line) > 0 && line[len(line)-1] == '`' { | ||
lineLen := len(line) | ||
if lineLen == 0 { | ||
continue | ||
} | ||
|
||
if !inQuotes && line[lineLen-1] == '`' { | ||
inQuotes = true | ||
} else if inQuotes && len(line) > 0 && line[0] == '`' { | ||
} else if inQuotes && line[0] == '`' { | ||
inQuotes = false | ||
} | ||
|
||
if inQuotes { | ||
linebuffer += line + "\n" | ||
} else { | ||
linebuffer += line | ||
linebuffer.WriteString(line) | ||
linebuffer.WriteString("\n") | ||
continue | ||
} | ||
|
||
if line[0] == '#' { | ||
continue | ||
} | ||
|
||
// Check if line ends with \ | ||
if !pattern.MatchString(line) && !inQuotes { | ||
err := p.evaluate(linebuffer) | ||
if line[lineLen-1] == '\\' { | ||
linebuffer.WriteString(strings.TrimSuffix(line, "\\")) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Use slice instead of […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I was inclined to do that but I wondered if something like […] is possible where you have more than one […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Ah good example. I think if it's supported, having spaces inside is also supposed to be supported, e.g. […] I don't think either the old or new code handles this. Is it possible to define a custom split function for the bufio.Scanner that treats newlines and […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Sorry, my idea about customizing the scanner doesn't work for […] |
||
} else { | ||
linebuffer.WriteString(line) | ||
err := p.evaluateLine(linebuffer.String()) | ||
if err != nil { | ||
return err | ||
} | ||
linebuffer = "" | ||
} else if !inQuotes { | ||
linebuffer = strings.TrimSuffix(linebuffer, "\\") | ||
linebuffer.Reset() | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (p *Parser) evaluate(data string) error { | ||
func (p *Parser) evaluateLine(data string) error { | ||
if data == "" || data[0] == '#' { | ||
return nil | ||
return errors.New("invalid lines") | ||
} | ||
// first we get the directive | ||
spl := strings.SplitN(data, " ", 2) | ||
|
@@ -119,12 +128,11 @@ func (p *Parser) evaluate(data string) error { | |
opts = spl[1] | ||
} | ||
p.options.WAF.Logger.Debug("parsing directive %q", data) | ||
directive := spl[0] | ||
directive := strings.ToLower(spl[0]) | ||
|
||
if len(opts) >= 3 && opts[0] == '"' && opts[len(opts)-1] == '"' { | ||
opts = strings.Trim(opts, `"`) | ||
} | ||
directive = strings.ToLower(directive) | ||
if directive == "include" { | ||
// this is a special hardcoded case | ||
// we cannot add it as a directive type because there are recursion issues | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -317,7 +317,7 @@ var ruleTokenRegex = regexp.MustCompile(`"(?:[^"\\]|\\.)*"`) | |
// In case WithOperator is false, the rule will be parsed without operator | ||
// This function is created for external plugin directives | ||
func ParseRule(options RuleOptions) (*coraza.Rule, error) { | ||
if strings.Trim(options.Data, " ") == "" { | ||
if strings.TrimSpace(options.Data) == "" { | ||
return nil, errors.New("empty rule") | ||
} | ||
|
||
|
@@ -342,17 +342,15 @@ func ParseRule(options RuleOptions) (*coraza.Rule, error) { | |
actions := "" | ||
|
||
if options.WithOperator { | ||
matches := ruleTokenRegex.FindAllString(options.Data, -1) | ||
matches := ruleTokenRegex.FindAllString(options.Data, 3) // we use at most second match | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes, 1 matches the whole thing, plus the first operator block, then the actions block. |
||
if len(matches) == 0 { | ||
return nil, fmt.Errorf("invalid rule with no transformation matches: %q", options.Data) | ||
} | ||
operator := utils.RemoveQuotes(matches[0]) | ||
if utils.InSlice(operator, disabledRuleOperators) { | ||
return nil, fmt.Errorf("%s rule operator is disabled", operator) | ||
} | ||
|
||
rulePieces := strings.SplitN(options.Data, " ", 2) | ||
vars := rulePieces[0] | ||
vars, _, _ := strings.Cut(options.Data, " ") | ||
err = rp.ParseVariables(vars) | ||
if err != nil { | ||
return nil, err | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still hoping for an example on this one :)
#398 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was implemented for SecDataset:
It's ugly code; if you have a better idea, please go ahead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah thanks for the info! IIUC, then we could replace it to just be `` line == "`" `` - that would clear up the confusion I had nicely.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok so I think then we should call it `inBackticks` instead of `inQuotes`, and then, what we are trying to support is something like:
So the first condition matches the last backtick in `` SecDataset test ` ``.
And as for the last, we can simply match it with `` line == "`" `` as rag suggested.
I wonder if there is a case (cc @fzipi @M4tteoP @piyushroshan) where a backtick is at the beginning or at the end and it is not inside a `SecDataset` or a similar construct.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see where the len-1 comes from now. I think a `\` or a `#` may be able to break that assumption though. Let's add test cases for both of these, on the first quote and on the last.
But ok with filing an issue and handling in a separate PR since it's not related to performance, which is what this PR is handling.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the backtick is a black hole in that sense: whatever you add inside, even if it is a keyword somewhere else (e.g. `#` for comments), loses its special meaning inside backticks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Speaking about `SecDataset`, comments (`#`) are evaluated and stripped later on, delegating it to `directiveSecDataset` and not to the initial parser.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I was thinking mostly about trailing comments, especially after the ending quote. But actually I guess we might not handle them at all right now. Basically a line should be a `trim(line)[0:LastIndexByte('#')]` type of thing.