corazawaf · jptosso · Sep 12, 2022 · Sep 8, 2022 · Sep 8, 2022 · Sep 8, 2022
diff --git a/seclang/parser.go b/seclang/parser.go
@@ -10,7 +10,6 @@ import (
 	"io/fs"
 	"os"
 	"path/filepath"
-	"regexp"
 	"strings"
 
 	"github.com/corazawaf/coraza/v3"
@@ -77,40 +76,50 @@ func (p *Parser) FromFile(profilePath string) error {
 // or arguments are invalid
 func (p *Parser) FromString(data string) error {
 	scanner := bufio.NewScanner(strings.NewReader(data))
-	var linebuffer = ""
-	pattern := regexp.MustCompile(`\\(\s+)?$`)
+	var linebuffer strings.Builder
 	inQuotes := false
 	for scanner.Scan() {
 		p.currentLine++
 		line := strings.TrimSpace(scanner.Text())
-		if !inQuotes && len(line) > 0 && line[len(line)-1] == '`' {
+		lineLen := len(line)
+		if lineLen == 0 {
+			continue
+		}
+
+		if !inQuotes && line[lineLen-1] == '`' {
 			inQuotes = true
-		} else if inQuotes && len(line) > 0 && line[0] == '`' {
+		} else if inQuotes && line[0] == '`' {
 			inQuotes = false
 		}
+
 		if inQuotes {
-			linebuffer += line + "\n"
-		} else {
-			linebuffer += line
+			linebuffer.WriteString(line)
+			linebuffer.WriteString("\n")
+			continue
+		}
+
+		if line[0] == '#' {
+			continue
 		}
 
 		// Check if line ends with \
-		if !pattern.MatchString(line) && !inQuotes {
-			err := p.evaluate(linebuffer)
+		if line[lineLen-1] == '\\' {
+			linebuffer.WriteString(strings.TrimSuffix(line, "\\"))
+		} else {
+			linebuffer.WriteString(line)
+			err := p.evaluateLine(linebuffer.String())
 			if err != nil {
 				return err
 			}
-			linebuffer = ""
-		} else if !inQuotes {
-			linebuffer = strings.TrimSuffix(linebuffer, "\\")
+			linebuffer.Reset()
 		}
 	}
-	return nil
+	return scanner.Err() // report scan failures (e.g. bufio.ErrTooLong on an over-long line) instead of silently dropping the rest of the input
 }
 
-func (p *Parser) evaluate(data string) error {
+func (p *Parser) evaluateLine(data string) error {
 	if data == "" || data[0] == '#' {
-		return nil
+		return errors.New("invalid lines")
 	}
 	// first we get the directive
 	spl := strings.SplitN(data, " ", 2)
@@ -119,12 +128,11 @@ func (p *Parser) evaluate(data string) error {
 		opts = spl[1]
 	}
 	p.options.WAF.Logger.Debug("parsing directive %q", data)
-	directive := spl[0]
+	directive := strings.ToLower(spl[0])
 
 	if len(opts) >= 3 && opts[0] == '"' && opts[len(opts)-1] == '"' {
 		opts = strings.Trim(opts, `"`)
 	}
-	directive = strings.ToLower(directive)
 	if directive == "include" {
 		// this is a special hardcoded case
 		// we cannot add it as a directive type because there are recursion issues

diff --git a/seclang/parser_test.go b/seclang/parser_test.go
@@ -14,14 +14,13 @@ import (
 	"testing"
 
 	"github.com/corazawaf/coraza/v3"
-	engine "github.com/corazawaf/coraza/v3"
 )
 
 //go:embed testdata
 var testdata embed.FS
 
 func TestInterruption(t *testing.T) {
-	waf := engine.NewWAF()
+	waf := coraza.NewWAF()
 	p := NewParser(waf)
 	if err := p.FromString(`SecAction "id:1,deny,log,phase:1"`); err != nil {
 		t.Error("Could not create from string")
@@ -33,7 +32,7 @@ func TestInterruption(t *testing.T) {
 }
 
 func TestDirectivesCaseInsensitive(t *testing.T) {
-	waf := engine.NewWAF()
+	waf := coraza.NewWAF()
 	p := NewParser(waf)
 	err := p.FromString("seCwEbAppid 15")
 	if err != nil {
@@ -42,7 +41,7 @@ func TestDirectivesCaseInsensitive(t *testing.T) {
 }
 
 func TestDefaultConfigurationFile(t *testing.T) {
-	waf := engine.NewWAF()
+	waf := coraza.NewWAF()
 	p := NewParser(waf)
 	err := p.FromFile("../coraza.conf-recommended")
 	if err != nil {
@@ -170,3 +169,20 @@ func TestEmbedFS(t *testing.T) {
 		t.Error("Expected 4 rules loaded using include directive. Found: ", waf.Rules.Count())
 	}
 }
+
+//go:embed testdata/parserbenchmark.conf
+var parsingRule string
+
+// BenchmarkParseFromString measures Parser.FromString on the embedded benchmark
+// configuration. Each iteration gets a fresh WAF and parser, built outside the
+// timed region, so state left by an earlier parse cannot affect later runs.
+func BenchmarkParseFromString(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		parser := NewParser(coraza.NewWAF())
+		b.StartTimer()
+		// A failing parse would make the measurement meaningless, so abort loudly.
+		if err := parser.FromString(parsingRule); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/seclang/rule_parser.go b/seclang/rule_parser.go
@@ -317,7 +317,7 @@ var ruleTokenRegex = regexp.MustCompile(`"(?:[^"\\]|\\.)*"`)
 // In case WithOperator is false, the rule will be parsed without operator
 // This function is created for external plugin directives
 func ParseRule(options RuleOptions) (*coraza.Rule, error) {
-	if strings.Trim(options.Data, " ") == "" {
+	if strings.TrimSpace(options.Data) == "" {
 		return nil, errors.New("empty rule")
 	}
 
@@ -342,17 +342,15 @@ func ParseRule(options RuleOptions) (*coraza.Rule, error) {
 	actions := ""
 
 	if options.WithOperator {
-		matches := ruleTokenRegex.FindAllString(options.Data, -1)
+		matches := ruleTokenRegex.FindAllString(options.Data, 3) // at most the second match is used; NOTE(review): the limit passed is 3 — confirm whether 2 would suffice
 		if len(matches) == 0 {
 			return nil, fmt.Errorf("invalid rule with no transformation matches: %q", options.Data)
 		}
 		operator := utils.RemoveQuotes(matches[0])
 		if utils.InSlice(operator, disabledRuleOperators) {
 			return nil, fmt.Errorf("%s rule operator is disabled", operator)
 		}
-
-		rulePieces := strings.SplitN(options.Data, " ", 2)
-		vars := rulePieces[0]
+		vars, _, _ := strings.Cut(options.Data, " ")
 		err = rp.ParseVariables(vars)
 		if err != nil {
 			return nil, err