Add backticks to the grammar #20

Merged · 5 commits · Apr 23, 2020
Changes from all commits
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## Version 0.8.7
_Released 2020-##-##_

### Added
* Support for escaped identifiers in fields using \`backtick\` syntax

### Changed
* (Internal) Changed the type system to use TypeHint/TypeFoldCheck/NodeInfo instead of tuples

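As a quick illustration of the changelog entry above, here is a hedged sketch of a query this change is meant to allow. The field names are invented for the example, and it assumes the branch's `parse_query` helper behaves as the diffs below describe.

```python
# Sketch only: hypothetical field names; assumes this PR's parser is in use.
from eql.parser import parse_query

# "event-category" and "process name" are not valid bare identifiers,
# so they need the new backtick escaping to be referenced as fields.
query = parse_query(
    'process where `event-category` == "start" and `process name` == "explorer.exe"'
)
print(query)
```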
20 changes: 16 additions & 4 deletions eql/ast.py
@@ -384,6 +384,8 @@ class Field(Expression):
__slots__ = 'base', 'path',
precedence = Expression.precedence + 1

field_re = re.compile("^[_A-Za-z][_A-Za-z0-9]+$")

def __init__(self, base, path=None):
"""Query the event for the field expression.

@@ -405,14 +407,24 @@ def full_path(self): # type: () -> list[str]
"""Get the full path for a field."""
return [self.base] + self.path

@classmethod
def escape_ident(cls, key):
"""Escape identifiers that are keywords."""
from .parser import keywords

if key in keywords or cls.field_re.match(key) is None:
return "`{key}`".format(key=key)
return key

def _render(self):
text = self.base
text = [self.escape_ident(self.base)]

for key in self.path:
if is_number(key):
text += "[{}]".format(key)
text.append("[{}]".format(key))
else:
text += ".{}".format(key)
return text
text.append(".{}".format(self.escape_ident(key)))
return "".join(text)


class FunctionCall(Expression):
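A hedged sketch of the rendering behaviour added above: path components that are keywords or that fail the identifier regex should come back backticked, while ordinary names and array indices are left alone. The field names here are made up.

```python
# Sketch: build a Field node directly and render it back to EQL text.
from eql.ast import Field

field = Field("process name", ["pid", 0, "and"])
# "process name" fails the identifier regex and "and" is a keyword,
# so both should be escaped; "pid" and the index 0 should not be.
print(field.render())  # expected: `process name`.pid[0].`and`
```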
18 changes: 15 additions & 3 deletions eql/etc/eql.g
@@ -62,7 +62,7 @@ function_call.2: name "(" [expressions] ")"
?single_atom: literal
| field
| base_field
base_field: name
base_field: name | escaped_name
field: FIELD
literal: number
| string
@@ -75,19 +75,31 @@ string: DQ_STRING

// Check against keyword usage
name: NAME
escaped_name: ESCAPED_NAME

// Tokens
// make this a token to avoid ambiguity, and make more rigid on whitespace
// pin the first "." or "[" to resolve token ambiguities
// sequence by pid [1] [true] looks identical to:
// sequence by pid[1] [true]
FIELD: NAME ("." WHITESPACE* NAME | "[" WHITESPACE* UNSIGNED_INTEGER WHITESPACE* "]")+
FIELD: FIELD_IDENT (ATTR | INDEX)+
ATTR: "." WHITESPACE? FIELD_IDENT
INDEX: "[" WHITESPACE? UNSIGNED_INTEGER WHITESPACE? "]"
FIELD_IDENT: NAME | ESCAPED_NAME

// create a non-conflicting helper rule to deconstruct
field_parts: field_ident ("." field_ident | "[" array_index "]")+
!array_index: UNSIGNED_INTEGER
!field_ident: NAME | ESCAPED_NAME


LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
DIGIT: "0".."9"

LETTER: UCASE_LETTER | LCASE_LETTER
WORD: LETTER+

ESCAPED_NAME: "`" /[^`\r\n]+/ "`"
NAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
UNSIGNED_INTEGER: /[0-9]+/
EXPONENT: /[Ee][-+]?\d+/
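To see how the reworked `FIELD` token and the `field_parts` helper rule fit together, the module-level `lark_parser` (constructed in `eql/parser.py`, below) can be handed the new start rule directly. A sketch, assuming the parser module is importable as shown in the next diff:

```python
# Sketch: re-parse a FIELD token's text with the "field_parts" start rule,
# which splits it into field_ident and array_index parts without ambiguity.
from eql.parser import lark_parser

tree = lark_parser.parse("`foo.bar`[0].baz", "field_parts")
print(tree.pretty())  # expect field_ident `foo.bar`, array_index 0, field_ident baz
```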
41 changes: 28 additions & 13 deletions eql/parser.py
@@ -481,15 +481,17 @@ def string(self, node):

def base_field(self, node):
"""Get a base field."""
name = node["name"]
text = name["NAME"]
child = node.children[0]
token = child["NAME"] or child["ESCAPED_NAME"]
name = token.value.strip("`")

if text in RESERVED:
value = RESERVED[text]
return NodeInfo(value, value.type_hint, source=node)
if token.type != "ESCAPED_NAME":
if name in RESERVED:
value = RESERVED[name]
return NodeInfo(value, value.type_hint, source=node)

# validate against the remaining keywords
name = self.visit(name)
# validate against the remaining keywords
self.visit(child)

if name in self.preprocessor.constants:
constant = self.preprocessor.constants[name]
@@ -505,11 +507,24 @@

def field(self, node):
"""Callback function to walk the AST."""
full_path = [s.strip() for s in re.split(r"[.\[\]]+", node.children[0])]
full_path = [int(s) if s.isdigit() else s for s in full_path if s]

if any(p in keywords for p in full_path):
raise self._error(node, "Invalid use of keyword", cls=EqlSyntaxError)
full_path = []

# to get around parser ambiguities, we had to create a token to mash all of the parts together
# but we have a separate rule "field_parts" that can safely re-parse and separate out the tokens.
# we can walk through each token, and build the field path accordingly
for part in lark_parser.parse(node.children[0], "field_parts").children:
if part["NAME"]:
name = to_unicode(part["NAME"])
full_path.append(name)

if name in keywords:
raise self._error(node, "Invalid use of keyword", cls=EqlSyntaxError)
elif part["ESCAPED_NAME"]:
full_path.append(to_unicode(part["ESCAPED_NAME"]).strip("`"))
elif part["UNSIGNED_INTEGER"]:
full_path.append(int(part["UNSIGNED_INTEGER"]))
else:
raise self._error(node, "Unable to parse field", cls=EqlSyntaxError)

base, path = full_path[0], full_path[1:]

@@ -1037,7 +1052,7 @@ def constant(self, node):

lark_parser = Lark(get_etc_file('eql.g'), debug=False,
propagate_positions=True, tree_class=KvTree, parser='lalr',
start=['piped_query', 'definition', 'definitions',
start=['piped_query', 'definition', 'definitions', 'field_parts',
'query_with_definitions', 'expr', 'signed_single_atom'])


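End to end, the `field` callback above should now keep escaped components intact, convert bracketed indices to integers, and still reject bare keywords in a path. A hedged sketch of the expected results, mirroring the tests below; the field names are arbitrary examples.

```python
# Sketch of expected parse results under this branch.
from eql.ast import Field
from eql.errors import EqlSyntaxError
from eql.parser import parse_expression

node = parse_expression("`foo.bar`[0].`and`")
assert isinstance(node, Field)
assert node.full_path == ["foo.bar", 0, "and"]  # keywords are fine once escaped

try:
    parse_expression("prefix.and")  # unescaped keyword in the path
except EqlSyntaxError:
    pass  # expected: "Invalid use of keyword"
```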
37 changes: 35 additions & 2 deletions tests/test_parser.py
@@ -1,15 +1,16 @@
"""Test case."""
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime
import sys
import traceback
import unittest
from collections import OrderedDict  # noqa: F401

from eql.ast import * # noqa: F403
from eql.errors import EqlSyntaxError, EqlSemanticError, EqlParseError
from eql.parser import (
parse_query, parse_expression, parse_definitions, ignore_missing_functions, parse_field, parse_literal,
extract_query_terms
extract_query_terms, keywords
)
from eql.walkers import DepthFirstWalker
from eql.pipes import * # noqa: F403
@@ -301,6 +302,38 @@ def test_invalid_queries(self):
for query in invalid:
self.assertRaises(EqlParseError, parse_query, query)

def test_backtick_fields(self):
"""Test that backticks are accepted with fields."""
def parse_to(text, path):
node = parse_expression(text)
self.assertIsInstance(node, Field)
self.assertEqual(node.full_path, path)

# now render back as text and parse again
node2 = parse_expression(node.render())
self.assertEqual(node2, node)

parse_to("`foo-bar-baz`", ["foo-bar-baz"])
parse_to("`foo bar baz`", ["foo bar baz"])
parse_to("`foo.bar.baz`", ["foo.bar.baz"])
parse_to("`foo`.`bar-baz`", ["foo", "bar-baz"])
parse_to("`foo.bar-baz`", ["foo.bar-baz"])
parse_to("`💩`", ["💩"])

parse_to("`foo`[0]", ["foo", 0])
parse_to("`foo`[0].`bar`", ["foo", 0, "bar"])

# keywords
for keyword in keywords:
parse_to("`{keyword}`".format(keyword=keyword), [keyword])
parse_to("prefix.`{keyword}`".format(keyword=keyword), ["prefix", keyword])
parse_to("`{keyword}`[0].suffix".format(keyword=keyword), [keyword, 0, "suffix"])

def test_backtick_split_lines(self):
"""Confirm that backticks can't be split across lines."""
with self.assertRaises(EqlSyntaxError):
parse_expression("`abc \n def`")

def test_query_events(self):
"""Test that event queries work with events[n].* syntax in pipes."""
base_queries = ['abc', 'abc[123]', 'abc.def.ghi', 'abc.def[123].ghi[456]']
17 changes: 17 additions & 0 deletions tests/test_python_engine.py
@@ -5,6 +5,7 @@

from eql import * # noqa: F403
from eql.ast import * # noqa: F403
from eql.engine import Scope
from eql.parser import ignore_missing_functions
from eql.schema import EVENT_TYPE_GENERIC
from eql.tests.base import TestEngine
@@ -494,3 +495,19 @@ def test_relationship_pid_collision(self):
output = self.get_output(queries=[parse_query(query)], config=config, events=events)
event_ids = [event.data['unique_pid'] for event in output]
self.validate_results(event_ids, ['host1-1003'], "Relationships failed due to pid collision")

def test_backticks(self):
"""Check that backtick fields are indexing into events."""
def evaluate(expr, event):
engine = PythonEngine()
cb = engine.convert(parse_expression(expr))
scope = Scope([Event.from_data(event)], None)
return cb(scope)

self.assertIsNone(evaluate("a.b", {}))
self.assertEqual(evaluate("a.b", {"a": {"b": 1}}), 1)

self.assertIsNone(evaluate("`a.b`", {}))
self.assertEqual(evaluate("`a.b`", {"a.b": 1}), 1)
self.assertEqual(evaluate("a.`b.c`[0]", {"a": {"b.c": [1]}}), 1)
self.assertEqual(evaluate("`!@#$%^&*().`", {"!@#$%^&*().": 1}), 1)