Add backticks to the grammar #20

Merged · 5 commits · Apr 23, 2020
Changes from all commits
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## Version 0.8.7
_Released 2020-##-##_

### Added
* Support for escaped identifiers in fields using \`backtick\` syntax

### Changed
* (Internal) Changed the type system to use TypeHint/TypeFoldCheck/NodeInfo instead of tuples

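As a quick illustration of the changelog entry above, here is a hedged sketch of a query this change is meant to allow. The field names are invented for the example, and it assumes the branch's `parse_query` helper behaves as the diffs below describe.

```python
# Sketch only: hypothetical field names; assumes this PR's parser is in use.
from eql.parser import parse_query

# "event-category" and "process name" are not valid bare identifiers,
# so they need the new backtick escaping to be referenced as fields.
query = parse_query(
    'process where `event-category` == "start" and `process name` == "explorer.exe"'
)
print(query)
```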
20 changes: 16 additions & 4 deletions eql/ast.py
@@ -384,6 +384,8 @@ class Field(Expression):
__slots__ = 'base', 'path',
precedence = Expression.precedence + 1

field_re = re.compile("^[_A-Za-z][_A-Za-z0-9]+$")

def __init__(self, base, path=None):
"""Query the event for the field expression.

@@ -405,14 +407,24 @@ def full_path(self): # type: () -> list[str]
"""Get the full path for a field."""
return [self.base] + self.path

@classmethod
def escape_ident(cls, key):
"""Escape identifiers that are keywords."""
from .parser import keywords

if key in keywords or cls.field_re.match(key) is None:
return "`{key}`".format(key=key)
return key

def _render(self):
text = self.base
text = [self.escape_ident(self.base)]

for key in self.path:
if is_number(key):
text += "[{}]".format(key)
text.append("[{}]".format(key))
else:
text += ".{}".format(key)
return text
text.append(".{}".format(self.escape_ident(key)))
return "".join(text)


class FunctionCall(Expression):
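A hedged sketch of the rendering behaviour added above: path components that are keywords or that fail the identifier regex should come back backticked, while ordinary names and array indices are left alone. The field names here are made up.

```python
# Sketch: build a Field node directly and render it back to EQL text.
from eql.ast import Field

field = Field("process name", ["pid", 0, "and"])
# "process name" fails the identifier regex and "and" is a keyword,
# so both should be escaped; "pid" and the index 0 should not be.
print(field.render())  # expected: `process name`.pid[0].`and`
```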
18 changes: 15 additions & 3 deletions eql/etc/eql.g
@@ -62,7 +62,7 @@ function_call.2: name "(" [expressions] ")"
?single_atom: literal
| field
| base_field
base_field: name
base_field: name | escaped_name
field: FIELD
literal: number
| string
@@ -75,19 +75,31 @@ string: DQ_STRING

// Check against keyword usage
name: NAME
escaped_name: ESCAPED_NAME

// Tokens
// make this a token to avoid ambiguity, and make more rigid on whitespace
// pin the first "." or "[" to resolve token ambiguities
// sequence by pid [1] [true] looks identical to:
// sequence by pid[1] [true]
FIELD: NAME ("." WHITESPACE* NAME | "[" WHITESPACE* UNSIGNED_INTEGER WHITESPACE* "]")+
FIELD: FIELD_IDENT (ATTR | INDEX)+
ATTR: "." WHITESPACE? FIELD_IDENT
INDEX: "[" WHITESPACE? UNSIGNED_INTEGER WHITESPACE? "]"
FIELD_IDENT: NAME | ESCAPED_NAME

// create a non-conflicting helper rule to deconstruct
field_parts: field_ident ("." field_ident | "[" array_index "]")+
!array_index: UNSIGNED_INTEGER
!field_ident: NAME | ESCAPED_NAME


LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
DIGIT: "0".."9"

LETTER: UCASE_LETTER | LCASE_LETTER
WORD: LETTER+

ESCAPED_NAME: "`" /[^`\r\n]+/ "`"
NAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
UNSIGNED_INTEGER: /[0-9]+/
EXPONENT: /[Ee][-+]?\d+/
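To see how the reworked `FIELD` token and the `field_parts` helper rule fit together, the module-level `lark_parser` (constructed in `eql/parser.py`, below) can be handed the new start rule directly. A sketch, assuming the parser module is importable as shown in the next diff:

```python
# Sketch: re-parse a FIELD token's text with the "field_parts" start rule,
# which splits it into field_ident and array_index parts without ambiguity.
from eql.parser import lark_parser

tree = lark_parser.parse("`foo.bar`[0].baz", "field_parts")
print(tree.pretty())  # expect field_ident `foo.bar`, array_index 0, field_ident baz
```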
41 changes: 28 additions & 13 deletions eql/parser.py
@@ -481,15 +481,17 @@ def string(self, node):

def base_field(self, node):
"""Get a base field."""
name = node["name"]
text = name["NAME"]
child = node.children[0]
token = child["NAME"] or child["ESCAPED_NAME"]
name = token.value.strip("`")

if text in RESERVED:
value = RESERVED[text]
return NodeInfo(value, value.type_hint, source=node)
if token.type != "ESCAPED_NAME":
if name in RESERVED:
value = RESERVED[name]
return NodeInfo(value, value.type_hint, source=node)

# validate against the remaining keywords
name = self.visit(name)
# validate against the remaining keywords
self.visit(child)

if name in self.preprocessor.constants:
constant = self.preprocessor.constants[name]
@@ -505,11 +507,24 @@

def field(self, node):
"""Callback function to walk the AST."""
full_path = [s.strip() for s in re.split(r"[.\[\]]+", node.children[0])]
full_path = [int(s) if s.isdigit() else s for s in full_path if s]

if any(p in keywords for p in full_path):
raise self._error(node, "Invalid use of keyword", cls=EqlSyntaxError)
full_path = []

# to get around parser ambiguities, we had to create a token to mash all of the parts together
# but we have a separate rule "field_parts" that can safely re-parse and separate out the tokens.
# we can walk through each token, and build the field path accordingly
for part in lark_parser.parse(node.children[0], "field_parts").children:
if part["NAME"]:
name = to_unicode(part["NAME"])
full_path.append(name)

if name in keywords:
raise self._error(node, "Invalid use of keyword", cls=EqlSyntaxError)
elif part["ESCAPED_NAME"]:
full_path.append(to_unicode(part["ESCAPED_NAME"]).strip("`"))
elif part["UNSIGNED_INTEGER"]:
full_path.append(int(part["UNSIGNED_INTEGER"]))
else:
raise self._error(node, "Unable to parse field", cls=EqlSyntaxError)

base, path = full_path[0], full_path[1:]

@@ -1037,7 +1052,7 @@ def constant(self, node):

lark_parser = Lark(get_etc_file('eql.g'), debug=False,
propagate_positions=True, tree_class=KvTree, parser='lalr',
start=['piped_query', 'definition', 'definitions',
start=['piped_query', 'definition', 'definitions', 'field_parts',
'query_with_definitions', 'expr', 'signed_single_atom'])


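End to end, the `field` callback above should now keep escaped components intact, convert bracketed indices to integers, and still reject bare keywords in a path. A hedged sketch of the expected results, mirroring the tests below; the field names are arbitrary examples.

```python
# Sketch of expected parse results under this branch.
from eql.ast import Field
from eql.errors import EqlSyntaxError
from eql.parser import parse_expression

node = parse_expression("`foo.bar`[0].`and`")
assert isinstance(node, Field)
assert node.full_path == ["foo.bar", 0, "and"]  # keywords are fine once escaped

try:
    parse_expression("prefix.and")  # unescaped keyword in the path
except EqlSyntaxError:
    pass  # expected: "Invalid use of keyword"
```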
37 changes: 35 additions & 2 deletions tests/test_parser.py
@@ -1,15 +1,16 @@
"""Test case."""
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime
import sys
import traceback
import unittest
from collections import OrderedDict  # noqa: F401

from eql.ast import * # noqa: F403
from eql.errors import EqlSyntaxError, EqlSemanticError, EqlParseError
from eql.parser import (
parse_query, parse_expression, parse_definitions, ignore_missing_functions, parse_field, parse_literal,
extract_query_terms
extract_query_terms, keywords
)
from eql.walkers import DepthFirstWalker
from eql.pipes import * # noqa: F403
@@ -301,6 +302,38 @@ def test_invalid_queries(self):
for query in invalid:
self.assertRaises(EqlParseError, parse_query, query)

def test_backtick_fields(self):
"""Test that backticks are accepted with fields."""
def parse_to(text, path):
node = parse_expression(text)
self.assertIsInstance(node, Field)
self.assertEqual(node.full_path, path)

# now render back as text and parse again
node2 = parse_expression(node.render())
self.assertEqual(node2, node)

parse_to("`foo-bar-baz`", ["foo-bar-baz"])
parse_to("`foo bar baz`", ["foo bar baz"])
parse_to("`foo.bar.baz`", ["foo.bar.baz"])
parse_to("`foo`.`bar-baz`", ["foo", "bar-baz"])
parse_to("`foo.bar-baz`", ["foo.bar-baz"])
parse_to("`💩`", ["💩"])

parse_to("`foo`[0]", ["foo", 0])
parse_to("`foo`[0].`bar`", ["foo", 0, "bar"])

# keywords
for keyword in keywords:
parse_to("`{keyword}`".format(keyword=keyword), [keyword])
parse_to("prefix.`{keyword}`".format(keyword=keyword), ["prefix", keyword])
parse_to("`{keyword}`[0].suffix".format(keyword=keyword), [keyword, 0, "suffix"])

def test_backtick_split_lines(self):
"""Confirm that backticks can't be split across lines."""
with self.assertRaises(EqlSyntaxError):
parse_expression("`abc \n def`")

def test_query_events(self):
"""Test that event queries work with events[n].* syntax in pipes."""
base_queries = ['abc', 'abc[123]', 'abc.def.ghi', 'abc.def[123].ghi[456]']
17 changes: 17 additions & 0 deletions tests/test_python_engine.py
@@ -5,6 +5,7 @@

from eql import * # noqa: F403
from eql.ast import * # noqa: F403
from eql.engine import Scope
from eql.parser import ignore_missing_functions
from eql.schema import EVENT_TYPE_GENERIC
from eql.tests.base import TestEngine
@@ -494,3 +495,19 @@ def test_relationship_pid_collision(self):
output = self.get_output(queries=[parse_query(query)], config=config, events=events)
event_ids = [event.data['unique_pid'] for event in output]
self.validate_results(event_ids, ['host1-1003'], "Relationships failed due to pid collision")

def test_backticks(self):
"""Check that backtick fields are indexing into events."""
def evaluate(expr, event):
engine = PythonEngine()
cb = engine.convert(parse_expression(expr))
scope = Scope([Event.from_data(event)], None)
return cb(scope)

self.assertIsNone(evaluate("a.b", {}))
self.assertEqual(evaluate("a.b", {"a": {"b": 1}}), 1)

self.assertIsNone(evaluate("`a.b`", {}))
self.assertEqual(evaluate("`a.b`", {"a.b": 1}), 1)
self.assertEqual(evaluate("a.`b.c`[0]", {"a": {"b.c": [1]}}), 1)
self.assertEqual(evaluate("`!@#$%^&*().`", {"!@#$%^&*().": 1}), 1)