Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FR] Add ES|QL Custom Library for Rule Support #3134

Closed
wants to merge 43 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
c90ab9d
load unsupported rule type from schema
Mikaayenson Jun 29, 2023
f92b34f
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Jul 20, 2023
b245d5b
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 1, 2023
f589ad4
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 11, 2023
7ad3012
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 24, 2023
7887392
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 24, 2023
40a8e64
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Sep 11, 2023
172aa04
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Sep 22, 2023
c4af6f8
initial development for ESQL
terrancedejesus Sep 26, 2023
0a9e0bb
updated workflow to view-rule correctly
terrancedejesus Sep 26, 2023
538fc8c
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Nov 6, 2023
db43a33
Merge branch 'main' into esql-dev
Mikaayenson Nov 11, 2023
9b1eda2
Merge branch 'main' into esql-dev
Mikaayenson Nov 27, 2023
8c4a6ca
Add initial semantic validation and core logic
Mikaayenson Nov 27, 2023
b0ce5e9
Merge branch 'main' into esql-dev
Mikaayenson Nov 27, 2023
2e3d0c0
update grammar and add license headers
Mikaayenson Nov 27, 2023
ed76f4f
light cleanup
Mikaayenson Nov 27, 2023
c930c1b
refactor base esql methods and remove extra antlr4 files
Mikaayenson Nov 28, 2023
1e08afd
use packaged integrations
Mikaayenson Nov 28, 2023
652167b
updating walker; bug in utils.get_node with recursion
terrancedejesus Nov 30, 2023
62ca601
Merge branch 'esql-dev' of github.com:elastic/detection-rules into es…
Mikaayenson Nov 30, 2023
c81455d
update recursion to get multiple nodes
Mikaayenson Nov 30, 2023
49251aa
added event.dataset schema validation
terrancedejesus Dec 1, 2023
bb54c20
capture syntax errors and lint
Mikaayenson Dec 1, 2023
3b9128c
addressed TODOs; added more TODOs
terrancedejesus Dec 1, 2023
1662378
adding changes from #3297
terrancedejesus Dec 1, 2023
ba20d47
Merge branch 'main' into esql-dev
terrancedejesus Dec 1, 2023
e30e8c9
remove duplicate class
Mikaayenson Dec 1, 2023
2f03d86
adding metadata and stats semantic validation
terrancedejesus Dec 1, 2023
f2e2616
Add more type check support and lint
Mikaayenson Dec 1, 2023
17c9dd3
Merge branch 'esql-dev' of github.com:elastic/detection-rules into es…
Mikaayenson Dec 1, 2023
ee40a53
update flake linting
Mikaayenson Dec 1, 2023
e6f8b19
small cleanup
Mikaayenson Dec 1, 2023
1c9b91c
add more support for type checking
Mikaayenson Dec 1, 2023
b5e1672
small cleanup
Mikaayenson Dec 1, 2023
776b1dc
add support for related integrations
Mikaayenson Dec 1, 2023
d1fdd67
add initial esql unit tests
Mikaayenson Dec 4, 2023
e91d294
remove java related logic
Mikaayenson Dec 4, 2023
df5d442
refactor esql logic to support versioned grammars, parsers, and liste…
Mikaayenson Dec 5, 2023
ecb83e6
update devtool commands to use new paths
Mikaayenson Dec 5, 2023
182af10
lint
Mikaayenson Dec 5, 2023
09cf26c
lint and docstrings
Mikaayenson Dec 5, 2023
7c5199d
small cleanup
Mikaayenson Dec 5, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions detection_rules/devtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from .version_lock import VersionLockFile, default_version_lock

RULES_DIR = get_path('rules')
ESQL_DIR = get_path('esql')
GH_CONFIG = Path.home() / ".config" / "gh" / "hosts.yml"
NAVIGATOR_GIST_ID = '1a3f65224822a30a8228a8ed20289a89'
NAVIGATOR_URL = 'https://ela.st/detection-rules-navigator'
Expand Down Expand Up @@ -1488,3 +1489,68 @@ def guide_plugin_to_rule(ctx: click.Context, rule_path: Path, save: bool = True)
updated_rule.save_toml()

return updated_rule


@dev_group.group('esql')
def esql_group():
    """Commands for managing ESQL library."""
    # Click group container only: subcommands (e.g. pull-grammar, build-parser)
    # attach to this group; the body is intentionally empty.


@esql_group.command('pull-grammar')
@click.option('--token', required=True, prompt=get_github_token() is None,
              default=get_github_token(), help='GitHub personal access token.')
@click.option('--branch', default='esql/lang', help='Branch to pull the grammar files from.')
@click.pass_context
def pull_grammar(ctx: click.Context, token: str, branch: str = 'esql/lang'):
    """Pull the ESQL grammar from the specified repository.

    Downloads each grammar file listed in
    ``definitions.ELASTICSEARCH_ESQL_GRAMMAR_PATHS`` from the given branch of
    elastic/elasticsearch-internal into ``<ESQL_DIR>/grammar/``.
    """
    github = GithubClient(token)
    client = github.authenticated_client
    repo_instance = client.get_repo('elastic/elasticsearch-internal')

    for filename, path in definitions.ELASTICSEARCH_ESQL_GRAMMAR_PATHS.items():
        try:
            file_content = repo_instance.get_contents(path, ref=branch).decoded_content.decode("utf-8")

            # Write content to file
            with open(Path(ESQL_DIR) / "grammar" / filename, 'w') as file:
                file.write(file_content)

            click.echo(f"Successfully downloaded {filename}.")

        except Exception as e:
            # best-effort per file: report the failure and continue with the
            # remaining grammar files rather than aborting the whole pull
            click.echo(f"Failed to download {filename}. Error: {e}")


@esql_group.command('build-parser')
@click.pass_context
def build_parser(ctx: click.Context):
    """Build the ESQL parser using ANTLR.

    Runs the bundled ANTLR jar over the lexer/parser grammar files in
    ``<ESQL_DIR>/grammar/`` and emits Python3 sources into ``ESQL_DIR``.
    """
    # Define paths; the ANTLR tool jar is vendored under detection_rules/etc
    lexer_file = Path(ESQL_DIR) / 'grammar' / 'EsqlBaseLexer.g4'
    parser_file = Path(ESQL_DIR) / 'grammar' / 'EsqlBaseParser.g4'
    antlr_file = Path(get_path('detection_rules')) / 'etc' / 'antlr-4.13.1-complete.jar'

    # Ensure grammar files exist (fetched via the `pull-grammar` command)
    if not lexer_file.exists() or not parser_file.exists():
        click.echo("Error: Required grammar files are missing.")
        return

    # ensure ANTLR JAR exists
    if not antlr_file.exists():
        click.echo("Error: ANTLR JAR file is missing.")
        return

    # Use the JAR to generate parser and lexer
    cmd_common = [
        "java", "-jar", str(antlr_file),
        "-Dlanguage=Python3",
        "-o", str(ESQL_DIR)
    ]
    cmd_lexer = cmd_common + [str(lexer_file)]
    cmd_parser = cmd_common + [str(parser_file)]

    try:
        subprocess.run(cmd_lexer, check=True, cwd=ESQL_DIR)
        subprocess.run(cmd_parser, check=True, cwd=ESQL_DIR)
        click.echo("ES|QL parser and lexer generated successfully.")
    except subprocess.CalledProcessError:
        click.echo("Failed to generate ES|QL parser and lexer.")
Binary file added detection_rules/etc/antlr-4.13.1-complete.jar
Binary file not shown.
5 changes: 3 additions & 2 deletions detection_rules/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener

# lazy import to avoid circular import
from .rule import ( # pylint: disable=import-outside-toplevel
QueryRuleData, RuleMeta)
ESQLRuleData, QueryRuleData, RuleMeta)

data: QueryRuleData = data
meta: RuleMeta = meta
Expand All @@ -301,7 +301,8 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener
integrations_schemas = load_integrations_schemas()

# validate the query against related integration fields
if isinstance(data, QueryRuleData) and data.language != 'lucene' and meta.maturity == "production":
if (isinstance(data, QueryRuleData) or isinstance(data, ESQLRuleData)) \
and data.language != 'lucene' and meta.maturity == "production":

# flag to only warn once per integration for available upgrades
notify_update_available = True
Expand Down
40 changes: 30 additions & 10 deletions detection_rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,8 @@ def validator(self) -> Optional[QueryValidator]:
return KQLValidator(self.query)
elif self.language == "eql":
return EQLValidator(self.query)
elif self.language == "esql":
return ESQLValidator(self.query)

def validate_query(self, meta: RuleMeta) -> None:
validator = self.validator
Expand Down Expand Up @@ -714,6 +716,21 @@ def interval_ratio(self) -> Optional[float]:
return interval / self.max_span


@dataclass(frozen=True)
class ESQLRuleData(BaseRuleData):
    """ESQL rules are a special case of query rules."""
    type: Literal["esql"]      # rule type discriminator, always "esql"
    language: Literal["esql"]  # query language, always "esql"
    query: str                 # raw ES|QL query string

    @cached_property
    def validator(self) -> Optional[QueryValidator]:
        """Return the ES|QL validator for this rule's query."""
        # NOTE: cached_property writes to the instance __dict__ directly,
        # which is why caching works despite the frozen dataclass.
        return ESQLValidator(self.query)

    def validate_query(self, meta: RuleMeta) -> None:
        """Delegate query validation to the ES|QL validator."""
        return self.validator.validate(self, meta)


@dataclass(frozen=True)
class ThreatMatchRuleData(QueryRuleData):
"""Specific fields for indicator (threat) match rule."""
Expand Down Expand Up @@ -761,7 +778,7 @@ def validate_query(self, meta: RuleMeta) -> None:
# All of the possible rule types
# Sort inverse of any inheritance - see comment in TOMLRuleContents.to_dict
AnyRuleData = Union[EQLRuleData, ThresholdQueryRuleData, ThreatMatchRuleData,
MachineLearningRuleData, QueryRuleData, NewTermsRuleData]
MachineLearningRuleData, QueryRuleData, NewTermsRuleData, ESQLRuleData]


class BaseRuleContents(ABC):
Expand Down Expand Up @@ -945,9 +962,10 @@ def _post_dict_conversion(self, obj: dict) -> dict:
super()._post_dict_conversion(obj)

# build time fields
self._convert_add_related_integrations(obj)
self._convert_add_required_fields(obj)
self._convert_add_setup(obj)
if not isinstance(self.data, ESQLRuleData):
self._convert_add_related_integrations(obj)
self._convert_add_required_fields(obj)
self._convert_add_setup(obj)

# validate new fields against the schema
rule_type = obj['type']
Expand Down Expand Up @@ -1084,11 +1102,13 @@ def get_packaged_integrations(cls, data: QueryRuleData, meta: RuleMeta,
packaged_integrations = []
datasets = set()

for node in data.get('ast', []):
if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))
if data.type != "esql":
# skip ES|QL rules until ast is available
for node in data.get('ast', []):
if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))

# integration is None to remove duplicate references upstream in Kibana
# chronologically, event.dataset is checked for package:integration, then rule tags
Expand Down Expand Up @@ -1333,4 +1353,4 @@ def get_unique_query_fields(rule: TOMLRule) -> List[str]:


# avoid a circular import
from .rule_validators import EQLValidator, KQLValidator # noqa: E402
from .rule_validators import EQLValidator, KQLValidator, ESQLValidator # noqa: E402
Loading
Loading