Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FR] Add ES|QL Custom Library for Rule Support #3134

Closed
wants to merge 43 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
c90ab9d
load unsupported rule type from schema
Mikaayenson Jun 29, 2023
f92b34f
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Jul 20, 2023
b245d5b
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 1, 2023
f589ad4
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 11, 2023
7ad3012
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 24, 2023
7887392
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Aug 24, 2023
40a8e64
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Sep 11, 2023
172aa04
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Sep 22, 2023
c4af6f8
initial development for ESQL
terrancedejesus Sep 26, 2023
0a9e0bb
updated workflow to view-rule correctly
terrancedejesus Sep 26, 2023
538fc8c
Merge branch 'main' of github.com:elastic/detection-rules
Mikaayenson Nov 6, 2023
db43a33
Merge branch 'main' into esql-dev
Mikaayenson Nov 11, 2023
9b1eda2
Merge branch 'main' into esql-dev
Mikaayenson Nov 27, 2023
8c4a6ca
Add initial semantic validation and core logic
Mikaayenson Nov 27, 2023
b0ce5e9
Merge branch 'main' into esql-dev
Mikaayenson Nov 27, 2023
2e3d0c0
update grammar and add license headers
Mikaayenson Nov 27, 2023
ed76f4f
light cleanup
Mikaayenson Nov 27, 2023
c930c1b
refactor base esql methods and remove extra antlr4 files
Mikaayenson Nov 28, 2023
1e08afd
use packaged integrations
Mikaayenson Nov 28, 2023
652167b
updating walker; bug in utils.get_node with recursion
terrancedejesus Nov 30, 2023
62ca601
Merge branch 'esql-dev' of github.com:elastic/detection-rules into es…
Mikaayenson Nov 30, 2023
c81455d
update recursion to get multiple nodes
Mikaayenson Nov 30, 2023
49251aa
added event.dataset schema validation
terrancedejesus Dec 1, 2023
bb54c20
capture syntax errors and lint
Mikaayenson Dec 1, 2023
3b9128c
addressed TODOs; added more TODOs
terrancedejesus Dec 1, 2023
1662378
adding changes from #3297
terrancedejesus Dec 1, 2023
ba20d47
Merge branch 'main' into esql-dev
terrancedejesus Dec 1, 2023
e30e8c9
remove duplicate class
Mikaayenson Dec 1, 2023
2f03d86
adding metadata and stats semantic validation
terrancedejesus Dec 1, 2023
f2e2616
Add more type check support and lint
Mikaayenson Dec 1, 2023
17c9dd3
Merge branch 'esql-dev' of github.com:elastic/detection-rules into es…
Mikaayenson Dec 1, 2023
ee40a53
update flake linting
Mikaayenson Dec 1, 2023
e6f8b19
small cleanup
Mikaayenson Dec 1, 2023
1c9b91c
add more support for type checking
Mikaayenson Dec 1, 2023
b5e1672
small cleanup
Mikaayenson Dec 1, 2023
776b1dc
add support for related integrations
Mikaayenson Dec 1, 2023
d1fdd67
add initial esql unit tests
Mikaayenson Dec 4, 2023
e91d294
remove java related logic
Mikaayenson Dec 4, 2023
df5d442
refactor esql logic to support versioned grammars, parsers, and liste…
Mikaayenson Dec 5, 2023
ecb83e6
update devtool commands to use new paths
Mikaayenson Dec 5, 2023
182af10
lint
Mikaayenson Dec 5, 2023
09cf26c
lint and docstrings
Mikaayenson Dec 5, 2023
7c5199d
small cleanup
Mikaayenson Dec 5, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions detection_rules/devtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from .version_lock import VersionLockFile, default_version_lock

RULES_DIR = get_path('rules')
ESQL_DIR = get_path('esql')
GH_CONFIG = Path.home() / ".config" / "gh" / "hosts.yml"
NAVIGATOR_GIST_ID = '1a3f65224822a30a8228a8ed20289a89'
NAVIGATOR_URL = 'https://ela.st/detection-rules-navigator'
Expand Down Expand Up @@ -1488,3 +1489,68 @@ def guide_plugin_to_rule(ctx: click.Context, rule_path: Path, save: bool = True)
updated_rule.save_toml()

return updated_rule


@dev_group.group('esql')
def esql_group():
    """Commands for managing ESQL library."""
    # Click group container only: subcommands (e.g. pull-grammar, build-parser)
    # attach to this group; the body is intentionally empty.


@esql_group.command('pull-grammar')
@click.option('--token', required=True, prompt=get_github_token() is None,
              default=get_github_token(), help='GitHub personal access token.')
@click.option('--branch', default='esql/lang', help='Branch to pull the grammar files from.')
@click.pass_context
def pull_grammar(ctx: click.Context, token: str, branch: str = 'esql/lang'):
    """Pull the ESQL grammar from the specified repository.

    Downloads each grammar file listed in
    ``definitions.ELASTICSEARCH_ESQL_GRAMMAR_PATHS`` from the given branch of
    elastic/elasticsearch-internal into ``<ESQL_DIR>/grammar/``.
    """
    github = GithubClient(token)
    client = github.authenticated_client
    repo_instance = client.get_repo('elastic/elasticsearch-internal')

    for filename, path in definitions.ELASTICSEARCH_ESQL_GRAMMAR_PATHS.items():
        try:
            file_content = repo_instance.get_contents(path, ref=branch).decoded_content.decode("utf-8")

            # Write content to file
            with open(Path(ESQL_DIR) / "grammar" / filename, 'w') as file:
                file.write(file_content)

            click.echo(f"Successfully downloaded {filename}.")

        except Exception as e:
            # best-effort per file: report the failure and continue with the
            # remaining grammar files rather than aborting the whole pull
            click.echo(f"Failed to download {filename}. Error: {e}")


@esql_group.command('build-parser')
@click.pass_context
def build_parser(ctx: click.Context):
    """Build the ESQL parser using ANTLR.

    Runs the bundled ANTLR jar over the lexer/parser grammar files in
    ``<ESQL_DIR>/grammar/`` and emits Python3 sources into ``ESQL_DIR``.
    """
    # Define paths; the ANTLR tool jar is vendored under detection_rules/etc
    lexer_file = Path(ESQL_DIR) / 'grammar' / 'EsqlBaseLexer.g4'
    parser_file = Path(ESQL_DIR) / 'grammar' / 'EsqlBaseParser.g4'
    antlr_file = Path(get_path('detection_rules')) / 'etc' / 'antlr-4.13.1-complete.jar'

    # Ensure grammar files exist (fetched via the `pull-grammar` command)
    if not lexer_file.exists() or not parser_file.exists():
        click.echo("Error: Required grammar files are missing.")
        return

    # ensure ANTLR JAR exists
    if not antlr_file.exists():
        click.echo("Error: ANTLR JAR file is missing.")
        return

    # Use the JAR to generate parser and lexer
    cmd_common = [
        "java", "-jar", str(antlr_file),
        "-Dlanguage=Python3",
        "-o", str(ESQL_DIR)
    ]
    cmd_lexer = cmd_common + [str(lexer_file)]
    cmd_parser = cmd_common + [str(parser_file)]

    try:
        subprocess.run(cmd_lexer, check=True, cwd=ESQL_DIR)
        subprocess.run(cmd_parser, check=True, cwd=ESQL_DIR)
        click.echo("ES|QL parser and lexer generated successfully.")
    except subprocess.CalledProcessError:
        click.echo("Failed to generate ES|QL parser and lexer.")
Binary file added detection_rules/etc/antlr-4.13.1-complete.jar
Binary file not shown.
5 changes: 3 additions & 2 deletions detection_rules/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener

# lazy import to avoid circular import
from .rule import ( # pylint: disable=import-outside-toplevel
QueryRuleData, RuleMeta)
ESQLRuleData, QueryRuleData, RuleMeta)

data: QueryRuleData = data
meta: RuleMeta = meta
Expand All @@ -301,7 +301,8 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener
integrations_schemas = load_integrations_schemas()

# validate the query against related integration fields
if isinstance(data, QueryRuleData) and data.language != 'lucene' and meta.maturity == "production":
if (isinstance(data, QueryRuleData) or isinstance(data, ESQLRuleData)) \
and data.language != 'lucene' and meta.maturity == "production":

# flag to only warn once per integration for available upgrades
notify_update_available = True
Expand Down
40 changes: 30 additions & 10 deletions detection_rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,8 @@ def validator(self) -> Optional[QueryValidator]:
return KQLValidator(self.query)
elif self.language == "eql":
return EQLValidator(self.query)
elif self.language == "esql":
return ESQLValidator(self.query)

def validate_query(self, meta: RuleMeta) -> None:
validator = self.validator
Expand Down Expand Up @@ -714,6 +716,21 @@ def interval_ratio(self) -> Optional[float]:
return interval / self.max_span


@dataclass(frozen=True)
class ESQLRuleData(BaseRuleData):
    """ESQL rules are a special case of query rules."""
    type: Literal["esql"]      # rule type discriminator, always "esql"
    language: Literal["esql"]  # query language, always "esql"
    query: str                 # raw ES|QL query string

    @cached_property
    def validator(self) -> Optional[QueryValidator]:
        """Return the ES|QL validator for this rule's query."""
        # NOTE: cached_property writes to the instance __dict__ directly,
        # which is why caching works despite the frozen dataclass.
        return ESQLValidator(self.query)

    def validate_query(self, meta: RuleMeta) -> None:
        """Delegate query validation to the ES|QL validator."""
        return self.validator.validate(self, meta)


@dataclass(frozen=True)
class ThreatMatchRuleData(QueryRuleData):
"""Specific fields for indicator (threat) match rule."""
Expand Down Expand Up @@ -761,7 +778,7 @@ def validate_query(self, meta: RuleMeta) -> None:
# All of the possible rule types
# Sort inverse of any inheritance - see comment in TOMLRuleContents.to_dict
AnyRuleData = Union[EQLRuleData, ThresholdQueryRuleData, ThreatMatchRuleData,
MachineLearningRuleData, QueryRuleData, NewTermsRuleData]
MachineLearningRuleData, QueryRuleData, NewTermsRuleData, ESQLRuleData]


class BaseRuleContents(ABC):
Expand Down Expand Up @@ -945,9 +962,10 @@ def _post_dict_conversion(self, obj: dict) -> dict:
super()._post_dict_conversion(obj)

# build time fields
self._convert_add_related_integrations(obj)
self._convert_add_required_fields(obj)
self._convert_add_setup(obj)
if not isinstance(self.data, ESQLRuleData):
self._convert_add_related_integrations(obj)
self._convert_add_required_fields(obj)
self._convert_add_setup(obj)

# validate new fields against the schema
rule_type = obj['type']
Expand Down Expand Up @@ -1084,11 +1102,13 @@ def get_packaged_integrations(cls, data: QueryRuleData, meta: RuleMeta,
packaged_integrations = []
datasets = set()

for node in data.get('ast', []):
if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))
if data.type != "esql":
# skip ES|QL rules until ast is available
for node in data.get('ast', []):
if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))

# integration is None to remove duplicate references upstream in Kibana
# chronologically, event.dataset is checked for package:integration, then rule tags
Expand Down Expand Up @@ -1333,4 +1353,4 @@ def get_unique_query_fields(rule: TOMLRule) -> List[str]:


# avoid a circular import
from .rule_validators import EQLValidator, KQLValidator # noqa: E402
from .rule_validators import EQLValidator, KQLValidator, ESQLValidator # noqa: E402
Loading
Loading