[FR] Add Core Support for ES|QL Rule Type (#3292)

(cherry picked from commit bc39c20)
elastic · Nov 28, 2023 · 2c5e0fa
1 parent 9d34fc2
commit 2c5e0fa
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 11 deletions.
diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py
@@ -292,7 +292,7 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener
 
     # lazy import to avoid circular import
     from .rule import (  # pylint: disable=import-outside-toplevel
-        QueryRuleData, RuleMeta)
+        ESQLRuleData, QueryRuleData, RuleMeta)
 
     data: QueryRuleData = data
     meta: RuleMeta = meta
@@ -301,7 +301,8 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener
     integrations_schemas = load_integrations_schemas()
 
     # validate the query against related integration fields
-    if isinstance(data, QueryRuleData) and data.language != 'lucene' and meta.maturity == "production":
+    if (isinstance(data, QueryRuleData) or isinstance(data, ESQLRuleData)) \
+       and data.language != 'lucene' and meta.maturity == "production":
 
         # flag to only warn once per integration for available upgrades
         notify_update_available = True

diff --git a/detection_rules/rule.py b/detection_rules/rule.py
@@ -569,6 +569,8 @@ def validator(self) -> Optional[QueryValidator]:
             return KQLValidator(self.query)
         elif self.language == "eql":
             return EQLValidator(self.query)
+        elif self.language == "esql":
+            return ESQLValidator(self.query)
 
     def validate_query(self, meta: RuleMeta) -> None:
         validator = self.validator
@@ -602,6 +604,21 @@ def validate_exceptions(self, data, **kwargs):
             raise ValidationError("Alert suppression is only valid for query rule type.")
 
 
+@dataclass(frozen=True)
+class ESQLRuleData(BaseRuleData):
+    """ESQL rules are a special case of query rules."""
+    type: Literal["esql"]
+    language: Literal["esql"]
+    query: str
+
+    @cached_property
+    def validator(self) -> Optional[QueryValidator]:
+        return ESQLValidator(self.query)
+
+    def validate_query(self, meta: RuleMeta) -> None:
+        return self.validator.validate(self, meta)
+
+
 @dataclass(frozen=True)
 class MachineLearningRuleData(BaseRuleData):
     type: Literal["machine_learning"]
@@ -760,7 +777,7 @@ def validate_query(self, meta: RuleMeta) -> None:
 
 # All of the possible rule types
 # Sort inverse of any inheritance - see comment in TOMLRuleContents.to_dict
-AnyRuleData = Union[EQLRuleData, ThresholdQueryRuleData, ThreatMatchRuleData,
+AnyRuleData = Union[EQLRuleData, ESQLRuleData, ThresholdQueryRuleData, ThreatMatchRuleData,
                     MachineLearningRuleData, QueryRuleData, NewTermsRuleData]
 
 
@@ -1084,11 +1101,12 @@ def get_packaged_integrations(cls, data: QueryRuleData, meta: RuleMeta,
         packaged_integrations = []
         datasets = set()
 
-        for node in data.get('ast', []):
-            if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
-                datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
-            elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
-                datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))
+        if data.type != "esql":
+            for node in data.get('ast', []):
+                if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset':
+                    datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal)))
+                elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
+                    datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))
 
         # integration is None to remove duplicate references upstream in Kibana
         # chronologically, event.dataset is checked for package:integration, then rule tags
@@ -1333,4 +1351,4 @@ def get_unique_query_fields(rule: TOMLRule) -> List[str]:
 
 
 # avoid a circular import
-from .rule_validators import EQLValidator, KQLValidator  # noqa: E402
+from .rule_validators import EQLValidator, ESQLValidator, KQLValidator  # noqa: E402
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
@@ -346,6 +346,26 @@ def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) -
             return [], False
 
 
+class ESQLValidator(QueryValidator):
+    """Validate specific fields for ESQL query event types."""
+
+    @cached_property
+    def ast(self):
+        """Return an AST."""
+        return None
+
+    @cached_property
+    def unique_fields(self) -> List[str]:
+        """Return a list of unique fields in the query."""
+        # return empty list for ES|QL rules until ast is available
+        return []
+
+    def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None:
+        """Validate an ESQL query while checking TOMLRule."""
+        print("Warning: ESQL queries are not validated at this time.")
+        return None
+
+
 def extract_error_field(exc: Union[eql.EqlParseError, kql.KqlParseError]) -> Optional[str]:
     """Extract the field name from an EQL or KQL parse error."""
     lines = exc.source.splitlines()

diff --git a/detection_rules/schemas/definitions.py b/detection_rules/schemas/definitions.py
@@ -138,7 +138,7 @@
 CodeString = NewType("CodeString", str)
 ConditionSemVer = NewType('ConditionSemVer', str, validate=validate.Regexp(CONDITION_VERSION_PATTERN))
 Date = NewType('Date', str, validate=validate.Regexp(DATE_PATTERN))
-FilterLanguages = Literal["kuery", "lucene"]
+FilterLanguages = Literal["kuery", "lucene", "eql", "esql"]
 Interval = NewType('Interval', str, validate=validate.Regexp(INTERVAL_PATTERN))
 Markdown = NewType("MarkdownField", CodeString)
 Maturity = Literal['development', 'experimental', 'beta', 'production', 'deprecated']
@@ -149,7 +149,7 @@
 PositiveInteger = NewType('PositiveInteger', int, validate=validate.Range(min=1))
 RiskScore = NewType("MaxSignals", int, validate=validate.Range(min=1, max=100))
 RuleName = NewType('RuleName', str, validate=validate.Regexp(NAME_PATTERN))
-RuleType = Literal['query', 'machine_learning', 'eql', 'threshold', 'threat_match', 'new_terms']
+RuleType = Literal['query', 'saved_query', 'machine_learning', 'eql', 'esql', 'threshold', 'threat_match', 'new_terms']
 SemVer = NewType('SemVer', str, validate=validate.Regexp(VERSION_PATTERN))
 SemVerMinorOnly = NewType('SemVerFullStrict', str, validate=validate.Regexp(MINOR_SEMVER))
 Severity = Literal['low', 'medium', 'high', 'critical']