Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change canonicalization to normalization #490

Merged
merged 3 commits into from
Feb 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions stix2/equivalence/pattern/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
from .compare.observation import observation_expression_cmp
from .transform import ChainTransformer, SettleTransformer
from .transform.observation import (
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
AbsorptionTransformer, DNFTransformer, FlattenTransformer,
NormalizeComparisonExpressionsTransformer, OrderDedupeTransformer,
)

# Lazy-initialize
_pattern_canonicalizer = None
_pattern_normalizer = None


def _get_pattern_canonicalizer():
def _get_pattern_normalizer():
"""
Get a canonicalization transformer for STIX patterns.
Get a normalization transformer for STIX patterns.

Returns:
The transformer
Expand All @@ -33,11 +33,11 @@ def _get_pattern_canonicalizer():
# The transformers are either stateless or contain no state which changes
# with each use. So we can set up the transformers once and keep reusing
# them.
global _pattern_canonicalizer
global _pattern_normalizer

if not _pattern_canonicalizer:
canonicalize_comp_expr = \
CanonicalizeComparisonExpressionsTransformer()
if not _pattern_normalizer:
normalize_comp_expr = \
NormalizeComparisonExpressionsTransformer()

obs_expr_flatten = FlattenTransformer()
obs_expr_order = OrderDedupeTransformer()
Expand All @@ -49,12 +49,12 @@ def _get_pattern_canonicalizer():

obs_dnf = DNFTransformer()

_pattern_canonicalizer = ChainTransformer(
canonicalize_comp_expr,
_pattern_normalizer = ChainTransformer(
normalize_comp_expr,
obs_settle_simplify, obs_dnf, obs_settle_simplify,
)

return _pattern_canonicalizer
return _pattern_normalizer


def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
Expand All @@ -77,11 +77,11 @@ def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
pattern2, version=stix_version,
)

pattern_canonicalizer = _get_pattern_canonicalizer()
canon_patt1, _ = pattern_canonicalizer.transform(patt_ast1)
canon_patt2, _ = pattern_canonicalizer.transform(patt_ast2)
pattern_normalizer = _get_pattern_normalizer()
norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
norm_patt2, _ = pattern_normalizer.transform(patt_ast2)

result = observation_expression_cmp(canon_patt1, canon_patt2)
result = observation_expression_cmp(norm_patt1, norm_patt2)

return result == 0

Expand All @@ -92,7 +92,7 @@ def find_equivalent_patterns(
"""
Find patterns from a sequence which are equivalent to a given pattern.
This is more efficient than using equivalent_patterns() in a loop, because
it doesn't re-canonicalize the search pattern over and over. This works
it doesn't re-normalize the search pattern over and over. This works
on an input iterable and is implemented as a generator of matches. So you
can "stream" patterns in and matching patterns will be streamed out.

Expand All @@ -109,19 +109,19 @@ def find_equivalent_patterns(
search_pattern, version=stix_version,
)

pattern_canonicalizer = _get_pattern_canonicalizer()
canon_search_pattern_ast, _ = pattern_canonicalizer.transform(
pattern_normalizer = _get_pattern_normalizer()
norm_search_pattern_ast, _ = pattern_normalizer.transform(
search_pattern_ast,
)

for pattern in patterns:
pattern_ast = pattern_visitor.create_pattern_object(
pattern, version=stix_version,
)
canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast)
norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)

result = observation_expression_cmp(
canon_search_pattern_ast, canon_pattern_ast,
norm_search_pattern_ast, norm_pattern_ast,
)

if result == 0:
Expand Down
2 changes: 1 addition & 1 deletion stix2/equivalence/pattern/compare/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def comparison_expression_cmp(expr1, expr2):
"""
Compare two comparison expressions. This is sensitive to the order of the
expressions' sub-components. To achieve an order-insensitive comparison,
the ASTs must be canonically ordered first.
the sub-component ASTs must be ordered first.

Args:
expr1: The first comparison expression
Expand Down
2 changes: 1 addition & 1 deletion stix2/equivalence/pattern/compare/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def observation_expression_cmp(expr1, expr2):
"""
Compare two observation expression ASTs. This is sensitive to the order of
the expressions' sub-components. To achieve an order-insensitive
comparison, the ASTs must be canonically ordered first.
comparison, the sub-component ASTs must be ordered first.

Args:
expr1: The first observation expression
Expand Down
7 changes: 3 additions & 4 deletions stix2/equivalence/pattern/transform/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _dupe_ast(ast):

elif isinstance(ast, _ComparisonExpression):
# Change this to create a dupe, if we ever need to change simple
# comparison expressions as part of canonicalization.
# comparison expressions as part of normalization.
result = ast

else:
Expand Down Expand Up @@ -147,9 +147,8 @@ class OrderDedupeTransformer(
ComparisonExpressionTransformer,
):
"""
Canonically order the children of all nodes in the AST. Because the
deduping algorithm is based on sorted data, this transformation also does
deduping.
Order the children of all nodes in the AST. Because the deduping algorithm
is based on sorted data, this transformation also does deduping.

E.g.:
A and A => A
Expand Down
12 changes: 6 additions & 6 deletions stix2/equivalence/pattern/transform/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ class OrderDedupeTransformer(
ObservationExpressionTransformer,
):
"""
Canonically order AND/OR expressions, and dedupe ORs. E.g.:
Order AND/OR expressions, and dedupe ORs. E.g.:

A or A => A
B or A => A or B
Expand Down Expand Up @@ -489,11 +489,11 @@ def transform_followedby(self, ast):
return self.__transform(ast)


class CanonicalizeComparisonExpressionsTransformer(
class NormalizeComparisonExpressionsTransformer(
ObservationExpressionTransformer,
):
"""
Canonicalize all comparison expressions.
Normalize all comparison expressions.
"""
def __init__(self):
comp_flatten = CFlattenTransformer()
Expand All @@ -504,13 +504,13 @@ def __init__(self):

comp_special = SpecialValueCanonicalization()
comp_dnf = CDNFTransformer()
self.__comp_canonicalize = ChainTransformer(
self.__comp_normalize = ChainTransformer(
comp_special, settle_simplify, comp_dnf, settle_simplify,
)

def transform_observation(self, ast):
comp_expr = ast.operand
canon_comp_expr, changed = self.__comp_canonicalize.transform(comp_expr)
ast.operand = canon_comp_expr
norm_comp_expr, changed = self.__comp_normalize.transform(comp_expr)
ast.operand = norm_comp_expr

return ast, changed
2 changes: 1 addition & 1 deletion stix2/equivalence/pattern/transform/specials.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Some simple comparison expression canonicalization functions.
Some simple comparison expression normalization functions.
"""
import socket

Expand Down