diff --git a/stix2/environment.py b/stix2/environment.py
index c58fa15c..09d7ed58 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -198,7 +198,7 @@ def creator_of(self, obj):
@staticmethod
def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
- """This method is meant to verify if two objects of the same type are
+ """This method verifies if two objects of the same type are
semantically equivalent.
Args:
@@ -214,9 +214,10 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
Warning:
Object types need to have property weights defined for the equivalence process.
- Otherwise, those objects will not influence the final score. Use the WEIGHTS
- dictionary under `stix2.equivalence.object` to add new entries. Similarly,
- the values or methods can be fine tuned for a particular use case.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.object` can give you an idea of how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine-tuned for a particular use case.
Note:
Default weights_dict:
@@ -232,8 +233,11 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
@staticmethod
def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
- """This method is meant to verify if two graphs are semantically
- equivalent.
+ """This method verifies if two graphs are semantically equivalent.
+ Each DataStore can contain a connected or disconnected graph and the
+ final result is weighted over the number of objects we managed to compare.
+ This approach builds on top of the object-based semantic equivalence process
+ and each comparison can return a value between 0 and 100.
Args:
ds1: A DataStore object instance representing your graph
@@ -248,15 +252,19 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
Warning:
Object types need to have property weights defined for the equivalence process.
- Otherwise, those objects will not influence the final score. Use the WEIGHTS
- dictionary under `stix2.equivalence.graph` to add new entries. Similarly,
- the values or methods can be fine tuned for a particular use case.
- Graph equivalence has additional entries and methods defined.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.graph` can give you an idea of how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine-tuned for a particular use case.
Note:
Default weights_dict:
.. include:: ../default_sem_eq_weights.rst
+ Note:
+ This implementation follows the Semantic Equivalence Committee Note.
+ See the Committee Note for details.
+
"""
return graphically_equivalent(ds1, ds2, prop_scores, **weight_dict)
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index 73cf81ae..4f66e5e8 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -1,18 +1,19 @@
import logging
from ..object import (
- custom_pattern_based, exact_match, list_reference_check,
- partial_external_reference_based, partial_list_based,
- partial_location_distance, partial_string_based, partial_timestamp_based,
- reference_check, semantically_equivalent,
+ WEIGHTS, exact_match, list_reference_check, partial_string_based,
+ partial_timestamp_based, reference_check, semantically_equivalent,
)
logger = logging.getLogger(__name__)
def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
- """This method is meant to verify if two graphs are semantically
- equivalent.
+ """This method verifies if two graphs are semantically equivalent.
+ Each DataStore can contain a connected or disconnected graph and the
+ final result is weighted over the number of objects we managed to compare.
+ This approach builds on top of the object-based semantic equivalence process
+ and each comparison can return a value between 0 and 100.
Args:
ds1: A DataStore object instance representing your graph
@@ -26,17 +27,21 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
float: A number between 0.0 and 100.0 as a measurement of equivalence.
Warning:
- Some object types do not have an entry for use in the equivalence process.
- In order for those objects to influence the final score a new entry needs to
- be defined in the WEIGHTS dictionary. Similarly, the values can be fine tuned
- for a particular use case. Graph equivalence has additional entries. The
- complete graph is needed for the two graphs that are being checked.
+ Object types need to have property weights defined for the equivalence process.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.graph` can give you an idea of how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine-tuned for a particular use case.
Note:
Default weights_dict:
.. include:: ../default_sem_eq_weights.rst
+ Note:
+ This implementation follows the Semantic Equivalence Committee Note.
+ See the Committee Note for details.
+
"""
weights = WEIGHTS.copy()
@@ -95,83 +100,31 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
# default weights used for the graph semantic equivalence process
-# values are re-balanced to account for new property checks and add up to 100
-WEIGHTS = {
- "attack-pattern": {
- "name": (30, partial_string_based),
- "external_references": (70, partial_external_reference_based),
- },
- "campaign": {
- "name": (60, partial_string_based),
- "aliases": (40, partial_list_based),
- },
- "course-of-action": {
- "name": (60, partial_string_based),
- "external_references": (40, partial_external_reference_based),
- },
- "identity": {
- "name": (60, partial_string_based),
- "identity_class": (20, exact_match),
- "sectors": (20, partial_list_based),
- },
- "indicator": {
- "indicator_types": (15, partial_list_based),
- "pattern": (80, custom_pattern_based),
- "valid_from": (5, partial_timestamp_based),
- "tdelta": 1, # One day interval
- },
- "intrusion-set": {
+WEIGHTS.update({
+ "grouping": {
"name": (20, partial_string_based),
- "external_references": (60, partial_external_reference_based),
- "aliases": (20, partial_list_based),
- },
- "location": {
- "longitude_latitude": (34, partial_location_distance),
- "region": (33, exact_match),
- "country": (33, exact_match),
- "threshold": 1000.0,
- },
- "malware": {
- "malware_types": (20, partial_list_based),
- "name": (80, partial_string_based),
- },
- "marking-definition": {
- "name": (20, exact_match),
- "definition": (60, exact_match),
- "definition_type": (20, exact_match),
+ "context": (20, partial_string_based),
+ "object_refs": (60, list_reference_check),
},
"relationship": {
+ "relationship_type": (20, exact_match),
"source_ref": (40, reference_check),
"target_ref": (40, reference_check),
- "relationship_type": (20, exact_match),
},
"report": {
- "object_refs": (60, list_reference_check),
"name": (30, partial_string_based),
"published": (10, partial_timestamp_based),
+ "object_refs": (60, list_reference_check),
"tdelta": 1, # One day interval
},
"sighting": {
"first_seen": (5, partial_timestamp_based),
"last_seen": (5, partial_timestamp_based),
- "where_sighted_refs": (20, list_reference_check),
- "observed_data_refs": (20, list_reference_check),
"sighting_of_ref": (40, reference_check),
+ "observed_data_refs": (20, list_reference_check),
+ "where_sighted_refs": (20, list_reference_check),
"summary": (10, exact_match),
},
- "threat-actor": {
- "name": (60, partial_string_based),
- "threat_actor_types": (20, partial_list_based),
- "aliases": (20, partial_list_based),
- },
- "tool": {
- "tool_types": (20, partial_list_based),
- "name": (80, partial_string_based),
- },
- "vulnerability": {
- "name": (30, partial_string_based),
- "external_references": (70, partial_external_reference_based),
- },
"_internal": {
"ignore_spec_version": False,
"versioning_checks": False,
@@ -179,4 +132,4 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
"ds2": None,
"max_depth": 1,
},
-} #: :autodoc-skip:
+}) #: :autodoc-skip:
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 3e291ac1..c24fa3cc 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -8,7 +8,7 @@
def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
- """This method is meant to verify if two objects of the same type are
+ """This method verifies if two objects of the same type are
semantically equivalent.
Args:
@@ -23,10 +23,11 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
float: A number between 0.0 and 100.0 as a measurement of equivalence.
Warning:
- Some object types do not have an entry for use in the equivalence process.
- In order for those objects to influence the final score a new entry needs to
- be defined in the WEIGHTS dictionary. Similarly, the values can be fine tuned
- for a particular use case.
+ Object types need to have property weights defined for the equivalence process.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.object` can give you an idea of how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine-tuned for a particular use case.
Note:
Default weights_dict:
@@ -396,6 +397,10 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
"name": (60, partial_string_based),
"aliases": (40, partial_list_based),
},
+ "course-of-action": {
+ "name": (60, partial_string_based),
+ "external_references": (40, partial_external_reference_based),
+ },
"identity": {
"name": (60, partial_string_based),
"identity_class": (20, exact_match),
@@ -407,6 +412,11 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
"valid_from": (5, partial_timestamp_based),
"tdelta": 1, # One day interval
},
+ "intrusion-set": {
+ "name": (20, partial_string_based),
+ "external_references": (60, partial_external_reference_based),
+ "aliases": (20, partial_list_based),
+ },
"location": {
"longitude_latitude": (34, partial_location_distance),
"region": (33, exact_match),
@@ -417,6 +427,11 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
"malware_types": (20, partial_list_based),
"name": (80, partial_string_based),
},
+ "marking-definition": {
+ "name": (20, exact_match),
+ "definition": (60, exact_match),
+ "definition_type": (20, exact_match),
+ },
"threat-actor": {
"name": (60, partial_string_based),
"threat_actor_types": (20, partial_list_based),