Merge pull request #496 from emmanvg/semantic-equivalence-part3

Similarity/Equivalence Changes
oasis-open · Mar 10, 2021 · f155e3e
2 parents 1735752 + c9e66de
commit f155e3e
Show file tree

Hide file tree

Showing 8 changed files with 369 additions and 434 deletions.
diff --git a/.gitignore b/.gitignore
@@ -55,8 +55,7 @@ coverage.xml
 # Sphinx documentation
 docs/_build/
 .ipynb_checkpoints
-graph_default_sem_eq_weights.rst
-object_default_sem_eq_weights.rst
+similarity_weights.rst
 
 # PyBuilder
 target/

diff --git a/docs/conf.py b/docs/conf.py
@@ -7,7 +7,6 @@
 from sphinx.ext.autodoc import ClassDocumenter
 
 from stix2.base import _STIXBase
-from stix2.equivalence.graph import GRAPH_WEIGHTS
 from stix2.equivalence.object import WEIGHTS
 from stix2.version import __version__
 
@@ -66,16 +65,9 @@
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n    ')
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('               "', '               ')
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n')
-with open('object_default_sem_eq_weights.rst', 'w') as f:
+with open('similarity_weights.rst', 'w') as f:
     f.write(".. code-block:: python\n\n   {}\n\n".format(object_default_sem_eq_weights))
 
-graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__)
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n    ')
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('               "', '               ')
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n')
-with open('graph_default_sem_eq_weights.rst', 'w') as f:
-    f.write(".. code-block:: python\n\n   {}\n\n".format(graph_default_sem_eq_weights))
-
 
 def get_property_type(prop):
     """Convert property classname into pretty string name of property.

diff --git a/docs/guide/equivalence.ipynb b/docs/guide/equivalence.ipynb
@@ -4607,20 +4607,11 @@
     "    ),\n",
     "]\n",
     "\n",
-    "\n",
-    "weights = {\n",
-    "    \"_internal\": {\n",
-    "        \"ignore_spec_version\": False,\n",
-    "        \"versioning_checks\": False,\n",
-    "        \"max_depth\": 1,\n",
-    "    },\n",
-    "}\n",
-    "\n",
     "memstore1 = MemoryStore(g1)\n",
     "memstore2 = MemoryStore(g2)\n",
     "prop_scores = {}\n",
     "\n",
-    "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores, **weights)\n",
+    "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores)\n",
     "equivalence_result = env.graph_equivalence(memstore1, memstore2, threshold=60)\n",
     "\n",
     "print(similarity_result)\n",

diff --git a/stix2/environment.py b/stix2/environment.py
@@ -189,16 +189,31 @@ def creator_of(self, obj):
             return None
 
     @staticmethod
-    def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
+    def object_similarity(
+        obj1, obj2, prop_scores={}, ds1=None, ds2=None,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a measure of how similar the two objects are.
 
         Args:
             obj1: A stix2 object instance
             obj2: A stix2 object instance
             prop_scores: A dictionary that can hold individual property scores,
                 weights, contributing score, matching score and sum of weights.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ds1 (optional): A DataStore object instance from which to pull related objects
+            ds2 (optional): A DataStore object instance from which to pull related objects
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -213,17 +228,24 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
         Note:
             Default weight_dict:
 
-            .. include:: ../object_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return object_similarity(obj1, obj2, prop_scores, **weight_dict)
+        return object_similarity(
+            obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
+    def object_equivalence(
+        obj1, obj2, prop_scores={}, threshold=70, ds1=None, ds2=None,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a true/false value if two objects are semantically equivalent.
         Internally, it calls the object_similarity function and compares it against the given
         threshold value.
@@ -236,8 +258,19 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
             threshold: A numerical value between 0 and 100 to determine the minimum
                 score to result in successfully calling both objects equivalent. This
                 value can be tuned.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ds1 (optional): A DataStore object instance from which to pull related objects
+            ds2 (optional): A DataStore object instance from which to pull related objects
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             bool: True if the result of the object similarity is greater than or equal to
@@ -253,17 +286,23 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
         Note:
             Default weight_dict:
 
-            .. include:: ../object_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return object_equivalence(obj1, obj2, prop_scores, threshold, **weight_dict)
+        return object_equivalence(
+            obj1, obj2, prop_scores, threshold, ds1, ds2,
+            ignore_spec_version, versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
+    def graph_similarity(
+        ds1, ds2, prop_scores={}, ignore_spec_version=False,
+        versioning_checks=False, max_depth=1, **weight_dict
+    ):
         """This method returns a similarity score for two given graphs.
         Each DataStore can contain a connected or disconnected graph and the
         final result is weighted over the amount of objects we managed to compare.
@@ -275,8 +314,17 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
             ds2: A DataStore object instance representing your graph
             prop_scores: A dictionary that can hold individual property scores,
                 weights, contributing score, matching score and sum of weights.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -291,17 +339,24 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         Note:
             Default weight_dict:
 
-            .. include:: ../graph_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return graph_similarity(ds1, ds2, prop_scores, **weight_dict)
+        return graph_similarity(
+            ds1, ds2, prop_scores, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
+    def graph_equivalence(
+        ds1, ds2, prop_scores={}, threshold=70,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a true/false value if two graphs are semantically equivalent.
         Internally, it calls the graph_similarity function and compares it against the given
         threshold value.
@@ -314,8 +369,17 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
             threshold: A numerical value between 0 and 100 to determine the minimum
                 score to result in successfully calling both graphs equivalent. This
                 value can be tuned.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             bool: True if the result of the graph similarity is greater than or equal to
@@ -331,11 +395,14 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
         Note:
             Default weight_dict:
 
-            .. include:: ../graph_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return graph_equivalence(ds1, ds2, prop_scores, threshold, **weight_dict)
+        return graph_equivalence(
+            ds1, ds2, prop_scores, threshold, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )