nodestream-proj · zprobst · Aug 22, 2023 · Aug 15, 2023 · Aug 15, 2023
@@ -110,3 +110,52 @@ We can use the following YAML definition to instruct the interpreter on how to p
           relationship_type: IN_CITY
           search: !jmespath city
 ```
+
+
+Multiple interpretation passes are also allowed for the `before_iteration` block.
+In this case, the `iterate_on` and `interpretations` blocks are applied to the result of each pass of the `before_iteration` block.
+
+For example, imagine we have data like this:
+
+```json
+{
+  "site": "github.com",
+  "other_site": "another-git-host.com",
+  "people": [
+    {"name": "John", "address": "123 Test St"},
+    {"name": "Jane", "address": "456 Test St"}
+  ]
+}
+```
+
+We can use the following YAML definition to instruct the interpreter on how to parse this data:
+
+```yaml
+- implementation: nodestream.interpreting:Interpreter
+  arguments:
+    before_iteration:
+      - - type: variables
+          site: !jmespath site
+      - - type: variables
+          site: !jmespath other_site
+    iterate_on: !jmespath people[*]
+    interpretations:
+      - type: source_node
+        name: !jmespath name
+        node_type: AwesomePerson
+      - type: relationship
+        node_type: Website
+        relationship_type: VISITS
+        node_key:
+          site: !variable site
+```
+
+This would result in the following graph:
+
+```mermaid
+graph LR
+  A[John] -->|VISITS| B[github.com];
+  C[Jane] -->|VISITS| B[github.com];
+  D[John] -->|VISITS| E[another-git-host.com];
+  F[Jane] -->|VISITS| E[another-git-host.com];
+```
@@ -131,9 +131,9 @@ async def handle_async_record_stream(self, record_stream):
 
     def interpret_record(self, record):
         context = ProviderContext.fresh(record)
-        self.before_iteration.apply_interpretations(context)
-        for sub_context in self.decomposer.decompose_record(context):
-            yield from self.interpretations.apply_interpretations(sub_context)
+        for base_context in self.before_iteration.apply_interpretations(context):
+            for sub_context in self.decomposer.decompose_record(base_context):
+                yield from self.interpretations.apply_interpretations(sub_context)
 
     def all_subordinate_components(self) -> Iterable[IntrospectiveIngestionComponent]:
         yield self.before_iteration

@@ -82,18 +82,31 @@ def test_interpretation_pass_passing_list_of_list_returns_multi_pass():
 def test_intepret_record_iterates_through_interpretation_process(stubbed_interpreter):
     input_record = 1
     stubbed_interpreter.decomposer.decompose_record.return_value = [1, 2, 3]
+    stubbed_interpreter.before_iteration.apply_interpretations.return_value = [
+        "base1",
+        "base2",
+        "base3",
+    ]
     stubbed_interpreter.interpretations.apply_interpretations.return_value = [
-        "a",
-        "b",
-        "c",
+        "inner1",
+        "inner2",
+        "inner3",
     ]
 
     results = list(stubbed_interpreter.interpret_record(input_record))
-    assert_that(results, equal_to(["a", "b", "c"] * 3))
 
-    stubbed_interpreter.decomposer.decompose_record.assert_called_once()
+    # Each source record can yield multiple base contexts from before_iteration (3 in this case),
+    # each base context can be decomposed into multiple sub-contexts (3 in this case), and
+    # each sub-context can produce multiple results (3 in this case). Hence a total of 27 results (3 * 3 * 3).
+    assert_that(results, equal_to(["inner1", "inner2", "inner3"] * 9))
+
+    # We should decompose the record once for each base context we got from before_iteration.
+    assert_that(stubbed_interpreter.decomposer.decompose_record.call_count, equal_to(3))
+
+    # The inner interpretations should be called once for each sub-context of each base context.
+    # In this case, 3 base contexts with 3 sub-contexts each, so 9 calls.
     stubbed_interpreter.interpretations.apply_interpretations.assert_has_calls(
-        (call(1), call(2), call(3))
+        (call(1), call(2), call(3)) * 3
     )