ethyca · seanpreston · Mar 1, 2022 · Feb 22, 2022 · Feb 22, 2022 · Feb 22, 2022
diff --git a/README.md b/README.md
@@ -55,76 +55,97 @@ information for Jane across tables in both the postgres and mongo databases.
 
 ```json
 {
-    "mongo_test:flights": [
+  "mongo_test:flights": [
+    {
+      "passenger_information": {
+        "full_name": "Jane Customer"
+      }
+    }
+  ],
+  "mongo_test:customer_details": [
+    {
+      "gender": "female",
+      "children": [
+        "Erica Example"
+      ],
+      "birthday": "1990-02-28T00:00:00"
+    }
+  ],
+  "postgres_example:address": [
+    {
+      "city": "Example Mountain",
+      "state": "TX",
+      "house": 1111,
+      "zip": "54321",
+      "street": "Example Place"
+    }
+  ],
+  "postgres_example:customer": [
+    {
+      "email": "[email protected]",
+      "name": "Jane Customer"
+    }
+  ],
+  "mongo_test:rewards": [
+    {
+      "owner": [
         {
-            "passenger_information": {
-                "full_name": "Jane Customer"
-            }
-        }
-    ],
-    "mongo_test:payment_card": [
-        {
-            "ccn": "987654321",
-            "name": "Example Card 2",
-            "code": "123"
-        }
-    ],
-    "postgres_example_test_dataset:address": [
-        {
-            "zip": "54321",
-            "street": "Example Place",
-            "state": "TX",
-            "city": "Example Mountain",
-            "house": 1111
-        }
-    ],
-    "mongo_test:customer_details": [
-        {
-            "birthday": "1990-02-28T00:00:00",
-            "gender": "female",
-            "children": [
-                "Erica Example"
-            ]
-        }
-    ],
-    "postgres_example_test_dataset:customer": [
+          "phone": "530-486-6983"
+        },
         {
-            "email": "[email protected]",
-            "name": "Jane Customer"
+          "phone": "818-695-1881"
         }
-    ],
-    "postgres_example_test_dataset:payment_card": [
+      ]
+    },
+    {
+      "owner": [
         {
-            "ccn": 373719391,
-            "name": "Example Card 3",
-            "code": 222
+          "phone": "254-344-9868"
         }
-    ],
-    "mongo_test:employee": [
+      ]
+    }
+  ],
+  "mongo_test:employee": [
+    {
+      "email": "[email protected]",
+      "name": "Jane Employee"
+    }
+  ],
+  "mongo_test:conversations": [
+    {
+      "thread": [
         {
-            "email": "[email protected]",
-            "name": "Jane Employee"
+          "ccn": "987654321",
+          "chat_name": "Jane C"
         }
-    ],
-    "mongo_test:conversations": [
+      ]
+    },
+    {
+      "thread": [
         {
-            "thread": [
-                {
-                    "chat_name": "Jane C"
-                }
-            ]
+          "ccn": "987654321",
+          "chat_name": "Jane C"
         },
         {
-            "thread": [
-                {
-                    "chat_name": "Jane C"
-                },
-                {
-                    "chat_name": "Jane C"
-                }
-            ]
+          "chat_name": "Jane C"
         }
-    ]
+      ]
+    }
+  ],
+  "mongo_test:payment_card": [
+    {
+      "ccn": "987654321",
+      "code": "123",
+      "name": "Example Card 2"
+    }
+  ],
+  "postgres_example:payment_card": [
+    {
+      "ccn": 373719391,
+      "code": 222,
+      "name": "Example Card 3"
+    }
+  ]
 }
 
 ```

diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml
@@ -94,6 +94,12 @@ dataset:
                 fidesops_meta:
                   data_type: string[]
                   identity: email
+              - name: derived_phone
+                data_categories: [ user.derived ]
+                fidesops_meta:
+                  data_type: string[]
+                  return_all_elements: true
+                  identity: phone_number
           - name: derived_interests
             data_categories: [ user.derived ]
             fidesops_meta:
@@ -257,3 +263,27 @@ dataset:
             data_categories: [ user.provided.identifiable.financial ]
           - name: preferred
             data_categories: [ user.provided.nonidentifiable ]
+      - name: rewards
+        fields:
+          - name: _id
+            fidesops_meta:
+              primary_key: True
+              data_type: object_id
+          - name: owner
+            fidesops_meta:
+              data_type: object[]
+              return_all_elements: true
+            fields:
+              - name: phone
+                data_categories: [ user.provided.identifiable.contact.phone_number ]
+                fidesops_meta:
+                  data_type: string
+                  references:
+                    - dataset: mongo_test
+                      field: internal_customer_profile.customer_identifiers.derived_phone
+                      direction: from
+              - name: shopper_name
+          - name: points
+            fidesops_meta:
+              data_type: integer
+          - name: expiration_date
diff --git a/data/nosql/mongo-init.js b/data/nosql/mongo-init.js
@@ -87,14 +87,16 @@ db.internal_customer_profile.insert([
     },
     {
          "customer_identifiers": {
-            "internal_id": "cust_002"
+            "internal_id": "cust_002",
+            "derived_phone": ["757-499-5508"]
         },
         "derived_interests": ["programming", "hiking", "skateboarding"]
     },
     {
         "customer_identifiers": {
             "internal_id": "cust_003",
-            "derived_emails": ["[email protected]", "[email protected]"]  // Identity within an array field
+            "derived_emails": ["[email protected]", "[email protected]"],  // Identity within an array field
+            "derived_phone": ["530-486-6983", "254-344-9868"]
         },
         "derived_interests": ["interior design", "travel", "photography"]
     }
@@ -246,6 +248,11 @@ db.customer.insert([
     }
 ]);
 
+db.rewards.insert([
+    {"owner": [{"phone": "530-486-6983", "shopper_name": "janec"}, {"phone": "818-695-1881", "shopper_name": "janec"}], "points": 95, "expiration": Date("2023-01-05")},
+    {"owner": [{"phone": "254-344-9868", "shopper_name": "janec"}], "points": 50, "expiration": Date("2023-02-05")},
+    {"owner": [{"phone": "304-969-7140", "shopper-name": "timc"}], "points": 3, "expiration": Date("2022-02-05")}
+])
 
 
 db.payment_card.insert([

diff --git a/docs/fidesops/docs/guides/complex_fields.md b/docs/fidesops/docs/guides/complex_fields.md
@@ -267,8 +267,9 @@ arrays should be considered.
 1) If an array is the entry point into a node, we will search for corresponding matches across the entire array. You cannot specify a certain index.
 2) Everything is basically an "OR" query. Data returned from multiple array fields will be flattened before being passed into the next collection.
    1) For example, say Collection A returned values [1, 2, 3] and Collection B returned values [4, 5, 6].  Collection C has an array field that depends on both Collection A and Collection B. We search Collection C's array field to return any record that contains one of the values [1, 2, 3, 4, 5, 6] in the array.
-3. If an array field is an entry point to a node, only matching indices in that array are considered, both for access and erasures, as well as for subsequent queries on dependent collections where applicable.
-   1. For example, a query on Collection A only matched indices 0 and 1 in an array.  Only the data located at indices 0 and 1 are used to query data on dependent collection C.
+3. By default, if an array field is an entry point to a node, only matching indices in that array are considered, both for access and erasures, as well as for subsequent queries on dependent collections where applicable.
+   1. For example, a query on Collection A only matched indices 0 and 1 in an array.  Only the data located at indices 0 and 1 will be returned and used to query data on dependent collection C.
+   2. This can be overridden by specifying `return_all_elements=true` on an entrypoint array field, in which case the query will return, and an erasure will mask, the entire array.
 4.  Individual array elements are masked, not the entire array, e.g. ["MASKED", "MASKED", "MASKED"]
 
 ### Can I see a more detailed example of a query traversal with complex objects?

diff --git a/docs/fidesops/docs/guides/datasets.md b/docs/fidesops/docs/guides/datasets.md
@@ -101,11 +101,12 @@ dataset:
 - `name`: The name of the field will be used to generate query and update statements. Please note that Fidesops does not do automated schema discovery. It is only aware of the fields you declare. This means that the only fields that will be addressed and retrieved by Fidesops queries are the fields you declare.
 - `data_categories`: Annotating data\_categories connects fields to policy rules, and determines which actions apply to each field. For more information see [Policies](policies.md)
 - `fidesops_meta`: The fidesops\_meta section specifies some additional fields that control how Fidesops manages your data:
-	- `references`:  A declaration of relationships between collections. Where the configuration declares a reference to `mydatabase:address:id` it means Fidesops will use the values from `mydatabase.address.id` to search for related values in `customer`. Unlike the SQL declaration, this is not an enforceable relationship, but simply a statement of which values are connected.  In the example above, the references from the `customer` field to `mydatabase.address.id` is analogous to a SQL statement `customer id REFERENCES address.id`, with the exception that any dataset and collection can be referenced. The relationship requires you to specify the dataset as well as the collection for relationships, because you may declare a configuration with multiple datasets, where values in one collection in the first dataset are searched using values found in the second dataset.
-	- `field`: The specified linked field, using the syntax `[dataset name].[collection name ].[field name]`.
-	- `identity`: Signifies that this field is an identity value that can be used as the root for a traversal [See graph traversal](query_execution.md)
-	- `direction`(_Optional_): Accepted values are `from` or `to`. This determines how Fidesops uses the relationships to discover data. If the direction is `to`, Fidesops will only use data in the _source_ collection to discover data in the _referenced_ collection. If the direction is `from`, Fidesops will only use data in the _referenced_ collection to discover data in the _source_ collection. If the direction is omitted, Fidesops will traverse the relation in whatever direction works to discover all related data.
-	- `primary_key` (_Optional_): A boolean value that means that Fidesops will treat this field as a unique row identifier for generating update statements. If no primary key is specified for any field on a collection, no updates will be generated against that collection. If multiple fields are marked as primary keys the combination of their values will be treated as a combined key. In SQL terms, we'd issue a query that looked like `SELECT ... FROM TABLE WHERE primary_key_name_1 = value1 AND primary_key_name_2 = value2`. 
-	- `data_type` (_Optional_): An indication of the type of data held by this field. Data types are used to convert values to the appropriate type when those values are used in queries. This is especially necessary when using data of one type to help locate data of another type.  Data types are also used to generate the appropriate masked value when running erasures, since Fidesops needs to know the type of data expected by the field in order to generate an appropriate masked value. Available data types are `string`, `integer`, `float`, `boolean`, and `object_id`. `object` types are also supported for MongoDB.
-	- `length` (_Optional_): An indicator of field length.
+    - `references`: A declaration of relationships between collections. Where the configuration declares a reference to `mydatabase.address.id`, Fidesops will use the values from `mydatabase.address.id` to search for related values in `customer`. Unlike a SQL foreign key declaration, this is not an enforceable relationship, but simply a statement of which values are connected. In the example above, the reference from the `customer` field to `mydatabase.address.id` is analogous to a SQL statement `customer id REFERENCES address.id`, with the exception that any dataset and collection can be referenced. You must specify the dataset as well as the collection, because you may declare a configuration with multiple datasets, where values in one collection in the first dataset are searched using values found in the second dataset.
+    - `field`: The specified linked field, using the syntax `[dataset name].[collection name].[field name]`.
+    - `identity`: Signifies that this field is an identity value that can be used as the root for a traversal [See graph traversal](query_execution.md)
+    - `direction`(_Optional_): Accepted values are `from` or `to`. This determines how Fidesops uses the relationships to discover data. If the direction is `to`, Fidesops will only use data in the _source_ collection to discover data in the _referenced_ collection. If the direction is `from`, Fidesops will only use data in the _referenced_ collection to discover data in the _source_ collection. If the direction is omitted, Fidesops will traverse the relation in whatever direction works to discover all related data.
+    - `primary_key` (_Optional_): A boolean value that means that Fidesops will treat this field as a unique row identifier for generating update statements. If no primary key is specified for any field on a collection, no updates will be generated against that collection. If multiple fields are marked as primary keys the combination of their values will be treated as a combined key. In SQL terms, we'd issue a query that looked like `SELECT ... FROM TABLE WHERE primary_key_name_1 = value1 AND primary_key_name_2 = value2`. 
+    - `data_type` (_Optional_): An indication of the type of data held by this field. Data types are used to convert values to the appropriate type when those values are used in queries. This is especially necessary when using data of one type to help locate data of another type.  Data types are also used to generate the appropriate masked value when running erasures, since Fidesops needs to know the type of data expected by the field in order to generate an appropriate masked value. Available data types are `string`, `integer`, `float`, `boolean`, and `object_id`. `object` types are also supported for MongoDB.
+    - `length` (_Optional_): An indicator of field length.
+    - `return_all_elements` (_Optional_): For array entrypoint fields, specify whether the query should return/mask all elements of the array, or just the matching elements. By default, only matching elements are returned/masked. `return_all_elements=true` will return/mask the entire array.
 
diff --git a/docs/fidesops/docs/postman/Fidesops.postman_collection.json b/docs/fidesops/docs/postman/Fidesops.postman_collection.json
diff --git a/src/fidesops/graph/config.py b/src/fidesops/graph/config.py
@@ -240,6 +240,8 @@ class Field(BaseModel, ABC):
     """an optional pointer to an arbitrary key in an expected json package provided as a seed value"""
     data_categories: Optional[List[FidesOpsKey]]
     data_type_converter: DataTypeConverter = DataType.no_op.value
+    return_all_elements: Optional[bool] = None
+    # On an entrypoint array field: return/mask all elements, not only query matches.
 
     """Known type of held data"""
     length: Optional[int]
@@ -351,6 +353,7 @@ def generate_field(
     length: Optional[int],
     is_array: bool,
     sub_fields: List[Field],
+    return_all_elements: Optional[bool],
 ) -> Field:
     """Generate a graph field."""
 
@@ -361,6 +364,7 @@ def generate_field(
             is_array=is_array,
             fields={f.name: f for f in sub_fields},
             data_type_converter=DataType.object.value,
+            return_all_elements=return_all_elements,
         )
     return ScalarField(
         name=name,
@@ -371,6 +375,7 @@ def generate_field(
         primary_key=is_pk,
         length=length,
         is_array=is_array,
+        return_all_elements=return_all_elements,
     )
 
 

diff --git a/src/fidesops/models/datasetconfig.py b/src/fidesops/models/datasetconfig.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Any, Set
+from typing import Dict, Any, Set, Optional
 
 from boto3 import Session
 from sqlalchemy import (
@@ -96,7 +96,9 @@ def get_graph(self) -> Dataset:
         return dataset_graph
 
 
-def to_graph_field(field: FidesopsDatasetField) -> Field:
+def to_graph_field(
+    field: FidesopsDatasetField, return_all_elements: Optional[bool] = None
+) -> Field:
     """Flattens the dataset field type into its graph representation"""
 
     # NOTE: on the dataset field, annotations like identity & references are
@@ -148,9 +150,13 @@ def to_graph_field(field: FidesopsDatasetField) -> Field:
 
         (data_type_name, is_array) = parse_data_type_string(meta_section.data_type)
 
-    if field.fields:
-        sub_fields = [to_graph_field(fld) for fld in field.fields]
+        if meta_section.return_all_elements:
+            # If specified on array field, lifts and passes into sub-fields, for example,
+            # arrays of objects
+            return_all_elements = True
 
+    if field.fields:
+        sub_fields = [to_graph_field(fld, return_all_elements) for fld in field.fields]
     return generate_field(
         name=field.name,
         data_categories=field.data_categories,
@@ -161,6 +167,7 @@ def to_graph_field(field: FidesopsDatasetField) -> Field:
         length=length,
         is_array=is_array,
         sub_fields=sub_fields,
+        return_all_elements=return_all_elements,
     )
 
 

diff --git a/src/fidesops/schemas/dataset.py b/src/fidesops/schemas/dataset.py
@@ -105,6 +105,8 @@ class FidesopsMeta(BaseModel):
     """Optionally specify the data type. Fidesops will attempt to cast values to this type when querying."""
     length: Optional[int]
     """Optionally specify the allowable field length. Fidesops will not generate values that exceed this size."""
+    return_all_elements: Optional[bool]
+    """Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False."""
 
     @validator("data_type")
     def valid_data_type(cls, v: Optional[str]) -> Optional[str]:
@@ -131,6 +133,19 @@ def valid_data_categories(
         """Validate that all annotated data categories exist in the taxonomy"""
         return _valid_data_categories(v)
 
+    @validator("fidesops_meta")
+    def valid_meta(cls, meta_values: Optional[FidesopsMeta]) -> Optional[FidesopsMeta]:
+        """Validate upfront that the return_all_elements flag can only be specified on array fields"""
+        if not meta_values:
+            return meta_values
+
+        is_array: bool = bool(meta_values.data_type and "[]" in meta_values.data_type)
+        if not is_array and meta_values.return_all_elements is not None:
+            raise ValueError(
+                "The 'return_all_elements' attribute can only be specified on array fields."
+            )
+        return meta_values
+
     @validator("fields")
     def validate_object_fields(
         cls,
@@ -143,7 +158,7 @@ def validate_object_fields(
         """
         declared_data_type = None
 
-        if values["fidesops_meta"]:
+        if values.get("fidesops_meta"):
             declared_data_type = values["fidesops_meta"].data_type
 
         if fields and declared_data_type:

diff --git a/src/fidesops/task/filter_element_match.py b/src/fidesops/task/filter_element_match.py
@@ -6,11 +6,11 @@
 
 import pydash
 
-from fidesops.graph.config import FieldPath
 from fidesops.task.refine_target_path import (
     build_refined_target_paths,
     DetailedPath,
     join_detailed_path,
+    FieldPathNodeInput,
 )
 from fidesops.util.collection_util import FIDESOPS_DO_NOT_MASK_INDEX, Row
 
@@ -19,7 +19,7 @@
 
 def filter_element_match(
     row: Row,
-    query_paths: Dict[FieldPath, List[Any]],
+    query_paths: FieldPathNodeInput,
     delete_elements: bool = True,
 ) -> Dict[str, Any]:
     """