-
Notifications
You must be signed in to change notification settings - Fork 351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Generalize graph write queries #895
Closed
Closed
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
45ec55e
Build ingest query
a641ad0
Linter
5b3c83c
Save cleanup query for another PR
71edc8b
Implement schema
41c9a24
Merge latest master
519cce9
bump mypy to 0.981 for python/mypy#13398
b900146
linter
05973fb
make load_graph_data interface make more sense
b6f3faf
fix comment
aafa38d
Docs and some better names
1293b2e
add a todo
b810d75
Doc updates, rename some fields
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
import abc | ||
from dataclasses import dataclass | ||
from dataclasses import field | ||
from enum import auto | ||
from enum import Enum | ||
from typing import List | ||
from typing import Optional | ||
|
||
|
||
class LinkDirection(Enum):
    """
    Direction of a relationship relative to the node whose schema declares it.

    Every CartographyRelSchema carries a LinkDirection:

    - INWARD: the relationship points toward the declaring node.
    - OUTWARD: the relationship points away from the declaring node.

    Example. The schema below yields the path `(:EMRCluster)<-[:RESOURCE]-(:AWSAccount)`:

        class EMRCluster(CartographyNodeSchema):
            label: str = "EMRCluster"
            sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount()
            # ...

        class EMRClusterToAWSAccount(CartographyRelSchema):
            target_node_label: str = "AWSAccount"
            rel_label: str = "RESOURCE"
            direction: LinkDirection = LinkDirection.INWARD
            # ...

    Had `EMRClusterToAWSAccount.direction` been LinkDirection.OUTWARD, the resulting path would be
    `(:EMRCluster)-[:RESOURCE]->(:AWSAccount)` instead.
    """
    # Member values are assigned by auto(); only the member identity is meaningful.
    INWARD = auto()
    OUTWARD = auto()
|
||
|
||
class PropertyRef:
    """
    PropertyRefs represent properties on cartography nodes and relationships.

    cartography takes lists of Python dicts and loads them to Neo4j. PropertyRefs allow our dynamically generated Neo4j
    ingestion queries to set values for a given node or relationship property from (A) a field on the dict being
    processed (PropertyRef.set_in_kwargs=False, default), or (B) from a single variable provided by a keyword argument
    (PropertyRef.set_in_kwargs=True).

    The string form of a PropertyRef (see `__repr__`) is the text that refers to the value:
    `item.<name>` for case (A) and `$<name>` for case (B).
    """

    def __init__(self, name: str, set_in_kwargs: bool = False):
        """
        :param name: The name of the property
        :param set_in_kwargs: Optional. If True, the property is not defined on the data dict, and we expect to find the
        property in the kwargs.
        If False, looks for the property in the data dict.
        Defaults to False.
        """
        self.name = name
        self.set_in_kwargs = set_in_kwargs

    def _parameterize_name(self) -> str:
        """
        :return: The property name prefixed with `$`, i.e. the name written as a query parameter.
        """
        return f"${self.name}"

    def __repr__(self) -> str:
        """
        :return: `$<name>` when the value comes from kwargs (set_in_kwargs=True), otherwise `item.<name>`.
        """
        if self.set_in_kwargs:
            return self._parameterize_name()
        return f"item.{self.name}"
|
||
|
||
@dataclass
class CartographyNodeProperties(abc.ABC):
    """
    Abstract base dataclass describing the properties of a CartographyNodeSchema.

    It declares `id` and `lastupdated` so that every concrete subclass is expected to provide both on its
    resulting nodes.
    """
    # init=False keeps both fields out of the generated __init__ signature.
    id: PropertyRef = field(init=False)
    lastupdated: PropertyRef = field(init=False)

    def __post_init__(self):
        """
        Refuse direct instantiation of this base class.

        The class is both an ABC and a dataclass but defines no abstract methods, so this check is the workaround
        that stops it being instantiated directly. Subclasses pass through untouched.
        """
        if type(self) is CartographyNodeProperties:
            raise TypeError("Cannot instantiate abstract class.")
|
||
|
||
@dataclass
class CartographyRelProperties(abc.ABC):
    """
    Abstract base dataclass describing the properties of a CartographyRelSchema.

    It declares `lastupdated` so that every concrete subclass is expected to provide it on its resulting
    relationships.
    """
    # init=False keeps the field out of the generated __init__ signature.
    lastupdated: PropertyRef = field(init=False)

    def __post_init__(self):
        """
        Refuse direct instantiation of this base class.

        The class is both an ABC and a dataclass but defines no abstract methods, so this check is the workaround
        that stops it being instantiated directly. Subclasses pass through untouched.
        """
        if type(self) is CartographyRelProperties:
            raise TypeError("Cannot instantiate abstract class.")
|
||
|
||
@dataclass
class CartographyRelSchema(abc.ABC):
    """
    Abstract base dataclass describing one cartography relationship.

    A CartographyRelSchema holds what is needed to connect a CartographyNodeSchema to another, already existing
    node in the graph: the target's label and key, the relationship label, its direction and its properties.
    Every member below is abstract and must be supplied by the concrete subclass.
    """

    @property
    @abc.abstractmethod
    def properties(self) -> CartographyRelProperties:
        """
        :return: The properties of the relationship.
        """

    @property
    @abc.abstractmethod
    def target_node_label(self) -> str:
        """
        :return: The label of the node that this relationship connects to.
        """

    @property
    @abc.abstractmethod
    def target_node_key(self) -> str:
        """
        :return: The name of the attribute on the target_node_label that uniquely identifies which node to connect
        to.
        """

    @property
    @abc.abstractmethod
    def target_node_key_property_ref(self) -> PropertyRef:
        """
        :return: The value of the target_node_key that uniquely identifies which node to connect to, expressed as a
        PropertyRef.
        """

    @property
    @abc.abstractmethod
    def rel_label(self) -> str:
        """
        :return: The str label of the relationship.
        """

    @property
    @abc.abstractmethod
    def direction(self) -> LinkDirection:
        """
        :return: The LinkDirection of the relationship. Please see the `LinkDirection` docs for a detailed
        explanation.
        """
|
||
|
||
@dataclass
class CartographyNodeSchema(abc.ABC):
    """
    Abstract base dataclass describing a graph node in cartography. It is used to dynamically generate graph
    ingestion queries.

    A CartographyNodeSchema is made up of:

    - CartographyNodeProperties: the properties on the node and, via PropertyRef objects, where to find their
      values.
    - [Optional] A CartographyRelSchema pointing to the node's sub-resource (see the docstring on
      `sub_resource_relationship` for details).
    - [Optional] One or more other CartographyRelSchemas to other nodes.
    """
    # Backing storage for the `extra_labels` / `other_relationships` properties below.
    # init=False keeps them out of the generated __init__; both start as None.
    _extra_labels: Optional[List[str]] = field(init=False, default=None)
    _other_relationships: Optional[List[CartographyRelSchema]] = field(init=False, default=None)

    @property
    @abc.abstractmethod
    def label(self) -> str:
        """
        :return: The primary str label of the node.
        """

    @property
    @abc.abstractmethod
    def properties(self) -> CartographyNodeProperties:
        """
        :return: The properties of the node.
        """

    @property
    def sub_resource_relationship(self) -> Optional[CartographyRelSchema]:
        """
        Optional.
        Lets subclasses declare a sub-resource relationship for the node. "Sub resource" is a term we made up that
        is best defined by examples:
        - In the AWS module, the subresource is an AWSAccount
        - In Azure, the subresource is a Subscription
        - In GCP, the subresource is a GCPProject
        - In Okta, the subresource is an OktaOrganization
        ... and so on and so forth.
        :return: None unless overridden by the subclass.
        """
        return None

    @property
    def other_relationships(self) -> Optional[List[CartographyRelSchema]]:
        """
        Optional.
        Lets subclasses declare additional cartography relationships on the node.
        :return: None if never set. Otherwise the list of CartographyRelSchema associated with the node.
        """
        return self._other_relationships

    @other_relationships.setter
    def other_relationships(self, other_rels: List[CartographyRelSchema]) -> None:
        """
        Boilerplate setter that exists to keep type hints happy.
        """
        self._other_relationships = other_rels

    @property
    def extra_labels(self) -> Optional[List[str]]:
        """
        Optional.
        Lets subclasses declare extra labels on the node.
        :return: None if never set. Otherwise the str list of extra labels specified on the node.
        """
        return self._extra_labels

    @extra_labels.setter
    def extra_labels(self, labels: List[str]) -> None:
        """
        Boilerplate setter that exists to keep type hints happy.
        """
        self._extra_labels = labels
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because we need python/mypy#13398