Skip to content

Commit

Permalink
Support all Gremlin message serializer formats for Neptune (#685)
Browse files Browse the repository at this point in the history
* Support all TinkerPop message serializers for Neptune

* update unit tests for new defaults

* linter fix

* Fix generate_config normalization of inferred GraphSONV1-typed input

* remove debug

* update changelog
  • Loading branch information
michaelnchin authored Sep 20, 2024
1 parent 5b91a2e commit 8fa1749
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 50 deletions.
5 changes: 3 additions & 2 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ Starting with v1.31.6, this file will contain a record of major features and upd

## Upcoming

- Updated Gremlin config `message_serializer` to accept all TinkerPop serializers ([Link to PR](https://github.com/aws/graph-notebook/pull/685))
- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668))
- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684))
- Deprecated Python 3.8 support ([Link to PR](https://github.com/aws/graph-notebook/pull/683))
- Upgraded Neo4j Bolt driver to v5.x ([Link to PR](https://github.com/aws/graph-notebook/pull/682))
- Upgraded nest_asyncio to 1.6.0 ([Link to PR](https://github.com/aws/graph-notebook/pull/698))
- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668))
- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684))
- Improved iPython config directory retrieval logic ([Link to PR](https://github.com/aws/graph-notebook/pull/687))
- Fixed `%db_reset` output for token modes ([Link to PR](https://github.com/aws/graph-notebook/pull/691))
- Fixed `%%gremlin profile` serialization issue on Neptune DB v1.2 and older ([Link to PR](https://github.com/aws/graph-notebook/pull/694))
Expand Down
49 changes: 33 additions & 16 deletions src/graph_notebook/configuration/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
HTTP_PROTOCOL_FORMATS, WS_PROTOCOL_FORMATS,
DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE,
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants,
GRAPHSONV3_VARIANTS, GRAPHSONV2_VARIANTS, GRAPHBINARYV1_VARIANTS,
GRAPHBINARYV1, GREMLIN_SERIALIZERS_HTTP,
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME,
normalize_service_name)

Expand Down Expand Up @@ -73,17 +73,29 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str

serializer_lower = message_serializer.lower()
# TODO: Update with untyped serializers once supported in GremlinPython
# Accept TinkerPop serializer class name
# https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphson
# https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphbinary
if serializer_lower == '':
message_serializer = DEFAULT_GREMLIN_SERIALIZER
elif serializer_lower in GRAPHSONV3_VARIANTS:
message_serializer = 'graphsonv3'
elif serializer_lower in GRAPHSONV2_VARIANTS:
message_serializer = 'graphsonv2'
elif serializer_lower in GRAPHBINARYV1_VARIANTS:
message_serializer = 'graphbinaryv1'
elif 'graphson' in serializer_lower:
message_serializer = 'GraphSON'
if 'untyped' in serializer_lower:
message_serializer += 'Untyped'
if 'v1' in serializer_lower:
if 'untyped' in serializer_lower:
message_serializer += 'MessageSerializerV1'
else:
message_serializer += 'MessageSerializerGremlinV1'
elif 'v2' in serializer_lower:
message_serializer += 'MessageSerializerV2'
else:
message_serializer += 'MessageSerializerV3'
elif 'graphbinary' in serializer_lower:
message_serializer = GRAPHBINARYV1
else:
print(f'Invalid Gremlin serializer specified, defaulting to graphsonv3. '
f'Valid serializers: [graphsonv3, graphsonv2, graphbinaryv1].')
f'Valid serializers: {GREMLIN_SERIALIZERS_HTTP}.')
message_serializer = DEFAULT_GREMLIN_SERIALIZER

self.traversal_source = traversal_source
Expand All @@ -93,16 +105,21 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str

if include_protocol:
protocol_lower = connection_protocol.lower()
if protocol_lower == '':
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
elif protocol_lower in HTTP_PROTOCOL_FORMATS:
if message_serializer in GREMLIN_SERIALIZERS_HTTP:
connection_protocol = DEFAULT_HTTP_PROTOCOL
elif protocol_lower in WS_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_WS_PROTOCOL
if protocol_lower != '' and protocol_lower not in HTTP_PROTOCOL_FORMATS:
print(f"Enforcing HTTP protocol usage for serializer: {message_serializer}.")
else:
print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. "
f"Valid protocols: [websockets, http].")
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
if protocol_lower == '':
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
elif protocol_lower in HTTP_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_HTTP_PROTOCOL
elif protocol_lower in WS_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_WS_PROTOCOL
else:
print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. "
f"Valid protocols: [websockets, http].")
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
self.connection_protocol = connection_protocol

def to_dict(self):
Expand Down
23 changes: 16 additions & 7 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES,
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name,
generate_snapshot_name)
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, \
GREMLIN_SERIALIZERS_WS, GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP, normalize_protocol_name, generate_snapshot_name)
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
Expand Down Expand Up @@ -1249,14 +1249,23 @@ def gremlin(self, line, cell, local_ns: dict = None):
using_http = False
query_start = time.time() * 1000 # time.time() returns time in seconds w/high precision; x1000 to get in ms
if self.client.is_neptune_domain():
connection_protocol = normalize_protocol_name(args.connection_protocol) \
if args.connection_protocol != '' \
else self.graph_notebook_config.gremlin.connection_protocol
if args.connection_protocol != '':
connection_protocol = normalize_protocol_name(args.connection_protocol)
if connection_protocol == DEFAULT_WS_PROTOCOL and \
self.graph_notebook_config.gremlin.message_serializer not in GREMLIN_SERIALIZERS_WS:
print("Unsupported serializer for GremlinPython client, "
"compatible serializers are: {GREMLIN_SERIALIZERS_WS}")
print("Defaulting to HTTP protocol.")
connection_protocol = DEFAULT_HTTP_PROTOCOL
else:
connection_protocol = self.graph_notebook_config.gremlin.connection_protocol
try:
if connection_protocol == DEFAULT_HTTP_PROTOCOL:
using_http = True
message_serializer = self.graph_notebook_config.gremlin.message_serializer
message_serializer_mime = GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[message_serializer]
query_res_http = self.client.gremlin_http_query(cell, headers={
'Accept': 'application/vnd.gremlin-v1.0+json;types=false'})
'Accept': message_serializer_mime})
query_res_http.raise_for_status()
try:
query_res_http_json = query_res_http.json()
Expand Down
47 changes: 36 additions & 11 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
# client >= 3.5.0 as the HashableDict is now part of that client driver.
# import graph_notebook.neptune.gremlin.graphsonV3d0_MapType_objectify_patch # noqa F401

DEFAULT_GREMLIN_SERIALIZER = 'graphsonv3'
DEFAULT_GREMLIN_TRAVERSAL_SOURCE = 'g'
DEFAULT_SPARQL_CONTENT_TYPE = 'application/x-www-form-urlencoded'
DEFAULT_PORT = 8182
Expand Down Expand Up @@ -119,16 +118,35 @@

false_str_variants = [False, 'False', 'false', 'FALSE']

GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0', 'graphsonmessageserializerv3']
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0', 'graphsonmessageserializerv2']
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1', 'graphbinarymessageserializerv1']
GRAPHSONV1 = 'GraphSONMessageSerializerGremlinV1'
GRAPHSONV2 = 'GraphSONMessageSerializerV2'
GRAPHSONV3 = 'GraphSONMessageSerializerV3'
GRAPHSONV1_UNTYPED = 'GraphSONUntypedMessageSerializerV1'
GRAPHSONV2_UNTYPED = 'GraphSONUntypedMessageSerializerV2'
GRAPHSONV3_UNTYPED = 'GraphSONUntypedMessageSerializerV3'
GRAPHBINARYV1 = 'GraphBinaryMessageSerializerV1'

GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP = {
GRAPHSONV1: 'application/vnd.gremlin-v1.0+json',
GRAPHSONV2: 'application/vnd.gremlin-v2.0+json',
GRAPHSONV3: 'application/vnd.gremlin-v3.0+json',
GRAPHSONV1_UNTYPED: 'application/vnd.gremlin-v1.0+json;types=false',
GRAPHSONV2_UNTYPED: 'application/vnd.gremlin-v2.0+json;types=false',
GRAPHSONV3_UNTYPED: 'application/vnd.gremlin-v3.0+json;types=false',
GRAPHBINARYV1: 'application/vnd.graphbinary-v1.0'
}

GREMLIN_SERIALIZERS_WS = [GRAPHSONV2, GRAPHSONV3, GRAPHBINARYV1]
GREMLIN_SERIALIZERS_HTTP = [GRAPHSONV1, GRAPHSONV1_UNTYPED, GRAPHSONV2_UNTYPED, GRAPHSONV3_UNTYPED]
GREMLIN_SERIALIZERS_ALL = GREMLIN_SERIALIZERS_WS + GREMLIN_SERIALIZERS_HTTP
DEFAULT_GREMLIN_SERIALIZER = GRAPHSONV1_UNTYPED

DEFAULT_WS_PROTOCOL = "websockets"
DEFAULT_HTTP_PROTOCOL = "http"
WS_PROTOCOL_FORMATS = ["ws", "websocket", DEFAULT_WS_PROTOCOL]
HTTP_PROTOCOL_FORMATS = ["https", "rest", DEFAULT_HTTP_PROTOCOL]
GREMLIN_PROTOCOL_FORMATS = WS_PROTOCOL_FORMATS + HTTP_PROTOCOL_FORMATS
DEFAULT_GREMLIN_PROTOCOL = DEFAULT_WS_PROTOCOL
DEFAULT_GREMLIN_PROTOCOL = DEFAULT_HTTP_PROTOCOL

STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
SUMMARY_MODES = ["", "basic", "detailed"]
Expand All @@ -153,16 +171,22 @@ def is_allowed_neptune_host(hostname: str, host_allowlist: list):
return False


def get_gremlin_serializer(serializer_str: str):
serializer_lower = serializer_str.lower()
if serializer_lower == 'graphbinaryv1':
def get_gremlin_serializer_driver_class(serializer_str: str):
if serializer_str == GRAPHBINARYV1:
return serializer.GraphBinarySerializersV1()
elif serializer_lower == 'graphsonv2':
elif serializer_str == GRAPHSONV2:
return serializer.GraphSONSerializersV2d0()
else:
return serializer.GraphSONSerializersV3d0()


def get_gremlin_serializer_mime(serializer_str: str):
if serializer_str in GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP.keys():
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[serializer_str]
else:
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[GRAPHSONV1_UNTYPED]


def normalize_protocol_name(protocol: str):
if protocol in WS_PROTOCOL_FORMATS:
return DEFAULT_WS_PROTOCOL
Expand Down Expand Up @@ -223,7 +247,7 @@ def __init__(self, host: str, port: int = DEFAULT_PORT,
self.gremlin_traversal_source = gremlin_traversal_source
self.gremlin_username = gremlin_username
self.gremlin_password = gremlin_password
self.gremlin_serializer = get_gremlin_serializer(gremlin_serializer)
self.gremlin_serializer = gremlin_serializer
self.neo4j_username = neo4j_username
self.neo4j_password = neo4j_password
self.neo4j_auth = neo4j_auth
Expand Down Expand Up @@ -373,9 +397,10 @@ def get_gremlin_connection(self, transport_kwargs) -> client.Client:
request = self._prepare_request('GET', ws_url)

traversal_source = 'g' if self.is_neptune_domain() else self.gremlin_traversal_source
message_serializer = get_gremlin_serializer_driver_class(self.gremlin_serializer)
return client.Client(ws_url, traversal_source, transport_factory=transport_factory_args,
username=self.gremlin_username, password=self.gremlin_password,
message_serializer=self.gremlin_serializer,
message_serializer=message_serializer,
headers=dict(request.headers), **transport_kwargs)

def gremlin_query(self, query, transport_args=None, bindings=None):
Expand Down
22 changes: 11 additions & 11 deletions test/unit/configuration/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_generate_default_config(self):
self.assertEqual('', config.gremlin.username)
self.assertEqual('', config.gremlin.password)
self.assertEqual(DEFAULT_GREMLIN_PROTOCOL, config.gremlin.connection_protocol)
self.assertEqual('graphsonv3', config.gremlin.message_serializer)
self.assertEqual('GraphSONUntypedMessageSerializerV1', config.gremlin.message_serializer)
self.assertEqual('neo4j', config.neo4j.username)
self.assertEqual('password', config.neo4j.password)
self.assertEqual(True, config.neo4j.auth)
Expand Down Expand Up @@ -170,7 +170,7 @@ def test_get_configuration_generic_required_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphsonv3'
'message_serializer': 'GraphSONUntypedMessageSerializerV1'
},
'neo4j': {
'username': 'neo4j',
Expand All @@ -197,7 +197,7 @@ def test_get_configuration_generic_all_input(self):
'traversal_source': 'a',
'username': 'user',
'password': 'pass',
'message_serializer': 'graphbinaryv1'
'message_serializer': 'GraphBinaryMessageSerializerV1'
},
'neo4j': {
'username': 'neo_user',
Expand Down Expand Up @@ -267,8 +267,8 @@ def test_get_configuration_neptune_required_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphsonv3',
'connection_protocol': 'websockets'
'message_serializer': 'GraphSONUntypedMessageSerializerV1',
'connection_protocol': 'http'
},
'neo4j': {
'username': 'neo4j',
Expand Down Expand Up @@ -300,7 +300,7 @@ def test_get_configuration_neptune_all_input(self):
'traversal_source': 'a',
'username': 'a_user',
'password': 'a_pass',
'message_serializer': 'graphbinaryv1',
'message_serializer': 'GraphSONUntypedMessageSerializerV3',
'connection_protocol': 'http'
},
'neo4j': {
Expand Down Expand Up @@ -328,7 +328,7 @@ def test_get_configuration_neptune_all_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphbinaryv1',
'message_serializer': 'GraphSONUntypedMessageSerializerV3',
'connection_protocol': 'http'
},
'neo4j': {
Expand Down Expand Up @@ -472,7 +472,7 @@ def test_configuration_gremlinsection_generic_default(self):
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphsonv3')
self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1')
self.assertFalse(hasattr(config.gremlin, "connection_protocol"))

def test_configuration_gremlinsection_generic_override(self):
Expand All @@ -486,15 +486,15 @@ def test_configuration_gremlinsection_generic_override(self):
self.assertEqual(config.gremlin.traversal_source, 't')
self.assertEqual(config.gremlin.username, 'foo')
self.assertEqual(config.gremlin.password, 'bar')
self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1')
self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1')
self.assertFalse(hasattr(config.gremlin, "connection_protocol"))

def test_configuration_gremlinsection_neptune_default(self):
config = Configuration(self.neptune_host_reg, self.port)
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphsonv3')
self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1')
self.assertEqual(config.gremlin.connection_protocol, DEFAULT_GREMLIN_PROTOCOL)

def test_configuration_gremlinsection_neptune_override(self):
Expand All @@ -510,7 +510,7 @@ def test_configuration_gremlinsection_neptune_override(self):
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1')
self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1')
self.assertEqual(config.gremlin.connection_protocol, DEFAULT_HTTP_PROTOCOL)

def test_configuration_gremlinsection_protocol_neptune_default_with_proxy(self):
Expand Down
Loading

0 comments on commit 8fa1749

Please sign in to comment.