Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support all Gremlin message serializer formats for Neptune #685

Merged
merged 10 commits into from
Sep 20, 2024
5 changes: 3 additions & 2 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ Starting with v1.31.6, this file will contain a record of major features and upd

## Upcoming

- Updated Gremlin config `message_serializer` to accept all TinkerPop serializers ([Link to PR](https://github.com/aws/graph-notebook/pull/685))
- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668))
- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684))
- Deprecated Python 3.8 support ([Link to PR](https://github.com/aws/graph-notebook/pull/683))
- Upgraded Neo4j Bolt driver to v5.x ([Link to PR](https://github.com/aws/graph-notebook/pull/682))
- Upgraded nest_asyncio to 1.6.0 ([Link to PR](https://github.com/aws/graph-notebook/pull/698))
- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668))
- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684))
- Improved iPython config directory retrieval logic ([Link to PR](https://github.com/aws/graph-notebook/pull/687))
- Fixed `%db_reset` output for token modes ([Link to PR](https://github.com/aws/graph-notebook/pull/691))
- Fixed `%%gremlin profile` serialization issue on Neptune DB v1.2 and older ([Link to PR](https://github.com/aws/graph-notebook/pull/694))
Expand Down
49 changes: 33 additions & 16 deletions src/graph_notebook/configuration/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
HTTP_PROTOCOL_FORMATS, WS_PROTOCOL_FORMATS,
DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE,
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants,
GRAPHSONV3_VARIANTS, GRAPHSONV2_VARIANTS, GRAPHBINARYV1_VARIANTS,
GRAPHBINARYV1, GREMLIN_SERIALIZERS_HTTP,
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME,
normalize_service_name)

Expand Down Expand Up @@ -73,17 +73,29 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str

serializer_lower = message_serializer.lower()
# TODO: Update with untyped serializers once supported in GremlinPython
# Accept TinkerPop serializer class name
# https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphson
# https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphbinary
if serializer_lower == '':
message_serializer = DEFAULT_GREMLIN_SERIALIZER
elif serializer_lower in GRAPHSONV3_VARIANTS:
message_serializer = 'graphsonv3'
elif serializer_lower in GRAPHSONV2_VARIANTS:
message_serializer = 'graphsonv2'
elif serializer_lower in GRAPHBINARYV1_VARIANTS:
message_serializer = 'graphbinaryv1'
elif 'graphson' in serializer_lower:
message_serializer = 'GraphSON'
if 'untyped' in serializer_lower:
message_serializer += 'Untyped'
if 'v1' in serializer_lower:
if 'untyped' in serializer_lower:
message_serializer += 'MessageSerializerV1'
else:
message_serializer += 'MessageSerializerGremlinV1'
elif 'v2' in serializer_lower:
message_serializer += 'MessageSerializerV2'
else:
message_serializer += 'MessageSerializerV3'
elif 'graphbinary' in serializer_lower:
message_serializer = GRAPHBINARYV1
else:
print(f'Invalid Gremlin serializer specified, defaulting to graphsonv3. '
f'Valid serializers: [graphsonv3, graphsonv2, graphbinaryv1].')
f'Valid serializers: {GREMLIN_SERIALIZERS_HTTP}.')
message_serializer = DEFAULT_GREMLIN_SERIALIZER

self.traversal_source = traversal_source
Expand All @@ -93,16 +105,21 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str

if include_protocol:
protocol_lower = connection_protocol.lower()
if protocol_lower == '':
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
elif protocol_lower in HTTP_PROTOCOL_FORMATS:
if message_serializer in GREMLIN_SERIALIZERS_HTTP:
connection_protocol = DEFAULT_HTTP_PROTOCOL
elif protocol_lower in WS_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_WS_PROTOCOL
if protocol_lower != '' and protocol_lower not in HTTP_PROTOCOL_FORMATS:
print(f"Enforcing HTTP protocol usage for serializer: {message_serializer}.")
else:
print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. "
f"Valid protocols: [websockets, http].")
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
if protocol_lower == '':
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
elif protocol_lower in HTTP_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_HTTP_PROTOCOL
elif protocol_lower in WS_PROTOCOL_FORMATS:
connection_protocol = DEFAULT_WS_PROTOCOL
else:
print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. "
f"Valid protocols: [websockets, http].")
connection_protocol = DEFAULT_GREMLIN_PROTOCOL
self.connection_protocol = connection_protocol

def to_dict(self):
Expand Down
23 changes: 16 additions & 7 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES,
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name,
generate_snapshot_name)
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, \
GREMLIN_SERIALIZERS_WS, GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP, normalize_protocol_name, generate_snapshot_name)
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
Expand Down Expand Up @@ -1249,14 +1249,23 @@ def gremlin(self, line, cell, local_ns: dict = None):
using_http = False
query_start = time.time() * 1000 # time.time() returns time in seconds w/high precision; x1000 to get in ms
if self.client.is_neptune_domain():
connection_protocol = normalize_protocol_name(args.connection_protocol) \
if args.connection_protocol != '' \
else self.graph_notebook_config.gremlin.connection_protocol
if args.connection_protocol != '':
connection_protocol = normalize_protocol_name(args.connection_protocol)
if connection_protocol == DEFAULT_WS_PROTOCOL and \
self.graph_notebook_config.gremlin.message_serializer not in GREMLIN_SERIALIZERS_WS:
print("Unsupported serializer for GremlinPython client, "
"compatible serializers are: {GREMLIN_SERIALIZERS_WS}")
print("Defaulting to HTTP protocol.")
connection_protocol = DEFAULT_HTTP_PROTOCOL
else:
connection_protocol = self.graph_notebook_config.gremlin.connection_protocol
try:
if connection_protocol == DEFAULT_HTTP_PROTOCOL:
using_http = True
message_serializer = self.graph_notebook_config.gremlin.message_serializer
message_serializer_mime = GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[message_serializer]
query_res_http = self.client.gremlin_http_query(cell, headers={
'Accept': 'application/vnd.gremlin-v1.0+json;types=false'})
'Accept': message_serializer_mime})
query_res_http.raise_for_status()
try:
query_res_http_json = query_res_http.json()
Expand Down
47 changes: 36 additions & 11 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
# client >= 3.5.0 as the HashableDict is now part of that client driver.
# import graph_notebook.neptune.gremlin.graphsonV3d0_MapType_objectify_patch # noqa F401

DEFAULT_GREMLIN_SERIALIZER = 'graphsonv3'
DEFAULT_GREMLIN_TRAVERSAL_SOURCE = 'g'
DEFAULT_SPARQL_CONTENT_TYPE = 'application/x-www-form-urlencoded'
DEFAULT_PORT = 8182
Expand Down Expand Up @@ -119,16 +118,35 @@

false_str_variants = [False, 'False', 'false', 'FALSE']

GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0', 'graphsonmessageserializerv3']
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0', 'graphsonmessageserializerv2']
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1', 'graphbinarymessageserializerv1']
GRAPHSONV1 = 'GraphSONMessageSerializerGremlinV1'
GRAPHSONV2 = 'GraphSONMessageSerializerV2'
GRAPHSONV3 = 'GraphSONMessageSerializerV3'
GRAPHSONV1_UNTYPED = 'GraphSONUntypedMessageSerializerV1'
GRAPHSONV2_UNTYPED = 'GraphSONUntypedMessageSerializerV2'
GRAPHSONV3_UNTYPED = 'GraphSONUntypedMessageSerializerV3'
GRAPHBINARYV1 = 'GraphBinaryMessageSerializerV1'

GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP = {
GRAPHSONV1: 'application/vnd.gremlin-v1.0+json',
GRAPHSONV2: 'application/vnd.gremlin-v2.0+json',
GRAPHSONV3: 'application/vnd.gremlin-v3.0+json',
GRAPHSONV1_UNTYPED: 'application/vnd.gremlin-v1.0+json;types=false',
GRAPHSONV2_UNTYPED: 'application/vnd.gremlin-v2.0+json;types=false',
GRAPHSONV3_UNTYPED: 'application/vnd.gremlin-v3.0+json;types=false',
GRAPHBINARYV1: 'application/vnd.graphbinary-v1.0'
}

GREMLIN_SERIALIZERS_WS = [GRAPHSONV2, GRAPHSONV3, GRAPHBINARYV1]
GREMLIN_SERIALIZERS_HTTP = [GRAPHSONV1, GRAPHSONV1_UNTYPED, GRAPHSONV2_UNTYPED, GRAPHSONV3_UNTYPED]
GREMLIN_SERIALIZERS_ALL = GREMLIN_SERIALIZERS_WS + GREMLIN_SERIALIZERS_HTTP
DEFAULT_GREMLIN_SERIALIZER = GRAPHSONV1_UNTYPED

DEFAULT_WS_PROTOCOL = "websockets"
DEFAULT_HTTP_PROTOCOL = "http"
WS_PROTOCOL_FORMATS = ["ws", "websocket", DEFAULT_WS_PROTOCOL]
HTTP_PROTOCOL_FORMATS = ["https", "rest", DEFAULT_HTTP_PROTOCOL]
GREMLIN_PROTOCOL_FORMATS = WS_PROTOCOL_FORMATS + HTTP_PROTOCOL_FORMATS
DEFAULT_GREMLIN_PROTOCOL = DEFAULT_WS_PROTOCOL
DEFAULT_GREMLIN_PROTOCOL = DEFAULT_HTTP_PROTOCOL

STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
SUMMARY_MODES = ["", "basic", "detailed"]
Expand All @@ -153,16 +171,22 @@ def is_allowed_neptune_host(hostname: str, host_allowlist: list):
return False


def get_gremlin_serializer(serializer_str: str):
serializer_lower = serializer_str.lower()
if serializer_lower == 'graphbinaryv1':
def get_gremlin_serializer_driver_class(serializer_str: str):
if serializer_str == GRAPHBINARYV1:
return serializer.GraphBinarySerializersV1()
elif serializer_lower == 'graphsonv2':
elif serializer_str == GRAPHSONV2:
return serializer.GraphSONSerializersV2d0()
else:
return serializer.GraphSONSerializersV3d0()


def get_gremlin_serializer_mime(serializer_str: str):
if serializer_str in GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP.keys():
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[serializer_str]
else:
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[GRAPHSONV1_UNTYPED]


def normalize_protocol_name(protocol: str):
if protocol in WS_PROTOCOL_FORMATS:
return DEFAULT_WS_PROTOCOL
Expand Down Expand Up @@ -223,7 +247,7 @@ def __init__(self, host: str, port: int = DEFAULT_PORT,
self.gremlin_traversal_source = gremlin_traversal_source
self.gremlin_username = gremlin_username
self.gremlin_password = gremlin_password
self.gremlin_serializer = get_gremlin_serializer(gremlin_serializer)
self.gremlin_serializer = gremlin_serializer
self.neo4j_username = neo4j_username
self.neo4j_password = neo4j_password
self.neo4j_auth = neo4j_auth
Expand Down Expand Up @@ -373,9 +397,10 @@ def get_gremlin_connection(self, transport_kwargs) -> client.Client:
request = self._prepare_request('GET', ws_url)

traversal_source = 'g' if self.is_neptune_domain() else self.gremlin_traversal_source
message_serializer = get_gremlin_serializer_driver_class(self.gremlin_serializer)
return client.Client(ws_url, traversal_source, transport_factory=transport_factory_args,
username=self.gremlin_username, password=self.gremlin_password,
message_serializer=self.gremlin_serializer,
message_serializer=message_serializer,
headers=dict(request.headers), **transport_kwargs)

def gremlin_query(self, query, transport_args=None, bindings=None):
Expand Down
22 changes: 11 additions & 11 deletions test/unit/configuration/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_generate_default_config(self):
self.assertEqual('', config.gremlin.username)
self.assertEqual('', config.gremlin.password)
self.assertEqual(DEFAULT_GREMLIN_PROTOCOL, config.gremlin.connection_protocol)
self.assertEqual('graphsonv3', config.gremlin.message_serializer)
self.assertEqual('GraphSONUntypedMessageSerializerV1', config.gremlin.message_serializer)
self.assertEqual('neo4j', config.neo4j.username)
self.assertEqual('password', config.neo4j.password)
self.assertEqual(True, config.neo4j.auth)
Expand Down Expand Up @@ -170,7 +170,7 @@ def test_get_configuration_generic_required_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphsonv3'
'message_serializer': 'GraphSONUntypedMessageSerializerV1'
},
'neo4j': {
'username': 'neo4j',
Expand All @@ -197,7 +197,7 @@ def test_get_configuration_generic_all_input(self):
'traversal_source': 'a',
'username': 'user',
'password': 'pass',
'message_serializer': 'graphbinaryv1'
'message_serializer': 'GraphBinaryMessageSerializerV1'
},
'neo4j': {
'username': 'neo_user',
Expand Down Expand Up @@ -267,8 +267,8 @@ def test_get_configuration_neptune_required_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphsonv3',
'connection_protocol': 'websockets'
'message_serializer': 'GraphSONUntypedMessageSerializerV1',
'connection_protocol': 'http'
},
'neo4j': {
'username': 'neo4j',
Expand Down Expand Up @@ -300,7 +300,7 @@ def test_get_configuration_neptune_all_input(self):
'traversal_source': 'a',
'username': 'a_user',
'password': 'a_pass',
'message_serializer': 'graphbinaryv1',
'message_serializer': 'GraphSONUntypedMessageSerializerV3',
'connection_protocol': 'http'
},
'neo4j': {
Expand Down Expand Up @@ -328,7 +328,7 @@ def test_get_configuration_neptune_all_input(self):
'traversal_source': 'g',
'username': '',
'password': '',
'message_serializer': 'graphbinaryv1',
'message_serializer': 'GraphSONUntypedMessageSerializerV3',
'connection_protocol': 'http'
},
'neo4j': {
Expand Down Expand Up @@ -472,7 +472,7 @@ def test_configuration_gremlinsection_generic_default(self):
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphsonv3')
self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1')
self.assertFalse(hasattr(config.gremlin, "connection_protocol"))

def test_configuration_gremlinsection_generic_override(self):
Expand All @@ -486,15 +486,15 @@ def test_configuration_gremlinsection_generic_override(self):
self.assertEqual(config.gremlin.traversal_source, 't')
self.assertEqual(config.gremlin.username, 'foo')
self.assertEqual(config.gremlin.password, 'bar')
self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1')
self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1')
self.assertFalse(hasattr(config.gremlin, "connection_protocol"))

def test_configuration_gremlinsection_neptune_default(self):
config = Configuration(self.neptune_host_reg, self.port)
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphsonv3')
self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1')
self.assertEqual(config.gremlin.connection_protocol, DEFAULT_GREMLIN_PROTOCOL)

def test_configuration_gremlinsection_neptune_override(self):
Expand All @@ -510,7 +510,7 @@ def test_configuration_gremlinsection_neptune_override(self):
self.assertEqual(config.gremlin.traversal_source, 'g')
self.assertEqual(config.gremlin.username, '')
self.assertEqual(config.gremlin.password, '')
self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1')
self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1')
self.assertEqual(config.gremlin.connection_protocol, DEFAULT_HTTP_PROTOCOL)

def test_configuration_gremlinsection_protocol_neptune_default_with_proxy(self):
Expand Down
Loading
Loading