Skip to content

Commit

Permalink
feature/auto-cast (#222)
Browse files Browse the repository at this point in the history
  • Loading branch information
alan890104 authored Aug 11, 2022
1 parent 9507876 commit addf1ca
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 44 deletions.
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,49 @@ with connection_pool.session_context('root', 'nebula') as session:
connection_pool.close()
```

## Quick example to fetch query results into a pandas DataFrame

```python
from nebula3.gclient.net import ConnectionPool
from nebula3.Config import Config
import pandas as pd
from typing import Dict
from nebula3.data.ResultSet import ResultSet

def result_to_df(result: ResultSet) -> pd.DataFrame:
    """Convert a successful query ResultSet into a pandas DataFrame.

    Builds one Python list per result column, casting each ValueWrapper to a
    plain Python value via ``cast()``, then assembles the DataFrame.

    :param result: a ResultSet whose query succeeded.
    :return: a DataFrame with one column per result column, in result order.
    :raises AssertionError: if the query did not succeed.
    """
    assert result.is_succeeded()
    columns = result.keys()
    d: Dict[str, list] = {}
    for col_num in range(result.col_size()):
        col_name = columns[col_num]
        col_list = result.column_values(col_name)
        d[col_name] = [x.cast() for x in col_list]
    # NOTE: pd.DataFrame.from_dict(d, columns=...) raises ValueError with the
    # default orient='columns'; pass the dict to the constructor instead.
    return pd.DataFrame(d, columns=columns)

# define a config (defaults are fine for a local quick start)
config = Config()

# init connection pool
connection_pool = ConnectionPool()

# if the given servers are ok, return true, else return false
ok = connection_pool.init([('127.0.0.1', 9669)], config)

# use session_context so the session is released automatically on exit
with connection_pool.session_context('root', 'nebula') as session:
    session.execute('USE <your graph space>')
    result = session.execute('<your query>')
    df = result_to_df(result)
    print(df)

# close the pool
connection_pool.close()

```

## Quick example to use storage-client to scan vertex and edge

You should make sure the scan client can connect to the storage addresses listed by `SHOW HOSTS`
Expand Down
100 changes: 57 additions & 43 deletions example/FormatResp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,71 @@
# This source code is licensed under Apache 2.0 License.


from typing import Dict

import pandas as pd
import prettytable
from nebula3.data.DataObject import Value, ValueWrapper
from nebula3.data.ResultSet import ResultSet


################################
# Method 1 (Recommended) #
################################
def result_to_df(result: ResultSet) -> pd.DataFrame:
    """Convert a successful query ResultSet into a pandas DataFrame.

    Builds one Python list per result column, casting each ValueWrapper to a
    plain Python value via ``cast()``, then assembles the DataFrame.

    :param result: a ResultSet whose query succeeded.
    :return: a DataFrame with one column per result column, in result order.
    :raises AssertionError: if the query did not succeed.
    """
    assert result.is_succeeded()
    columns = result.keys()
    d: Dict[str, list] = {}
    for col_num in range(result.col_size()):
        col_name = columns[col_num]
        col_list = result.column_values(col_name)
        d[col_name] = [x.cast() for x in col_list]
    # NOTE: pd.DataFrame.from_dict(d, columns=...) raises ValueError with the
    # default orient='columns'; pass the dict to the constructor instead.
    return pd.DataFrame(d, columns=columns)


################################
# Method 2 (Customize) #
################################
# Lookup table mapping a nebula Value type code to the name of the
# ValueWrapper method that converts it to a plain Python value.
# NOTE(review): assumes every listed method exists on ValueWrapper —
# `customized_cast_with_dict` silently returns None if one is missing;
# confirm e.g. `as_empty` against the ValueWrapper API.
cast_as = {
    Value.NVAL: "as_null",
    Value.__EMPTY__: "as_empty",
    Value.BVAL: "as_bool",
    Value.IVAL: "as_int",
    Value.FVAL: "as_double",
    Value.SVAL: "as_string",
    Value.LVAL: "as_list",
    Value.UVAL: "as_set",
    Value.MVAL: "as_map",
    Value.TVAL: "as_time",
    Value.DVAL: "as_date",
    Value.DTVAL: "as_datetime",
    Value.VVAL: "as_vertex",
    Value.EVAL: "as_edge",
    Value.PVAL: "as_path",
    Value.GGVAL: "as_geography",
    Value.DUVAL: "as_duration",
}


def customized_cast_with_dict(val: ValueWrapper):
    """Cast *val* to a plain Python value via the ``cast_as`` lookup table.

    :param val: the wrapped value to convert.
    :return: the converted value, or None when the mapped converter method
             is absent on *val*.
    :raises KeyError: when the wrapped type code has no ``cast_as`` entry.
    """
    type_code = val._value.getType()
    if type_code not in cast_as:
        raise KeyError("No such key: {}".format(type_code))
    # Fall back to a no-op returning None if the converter is missing,
    # mirroring the table's best-effort contract.
    converter = getattr(val, cast_as[type_code], None)
    return converter() if converter is not None else None


from nebula3.data.DataObject import ValueWrapper


def cast(val: ValueWrapper):
    """Recursively convert a ValueWrapper into a plain Python value.

    Scalars map one-to-one onto their ``as_*`` converter; list/set/map
    values are converted element by element. An unsupported type prints
    an error and yields None.
    """
    if val.is_empty():
        return '__EMPTY__'
    if val.is_null():
        return '__NULL__'
    # Scalar and graph-object types: probe each predicate in order and
    # invoke the matching converter lazily by name.
    dispatch = (
        ("is_bool", "as_bool"),
        ("is_int", "as_int"),
        ("is_double", "as_double"),
        ("is_string", "as_string"),
        ("is_time", "as_time"),
        ("is_date", "as_date"),
        ("is_datetime", "as_datetime"),
    )
    for predicate, converter in dispatch:
        if getattr(val, predicate)():
            return getattr(val, converter)()
    # Container types need recursion into their elements.
    if val.is_list():
        return [cast(item) for item in val.as_list()]
    if val.is_set():
        return {cast(item) for item in val.as_set()}
    if val.is_map():
        return {key: cast(item) for key, item in val.as_map()}
    for predicate, converter in (
        ("is_vertex", "as_node"),
        ("is_edge", "as_relationship"),
        ("is_path", "as_path"),
        ("is_geography", "as_geography"),
    ):
        if getattr(val, predicate)():
            return getattr(val, converter)()
    print("ERROR: Type unsupported")
    return None


def print_resp(resp: ResultSet):
    """Pretty-print a successful ResultSet as an ASCII table.

    Every cell is converted to a plain Python value via
    ``customized_cast_with_dict`` before being added to the table.

    :param resp: a ResultSet whose query succeeded.
    :raises AssertionError: if the query did not succeed.
    """
    assert resp.is_succeeded()
    table = prettytable.PrettyTable()
    table.field_names = resp.keys()
    for record in resp:
        table.add_row([customized_cast_with_dict(cell) for cell in record])
    print(table)
38 changes: 37 additions & 1 deletion nebula3/data/DataObject.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# This source code is licensed under Apache 2.0 License.

from typing import Dict, List, Set
from typing import Any, Dict, List, Set
import pytz
from datetime import datetime, timezone, timedelta
from nebula3.Exception import (
Expand All @@ -24,6 +24,23 @@
Time,
)

# Scalar type-code -> ValueWrapper converter-method name, used by
# ValueWrapper.cast() for O(1) dispatch. Container types (LVAL/UVAL/MVAL)
# are deliberately absent: they need recursive element casting.
# NOTE: a module-level dunder name like this is NOT subject to class-body
# name mangling (mangling skips names with trailing double underscores),
# so methods can reference it directly.
__AS_MAP__ = {
    Value.NVAL: "as_null",
    Value.__EMPTY__: "as_empty",
    Value.BVAL: "as_bool",
    Value.IVAL: "as_int",
    Value.FVAL: "as_double",
    Value.SVAL: "as_string",
    Value.TVAL: "as_time",
    Value.DVAL: "as_date",
    Value.DTVAL: "as_datetime",
    Value.VVAL: "as_vertex",
    Value.EVAL: "as_edge",
    Value.PVAL: "as_path",
    Value.GGVAL: "as_geography",
    Value.DUVAL: "as_duration",
}


def date_time_convert_with_timezone(date_time: DateTime, timezone_offset: int):
"""the function to convert utc date_time to local date_time
Expand Down Expand Up @@ -662,6 +679,25 @@ def as_duration(self) -> "DurationWrapper":
"expect duration type, but is " + self._get_type_name()
)

def cast(self) -> Any:
    """Automatically convert this wrapped value to its concrete Python type.

    Scalar types are dispatched through __AS_MAP__ (O(1) dict lookup);
    list/set/map are handled separately so their elements can be cast
    recursively. Casting inside as_list/as_set/as_map themselves would
    change those methods' return types and break backward compatibility,
    which is why the containers are converted here instead.

    :return: Any concrete type (e.g. int, float, List, Set, Dict), or
             None when the wrapped type code has no known conversion.
    """
    _type = self._value.getType()
    if _type in __AS_MAP__:
        return getattr(self, __AS_MAP__[_type])()
    if _type == Value.LVAL:
        return [x.cast() for x in self.as_list()]
    if _type == Value.UVAL:
        return {x.cast() for x in self.as_set()}
    if _type == Value.MVAL:
        return {k: v.cast() for k, v in self.as_map().items()}
    # Unknown/unsupported type code: return None explicitly instead of
    # silently falling off the end of the function.
    return None

def _get_type_name(self):
if self.is_empty():
return "empty"
Expand Down
58 changes: 58 additions & 0 deletions tests/test_data_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,64 @@ def test_as_map(self):
expect_result["b"] = ValueWrapper(ttypes.Value(sVal=b"car"))
assert map_val == expect_result

def test_cast(self):
    """cast() on a list value recursively converts every element."""

    def _string_value(raw):
        # helper: wrap raw bytes in a string-typed Value
        v = ttypes.Value()
        v.set_sVal(raw)
        return v

    flag = ttypes.Value()
    flag.set_bVal(False)

    number = ttypes.Value()
    number.set_iVal(100)

    ratio = ttypes.Value()
    ratio.set_fVal(10.10)

    word = _string_value(b"word")
    car = _string_value(b"car")

    inner_set = NSet()
    inner_set.values = {word, car}
    set_value = ttypes.Value()
    set_value.set_uVal(inner_set)

    inner_map = NMap()
    inner_map.kvs = {b"a": word, b"b": car}
    map_value = ttypes.Value()
    map_value.set_mVal(inner_map)

    inner_list = NList()
    inner_list.values = [
        flag,
        number,
        ratio,
        word,
        car,
        set_value,
        map_value,
    ]
    outer = ttypes.Value()
    outer.set_lVal(inner_list)

    casted = ValueWrapper(outer).cast()
    assert isinstance(casted, list)
    assert casted == [
        False,
        100,
        10.10,
        "word",
        "car",
        {"word", "car"},
        {"a": "word", "b": "car"},
    ]

def test_as_time(self):
time = Time()
time.hour = 10
Expand Down

0 comments on commit addf1ca

Please sign in to comment.