Skip to content

Commit

Permalink
feature/auto-cast (#222)
Browse files Browse the repository at this point in the history
  • Loading branch information
alan890104 authored Aug 11, 2022
1 parent 9507876 commit addf1ca
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 44 deletions.
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,49 @@ with connection_pool.session_context('root', 'nebula') as session:
connection_pool.close()
```

## Quick example to fetch query results into a pandas DataFrame

```python
from nebula3.gclient.net import ConnectionPool
from nebula3.Config import Config
import pandas as pd
from typing import Dict
from nebula3.data.ResultSet import ResultSet

def result_to_df(result: ResultSet) -> pd.DataFrame:
    """Convert a successful query ResultSet into a pandas DataFrame.

    Builds one Python list per result column, casting each ValueWrapper to a
    plain Python value via ``cast()``, then assembles the DataFrame.

    :param result: a ResultSet whose query succeeded.
    :return: a DataFrame with one column per result column, in result order.
    :raises AssertionError: if the query did not succeed.
    """
    assert result.is_succeeded()
    columns = result.keys()
    d: Dict[str, list] = {}
    for col_num in range(result.col_size()):
        col_name = columns[col_num]
        col_list = result.column_values(col_name)
        d[col_name] = [x.cast() for x in col_list]
    # NOTE: pd.DataFrame.from_dict(d, columns=...) raises ValueError with the
    # default orient='columns'; pass the dict to the constructor instead.
    return pd.DataFrame(d, columns=columns)

# define a config (defaults are fine for a local quick start)
config = Config()

# init connection pool
connection_pool = ConnectionPool()

# if the given servers are ok, return true, else return false
ok = connection_pool.init([('127.0.0.1', 9669)], config)

# use session_context so the session is released automatically on exit
with connection_pool.session_context('root', 'nebula') as session:
    session.execute('USE <your graph space>')
    result = session.execute('<your query>')
    df = result_to_df(result)
    print(df)

# close the pool
connection_pool.close()

```

## Quick example to use storage-client to scan vertex and edge

You should make sure the scan client can connect to the storage addresses listed by `SHOW HOSTS`
Expand Down
100 changes: 57 additions & 43 deletions example/FormatResp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,71 @@
# This source code is licensed under Apache 2.0 License.


from typing import Dict

import pandas as pd
import prettytable
from nebula3.data.DataObject import Value, ValueWrapper
from nebula3.data.ResultSet import ResultSet


################################
# Method 1 (Recommended) #
################################
def result_to_df(result: ResultSet) -> pd.DataFrame:
    """Convert a successful query ResultSet into a pandas DataFrame.

    Builds one Python list per result column, casting each ValueWrapper to a
    plain Python value via ``cast()``, then assembles the DataFrame.

    :param result: a ResultSet whose query succeeded.
    :return: a DataFrame with one column per result column, in result order.
    :raises AssertionError: if the query did not succeed.
    """
    assert result.is_succeeded()
    columns = result.keys()
    d: Dict[str, list] = {}
    for col_num in range(result.col_size()):
        col_name = columns[col_num]
        col_list = result.column_values(col_name)
        d[col_name] = [x.cast() for x in col_list]
    # NOTE: pd.DataFrame.from_dict(d, columns=...) raises ValueError with the
    # default orient='columns'; pass the dict to the constructor instead.
    return pd.DataFrame(d, columns=columns)


################################
# Method 2 (Customize) #
################################
# Lookup table mapping a nebula Value type code to the name of the
# ValueWrapper method that converts it to a plain Python value.
# NOTE(review): assumes every listed method exists on ValueWrapper —
# `customized_cast_with_dict` silently returns None if one is missing;
# confirm e.g. `as_empty` against the ValueWrapper API.
cast_as = {
    Value.NVAL: "as_null",
    Value.__EMPTY__: "as_empty",
    Value.BVAL: "as_bool",
    Value.IVAL: "as_int",
    Value.FVAL: "as_double",
    Value.SVAL: "as_string",
    Value.LVAL: "as_list",
    Value.UVAL: "as_set",
    Value.MVAL: "as_map",
    Value.TVAL: "as_time",
    Value.DVAL: "as_date",
    Value.DTVAL: "as_datetime",
    Value.VVAL: "as_vertex",
    Value.EVAL: "as_edge",
    Value.PVAL: "as_path",
    Value.GGVAL: "as_geography",
    Value.DUVAL: "as_duration",
}


def customized_cast_with_dict(val: ValueWrapper):
    """Cast *val* to a plain Python value via the ``cast_as`` lookup table.

    :param val: the wrapped value to convert.
    :return: the converted value, or None when the mapped converter method
             is absent on *val*.
    :raises KeyError: when the wrapped type code has no ``cast_as`` entry.
    """
    type_code = val._value.getType()
    if type_code not in cast_as:
        raise KeyError("No such key: {}".format(type_code))
    # Fall back to a no-op returning None if the converter is missing,
    # mirroring the table's best-effort contract.
    converter = getattr(val, cast_as[type_code], None)
    return converter() if converter is not None else None


from nebula3.data.DataObject import ValueWrapper


def cast(val: ValueWrapper):
    """Recursively convert a ValueWrapper into a plain Python value.

    Scalars map one-to-one onto their ``as_*`` converter; list/set/map
    values are converted element by element. An unsupported type prints
    an error and yields None.
    """
    if val.is_empty():
        return '__EMPTY__'
    if val.is_null():
        return '__NULL__'
    # Scalar and graph-object types: probe each predicate in order and
    # invoke the matching converter lazily by name.
    dispatch = (
        ("is_bool", "as_bool"),
        ("is_int", "as_int"),
        ("is_double", "as_double"),
        ("is_string", "as_string"),
        ("is_time", "as_time"),
        ("is_date", "as_date"),
        ("is_datetime", "as_datetime"),
    )
    for predicate, converter in dispatch:
        if getattr(val, predicate)():
            return getattr(val, converter)()
    # Container types need recursion into their elements.
    if val.is_list():
        return [cast(item) for item in val.as_list()]
    if val.is_set():
        return {cast(item) for item in val.as_set()}
    if val.is_map():
        return {key: cast(item) for key, item in val.as_map()}
    for predicate, converter in (
        ("is_vertex", "as_node"),
        ("is_edge", "as_relationship"),
        ("is_path", "as_path"),
        ("is_geography", "as_geography"),
    ):
        if getattr(val, predicate)():
            return getattr(val, converter)()
    print("ERROR: Type unsupported")
    return None


def print_resp(resp: ResultSet):
    """Pretty-print a successful ResultSet as an ASCII table.

    Every cell is converted to a plain Python value via
    ``customized_cast_with_dict`` before being added to the table.

    :param resp: a ResultSet whose query succeeded.
    :raises AssertionError: if the query did not succeed.
    """
    assert resp.is_succeeded()
    table = prettytable.PrettyTable()
    table.field_names = resp.keys()
    for record in resp:
        table.add_row([customized_cast_with_dict(cell) for cell in record])
    print(table)
38 changes: 37 additions & 1 deletion nebula3/data/DataObject.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# This source code is licensed under Apache 2.0 License.

from typing import Dict, List, Set
from typing import Any, Dict, List, Set
import pytz
from datetime import datetime, timezone, timedelta
from nebula3.Exception import (
Expand All @@ -24,6 +24,23 @@
Time,
)

# Scalar type-code -> ValueWrapper converter-method name, used by
# ValueWrapper.cast() for O(1) dispatch. Container types (LVAL/UVAL/MVAL)
# are deliberately absent: they need recursive element casting.
# NOTE: a module-level dunder name like this is NOT subject to class-body
# name mangling (mangling skips names with trailing double underscores),
# so methods can reference it directly.
__AS_MAP__ = {
    Value.NVAL: "as_null",
    Value.__EMPTY__: "as_empty",
    Value.BVAL: "as_bool",
    Value.IVAL: "as_int",
    Value.FVAL: "as_double",
    Value.SVAL: "as_string",
    Value.TVAL: "as_time",
    Value.DVAL: "as_date",
    Value.DTVAL: "as_datetime",
    Value.VVAL: "as_vertex",
    Value.EVAL: "as_edge",
    Value.PVAL: "as_path",
    Value.GGVAL: "as_geography",
    Value.DUVAL: "as_duration",
}


def date_time_convert_with_timezone(date_time: DateTime, timezone_offset: int):
"""the function to convert utc date_time to local date_time
Expand Down Expand Up @@ -662,6 +679,25 @@ def as_duration(self) -> "DurationWrapper":
"expect duration type, but is " + self._get_type_name()
)

def cast(self) -> Any:
    """Automatically convert this wrapped value to its concrete Python type.

    Scalar types are dispatched through __AS_MAP__ (O(1) dict lookup);
    list/set/map are handled separately so their elements can be cast
    recursively. Casting inside as_list/as_set/as_map themselves would
    change those methods' return types and break backward compatibility,
    which is why the containers are converted here instead.

    :return: Any concrete type (e.g. int, float, List, Set, Dict), or
             None when the wrapped type code has no known conversion.
    """
    _type = self._value.getType()
    if _type in __AS_MAP__:
        return getattr(self, __AS_MAP__[_type])()
    if _type == Value.LVAL:
        return [x.cast() for x in self.as_list()]
    if _type == Value.UVAL:
        return {x.cast() for x in self.as_set()}
    if _type == Value.MVAL:
        return {k: v.cast() for k, v in self.as_map().items()}
    # Unknown/unsupported type code: return None explicitly instead of
    # silently falling off the end of the function.
    return None

def _get_type_name(self):
if self.is_empty():
return "empty"
Expand Down
58 changes: 58 additions & 0 deletions tests/test_data_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,64 @@ def test_as_map(self):
expect_result["b"] = ValueWrapper(ttypes.Value(sVal=b"car"))
assert map_val == expect_result

def test_cast(self):
    """cast() on a list value recursively converts every element."""

    def _string_value(raw):
        # helper: wrap raw bytes in a string-typed Value
        v = ttypes.Value()
        v.set_sVal(raw)
        return v

    flag = ttypes.Value()
    flag.set_bVal(False)

    number = ttypes.Value()
    number.set_iVal(100)

    ratio = ttypes.Value()
    ratio.set_fVal(10.10)

    word = _string_value(b"word")
    car = _string_value(b"car")

    inner_set = NSet()
    inner_set.values = {word, car}
    set_value = ttypes.Value()
    set_value.set_uVal(inner_set)

    inner_map = NMap()
    inner_map.kvs = {b"a": word, b"b": car}
    map_value = ttypes.Value()
    map_value.set_mVal(inner_map)

    inner_list = NList()
    inner_list.values = [
        flag,
        number,
        ratio,
        word,
        car,
        set_value,
        map_value,
    ]
    outer = ttypes.Value()
    outer.set_lVal(inner_list)

    casted = ValueWrapper(outer).cast()
    assert isinstance(casted, list)
    assert casted == [
        False,
        100,
        10.10,
        "word",
        "car",
        {"word", "car"},
        {"a": "word", "b": "car"},
    ]

def test_as_time(self):
time = Time()
time.hour = 10
Expand Down

0 comments on commit addf1ca

Please sign in to comment.