Skip to content

Commit

Permalink
chore(client): prefer using builtin list when building dataset (#2913)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jialeicui authored Oct 27, 2023
1 parent 5a4073b commit 872e4b1
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 38 deletions.
6 changes: 6 additions & 0 deletions client/starwhale/api/_impl/data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,9 @@ def __eq__(self, other: Any) -> bool:
)
return False

# Hash the object by its string form, i.e. hash(str(self)).
# The enclosing class overrides __eq__ (see the hunk header above), and
# Python sets __hash__ to None for a class that overrides __eq__ without
# also defining __hash__, so it has to be provided explicitly.
# NOTE(review): assumes objects that compare equal via __eq__ also have
# equal str() output, as the hash/eq contract requires — confirm.
def __hash__(self) -> int:
return hash(str(self))


class SwTupleType(SwListType):
def __init__(
Expand All @@ -580,6 +583,9 @@ def __init__(
) -> None:
super().__init__(element_types, sparse_types, True)

# Hash the tuple type by its string form, i.e. hash(str(self)).
# NOTE(review): presumably redefined here so SwTupleType stays hashable on
# its own rather than relying on its SwListType base — confirm whether this
# override is needed.
def __hash__(self) -> int:
return hash(str(self))


class SwMapType(SwCompositeType):
def __init__(self, key_type: SwType, value_type: SwType) -> None:
Expand Down
19 changes: 2 additions & 17 deletions client/starwhale/core/dataset/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,7 @@

from typing_extensions import Protocol

from starwhale.utils import (
console,
gen_uniq_version,
validate_obj_name,
check_python_scalar_type,
)
from starwhale.utils import console, gen_uniq_version, validate_obj_name
from starwhale.consts import ENV_POD_NAME
from starwhale.base.mixin import ASDictMixin, _do_asdict_convert
from starwhale.utils.error import (
Expand Down Expand Up @@ -216,17 +211,7 @@ def _transform(data: t.Any) -> t.Any:
elif isinstance(data, dict):
return {k: _transform(v) for k, v in data.items()}
elif isinstance(data, (list, tuple)):
data = type(data)([_transform(v) for v in data])
types = set(type(i) for i in data)
if (
not types
or len(types) == 1
and check_python_scalar_type(types.pop())
):
# Keep the original list/tuple type only for the simple format, i.e. when it is empty or all elements share one scalar type (such as [1, 2, 3]); this is for the dataset viewer.
return data
else:
return Sequence(data=data, auto_convert=True)
return type(data)([_transform(v) for v in data])
else:
return data

Expand Down
26 changes: 5 additions & 21 deletions client/tests/sdk/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,29 +232,13 @@ def test_upload(self, rm: Mocker) -> None:

assert {
"attributes": [
{"name": "_type", "type": "STRING"},
{"name": "sequence_type", "type": "STRING"},
{"name": "_cnt", "type": "INT64"},
{
"attributes": [
{"name": "i0", "type": "INT64"},
{"name": "i1", "type": "STRING"},
{"name": "i2", "type": "FLOAT64"},
{
"elementType": {"type": "STRING"},
"name": "i3",
"type": "TUPLE",
},
],
"name": "data",
"pythonType": "starwhale.base.data_type.JsonDict",
"type": "OBJECT",
},
{"name": "auto_convert", "type": "BOOL"},
{"index": 0, "type": "INT64"},
{"index": 1, "type": "STRING"},
{"index": 2, "type": "FLOAT64"},
],
"elementType": {"elementType": {"type": "STRING"}, "type": "TUPLE"},
"name": "features/mixed_types_tuple",
"pythonType": "starwhale.base.data_type.Sequence",
"type": "OBJECT",
"type": "TUPLE",
} in content["tableSchemaDesc"]["columnSchemaList"]

assert {
Expand Down

0 comments on commit 872e4b1

Please sign in to comment.