From ea0128b0e36b6ff36ea1976046ccad0fea779f64 Mon Sep 17 00:00:00 2001 From: tianwei Date: Thu, 19 Oct 2023 14:27:07 +0800 Subject: [PATCH 1/2] avoid obtaining all artifact content in advance --- client/starwhale/api/_impl/evaluation/log.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/client/starwhale/api/_impl/evaluation/log.py b/client/starwhale/api/_impl/evaluation/log.py index a350b8f075..21b416f42e 100644 --- a/client/starwhale/api/_impl/evaluation/log.py +++ b/client/starwhale/api/_impl/evaluation/log.py @@ -226,13 +226,12 @@ def _auto_decode_types(self, data: t.Any) -> t.Any: Text(encoded) -> string Binary(encoded) -> bytes """ - if isinstance(data, BaseArtifact): - data.owner = self._resource - data.fetch_data() - if isinstance(data, Text) and data.auto_convert_to_str: + # TODO: remove the owner assignment + data.owner = self._resource return data.content elif isinstance(data, Binary) and data.auto_convert_to_bytes: + data.owner = self._resource return data.to_bytes() elif isinstance(data, dict): return {k: self._auto_decode_types(v) for k, v in data.items()} From 87fdebf05a926477a785cff7a0b65694df4b1df4 Mon Sep 17 00:00:00 2001 From: tianwei Date: Thu, 19 Oct 2023 14:28:00 +0800 Subject: [PATCH 2/2] update Text/Binary auto decode threshold value --- client/starwhale/base/data_type.py | 4 ++-- client/tests/sdk/test_evaluation_log.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/client/starwhale/base/data_type.py b/client/starwhale/base/data_type.py index 94bc4dabc2..c08334d8bc 100644 --- a/client/starwhale/base/data_type.py +++ b/client/starwhale/base/data_type.py @@ -217,7 +217,7 @@ class Binary(BaseArtifact, SwObject): # TODO: use the better way to calculate the min size # Detect if the bytes is too long to encode to Binary for the datastore efficiency # size = DIGEST_SIZE + Binary Struct size + Link Object Struct size - AUTO_ENCODE_MIN_SIZE = sys.getsizeof(DIGEST_SIZE) + 512 + AUTO_ENCODE_MIN_SIZE = sys.getsizeof(DIGEST_SIZE) + 1024 def __init__( self, @@ -679,7 +679,7 @@ class Text(BaseArtifact, SwObject): # TODO: use the better way to calculate the min size # Detect if the str is too long to encode to Text for the datastore efficiency # size = DIGEST_SIZE + Text Struct size + Link Object Struct size - AUTO_ENCODE_MIN_SIZE = sys.getsizeof(DIGEST_SIZE) + 512 + AUTO_ENCODE_MIN_SIZE = sys.getsizeof(DIGEST_SIZE) + 1024 def __init__( self, diff --git a/client/tests/sdk/test_evaluation_log.py b/client/tests/sdk/test_evaluation_log.py index a60fe413de..ec3c222eea 100644 --- a/client/tests/sdk/test_evaluation_log.py +++ b/client/tests/sdk/test_evaluation_log.py @@ -68,8 +68,8 @@ def test_log_for_singleton_instance(self) -> None: _log(category=category, id=1, metrics={"a": 1, "b": 2}) _log(category=category, id=2, metrics={"a": 2, "b": 3}) - _log_result(id="id-1", metrics={"text": "ttt" * 200}) - _log_result(id="id-2", metrics={"binary": b"bbb" * 200}) + _log_result(id="id-1", metrics={"text": "ttt" * 1000}) + _log_result(id="id-2", metrics={"binary": b"bbb" * 1000}) _els = _get_log_store_from_context() _els.flush_all(artifacts_flush=True) @@ -79,8 +79,8 @@ def test_log_for_singleton_instance(self) -> None: rt = list(evaluation_log_module.scan_results()) assert rt == [ - {"id": "id-1", "text": "ttt" * 200}, - {"id": "id-2", "binary": b"bbb" * 200}, + {"id": "id-1", "text": "ttt" * 1000}, + {"id": "id-2", "binary": b"bbb" * 1000}, ] def test_log_summary_for_singleton_instance(self) -> None: @@ -201,8 +201,8 @@ def test_log_and_scan_for_standalone(self) -> None: store.log_result( id="id-1", metrics={ - "text": "aaa" * 200, - "binary": b"bbb" * 200, + "text": "aaa" * 2000, + "binary": b"bbb" * 2000, "image": self._generate_random_image(), }, ) @@ -239,8 +239,8 @@ def test_log_and_scan_for_standalone(self) -> None: "binary": b"bbb", "items": [1, 2, 3], } - assert results[0]["text"] == "aaa" * 200 - assert results[0]["binary"] == b"bbb" * 200 + assert results[0]["text"] == "aaa" * 2000 + assert results[0]["binary"] == b"bbb" * 2000 img = results[0]["image"] assert isinstance(img, Image) assert len(img.to_bytes()) > 0