diff --git a/python/qianfan/dataset/data_source/baidu_qianfan.py b/python/qianfan/dataset/data_source/baidu_qianfan.py index 8d919eb9..3c40f1ce 100644 --- a/python/qianfan/dataset/data_source/baidu_qianfan.py +++ b/python/qianfan/dataset/data_source/baidu_qianfan.py @@ -17,6 +17,7 @@ import json import os +import re import uuid import zipfile from typing import Any, Dict, Optional, Tuple @@ -91,7 +92,13 @@ def _get_transmission_bos_info( storage_region = sup_storage_region elif self.storage_type == V2Consts.StorageType.Bos: assert self.storage_region + assert self.storage_path storage_region = self.storage_region + match_result = re.search(r"^bos://(.*?)/(.*)/$", self.storage_path) + if match_result is None: + raise ValueError("no bos bucket and path found") + groups = match_result.groups() + storage_id, storage_path = groups[0], groups[1] elif self.storage_type == V2Consts.StorageType.SysStorage: err_msg = "don't support upload dataset to dataset which use platform bos" log_error(err_msg) @@ -176,11 +183,12 @@ def save( V2Consts.DatasetFormat.PromptImageResponse, ] + ak, sk = self._get_console_ak_and_sk() + # 获取存储信息和鉴权信息 storage_id, storage_path, storage_region = self._get_transmission_bos_info( sup_storage_id, sup_storage_path, sup_storage_region ) - ak, sk = self._get_console_ak_and_sk() # 构造本地和远端的路径 if not should_save_as_zip_file: @@ -471,7 +479,6 @@ def _create_bare_dataset( name=name, version=qianfan_resp["versionNumber"], storage_type=storage_type, - storage_path=qianfan_resp["storagePath"], info=( {**qianfan_resp, **addition_info} if addition_info else {**qianfan_resp} ), @@ -701,11 +708,12 @@ def get_existed_dataset( def create_new_version(self) -> "QianfanDataSource": qianfan_resp = Data.V2.create_dataset_version(self.group_id) + result = qianfan_resp["result"] dataset = QianfanDataSource( - id=qianfan_resp["versionId"], - group_id=qianfan_resp["datasetId"], - name=qianfan_resp["datasetName"], - version=qianfan_resp["versionNumber"], + id=result["versionId"], + group_id=result["datasetId"], + name=result["datasetName"], + version=result["versionNumber"], data_format_type=self.data_format_type, storage_type=self.storage_type, storage_path=self.storage_path, diff --git a/python/qianfan/tests/dataset/data_source_test.py b/python/qianfan/tests/dataset/data_source_test.py index ab1759d0..e42e2b57 100644 --- a/python/qianfan/tests/dataset/data_source_test.py +++ b/python/qianfan/tests/dataset/data_source_test.py @@ -30,12 +30,7 @@ QianfanLocalCacheDir, ) from qianfan.dataset.data_source import FileDataSource, FormatType, QianfanDataSource -from qianfan.resources.console.consts import ( - DataProjectType, - DataSetType, - DataStorageType, - DataTemplateType, -) +from qianfan.resources.console.consts import V2 as V2Consts def _clean_func(): @@ -159,56 +154,43 @@ def test_save_as_folder(): def test_create_bare_qianfan_data_source(): datasource_1 = QianfanDataSource.create_bare_dataset( "name", - DataTemplateType.NonSortedConversation, - DataStorageType.PublicBos, + V2Consts.DatasetFormat.PromptResponse, + V2Consts.StorageType.SysStorage, ) - assert datasource_1.template_type == DataTemplateType.NonSortedConversation - assert datasource_1.project_type == DataProjectType.Conversation - assert datasource_1.set_type == DataSetType.TextOnly + assert datasource_1.data_format_type == V2Consts.DatasetFormat.PromptResponse datasource_2 = QianfanDataSource.create_bare_dataset( "name", - DataTemplateType.Text2Image, - DataStorageType.PrivateBos, - storage_id="a", - storage_path="b", + V2Consts.DatasetFormat.PromptImage, + V2Consts.StorageType.Bos, + storage_path="bos://a/b/", ) - assert datasource_2.template_type == DataTemplateType.Text2Image - assert datasource_2.project_type == DataProjectType.Text2Image - assert datasource_2.set_type == DataSetType.MultiModel - assert ( - datasource_2.storage_path - == "/easydata/_system_/dataset/ds-z07hkq2kyvsmrmdw/texts" - ) - assert datasource_2.storage_id == "a" + assert datasource_2.data_format_type == V2Consts.DatasetFormat.PromptImage + assert datasource_2.storage_path == "bos://a/b/" assert datasource_2.storage_region == "bj" assert datasource_2.format_type() == FormatType.Text2Image def test_create_qianfan_data_source_from_existed(): - source = QianfanDataSource.get_existed_dataset("12", False) - assert source.id == "12" - assert source.storage_region == "bj" + source = QianfanDataSource.create_bare_dataset( + "empty", V2Consts.DatasetFormat.PromptResponse, V2Consts.StorageType.SysStorage + ) + new_source = QianfanDataSource.get_existed_dataset(source.id, False) + assert new_source.id == source.id def create_an_empty_qianfan_datasource() -> QianfanDataSource: return QianfanDataSource( - id=1, - group_id=2, + id="1", + group_id="2", name="test", - set_type=DataSetType.TextOnly, - project_type=DataProjectType.Conversation, - template_type=DataTemplateType.NonSortedConversation, + data_format_type=V2Consts.DatasetFormat.PromptResponse, version=1, - storage_type=DataStorageType.PrivateBos, - storage_id="123", - storage_path="456", - storage_name="storage_name", - storage_raw_path="/s/", + storage_type=V2Consts.StorageType.Bos, + storage_path="bos://123/456/", storage_region="bj", - data_format_type=FormatType.Jsonl, ) @@ -221,11 +203,13 @@ def test_qianfan_data_source_save(mocker: MockerFixture, *args, **kwargs): empty_table = Dataset.create_from_pyobj({QianfanDatasetPackColumnName: ["1"]}) ds = create_an_empty_qianfan_datasource() - ds.storage_type = DataStorageType.PublicBos + ds.storage_type = V2Consts.StorageType.SysStorage with pytest.raises(NotImplementedError): ds.save(empty_table) - ds = create_an_empty_qianfan_datasource() + ds = QianfanDataSource.create_bare_dataset( + "test", V2Consts.DatasetFormat.PromptResponse + ) config = get_config() config.ACCESS_KEY = "" @@ -240,12 +224,12 @@ def test_qianfan_data_source_save(mocker: MockerFixture, *args, **kwargs): ) ds.ak = "1" - with pytest.raises(ValueError): ds.save(empty_table) ds.sk = "2" - assert ds.save(empty_table) + with pytest.raises(NotImplementedError): + ds.save(empty_table) config.ACCESS_KEY = "1" config.SECRET_KEY = "2" @@ -256,16 +240,12 @@ def test_qianfan_data_source_save(mocker: MockerFixture, *args, **kwargs): sup_storage_path="/sdasd/", sup_storage_region="bj", ) - assert ds.save( - empty_table, - sup_storage_id="1", - sup_storage_path="/sdasd/", - sup_storage_region="bj", - ) def test_qianfan_data_source_load(): - ds = create_an_empty_qianfan_datasource() + ds = QianfanDataSource.create_bare_dataset( + "empty", V2Consts.DatasetFormat.PromptResponse, V2Consts.StorageType.SysStorage + ) content = Dataset(inner_table=ds.fetch()).list() assert content[0][0]["response"] == [["no response"]] diff --git a/python/qianfan/tests/dataset/dataset_test.py b/python/qianfan/tests/dataset/dataset_test.py index 87fb08e4..8a624e6d 100644 --- a/python/qianfan/tests/dataset/dataset_test.py +++ b/python/qianfan/tests/dataset/dataset_test.py @@ -33,7 +33,7 @@ QianfanSortedConversation, ) from qianfan.dataset.table import Table -from qianfan.resources.console.consts import DataTemplateType +from qianfan.resources.console.consts import V2 as V2Consts from qianfan.utils.pydantic import BaseModel @@ -128,7 +128,7 @@ def test_dataset_create(): def test_dataset_online_process(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", DataTemplateType.GenericText + "test", V2Consts.DatasetFormat.Text ) dataset = Dataset.load(source=qianfan_data_source) assert dataset.online_data_process( @@ -171,19 +171,13 @@ def test_branch_save(*args, **kwargs): ds.unpack() ds.save(fake_data_source) - from qianfan.tests.dataset.data_source_test import ( - create_an_empty_qianfan_datasource, + fake_qianfan_data_source = QianfanDataSource.create_bare_dataset( + "test", + V2Consts.DatasetFormat.PromptResponse, + V2Consts.StorageType.Bos, + "bos://are/you/ok/", ) - - fake_qianfan_data_source = create_an_empty_qianfan_datasource() ds = Dataset.create_from_pyobj([{"prompt": "nihao", "response": [["hello"]]}]) ds.save(fake_qianfan_data_source) ds.save(FakeDataSource(origin_data="", format=FormatType.Json)) - - fake_qianfan_data_source = create_an_empty_qianfan_datasource() - fake_qianfan_data_source.data_format_type = FormatType.Text - fake_qianfan_data_source.template_type = DataTemplateType.GenericText - fake_qianfan_data_source.project_type = DataTemplateType.GenericText - ds = Dataset.create_from_pyobj({QianfanDatasetPackColumnName: ["wenben"]}) - ds.save(fake_qianfan_data_source) diff --git a/python/qianfan/tests/trainer_test.py b/python/qianfan/tests/trainer_test.py index 675d9ef3..9e505d4a 100644 --- a/python/qianfan/tests/trainer_test.py +++ b/python/qianfan/tests/trainer_test.py @@ -56,7 +56,8 @@ def dispatch(self, event: Event) -> None: def test_load_data_action(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", console_consts.DataTemplateType.NonSortedConversation + "test", + console_consts.V2.DatasetFormat.PromptResponse, ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) @@ -68,7 +69,7 @@ def test_load_data_action(): res = LoadDataSetAction( preset, - dataset_format_type=console_consts.DataTemplateType.NonSortedConversation, + dataset_format_type=console_consts.V2.DatasetFormat.PromptResponse, ).exec() assert isinstance(res, dict) assert "datasets" in res @@ -134,7 +135,8 @@ def test_trainer_sft_run(): peft_type=PeftType.ALL, ) qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", console_consts.DataTemplateType.NonSortedConversation + "test", + console_consts.V2.DatasetFormat.PromptResponse, ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) @@ -183,7 +185,7 @@ def test_trainer_sft_with_deploy(): ) deploy_config = DeployConfig(replicas=1, pool_type=1, service_type=ServiceType.Chat) qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", console_consts.DataTemplateType.NonSortedConversation + "test", console_consts.V2.DatasetFormat.PromptResponse ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) @@ -231,7 +233,7 @@ def test_service_exec(): def test_trainer_resume(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - name="test", template_type=console_consts.DataTemplateType.NonSortedConversation + "test", console_consts.V2.DatasetFormat.PromptResponse ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) @@ -272,7 +274,7 @@ def test_batch_run_on_qianfan(): # 测试_parse_from_input方法 def test__parse_from_input(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - "eval", console_consts.DataTemplateType.NonSortedConversation + "eval", console_consts.V2.DatasetFormat.PromptResponse ) test_dataset = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) test_evaluators = [QianfanRuleEvaluator(using_accuracy=True)] # 创建一些评估器 @@ -300,7 +302,7 @@ def test__parse_from_input(): # 测试eval action exec方法 def test_eval_action_exec(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - "eval", console_consts.DataTemplateType.NonSortedConversation + "eval", console_consts.V2.DatasetFormat.PromptResponse ) test_dataset = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) test_evaluators = [QianfanRuleEvaluator(using_similarity=True)] # 创建一些评估器 @@ -317,7 +319,7 @@ def test_eval_action_exec(): # 测试eval action resume方法 def test_eval_action_resume(): qianfan_data_source = QianfanDataSource.create_bare_dataset( - "eval", console_consts.DataTemplateType.NonSortedConversation + "eval", console_consts.V2.DatasetFormat.PromptResponse ) test_dataset = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) test_evaluators = [QianfanRuleEvaluator(using_similarity=True)] # 创建一些评估器 @@ -338,11 +340,12 @@ def test_trainer_sft_with_eval(): peft_type=PeftType.ALL, ) qianfan_data_source = QianfanDataSource.create_bare_dataset( - "train", console_consts.DataTemplateType.NonSortedConversation + "train", console_consts.V2.DatasetFormat.PromptResponse ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) qianfan_eval_data_source = QianfanDataSource.create_bare_dataset( - "eval", console_consts.DataTemplateType.NonSortedConversation + "eval", + console_consts.V2.DatasetFormat.PromptResponse, ) eval_ds = Dataset.load(source=qianfan_eval_data_source, organize_data_as_group=True) eh = MyEventHandler() @@ -487,7 +490,8 @@ def test_failed_sft_run(): peft_type=PeftType.ALL, ) qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", console_consts.DataTemplateType.NonSortedConversation + "test", + console_consts.V2.DatasetFormat.PromptResponse, ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) @@ -527,7 +531,8 @@ def test_persist(): ), ) qianfan_data_source = QianfanDataSource.create_bare_dataset( - "test", console_consts.DataTemplateType.NonSortedConversation + "test", + console_consts.V2.DatasetFormat.PromptResponse, ) ds = Dataset.load(source=qianfan_data_source, organize_data_as_group=True) diff --git a/python/qianfan/tests/utils/mock_server.py b/python/qianfan/tests/utils/mock_server.py index 6c1712f9..0ac40067 100644 --- a/python/qianfan/tests/utils/mock_server.py +++ b/python/qianfan/tests/utils/mock_server.py @@ -15,19 +15,21 @@ """ Mock server for unit test """ +import copy import io # disable line too long lint error in this file # ruff: noqa: E501 import json import random +import string import threading import time import zipfile from datetime import datetime, timedelta, timezone from functools import wraps from io import BytesIO -from typing import Dict +from typing import Any, Dict, List import flask import requests @@ -4485,37 +4487,341 @@ def dataset_v2(): action = request.args.get(Consts.ConsoleAPIQueryAction) json_body = request.json action_handler = { - Consts.FineTuneCreateJobAction: finetune_v2_create_job, - Consts.FineTuneCreateTaskAction: finetune_v2_create_task, - Consts.FineTuneJobListAction: finetune_v2_job_list, - Consts.FineTuneTaskListAction: finetune_v2_task_list, - Consts.FineTuneTaskDetailAction: finetune_v2_task_detail, - Consts.FineTuneStopTaskAction: finetune_v2_stop_task, - Consts.FineTuneSupportedModelsAction: finetune_v2_supported_models, + Consts.DatasetV2CreateDatasetAction: dataset_v2_create_dataset, + Consts.DatasetV2GetDatasetListAction: dataset_v2_describe_datasets, + Consts.DatasetV2DeleteDatasetAction: dataset_v2_delete_dataset, + Consts.DatasetV2CreateDatasetVersionAction: dataset_v2_create_dataset_version, + Consts.DatasetV2GetDatasetVersionInfoAction: ( + dataset_v2_describe_dataset_version + ), + Consts.DatasetV2DeleteDatasetVersionAction: dataset_v2_delete_dataset_version, + Consts.DatasetV2PublishDatasetVersionAction: dataset_v2_publish_dataset_version, + Consts.DatasetV2GetDatasetVersionListAction: ( + dataset_v2_describe_dataset_versions + ), + Consts.DatasetV2CreateDatasetVersionImportTaskAction: ( + dataset_v2_create_import_task + ), + Consts.DatasetV2GetDatasetVersionImportTaskInfoAction: ( + dataset_v2_describe_import_task + ), + Consts.DatasetV2CreateDatasetVersionExportTaskAction: ( + dataset_v2_create_export_task + ), + Consts.DatasetV2GetDatasetVersionExportTaskInfoAction: ( + dataset_v2_describe_export_task + ), } return action_handler.get(action)(body=json_body) + +def _generate_random_string(length): + # 定义可以用来生成字符串的字符集,包括字母和数字 + characters = string.ascii_letters + string.digits + # 使用 random.choices 从字符集中随机选择指定数量的字符 + random_string = "".join(random.choices(characters, k=length)) + return random_string + + +def _get_current_timestamp(): + # 创建代表东八区(+08:00)的时区对象 + tz_offset = timezone(timedelta(hours=8)) + + # 获取当前时间,并应用时区偏移 + now = datetime.now(tz_offset) + + timestamp_str = now.strftime("%Y-%m-%d %H:%M:%S") + return timestamp_str + + +_v2_dataset_id_version_map: Dict[str, List[str]] = {} +_v2_dataset_map: Dict[str, Dict[str, Any]] = {} + + def dataset_v2_create_dataset(body: Dict): + dataset_id_suffix = _generate_random_string(10) + + dataset_id = f"dg-{dataset_id_suffix}" + dataset_version_id = f"ds-{dataset_id_suffix}" + result_dict = { "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", "result": { - "datasetversionId": "ds-123", - "datasetId": "dg-123", + "versionId": dataset_version_id, + "datasetId": dataset_id, "datasetName": body.get("datasetName", ""), "versionNumber": 1, "dataFormat": body.get("dataFormat", ""), "storageType": body.get("storageType", ""), - "storagePath": "bos:/yourBucket/yourDir/_system_/dataset/ds-xxx/images", - "createTime": "2023-11-02T14:50:30.6533454+08:00" - } + "storagePath": body.get("storagePath", ""), + "createTime": _get_current_timestamp(), + "sizeMB": 513.42, + "description": "中文医疗问答数据集第一版", + "characterCount": 111, + "sampleCount": 10, + "annotationProgress": "1/10", + "importStatus": "Created", + "publishStatus": "Unpublished", + "publishProgress": "0", + "creator": "accountName", + "modifyTime": _get_current_timestamp(), + }, } if body.get("storageType", "") == "sysStorage": del result_dict["result"]["storagePath"] + version_list = _v2_dataset_id_version_map.get(dataset_id, []) + version_list.append(dataset_version_id) + _v2_dataset_id_version_map[dataset_id] = version_list + _v2_dataset_map[dataset_version_id] = result_dict + return json_response(result_dict) +def dataset_v2_describe_datasets(body: Dict): + result_dict = { + "requestId": "b4f5f3f2-307e-41d6-5afc-a6708cfa286b", + "result": { + "pageInfo": { + "marker": "", + "maxKeys": 2, + "isTruncated": False, + "nextMarker": "dg-xxx", + "pageReverse": False, + }, + "datasets": [ + { + "datasetId": "dg-xxx", + "datasetName": "helloDatasetList1", + "dataFormat": "PromptResponse", + }, + { + "datasetId": "dg-xxx", + "datasetName": "helloDatasetList2", + "dataFormat": "PromptImage", + }, + ], + }, + } + + return json_response(result_dict) + + +def dataset_v2_delete_dataset(body: Dict): + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": True, + } + ) + + +def dataset_v2_create_dataset_version(body: Dict): + dataset_id = body["datasetId"] + dataset_version_id_suffix = _generate_random_string(10) + new_version_id = f"ds-{dataset_version_id_suffix}" + + version_list = _v2_dataset_id_version_map.get(dataset_id, []) + if len(version_list) == 0: + return json_response( + { + "requestId": "6ba7b810-xxxc04fd430c8", + "code": "AccessDenied", + "message": "Access denied.", + } + ) + + old_version_id = version_list[-1] + version_list.append(new_version_id) + _v2_dataset_id_version_map[dataset_id] = version_list + + new_dataset_dict = copy.deepcopy(_v2_dataset_map[old_version_id]) + result = new_dataset_dict["result"] + result["versionId"] = new_version_id + result["versionNumber"] = result["versionNumber"] + 1 + result["createTime"] = _get_current_timestamp() + result["modifyTime"] = _get_current_timestamp() + new_dataset_dict["result"] = result + + _v2_dataset_map[new_version_id] = new_dataset_dict + + return json_response(new_dataset_dict) + + +def dataset_v2_describe_dataset_version(body: Dict): + version_id = body["versionId"] + if version_id in _v2_dataset_map: + return json_response(_v2_dataset_map.get(version_id, {})) + + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": { + "datasetId": "dg-xxx", + "datasetName": "ChineseMedicalDialogueData中文医疗问答数据集", + "dataFormat": ( + "PromptResponse" if version_id != "ds-mock-generic" else "Text" + ), + "versionId": version_id, + "versionNumber": 1, + "createTime": "2023-09-08 17:10:11", + "modifyTime": "2023-10-25 20:45:23", + "storageType": "sysStorage", + "sizeMB": 513.42, + "description": "中文医疗问答数据集第一版", + "characterCount": 111, + "sampleCount": 10, + "annotationProgress": "1/10", + "importStatus": "Created", + "publishStatus": "Unpublished", + "publishProgress": "0", + "creator": "accountName", + }, + } + ) + + +def dataset_v2_delete_dataset_version(body: Dict): + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": True, + } + ) + + +def dataset_v2_publish_dataset_version(body: Dict): + version_id = body["versionId"] + + if version_id in _v2_dataset_map: + dataset_dict = _v2_dataset_map[version_id] + dataset_dict["result"]["publishStatus"] = "Published" + dataset_dict["result"]["publishProgress"] = "100%" + _v2_dataset_map[version_id] = dataset_dict + + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": True, + } + ) + + +def dataset_v2_describe_dataset_versions(body: Dict): + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": { + "pageInfo": { + "marker": "", + "maxKeys": 2, + "isTruncated": True, + "nextMarker": "ds-xxx", + "pageReverse": False, + }, + "datasetId": "dg-xxx", + "datasetName": "helloDatasetDetail", + "dataFormat": "PromptResponse", + "datasetVersions": [ + { + "versionId": "ds-ck73i9r6423t1rzm", + "versionNumber": 1, + "description": "", + "storageType": "sysStorage", + "sizeMB": 0.05, + "sampleCount": 35, + "characterCount": 111, + "annotationProgress": "35/35", + "importStatus": "Importing", + "publishStatus": "Unpublished", + "creator": "accountName", + "createTime": "2024-07-24 21:29:44", + "modifyTime": "2024-07-24 21:29:50", + }, + { + "versionId": "ds-yx9ajxk2s80m84m4", + "versionNumber": 2, + "description": "", + "storageType": "BOS", + "storagePath": "bos:/{your_bucket}/{you_dir}", + "sizeMB": 0.02, + "sampleCount": 35, + "characterCount": 111, + "annotationProgress": "35/35", + "importStatus": "Importing", + "publishStatus": "Unpublished", + "creator": "accountName", + "createTime": "2024-07-24 23:12:43", + "modifyTime": "2024-07-24 23:13:35", + }, + ], + }, + } + ) + + +def dataset_v2_create_import_task(body: Dict): + task_id_suffix = _generate_random_string(10) + + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": f"task-{task_id_suffix}", + } + ) + + +def dataset_v2_describe_import_task(body: Dict): + return json_response( + { + "requestId": "febaf751-7725-4a8b-5699-a966b82dd676", + "result": { + "versionId": "ds-sshcwxmh5uk9t17w", + "importStatus": "ImportFinished", + "progress": "100%", + "sizeMB": 0.01, + "sampleCount": 10, + "creator": "accountName", + "startTime": "2024-08-06 11:17:50", + "finishTime": "2024-08-06 11:18:01", + "errDownloadUrl": "", + }, + } + ) + + +def dataset_v2_create_export_task(body: Dict): + task_id_suffix = _generate_random_string(10) + + return json_response( + { + "requestId": "1bef3f87-c5b2-4419-936b-50f9884f10d4", + "result": f"task-{task_id_suffix}", + } + ) + + +def dataset_v2_describe_export_task(body: Dict): + return json_response( + { + "requestId": "bdb7afcc-d9a4-4804-7587-8d9afaa53007", + "result": { + "storageType": "sysStorage", + "storagePath": "bos:/bucketName/some/path/exportFileName.zip", + "sizeMB": 0.05, + "sampleCount": 55, + "exportStatus": "ExportFinished", + "progress": "100%", + "creator": "accountName", + "startTime": "2024-08-01 10:31:48", + "finishTime": "2024-08-01 10:31:58", + "downloadUrl": "http://127.0.0.1:8866/url", + }, + } + ) + + def _start_mock_server(): """ run mock server