Skip to content

Commit

Permalink
support python3.12 for client and sdk
Browse files Browse the repository at this point in the history
  • Loading branch information
tianweidut committed Jan 17, 2024
1 parent c5ce480 commit 7aeeef7
Show file tree
Hide file tree
Showing 15 changed files with 72 additions and 33 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
needs:
- filter
if: ${{ (github.event_name == 'pull_request' && needs.filter.outputs.client == 'true') || github.event_name == 'push' }}
Expand Down Expand Up @@ -99,6 +100,7 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
os:
- macos-latest
- ubuntu-latest
Expand Down Expand Up @@ -167,6 +169,7 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
os:
- macos-latest
- ubuntu-latest
Expand All @@ -175,6 +178,8 @@ jobs:
# PyTorch has not released Python 3.11 or 3.12 wheel packages for macOS yet.
- os: macos-latest
python-version: "3.11"
- os: macos-latest
python-version: "3.12"
runs-on: ${{ matrix.os }}
defaults:
run:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,13 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
server-driver:
- "docker-compose"
- "minikube"
exclude:
- python-version: "3.12"
server-driver: "docker-compose"
- python-version: "3.10"
server-driver: "docker-compose"
- python-version: "3.9"
Expand Down
8 changes: 5 additions & 3 deletions client/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ requests-mock >= 1.9.3
isort >= 5.10.1
respx >= 0.19.0
# for integration test
torch>=2.0.1; python_version >= '3.11'
torch>=2.0.1; python_version >= '3.11' and python_version < '3.12'
torch; python_version < '3.11'
tensorflow>=2.12.0; python_version >= '3.11'
tensorflow>=2.12.0; python_version >= '3.11' and python_version < '3.12'
tensorflow; python_version < '3.11'
datasets
librosa # for datasets Audio
# for datasets Audio
# librosa depends on numba, which only supports Python versions >=3.8,<3.12
librosa; python_version < '3.12'
# for starwhale[image] test
pillow
# for starwhale[audio] test
Expand Down
8 changes: 5 additions & 3 deletions client/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
# importlib-metadata supplies a backport of 'importlib.metadata'; Python 3.8 makes 'importlib' part of the standard lib.
"importlib-metadata>=4.0.0, <=4.2.0;python_version < '3.8'",
"attrs>=21.4.0",
"pyyaml==6.0",
"pyyaml==6.0.1",
"cattrs>=1.7.1",
"requests>=2.1.0",
"requests-toolbelt>=0.9.0",
"loguru==0.6.0",
"conda-pack==0.6.0",
"conda-pack==0.7.1; python_version >= '3.12'",
"conda-pack==0.6.0; python_version < '3.12'",
"virtualenv>=13.0.0",
"fs>=2.4.0",
"typing-extensions>=4.0.0",
Expand Down Expand Up @@ -103,7 +104,7 @@ def _format_version() -> str:
sw = starwhale.cli:cli
starwhale = starwhale.cli:cli
""",
python_requires=">=3.7, <3.12",
python_requires=">=3.7, <3.13",
scripts=[
"scripts/sw-docker-entrypoint",
],
Expand All @@ -119,6 +120,7 @@ def _format_version() -> str:
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
Expand Down
14 changes: 7 additions & 7 deletions client/starwhale/api/_impl/dataset/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,12 +921,12 @@ def commit(
force_add_tags: bool = False,
ignore_add_tags_errors: bool = False,
) -> str:
"""Commit into dataset
"""Commit into dataset.
Commit will flush and generate a version of the dataset. At the same time, commit
operation will also generate auto-increment tag, such as v0, v1, v2. Only one commit is allowed.
Arguments:
tags: (list(str), optional) Specify the tags for the version. Default is None. `latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) Specify the tags for the version. Default is None. `latest` and `^v\\d+$` tags are reserved tags.
message: (str, optional) Commit message. Default is empty str.
force_add_tags: (bool, optional) Force to add tags. Default is False.
ignore_add_tags_errors: (bool, optional) Ignore add tags errors. Default is False.
Expand Down Expand Up @@ -1104,7 +1104,7 @@ def copy(
`patch` mode: only update the changed rows and columns for the remote dataset;
`overwrite` mode: update records and delete extraneous rows from the remote dataset
ignore_tags: (list(str), optional) ignore tags when copying.
In default, copy dataset with all user custom tags. `latest` and `^v\d+$` are the system builtin tags, they are ignored automatically.
In default, copy dataset with all user custom tags. `latest` and `^v\\d+$` are the system builtin tags, they are ignored automatically.
When the tags are already used for the other dataset version in the dest instance, you should use `force` option or adjust the tags.
Returns:
Expand Down Expand Up @@ -1317,7 +1317,7 @@ def from_json(
alignment_size: (int|str, optional) The blob alignment size. The default value is 128.
volume_size: (int|str, optional) The blob volume size. The default value is 64MB.
mode: (str|DatasetChangeMode, optional) The dataset change mode. The default value is `patch`. Mode choices are `patch` and `overwrite`.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\\d+$` tags are reserved tags.
encoding: (str, optional) The encoding used to decode the input file. The default is None.
encoding does not support text parameter.
Expand Down Expand Up @@ -1484,7 +1484,7 @@ def from_csv(
alignment_size: (int|str, optional) The blob alignment size. The default value is 128.
volume_size: (int|str, optional) The blob volume size. The default value is 64MB.
mode: (str|DatasetChangeMode, optional) The dataset change mode. The default value is `patch`. Mode choices are `patch` and `overwrite`.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\\d+$` tags are reserved tags.
delimiter: (str, optional) A one-character string used to separate fields. It defaults to ','.
quotechar: (str, optional) A one-character string used to quote fields containing special characters,
such as the delimiter or quotechar, or which contain new-line characters. It defaults to '"'.
Expand Down Expand Up @@ -1562,7 +1562,7 @@ def from_dict_items(
alignment_size: (int|str, optional) The blob alignment size. The default value is 128.
volume_size: (int|str, optional) The blob volume size. The default value is 64MB.
mode: (str|DatasetChangeMode, optional) The dataset change mode. The default value is `patch`. Mode choices are `patch` and `overwrite`.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) The tags for the dataset version.`latest` and `^v\\d+$` tags are reserved tags.
Returns:
A Dataset Object
Expand Down Expand Up @@ -1690,7 +1690,7 @@ def from_folder(
alignment_size: (int|str, optional) The blob alignment size. The default value is 128.
volume_size: (int|str, optional) The blob volume size. The default value is 64MB.
mode: (str|DatasetChangeMode, optional) The dataset change mode. The default value is `patch`. Mode choices are `patch` and `overwrite`.
tags: (list(str), optional) The tags for the dataset version. `latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) The tags for the dataset version. `latest` and `^v\\d+$` tags are reserved tags.
Returns:
A Dataset Object.
Expand Down
2 changes: 1 addition & 1 deletion client/starwhale/api/_impl/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def build(
remote_project_uri: (str, optional) The destination project uri(cloud://remote-instance/project/starwhale) of the Starwhale Model Package
add_all: (bool, optional) Add all files in the workdir to the Starwhale Model Package. If the argument is False, the python cache files and virtualenv files will be ignored.
the ".swignore" file in the workdir will always take effect.
tags: (list(str), optional) The tags for the model version. `latest` and `^v\d+$` tags are reserved tags.
tags: (list(str), optional) The tags for the model version. `latest` and `^v\\d+$` tags are reserved tags.
excludes: (list(str), optional) The excludes files or dirs in the workdir. The excludes files or dirs will be ignored when building the Starwhale Model Package.
The `.swignore` file in the workdir will always take effect.
package_runtime: (bool, optional) Package the runtime in the Starwhale Model Package. If the argument is True, the Starwhale Model Package will be a standalone package.
Expand Down
8 changes: 4 additions & 4 deletions client/starwhale/core/dataset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def dataset_cmd(ctx: click.Context) -> None:
"-t",
"--tag",
multiple=True,
help="dataset tags, the option can be used multiple times. `latest` and `^v\d+$` tags are reserved tags.",
help="dataset tags, the option can be used multiple times. `latest` and `^v\\d+$` tags are reserved tags.",
)
@optgroup.option( # type: ignore[no-untyped-call]
"file_encoding",
Expand Down Expand Up @@ -350,7 +350,7 @@ def _build(
swcli dataset build --json /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
swcli dataset build --name qald9 --json https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions
swcli dataset build --json /path/to/test01.jsonl --json /path/to/test02.jsonl
swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\?Revision\=master\&FilePath\=train.jsonl
swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\\?Revision\\=master\\&FilePath\\=train.jsonl
\b
- from huggingface dataset
Expand All @@ -364,7 +364,7 @@ def _build(
swcli dataset build --csv /path/to/example.csv --csv-file /path/to/example2.csv
swcli dataset build --csv /path/to/csv-dir
swcli dataset build --csv http://example.com/example.csv
swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\?Revision\=master\&FilePath\=test.csv --encoding=utf-8-sig
swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\\?Revision\\=master\\&FilePath\\=test.csv --encoding=utf-8-sig
"""
# TODO: add dry-run
# TODO: add compress args
Expand Down Expand Up @@ -661,7 +661,7 @@ def _copy(
DEST: project uri or dataset uri with name.
In default, copy dataset with all user custom tags. If you want to ignore some tags, you can use `--ignore-tag` option.
`latest` and `^v\d+$` are the system builtin tags, they are ignored automatically.
`latest` and `^v\\d+$` are the system builtin tags, they are ignored automatically.
When the tags are already used for the other dataset version in the dest instance, you should use `--force` option or adjust the tags.
Expand Down
4 changes: 2 additions & 2 deletions client/starwhale/core/model/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def model_cmd(ctx: click.Context) -> None:
"-t",
"--tag",
multiple=True,
help="model tags, the option can be used multiple times. `latest` and `^v\d+$` tags are reserved tags.",
help="model tags, the option can be used multiple times. `latest` and `^v\\d+$` tags are reserved tags.",
)
@click.option(
"--package-runtime/--no-package-runtime",
Expand Down Expand Up @@ -262,7 +262,7 @@ def _copy(
DEST: project uri or model uri with name.
In default, copy dataset with all user custom tags. If you want to ignore some tags, you can use `--ignore-tag` option.
`latest` and `^v\d+$` are the system builtin tags, they are ignored automatically.
`latest` and `^v\\d+$` are the system builtin tags, they are ignored automatically.
When the tags are already used for the other model version in the dest instance, you should use `--force` option or adjust the tags.
Expand Down
4 changes: 2 additions & 2 deletions client/starwhale/core/runtime/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _quickstart(
"-t",
"--tag",
multiple=True,
help="runtime tags, the option can be used multiple times. `latest` and `^v\d+$` tags are reserved tags.",
help="runtime tags, the option can be used multiple times. `latest` and `^v\\d+$` tags are reserved tags.",
)
@click.option(
"--push",
Expand Down Expand Up @@ -582,7 +582,7 @@ def _copy(
DEST: project uri or runtime uri with name.
In default, copy runtime with all user custom tags. If you want to ignore some tags, you can use `--ignore-tag` option.
`latest` and `^v\d+$` are the system builtin tags, they are ignored automatically.
`latest` and `^v\\d+$` are the system builtin tags, they are ignored automatically.
When the tags are already used for the other runtime version in the dest instance, you should use `--force` option or adjust the tags.
Expand Down
2 changes: 1 addition & 1 deletion client/starwhale/core/runtime/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@

_SUPPORT_CUDA = ["11.3", "11.4", "11.5", "11.6", "11.7"]
_SUPPORT_CUDNN = {"8": {"support_cuda_versions": ["11.3", "11.4", "11.5", "11.6"]}}
_SUPPORT_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"]
_SUPPORT_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

# workaround popular libs with some setup_requires dependencies in the setup.py(pip)
_SUPPORT_PIP_SETUP_REQUIRES_LIBS = {
Expand Down
2 changes: 1 addition & 1 deletion client/starwhale/utils/venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def venv_activate_render(
# TODO: support relocatable editable python package
sw_cntr_content = f"""
sed -i '1d' {bin}/starwhale {bin}/sw {bin}/swcli {bin}/pip* {bin}/virtualenv
sed -i '1i\#!{bin}/python3' {bin}/starwhale {bin}/sw {bin}/swcli {bin}/pip* {bin}/virtualenv
sed -i '1i\\#!{bin}/python3' {bin}/starwhale {bin}/sw {bin}/swcli {bin}/pip* {bin}/virtualenv
sed -i 's#^VIRTUAL_ENV=.*$#VIRTUAL_ENV={venvdir}#g' {bin}/activate
rm -rf {bin}/python3
Expand Down
9 changes: 9 additions & 0 deletions client/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
import os
import sys
import tempfile
import unittest
from unittest.mock import patch, MagicMock

import pytest

from starwhale.utils import config as sw_config
from starwhale.consts import ENV_SW_CLI_CONFIG, ENV_SW_LOCAL_STORAGE
from starwhale.utils.fs import empty_dir, ensure_dir
from starwhale.api._impl.data_store import LocalDataStore

ROOT_DIR = os.path.dirname(__file__)

# TODO: remove this skip once pytorch and tensorflow release wheel packages for python3.12
skip_py312 = pytest.mark.skipif(
sys.version_info >= (3, 12),
reason="skip python3.12, because tensorflow and pytorch don't release the related wheel package.",
)


def get_predefined_config_yaml() -> str:
with open(f"{ROOT_DIR}/data/config.yaml") as f:
Expand Down
16 changes: 13 additions & 3 deletions client/tests/base/test_data_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

import numpy
import numpy as np
import torch
from PIL import Image as PILImage
from requests_mock import Mocker
from pyfakefs.fake_filesystem_unittest import TestCase

from tests import skip_py312
from starwhale.consts import HTTPMethod
from starwhale.api._impl import data_store
from starwhale.utils.error import FieldTypeOrValueError
Expand All @@ -38,6 +38,11 @@
)
from starwhale.base.uri.instance import Instance

try:
import torch
except ImportError:
torch = None


class TestDataType(TestCase):
def setUp(self) -> None:
Expand Down Expand Up @@ -120,6 +125,7 @@ def test_sequence(self) -> None:
assert len(s) == 0
assert s.to_raw_data() == []

@skip_py312
def test_numpy_binary(self) -> None:
np_array = np.array([[1.008, 6.94, 22.990], [39.098, 85.468, 132.91]])
b = NumpyBinary(np_array.tobytes(), np_array.dtype, np_array.shape)
Expand Down Expand Up @@ -227,7 +233,8 @@ def test_bbox(self) -> None:
assert _asdict["y"] == 2
assert _asdict["width"] == 3
assert _asdict["height"] == 4
assert torch.equal(bbox.to_tensor(), torch.Tensor([1, 2, 3, 4]))
if torch is not None:
assert torch.equal(bbox.to_tensor(), torch.Tensor([1, 2, 3, 4]))
_bout = bbox.to_bytes()
assert isinstance(_bout, bytes)
_array = numpy.frombuffer(_bout, dtype=numpy.float64)
Expand Down Expand Up @@ -257,7 +264,10 @@ def test_bbox3d(self) -> None:
assert _asdict["bbox_b"]["y"] == 4
assert _asdict["bbox_b"]["width"] == 3
assert _asdict["bbox_b"]["height"] == 4
assert torch.equal(bbox.to_tensor(), torch.Tensor([[1, 2, 3, 4], [3, 4, 3, 4]]))
if torch is not None:
assert torch.equal(
bbox.to_tensor(), torch.Tensor([[1, 2, 3, 4], [3, 4, 3, 4]])
)
_bout = bbox.to_bytes()
assert isinstance(_bout, bytes)
_array = numpy.frombuffer(_bout, dtype=numpy.float64).reshape(
Expand Down
14 changes: 11 additions & 3 deletions client/tests/sdk/test_dataset_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,12 @@

import yaml
import numpy
import torch
import pytest
import jsonlines
import torch.utils.data as tdata
from PIL import Image as PILImage
from requests_mock import Mocker

from tests import ROOT_DIR, BaseTestCase
from tests import ROOT_DIR, skip_py312, BaseTestCase
from starwhale import Dataset, dataset
from starwhale.utils import load_yaml
from starwhale.consts import (
Expand Down Expand Up @@ -49,6 +47,13 @@
from starwhale.core.dataset.tabular import TabularDatasetInfo
from starwhale.api._impl.dataset.loader import DataRow

try:
import torch
import torch.utils.data as tdata
except ImportError:
torch = None
tdata = None


class _DatasetSDKTestBase(BaseTestCase):
def setUp(self) -> None:
Expand Down Expand Up @@ -1767,6 +1772,7 @@ def test_with_builder_blob_config(self) -> None:
assert ds._dataset_builder.signature_bins_meta[0].size == 48


@skip_py312
@patch("starwhale.base.uri.resource.Resource._refine_local_rc_info", MagicMock())
@patch("starwhale.base.uri.resource.Resource._refine_remote_rc_info", MagicMock())
class TestPytorch(_DatasetSDKTestBase):
Expand Down Expand Up @@ -1944,6 +1950,7 @@ def test_audio_transform(self) -> None:
assert item["audio"].dtype == torch.float64


@skip_py312
class TestTensorflow(_DatasetSDKTestBase):
def test_simple_data(self) -> None:
import tensorflow as tf
Expand Down Expand Up @@ -2248,6 +2255,7 @@ def test_compound_data(self) -> None:
assert transform_data["sequence_dict"]["int"] == 1
assert transform_data["sequence_dict"]["list_int"] == [1, 1, 1]

@skip_py312
@patch(
"starwhale.integrations.huggingface.dataset.hf_datasets.get_dataset_config_names"
)
Expand Down
Loading

0 comments on commit 7aeeef7

Please sign in to comment.