Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LuxonisParser - RoboFlow URL Support #189

Merged
merged 8 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
with:
ref: ${{ github.head_ref }}

- name: Install pre-commit
run: python3 -m pip install 'pre-commit<4.0.0'

- name: Run pre-commit
uses: pre-commit/[email protected]

Expand Down
68 changes: 62 additions & 6 deletions luxonis_ml/data/parsers/luxonis_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import zipfile
from enum import Enum
from importlib.util import find_spec
from pathlib import Path
from typing import (
Dict,
Expand All @@ -16,7 +17,8 @@
from luxonis_ml.data import DATASETS_REGISTRY, BaseDataset, LuxonisDataset
from luxonis_ml.data.utils.enums import LabelType
from luxonis_ml.enums import DatasetType
from luxonis_ml.utils import LuxonisFileSystem
from luxonis_ml.utils import LuxonisFileSystem, environ
from luxonis_ml.utils.filesystem import _pip_install

from .base_parser import BaseParser
from .classification_directory_parser import ClassificationDirectoryParser
Expand Down Expand Up @@ -72,8 +74,15 @@
appropriate parser.

@type dataset_dir: str
@param dataset_dir: Path to the dataset directory or zip file.
Can also be a remote URL supported by L{LuxonisFileSystem}.
@param dataset_dir: Identifier of the dataset directory.
Can be one of:
- Local path to the dataset directory.
- Remote URL supported by L{LuxonisFileSystem}.
- C{gcs://} for Google Cloud Storage
- C{s3://} for Amazon S3
- C{roboflow://} for Roboflow datasets.
- Expected format: C{roboflow://workspace/project/version/format}.
Can be a remote URL supported by L{LuxonisFileSystem}.
@type dataset_name: Optional[str]
@param dataset_name: Name of the dataset. If C{None}, the name
is derived from the name of the dataset directory.
Expand All @@ -97,9 +106,16 @@
names.
"""
save_dir = Path(save_dir) if save_dir else None
name = Path(dataset_dir).name
local_path = (save_dir or Path.cwd()) / name
self.dataset_dir = LuxonisFileSystem.download(dataset_dir, local_path)
if dataset_dir.startswith("roboflow://"):
self.dataset_dir, name = self._download_roboflow_dataset(

Check warning on line 110 in luxonis_ml/data/parsers/luxonis_parser.py

View check run for this annotation

Codecov / codecov/patch

luxonis_ml/data/parsers/luxonis_parser.py#L110

Added line #L110 was not covered by tests
dataset_dir, save_dir
)
else:
name = dataset_dir.split("/")[-1]
local_path = (save_dir or Path.cwd()) / name
self.dataset_dir = LuxonisFileSystem.download(
dataset_dir, local_path
)
if self.dataset_dir.suffix == ".zip":
with zipfile.ZipFile(self.dataset_dir, "r") as zip_ref:
unzip_dir = self.dataset_dir.parent / self.dataset_dir.stem
Expand Down Expand Up @@ -237,3 +253,43 @@
return self.parser.parse_split(
split, random_split, split_ratios, **parsed_kwargs, **kwargs
)

def _download_roboflow_dataset(
    self, dataset_dir: str, local_path: Optional[Path]
) -> Tuple[Path, str]:
    """Downloads a dataset from Roboflow.

    The URL and the API key are validated before the C{roboflow}
    package is installed or imported, so that malformed input fails
    fast instead of first triggering a C{pip install}.

    @type dataset_dir: str
    @param dataset_dir: Roboflow dataset URL. Expected format:
        C{roboflow://workspace/project/version/format}.
    @type local_path: Optional[Path]
    @param local_path: Base directory for the download. If C{None},
        C{<cwd>/<project>_<format>} is used. The dataset is
        downloaded into the C{<project>} subdirectory of this path.
    @rtype: Tuple[Path, str]
    @return: Tuple of the location reported by the downloaded
        Roboflow dataset and the project name.
    @raise ValueError: If the URL does not have exactly four
        non-empty components or if the version is not an integer.
    @raise RuntimeError: If C{ROBOFLOW_API_KEY} is not set.
    """
    prefix = "roboflow://"
    # Parse the URL exactly once; empty components (e.g. a doubled or
    # trailing slash) are rejected together with a wrong component count.
    parts = (
        dataset_dir[len(prefix) :].split("/")
        if dataset_dir.startswith(prefix)
        else []
    )
    if len(parts) != 4 or not all(parts):
        raise ValueError(
            f"Incorrect Roboflow dataset URL: `{dataset_dir}`. "
            "Expected format: `roboflow://workspace/project/version/format`."
        )
    # `export_format` avoids shadowing the `format` builtin.
    workspace, project, version, export_format = parts

    try:
        version_number = int(version)
    except ValueError as e:
        raise ValueError(
            f"Roboflow version must be an integer, got `{version}`."
        ) from e

    if environ.ROBOFLOW_API_KEY is None:
        raise RuntimeError(
            "ROBOFLOW_API_KEY environment variable is not set. "
            "Please set it to your Roboflow API key."
        )

    # `roboflow` is an optional dependency: install it on demand and
    # import it lazily, only once the input is known to be valid.
    if find_spec("roboflow") is None:
        _pip_install("roboflow", "roboflow", "0.1.1")

    from roboflow import Roboflow

    rf = Roboflow(api_key=environ.ROBOFLOW_API_KEY)

    local_path = local_path or Path.cwd() / f"{project}_{export_format}"
    dataset = (
        rf.workspace(workspace)
        .project(project)
        .version(version_number)
        .download(export_format, str(local_path / project))
    )
    return Path(dataset.location), project

Check warning on line 295 in luxonis_ml/data/parsers/luxonis_parser.py

View check run for this annotation

Codecov / codecov/patch

luxonis_ml/data/parsers/luxonis_parser.py#L295

Added line #L295 was not covered by tests
1 change: 1 addition & 0 deletions luxonis_ml/data/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ pycocotools>=2.0.7
typeguard>=4.1.0
polars[timezone]>=0.20.31
ordered-set>=4.0.0
# roboflow>=0.1.1
2 changes: 2 additions & 0 deletions luxonis_ml/utils/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Environ(BaseSettings):
LUXONISML_BASE_PATH: Path = Path.home() / "luxonis_ml"
LUXONISML_TEAM_ID: str = "offline"

ROBOFLOW_API_KEY: Optional[str] = None

GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = None

LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = (
Expand Down
21 changes: 11 additions & 10 deletions luxonis_ml/utils/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,19 +674,12 @@ def upload(local_path: PathType, url: str) -> None:


def _check_package_installed(protocol: str) -> None:  # pragma: no cover
    """Installs the optional package backing C{protocol} if it is missing.

    Installation is delegated to the module-level L{_pip_install},
    which takes the protocol as its first argument. Protocols without
    an entry below are left untouched.

    @type protocol: str
    @param protocol: Protocol name without the C{://} suffix,
        e.g. C{"gcs"}, C{"s3"} or C{"mlflow"}.
    """
    # protocol -> (package providing it, minimum version to install)
    requirements = {
        "gs": ("gcsfs", "2023.3.0"),
        "gcs": ("gcsfs", "2023.3.0"),
        "s3": ("s3fs", "2023.3.0"),
        "mlflow": ("mlflow", "2.10.0"),
    }
    requirement = requirements.get(protocol)
    if requirement is None:
        return

    package, version = requirement
    if find_spec(package) is None:
        _pip_install(protocol, package, version)


def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
Expand All @@ -702,3 +695,11 @@ def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
protocol = "file"

return protocol, path if path else None


def _pip_install(protocol: str, package: str, version: str) -> None:
    """Installs a package required by a protocol using C{pip}.

    The installation is best-effort: a failing C{pip} invocation is
    logged but does not raise, so the caller's subsequent import
    decides how to proceed.

    @type protocol: str
    @param protocol: Protocol name the package is needed for, without
        the C{://} suffix. Used only in the log message.
    @type package: str
    @param package: Name of the package to install.
    @type version: str
    @param version: Minimum version of the package to install.
    """
    logger.error(f"'{package}' is necessary for '{protocol}://' protocol.")
    logger.info(f"Installing {package}...")
    # Run pip through the current interpreter so that the package lands
    # in the environment that is actually executing this code.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", f"{package}>={version}"]
    )
    # Surface a failed installation here; otherwise it only shows up
    # later as an unrelated-looking ImportError.
    if result.returncode != 0:
        logger.error(
            f"Failed to install '{package}>={version}' "
            f"(pip exited with code {result.returncode})."
        )
14 changes: 13 additions & 1 deletion tests/test_data/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from luxonis_ml.data import LabelType, LuxonisLoader, LuxonisParser
from luxonis_ml.enums import DatasetType
from luxonis_ml.utils import environ

URL_PREFIX: Final[str] = "gs://luxonis-test-bucket/luxonis-ml-test-data"
WORK_DIR: Final[str] = "tests/data/parser_datasets"
Expand Down Expand Up @@ -82,13 +83,24 @@ def prepare_dir():
"D1_ParkingSlot-solo.zip",
[LabelType.BOUNDINGBOX, LabelType.SEGMENTATION],
),
(
DatasetType.COCO,
"roboflow://team-roboflow/coco-128/2/coco",
[LabelType.BOUNDINGBOX, LabelType.CLASSIFICATION],
),
],
)
def test_dir_parser(
dataset_type: DatasetType, url: str, expected_label_types: List[LabelType]
):
if not url.startswith("roboflow://"):
url = f"{URL_PREFIX}/{url}"

elif environ.ROBOFLOW_API_KEY is None:
pytest.skip("Roboflow API key is not set")

parser = LuxonisParser(
f"{URL_PREFIX}/{url}",
url,
dataset_name=f"test-{dataset_type}",
delete_existing=True,
save_dir=WORK_DIR,
Expand Down
Loading