From cbbd1cae84ec969e4c35fed4e0755f4c645d8e8d Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 9 Nov 2024 09:36:49 +0100 Subject: [PATCH] Split precompiled data into a sub-project (and wheel) The goal of this is the ability to generate wheels for precompiled instances of uap-core, at whatever version we want. 1. It resolves #146 by splitting the versioning of the API and that of the (pre-compiled) data; this is an issue for 1.0 as that detaches uap-python's versioning from uap-core's. 2. It allows users to update the API and the precompiled dataset separately, something they would otherwise need to do via yaml. 3. It fixes #221 by allowing the regular release of "preview" precompiled regexes from uap-core snapshots e.g. we could release 0.19.dev202412 at the start of December with whatever uap-core merged between the previous prerelease and then. This should not be picked up by pip by default, but would allow users to access those prereleases via `pip install --pre`. 4. If done well enough, it might allow users to build bespoke precompiled datasets so they don't have to pick between custom rules and precompiled (not sure there's any demand for this but it seems like it might be useful). 5. If it works well enough it might actually be possible to have 0.x use the legacy codegen package meaning it should not need to be updated anymore. This is implemented via hatch build hooks (which seem simpler than doing it via setuptools in the end). Adding `regexes.yaml` to the sdist via artifacts is a bit strange but necessary in order to generate a complete sdist which a wheel can be built from (even though the release script will likely only push the wheel). 
--- .github/workflows/ci.yml | 8 +- pyproject.toml | 5 +- setup.cfg | 8 - setup.py | 221 ------------------ src/ua_parser/__init__.py | 8 +- src/ua_parser/_lazy.pyi | 11 - src/ua_parser/_matchers.pyi | 11 - src/ua_parser/_regexes.pyi | 7 - src/ua_parser/caching.py | 4 +- src/ua_parser/core.py | 8 +- src/ua_parser/lazy.py | 2 +- src/ua_parser/loaders.py | 4 +- src/ua_parser/matchers.py | 2 +- src/ua_parser/user_agent_parser.py | 6 +- tests/test_core.py | 38 +-- tox.ini | 3 + ua-parser-builtins/README.md | 7 + ua-parser-builtins/hatch_build.py | 206 ++++++++++++++++ ua-parser-builtins/pyproject.toml | 44 ++++ .../ua_parser_builtins/__init__.py | 0 .../ua_parser_builtins/py.typed | 0 ua-parser-builtins/uap-core | 1 + 22 files changed, 309 insertions(+), 295 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py delete mode 100644 src/ua_parser/_lazy.pyi delete mode 100644 src/ua_parser/_matchers.pyi delete mode 100644 src/ua_parser/_regexes.pyi create mode 100644 ua-parser-builtins/README.md create mode 100644 ua-parser-builtins/hatch_build.py create mode 100644 ua-parser-builtins/pyproject.toml create mode 100644 ua-parser-builtins/ua_parser_builtins/__init__.py create mode 100644 ua-parser-builtins/ua_parser_builtins/py.typed create mode 120000 ua-parser-builtins/uap-core diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f41e866..df1cfb3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,6 @@ name: CI on: push: pull_request: - workflow_dispatch: jobs: checks: @@ -11,6 +10,9 @@ jobs: steps: - name: Checkout working copy uses: actions/checkout@v4 + with: + submodules: true + fetch-depth: 0 - name: ruff check uses: chartboost/ruff-action@v1 - name: ruff format @@ -29,7 +31,7 @@ jobs: if: ${{ always() && steps.setup_python.conclusion == 'success' }} run: | python -mpip install --upgrade pip - python -mpip install mypy types-PyYaml + python -mpip install mypy types-PyYaml ./ua-parser-builtins - name: mypy 
if: ${{ always() && steps.install_mypy.conclusion == 'success' }} run: mypy @@ -101,6 +103,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -115,6 +118,7 @@ jobs: sudo apt install libyaml-dev fi - run: python -mpip install pytest pyyaml + - run: python -mpip install ./ua-parser-builtins # install rs accelerator if available, ignore if not - run: python -mpip install ua-parser-rs || true # re2 is basically impossible to install from source so don't diff --git a/pyproject.toml b/pyproject.toml index 65271a4c..c0d4192c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "Python port of Browserscope's user agent parser" version = "1.0.0a1" readme = "README.rst" requires-python = ">=3.9" -dependencies = [] +dependencies = ["ua-parser-builtins"] license = {text = "Apache 2.0"} urls = {repository = "https://github.com/ua-parser/uap-python"} @@ -57,8 +57,7 @@ where = ["src"] [tool.ruff] exclude = [ - "src/ua_parser/_lazy.py", - "src/ua_parser/_matchers.py", + "src/ua_parser/generate_builtins.py", ] [tool.ruff.lint] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 9b07aee0..00000000 --- a/setup.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[options] -packages = find: -package_dir = - =src -setup_requires = pyyaml - -[options.packages.find] -where = src diff --git a/setup.py b/setup.py deleted file mode 100644 index f423348e..00000000 --- a/setup.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -# flake8: noqa -import io -from contextlib import suppress, contextmanager -from os import fspath -from pathlib import Path -from typing import Optional, List, Dict - -from setuptools import setup, Command, find_namespace_packages -from setuptools.command.build import build, SubCommand -from setuptools.command.editable_wheel import editable_wheel - -import yaml - - -build.sub_commands.insert(0, ("compile-regexes", None)) - - 
-class CompileRegexes(Command, SubCommand): - def initialize_options(self) -> None: - self.pkg_name: Optional[str] = None - - def finalize_options(self) -> None: - self.pkg_name = self.distribution.get_name().replace("-", "_") - - def get_source_files(self) -> List[str]: - return ["uap-core/regexes.yaml"] - - def get_outputs(self) -> List[str]: - return [f"{self.pkg_name}/_regexes.py"] - - def get_output_mapping(self) -> Dict[str, str]: - return dict(zip(self.get_source_files(), self.get_outputs())) - - def run(self) -> None: - # FIXME: check git / submodules? - """ - work_path = self.work_path - if not os.path.exists(os.path.join(work_path, ".git")): - return - - log.info("initializing git submodules") - check_output(["git", "submodule", "init"], cwd=work_path) - check_output(["git", "submodule", "update"], cwd=work_path) - """ - if not self.pkg_name: - return # or error? - - yaml_src = Path("uap-core", "regexes.yaml") - if not yaml_src.is_file(): - raise RuntimeError( - f"Unable to find regexes.yaml, should be at {yaml_src!r}" - ) - - with yaml_src.open("rb") as f: - regexes = yaml.safe_load(f) - - if self.editable_mode: - dist_dir = Path("src") - else: - dist_dir = Path(self.get_finalized_command("bdist_wheel").bdist_dir) - - outdir = dist_dir / self.pkg_name - outdir.mkdir(parents=True, exist_ok=True) - - dest = outdir / "_matchers.py" - dest_lazy = outdir / "_lazy.py" - dest_legacy = outdir / "_regexes.py" - - with ( - dest.open("wb") as eager, - dest_lazy.open("wb") as lazy, - dest_legacy.open("wb") as legacy, - ): - eager = EagerWriter(eager) - lazy = LazyWriter(lazy) - legacy = LegacyWriter(legacy) - - for section in ["user_agent_parsers", "os_parsers", "device_parsers"]: - with ( - eager.section(section), - lazy.section(section), - legacy.section(section), - ): - extract = EXTRACTORS[section] - for p in regexes[section]: - el = trim(extract(p)) - eager.item(el) - lazy.item(el) - legacy.item(el) - eager.end() - lazy.end() - legacy.end() - - -def trim(l): - 
while len(l) > 1 and l[-1] is None: - l.pop() - return l - - -EXTRACTORS = { - "user_agent_parsers": lambda p: [ - p["regex"], - p.get("family_replacement"), - p.get("v1_replacement"), - p.get("v2_replacement"), - ], - "os_parsers": lambda p: [ - p["regex"], - p.get("os_replacement"), - p.get("os_v1_replacement"), - p.get("os_v2_replacement"), - p.get("os_v3_replacement"), - p.get("os_v4_replacement"), - ], - "device_parsers": lambda p: [ - p["regex"], - p.get("regex_flag"), - p.get("device_replacement"), - p.get("brand_replacement"), - p.get("model_replacement"), - ], -} - - -class Writer: - section_end = b"" - - def __init__(self, fp): - self.fp = fp - self.fp.write( - b"""\ -######################################################## -# NOTICE: this file is autogenerated from regexes.yaml # -######################################################## -""" - ) - self.fp.write(self.prefix) - self._section = None - - @contextmanager - def section(self, id): - self._section = id - self.fp.write(self.sections[id]) - yield - self.fp.write(self.section_end) - - def item(self, elements): - # DeviceMatcher(re, flag, repl1), - self.fp.write(self.items[self._section]) - self.fp.write(", ".join(map(repr, elements)).encode()) - self.fp.write(b"),\n") - - def end(self): - self.fp.write(self.suffix) - - -class LegacyWriter(Writer): - prefix = b"""\ -__all__ = [ - "USER_AGENT_PARSERS", - "DEVICE_PARSERS", - "OS_PARSERS", -] - -from .user_agent_parser import UserAgentParser, DeviceParser, OSParser - -""" - sections = { - "user_agent_parsers": b"USER_AGENT_PARSERS = [\n", - "os_parsers": b"\n\nOS_PARSERS = [\n", - "device_parsers": b"\n\nDEVICE_PARSERS = [\n", - } - section_end = b"]" - items = { - "user_agent_parsers": b" UserAgentParser(", - "os_parsers": b" OSParser(", - "device_parsers": b" DeviceParser(", - } - suffix = b"\n" - - -class EagerWriter(Writer): - prefix = b"""\ -__all__ = ["MATCHERS"] - -from typing import Tuple, List -from .matchers import UserAgentMatcher, 
OSMatcher, DeviceMatcher - -MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ -""" - sections = { - "user_agent_parsers": b"", - "os_parsers": b"], [\n", - "device_parsers": b"], [\n", - } - items = { - "user_agent_parsers": b" UserAgentMatcher(", - "os_parsers": b" OSMatcher(", - "device_parsers": b" DeviceMatcher(", - } - suffix = b"])\n" - - -class LazyWriter(EagerWriter): - prefix = b"""\ -__all__ = ["MATCHERS"] - -from typing import Tuple, List -from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher - -MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ -""" - - -setup( - cmdclass={ - "compile-regexes": CompileRegexes, - } -) diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index 19b6faa2..040dda3e 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -20,17 +20,17 @@ from __future__ import annotations __all__ = [ + "OS", "BasicResolver", - "CachingResolver", "Cache", + "CachingResolver", "DefaultedResult", "Device", "Domain", "Matchers", - "OS", - "Result", - "Resolver", "PartialResult", + "Resolver", + "Result", "UserAgent", "load_builtins", "load_lazy_builtins", diff --git a/src/ua_parser/_lazy.pyi b/src/ua_parser/_lazy.pyi deleted file mode 100644 index 741db1af..00000000 --- a/src/ua_parser/_lazy.pyi +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = ["MATCHERS"] - -from typing import List, Tuple - -from .lazy import DeviceMatcher, OSMatcher, UserAgentMatcher - -MATCHERS: Tuple[ - List[UserAgentMatcher], - List[OSMatcher], - List[DeviceMatcher], -] diff --git a/src/ua_parser/_matchers.pyi b/src/ua_parser/_matchers.pyi deleted file mode 100644 index 2269fb43..00000000 --- a/src/ua_parser/_matchers.pyi +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = ["MATCHERS"] - -from typing import List, Tuple - -from .matchers import DeviceMatcher, OSMatcher, UserAgentMatcher - -MATCHERS: Tuple[ - List[UserAgentMatcher], - List[OSMatcher], - List[DeviceMatcher], -] diff --git 
a/src/ua_parser/_regexes.pyi b/src/ua_parser/_regexes.pyi deleted file mode 100644 index 10bc2ef4..00000000 --- a/src/ua_parser/_regexes.pyi +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List - -from .user_agent_parser import DeviceParser, OSParser, UserAgentParser - -USER_AGENT_PARSERS: List[UserAgentParser] -OS_PARSERS: List[OSParser] -DEVICE_PARSERS: List[DeviceParser] diff --git a/src/ua_parser/caching.py b/src/ua_parser/caching.py index 706ad4b3..998c4b36 100644 --- a/src/ua_parser/caching.py +++ b/src/ua_parser/caching.py @@ -78,7 +78,7 @@ def __setitem__(self, key: str, value: PartialResult) -> None: @dataclasses.dataclass class CacheEntry: - __slots__ = ["key", "value", "freq"] + __slots__ = ["freq", "key", "value"] key: str value: PartialResult freq: int @@ -161,7 +161,7 @@ def _evict_small(self) -> None: @dataclasses.dataclass class SieveNode: - __slots__ = ("key", "value", "visited", "next") + __slots__ = ("key", "next", "value", "visited") key: str value: PartialResult visited: bool diff --git a/src/ua_parser/core.py b/src/ua_parser/core.py index 8ea880d6..b7133d4c 100644 --- a/src/ua_parser/core.py +++ b/src/ua_parser/core.py @@ -4,14 +4,14 @@ from typing import Generic, List, Optional, Protocol, Tuple, TypeVar __all__ = [ + "OS", "DefaultedResult", "Device", "Domain", "Matchers", - "OS", - "Result", "PartialResult", "Resolver", + "Result", "UserAgent", ] @@ -74,7 +74,7 @@ def __init__( class Device: """Device information parsed from the user agent string.""" - __slots__ = ("family", "brand", "model") + __slots__ = ("brand", "family", "model") family: str brand: Optional[str] model: Optional[str] @@ -172,7 +172,7 @@ class PartialResult: """ - __slots__ = ("domains", "user_agent", "os", "device", "string") + __slots__ = ("device", "domains", "os", "string", "user_agent") domains: Domain user_agent: Optional[UserAgent] os: Optional[OS] diff --git a/src/ua_parser/lazy.py b/src/ua_parser/lazy.py index c5aa5e23..4f0abedf 100644 --- 
a/src/ua_parser/lazy.py +++ b/src/ua_parser/lazy.py @@ -1,4 +1,4 @@ -__all__ = ["UserAgentMatcher", "OSMatcher", "DeviceMatcher"] +__all__ = ["DeviceMatcher", "OSMatcher", "UserAgentMatcher"] import re from functools import cached_property diff --git a/src/ua_parser/loaders.py b/src/ua_parser/loaders.py index 18fc3d25..55774eaf 100644 --- a/src/ua_parser/loaders.py +++ b/src/ua_parser/loaders.py @@ -52,7 +52,7 @@ def load_builtins() -> Matchers: further imports simply reference the existing datas. """ - from ._matchers import MATCHERS + from ua_parser_builtins.matchers import MATCHERS # typing and mypy don't have safe upcast (#5756) and mypy is # unhappy about returning concrete matchers for a mixed type @@ -66,7 +66,7 @@ def load_lazy_builtins() -> Matchers: further imports simply reference the existing datas. """ - from ._lazy import MATCHERS + from ua_parser_builtins.lazy import MATCHERS return cast(Matchers, MATCHERS) diff --git a/src/ua_parser/matchers.py b/src/ua_parser/matchers.py index 3956b3b5..35200b0b 100644 --- a/src/ua_parser/matchers.py +++ b/src/ua_parser/matchers.py @@ -1,4 +1,4 @@ -__all__ = ["UserAgentMatcher", "OSMatcher", "DeviceMatcher"] +__all__ = ["DeviceMatcher", "OSMatcher", "UserAgentMatcher"] import re from typing import Literal, Optional, Pattern diff --git a/src/ua_parser/user_agent_parser.py b/src/ua_parser/user_agent_parser.py index 5cb1c744..e6e4bb3e 100644 --- a/src/ua_parser/user_agent_parser.py +++ b/src/ua_parser/user_agent_parser.py @@ -521,4 +521,8 @@ def GetFilters( del SafeLoader else: # Just load our pre-compiled versions - from ._regexes import DEVICE_PARSERS, OS_PARSERS, USER_AGENT_PARSERS + from ua_parser_builtins.regexes import ( + DEVICE_PARSERS, + OS_PARSERS, + USER_AGENT_PARSERS, + ) diff --git a/tests/test_core.py b/tests/test_core.py index 310ddec5..1a87702f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -3,25 +3,22 @@ import dataclasses import logging import pathlib -import platform from operator 
import attrgetter +from typing import cast import pytest # type: ignore -if platform.python_implementation() == "PyPy": - from yaml import SafeLoader, load -else: - try: - from yaml import ( # type: ignore - CSafeLoader as SafeLoader, - load, - ) - except ImportError: - logging.getLogger(__name__).warning( - "PyYaml C extension not available to run tests, this will result " - "in dramatic tests slowdown." - ) - from yaml import SafeLoader, load +try: + from yaml import ( + CSafeLoader as SafeLoader, + load, + ) +except ImportError: + logging.getLogger(__name__).warning( + "PyYaml C extension not available to run tests, this will result " + "in tests slowdown." + ) + from yaml import SafeLoader, load # type: ignore from ua_parser import ( BasicResolver, @@ -32,15 +29,22 @@ UserAgent, load_builtins, load_lazy_builtins, + loaders, ) from ua_parser.matchers import UserAgentMatcher CORE_DIR = (pathlib.Path(__name__).parent.parent / "uap-core").resolve() +data = cast(loaders.FileLoader, loaders.load_yaml)(CORE_DIR / "regexes.yaml") +data_lazy = cast(loaders.FileLoader, loaders.load_yaml)( + CORE_DIR / "regexes.yaml", loader=loaders.load_lazy +) PARSERS = [ pytest.param(Parser(BasicResolver(load_builtins())), id="basic"), pytest.param(Parser(BasicResolver(load_lazy_builtins())), id="lazy"), + pytest.param(Parser(BasicResolver(data)), id="basic-yaml"), + pytest.param(Parser(BasicResolver(data_lazy)), id="lazy-yaml"), ] try: from ua_parser import re2 @@ -51,7 +55,7 @@ ) ) else: - PARSERS.append(pytest.param(Parser(re2.Resolver(load_builtins())), id="re2")) + PARSERS.append(pytest.param(Parser(re2.Resolver(data)), id="re2")) try: from ua_parser import regex @@ -64,7 +68,7 @@ ) ) else: - PARSERS.append(pytest.param(Parser(regex.Resolver(load_builtins())), id="regex")) + PARSERS.append(pytest.param(Parser(regex.Resolver(data)), id="regex")) UA_FIELDS = {f.name for f in dataclasses.fields(UserAgent)} diff --git a/tox.ini b/tox.ini index de36509a..0f2edd4c 100644 --- a/tox.ini 
+++ b/tox.ini @@ -23,6 +23,7 @@ deps = pyyaml google-re2 ua-parser-rs + ./ua-parser-builtins commands = pytest -Werror --doctest-glob="*.rst" {posargs} @@ -31,6 +32,7 @@ deps = pytest pyyaml ua-parser-rs + ./ua-parser-builtins [testenv:flake8] package = skip @@ -47,4 +49,5 @@ package = skip deps = mypy types-PyYaml + ./ua-parser-builtins commands = mypy {posargs:} diff --git a/ua-parser-builtins/README.md b/ua-parser-builtins/README.md new file mode 100644 index 00000000..8a568237 --- /dev/null +++ b/ua-parser-builtins/README.md @@ -0,0 +1,7 @@ +# Precompiled ruleset for [ua-parser](https://pypi.org/project/ua-parser/) + +This project does not do anything on its own, nor does it have any +actual API: it contains the dataset of +[uap-core](https://github.com/ua-parser/uap-core) pre-compiled for use +by [ua-parser](https://pypi.org/project/ua-parser/) to decrease +initialisation times. diff --git a/ua-parser-builtins/hatch_build.py b/ua-parser-builtins/hatch_build.py new file mode 100644 index 00000000..e92e9730 --- /dev/null +++ b/ua-parser-builtins/hatch_build.py @@ -0,0 +1,206 @@ +from __future__ import annotations + +import io +import os +import os.path +import tempfile +from contextlib import contextmanager +from typing import Any, Callable, ClassVar, Iterator, cast + +import yaml +from hatchling.builders.hooks.plugin.interface import BuildHookInterface +from hatchling.metadata.plugin.interface import MetadataHookInterface +from versioningit import get_version + + +class MetadataHook(MetadataHookInterface): + def update(self, metadata: dict[str, Any]) -> None: + v = get_version( + os.path.join(self.root, "uap-core"), + config={ + "format": { + "distance": "{next_version}.dev{distance}", + } + }, + ) + metadata["version"] = v + + +class CompilerHook(BuildHookInterface): + def initialize( + self, + version: str, + build_data: dict[str, Any], + ) -> None: + with open(os.path.join(self.root, "uap-core/regexes.yaml"), "rb") as f: + data = yaml.safe_load(f) + + with ( 
+ tempfile.NamedTemporaryFile(delete=False) as matchers, + tempfile.NamedTemporaryFile(delete=False) as lazy, + tempfile.NamedTemporaryFile(delete=False) as regexes, + ): + matchers_w = EagerWriter(cast(io.RawIOBase, matchers)) + lazy_w = LazyWriter(cast(io.RawIOBase, lazy)) + legacy_w = LegacyWriter(cast(io.RawIOBase, regexes)) + + for section, specs in data.items(): + with ( + matchers_w.section(section), + lazy_w.section(section), + legacy_w.section(section), + ): + extract = EXTRACTORS[section] + for s in specs: + el = trim(extract(s)) + matchers_w.item(el) + lazy_w.item(el) + legacy_w.item(el) + + matchers_w.end() + lazy_w.end() + legacy_w.end() + + build_data["force_include"][matchers.name] = "ua_parser_builtins/matchers.py" + build_data["force_include"][lazy.name] = "ua_parser_builtins/lazy.py" + build_data["force_include"][regexes.name] = "ua_parser_builtins/regexes.py" + + def finalize( + self, + version: str, + build_data: dict[str, Any], + artifact_path: str, + ): + tempdir = tempfile.gettempdir() + for k in build_data["force_include"]: + if k.startswith(tempdir): + os.remove(k) + + +def trim(items: list[str | None]) -> list[str | None]: + """Removes trailing `None` from the extraction""" + while len(items) > 1 and items[-1] is None: + items.pop() + return items + + +EXTRACTORS: dict[str, Callable[[dict[str, str]], list[str | None]]] = { + "user_agent_parsers": lambda p: [ + p["regex"], + p.get("family_replacement"), + p.get("v1_replacement"), + p.get("v2_replacement"), + p.get("v3_replacement"), + p.get("v4_replacement"), + ], + "os_parsers": lambda p: [ + p["regex"], + p.get("os_replacement"), + p.get("os_v1_replacement"), + p.get("os_v2_replacement"), + p.get("os_v3_replacement"), + p.get("os_v4_replacement"), + ], + "device_parsers": lambda p: [ + p["regex"], + p.get("regex_flag"), + p.get("device_replacement"), + p.get("brand_replacement"), + p.get("model_replacement"), + ], +} + + +class Writer: + items: ClassVar[dict[str, bytes]] + sections: 
ClassVar[dict[str, bytes]] + prefix: bytes + suffix = b"" + section_end = b"" + + def __init__(self, fp: io.RawIOBase) -> None: + self.fp = fp + self.fp.write( + b"""\ +######################################################## +# NOTICE: this file is autogenerated from regexes.yaml # +######################################################## +""" + ) + self.fp.write(self.prefix) + self._section: str | None = None + + @contextmanager + def section(self, id: str) -> Iterator[None]: + self._section = id + self.fp.write(self.sections[id]) + yield + self.fp.write(self.section_end) + + def item(self, elements: list[str | None]) -> None: + # DeviceMatcher(re, flag, repl1), + # assume we're in a section + self.fp.write(self.items[cast(str, self._section)]) + self.fp.write(", ".join(map(repr, elements)).encode()) + self.fp.write(b"),\n") + + def end(self) -> None: + self.fp.write(self.suffix) + + +class LegacyWriter(Writer): + prefix = b"""\ +__all__ = [ + "USER_AGENT_PARSERS", + "DEVICE_PARSERS", + "OS_PARSERS", +] + +from ua_parser.user_agent_parser import UserAgentParser, DeviceParser, OSParser + +""" + sections: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b"USER_AGENT_PARSERS = [\n", + "os_parsers": b"\n\nOS_PARSERS = [\n", + "device_parsers": b"\n\nDEVICE_PARSERS = [\n", + } + section_end = b"]" + items: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b" UserAgentParser(", + "os_parsers": b" OSParser(", + "device_parsers": b" DeviceParser(", + } + suffix = b"\n" + + +class EagerWriter(Writer): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from ua_parser.matchers import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" + sections: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b"", + "os_parsers": b"], [\n", + "device_parsers": b"], [\n", + } + items: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b" UserAgentMatcher(", + 
"os_parsers": b" OSMatcher(", + "device_parsers": b" DeviceMatcher(", + } + suffix = b"])\n" + + +class LazyWriter(EagerWriter): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from ua_parser.lazy import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml new file mode 100644 index 00000000..db0da38b --- /dev/null +++ b/ua-parser-builtins/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["hatchling", "versioningit", "pyyaml"] +build-backend = "hatchling.build" + +[project] +name = "ua-parser-builtins" +description = "Precompiled rules for User Agent Parser" +readme = "README.md" +dependencies = ["ua-parser"] +requires-python = ">=3.9" +license = {text = "Apache 2.0"} +urls = {repository = "https://github.com/ua-parser/uap-python"} +dynamic = ["version"] +maintainers = [ + { name = "masklinn", email = "uap@masklinn.net" } +] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + # "Programming Language :: Python :: Implementation :: GraalPy", +] + +[tool.hatch.build.hooks.custom] + +[tool.hatch.metadata.hooks.custom] + +[tool.hatch.build.targets.sdist] +artifacts = [ + "uap-core/regexes.yaml", +] diff --git 
a/ua-parser-builtins/ua_parser_builtins/__init__.py b/ua-parser-builtins/ua_parser_builtins/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ua-parser-builtins/ua_parser_builtins/py.typed b/ua-parser-builtins/ua_parser_builtins/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/ua-parser-builtins/uap-core b/ua-parser-builtins/uap-core new file mode 120000 index 00000000..fbefe368 --- /dev/null +++ b/ua-parser-builtins/uap-core @@ -0,0 +1 @@ +../uap-core \ No newline at end of file