From 986acef432d1013e28d4075ca43380a0ed079045 Mon Sep 17 00:00:00 2001 From: Alex Carney Date: Sun, 22 Sep 2024 19:49:16 +0100 Subject: [PATCH] sphinx-agent: Move `Uri` implementation into its own file --- .../esbonio/sphinx_agent/types/__init__.py | 328 +----------------- lib/esbonio/esbonio/sphinx_agent/types/uri.py | 325 +++++++++++++++++ 2 files changed, 331 insertions(+), 322 deletions(-) create mode 100644 lib/esbonio/esbonio/sphinx_agent/types/uri.py diff --git a/lib/esbonio/esbonio/sphinx_agent/types/__init__.py b/lib/esbonio/esbonio/sphinx_agent/types/__init__.py index bbf137496..4553c307a 100644 --- a/lib/esbonio/esbonio/sphinx_agent/types/__init__.py +++ b/lib/esbonio/esbonio/sphinx_agent/types/__init__.py @@ -1,20 +1,16 @@ """Type definitions for the sphinx agent. -This is the *only* file shared between the agent itself and the parent language server. -For this reason this file *cannot* import anything from Sphinx. +This is the *only* module shared between the agent itself and the parent language +server. For this reason this module *cannot* import anything from Sphinx. 
""" from __future__ import annotations import dataclasses -import os -import pathlib import re from typing import Any -from typing import Callable from typing import Optional from typing import Union -from urllib import parse from .lsp import Diagnostic from .lsp import DiagnosticSeverity @@ -25,10 +21,13 @@ from .roles import RST_DEFAULT_ROLE from .roles import RST_ROLE from .roles import Role +from .uri import IS_WIN +from .uri import Uri __all__ = ( "Diagnostic", "DiagnosticSeverity", + "IS_WIN", "Location", "MYST_ROLE", "Position", @@ -36,6 +35,7 @@ "RST_ROLE", "Range", "Role", + "Uri", ) MYST_DIRECTIVE: re.Pattern = re.compile( @@ -129,322 +129,6 @@ """ -IS_WIN = os.name == "nt" -SCHEME = re.compile(r"^[a-zA-Z][a-zA-Z\d+.-]*$") -RE_DRIVE_LETTER_PATH = re.compile(r"^(\/?)([a-zA-Z]:)") - - -# TODO: Look into upstreaming this into pygls -# - if it works out -# - when pygls drops 3.7 (Uri uses the := operator) -@dataclasses.dataclass(frozen=True) -class Uri: - """Helper class for working with URIs.""" - - scheme: str - - authority: str - - path: str - - query: str - - fragment: str - - def __post_init__(self): - """Basic validation.""" - if self.scheme is None: - raise ValueError("URIs must have a scheme") - - if not SCHEME.match(self.scheme): - raise ValueError("Invalid scheme") - - if self.authority and self.path and (not self.path.startswith("/")): - raise ValueError("Paths with an authority must start with a slash '/'") - - if self.path and self.path.startswith("//") and (not self.authority): - raise ValueError( - "Paths without an authority cannot start with two slashes '//'" - ) - - def __eq__(self, other): - if type(other) is not type(self): - return False - - if self.scheme != other.scheme: - return False - - if self.authority != other.authority: - return False - - if self.query != other.query: - return False - - if self.fragment != other.fragment: - return False - - if IS_WIN and self.scheme == "file": - # Filepaths on windows are case in-sensitive - if 
self.path.lower() != other.path.lower(): - return False - - elif self.path != other.path: - return False - - return True - - def __hash__(self): - if IS_WIN and self.scheme == "file": - # Filepaths on windows are case in-sensitive - path = self.path.lower() - else: - path = self.path - - return hash((self.scheme, self.authority, path, self.query, self.fragment)) - - def __fspath__(self): - """Return the file system representation of this uri. - - This makes Uri instances compatible with any function that expects an - ``os.PathLike`` object! - """ - # TODO: Should we raise an exception if scheme != "file"? - return self.as_fs_path(preserve_case=True) - - def __str__(self): - return self.as_string() - - def __truediv__(self, other): - return self.join(other) - - @classmethod - def create( - cls, - *, - scheme: str = "", - authority: str = "", - path: str = "", - query: str = "", - fragment: str = "", - ) -> Uri: - """Create a uri with the given attributes.""" - - if scheme in {"http", "https", "file"}: - if not path.startswith("/"): - path = f"/{path}" - - return cls( - scheme=scheme, - authority=authority, - path=path, - query=query, - fragment=fragment, - ) - - @classmethod - def parse(cls, uri: str) -> Uri: - """Parse the given uri from its string representation.""" - scheme, authority, path, _, query, fragment = parse.urlparse(uri) - return cls.create( - scheme=parse.unquote(scheme), - authority=parse.unquote(authority), - path=parse.unquote(path), - query=parse.unquote(query), - fragment=parse.unquote(fragment), - ) - - def resolve(self) -> Uri: - """Return the fully resolved version of this Uri.""" - - # This operation only makes sense for file uris - if self.scheme != "file": - return Uri.parse(str(self)) - - return Uri.for_file(pathlib.Path(self).resolve()) - - @classmethod - def for_file(cls, filepath: Union[str, os.PathLike[str]]) -> Uri: - """Create a uri based on the given filepath.""" - - fpath = os.fspath(filepath) - if IS_WIN: - fpath = 
fpath.replace("\\", "/") - - if fpath.startswith("//"): - authority, *path = fpath[2:].split("/") - fpath = "/".join(path) - else: - authority = "" - - return cls.create(scheme="file", authority=authority, path=fpath) - - @property - def fs_path(self) -> Optional[str]: - """Return the equivalent fs path.""" - return self.as_fs_path() - - def where(self, **kwargs) -> Uri: - """Return an transformed version of this uri where certain components of the uri - have been replace with the given arguments. - - Passing a value of ``None`` will remove the given component entirely. - """ - keys = {"scheme", "authority", "path", "query", "fragment"} - valid_keys = keys.copy() & kwargs.keys() - - current = {k: getattr(self, k) for k in keys} - replacements = {k: kwargs[k] for k in valid_keys} - - return Uri.create(**{**current, **replacements}) - - def join(self, path: str) -> Uri: - """Join this Uri's path component with the given path and return the resulting - uri. - - Parameters - ---------- - path - The path segment to join - - Returns - ------- - Uri - The resulting uri - """ - - if not self.path: - raise ValueError("This uri has no path") - - if IS_WIN: - fs_path = self.fs_path - if fs_path is None: - raise ValueError("Unable to join paths, fs_path is None") - - joined = os.path.normpath(os.path.join(fs_path, path)) - new_path = self.for_file(joined).path - else: - new_path = os.path.normpath(os.path.join(self.path, path)) - - return self.where(path=new_path) - - def as_fs_path(self, preserve_case: bool = False) -> Optional[str]: - """Return the file system path correspondin with this uri.""" - if self.path: - path = _normalize_path(self.path, preserve_case) - - if self.authority and len(path) > 1: - path = f"//{self.authority}{path}" - - # Remove the leading `/` from windows paths - elif RE_DRIVE_LETTER_PATH.match(path): - path = path[1:] - - if IS_WIN: - path = path.replace("/", "\\") - - return path - - return None - - def as_string(self, encode=True) -> str: - 
"""Return a string representation of this Uri. - - Parameters - ---------- - encode - If ``True`` (the default), encode any special characters. - - Returns - ------- - str - The string representation of the Uri - """ - - # See: https://github.com/python/mypy/issues/10740 - encoder: Callable[[str], str] = parse.quote if encode else _replace_chars # type: ignore[assignment] - - if authority := self.authority: - usercred, *auth = authority.split("@") - if len(auth) > 0: - *user, cred = usercred.split(":") - if len(user) > 0: - usercred = encoder(":".join(user)) + f":{encoder(cred)}" - else: - usercred = encoder(usercred) - authority = "@".join(auth) - else: - usercred = "" - - authority = authority.lower() - *auth, port = authority.split(":") - if len(auth) > 0: - authority = encoder(":".join(auth)) + f":{port}" - else: - authority = encoder(authority) - - if usercred: - authority = f"{usercred}@{authority}" - - scheme_separator = "" - if authority or self.scheme == "file": - scheme_separator = "//" - - if path := self.path: - path = encoder(_normalize_path(path)) - - if query := self.query: - query = encoder(query) - - if fragment := self.fragment: - fragment = encoder(fragment) - - parts = [ - f"{self.scheme}:", - scheme_separator, - authority if authority else "", - path if path else "", - f"?{query}" if query else "", - f"#{fragment}" if fragment else "", - ] - return "".join(parts) - - -def _replace_chars(segment: str) -> str: - """Replace a certain subset of characters in a uri segment""" - return segment.replace("#", "%23").replace("?", "%3F") - - -def _normalize_path(path: str, preserve_case: bool = False) -> str: - """Normalise the path segment of a Uri. - - Parameters - ---------- - path - The path to normalise. - - preserve_case - If ``True``, preserve the case of the drive label on Windows. - If ``False``, the drive label will be lowercased. - - Returns - ------- - str - The normalised path. 
- """ - - # normalize to fwd-slashes on windows, - # on other systems bwd-slashes are valid - # filename character, eg /f\oo/ba\r.txt - if IS_WIN: - path = path.replace("\\", "/") - - # Normalize drive paths to lower case - if (not preserve_case) and (match := RE_DRIVE_LETTER_PATH.match(path)): - path = match.group(1) + match.group(2).lower() + path[match.end() :] - - return path - - # -- DB Types # # These represent the structure of data as stored in the SQLite database diff --git a/lib/esbonio/esbonio/sphinx_agent/types/uri.py b/lib/esbonio/esbonio/sphinx_agent/types/uri.py new file mode 100644 index 000000000..c6f8dbd37 --- /dev/null +++ b/lib/esbonio/esbonio/sphinx_agent/types/uri.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +import dataclasses +import os +import pathlib +import re +from typing import Callable +from typing import Optional +from typing import Union +from urllib import parse + +IS_WIN = os.name == "nt" +SCHEME = re.compile(r"^[a-zA-Z][a-zA-Z\d+.-]*$") +RE_DRIVE_LETTER_PATH = re.compile(r"^(\/?)([a-zA-Z]:)") + + +# TODO: Look into upstreaming this into pygls +# - if it works out +# - when pygls drops 3.7 (Uri uses the := operator) +@dataclasses.dataclass(frozen=True) +class Uri: + """Helper class for working with URIs.""" + + scheme: str + + authority: str + + path: str + + query: str + + fragment: str + + def __post_init__(self): + """Basic validation.""" + if self.scheme is None: + raise ValueError("URIs must have a scheme") + + if not SCHEME.match(self.scheme): + raise ValueError("Invalid scheme") + + if self.authority and self.path and (not self.path.startswith("/")): + raise ValueError("Paths with an authority must start with a slash '/'") + + if self.path and self.path.startswith("//") and (not self.authority): + raise ValueError( + "Paths without an authority cannot start with two slashes '//'" + ) + + def __eq__(self, other): + if type(other) is not type(self): + return False + + if self.scheme != other.scheme: + return False 
+ + if self.authority != other.authority: + return False + + if self.query != other.query: + return False + + if self.fragment != other.fragment: + return False + + if IS_WIN and self.scheme == "file": + # Filepaths on Windows are case-insensitive + if self.path.lower() != other.path.lower(): + return False + + elif self.path != other.path: + return False + + return True + + def __hash__(self): + if IS_WIN and self.scheme == "file": + # Filepaths on Windows are case-insensitive + path = self.path.lower() + else: + path = self.path + + return hash((self.scheme, self.authority, path, self.query, self.fragment)) + + def __fspath__(self): + """Return the file system representation of this uri. + + This makes Uri instances compatible with any function that expects an + ``os.PathLike`` object! + """ + # TODO: Should we raise an exception if scheme != "file"? + return self.as_fs_path(preserve_case=True) + + def __str__(self): + return self.as_string() + + def __truediv__(self, other): + return self.join(other) + + @classmethod + def create( + cls, + *, + scheme: str = "", + authority: str = "", + path: str = "", + query: str = "", + fragment: str = "", + ) -> Uri: + """Create a uri with the given attributes.""" + + if scheme in {"http", "https", "file"}: + if not path.startswith("/"): + path = f"/{path}" + + return cls( + scheme=scheme, + authority=authority, + path=path, + query=query, + fragment=fragment, + ) + + @classmethod + def parse(cls, uri: str) -> Uri: + """Parse the given uri from its string representation.""" + scheme, authority, path, _, query, fragment = parse.urlparse(uri) + return cls.create( + scheme=parse.unquote(scheme), + authority=parse.unquote(authority), + path=parse.unquote(path), + query=parse.unquote(query), + fragment=parse.unquote(fragment), + ) + + def resolve(self) -> Uri: + """Return the fully resolved version of this Uri.""" + + # This operation only makes sense for file uris + if self.scheme != "file": + return Uri.parse(str(self)) + + 
return Uri.for_file(pathlib.Path(self).resolve()) + + @classmethod + def for_file(cls, filepath: Union[str, os.PathLike[str]]) -> Uri: + """Create a uri based on the given filepath.""" + + fpath = os.fspath(filepath) + if IS_WIN: + fpath = fpath.replace("\\", "/") + + if fpath.startswith("//"): + authority, *path = fpath[2:].split("/") + fpath = "/".join(path) + else: + authority = "" + + return cls.create(scheme="file", authority=authority, path=fpath) + + @property + def fs_path(self) -> Optional[str]: + """Return the equivalent fs path.""" + return self.as_fs_path() + + def where(self, **kwargs) -> Uri: + """Return a transformed version of this uri where certain components of the uri + have been replaced with the given arguments. + + Passing a value of ``None`` will remove the given component entirely. + """ + keys = {"scheme", "authority", "path", "query", "fragment"} + valid_keys = keys.copy() & kwargs.keys() + + current = {k: getattr(self, k) for k in keys} + replacements = {k: kwargs[k] for k in valid_keys} + + return Uri.create(**{**current, **replacements}) + + def join(self, path: str) -> Uri: + """Join this Uri's path component with the given path and return the resulting + uri. 
+ + Parameters + ---------- + path + The path segment to join + + Returns + ------- + Uri + The resulting uri + """ + + if not self.path: + raise ValueError("This uri has no path") + + if IS_WIN: + fs_path = self.fs_path + if fs_path is None: + raise ValueError("Unable to join paths, fs_path is None") + + joined = os.path.normpath(os.path.join(fs_path, path)) + new_path = self.for_file(joined).path + else: + new_path = os.path.normpath(os.path.join(self.path, path)) + + return self.where(path=new_path) + + def as_fs_path(self, preserve_case: bool = False) -> Optional[str]: + """Return the file system path corresponding to this uri.""" + if self.path: + path = _normalize_path(self.path, preserve_case) + + if self.authority and len(path) > 1: + path = f"//{self.authority}{path}" + + # Remove the leading `/` from windows paths + elif RE_DRIVE_LETTER_PATH.match(path): + path = path[1:] + + if IS_WIN: + path = path.replace("/", "\\") + + return path + + return None + + def as_string(self, encode=True) -> str: + """Return a string representation of this Uri. + + Parameters + ---------- + encode + If ``True`` (the default), encode any special characters. 
+ + Returns + ------- + str + The string representation of the Uri + """ + + # See: https://github.com/python/mypy/issues/10740 + encoder: Callable[[str], str] = parse.quote if encode else _replace_chars # type: ignore[assignment] + + if authority := self.authority: + usercred, *auth = authority.split("@") + if len(auth) > 0: + *user, cred = usercred.split(":") + if len(user) > 0: + usercred = encoder(":".join(user)) + f":{encoder(cred)}" + else: + usercred = encoder(usercred) + authority = "@".join(auth) + else: + usercred = "" + + authority = authority.lower() + *auth, port = authority.split(":") + if len(auth) > 0: + authority = encoder(":".join(auth)) + f":{port}" + else: + authority = encoder(authority) + + if usercred: + authority = f"{usercred}@{authority}" + + scheme_separator = "" + if authority or self.scheme == "file": + scheme_separator = "//" + + if path := self.path: + path = encoder(_normalize_path(path)) + + if query := self.query: + query = encoder(query) + + if fragment := self.fragment: + fragment = encoder(fragment) + + parts = [ + f"{self.scheme}:", + scheme_separator, + authority if authority else "", + path if path else "", + f"?{query}" if query else "", + f"#{fragment}" if fragment else "", + ] + return "".join(parts) + + +def _replace_chars(segment: str) -> str: + """Replace a certain subset of characters in a uri segment""" + return segment.replace("#", "%23").replace("?", "%3F") + + +def _normalize_path(path: str, preserve_case: bool = False) -> str: + """Normalise the path segment of a Uri. + + Parameters + ---------- + path + The path to normalise. + + preserve_case + If ``True``, preserve the case of the drive label on Windows. + If ``False``, the drive label will be lowercased. + + Returns + ------- + str + The normalised path. 
+ """ + + # normalize to fwd-slashes on windows, + # on other systems bwd-slashes are valid + # filename character, eg /f\oo/ba\r.txt + if IS_WIN: + path = path.replace("\\", "/") + + # Normalize drive paths to lower case + if (not preserve_case) and (match := RE_DRIVE_LETTER_PATH.match(path)): + path = match.group(1) + match.group(2).lower() + path[match.end() :] + + return path