Skip to content

Commit

Permalink
sphinx-agent: Move Uri implementation into its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
alcarney committed Sep 22, 2024
1 parent 44daa57 commit 986acef
Show file tree
Hide file tree
Showing 2 changed files with 331 additions and 322 deletions.
328 changes: 6 additions & 322 deletions lib/esbonio/esbonio/sphinx_agent/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
"""Type definitions for the sphinx agent.
This is the *only* file shared between the agent itself and the parent language server.
For this reason this file *cannot* import anything from Sphinx.
This is the *only* module shared between the agent itself and the parent language
server. For this reason this module *cannot* import anything from Sphinx.
"""

from __future__ import annotations

import dataclasses
import os
import pathlib
import re
from typing import Any
from typing import Callable
from typing import Optional
from typing import Union
from urllib import parse

from .lsp import Diagnostic
from .lsp import DiagnosticSeverity
Expand All @@ -25,17 +21,21 @@
from .roles import RST_DEFAULT_ROLE
from .roles import RST_ROLE
from .roles import Role
from .uri import IS_WIN
from .uri import Uri

__all__ = (
"Diagnostic",
"DiagnosticSeverity",
"IS_WIN",
"Location",
"MYST_ROLE",
"Position",
"RST_DEFAULT_ROLE",
"RST_ROLE",
"Range",
"Role",
"Uri",
)

MYST_DIRECTIVE: re.Pattern = re.compile(
Expand Down Expand Up @@ -129,322 +129,6 @@
"""


IS_WIN = os.name == "nt"
SCHEME = re.compile(r"^[a-zA-Z][a-zA-Z\d+.-]*$")
RE_DRIVE_LETTER_PATH = re.compile(r"^(\/?)([a-zA-Z]:)")


# TODO: Look into upstreaming this into pygls
# - if it works out
# - when pygls drops 3.7 (Uri uses the := operator)
@dataclasses.dataclass(frozen=True)
class Uri:
"""Helper class for working with URIs."""

scheme: str

authority: str

path: str

query: str

fragment: str

def __post_init__(self):
"""Basic validation."""
if self.scheme is None:
raise ValueError("URIs must have a scheme")

if not SCHEME.match(self.scheme):
raise ValueError("Invalid scheme")

if self.authority and self.path and (not self.path.startswith("/")):
raise ValueError("Paths with an authority must start with a slash '/'")

if self.path and self.path.startswith("//") and (not self.authority):
raise ValueError(
"Paths without an authority cannot start with two slashes '//'"
)

def __eq__(self, other):
if type(other) is not type(self):
return False

if self.scheme != other.scheme:
return False

if self.authority != other.authority:
return False

if self.query != other.query:
return False

if self.fragment != other.fragment:
return False

if IS_WIN and self.scheme == "file":
# Filepaths on windows are case in-sensitive
if self.path.lower() != other.path.lower():
return False

elif self.path != other.path:
return False

return True

def __hash__(self):
if IS_WIN and self.scheme == "file":
# Filepaths on windows are case in-sensitive
path = self.path.lower()
else:
path = self.path

return hash((self.scheme, self.authority, path, self.query, self.fragment))

def __fspath__(self):
"""Return the file system representation of this uri.
This makes Uri instances compatible with any function that expects an
``os.PathLike`` object!
"""
# TODO: Should we raise an exception if scheme != "file"?
return self.as_fs_path(preserve_case=True)

def __str__(self):
return self.as_string()

def __truediv__(self, other):
return self.join(other)

@classmethod
def create(
cls,
*,
scheme: str = "",
authority: str = "",
path: str = "",
query: str = "",
fragment: str = "",
) -> Uri:
"""Create a uri with the given attributes."""

if scheme in {"http", "https", "file"}:
if not path.startswith("/"):
path = f"/{path}"

return cls(
scheme=scheme,
authority=authority,
path=path,
query=query,
fragment=fragment,
)

@classmethod
def parse(cls, uri: str) -> Uri:
"""Parse the given uri from its string representation."""
scheme, authority, path, _, query, fragment = parse.urlparse(uri)
return cls.create(
scheme=parse.unquote(scheme),
authority=parse.unquote(authority),
path=parse.unquote(path),
query=parse.unquote(query),
fragment=parse.unquote(fragment),
)

def resolve(self) -> Uri:
"""Return the fully resolved version of this Uri."""

# This operation only makes sense for file uris
if self.scheme != "file":
return Uri.parse(str(self))

return Uri.for_file(pathlib.Path(self).resolve())

@classmethod
def for_file(cls, filepath: Union[str, os.PathLike[str]]) -> Uri:
"""Create a uri based on the given filepath."""

fpath = os.fspath(filepath)
if IS_WIN:
fpath = fpath.replace("\\", "/")

if fpath.startswith("//"):
authority, *path = fpath[2:].split("/")
fpath = "/".join(path)
else:
authority = ""

return cls.create(scheme="file", authority=authority, path=fpath)

@property
def fs_path(self) -> Optional[str]:
"""Return the equivalent fs path."""
return self.as_fs_path()

def where(self, **kwargs) -> Uri:
"""Return an transformed version of this uri where certain components of the uri
have been replace with the given arguments.
Passing a value of ``None`` will remove the given component entirely.
"""
keys = {"scheme", "authority", "path", "query", "fragment"}
valid_keys = keys.copy() & kwargs.keys()

current = {k: getattr(self, k) for k in keys}
replacements = {k: kwargs[k] for k in valid_keys}

return Uri.create(**{**current, **replacements})

def join(self, path: str) -> Uri:
"""Join this Uri's path component with the given path and return the resulting
uri.
Parameters
----------
path
The path segment to join
Returns
-------
Uri
The resulting uri
"""

if not self.path:
raise ValueError("This uri has no path")

if IS_WIN:
fs_path = self.fs_path
if fs_path is None:
raise ValueError("Unable to join paths, fs_path is None")

joined = os.path.normpath(os.path.join(fs_path, path))
new_path = self.for_file(joined).path
else:
new_path = os.path.normpath(os.path.join(self.path, path))

return self.where(path=new_path)

def as_fs_path(self, preserve_case: bool = False) -> Optional[str]:
"""Return the file system path correspondin with this uri."""
if self.path:
path = _normalize_path(self.path, preserve_case)

if self.authority and len(path) > 1:
path = f"//{self.authority}{path}"

# Remove the leading `/` from windows paths
elif RE_DRIVE_LETTER_PATH.match(path):
path = path[1:]

if IS_WIN:
path = path.replace("/", "\\")

return path

return None

def as_string(self, encode=True) -> str:
"""Return a string representation of this Uri.
Parameters
----------
encode
If ``True`` (the default), encode any special characters.
Returns
-------
str
The string representation of the Uri
"""

# See: https://github.com/python/mypy/issues/10740
encoder: Callable[[str], str] = parse.quote if encode else _replace_chars # type: ignore[assignment]

if authority := self.authority:
usercred, *auth = authority.split("@")
if len(auth) > 0:
*user, cred = usercred.split(":")
if len(user) > 0:
usercred = encoder(":".join(user)) + f":{encoder(cred)}"
else:
usercred = encoder(usercred)
authority = "@".join(auth)
else:
usercred = ""

authority = authority.lower()
*auth, port = authority.split(":")
if len(auth) > 0:
authority = encoder(":".join(auth)) + f":{port}"
else:
authority = encoder(authority)

if usercred:
authority = f"{usercred}@{authority}"

scheme_separator = ""
if authority or self.scheme == "file":
scheme_separator = "//"

if path := self.path:
path = encoder(_normalize_path(path))

if query := self.query:
query = encoder(query)

if fragment := self.fragment:
fragment = encoder(fragment)

parts = [
f"{self.scheme}:",
scheme_separator,
authority if authority else "",
path if path else "",
f"?{query}" if query else "",
f"#{fragment}" if fragment else "",
]
return "".join(parts)


def _replace_chars(segment: str) -> str:
"""Replace a certain subset of characters in a uri segment"""
return segment.replace("#", "%23").replace("?", "%3F")


def _normalize_path(path: str, preserve_case: bool = False) -> str:
"""Normalise the path segment of a Uri.
Parameters
----------
path
The path to normalise.
preserve_case
If ``True``, preserve the case of the drive label on Windows.
If ``False``, the drive label will be lowercased.
Returns
-------
str
The normalised path.
"""

# normalize to fwd-slashes on windows,
# on other systems bwd-slashes are valid
# filename character, eg /f\oo/ba\r.txt
if IS_WIN:
path = path.replace("\\", "/")

# Normalize drive paths to lower case
if (not preserve_case) and (match := RE_DRIVE_LETTER_PATH.match(path)):
path = match.group(1) + match.group(2).lower() + path[match.end() :]

return path


# -- DB Types
#
# These represent the structure of data as stored in the SQLite database
Expand Down
Loading

0 comments on commit 986acef

Please sign in to comment.