forked from python-poetry/poetry-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve parsing of author information
Instead of relying on regular expressions, this patch leverages Python’s built-in `email.utils.parseaddr()` function to parse an RFC-822-compliant email address string into its name and address parts. This should also resolve issues with special characters in the name part; see, for example, Poetry issues python-poetry/poetry#370 and python-poetry/poetry#798.
- Loading branch information
Showing
4 changed files
with
93 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
from poetry.core.packages.dependency_group import MAIN_GROUP | ||
from poetry.core.packages.specification import PackageSpecification | ||
from poetry.core.packages.utils.utils import create_nested_marker | ||
from poetry.core.utils.helpers import parse_author | ||
from poetry.core.version.exceptions import InvalidVersion | ||
from poetry.core.version.markers import parse_marker | ||
|
||
|
@@ -32,6 +33,8 @@ | |
|
||
T = TypeVar("T", bound="Package") | ||
|
||
# TODO: once poetry.console.commands.init.InitCommand._validate_author | ||
# uses poetry.core.utils.helpers.parse_author, this can be removed. | ||
AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"():&]+)(?: <(?P<email>.+?)>)?$") | ||
|
||
|
||
|
@@ -231,34 +234,28 @@ def _get_author(self) -> dict[str, str | None]: | |
if not self._authors: | ||
return {"name": None, "email": None} | ||
|
||
m = AUTHOR_REGEX.match(self._authors[0]) | ||
name, email = parse_author(self._authors[0]) | ||
|
||
if m is None: | ||
if not name or not email: | ||
raise ValueError( | ||
"Invalid author string. Must be in the format: " | ||
"John Smith <[email protected]>" | ||
) | ||
|
||
name = m.group("name") | ||
email = m.group("email") | ||
|
||
return {"name": name, "email": email} | ||
|
||
def _get_maintainer(self) -> dict[str, str | None]: | ||
if not self._maintainers: | ||
return {"name": None, "email": None} | ||
|
||
m = AUTHOR_REGEX.match(self._maintainers[0]) | ||
name, email = parse_author(self._maintainers[0]) | ||
|
||
if m is None: | ||
if not name or not email: | ||
raise ValueError( | ||
"Invalid maintainer string. Must be in the format: " | ||
"John Smith <[email protected]>" | ||
) | ||
|
||
name = m.group("name") | ||
email = m.group("email") | ||
|
||
return {"name": name, "email": email} | ||
|
||
@property | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import warnings | ||
|
||
from contextlib import contextmanager | ||
from email.utils import parseaddr | ||
from pathlib import Path | ||
from typing import Any | ||
from typing import Iterator | ||
|
@@ -105,3 +106,26 @@ def readme_content_type(path: str | Path) -> str: | |
return "text/markdown" | ||
else: | ||
return "text/plain" | ||
|
||
|
||
def parse_author(address: str) -> tuple[str | None, str | None]: | ||
"""Parse name and address parts from an email address string. | ||
>>> parse_author("John Doe <[email protected]>") | ||
('John Doe', '[email protected]') | ||
.. note:: | ||
If the input string does not contain an ``@`` character, it is | ||
assumed that it represents only a name without an email address. | ||
:param address: the email address string to parse. | ||
:return: a 2-tuple with the parsed name and email address. If a | ||
part is missing, ``None`` will be returned in its place. | ||
""" | ||
if "@" not in address: | ||
return address, None | ||
name, email = parseaddr(address) | ||
if not name and "@" not in email: | ||
return email, None | ||
return name or None, email or None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import pytest | ||
|
||
from poetry.core.utils.helpers import combine_unicode | ||
from poetry.core.utils.helpers import parse_author | ||
from poetry.core.utils.helpers import parse_requires | ||
from poetry.core.utils.helpers import readme_content_type | ||
from poetry.core.utils.helpers import temporary_directory | ||
|
@@ -118,3 +119,60 @@ def test_utils_helpers_readme_content_type( | |
readme: str | Path, content_type: str | ||
) -> None: | ||
assert readme_content_type(readme) == content_type | ||
|
||
|
||
def test_utils_helpers_parse_author() -> None:
    """Test the :func:`parse_author` function."""

    # NOTE(review): the e-mail literals below were unreadable (obfuscated) in
    # the reviewed copy; they are reconstructed as example.com placeholders.
    # Confirm against the upstream test file.

    # Verify the (probable) default use case
    name, email = parse_author("John Doe <john.doe@example.com>")
    assert name == "John Doe"
    assert email == "john.doe@example.com"

    # Name only
    name, email = parse_author("John Doe")
    assert name == "John Doe"
    assert email is None

    # Name with a “special” character + email address
    name, email = parse_author("R&D <researchanddevelopment@example.com>")
    assert name == "R&D"
    assert email == "researchanddevelopment@example.com"

    # Name with a “special” character only
    name, email = parse_author("R&D")
    assert name == "R&D"
    assert email is None

    # Name with fancy unicode character + email address
    name, email = parse_author("my·fancy corp <my-fancy-corp@example.com>")
    assert name == "my·fancy corp"
    assert email == "my-fancy-corp@example.com"

    # Name with fancy unicode character only
    name, email = parse_author("my·fancy corp")
    assert name == "my·fancy corp"
    assert email is None

    # Email address only, wrapped in angular brackets
    name, email = parse_author("<john.doe@example.com>")
    assert name is None
    assert email == "john.doe@example.com"

    # Email address only
    name, email = parse_author("john.doe@example.com")
    assert name is None
    assert email == "john.doe@example.com"

    # Non-RFC-conform cases with unquoted commas
    # NOTE(review): these expectations rely on ``email.utils.parseaddr``
    # returning the text before the comma; CPython releases that parse
    # strictly by default may return empty strings instead -- confirm on the
    # supported interpreter versions.
    name, email = parse_author("asf,dasf@example.com")
    assert name == "asf"
    assert email is None

    name, email = parse_author("asf,<dasf@example.com>")
    assert name == "asf"
    assert email is None

    name, email = parse_author("asf, dasf@example.com")
    assert name == "asf"
    assert email is None