From 7722044f4b0f689eec8cb89afc8d3e4bb8e62908 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Wed, 1 Feb 2023 11:25:20 -0800 Subject: [PATCH] Parse raw metadata (#671) Co-authored-by: Donald Stufft Co-authored-by: Paul Moore Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> --- .github/workflows/test.yml | 2 +- docs/index.rst | 1 + docs/metadata.rst | 42 +++ noxfile.py | 4 +- src/packaging/_manylinux.py | 2 + src/packaging/metadata.py | 408 +++++++++++++++++++++++++++++ tests/metadata/everything.metadata | 42 +++ tests/test_manylinux.py | 8 +- tests/test_metadata.py | 249 ++++++++++++++++++ 9 files changed, 749 insertions(+), 9 deletions(-) create mode 100644 docs/metadata.rst create mode 100644 src/packaging/metadata.py create mode 100644 tests/metadata/everything.metadata create mode 100644 tests/test_metadata.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b4b4066a..d3d38710 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: matrix: os: [Ubuntu, Windows, macOS] python_version: - ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3.7", "pypy3.8", "pypy3.9"] + ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3.8", "pypy3.9"] steps: - uses: actions/checkout@v3 diff --git a/docs/index.rst b/docs/index.rst index aafdae83..6850e9e8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,6 +25,7 @@ You can install packaging with ``pip``: specifiers markers requirements + metadata tags utils diff --git a/docs/metadata.rst b/docs/metadata.rst new file mode 100644 index 00000000..b87574cb --- /dev/null +++ b/docs/metadata.rst @@ -0,0 +1,42 @@ +Metadata +======== + +.. currentmodule:: packaging.metadata + + +Both `source distributions`_ and `binary distributions`_ +(*sdists* and *wheels*, respectively) contain files recording the +`core metadata`_ for the distribution. This information is used for +everything from recording the name of the distribution to the +installation dependencies.
+ + +Usage +----- + +.. doctest:: + + >>> from packaging.metadata import parse_email + >>> metadata = "Metadata-Version: 2.3\nName: packaging\nVersion: 24.0" + >>> raw, unparsed = parse_email(metadata) + >>> raw["metadata_version"] + '2.3' + >>> raw["name"] + 'packaging' + >>> raw["version"] + '24.0' + + +Reference +--------- + +Low Level Interface +''''''''''''''''''' + +.. automodule:: packaging.metadata + :members: + + +.. _source distributions: https://packaging.python.org/en/latest/specifications/source-distribution-format/ +.. _binary distributions: https://packaging.python.org/en/latest/specifications/binary-distribution-format/ +.. _core metadata: https://packaging.python.org/en/latest/specifications/core-metadata/ diff --git a/noxfile.py b/noxfile.py index 6c480595..da5abc73 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,9 +21,7 @@ nox.options.reuse_existing_virtualenvs = True -@nox.session( - python=["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3.7", "pypy3.8", "pypy3.9"] -) +@nox.session(python=["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3.8", "pypy3.9"]) def tests(session): def coverage(*args): session.run("python", "-m", "coverage", *args) diff --git a/src/packaging/_manylinux.py b/src/packaging/_manylinux.py index 2f0cc743..449c655b 100644 --- a/src/packaging/_manylinux.py +++ b/src/packaging/_manylinux.py @@ -14,6 +14,8 @@ EF_ARM_ABI_FLOAT_HARD = 0x00000400 +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. 
@contextlib.contextmanager def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: try: diff --git a/src/packaging/metadata.py b/src/packaging/metadata.py new file mode 100644 index 00000000..e76a60c3 --- /dev/null +++ b/src/packaging/metadata.py @@ -0,0 +1,408 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import Dict, List, Optional, Tuple, Union, cast + +if sys.version_info >= (3, 8): # pragma: no cover + from typing import TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import TypedDict + else: + try: + from typing_extensions import TypedDict + except ImportError: + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. 
+ + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge cases were + # tightened up to provide better interoperability.
+ + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_STRING_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separated keywords into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in an accumulating dict. + # + # The other potential issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields.
+ parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. 
+_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata. + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. 
+ + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. 
In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibake happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores its data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list.
+ # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_STRING_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so we'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right.
+ try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed diff --git a/tests/metadata/everything.metadata b/tests/metadata/everything.metadata new file mode 100644 index 00000000..5412a083 --- /dev/null +++ b/tests/metadata/everything.metadata @@ -0,0 +1,42 @@ +Metadata-Version: 2.3 +Name: BeagleVote +Version: 1.0a2 +Platform: ObscureUnix +Platform: RareDOS +Supported-Platform: RedHat 7.2 +Supported-Platform: i386-win32-2791 +Summary: A module for collecting votes from beagles. +Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM +Keywords: dog,puppy,voting,election +Home-page: http://www.example.com/~cschultz/bvote/ +Download-URL: …/BeagleVote-0.45.tgz +Author: C. Schultz, Universal Features Syndicate, + Los Angeles, CA +Author-email: "C. Schultz" +Maintainer: C. Schultz, Universal Features Syndicate, + Los Angeles, CA +Maintainer-email: "C. Schultz" +License: This software may only be obtained by sending the + author a postcard, and then the user promises not + to redistribute it. 
+Classifier: Development Status :: 4 - Beta +Classifier: Environment :: Console (Text Based) +Provides-Extra: pdf +Requires-Dist: reportlab; extra == 'pdf' +Requires-Dist: pkginfo +Requires-Dist: PasteDeploy +Requires-Dist: zope.interface (>3.5.0) +Requires-Dist: pywin32 >1.0; sys_platform == 'win32' +Requires-Python: >=3 +Requires-External: C +Requires-External: libpng (>=1.5) +Requires-External: make; sys_platform != "win32" +Project-URL: Bug Tracker, http://bitbucket.org/tarek/distribute/issues/ +Project-URL: Documentation, https://example.com/BeagleVote +Provides-Dist: OtherProject +Provides-Dist: AnotherProject (3.4) +Provides-Dist: virtual_package; python_version >= "3.4" +Dynamic: Obsoletes-Dist +ThisIsNotReal: Hello! + +This description intentionally left blank. diff --git a/tests/test_manylinux.py b/tests/test_manylinux.py index dafdfc3d..3561bb99 100644 --- a/tests/test_manylinux.py +++ b/tests/test_manylinux.py @@ -3,6 +3,7 @@ except ImportError: ctypes = None import os +import pathlib import platform import sys import types @@ -169,11 +170,8 @@ def test_glibc_version_string_none(monkeypatch): ) def test_parse_elf_bad_executable(monkeypatch, content): if content: - path = os.path.join( - os.path.dirname(__file__), - "manylinux", - f"hello-world-{content}", - ) + path = pathlib.Path(__file__).parent / "manylinux" / f"hello-world-{content}" + path = os.fsdecode(path) else: path = None with _parse_elf(path) as ef: diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 00000000..22fe76ba --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,249 @@ +import pathlib + +import pytest + +from packaging import metadata + +_RAW_TO_EMAIL_MAPPING = { + raw: email for email, raw in metadata._EMAIL_TO_RAW_MAPPING.items() +} + + +class TestRawMetadata: + @pytest.mark.parametrize("raw_field", metadata._STRING_FIELDS) + def test_non_repeating_fields_only_once(self, raw_field): + data = "VaLuE" + header_field = 
_RAW_TO_EMAIL_MAPPING[raw_field] + single_header = f"{header_field}: {data}" + raw, unparsed = metadata.parse_email(single_header) + assert not unparsed + assert len(raw) == 1 + assert raw_field in raw + assert raw[raw_field] == data + + @pytest.mark.parametrize("raw_field", metadata._STRING_FIELDS) + def test_non_repeating_fields_repeated(self, raw_field): + header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + data = "VaLuE" + single_header = f"{header_field}: {data}" + repeated_header = "\n".join([single_header] * 2) + raw, unparsed = metadata.parse_email(repeated_header) + assert not raw + assert len(unparsed) == 1 + assert header_field in unparsed + assert unparsed[header_field] == [data] * 2 + + @pytest.mark.parametrize("raw_field", metadata._LIST_STRING_FIELDS) + def test_repeating_fields_only_once(self, raw_field): + data = "VaLuE" + header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + single_header = f"{header_field}: {data}" + raw, unparsed = metadata.parse_email(single_header) + assert not unparsed + assert len(raw) == 1 + assert raw_field in raw + assert raw[raw_field] == [data] + + @pytest.mark.parametrize("raw_field", metadata._LIST_STRING_FIELDS) + def test_repeating_fields_repeated(self, raw_field): + header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + data = "VaLuE" + single_header = f"{header_field}: {data}" + repeated_header = "\n".join([single_header] * 2) + raw, unparsed = metadata.parse_email(repeated_header) + assert not unparsed + assert len(raw) == 1 + assert raw_field in raw + assert raw[raw_field] == [data] * 2 + + @pytest.mark.parametrize( + ["given", "expected"], + [ + ("A", ["A"]), + ("A ", ["A"]), + (" A", ["A"]), + ("A, B", ["A", "B"]), + ("A,B", ["A", "B"]), + (" A, B", ["A", "B"]), + ("A,B ", ["A", "B"]), + ("A B", ["A B"]), + ], + ) + def test_keywords(self, given, expected): + header = f"Keywords: {given}" + raw, unparsed = metadata.parse_email(header) + assert not unparsed + assert len(raw) == 1 + assert "keywords" in raw + assert 
raw["keywords"] == expected + + @pytest.mark.parametrize( + ["given", "expected"], + [ + ("", {"": ""}), + ("A", {"A": ""}), + ("A,B", {"A": "B"}), + ("A, B", {"A": "B"}), + (" A,B", {"A": "B"}), + ("A,B ", {"A": "B"}), + ("A,B,C", {"A": "B,C"}), + ], + ) + def test_project_urls_parsing(self, given, expected): + header = f"project-url: {given}" + raw, unparsed = metadata.parse_email(header) + assert not unparsed + assert len(raw) == 1 + assert "project_urls" in raw + assert raw["project_urls"] == expected + + def test_duplicate_project_urls(self): + header = "project-url: A, B\nproject-url: A, C" + raw, unparsed = metadata.parse_email(header) + assert not raw + assert len(unparsed) == 1 + assert "project-url" in unparsed + assert unparsed["project-url"] == ["A, B", "A, C"] + + def test_str_input(self): + name = "Tarek Ziadé" + header = f"author: {name}" + raw, unparsed = metadata.parse_email(header) + assert not unparsed + assert len(raw) == 1 + assert "author" in raw + assert raw["author"] == name + + def test_bytes_input(self): + name = "Tarek Ziadé" + header = f"author: {name}".encode() + raw, unparsed = metadata.parse_email(header) + assert not unparsed + assert len(raw) == 1 + assert "author" in raw + assert raw["author"] == name + + def test_header_mojibake(self): + value = "\xc0msterdam" + header_name = "value" + header_bytes = f"{header_name}: {value}".encode("latin1") + raw, unparsed = metadata.parse_email(header_bytes) + # Sanity check + with pytest.raises(UnicodeDecodeError): + header_bytes.decode("utf-8") + assert not raw + assert len(unparsed) == 1 + assert header_name in unparsed + assert unparsed[header_name] == [value] + + @pytest.mark.parametrize( + ["given"], [("hello",), ("description: hello",), (b"hello",)] + ) + def test_description(self, given): + raw, unparsed = metadata.parse_email(given) + assert not unparsed + assert len(raw) == 1 + assert "description" in raw + assert raw["description"] == "hello" + + def test_description_non_utf8(self): 
+ header = "\xc0msterdam" + header_bytes = header.encode("latin1") + raw, unparsed = metadata.parse_email(header_bytes) + assert not raw + assert len(unparsed) == 1 + assert "description" in unparsed + assert unparsed["description"] == [header_bytes] + + @pytest.mark.parametrize( + ["given", "expected"], + [ + ("description: 1\ndescription: 2", ["1", "2"]), + ("description: 1\n\n2", ["1", "2"]), + ("description: 1\ndescription: 2\n\n3", ["1", "2", "3"]), + ], + ) + def test_description_multiple(self, given, expected): + raw, unparsed = metadata.parse_email(given) + assert not raw + assert len(unparsed) == 1 + assert "description" in unparsed + assert unparsed["description"] == expected + + def test_lowercase_keys(self): + header = "AUTHOR: Tarek Ziadé\nWhatever: Else" + raw, unparsed = metadata.parse_email(header) + assert len(raw) == 1 + assert "author" in raw + assert len(unparsed) == 1 + assert "whatever" in unparsed + + def test_complete(self): + """Test all fields (except `Obsoletes-Dist`). + + `Obsoletes-Dist` was sacrificed to provide a value for `Dynamic`. + """ + path = pathlib.Path(__file__).parent / "metadata" / "everything.metadata" + with path.open("r", encoding="utf-8") as file: + metadata_contents = file.read() + raw, unparsed = metadata.parse_email(metadata_contents) + assert len(unparsed) == 1 + assert unparsed["thisisnotreal"] == ["Hello!"] + assert len(raw) == 24 + assert raw["metadata_version"] == "2.3" + assert raw["name"] == "BeagleVote" + assert raw["version"] == "1.0a2" + assert raw["platforms"] == ["ObscureUnix", "RareDOS"] + assert raw["supported_platforms"] == ["RedHat 7.2", "i386-win32-2791"] + assert raw["summary"] == "A module for collecting votes from beagles." 
+ assert ( + raw["description_content_type"] + == "text/markdown; charset=UTF-8; variant=GFM" + ) + assert raw["keywords"] == ["dog", "puppy", "voting", "election"] + assert raw["home_page"] == "http://www.example.com/~cschultz/bvote/" + assert raw["download_url"] == "…/BeagleVote-0.45.tgz" + assert raw["author"] == ( + "C. Schultz, Universal Features Syndicate,\n" + " Los Angeles, CA " + ) + assert raw["author_email"] == '"C. Schultz" ' + assert raw["maintainer"] == ( + "C. Schultz, Universal Features Syndicate,\n" + " Los Angeles, CA " + ) + assert raw["maintainer_email"] == '"C. Schultz" ' + assert raw["license"] == ( + "This software may only be obtained by sending the\n" + " author a postcard, and then the user promises not\n" + " to redistribute it." + ) + assert raw["classifiers"] == [ + "Development Status :: 4 - Beta", + "Environment :: Console (Text Based)", + ] + assert raw["provides_extra"] == ["pdf"] + assert raw["requires_dist"] == [ + "reportlab; extra == 'pdf'", + "pkginfo", + "PasteDeploy", + "zope.interface (>3.5.0)", + "pywin32 >1.0; sys_platform == 'win32'", + ] + assert raw["requires_python"] == ">=3" + assert raw["requires_external"] == [ + "C", + "libpng (>=1.5)", + 'make; sys_platform != "win32"', + ] + assert raw["project_urls"] == { + "Bug Tracker": "http://bitbucket.org/tarek/distribute/issues/", + "Documentation": "https://example.com/BeagleVote", + } + assert raw["provides_dist"] == [ + "OtherProject", + "AnotherProject (3.4)", + 'virtual_package; python_version >= "3.4"', + ] + assert raw["dynamic"] == ["Obsoletes-Dist"] + assert raw["description"] == "This description intentionally left blank.\n"