Skip to content

Commit

Permalink
feat: environment - gather declared license information according to …
Browse files Browse the repository at this point in the history
…PEP639 (#755)

From python environments, gather additional declared license information
according to [PEP 639](https://peps.python.org/pep-0639) (improving
license clarity with better package metadata).

New CLI switches for `cyclonedx-py environment`:
* `--PEP-639`: Enable license gathering according to PEP 639 (improving
license clarity with better package metadata).
  The behavior may change during the draft development of the PEP.
* `--gather-license-texts`: Enable license text gathering.

In the current state of the implementation, `--gather-license-texts` has an effect
only if `--PEP-639` is also given.



---------

Signed-off-by: Jan Kowalleck <[email protected]>
  • Loading branch information
jkowalleck authored Jun 10, 2024
1 parent cba521e commit e9cc805
Show file tree
Hide file tree
Showing 57 changed files with 14,333 additions and 4 deletions.
20 changes: 20 additions & 0 deletions cyclonedx_py/_internal/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .utils.cdx import licenses_fixup, make_bom
from .utils.packaging import metadata2extrefs, metadata2licenses, normalize_packagename
from .utils.pep610 import PackageSourceArchive, PackageSourceVcs, packagesource2extref, packagesource4dist
from .utils.pep639 import dist2licenses as dist2licenses_pep639
from .utils.pyproject import pyproject2component, pyproject2dependencies, pyproject_load

if TYPE_CHECKING: # pragma: no cover
Expand Down Expand Up @@ -102,6 +103,16 @@ def make_argument_parser(**kwargs: Any) -> 'ArgumentParser':
• Build an SBOM from PDM environment:
$ %(prog)s "$(pdm info --python)"
""")
p.add_argument('--PEP-639',
action='store_true',
dest='pep639',
help='Enable license gathering according to PEP 639 '
'(improving license clarity with better package metadata).\n'
'The behavior may change during the draft development of the PEP.')
p.add_argument('--gather-license-texts',
action='store_true',
dest='gather_license_texts',
help='Enable license text gathering.')
add_argument_pyproject(p)
add_argument_mc_type(p)
# TODO possible additional switch:
Expand All @@ -118,8 +129,12 @@ def make_argument_parser(**kwargs: Any) -> 'ArgumentParser':

def __init__(self, *, logger: 'Logger', pep639: bool, gather_license_texts: bool, **__: Any) -> None:
    """
    Remember the settings this builder works with.

    :param logger: logger that is kept for later use by this instance.
    :param pep639: whether license gathering according to PEP 639 is enabled.
    :param gather_license_texts: whether license text gathering is enabled.
    :param __: any further keyword arguments are accepted and ignored.
    """
    self._gather_license_texts = gather_license_texts
    self._pep639 = pep639
    self._logger = logger

def __call__(self, *, # type:ignore[override]
python: Optional[str],
Expand Down Expand Up @@ -167,6 +182,11 @@ def __add_components(self, bom: 'Bom',
external_references=metadata2extrefs(dist_meta),
# path of dist-package on disc? naaa... a package may have multiple files/folders on disc
)
if self._pep639:
component.licenses.update(
dist2licenses_pep639(dist,
self._gather_license_texts,
self._logger))
del dist_meta, dist_name, dist_version
self.__component_add_extref_and_purl(component, packagesource4dist(dist))
all_components[normalize_packagename(component.name)] = (
Expand Down
38 changes: 38 additions & 0 deletions cyclonedx_py/_internal/utils/mimetypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is part of CycloneDX Python Lib
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.

from mimetypes import guess_type as _stdlib_guess_type
from os.path import splitext
from typing import Optional

_ext_mime_map = {
    # file extension (lower-case, without the leading dot) -> media type
    # https://www.iana.org/assignments/media-types/media-types.xhtml
    'md': 'text/markdown',
    'rst': 'text/prs.fallenstein.rst',
    'txt': 'text/plain',
    # add more mime types. pull-requests welcome!
}


def guess_type(file_name: str) -> Optional[str]:
    """
    Guess the media type of a file from its name.

    The stdlib `mimetypes.guess_type()` is inconsistent, as its answers depend heavily on the type registry
    of the env/os. Therefore, this polyfill looks the extension up in a fixed map first, and asks the stdlib
    only when the map has no entry for it.

    :param file_name: name or path of the file; the extension is matched case-insensitively.
    :return: the media type, or `None` if neither the map nor the stdlib knows one.
    """
    suffix = splitext(file_name)[1]
    pinned = _ext_mime_map.get(suffix[1:].lower())
    if pinned:
        return pinned
    return _stdlib_guess_type(file_name)[0]
6 changes: 2 additions & 4 deletions cyclonedx_py/_internal/utils/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def metadata2licenses(metadata: 'PackageMetadata') -> Generator['License', None,
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#classifier-multiple-use
classifiers: List[str] = metadata.get_all('Classifier') # type:ignore[assignment]
yield from classifiers2licenses(classifiers, lfac, lack)
for mlicense in metadata.get_all('License', ()):
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#license
for mlicense in set(metadata.get_all('License', ())):
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#license
if len(mlicense) <= 0:
continue
license = lfac.make_from_string(mlicense,
Expand All @@ -57,8 +57,6 @@ def metadata2licenses(metadata: 'PackageMetadata') -> Generator['License', None,
text=AttachedText(content=mlicense))
else:
yield license
# TODO: iterate over "License-File" declarations and read them
# for mlfile in metadata.get_all('License-File'): ...


def metadata2extrefs(metadata: 'PackageMetadata') -> Generator['ExternalReference', None, None]:
Expand Down
80 changes: 80 additions & 0 deletions cyclonedx_py/_internal/utils/pep639.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# This file is part of CycloneDX Python Lib
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.

"""
Functionality related to PEP 639.
See https://peps.python.org/pep-0639/
"""

from base64 import b64encode
from os.path import join
from typing import TYPE_CHECKING, Generator

from cyclonedx.factory.license import LicenseFactory
from cyclonedx.model import AttachedText, Encoding
from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement

from .mimetypes import guess_type

if TYPE_CHECKING: # pragma: no cover
from importlib.metadata import Distribution
from logging import Logger

from cyclonedx.model.license import License


def dist2licenses(
    dist: 'Distribution',
    gather_text: bool,
    logger: 'Logger'
) -> Generator['License', None, None]:
    """
    Yield the licenses a distribution declares according to PEP 639.

    The `License-Expression` metadata field, if present, is turned into a license via the license factory.
    If `gather_text` is set, every distinct `License-File` entry is read from the distribution and yielded
    as a named license with the file's content attached. Content whose guessed media type is not `text/*`
    is attached base64-encoded.

    :param dist: the distribution to inspect.
    :param gather_text: whether to read and attach the declared license files.
    :param logger: receives a debug message for each declared license file that could not be read.
    :return: generator of licenses, all acknowledged as "declared".
    """
    lfac = LicenseFactory()
    lack = LicenseAcknowledgement.DECLARED
    metadata = dist.metadata  # see https://packaging.python.org/en/latest/specifications/core-metadata/
    # Test for presence before subscripting: subscripting a missing key relies on an implicit `None`,
    # which is deprecated in `importlib.metadata` and announced to become a `KeyError`.
    if 'License-Expression' in metadata:
        # see spec: https://peps.python.org/pep-0639/#add-license-expression-field
        yield lfac.make_from_string(metadata['License-Expression'],
                                    license_acknowledgement=lack)
    if not gather_text:
        return
    # sorted, so the order of yielded licenses and log messages is the same on every run
    for mlfile in sorted(set(metadata.get_all('License-File', ()))):
        # see spec: https://peps.python.org/pep-0639/#add-license-file-field
        # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020  # noqa: E501

        # per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel.
        # but in practice, other locations are used, too.
        try:
            content = dist.read_text(join('licenses', mlfile)) \
                or dist.read_text(join('license_files', mlfile)) \
                or dist.read_text(mlfile)
        except UnicodeDecodeError as error:
            # `read_text()` decodes the file; one license file in an unexpected encoding
            # must not abort the processing of the whole environment.
            logger.debug('Error: failed to decode license file %r for dist %r: %s',
                         mlfile, metadata['Name'], error)
            continue
        if content is None:  # pragma: no cover
            logger.debug('Error: failed to read license file %r for dist %r',
                         mlfile, metadata['Name'])
            continue
        encoding = None
        content_type = guess_type(mlfile) or AttachedText.DEFAULT_CONTENT_TYPE
        # per default, license files are human-readable texts.
        if not content_type.startswith('text/'):
            encoding = Encoding.BASE_64
            content = b64encode(content.encode('utf-8')).decode('ascii')
        yield DisjunctiveLicense(
            name=f'declared license file: {mlfile}',
            acknowledgement=lack,
            text=AttachedText(
                content=content,
                encoding=encoding,
                content_type=content_type
            ))
5 changes: 5 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ The full documentation can be issued by running with ``environment --help``:
$ cyclonedx-py environment --help
usage: cyclonedx-py environment [-h] [-v]
[--PEP-639] [--gather-license-texts]
[--short-PURLs] [--output-reproducible]
[--validate | --no-validate]
[-o <file>] [--sv <version>] [--of <format>]
Expand All @@ -65,6 +66,10 @@ The full documentation can be issued by running with ``environment --help``:
options:
-h, --help show this help message and exit
--PEP-639 Enable license gathering according to PEP 639 (improving license clarity with better package metadata).
The behavior may change during the draft development of the PEP.
--gather-license-texts
Enable license text gathering.
--pyproject <file> Path to the root component's `pyproject.toml` file.
This should point to a file compliant with PEP 621 (storing project metadata).
--mc-type <type> Type of the main component
Expand Down
64 changes: 64 additions & 0 deletions tests/_data/infiles/environment/with-license-pep639/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
initialize this testbed.
"""

from os import name as os_name
from os.path import dirname, join
from subprocess import PIPE, CompletedProcess, run # nosec:B404
from sys import argv, executable
from typing import Any
from venv import EnvBuilder

# `main` is the only name this module exports.
__all__ = ['main']

# Directory that holds this script; pip is run with it as working directory.
# NOTE(review): when run as a script on Python < 3.9, `__file__` may be a relative path -- confirm this is fine.
this_dir = dirname(__file__)
# Location of the virtual environment that `main()` creates and that pip operates on.
env_dir = join(this_dir, '.venv')
# Constraints file handed to `pip install -c`; rewritten when the script is called with `--pin`.
constraint_file = join(this_dir, 'pinning.txt')


def pip_run(*args: str, **kwargs: Any) -> CompletedProcess:
    """
    Run pip against the testbed's virtual environment.

    pip is not API, but a CLI -- so it is called like one: through the current interpreter, as a subprocess.
    The full command is printed before it is executed.

    :param args: arguments appended after `pip --python <env_dir>`.
    :param kwargs: additional keyword arguments passed on to `subprocess.run()`.
    :return: the completed process.
    :raises RuntimeError: if the process exits with a non-zero return code.
    """
    command = (executable, '-m', 'pip', '--python', env_dir) + args
    print('+ ', *command)
    completed = run(command, **kwargs, cwd=this_dir, shell=False)  # nosec:B603
    if completed.returncode == 0:
        return completed
    raise RuntimeError('process failed')


def pip_install(*args: str) -> None:
    """
    Install packages into the testbed's virtual environment.

    :param args: additional arguments for `pip install`, usually the requirements to install.
    """
    fixed_options = (
        'install', '--require-virtualenv', '--no-input', '--progress-bar=off', '--no-color',
        '-c', constraint_file,  # needed for reproducibility
    )
    pip_run(*fixed_options, *args)


def main() -> None:
    """Create the virtual environment and install the testbed's packages into it."""
    venv_builder = EnvBuilder(
        system_site_packages=False,
        symlinks=os_name != 'nt',
        with_pip=False,
    )
    venv_builder.create(env_dir)

    requirements = (
        # with License-Expression
        'attrs',
        # with License-File
        'boolean.py',
        'jsonpointer',
        'license_expression',
        'lxml',
    )
    pip_install(*requirements)


if __name__ == '__main__':
    main()
    # with `--pin`, write the versions that were actually installed to the constraints file
    pin_requested = '--pin' in argv
    if pin_requested:
        frozen = pip_run('freeze', '--all', '--local', stdout=PIPE).stdout
        with open(constraint_file, 'wb') as pin_file:
            pin_file.write(frozen)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
attrs==23.2.0
boolean.py==4.0
jsonpointer==2.4
license-expression==30.3.0
lxml==5.2.2
15 changes: 15 additions & 0 deletions tests/_data/infiles/environment/with-license-pep639/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[project]
# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata
name = "with-extras"
version = "0.1.0"
description = "dependencies with license declaration according to PEP 639"

dependencies = [
# with License-Expression
"attrs",
# with License-File
"boolean.py",
"jsonpointer",
"license_expression",
"lxml",
]

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e9cc805

Please sign in to comment.