Skip to content

Commit

Permalink
Report a reference of all licenses
Browse files Browse the repository at this point in the history
The new --licenses-reference option adds a new "licenses_reference" top
level attribute to a scan, seen in the JSON and YAML outputs. This
contains all the license details.

Also bump the WIP version.

Reference: #1697
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Feb 10, 2022
1 parent 498b291 commit 83370d2
Show file tree
Hide file tree
Showing 9 changed files with 597 additions and 2 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ License detection:
- Small, two-word matches that overlap the previous or next match by
the word "license" and assimilated are now filtered as false matches.


- The new --licenses-reference option adds a new "licenses_reference" top
level attribute to a scan when using the JSON and YAML outputs. This contains
all the details and the full text of every license seen in a file or
package license expression of a scan. This can be added after the fact
using the --from-json option.

- New experimental support for non-English licenses. Use the command
./scancode --reindex-licenses-for-all-languages to index all known non-English
licenses and rules. From that point on, they will be detected. Because of this
Expand Down
3 changes: 2 additions & 1 deletion setup-mini.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = scancode-toolkit-mini
version = 30.1.0
version = 31.0.0
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft

# description must be on ONE line https://github.com/pypa/setuptools/issues/1390
Expand Down Expand Up @@ -176,6 +176,7 @@ scancode_post_scan =
is-license-text = licensedcode.plugin_license_text:IsLicenseText
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
consolidate = summarycode.plugin_consolidate:Consolidator
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference


# scancode_output_filter is the entry point for filter plugins executed after
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = scancode-toolkit
version = 30.1.0
version = 31.0.0
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft

# description must be on ONE line https://github.com/pypa/setuptools/issues/1390
Expand Down Expand Up @@ -176,6 +176,7 @@ scancode_post_scan =
is-license-text = licensedcode.plugin_license_text:IsLicenseText
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
consolidate = summarycode.plugin_consolidate:Consolidator
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference


# scancode_output_filter is the entry point for filter plugins executed after
Expand Down
84 changes: 84 additions & 0 deletions src/licensedcode/plugin_licenses_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import attr
from commoncode.cliutils import PluggableCommandLineOption
from commoncode.cliutils import POST_SCAN_GROUP
from license_expression import Licensing
from plugincode.post_scan import PostScanPlugin
from plugincode.post_scan import post_scan_impl

# Set to True to enable debug tracing
TRACE = False

if TRACE:
    import logging
    import sys

    logger = logging.getLogger(__name__)

    def logger_debug(*args):
        """
        Log ``args`` joined with single spaces at the DEBUG level.
        String arguments are used as-is (including empty strings); any other
        argument is rendered with its repr().
        """
        return logger.debug(' '.join(a if isinstance(a, str) else repr(a) for a in args))

    # Send debug output to stdout and let DEBUG records through for this module.
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)
else:

    def logger_debug(*args):
        """
        Do nothing: tracing is disabled.
        """
        pass


@post_scan_impl
class LicensesReference(PostScanPlugin):
    """
    Add a reference list of all licenses data and text.

    When enabled with the --licenses-reference option, collect every license
    key used in the license expressions of the codebase resources and
    packages, then append the details and text of each of these licenses to
    the top-level "licenses_reference" codebase attribute.
    """
    # Top-level codebase attribute: a list with one entry per referenced license.
    codebase_attributes = dict(licenses_reference=attr.ib(default=attr.Factory(list)))

    sort_order = 500

    options = [
        PluggableCommandLineOption(('--licenses-reference',),
            is_flag=True, default=False,
            help='Include a reference of all the licenses referenced in this '
                 'scan with the data details and full texts.',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, licenses_reference, **kwargs):
        """
        Return the value of the ``licenses_reference`` option flag.
        """
        return licenses_reference

    def process_codebase(self, codebase, licenses_reference, **kwargs):
        """
        Populate ``codebase.attributes.licenses_reference`` with the details of
        every license key found in the license expressions of the ``codebase``
        resources and packages. Entries are appended sorted by license key.

        Raise a KeyError if a collected license key is not in the licenses db.
        """
        from licensedcode.cache import get_licenses_db

        licensing = Licensing()
        license_keys = set()

        # Collect the keys used in the license expressions of each resource.
        for resource in codebase.walk():
            licexps = getattr(resource, 'license_expressions', []) or []
            for expression in licexps:
                if expression:
                    license_keys.update(licensing.license_keys(expression))

        # Collect the keys used in the license expression of each package.
        # NOTE(review): packages are read from the codebase object itself;
        # confirm they are not stored under codebase.attributes instead.
        packages = getattr(codebase, 'packages', []) or []
        for package in packages:
            # FIXME: license_expression attribute name is changing soon
            expression = package.get('license_expression')
            if expression:
                license_keys.update(licensing.license_keys(expression))

        # Resources are only read above and never modified: there is nothing
        # to save back to the codebase.

        db = get_licenses_db()
        for key in sorted(license_keys):
            license_details = db[key].to_dict(
                include_ignorables=False,
                include_text=True,
            )
            codebase.attributes.licenses_reference.append(license_details)
Loading

0 comments on commit 83370d2

Please sign in to comment.