Skip to content

Commit

Permalink
Update Resource model #74
Browse files Browse the repository at this point in the history
    * Update query in migrations

Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed May 12, 2023
1 parent 712c218 commit 0729011
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 11 deletions.
3 changes: 2 additions & 1 deletion packagedb/migrations/0059_compute_package_license_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def compute_package_declared_license_expression_spdx(apps, schema_editor):

Package = apps.get_model('packagedb', 'Package')
packages = Package.objects.filter(
~Q(declared_license_expression="") & Q(declared_license_expression_spdx="")
~Q(declared_license_expression="") & Q(declared_license_expression_spdx="") |
Q(declared_license_expression__isnull=False) & Q(declared_license_expression_spdx__isnull=True)
)

for package in packages:
Expand Down
41 changes: 41 additions & 0 deletions packagedb/migrations/0061_add_new_scan_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Generated by Django 4.1.2 on 2023-05-12 17:52

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Schema migration adding five license-related fields to ``resource``.

    Adds two text fields (``detected_license_expression`` and
    ``detected_license_expression_spdx``), two JSON fields defaulting to a
    list (``license_clues`` and ``license_detections``) and the nullable
    float field ``percentage_of_license_text``.
    """

    dependencies = [("packagedb", "0060_remove_package_contains_source_code_and_more")]

    operations = [
        # Plain text license expression fields.
        migrations.AddField(
            model_name="resource",
            name="detected_license_expression",
            field=models.TextField(help_text="TODO", blank=True),
        ),
        migrations.AddField(
            model_name="resource",
            name="detected_license_expression_spdx",
            field=models.TextField(help_text="TODO", blank=True),
        ),
        # JSON fields; each new row starts with an empty list.
        migrations.AddField(
            model_name="resource",
            name="license_clues",
            field=models.JSONField(
                help_text="List of license clues.",
                default=list,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name="resource",
            name="license_detections",
            field=models.JSONField(
                help_text="List of license detection details.",
                default=list,
                blank=True,
            ),
        ),
        # Nullable so that existing rows need no value.
        migrations.AddField(
            model_name="resource",
            name="percentage_of_license_text",
            field=models.FloatField(help_text="TODO", null=True, blank=True),
        ),
    ]
112 changes: 112 additions & 0 deletions packagedb/migrations/0062_compute_resource_license_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Generated by Django 4.1.2 on 2023-05-12 17:56

from django.db import migrations
from django.db.models import Q


def compute_resource_detected_license_expression(apps, schema_editor):
    """
    Compute Resource `detected_license_expression` and
    `detected_license_expression_spdx` from old `license_expressions` field.
    From scancode.io

    ``apps`` is the app registry used to look up the ``Resource`` model.
    ``schema_editor`` is accepted to match the ``RunPython`` callable
    signature and is not used.
    """
    from license_expression import combine_expressions
    from licensedcode.cache import build_spdx_license_expression

    Resource = apps.get_model("packagedb", "Resource")
    # Both conditions must hold. OR-ing them would select every non-NULL row,
    # including rows holding an empty list that have nothing to combine.
    resources = Resource.objects.filter(
        Q(license_expressions__isnull=False) & ~Q(license_expressions=[])
    )

    for resource in resources:
        combined_expression = combine_expressions(resource.license_expressions)
        if combined_expression is None:
            # Nothing could be combined: do not store the string "None".
            continue

        license_expression = str(combined_expression)
        license_expression_spdx = build_spdx_license_expression(license_expression)
        # Store on the ``detected_*`` fields: these are the fields this
        # function is documented to compute on Resource.
        resource.detected_license_expression = license_expression
        resource.detected_license_expression_spdx = license_expression_spdx
        resource.save(
            update_fields=[
                "detected_license_expression",
                "detected_license_expression_spdx",
            ]
        )


def _convert_matches_to_detections(license_matches):
"""
Return a list of scancode v32 LicenseDetection mappings from provided
``license_matches``: a list of the scancode v31 LicenseMatch mappings.
From scancode.io
"""
from license_expression import combine_expressions
from licensedcode.detection import get_uuid_on_content
from commoncode.text import python_safe_name

match_attributes = ["score", "start_line", "end_line", "matched_text"]
rule_attributes = [
"matched_length",
"match_coverage",
"matcher",
"rule_relevance",
]
license_detection = {}
detection_matches = []

for match in license_matches:
detection_match = {}

for attribute in match_attributes:
detection_match[attribute] = match[attribute]
for attribute in rule_attributes:
detection_match[attribute] = match["matched_rule"][attribute]

detection_match["rule_identifier"] = match["matched_rule"]["identifier"]
detection_match["license_expression"] = match["matched_rule"][
"license_expression"
]
detection_match["rule_url"] = None
detection_matches.append(detection_match)

license_expressions = [match["license_expression"] for match in detection_matches]
hashable_details = tuple(
[
(match["score"], match["rule_identifier"], match["matched_text"])
for match in detection_matches
]
)
uuid = get_uuid_on_content(hashable_details)

license_detection["matches"] = detection_matches
license_detection["license_expression"] = str(
combine_expressions(license_expressions)
)
license_detection["identifier"] = "{}-{}".format(
python_safe_name(license_detection["license_expression"]), uuid
)

return [license_detection]


def compute_resource_license_detections(apps, schema_editor):
    """
    Compute Resource `license_detections` from old `licenses` field.
    From scancode.io

    ``apps`` is the app registry used to look up the ``Resource`` model.
    ``schema_editor`` is accepted to match the ``RunPython`` callable
    signature and is not used.
    """
    Resource = apps.get_model("packagedb", "Resource")
    # Both conditions must hold. OR-ing them would select every non-NULL row,
    # including rows holding an empty list that have no match to convert.
    resources = Resource.objects.filter(
        Q(licenses__isnull=False) & ~Q(licenses=[])
    )
    for resource in resources:
        if not resource.licenses:
            # Defensive: skip values that are still empty once loaded.
            continue
        resource.license_detections = _convert_matches_to_detections(resource.licenses)
        resource.save(update_fields=["license_detections"])


class Migration(migrations.Migration):
    """
    Data migration running the two Resource license computations in order:
    first the detected license expressions, then the license detections.
    Reversing this migration does nothing.
    """

    dependencies = [("packagedb", "0061_add_new_scan_fields")]

    operations = [
        migrations.RunPython(
            code=compute_resource_detected_license_expression,
            reverse_code=migrations.RunPython.noop,
        ),
        migrations.RunPython(
            code=compute_resource_license_detections,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.1.2 on 2023-05-12 18:28

from django.db import migrations


class Migration(migrations.Migration):
    """
    Schema migration dropping the ``license_expressions`` and ``licenses``
    fields from ``resource``. It depends on the
    ``0062_compute_resource_license_data`` migration.
    """

    dependencies = [("packagedb", "0062_compute_resource_license_data")]

    operations = [
        migrations.RemoveField(model_name="resource", name="license_expressions"),
        migrations.RemoveField(model_name="resource", name="licenses"),
    ]
33 changes: 23 additions & 10 deletions packagedb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,29 @@ class ScanFieldsModelMixin(models.Model):
This model is from ScanCode.io
"""

detected_license_expression = models.TextField(
blank=True,
help_text=_("TODO"),
)
detected_license_expression_spdx = models.TextField(
blank=True,
help_text=_("TODO"),
)
license_detections = models.JSONField(
blank=True,
default=list,
help_text=_("List of license detection details."),
)
license_clues = models.JSONField(
blank=True,
default=list,
help_text=_("List of license clues."),
)
percentage_of_license_text = models.FloatField(
blank=True,
null=True,
help_text=_("TODO"),
)
copyrights = models.JSONField(
blank=True,
default=list,
Expand All @@ -807,16 +830,6 @@ class ScanFieldsModelMixin(models.Model):
default=list,
help_text=_("List of detected authors (and related detection details)."),
)
licenses = models.JSONField(
blank=True,
default=list,
help_text=_("List of license detection details."),
)
license_expressions = models.JSONField(
blank=True,
default=list,
help_text=_("List of detected license expressions."),
)
package_data = models.JSONField(
default=list,
blank=True,
Expand Down

0 comments on commit 0729011

Please sign in to comment.