Skip to content

Commit

Permalink
feat: add citation.cff and LICENSE to archive packages and git repos
Browse files Browse the repository at this point in the history
Currently these files are not retroactively inserted into existing archive
packages, since that would require rebuilding everything. Generating git repos,
however, will add them if they are missing.

** This commit also includes an experimental refactor of metadata transformations,
which is used to implement the Citation File Format (CITATION.cff) generation.

resolves comses/planning#234
  • Loading branch information
sgfost committed Aug 6, 2024
1 parent a12a8c1 commit 4334357
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 18 deletions.
76 changes: 63 additions & 13 deletions django/library/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ def __init__(
self.version_number = codebase_release.version_number
self.release_id = codebase_release.id
self.codemeta = codebase_release.codemeta
self.citation_cff = codebase_release.citation_cff
self.license_text = codebase_release.license_text
self.bagit_info = codebase_release.bagit_info
self.mimetype_mismatch_message_level = mimetype_mismatch_message_level

Expand Down Expand Up @@ -414,6 +416,14 @@ def sip_dir(self):
def codemeta_path(self):
return self.sip_contents_dir.joinpath("codemeta.json")

@property
def citation_cff_path(self):
    """Path of the CITATION.cff file inside the SIP contents directory."""
    contents_dir = self.sip_contents_dir
    return contents_dir.joinpath("CITATION.cff")

@property
def license_path(self):
    """Path of the LICENSE file inside the SIP contents directory."""
    contents_dir = self.sip_contents_dir
    return contents_dir.joinpath("LICENSE")

@property
def sip_contents_dir(self):
    """The ``data`` subdirectory of the SIP directory."""
    sip_root = self.sip_dir
    return sip_root.joinpath("data")
Expand Down Expand Up @@ -516,8 +526,6 @@ def initialize(
def create_or_update_codemeta(self, force=False):
"""
Returns True if a codemeta.json file was created, False otherwise
:param metadata: an optional dictionary with codemeta properties
:return:
"""
path = self.codemeta_path
if force or not path.exists():
Expand All @@ -526,18 +534,44 @@ def create_or_update_codemeta(self, force=False):
return True
return False

def create_or_update_citation_cff(self, force=False):
    """
    Write the release's citation metadata (as YAML) to ``citation_cff_path``.

    An existing file is left untouched unless ``force`` is truthy.
    Returns True if the file was written, False otherwise.
    """
    target = self.citation_cff_path
    if not force and target.exists():
        return False
    with target.open(mode="w", encoding="utf-8") as handle:
        handle.write(self.citation_cff.build().to_yaml())
    return True

def create_or_update_license(self, force=False):
    """
    Write ``self.license_text`` to ``license_path``.

    An existing file is left untouched unless ``force`` is truthy.
    Returns True if the file was written, False otherwise.
    """
    target = self.license_path
    if not force and target.exists():
        return False
    with target.open(mode="w", encoding="utf-8") as handle:
        handle.write(self.license_text)
    return True

def get_codemeta_json(self):
    """Return the result of calling ``to_json()`` on this object's codemeta."""
    codemeta = self.codemeta
    return codemeta.to_json()

def build_published_archive(self, force=False):
    """
    Build the archive for a published release.

    Writes the codemeta, citation and license files (passing ``force``
    through to each writer), gets or creates the SIP bag from
    ``self.bagit_info`` and validates it, then builds the AIP and finally
    the archive itself.
    """
    metadata_writers = (
        self.create_or_update_codemeta,
        self.create_or_update_citation_cff,
        self.create_or_update_license,
    )
    for write_metadata in metadata_writers:
        write_metadata(force=force)
    sip_bag = self.get_or_create_sip_bag(self.bagit_info)
    self.validate_bagit(sip_bag)
    self.build_aip()
    self.build_archive(force=force)

def build_review_archive(self):
self.create_or_update_codemeta(force=True)
self.create_or_update_citation_cff(force=True)
self.create_or_update_license(force=True)
shutil.make_archive(
str(self.review_archivepath.with_suffix("")),
format="zip",
Expand Down Expand Up @@ -739,6 +773,8 @@ def build_archive(self, force=False):
def rebuild(self):
    """
    Re-run ``build_sip()``, force-regenerate the codemeta, citation and
    license files, then force-rebuild the archive.

    Returns whatever ``build_sip()`` returned.
    """
    sip_messages = self.build_sip()
    for regenerate in (
        self.create_or_update_codemeta,
        self.create_or_update_citation_cff,
        self.create_or_update_license,
    ):
        regenerate(force=True)
    self.build_archive(force=True)
    return sip_messages

Expand Down Expand Up @@ -824,6 +860,11 @@ def add_release_files(self, release):
shutil.rmtree(item)
else:
item.unlink()
self.repo.index.remove(
[str(item.relative_to(self.repo_dir))],
working_tree=True,
r=True,
)
# copy over files from the sip storage and add to the index
for file in sip_storage.list(absolute=True):
rel_path = file.relative_to(sip_storage.location)
Expand All @@ -835,19 +876,20 @@ def add_release_files(self, release):
def add_release_meta_files(self, release):
    """
    Helper for adding all 'meta' files (README.md, CITATION.cff, LICENSE)
    for a release, if they do not already exist.

    The README is generated by ``generate_readme()``, the citation file is
    the YAML produced by ``release.citation_cff`` and the license is
    ``release.license_text``. Each one is handed to
    ``_add_meta_file_if_missing``, which leaves an existing file alone.

    :param release: the release whose citation and license content is used
    """
    self._add_meta_file_if_missing("README.md", self.generate_readme())
    self._add_meta_file_if_missing(
        "CITATION.cff", release.citation_cff.build().to_yaml()
    )
    self._add_meta_file_if_missing("LICENSE", release.license_text)

def _add_meta_file_if_missing(self, filename, content):
    """
    Create ``filename`` in the repository directory and add it to the git
    index, unless a file with that name already exists there.

    :param filename: name of the file, relative to ``self.repo_dir``
    :param content: text to write; a trailing newline is appended
    """
    dest_path = self.repo_dir / filename
    if dest_path.exists():
        # keep whatever is already in the repository directory
        return
    # explicit utf-8 so the output does not depend on the platform default
    # encoding (matches how the archive copies of these files are written)
    with dest_path.open("w", encoding="utf-8") as f:
        f.write(content + "\n")
    self.repo.index.add([filename])

def commit_release(self, release, tag=True):
"""
Expand Down Expand Up @@ -908,7 +950,9 @@ def build(self):
try:
releases = self.codebase.ordered_releases_list()
if not releases:
raise ValidationError("Must have at least one public release to build from")
raise ValidationError(
"Must have at least one public release to build from"
)
with self.use_temporary_repo():
self.initialize()
for release in releases:
Expand Down Expand Up @@ -947,6 +991,12 @@ def dirs_equal(self, dir1: Path, dir2: Path, ignore=[".git"]):
return False
return True

def generate_readme(self):
    """
    Build the README.md text for the repository from the codebase metadata:
    a level-one markdown heading with the title, a blank line, then the raw
    description.
    """
    heading = f"# {self.codebase.title}"
    body = self.codebase.description.raw
    return f"{heading}\n\n{body}\n"


class ArchiveExtractor:
def __init__(self, sip_storage: CodebaseReleaseSipStorage):
Expand Down
45 changes: 45 additions & 0 deletions django/library/migrations/0030_license_text.py

Large diffs are not rendered by default.

37 changes: 32 additions & 5 deletions django/library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import pathlib
from string import Template
import uuid
from collections import OrderedDict
from datetime import timedelta
Expand Down Expand Up @@ -126,6 +127,7 @@ class License(models.Model):
max_length=200, help_text=_("SPDX license code from https://spdx.org/licenses/")
)
url = models.URLField(blank=True)
text = models.TextField(blank=True, help_text=_("Full license text"))

def __str__(self):
    """Human-readable label: the license name followed by its URL in parentheses."""
    return f"{self.name} ({self.url})"
Expand Down Expand Up @@ -232,16 +234,16 @@ def get_markdown_link(self):
def to_codemeta(self):
    """
    Build the codemeta ``Person`` dictionary for this contributor.

    Given name, family name and email come from ``get_given_name()``,
    ``get_family_name()`` and ``get_email()``. ``email``, ``@id`` (the ORCID
    URL) and ``affiliation`` are only included when a value is available.

    :return: dict with ``@type``, ``givenName``, ``familyName`` and the
        optional keys described above
    """
    codemeta = {
        "@type": "Person",
        "givenName": self.get_given_name(),
        "familyName": self.get_family_name(),
    }
    email = self.get_email()
    if email:
        codemeta["email"] = email
    if self.orcid_url:
        codemeta["@id"] = self.orcid_url
    if self.json_affiliations:
        codemeta["affiliation"] = self.codemeta_affiliation
    return codemeta

def get_aggregated_search_fields(self):
Expand All @@ -266,6 +268,15 @@ def get_full_name(self, family_name_first=False):
# organizations only use given_name
return self.given_name

def get_given_name(self):
    """Return this contributor's given name, falling back to the linked
    user's ``first_name``, or an empty string when there is no user."""
    if self.given_name:
        return self.given_name
    if self.user:
        return self.user.first_name
    return ""

def get_family_name(self):
    """Return this contributor's family name, falling back to the linked
    user's ``last_name``, or an empty string when there is no user."""
    if self.family_name:
        return self.family_name
    if self.user:
        return self.user.last_name
    return ""

def get_email(self):
    """Return this contributor's email, falling back to the linked user's
    ``email``, or an empty string when there is no user."""
    if self.email:
        return self.email
    if self.user:
        return self.user.email
    return ""

def _get_person_full_name(self, family_name_first=False):
if not self.has_name:
logger.warning("No usable name found for contributor %s", self.id)
Expand Down Expand Up @@ -1637,6 +1648,22 @@ def codemeta(self):
"""Returns a CodeMeta object that can be dumped to json"""
return CodeMeta.build(self)

@cached_property
def citation_cff(self):
    """Return a ReleaseCitation transformer wrapping this release.

    The caller still has to invoke ``build()`` on the result before
    serializing it.
    """
    # imported inside the method rather than at module level; presumably to
    # avoid a circular import, since transformers imports from .models
    from .transformers import ReleaseCitation

    return ReleaseCitation(self)

@cached_property
def license_text(self):
    """
    Return the release license's full text with its placeholders filled in.

    ``self.license.text`` is treated as a ``string.Template``:
    ``$copyright_year`` is replaced with the current year and
    ``$copyright_name`` with ``self.citation_authors``.

    ``safe_substitute`` is used so that any other ``$`` sequence appearing in
    a license body is left untouched, instead of raising ``KeyError`` or
    ``ValueError`` and aborting the archive/repository build.
    """
    template = Template(self.license.text)
    return template.safe_substitute(
        {
            "copyright_year": timezone.now().year,
            "copyright_name": self.citation_authors,
        }
    )

@property
def is_draft(self):
    """True when this release's status equals ``Status.DRAFT``."""
    return self.status == self.Status.DRAFT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
result tells us there are more sheep
3 changes: 3 additions & 0 deletions django/library/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_repo_build(self):
self.assertTrue(os.path.exists(api.repo_dir))
# check git stuff
repo = Repo(api.repo_dir)
self.assertFalse(repo.is_dirty())
self.assertEqual(sum(1 for _ in repo.iter_commits()), public_release_count)
self.assertEqual(len(repo.tags), public_release_count)
# check contents
Expand Down Expand Up @@ -154,6 +155,7 @@ def test_repo_append_releases(self):
self.assertEqual(self.git_mirror.local_releases.count(), 2)
# check git stuff
repo = Repo(api.repo_dir)
self.assertFalse(repo.is_dirty())
public_release_count = self.codebase.public_releases().count()
self.assertEqual(sum(1 for _ in repo.iter_commits()), public_release_count)
self.assertEqual(len(repo.tags), public_release_count)
Expand Down Expand Up @@ -204,6 +206,7 @@ def test_repo_rebuild(self):
self.assertEqual(self.git_mirror.local_releases.count(), 2)
# check git stuff
repo = Repo(api.repo_dir)
self.assertFalse(repo.is_dirty())
public_release_count = self.codebase.public_releases().count()
self.assertEqual(sum(1 for _ in repo.iter_commits()), public_release_count)
self.assertEqual(len(repo.tags), public_release_count)
Expand Down
139 changes: 139 additions & 0 deletions django/library/transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from abc import ABC, abstractmethod
import json
import yaml

from .models import ReleaseContributor, CodebaseRelease, Codebase


def field(name=None, required=False):
    """Mark a transformer method as a metadata field.

    Marked methods are tagged with ``is_field``, ``field_name`` and
    ``required`` attributes; the method itself is returned unwrapped.

    Args:
        name (str, optional): key to use in the resulting metadata, if
            different from the method name.
        required (bool, optional): stored on the method as ``required``.
            If False, the field is skipped when its value is falsy.
            Defaults to False.
    """

    def register(method):
        exported_name = name if name else method.__name__
        for attribute, value in (
            ("is_field", True),
            ("field_name", exported_name),
            ("required", required),
        ):
            setattr(method, attribute, value)
        return method

    return register


class Transformer(ABC):
    """Base class for objects that turn model data into a metadata dictionary.

    Subclasses expose values through methods that carry the ``is_field``,
    ``field_name`` and ``required`` attributes (set by the ``field``
    decorator). ``build()`` collects them on top of ``INITIAL_DATA``; the
    result can then be read via ``metadata`` or serialized with
    ``to_dict()``, ``to_json()`` or ``to_yaml()``.
    """

    # base metadata every build() starts from; copied (shallowly) on build
    INITIAL_DATA = {}

    @abstractmethod
    def __init__(self):
        pass

    def create_fields_dict(self, **kwargs) -> dict:
        """returns a dictionary with fields registered with @field decorators

        Fields whose value is falsy are left out unless they are marked as
        required.

        :param kwargs: additional fields to include in the metadata; a field
            method with the same name overrides the passed value
        """
        owner = type(self)
        for name in dir(self):
            # Never evaluate properties while scanning for field methods:
            # `metadata` raises AttributeError until build() has run, and
            # other properties may be expensive or have side effects.
            if isinstance(getattr(owner, name, None), property):
                continue
            method = getattr(self, name, None)
            if getattr(method, "is_field", False) and callable(method):
                field_name = method.field_name
                value = method()
                required = getattr(method, "required", False)
                if value or required:
                    kwargs[field_name] = value
        return kwargs

    def build(self, **kwargs):
        """Build metadata from INITIAL_DATA and fields

        :param kwargs: additional fields to include in the metadata
        :return: self
        """
        self._metadata = self.INITIAL_DATA.copy()
        self._metadata.update(self.create_fields_dict(**kwargs))
        return self

    @property
    def metadata(self):
        """The dictionary produced by the last build() call.

        :raises AttributeError: if build() has not been called yet
        """
        if not hasattr(self, "_metadata"):
            raise AttributeError("Metadata must be generated by calling build()")
        return self._metadata

    def to_dict(self):
        """Return a shallow copy of the built metadata."""
        return self.metadata.copy()

    def to_json(self):
        """Return the built metadata serialized with ``json.dumps``."""
        return json.dumps(self.metadata)

    def to_yaml(self):
        """Return the built metadata serialized with ``yaml.dump``."""
        return yaml.dump(self.metadata)


class ReleaseTransformer(Transformer):
    """Transformer base for metadata derived from a single CodebaseRelease."""

    def __init__(self, release: CodebaseRelease):
        # keep both the release and its parent codebase available to field methods
        self.release = release
        self.codebase = release.codebase


class CodebaseTransformer(Transformer):
    """Transformer base for metadata derived from a Codebase."""

    def __init__(self, codebase: Codebase):
        # `metadata` is a read-only property on Transformer, so assigning to it
        # here raised AttributeError on every instantiation. Metadata is
        # created by build(), as with ReleaseTransformer.
        self.codebase = codebase


class ReleaseCodeMeta(ReleaseTransformer):
    # Placeholder: no fields are registered on this class yet.
    pass


class ReleaseCitation(ReleaseTransformer):
    """Transform CodebaseRelease metadata into the Citation File Format
    (https://citation-file-format.github.io/) version 1.2.0

    Fields marked ``required=True`` are always emitted; the others are
    skipped when their value is falsy.
    """

    INITIAL_DATA = {"cff-version": "1.2.0"}

    @field(required=True)
    def message(self):
        """Fixed instruction telling users how to cite the software."""
        return "If you use this software, please cite it using the metadata from this file."

    @field(required=True)
    def authors(self):
        """One ``family-names``/``given-names`` entry per release author."""
        return [
            {
                "family-names": author.contributor.get_family_name(),
                "given-names": author.contributor.get_given_name(),
            }
            for author in ReleaseContributor.objects.authors(self.release)
        ]

    @field(required=True)
    def title(self):
        """Title of the codebase."""
        return self.codebase.title

    @field()
    def version(self):
        """Version number of the release."""
        return self.release.version_number

    @field()
    def abstract(self):
        """Raw description text of the codebase."""
        return self.codebase.description.raw

    @field()
    def keywords(self):
        """Names of all tags attached to the codebase."""
        return [tag.name for tag in self.codebase.tags.all()]

    @field()
    def license(self):
        """Name of the release's license, or None when no license is set."""
        release_license = self.release.license
        # a release without a license omits this optional field instead of
        # failing the whole build with AttributeError
        if release_license is None:
            return None
        return release_license.name

    @field(name="date-released")
    def date_released(self):
        """Publication date as YYYY-MM-DD, or None when it is unavailable."""
        if not self.release.live:
            return None
        published_on = self.release.last_published_on
        # guard against a live release that has no publication timestamp
        if not published_on:
            return None
        return published_on.strftime("%Y-%m-%d")
Loading

0 comments on commit 4334357

Please sign in to comment.