Skip to content

Commit

Permalink
Add singularity-inspect metalad extractor
Browse files Browse the repository at this point in the history
Fixes: datalad#198

- Adds "singularity inspect path/to/file.sing" to metadata
- Adds "apptainer --version || singularity version" to metadata

find_executable will be removed in 3.12

Code that imports distutils will no longer work from Python 3.12.
Necessary for me to run locally.

Update file docstring to not lie

Handle both singularity and apptainer
  • Loading branch information
asmacdo committed Mar 24, 2023
1 parent c3bb747 commit d4e813a
Show file tree
Hide file tree
Showing 13 changed files with 297 additions and 1 deletion.
5 changes: 5 additions & 0 deletions changelog.d/20230307_164111_austin_add_metalad_extractor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### 🚀 Enhancements and New Features

- Add metalad extractor using `singularity inspect`.
Fixes https://github.com/datalad/datalad-container/issues/198 via
https://github.com/datalad/datalad-container/pull/200 (by @asmacdo )
3 changes: 3 additions & 0 deletions datalad_container/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

from .version import __version__

# Imported to set singularity/apptainer version commands at init
import datalad_container.extractors._load_singularity_versions # noqa

# defines a datalad command suite
# this symbol must be identified as a setuptools entrypoint
# to be found by datalad
Expand Down
2 changes: 2 additions & 0 deletions datalad_container/conftest.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from datalad.conftest import setup_package

from .tests.fixtures import * # noqa: F401, F403 # lgtm [py/polluting-import]
1 change: 1 addition & 0 deletions datalad_container/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

46 changes: 46 additions & 0 deletions datalad_container/extractors/_load_singularity_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Importing this file extends datalad.support.external_version:
Adds:
- external_versions["cmd:apptainer"]
- external_versions["cmd:singularity"]
"""
import subprocess

from datalad.cmd import (
StdOutCapture,
WitlessRunner,
)
from datalad.support.external_versions import external_versions


def __get_apptainer_version():
    """Return the apptainer version string, or None if it cannot be obtained.

    Runs ``apptainer --version`` and removes the leading
    ``"apptainer version "`` label from the captured stdout.

    Returns
    -------
    str or None
        The version text reported by apptainer, or None when the command
        is missing or fails.
    """
    # Local import keeps this function self-contained.
    from datalad.support.exceptions import CommandError

    try:
        out = WitlessRunner().run("apptainer --version", protocol=StdOutCapture)
    except (FileNotFoundError, CommandError):
        # NOTE(review): the command is passed as a single string; if the
        # runner hands that to a shell, a missing executable shows up as a
        # non-zero exit (CommandError) rather than FileNotFoundError, so both
        # are treated as "not installed" -- confirm against WitlessRunner.
        return None

    version = out['stdout'].strip()
    prefix = "apptainer version "
    # Only drop the label when it is really there; blindly slicing by its
    # length would mangle output that has a different shape.
    if version.startswith(prefix):
        version = version[len(prefix):]
    return version


def __get_singularity_version():
    """Return the singularity version string, or None.

    Runs ``singularity version``. None is returned when the command is
    missing or fails, and also when the reported version equals the
    registered apptainer version (``singularity`` is then taken to be the
    same installation as apptainer rather than an independent one).

    Returns
    -------
    str or None
        The stripped stdout of ``singularity version``, or None.
    """
    # Local import keeps this function self-contained.
    from datalad.support.exceptions import CommandError

    try:
        out = WitlessRunner().run("singularity version", protocol=StdOutCapture)
    except (FileNotFoundError, CommandError):
        # NOTE(review): the command is passed as a single string; if the
        # runner hands that to a shell, a missing executable shows up as a
        # non-zero exit (CommandError) rather than FileNotFoundError, so both
        # are treated as "not installed" -- confirm against WitlessRunner.
        return None

    version = out['stdout'].strip()

    # It may be possible to have both apptainer and singularity installed.
    # If singularity is installed independently, the versions will not match.
    apptainer_version = external_versions["cmd:apptainer"]
    if apptainer_version and apptainer_version == version:
        # Same version as apptainer: do not report it a second time.
        return None

    # Either apptainer is unavailable or the versions differ, so this is a
    # singularity version worth reporting on its own.
    return version

# Patch external_versions with "cmd:apptainer" and "cmd:singularity", each
# backed by the matching version function defined above.
# "cmd:apptainer" is registered first because the singularity function looks
# up external_versions["cmd:apptainer"] to compare the two versions.
external_versions.add("cmd:apptainer", func=__get_apptainer_version)
external_versions.add("cmd:singularity", func=__get_singularity_version)
85 changes: 85 additions & 0 deletions datalad_container/extractors/metalad_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Metadata extractors for Container Images stored in Datalad's own core storage"""
import json
import logging
import subprocess
import time
from uuid import UUID

from datalad.support.external_versions import external_versions, UnknownVersion

from datalad_metalad.extractors.base import DataOutputCategory, ExtractorResult, FileMetadataExtractor
from datalad_metalad import get_file_id

# Version string of this extractor; returned by get_version() and recorded
# as `extractor_version` in every extraction result.
CURRENT_VERSION = "0.0.1"

# Module-level logger for this extractor module.
lgr = logging.getLogger('datalad.metadata.extractors.metalad_container')


class MetaladSingularityInspect(FileMetadataExtractor):
    """
    Populates metadata singularity/apptainer version and `inspect` output.
    """

    def get_data_output_category(self) -> DataOutputCategory:
        """Report that extracted metadata is returned immediately."""
        return DataOutputCategory.IMMEDIATE

    def is_content_required(self) -> bool:
        """Report that the file content is required for extraction."""
        return True

    def get_id(self) -> UUID:
        """Return the fixed UUID identifying this extractor."""
        # Nothing special, made this up - asmacdo
        return UUID('3a28cca6-b7a1-11ed-b106-fc3497650c92')

    def get_version(self) -> str:
        """Return the version of this extractor."""
        return CURRENT_VERSION

    def extract(self, _=None) -> ExtractorResult:
        """Collect tool versions and `singularity inspect` output for the file.

        Returns
        -------
        ExtractorResult
            Result whose ``immediate_data`` holds the file identity, its
            size, the singularity and apptainer versions (as strings) and
            the parsed output of ``singularity inspect --json``.

        Raises
        ------
        subprocess.CalledProcessError
            If ``singularity inspect`` exits with a non-zero status.
        """
        singularity_version = self._version_string(
            external_versions["cmd:singularity"])
        apptainer_version = self._version_string(
            external_versions["cmd:apptainer"])

        return ExtractorResult(
            extractor_version=self.get_version(),
            extraction_parameter=self.parameter or {},
            extraction_success=True,
            datalad_result_dict={
                "type": "container",
                "status": "ok"
            },
            immediate_data={
                "@id": get_file_id(dict(
                    path=self.file_info.path,
                    type=self.file_info.type)),
                "type": self.file_info.type,
                "path": self.file_info.intra_dataset_path,
                "content_byte_size": self.file_info.byte_size,
                "comment": f"SingularityInspect extractor executed at {time.time()}",
                "singularity_version": singularity_version,
                "apptainer_version": apptainer_version,
                "container_inspect": self._singularity_inspect(self.file_info.path),
            })

    @staticmethod
    def _version_string(version) -> str:
        """Convert an ``external_versions`` lookup result to a plain string.

        Known versions are rendered through their ``vstring`` attribute; an
        ``UnknownVersion`` is rendered through ``str()``.
        """
        if version is None:
            # NOTE(review): presumably a lookup can yield None when the
            # version function itself fails; report that like an unknown
            # version instead of failing on `.vstring` -- confirm.
            return str(UnknownVersion())
        if isinstance(version, UnknownVersion):
            return str(version)
        # Fall back to str() for version objects that do not carry `vstring`.
        return getattr(version, "vstring", None) or str(version)

    @staticmethod
    def _singularity_inspect(path) -> dict:
        """Run ``singularity inspect --json`` on `path` and parse its output.

        Parameters
        ----------
        path
            Path of the container image to inspect.

        Returns
        -------
        dict
            The decoded JSON document printed by singularity.

        Raises
        ------
        subprocess.CalledProcessError
            If singularity exits with a non-zero status (``check=True``).
        """
        data = subprocess.run(
            ["singularity", "inspect", "--json", path],
            check=True,
            stdout=subprocess.PIPE).stdout.decode()
        return json.loads(data)
63 changes: 63 additions & 0 deletions datalad_container/extractors/tests/test_metalad_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os.path as op
import pytest
import subprocess
import sys
from pathlib import Path
from shutil import which
from unittest.mock import patch

from datalad.api import (
Dataset,
clone,
)
from datalad.cmd import (
StdOutCapture,
WitlessRunner,
)
from datalad.support.exceptions import CommandError
from datalad.tests.utils_pytest import (
SkipTest,
assert_in,
assert_raises,
eq_,
ok_exists,
with_tempfile,
with_tree,
)

from datalad_container.extractors.metalad_container import MetaladSingularityInspect

# Singularity Hub URL of a test image.
# NOTE(review): only mentioned in a TODO inside the tests below, which obtain
# their image through the `pull_image` fixture instead -- possibly unused.
test_img_url = 'shub://datalad/datalad-container:testhelper'


class TestMetaladSingularityInspect:
    """Tests for ``MetaladSingularityInspect._singularity_inspect``."""

    @with_tempfile
    def test__singularity_inspect_nofile(self, path=None):
        """Singularity causes CalledProcessError if path does not exist."""
        with pytest.raises(subprocess.CalledProcessError):
            MetaladSingularityInspect._singularity_inspect(path)

    # TODO this fixture is 2Gb, lets find a smaller one.
    def test__singularity_inspect_valid(self, pull_image):
        """Call inspect on a valid singularity container image."""
        # TODO using test_img_url, create a session fixture
        # path = op.join(Path(__file__).resolve().parent, "fixtures", "singularity.img")
        result = MetaladSingularityInspect._singularity_inspect(pull_image)

        assert result['type'] == 'container'
        # Do I need to catch this?
        labels = result['data']['attributes']['labels']
        expected_labels = [
            'org.label-schema.usage.singularity.version',
            'org.label-schema.build-date',
            'org.label-schema.build-size',
            'org.label-schema.usage.singularity.deffile',
            'org.label-schema.usage.singularity.deffile.from',
            'org.label-schema.usage.singularity.deffile.bootstrap',
        ]
        # Check against the labels reported by singularity, not against the
        # list of expected names itself (which would always pass).
        for label in expected_labels:
            assert label in labels
        assert labels['org.label-schema.schema-version'] == '1.0'
1 change: 1 addition & 0 deletions datalad_container/tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .singularity_image import pull_image
23 changes: 23 additions & 0 deletions datalad_container/tests/fixtures/singularity_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest
from pathlib import Path

from datalad.api import Dataset
from datalad.tests.utils_pytest import with_tempfile

from datalad_container.tests.utils import add_pyscript_image

# Singularity Hub URL of the container image added by the `pull_image` fixture.
TEST_IMG_URL = 'shub://datalad/datalad-container:testhelper'


@pytest.fixture(scope="session")
def pull_image(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Provide the path to a singularity image, fetched once per session.

    Creates a dataset in a fresh temporary directory, adds the image at
    ``TEST_IMG_URL`` to it as container ``mycontainer`` and gets its content.

    Returns
    -------
    Path
        Location of the image file inside the temporary dataset.
    """
    fixture_file_name = "fixture.sing"
    ds = Dataset(tmp_path_factory.mktemp("singularity_image"))
    ds.create(force=True)
    ds.containers_add(
        'mycontainer',
        url=TEST_IMG_URL,
        image=fixture_file_name,
    )
    img_path = ds.pathobj / fixture_file_name
    ds.get(img_path)
    return img_path
3 changes: 2 additions & 1 deletion datalad_container/tests/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ def test_ensure_datalad_remote_maybe_enable(path=None, *, autoenable):
if not autoenable:
assert_not_in("datalad", repo.get_remotes())
_ensure_datalad_remote(repo)
assert_in("datalad", repo.get_remotes())
assert_in("datalad", repo.get_remotes())

1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Documentation

changelog
acknowledgements
metadata-extraction


API Reference
Expand Down
62 changes: 62 additions & 0 deletions docs/source/metadata-extraction.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Metadata Extraction
*******************

If the `datalad-metalad`_ extension is installed, `datalad-container` can
extract metadata from Singularity container images.

(It is recommended to use a tool like `jq` if you would like to read the
output yourself.)

Singularity Inspect
-------------------

Adds metadata gathered from `singularity inspect` and the version of
`singularity` or `apptainer`.

For example:

(From the ReproNim/containers repository)

`datalad meta-extract -d . singularity_inspect images/bids/bids-pymvpa--1.0.2.sing | jq`

.. code-block::
{
"type": "file",
"dataset_id": "b02e63c2-62c1-11e9-82b0-52540040489c",
"dataset_version": "9ed0a39406e518f0309bb665a99b64dec719fb08",
"path": "images/bids/bids-pymvpa--1.0.2.sing",
"extractor_name": "singularity_inspect",
"extractor_version": "0.0.1",
"extraction_parameter": {},
"extraction_time": 1678897280.0087736,
"agent_name": "Austin Macdonald",
"agent_email": "[email protected]",
"extracted_metadata": {
"@id": "datalad:SHA1-s993116191--cc7ac6e6a31e9ac131035a88f699dfcca785b844",
"type": "file",
"path": "images/bids/bids-pymvpa--1.0.2.sing",
"content_byte_size": 0,
"comment": "SingularityInspect extractor executed at 1678897279.908783",
"singularity_version": "UNKNOWN",
"apptainer_version": "1.1.5-1.fc37",
"container_inspect": {
"data": {
"attributes": {
"labels": {
"org.label-schema.build-date": "Thu,_19_Dec_2019_14:58:41_+0000",
"org.label-schema.build-size": "2442MB",
"org.label-schema.schema-version": "1.0",
"org.label-schema.usage.singularity.deffile": "Singularity.bids-pymvpa--1.0.2",
"org.label-schema.usage.singularity.deffile.bootstrap": "docker",
"org.label-schema.usage.singularity.deffile.from": "bids/pymvpa:v1.0.2",
"org.label-schema.usage.singularity.version": "2.5.2-feature-squashbuild-secbuild-2.5.6e68f9725"
}
}
},
"type": "container"
}
}
}
.. _datalad-metalad: http://docs.datalad.org/projects/metalad/en/latest/
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ datalad.extensions =
# valid datalad interface specification (see demo in this extensions)
container = datalad_container:command_suite

datalad.metadata.extractors =
singularity_inspect = datalad_container.extractors.metalad_container:MetaladSingularityInspect

[versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
Expand Down

0 comments on commit d4e813a

Please sign in to comment.