Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use conda package streaming #724

Merged
merged 21 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/check-master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ jobs:
# unfortunately, as of June 2021 - GitHub doesn't support anchors for action scripts

- name: Checkout project
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
python-version: ${{ matrix.python-version }}
Expand Down
96 changes: 69 additions & 27 deletions binstar_client/inspect_package/conda.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring

from __future__ import print_function
from __future__ import annotations, print_function

import os.path
import json
import os.path
import re
import sys
import tempfile
from pprint import pprint
from shutil import rmtree
from conda_package_handling.api import extract
from typing import Any

from ..utils.notebook.data_uri import data_uri_from
from conda_package_streaming.package_streaming import (
CondaComponent,
stream_conda_component,
)

from ..utils.notebook.data_uri import data_uri_from_bytes

os_map = {'osx': 'darwin', 'win': 'win32'}
specs_re = re.compile('^([=><]+)(.*)$')
Expand All @@ -27,7 +29,7 @@ def transform_conda_deps(deps):
dep = dep.strip()
name_spec = dep.split(' ', 1)
if len(name_spec) == 1:
name, = name_spec
(name,) = name_spec
depends.append({'name': name, 'specs': []})
elif len(name_spec) == 2:
name, spec = name_spec
Expand All @@ -52,7 +54,9 @@ def transform_conda_deps(deps):
else:
operator = '=='

depends.append({'name': name, 'specs': [['==', '%s+%s' % (spec, build_str)]]})
depends.append(
{'name': name, 'specs': [['==', '%s+%s' % (spec, build_str)]]}
)

return {'depends': depends}

Expand All @@ -78,12 +82,12 @@ def get_subdir(index):
return '%s-%s' % (index.get('platform'), intel_map.get(arch, arch))


def inspect_conda_info_dir(info_path, basename): # pylint: disable=too-many-locals
def inspect_conda_info_dir(info_contents: dict[str, bytes], basename: str) -> tuple[dict, dict, dict]:
# pylint: disable=too-many-locals
def _load(filename, default=None):
file_path = os.path.join(info_path, filename)
if os.path.exists(file_path):
with open(file_path, encoding='utf-8') as file:
return json.load(file)
info_path = f'info/{filename}'
if info_path in info_contents:
return json.loads(info_contents[info_path])
return default

index = _load('index.json', None)
Expand All @@ -92,13 +96,15 @@ def _load(filename, default=None):

recipe = _load('recipe.json')
about = recipe.get('about', {}) if recipe else _load('about.json', {})
has_prefix = os.path.exists(os.path.join(info_path, 'has_prefix'))
has_prefix = 'info/has_prefix' in info_contents

# Load icon defined in the index.json and file exists inside info folder
icon_b64 = index.get('icon', None)
icon_path = os.path.join(info_path, icon_b64) if icon_b64 else None
if icon_path and os.path.exists(icon_path):
icon_b64 = data_uri_from(icon_path)
if index.get('icon'):
for icon_key in (f'info/{index.get("icon", None)}', 'info/icon.png'):
if icon_key in info_contents:
icon_b64 = data_uri_from_bytes(info_contents[icon_key])
break

subdir = get_subdir(index)
machine = index.get('arch', None)
Expand Down Expand Up @@ -134,7 +140,7 @@ def _load(filename, default=None):
'license_url': about.get('license_url'),
'license_family': about.get('license_family'),
}
file_data = {
file_data: dict[str, Any] = {
'basename': '%s/%s' % (subdir, basename),
'attrs': {
'operatingsystem': operatingsystem,
Expand All @@ -152,22 +158,58 @@ def _load(filename, default=None):
return package_data, release_data, file_data


def inspect_conda_package(filename, *args, **kwargs): # pylint: disable=unused-argument
tmpdir = tempfile.mkdtemp()
extract(filename, tmpdir, components='info')
def _read_info_members(path, wanted):
    """Stream the package's info component and return the bytes of each member named in ``wanted``."""
    found: dict[str, bytes] = {}
    seeking = set(wanted)
    with open(path, mode='rb') as fileobj:
        package_stream = stream_conda_component(
            path, fileobj, CondaComponent.info
        )
        try:
            for tar, member in package_stream:
                if member.name in wanted:
                    # discard() instead of remove(): an archive that lists the
                    # same member name twice must not raise KeyError.
                    seeking.discard(member.name)
                    reader = tar.extractfile(member)
                    if reader is not None:
                        found[member.name] = reader.read()

                if not seeking:  # we got what we wanted
                    break
        finally:
            # Always close the generator, including when reading raises.
            package_stream.close()
    return found


def gather_info_dir(
    path: os.PathLike,
    wanted: frozenset[str] = frozenset(
        (
            'info/index.json',
            'info/recipe.json',
            'info/about.json',
            'info/has_prefix',
        )
    ),
) -> dict[str, bytes]:
    """Use conda-package-streaming to gather files without extracting to disk.

    :param path: Path of the conda package to read.
    :param wanted: Member names (for example ``'info/index.json'``) to collect.
    :return: Mapping of member name to raw file content for each wanted
        member that was found. When ``info/index.json`` declares a non-empty
        ``icon``, ``info/icon.png`` and ``info/<icon>`` are added if present.
    """
    # based on code from conda-index
    have = _read_info_members(path, wanted)

    # extremely rare icon case. index.json lists a <hash>.png but the icon
    # appears to always be info/icon.png.
    index_bytes = have.get('info/index.json', b'')
    if b'"icon"' in index_bytes:
        index_json = json.loads(index_bytes)
        # The byte check above is only a cheap pre-filter: '"icon"' can occur
        # in a value or a nested object, so look the key up defensively.
        icon_name = index_json.get('icon') if isinstance(index_json, dict) else None
        if icon_name:
            # this case matters for our unit tests
            icon_wanted = frozenset(('info/icon.png', f'info/{icon_name}'))
            # Second pass goes through the helper, not this function, so an
            # icon named "index.json" cannot trigger endless recursion.
            have.update(_read_info_members(path, icon_wanted))

    return have

info_dir = os.path.join(tmpdir, 'info')
package_data, release_data, file_data = inspect_conda_info_dir(info_dir, os.path.basename(filename))

rmtree(tmpdir)

def inspect_conda_package(filename, *args, **kwargs): # pylint: disable=unused-argument
info_contents = gather_info_dir(filename)
package_data, release_data, file_data = inspect_conda_info_dir(
info_contents, os.path.basename(filename)
)
return package_data, release_data, file_data


def main():
filename = sys.argv[1]
with open(filename) as fileobj: # pylint: disable=unspecified-encoding
package_data, release_data, file_data = inspect_conda_package(filename, fileobj)
package_data, release_data, file_data = inspect_conda_package(filename)
pprint(package_data)
print('--')
pprint(release_data)
Expand Down
19 changes: 15 additions & 4 deletions binstar_client/utils/notebook/data_uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,35 @@


class DataURIConverter:
def __init__(self, location):
def __init__(self, location, data=None):
self.check_pillow_installed()
self.location = location
self.data = data

def check_pillow_installed(self):
if Image is None:
raise PillowNotInstalled()

def __call__(self):
if os.path.exists(self.location):
if self.data:
file = io.BytesIO(self.data)
b64 = self._encode(self.resize_and_convert(file).read())
elif os.path.exists(self.location):
with open(self.location, 'rb') as file:
return self._encode(self.resize_and_convert(file).read())
b64 = self._encode(self.resize_and_convert(file).read())
elif self.is_url():
content = requests.get(self.location, timeout=10 * 60 * 60).content
file = io.BytesIO()
file.write(content)
file.seek(0)
return self._encode(self.resize_and_convert(file).read())
b64 = self._encode(self.resize_and_convert(file).read())
else:
raise IOError('{} not found'.format(self.location))
return b64

def resize_and_convert(self, file):
if Image is None:
raise PillowNotInstalled()
image = Image.open(file)
image.thumbnail(THUMB_SIZE)
out = io.BytesIO()
Expand All @@ -64,3 +71,7 @@ def _encode(self, content):

def data_uri_from(location):
    """Return the result of running ``DataURIConverter`` on ``location``."""
    converter = DataURIConverter(location)
    return converter()


def data_uri_from_bytes(data):
    """Return the result of running ``DataURIConverter`` on in-memory ``data`` (no location)."""
    converter = DataURIConverter(location=None, data=data)
    return converter()
1 change: 1 addition & 0 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ requirements:
- python
- anaconda-anon-usage >=0.4.0
- conda-package-handling >=1.7.3
- conda-package-streaming
mattkram marked this conversation as resolved.
Show resolved Hide resolved
- defusedxml >=0.7.1
- nbformat >=4.4.0
- python-dateutil >=2.6.1
Expand Down
39 changes: 24 additions & 15 deletions tests/inspect_package/test_conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@

# Standard library imports
import unittest
from pathlib import Path

# Local imports
from binstar_client.inspect_package import conda
from binstar_client.utils.notebook.data_uri import data_uri_from
from tests.utils.utils import data_dir


HERE = Path(__file__).parent


def data_dir(path):
    """Return, as a string, the location of ``path`` under the ``data`` directory beside this module."""
    return str(HERE.joinpath('data', path))


expected_package_data = {
Expand Down Expand Up @@ -72,8 +79,10 @@
},
'basename': 'osx-64/conda_gc_test-1.2.1-py27_3.tar.bz2',
'dependencies': {
'depends': [{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]}],
'depends': [
{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]},
],
},
}

Expand All @@ -93,8 +102,10 @@
},
'basename': 'linux-64/conda_gc_test-2.2.1-py27_3.tar.bz2',
'dependencies': {
'depends': [{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]}],
'depends': [
{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]},
],
},
}

Expand All @@ -114,8 +125,10 @@
},
'basename': 'linux-64/conda_gc_test-2.2.1-py27_3.conda',
'dependencies': {
'depends': [{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]}],
'depends': [
{'name': 'foo', 'specs': [['==', '3']]},
{'name': 'python', 'specs': [['==', '2.7.8']]},
],
},
}

Expand Down Expand Up @@ -156,35 +169,31 @@ class Test(unittest.TestCase):

def test_conda_old(self):
filename = data_dir('conda_gc_test-1.2.1-py27_3.tar.bz2')
with open(filename, 'rb') as file:
package_data, version_data, file_data = conda.inspect_conda_package(filename, file)
package_data, version_data, file_data = conda.inspect_conda_package(filename)

self.assertEqual(expected_package_data, package_data)
self.assertEqual(expected_version_data_121, version_data)
self.assertEqual(expected_file_data_121, file_data)

def test_conda(self):
filename = data_dir('conda_gc_test-2.2.1-py27_3.tar.bz2')
with open(filename, 'rb') as file:
package_data, version_data, file_data = conda.inspect_conda_package(filename, file)
package_data, version_data, file_data = conda.inspect_conda_package(filename)

self.assertEqual(expected_package_data, package_data)
self.assertEqual(expected_version_data_221, version_data)
self.assertEqual(expected_file_data_221, file_data)

def test_conda_app_image(self):
filename = data_dir('test-app-package-icon-0.1-0.tar.bz2')
with open(filename, 'rb') as file:
package_data, version_data, _ = conda.inspect_conda_package(filename, file)
package_data, version_data, _ = conda.inspect_conda_package(filename)

self.assertEqual(app_expected_package_data, package_data)
self.assertEqual(app_expected_version_data.pop('icon'), version_data.pop('icon'))
self.assertEqual(app_expected_version_data, version_data)

def test_conda_v2_format(self):
filename = data_dir('conda_gc_test-2.2.1-py27_3.conda')
with open(filename, 'rb') as file:
package_data, version_data, file_data = conda.inspect_conda_package(filename, file)
package_data, version_data, file_data = conda.inspect_conda_package(filename)

self.assertEqual(expected_package_data, package_data)
self.assertEqual(expected_version_data_221, version_data)
Expand Down
Loading