diff --git a/minecode/.gitattributes b/.gitattributes similarity index 100% rename from minecode/.gitattributes rename to .gitattributes diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index cc0cbb57..2b10c392 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -24,17 +24,17 @@ jobs: run: chmod +x ./docs/scripts/doc8_style_check.sh - name: Install Dependencies - working-directory: ./minecode + working-directory: . run: ./configure --docs - name: Check Sphinx Documentation build minimally working-directory: ./docs run: | - source ../minecode/venv/bin/activate + source ../venv/bin/activate sphinx-build -E -W source build - name: Check for documentation style errors working-directory: ./docs run: | - source ../minecode/venv/bin/activate + source ../venv/bin/activate ./scripts/doc8_style_check.sh diff --git a/.github/workflows/packagedb-tests.yml b/.github/workflows/packagedb-tests.yml deleted file mode 100644 index d7734bd8..00000000 --- a/.github/workflows/packagedb-tests.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: PackageDB Tests CI - -on: [push, pull_request] - -env: - POSTGRES_DB: packagedb - POSTGRES_USER: packagedb - POSTGRES_PASSWORD: packagedb - POSTGRES_INITDB_ARGS: --encoding=UTF-8 --lc-collate=en_US.UTF-8 --lc-ctype=en_US.UTF-8 - -jobs: - build: - runs-on: ubuntu-20.04 - - services: - postgres: - image: postgres:13 - env: - POSTGRES_DB: ${{ env.POSTGRES_DB }} - POSTGRES_USER: ${{ env.POSTGRES_USER }} - POSTGRES_PASSWORD: ${{ env.POSTGRES_PASSWORD }} - POSTGRES_INITDB_ARGS: ${{ env.POSTGRES_INITDB_ARGS }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - - strategy: - max-parallel: 4 - matrix: - python-version: ["3.8", "3.9", "3.10"] - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ 
matrix.python-version }} - - - name: Install dependencies - working-directory: ./packagedb - run: | - ./configure --dev - - - name: Run tests - working-directory: ./packagedb - run: | - make envfile - source venv/bin/activate - python manage.py test diff --git a/.github/workflows/minecode-tests.yml b/.github/workflows/purldb-tests.yml similarity index 85% rename from .github/workflows/minecode-tests.yml rename to .github/workflows/purldb-tests.yml index e98b3575..3601ccc6 100644 --- a/.github/workflows/minecode-tests.yml +++ b/.github/workflows/purldb-tests.yml @@ -1,4 +1,4 @@ -name: Minecode Tests CI +name: PurlDB Tests CI on: [push, pull_request] @@ -43,13 +43,12 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies - working-directory: ./minecode + working-directory: . run: | - ./configure --dev + make dev - name: Run tests - working-directory: ./minecode + working-directory: . run: | make envfile - source venv/bin/activate - python manage.py test + make test diff --git a/AUTHORS.rst b/AUTHORS.rst index 51a19cc8..c6d13b91 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,3 +1,8 @@ The following organizations or individuals have contributed to this repo: -- +- nexB Inc. +- Jono Yang +- Philippe Ombredanne +- Li Ha +- Steven Esser +- Armin Tänzer diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fc2b6e32..eda3bcfb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,12 @@ Changelog ========= +next-version +------------ -v0.0.0 +*2023-01-03* -- Add clearcode, matchcode, and matchcode-toolkit to purldb. Reorganize code such that purldb is a single Django app. + +v2.0.0 ------ -*xxxx-xx-xx* -- Initial release. +*2022-11-11* -- Initial release. 
diff --git a/minecode/MANIFEST.in b/MANIFEST.in similarity index 73% rename from minecode/MANIFEST.in rename to MANIFEST.in index 613dbab5..ef3721e8 100644 --- a/minecode/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,4 @@ -graft etc graft src -graft tests include *.LICENSE include NOTICE @@ -12,10 +10,6 @@ include setup.* include configure* include requirements* include .git* -include MANIFEST.in -include setup.cfg -include setup.py - global-exclude *.py[co] __pycache__ *.*~ diff --git a/minecode/Makefile b/Makefile similarity index 80% rename from minecode/Makefile rename to Makefile index 6a8bfe83..c47f4491 100644 --- a/minecode/Makefile +++ b/Makefile @@ -32,13 +32,13 @@ virtualenv: @echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}" @${PYTHON_EXE} ${VIRTUALENV_PYZ} --never-download --no-periodic-update ${VENV} -conf: virtualenv +conf: @echo "-> Install dependencies" - @${ACTIVATE} pip install -e . -c requirements.txt + @./configure -dev: virtualenv +dev: @echo "-> Configure and install development dependencies" - @${ACTIVATE} pip install -e .[dev] -c requirements.txt + @./configure --dev envfile: @echo "-> Create the .env file and generate a secret key" @@ -70,8 +70,7 @@ check: clean: @echo "-> Clean the Python env" - rm -rf ${VENV} build/ dist/ packagedb.egg-info/ docs/_build/ pip-selfcheck.json - find . 
-type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete + @./configure --clean migrate: @echo "-> Apply database migrations" @@ -91,9 +90,31 @@ postgres: run: ${MANAGE} runserver 8001 --insecure +seed: + ${MANAGE} seed + +run_visit: seed + ${MANAGE} run_visit + +run_map: + ${MANAGE} run_map + test: @echo "-> Run the test suite" - ${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs + ${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb.settings ${PYTHON_EXE} -m pytest -vvs --ignore matchcode-toolkit + ${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs matchcode-toolkit + +shell: + ${MANAGE} shell + +clearsync: + ${MANAGE} clearsync --save-to-db --verbose -n 3 + +clearindex: + ${MANAGE} run_clearindex + +index_packages: + ${MANAGE} index_packages bump: @echo "-> Bump the version" @@ -110,6 +131,6 @@ docker-images: docker-compose pull @echo "-> Save the service images to a compressed tar archive in the dist/ directory" @mkdir -p dist/ - @docker save postgres packagedb_packagedb nginx | gzip > dist/packagedb-images-`git describe --tags`.tar.gz + @docker save minecode minecode_minecode nginx | gzip > dist/minecode-images-`git describe --tags`.tar.gz -.PHONY: virtualenv conf dev envfile install check valid isort clean migrate postgres sqlite run test bump docs docker-images +.PHONY: virtualenv conf dev envfile isort black doc8 valid check clean migrate postgres run test shell clearsync clearindex index_packages bump docs docker-images diff --git a/README.rst b/README.rst index 168b35f0..a837a5ac 100644 --- a/README.rst +++ b/README.rst @@ -1,22 +1,126 @@ -The purldb -================================ -This repo consiste of two main tools: +The purldb +========== +This repo consists of four main tools: -- MineCode that contains utilities to mine package repositories - PackageDB that is the reference model (based on ScanCode toolkit) that contains package data with purl (Package URLs) being a first class citizen. 
+- MineCode that contains utilities to mine package repositories +- MatchCode that contains utilities to index package metadata and resources for + matching +- ClearCode that contains utilities to mine ClearlyDefined for package data + +These are designed to be used first for reference such that one can query for +packages by purl and validate purl existence. + +In the future, the collected packages will be used as reference for dependency +resolution, as a reference knowledge base for all package data, as a reference +for vulnerable range resolution and more. + + +Installation +------------ +Requirements +############ +* Debian-based Linux distribution +* Python 3.8 or later +* Postgres 13 +* git +* scancode-toolkit runtime dependencies (https://scancode-toolkit.readthedocs.io/en/stable/getting-started/install.html#install-prerequisites) + +Once the prerequisites have been installed, set up PurlDB with the following commands: +:: + + git clone https://github.com/nexB/purldb + cd purldb + make dev + make postgres + make envfile + +Once PurlDB and the database have been set up, run tests to ensure functionality: +:: + + make test + + +Usage +----- +Start the PurlDB server by running: +:: + + make run + +To start visiting upstream package repositories for package metadata: +:: + + make run_visit + +To populate the PackageDB using visited package metadata: +:: + + make run_map + +If you have an empty PackageDB without Package and Package Resource information, +ClearCode should be run for a while so it can populate the PackageDB +with Package and Package Resource information from ClearlyDefined. +:: + + make clearsync + +After some ClearlyDefined harvests and definitions have been obtained, run +``clearindex`` to create Packages and Resources from the harvests and +definitions. +:: + + make clearindex + +The Package and Package Resource information will be used to create the matching indices. 
+ +Once the PackageDB has been populated, run the following command to create the +matching indices from the collected Package data: +:: + + make index_packages + + +API Endpoints +------------- + +* ``api/packages`` + + * Contains all of the Packages stored in the PackageDB + +* ``api/resources`` + + * Contains all of the Resources stored in the PackageDB + +* ``api/cditems`` + + * Contains the visited ClearlyDefined harvests or definitions + +* ``api/approximate_directory_content_index`` + + * Contains the directory content fingerprints for Packages with Resources + * Used to check if a directory and the files under it are from a known Package using the SHA1 values of the files + +* ``api/approximate_directory_structure_index`` + + * Contains the directory structure fingerprints for Packages with Resources + * Used to check if a directory and the files under it are from a known Package using the names of the files + +* ``api/exact_file_index`` + + * Contains the SHA1 values of Package Resources + * Used to check the SHA1 values of files from a scan to see which Packages also have that file -These are designed to be used first for reference such that one can -query by purl and validate purl existence. +* ``api/exact_package_archive_index`` -In the future, these will be used as reference for dependency -resolution, as a reference knowledge base for all packag data, -as a reference for vulnerable range resolution and more. + * Contains the SHA1 values of Package archives + * Used to check the SHA1 values of archives from a scan to determine if they are known Packages License -^^^^^^^^^^ +------- Copyright (c) nexB Inc. and others. All rights reserved. @@ -32,6 +136,6 @@ See https://www.apache.org/licenses/LICENSE-2.0 for the license text. See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license text. -See https://github.com/nexB/purldb for support or download. +See https://github.com/nexB/purldb for support or download. 
See https://aboutcode.org for more information about nexB OSS projects. diff --git a/apache-2.0.LICENSE b/apache-2.0.LICENSE index d9a10c0d..261eeb9e 100644 --- a/apache-2.0.LICENSE +++ b/apache-2.0.LICENSE @@ -174,3 +174,28 @@ of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/minecode/src/discovery/migrations/__init__.py b/clearcode/__init__.py similarity index 100% rename from minecode/src/discovery/migrations/__init__.py rename to clearcode/__init__.py diff --git a/clearcode/api.py b/clearcode/api.py new file mode 100644 index 00000000..42bcd6c1 --- /dev/null +++ b/clearcode/api.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. 
+# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 + +from django.urls import include, re_path +from rest_framework import routers +from rest_framework import serializers +from rest_framework import viewsets + +from clearcode.models import CDitem + + +class CDitemContentFieldSerializer(serializers.Field): + """ + Custom Field Serializer used to translate between Django ORM binary field and + base64-encoded string + """ + def to_representation(self, obj): + return base64.b64encode(obj).decode('utf-8') + + def to_internal_value(self, data): + return base64.b64decode(data) + + +class CDitemSerializer(serializers.HyperlinkedModelSerializer): + """ + Custom Serializer used to serialize the CDitem model + """ + content = CDitemContentFieldSerializer(required=False) + class Meta: + model = CDitem + fields = ( + 'path', + 'uuid', + 'content', + 'last_modified_date', + 'last_map_date', + 'map_error', + ) + + +class CDitemViewSet(viewsets.ModelViewSet): + """ + API endpoint that allows CDitems to be viewed. 
+ """ + serializer_class = CDitemSerializer + lookup_field = 'uuid' + + def get_queryset(self): + last_modified_date = self.request.query_params.get('last_modified_date', None) + queryset = CDitem.objects.all() + + if last_modified_date: + queryset = CDitem.objects.modified_after(last_modified_date) + + return queryset + + +router = routers.DefaultRouter() +router.register(r'cditems', CDitemViewSet, 'cditems') + +urlpatterns = [ + re_path('^api/', include((router.urls, 'api'))), +] diff --git a/clearcode/cdutils.py b/clearcode/cdutils.py new file mode 100644 index 00000000..477aac87 --- /dev/null +++ b/clearcode/cdutils.py @@ -0,0 +1,561 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +from hashlib import md5 +from itertools import zip_longest +import os +from os import path +import subprocess +import time +from urllib.parse import urlsplit +from urllib.parse import urlunsplit +from urllib.parse import parse_qs +from urllib.parse import quote_plus +from urllib.parse import unquote_plus + +import attr +import click +from packageurl import PackageURL +import requests + + +""" +ClearlyDefined utlities. 
+""" + +TRACE_FETCH = False +TRACE = False +TRACE_DEEP = False + + +PACKAGE_TYPES_BY_CD_TYPE = { + 'crate': 'cargo', + 'deb': 'deb', + 'debsrc': 'deb', + # Currently used only for maven packages + 'sourcearchive': 'maven', + 'maven': 'maven', + 'composer': 'composer', + # Currently used only for Github repo/packages + 'git': 'github', + 'pod': 'pod', + 'nuget': 'nuget', + 'pypi': 'pypi', + 'gem': 'gem', +} + + +PACKAGE_TYPES_BY_PURL_TYPE = { + 'cargo': 'crate', + 'deb': 'deb', + 'maven': 'maven', + 'composer': 'composer', + 'github': 'git', + 'pod': 'pod', + 'nuget': 'nuget', + 'pypi': 'pypi', + 'gem': 'gem', + 'npm': 'npm', +} + + +PROVIDERS_BY_PURL_TYPE = { + 'cargo': 'cratesio', + 'deb': 'debian', + 'maven': 'mavencentral', + 'composer': 'packagist', + # Currently used only for Github repo/packages + 'git': 'github', + 'github': 'github', + 'pod': 'cocoapods', + 'nuget': 'nuget', + 'pypi': 'pypi', + 'gem': 'rubygem', + 'npm': 'npmjs', +} + + +QUALIFIERS_BY_CD_TYPE = { + 'sourcearchive': {'classifier': 'sources'}, + 'debsrc': {'arch': 'source'} +} + + +@attr.s(slots=True) +class Coordinate(object): + """ + ClearlyDefined coordinates are used to identify any tracked component. + """ + + base_api_url = 'https://api.clearlydefined.io' + + type = attr.ib() + provider = attr.ib() + namespace = attr.ib() + name = attr.ib() + revision = attr.ib() + + def __attrs_post_init__(self, *args, **kwargs): + if self.provider == 'debian': + self.namespace = 'debian' + if not self.namespace: + self.namespace = '-' + + @classmethod + def from_dict(cls, coords): + if 'namespace' not in coords: + coords['namespace'] = '-' + return cls(**coords) + + def to_dict(self): + return attr.asdict(self) + + @classmethod + def from_path(cls, pth, root=None): + """ + Return a Coordinate from a path and an optional root. + + If a root is provided, the root is stripped from the path prefix. + + The remaining path is assumed to have its 5 leading segments mapping to + the coordinate elements. 
+ + For instance: + + >>> expected = Coordinate('maven', 'mavencentral', 'io.dropwizard', 'dropwizard', '2.0.0-rc13') + >>> p = '/maven/mavencentral/io.dropwizard/dropwizard/2.0.0-rc13/' + >>> test = Coordinate.from_path(p) + >>> assert expected == test + + >>> p = '/maven/mavencentral/io.dropwizard/dropwizard/2.0.0-rc13/scancode/3.2.2/' + >>> test = Coordinate.from_path(p) + >>> assert expected == test + + >>> u = 'https://api.clearlydefined.io/harvest/maven/mavencentral/io.dropwizard/dropwizard/2.0.0-rc13' + >>> root = 'https://api.clearlydefined.io/harvest' + >>> test = Coordinate.from_path(u, root) + >>> assert expected == test + + >>> u = 'https://api.clearlydefined.io/harvest/maven/mavencentral/io.dropwizard/dropwizard/2.0.0-rc13/scancode/3.2.2' + >>> root = 'https://api.clearlydefined.io/harvest' + >>> test = Coordinate.from_path(u, root) + >>> assert expected == test + + >>> p = '/maven/mavencentral/io.dropwizard/dropwizard/revision/2.0.0-rc13/tool/scancode/3.2.2.json' + >>> test = Coordinate.from_path(p) + >>> assert expected == test + + >>> p = '/maven/mavencentral/io.dropwizard/dropwizard/revision/2.0.0-rc13.json' + >>> test = Coordinate.from_path(p) + >>> assert expected == test + + """ + pth = pth.strip('/') + if root and root in pth: + root = root.strip('/') + _, _, pth = pth.partition(root) + + segments = pth.strip('/').split('/') + if len(segments) >= 6 and segments[4] == 'revision': + # AZ blob style + # /maven/mavencentral/io.dropwizard/dropwizard/revision/2.0.0-rc13.json + # /maven/mavencentral/io.dropwizard/dropwizard/revision/2.0.0-rc13/tool/scancode/3.2.2.json + start = segments[:4] + version = segments[5] + if version.endswith('.json'): + version, _, _ = version.rpartition('.json') + segments = start + [version] + else: + # plain API paths do not have a /revision/ segment + segments = segments[:5] + return cls(*segments) + + def to_api_path(self): + return '{type}/{provider}/{namespace}/{name}/{revision}'.format(**self.to_dict()) + + def 
to_def_blob_path(self): + return '{type}/{provider}/{namespace}/{name}/revision/{revision}.json'.format(**self.to_dict()) + + def to_harvest_blob_path(self, tool, tool_version): + return '{type}/{provider}/{namespace}/{name}/revision/{revision}/tool/{tool}/{tool_version}.json'.format( + tool=tool, tool_version=tool_version, + **self.to_dict()) + + def get_definition_api_url(self, base_api_url=None): + """ + Return a URL to fetch the full definition. + """ + return '{base_url}/definitions/{type}/{provider}/{namespace}/{name}/{revision}'.format( + base_url=base_api_url or self.base_api_url, + path=self.to_api_path(), + **self.to_dict()) + + def get_harvests_api_url(self, base_api_url=None): + """ + Return a URL to fetch all harvests at once. + """ + return '{base_url}/harvest/{type}/{provider}/{namespace}/{name}/{revision}?form=raw'.format( + base_url=base_api_url or self.base_api_url, + path=self.to_api_path(), + **self.to_dict()) + + def to_def_query_api_url(self, include_revision=False, base_api_url=None): + """ + Return a CD API URL for query definitions. 
+ """ + qs = 'type={type}&provider={provider}&name={name}' + if include_revision: + qs += '&revision={revision}' + if self.namespace and self.namespace != '-': + qs += '&namespace={namespace}' + # Resolve the base URL once as a local: the final template needs it, the query string does not. + base_url = base_api_url or self.base_api_url + qs = qs.format(**self.to_dict()) + return '{base_url}/definitions?{qs}'.format(base_url=base_url, qs=qs) + + def to_purl(self): + """ + Return a PackageURL string containing this Coordinate's information + + >>> expected = 'pkg:maven/io.dropwizard/dropwizard@2.0.0-rc13' + >>> test = Coordinate('maven', 'mavencentral', 'io.dropwizard', 'dropwizard', '2.0.0-rc13').to_purl() + >>> assert expected == test + + >>> expected = 'pkg:maven/io.dropwizard/dropwizard@2.0.0-rc13?classifier=sources' + >>> test = Coordinate('sourcearchive', 'mavencentral', 'io.dropwizard', 'dropwizard', '2.0.0-rc13').to_purl() + >>> assert expected == test + + >>> expected = 'pkg:deb/debian/gedit-plugins@3.34.0-3?arch=source' + >>> test = Coordinate('debsrc', 'debian', '', 'gedit-plugins', '3.34.0-3').to_purl() + >>> assert expected == test + """ + converted_package_type = PACKAGE_TYPES_BY_CD_TYPE[self.type] + + namespace = '' + if self.namespace != '-': + namespace = self.namespace + + if self.provider == 'debian': + namespace = 'debian' + + qualifiers = {} + if self.type in ('debsrc', 'sourcearchive',): + qualifiers = QUALIFIERS_BY_CD_TYPE[self.type] + + return PackageURL( + type=converted_package_type, + namespace=namespace, + name=self.name, + version=self.revision, + qualifiers=qualifiers, + ).to_string() + + @classmethod + def from_purl(cls, purl): + """ + Return a Coordinate containing the information from PackageURL `purl` + + >>> expected = Coordinate('maven', 'mavencentral', 'io.dropwizard', 'dropwizard', '2.0.0-rc13') + >>> purl = 'pkg:maven/io.dropwizard/dropwizard@2.0.0-rc13' + >>> test = Coordinate.from_purl(purl) + >>> assert expected == test + + >>> expected = Coordinate('sourcearchive', 'mavencentral', 'io.dropwizard', 'dropwizard', '2.0.0-rc13') + >>> 
purl = 'pkg:maven/io.dropwizard/dropwizard@2.0.0-rc13?classifier=sources' + >>> test = Coordinate.from_purl(purl) + >>> assert expected == test + + >>> expected = Coordinate('debsrc', 'debian', '', 'gedit-plugins', '3.34.0-3') + >>> purl = 'pkg:deb/debian/gedit-plugins@3.34.0-3?arch=source' + >>> test = Coordinate.from_purl(purl) + >>> assert expected == test + """ + p = PackageURL.from_string(purl) + + package_type = p.type + if package_type not in PACKAGE_TYPES_BY_PURL_TYPE: + raise Exception('Package type is not supported by ClearlyDefined: {}'.format(package_type)) + # Handle the source types of Maven and Debian packages + if package_type == 'maven' and p.qualifiers.get('classifier', '') == 'sources': + package_type = 'sourcearchive' + provider = 'mavencentral' + elif package_type == 'deb' and p.qualifiers.get('arch', '') == 'source': + package_type = 'debsrc' + provider = 'debian' + else: + # Providers are keyed by purl type, so look one up before converting to the CD type. TODO: Have way to set other providers? + provider = PROVIDERS_BY_PURL_TYPE[package_type] + package_type = PACKAGE_TYPES_BY_PURL_TYPE[package_type] + + return cls( + type=package_type, + provider=provider, + namespace=p.namespace, + name=p.name, + revision=p.version, + ) + + +def get_coordinates(data_dir): + """ + Yield tuple of (path, Coordinate) from definition directories from `data_dir` + at full depth. + """ + data_dir = data_dir.rstrip('/') # only trim trailing slashes: an absolute path must keep its leading one + for dirpath, dirnames, _filenames in os.walk(data_dir, followlinks=False): + for d in dirnames: + pth = path.join(dirpath, d) + _, _, cdpth = pth.partition(data_dir) + segments = cdpth.strip('/').split('/') + # skip paths that have not the full depth required (e.g. 5 segments) + if not len(segments) == 5: + continue + yield pth, Coordinate.from_path(cdpth) + + +def _get_response_content(url, retries=2, wait=2, session=requests, verbose=False, _retries=set()): + """ + Return a tuple of (etag, md5, content bytes) with the content as bytes or as decoded + text if `as_text` is True) of the response of a GET HTTP request at `url`.
+ On HTTP errors (500 or higher), retry up to `retries` time after waiting + `wait` seconds. + """ + if verbose: + click.echo(' --> Fetching: {url}'.format(**locals())) + + response = session.get(url, timeout=600) + status_code = response.status_code + + if status_code == requests.codes.ok: # NOQA + # handle the case where the API returns an empty file and we need + # to restart from an earlier continuation + if url in _retries: + _retries.remove(url) + print(' SUCCESS after Failure to fetch:', url) + etag = response.headers.get('etag') + content = response.content + checksum = md5(content).hexdigest() + return etag, checksum, response.content + + error_code = requests.codes.get(status_code) or '' + + if status_code >= 500 and retries: + # timeout/522 or other server error: let's wait a bit and retry for "retries" number of retries + retries -= 1 + print(' Failure to fetch:', url, 'with', status_code, error_code, 'retrying after waiting:', wait, 'seconds.') + _retries.add(url) + time.sleep(wait) + return _get_response_content( + url=url, retries=retries, wait=wait, session=session, verbose=verbose) + + # all other errors + raise Exception('Failed HTTP request for {url} : error: {status_code} : {error_code}'.format(**locals())) + + +def get_response_content(url, retries=2, wait=4, session=requests, verbose=False): + """ + Return the bytes of the response of a GET HTTP request at `url`, an md5 checksum and the URL etag. + On failures, retry up to `retries` time after waiting `wait` seconds. 
+ """ + try: + return _get_response_content( + url=url, retries=retries, wait=wait, + session=session, verbose=verbose) + except Exception as e: + if retries: + print(' Failure to fetch:', url, 'with error:', e, 'and retrying after waiting:', wait, 'seconds.') + # we sleep progressively more after each failure and up to wait seconds + time.sleep(int(wait / (retries or 1))) + retries -= 1 + return get_response_content( + url=url, retries=retries, wait=wait, + session=session, verbose=verbose) + else: + raise + + +def split_url(url): + """ + Given a URL, return a tuple of URL elements where `query` is a mapping. + """ + scheme, netloc, path, query, fragment = urlsplit(url) + query = parse_qs(query) + return scheme, netloc, path, query, fragment + + +def join_qs(keys_values, do_not_quote=()): + """ + Join a key/values mapping back into a query string. + Quote values unless the name is in in the `do_not_quote` set. + """ + keys_values = { + k: (v[0] if v and isinstance(v, list) else v) for k, v in keys_values.items()} + return '&'.join('='.join([k, v if k in do_not_quote else quote_plus(v)]) + for k, v in keys_values.items()) + + +def append_path_to_url(url, extra_path): + """ + Return a new `url` with `extra_path` appended to its path. + """ + scheme, netloc, path, query, fragment = split_url(url) + path = path.strip('/') + '/' + extra_path.strip('/') + segments = scheme, netloc, path, join_qs(query), fragment + return urlunsplit(segments) + + +def update_url(url, qs_mapping, do_not_quote=()): + """ + Return a new `url` with its query string updated from a mapping of key/value pairs. + """ + scheme, netloc, path, query, fragment = split_url(url) + query.update(qs_mapping) + segments = scheme, netloc, path, join_qs(query, do_not_quote=do_not_quote), fragment + return urlunsplit(segments) + + +def build_cdapi_continuation_url(api_url, continuation_token): + """ + Return a new `api_url` with a CD API `continuation_token`. 
+ """ + return update_url(api_url, {'continuationToken': continuation_token}) + + +def build_cdapi_continuation_url_from_coordinates(api_url, coordinates): + """ + Return a new `api_url` with a continuation token built from + a `coordinates` string. If a token is already present in the api_url it + will be replaced. + """ + continuation_token = get_cdapi_continuation_token(coordinates) + return build_cdapi_continuation_url(api_url, continuation_token) + + +def split_cdapi_url(url): + """ + Given a URL that may contain a continuation token, return a tuple of + (cleaned url, token) + """ + # get a continuation-free base URL. This assumes that the continuationToken + # is always the last query string param if it is present. + scheme, netloc, url, query, fragment = split_url(url) + token = query.pop('continuationToken', None) + if token: + token = token[0] + if '%' in token: + token = unquote_plus(token) + segments = scheme, netloc, url, join_qs(query), fragment + unparsed = urlunsplit(segments) + if TRACE: + print('split_cdapi_url:', 'unparsed:', unparsed, 'token:', token) + return unparsed, token + + +def get_coord_from_cdapi_continuation_url(api_url): + """ + Given a URL that may contain a continuation token, return that as a decoded + CD coordinate string or None. + """ + # get a continuation-free base URL. This assumes that the continuationToken + # is always the last query string param if it is present. + _url, token = split_cdapi_url(api_url) + if token: + return get_coord_from_cdapi_continuation(token) + + +def get_coord_from_cdapi_continuation(continuation): + """ + Given an encoded continuation token, return a string of CD coordinates. 
+ """ + if TRACE: + print('get_coord_from_cdapi_continuation: continuation:', continuation) + continuation = continuation.replace(' ', '+') + + if '%' in continuation: + continuation = unquote_plus(continuation) + + decoded = base64.b64decode(continuation) + if not isinstance(decoded, str): + decoded = decoded.decode('utf-8') + return decoded + + +def get_cdapi_continuation_token(coord): + """ + Given a coord mapping or string of CD coordinates, return an encoded + continuation token. + """ + if isinstance(coord, dict): + coord = coord2str(coord) + coord = coord.replace(' ', '+') + encoded = coord.encode('utf-8') + + return base64.b64encode(encoded).decode('utf-8') + + +def str2coord(s): + """ + Return a mapping of CD coordinates from a `s` coordinates, URL or URN string. + + Some example of the supported input strings are: + URL: "cd:/gem/rubygems/-/mocha/1.7.0" + URN: "urn:gem:rubygems:-:mocha:revision:1.7.0:tool:scancode:3.1.0" + plain: /gem/rubygems/foo/mocha/1.7.0" + """ + is_urn = s.startswith('urn') + is_url = s.startswith('cd:') + splitter = ':' if is_urn else '/' + segments = s.strip(splitter).split(splitter) + if is_urn or is_url: + segments = segments[1:] + # ignore extra segments for now beyond the 5 fisrt (such as the PR of a curation) + segments = segments[:5] + + fields = ('type', 'provider', 'namespace', 'name', 'revision',) + return dict(zip_longest(fields, segments)) + + +def coord2str(coord): + """ + Return a path-like from a `coord` CD coordinates mapping. 
+ A non-present namespace is always represented as a dash (-) + + A mapping as these fields: + "type": "git", + "provider": "github", + "namespace": "nexb", + "name": "license-expression", + "revision": "70277cdfc186466667cb58ec9f9c7281e68a221b" + """ + assert coord, 'Empty or missing coordinate mapping: {}'.format(coord) + rev = coord.get('revision') + kwargs = dict( + t=coord['type'], + p=coord['provider'], + ns=coord.get('namespace') or '-', + n=coord['name'], + r=rev, + ) + if rev: + template = '{t}/{p}/{ns}/{n}/{r}' + else: + template = '{t}/{p}/{ns}/{n}' + return template.format(**kwargs) + diff --git a/clearcode/load.py b/clearcode/load.py new file mode 100644 index 00000000..a2889d63 --- /dev/null +++ b/clearcode/load.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import multiprocessing +import os +from pathlib import Path +import sys + +from django.db.utils import IntegrityError + +import click + + +""" +Load ClearlyDefined definitions and harvests from the filesystem + +Operation +--------- +This script walks a given `--input-dir` location and loads any ClearlyDefined data +into a Database (currently postgreSQL). 
def walk_and_load_from_filesystem(input_dir, cd_root_dir):
    """
    Walk the given input_dir and load clearlydefined data into a Database.
    A CD item on the filesystem looks like the following:

        ~/clearly-local/npm/npmjs/@actions/github/revision/2.1.1.json.gz

    The resulting CDitem should be:

        CDitem.path = npm/npmjs/@actions/github/revision/2.1.1.json.gz
        CDitem.content = 'the file: 2.1.1.json.gz in bytes'

    `cd_root_dir` is the directory that stored paths are made relative to.
    Every file found under `input_dir` is loaded; a file whose path is
    already in the database is skipped.
    """
    # Imported inside the function, as in the original code.
    # NOTE(review): presumably so that Django is configured before the models
    # module is loaded -- confirm before moving this to module level.
    from clearcode import models

    gz_suffix = '.gz'

    # only files are counted (directories are not)
    file_counter = 0
    for root, _dirs, files in os.walk(input_dir):
        for filename in files:
            file_counter += 1
            # output some progress
            print(' ', end='\r')
            print("Processing file #{}".format(file_counter), end='\r')

            # TODO: check if the location is actually a CD data item.
            full_gzip_path = os.path.join(root, filename)

            # Remove the ".gz" extension as a suffix. str.rstrip() would
            # instead strip any trailing run of the characters ".", "g" and
            # "z" and mangle names such as "egg.gz".
            if full_gzip_path.endswith(gz_suffix):
                full_json_path = full_gzip_path[:-len(gz_suffix)]
            else:
                full_json_path = full_gzip_path

            # normalize the `path` value by removing the arbitrary parent directory
            cditem_rel_path = os.path.relpath(full_json_path, cd_root_dir)

            with open(full_gzip_path, mode='rb') as f:
                content = f.read()

            # Save to DB
            try:
                models.CDitem.objects.create(path=cditem_rel_path, content=content)
            except IntegrityError:
                # skip if we already have it in the DB
                continue
+ """ + if not input_dir: + sys.exit('Please specify an input directory using the `--input-dir` option.') + if not cd_root_dir: + sys.exit('Please specify the cd-root-directory using the --cd-root-dir option.') + + # get proper DB setup + + walk_and_load_from_filesystem(input_dir, cd_root_dir) + print(' ', end='\r') + print("Loading complete") + + +@click.command() + +@click.option('--input-dir', + type=click.Path(), metavar='DIR', + help='Load content from this input directory that contains a tree of gzip-compressed JSON CD files') + +@click.option('--cd-root-dir', + type=click.Path(), metavar='DIR', + help='specify root directory that contains a tree of gzip-compressed JSON CD files') + +@click.help_option('-h', '--help') + +def cli(input_dir=None, cd_root_dir=None, *arg, **kwargs): + """ + Handle ClearlyDefined gzipped JSON scans by walking a clearsync directory structure, + creating CDItem objects and loading them into a PostgreSQL database. + """ + load( + input_dir=input_dir, + cd_root_dir=cd_root_dir, + *arg, + **kwargs + ) + + +if __name__ == '__main__': + cli() diff --git a/clearcode/management/commands/clearload.py b/clearcode/management/commands/clearload.py new file mode 100644 index 00000000..7f872dbb --- /dev/null +++ b/clearcode/management/commands/clearload.py @@ -0,0 +1,41 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from clearcode.load import load +from minecode.management.commands import VerboseCommand + + +class Command(VerboseCommand): + help = """ + Handle ClearlyDefined gzipped JSON scans by walking a clearsync directory structure, + creating CDItem objects and loading them into a PostgreSQL database. 
+ """ + + def add_arguments(self, parser): + parser.add_argument( + '--input-dir', + dest='input_dir', + default=None, + type=str, + help='Load content from this input directory that contains a tree of gzip-compressed JSON CD files') + parser.add_argument( + '--cd-root-dir', + dest='cd_root_dir', + default=None, + type=str, + help='Specify root directory that contains a tree of gzip-compressed JSON CD files') + + def handle(self, *args, **options): + input_dir = options.get('input_dir') + cd_root_dir = options.get('cd_root_dir') + + load( + input_dir=input_dir, + cd_root_dir=cd_root_dir + ) diff --git a/clearcode/management/commands/clearsync.py b/clearcode/management/commands/clearsync.py new file mode 100644 index 00000000..5481d8f5 --- /dev/null +++ b/clearcode/management/commands/clearsync.py @@ -0,0 +1,105 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from clearcode.sync import sync +from minecode.management.commands import VerboseCommand + + +class Command(VerboseCommand): + help = """ + Fetch the latest definitions and harvests from ClearlyDefined and save these + as gzipped JSON either as as files in output-dir or in a PostgreSQL + database. Loop forever after waiting some seconds between each cycles. 
+ """ + + def add_arguments(self, parser): + parser.add_argument( + '--output-dir', + dest='output_dir', + default=None, + type=str, + help='Save fetched content as compressed gzipped files to this output directory.') + parser.add_argument( + '--save-to-db', + dest='save_to_db', + action='store_true', + help='Save fetched content as compressed gzipped blobs in the configured database.') + parser.add_argument( + '--unsorted', + dest='unsorted', + action='store_true', + help='Fetch data without any sorting. The default is to fetch data sorting by latest updated first.') + parser.add_argument( + '--base-api-url', + dest='base_api_url', + default='https://api.clearlydefined.io', + help='ClearlyDefined base API URL.') + parser.add_argument( + '--wait', + dest='wait', + default=60, + type=int, + help='Set the number of seconds to wait for new or updated definitions ' + 'between two loops.') + parser.add_argument( + '-n', + '--processes', + dest='processes', + default=1, + type=int, + help='Set the number of parallel processes to use. ' + 'Disable parallel processing if 0.') + parser.add_argument( + '--max-def', + dest='max_def', + default=0, + type=int, + help='Set the maximum number of definitions to fetch.') + parser.add_argument( + '--only-definitions', + dest='only_definitions', + action='store_true', + help='Only fetch definitions and no other data item.') + parser.add_argument( + '--log-file', + dest='log_file', + default=None, + type=str, + help='Path to a file where to log fetched paths, one per line. 
' + 'Log entries will be appended to this file if it exists.') + parser.add_argument( + '--verbose', + dest='verbose', + action='store_true', + help='Display more verbose progress messages.') + + def handle(self, *args, **options): + output_dir = options.get('output_dir') + save_to_db = options.get('save_to_db') + base_api_url = options.get('base_api_url') + wait = options.get('wait') + processes = options.get('processes') + unsorted = options.get('unsorted') + log_file = options.get('log_file') + max_def = options.get('max_def') + only_definitions = options.get('only_definitions') + verbose = options.get('verbose') + + sync( + output_dir=output_dir, + save_to_db=save_to_db, + base_api_url=base_api_url, + wait=wait, + processes=processes, + unsorted=unsorted, + log_file=log_file, + max_def=max_def, + only_definitions=only_definitions, + verbose=verbose + ) diff --git a/clearcode/migrations/0001_initial.py b/clearcode/migrations/0001_initial.py new file mode 100644 index 00000000..4fb7f4a5 --- /dev/null +++ b/clearcode/migrations/0001_initial.py @@ -0,0 +1,22 @@ +# Generated by Django 3.0.4 on 2020-03-06 18:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='CDitem', + fields=[ + ('path', models.CharField(help_text='Path to the original file in the ClearlyDefined file storage.', max_length=2048, primary_key=True, serialize=False)), + ('content', models.BinaryField(help_text='Actual gzipped JSON content.')), + ('last_modified_date', models.DateTimeField(auto_now=True, help_text='Date and time that this record was last modified.')), + ], + ), + ] diff --git a/clearcode/migrations/0002_auto_20200331_1052.py b/clearcode/migrations/0002_auto_20200331_1052.py new file mode 100644 index 00000000..67411c62 --- /dev/null +++ b/clearcode/migrations/0002_auto_20200331_1052.py @@ -0,0 +1,23 @@ +# Generated by Django 3.0.4 on 2020-03-31 10:52 + 
+from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('clearcode', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='cditem', + name='last_map_date', + field=models.DateTimeField(blank=True, db_index=True, help_text='Timestamp set to the date of the last mapping. Used to track mapping status.', null=True), + ), + migrations.AddField( + model_name='cditem', + name='map_error', + field=models.TextField(blank=True, help_text='Mapping errors messages. When present this means the mapping failed.', null=True), + ), + ] diff --git a/clearcode/migrations/0003_cditem_uuid.py b/clearcode/migrations/0003_cditem_uuid.py new file mode 100644 index 00000000..3da8737e --- /dev/null +++ b/clearcode/migrations/0003_cditem_uuid.py @@ -0,0 +1,34 @@ +# Generated by Django 3.0.5 on 2020-05-04 18:33 + +from django.db import migrations, models +import uuid + + +def create_uuid(apps, schema_editor): + CDitem = apps.get_model('clearcode', 'CDitem') + # Use `iterator()` to prevent queryset from cacheing. 
class VirtualFileStore:
    """
    Convenience wrapper to access CDitems as if they would be concrete files.
    """
    @classmethod
    def walk(cls, prefix=None, since=None):
        """
        Yield tuples of (path, data, last modified) for CD items, where data
        is the deserialized JSON content of the item.
        Optionally return items that have a certain path `prefix`.
        Optionally return items that have been modified at or after a `since`
        datetime.
        """
        items = CDitem.objects.all()
        if prefix:
            items = items.filter(path__startswith=prefix)
        if since:
            # "gte" is the Django lookup for ">="; there is no "ge" lookup
            items = items.filter(last_modified_date__gte=since)
        for item in items:
            # CDitem exposes its decoded content through the `data` property
            yield item.path, item.data, item.last_modified_date
+ ) + + uuid = models.UUIDField( + default=uuid.uuid4, + unique=True, + editable=False, + ) + + content = models.BinaryField( + help_text='Actual gzipped JSON content.' + ) + + last_modified_date = models.DateTimeField( + help_text='Date and time that this record was last modified.', + auto_now=True, # Automatically set to now on object save() + ) + + last_map_date = models.DateTimeField( + null=True, + blank=True, + db_index=True, + help_text='Timestamp set to the date of the last mapping. ' + 'Used to track mapping status.', + ) + + map_error = models.TextField( + null=True, + blank=True, + help_text='Mapping errors messages. When present this means the mapping failed.', + ) + + objects = CDitemQuerySet.as_manager() + + @property + def data(self): + """ + Return the data content deserialized from the content field. + """ + uncompressed_content = gzip.decompress(self.content) + if not uncompressed_content: + uncompressed_content = '{}' + return json.loads(uncompressed_content) diff --git a/clearcode/sync.py b/clearcode/sync.py new file mode 100644 index 00000000..6b39d526 --- /dev/null +++ b/clearcode/sync.py @@ -0,0 +1,604 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from datetime import datetime +import gzip +import json +from multiprocessing import pool +import os +from os import path +import time + +import click +from django.utils import timezone +import requests + +from clearcode import cdutils + + +""" +Fetch the latest definitions and harvests from ClearlyDefined + +Theory of operation +------------------- + +We can access on a regular basis a limited subset of the most recently updated +definitions, by batches of 100 definitions using this query: + https://api.clearlydefined.io/definitions?matchCasing=false&sort=releaseDate&sortDesc=true + +We can also focus this on a type as in: + https://api.clearlydefined.io/definitions?matchCasing=false&sort=releaseDate&sortDesc=true&type=git + +This provides incomplete definitions (they do not contain files). + +From there we can fetch: + - each definition (or possibly many as a batch at once) + - each harvest either by batching all harvest tools at once or fetching them + one by one. + - any referenced attachment + +Since the definition batches are roughly stopping after 2000 when sorted by +latest date, we can repeat this every few minutes or so forever to catch any +update. We use etags and a cache to avoid refetching things that have not +changed. +""" + +TRACE = False + + +# TODO: update when this is updated upstream +# https://github.com/clearlydefined/service/blob/master/schemas/definition-1.0.json#L17 +known_types = ( + # fake empty type + None, + 'npm', + 'git', + 'pypi', + 'composer', + 'maven', + 'gem', + 'nuget', + 'sourcearchive', + 'deb', + 'debsrc', + 'crate', + 'pod', +) + + +# each process gets its own session +session = requests.Session() + + +def fetch_and_save_latest_definitions( + base_api_url, cache, output_dir=None, save_to_db=False, + by_latest=True, retries=2, verbose=True): + """ + Fetch ClearlyDefined definitions and paginate through. Save these as blobs + to data_dir. + + Fetch the most recently updated definitions first if `by_latest` is True. 
+ Otherwise, the order is not specified. + NOTE: these do not contain file details (but the harvest do) + """ + assert output_dir or save_to_db, 'You must select one of the --output-dir or --save-to-db options.' + + definitions_url = cdutils.append_path_to_url(base_api_url, extra_path='definitions') + if by_latest: + definitions_url = cdutils.update_url(definitions_url, qs_mapping=dict(sort='releaseDate', sortDesc='true')) + + for content in fetch_definitions(api_url=definitions_url, cache=cache, retries=retries, verbose=TRACE): + # content is a batch of 100 definitions + definitions = content and content.get('data') + if not definitions: + if verbose: + print(' No more data for: {}'.format(definitions_url)) + break + + if verbose: + first = cdutils.coord2str(definitions[0]['coordinates']) + last = cdutils.coord2str(definitions[-1]['coordinates']) + print('Fetched definitions from :', first, 'to:', last, flush=True) + else: + print('.', end='', flush=True) + + savers = [] + if save_to_db: + savers.append(db_saver) + if output_dir: + savers.append(file_saver) + + # we received a batch of definitions: let's save each as a Gzipped JSON + for definition in definitions: + coordinate = cdutils.Coordinate.from_dict(definition['coordinates']) + for saver in savers: + blob_path, _size = save_def( + coordinate=coordinate, content=definition, output_dir=output_dir, + saver=saver) + yield coordinate, blob_path + + +def fetch_definitions(api_url, cache, retries=1, verbose=True): + """ + Yield batches of definitions each as a list of mappings from calling the + ClearlyDefined API at `api_url`. Retry on failure up to `retries` times. + Raise an exception on failure. Raise an EmptyReponse on success but empty + response (a CD API quirk). + + Paginate using the API's `continuationToken`. If provided as a start, this + token should be in the initial URL query string. 
def compress(content):
    """
    Return a byte string of `content` gzipped-compressed.
    `content` is either bytes, a string or a JSON-serializable data structure:
    bytes are compressed as-is, a string is encoded as UTF-8 and anything else
    is first serialized as compact JSON.

    The gzip header timestamp is fixed to zero so that the same `content`
    always yields the same bytes. This lets callers detect unchanged content
    by comparing the compressed payloads.
    """
    if isinstance(content, bytes):
        payload = content
    elif isinstance(content, str):
        payload = content.encode('utf-8')
    else:
        payload = json.dumps(content, separators=(',', ':')).encode('utf-8')
    # mtime=0 (Python 3.8+) keeps the current time out of the gzip header
    return gzip.compress(payload, compresslevel=9, mtime=0)
def db_saver(content, blob_path, **kwargs):
    """
    Save `content` bytes (or dict or string) identified by `blob_path` to the
    configured DB. Return the length of the written payload or 0 if it existed
    and was not updated.

    Extra keyword arguments (such as the `output_dir` passed to every saver)
    are accepted and ignored.
    """
    from clearcode import models

    compressed = compress(content)

    # Pass the payload as a default so that a newly created row is stored
    # with its content. Without it the row would be created empty.
    cditem, created = models.CDitem.objects.get_or_create(
        path=blob_path,
        defaults={'content': compressed},
    )

    if created:
        if TRACE:
            print('Adding content for:', blob_path)
        return len(compressed)

    if cditem.content != compressed and cditem.last_modified_date < timezone.now():
        cditem.content = compressed
        cditem.save()
        if TRACE:
            print('Updating content for:', blob_path)
        return len(compressed)

    # existing row left untouched
    return 0
class Cache(object):
    """
    A caching object for etags and checksums to avoid refetching things.

    Two mappings are kept: `etags_cache` maps a URL to its last seen etag and
    `checksums_cache` maps a content checksum to the URL it was fetched from.
    """

    def __init__(self, max_size=100 * 1000):
        self.etags_cache = {}
        self.checksums_cache = {}
        self.max_size = max_size

    def is_unchanged_remotely(self, url, session=session):
        """
        Return True if a `url` content is unchanged from cache based on HTTP
        HEADER Etag. Return False otherwise, including when the HEAD request
        fails.
        """
        try:
            response = session.head(url)
        except Exception:
            # Best effort: any request failure means "assume changed".
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
        remote_etag = response.headers.get('etag')
        return bool(remote_etag) and self.etags_cache.get(url) == remote_etag

    def is_fetched(self, checksum, url):
        """
        Return True if the content checksum exists for url, using MD5 checksum.
        """
        return url and checksum and self.checksums_cache.get(checksum) == url

    def add(self, etag, checksum, url):
        """
        Record the `etag` and `checksum` of `url`. Empty values are ignored.
        """
        if etag:
            self.etags_cache[url] = etag
        if checksum:
            self.checksums_cache[checksum] = url

    def add_args(self, args):
        """
        Same as add() but takes a single (etag, checksum, url) sequence.
        """
        self.add(*args)

    def trim(self):
        """
        Trim the cache to its max size.
        """

        def _resize(cache):
            # drop the first keys in iteration order until back to max_size
            extra_items = len(cache) - self.max_size
            if extra_items > 0:
                for ei in list(cache)[:extra_items]:
                    del cache[ei]

        _resize(self.etags_cache)
        _resize(self.checksums_cache)

    def get_content(self, url, retries=1, session=session, with_cache_keys=False):
        """
        Return fetched content as bytes or None if already fetched or unchanged.
        Updates the cache as needed.

        If `with_cache_keys` is True, return a tuple of (etag, checksum,
        content) instead. That tuple is (None, None, None) when there is
        nothing new to return, so callers can always unpack it.
        """
        nothing = (None, None, None) if with_cache_keys else None

        if self.is_unchanged_remotely(url=url, session=session):
            return nothing

        etag, checksum, content = cdutils.get_response_content(
            url, retries=retries, session=session)

        if not content:
            return nothing

        if self.is_fetched(checksum, url):
            return nothing

        self.add(etag, checksum, url)

        if with_cache_keys:
            return etag, checksum, content
        else:
            return content

    def copy(self):
        """
        Return a copy of self that has its own etags and checksums mappings.
        """
        cache = Cache(self.max_size)
        cache.checksums_cache = dict(self.checksums_cache)
        cache.etags_cache = dict(self.etags_cache)
        return cache
+ for def_type in known_types: + sleeping = False + + if def_type: + # get latest with a "type" query + def_api_url = cdutils.update_url(base_api_url, qs_mapping=dict(type=def_type)) + else: + # do nothing if we have no type + def_api_url = base_api_url + + definitions = fetch_and_save_latest_definitions( + base_api_url=def_api_url, + output_dir=output_dir, + save_to_db=save_to_db, + cache=cache, + by_latest=not unsorted, + verbose=verbose) + + for coordinate, file_path in definitions: + + cycle_defs_count += 1 + + if log_file: + log_file_fn.write(file_path.partition('.gz')[0] + '\n') + + if TRACE: print(' Saved def for:', coordinate) + + if fetch_harvests: + kwds = dict( + coordinate=coordinate, + output_dir=output_dir, + save_to_db=save_to_db, + # that's a copy of the cache, since we are in some + # subprocess, the data is best not shared to avoid + # any sync issue + cache=cache.copy(), + verbose=verbose) + + harvest_fetchers.apply_async( + fetch_and_save_harvests, + kwds=kwds, + callback=cache.add_args) + + if max_def and max_def <= cycle_defs_count: + break + + if max_def and (max_def <= cycle_defs_count or max_def <= total_defs_count): + break + + total_defs_count += cycle_defs_count + cycle_duration = time.time() - start + total_duration += cycle_duration + + if not sleeping: + print('Saved', cycle_defs_count, 'defs and harvests,', + 'in:', int(cycle_duration), 'sec.') + + print('TOTAL cycles:', cycles, + 'with:', total_defs_count, 'defs and combined harvests,', + 'in:', int(total_duration), 'sec.') + + print('Cycle completed at:', datetime.utcnow().isoformat(), + 'Sleeping for', wait, 'seconds...') + else: + print('.', end='') + + sleeping = True + time.sleep(wait) + cache.trim() + + except KeyboardInterrupt: + click.secho('\nAborted with Ctrl+C!', fg='red', err=True) + return + + finally: + if log_file: + log_file_fn.close() + + if harvest_fetchers: + harvest_fetchers.close() + harvest_fetchers.terminate() + + print('TOTAL cycles:', cycles, + 'with:', 
total_defs_count, 'defs and combined harvests,', + 'in:', int(total_duration), 'sec.') + + +@click.command() + +@click.option('--output-dir', + type=click.Path(), metavar='DIR', + help='Save fetched content as compressed gzipped files to this output directory.') + +@click.option('--save-to-db', + is_flag=True, + help='Save fetched content as compressed gzipped blobs in the configured database.') + +@click.option('--unsorted', + is_flag=True, + help='Fetch data without any sorting. The default is to fetch data sorting by latest updated first.') + +@click.option('--base-api-url', + type=str, + default='https://api.clearlydefined.io', show_default=True, + help='ClearlyDefined base API URL.') + +@click.option('--wait', + type=int, metavar='INT', + default=60, show_default=True, + help='Set the number of seconds to wait for new or updated definitions ' + 'between two loops.') + +@click.option('-n', '--processes', + type=int, metavar='INT', + default=1, show_default=True, + help='Set the number of parallel processes to use. ' + 'Disable parallel processing if 0.') + +@click.option('--max-def', + type=int, metavar='INT', + default=0, + help='Set the maximum number of definitions to fetch.') + +@click.option('--only-definitions', + is_flag=True, + help='Only fetch definitions and no other data item.') + +@click.option('--log-file', + type=click.Path(), default=None, + help='Path to a file where to log fetched paths, one per line. 
' + 'Log entries will be appended to this file if it exists.') + +@click.option('--verbose', + is_flag=True, + help='Display more verbose progress messages.') + +@click.help_option('-h', '--help') +def cli(output_dir=None, save_to_db=False, + base_api_url='https://api.clearlydefined.io', + wait=60, processes=1, unsorted=False, + log_file=None, max_def=0, only_definitions=False, session=session, + verbose=False, *arg, **kwargs): + """ + Fetch the latest definitions and harvests from ClearlyDefined and save these + as gzipped JSON either as as files in output-dir or in a PostgreSQL + database. Loop forever after waiting some seconds between each cycles. + """ + sync( + output_dir=output_dir, + save_to_db=save_to_db, + base_api_url=base_api_url, + wait=wait, + processes=processes, + unsorted=unsorted, + log_file=log_file, + max_def=max_def, + only_definitions=only_definitions, + session=session, + verbose=verbose, + *arg, + **kwargs, + ) + + +if __name__ == '__main__': + cli() diff --git a/minecode/tests/discovery/testfiles/command/bar b/clearcode/tests/__init__.py similarity index 100% rename from minecode/tests/discovery/testfiles/command/bar rename to clearcode/tests/__init__.py diff --git a/clearcode/tests/test_api.py b/clearcode/tests/test_api.py new file mode 100644 index 00000000..76f3feba --- /dev/null +++ b/clearcode/tests/test_api.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import datetime +import gzip +import json + +from django.test import TestCase +from django.utils import timezone +from rest_framework import status +from rest_framework.test import APIClient + +from clearcode import api +from clearcode.models import CDitem + + +class CDitemSerializerTestCase(TestCase): + + def setUp(self): + self.cditem_attributes = { + 'path': 'test/path/file.json', + 'content': gzip.compress(json.dumps({'test': 'content'}).encode('utf-8')) + } + self.cditem = CDitem.objects.create(**self.cditem_attributes) + self.serializer = api.CDitemSerializer(instance=self.cditem) + self.data = self.serializer.data + + def test_contains_expected_fields(self): + self.assertCountEqual(self.data.keys(), ['path', 'uuid', 'content', 'last_modified_date', 'last_map_date', 'map_error']) + + def test_path_field_content(self): + self.assertEqual(self.data['path'], self.cditem_attributes['path']) + + def test_content_field_content(self): + decoded_test_data = base64.b64decode(self.data['content']) + self.assertEqual(decoded_test_data, self.cditem_attributes['content']) + self.assertEqual(json.loads(gzip.decompress(decoded_test_data)), {'test': 'content'}) + + def test_last_map_date_field_content(self): + self.assertIsNone(self.data['last_map_date']) + + def test_map_error_field_content(self): + self.assertIsNone(self.data['map_error']) + + +class CDitemAPITestCase(TestCase): + + def setUp(self): + self.client = APIClient() + self.test_path = 'test/path/file.json' + + self.post_test_path = 'test/post/path/file.json' + + self.test_data = {'test': 
'content'} + self.test_content = gzip.compress(json.dumps(self.test_data).encode('utf-8')) + + self.cditem = CDitem.objects.create(path=self.test_path) + self.uuid = self.cditem.uuid + + def test_api_cditems_get(self): + response = self.client.get('/api/cditems/{}/'.format(self.uuid)) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data.get('path'), self.test_path) + self.assertEqual(response.data.get('uuid'), str(self.uuid)) + + def test_api_cditems_get_list(self): + response = self.client.get('/api/cditems/') + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(1, response.data.get('count')) + + def test_api_cditems_get_list_by_last_modified_date_old_date(self): + test_date = datetime.datetime.now() - datetime.timedelta(days=1) + test_date_string = '{}-{}-{}'.format(test_date.year, test_date.month, test_date.day) + + response = self.client.get('/api/cditems/?last_modified_date={}'.format(test_date_string)) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(1, response.data.get('count')) + + def test_api_cditems_get_list_by_last_modified_date_future(self): + test_date = datetime.datetime.now() + datetime.timedelta(days=1) + test_date_string = '{}-{}-{}'.format(test_date.year, test_date.month, test_date.day) + + response = self.client.get('/api/cditems/?last_modified_date={}'.format(test_date_string)) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(0, response.data.get('count')) + + def test_api_cditems_put(self): + test_payload = { + 'path': self.test_path, + 'content': base64.b64encode(self.test_content).decode('utf-8') + } + + response = self.client.put('/api/cditems/{}/'.format(self.uuid), test_payload) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + cditem = CDitem.objects.get(path=self.test_path) + self.assertEqual(cditem.data, self.test_data) + + def test_api_cditems_post(self): + test_payload = { + 'path': 
class CDitemManagerModifiedAfterTestCase(TestCase):
    """
    Check `CDitem.objects.modified_after()` with reference dates located before
    and after the creation of a single CDitem.
    """

    def setUp(self):
        self.cditem0 = CDitem.objects.create(path='npm/name/version')

    def _assert_modified_after_count(self, days, expected_count):
        """
        Assert that `expected_count` CDitems are reported as modified after the
        date located `days` days from now (negative `days` means in the past).
        """
        # timezone.now() is aware or naive according to the USE_TZ setting, so
        # the reference date always matches what the ORM expects. A naive
        # datetime.datetime.now() triggers a warning when time zone support is
        # active.
        test_date = timezone.now() + datetime.timedelta(days=days)
        cditems = CDitem.objects.modified_after(test_date)
        self.assertIsNotNone(cditems)
        self.assertEqual(expected_count, len(cditems))

    def test_modified_after_1_day_old(self):
        self._assert_modified_after_count(days=-1, expected_count=1)

    def test_modified_after_1_week_old(self):
        self._assert_modified_after_count(days=-7, expected_count=1)

    def test_modified_after_1_day_new(self):
        self._assert_modified_after_count(days=1, expected_count=0)

    def test_modified_after_1_week_new(self):
        self._assert_modified_after_count(days=7, expected_count=0)
test_known_package_types(self): + # This path starts with npm, which is known + cditem_1 = CDitem.objects.create(path='npm/name/version') + # asdf is not a proper type + cditem_2 = CDitem.objects.create(path='asdf/name/version') + cditems = list(CDitem.objects.known_package_types()) + self.assertEqual(1, len(cditems)) + cditem = cditems[0] + self.assertEqual(cditem_1, cditem) + + def test_definitions(self): + expected_definition = CDitem.objects.create(path='composer/packagist/yoast/wordpress-seo/revision/9.5-RC3.json') + # harvest should not be in cditems + harvest = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json') + cditems = list(CDitem.objects.definitions()) + self.assertEqual(1, len(cditems)) + definition = cditems[0] + self.assertEqual(expected_definition, definition) + + def test_scancode_harvests(self): + expected_harvest = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json') + # unexpected_harvest should not be in cditems + unexpected_harvest = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/licensee/9.13.0.json') + harvests = list(CDitem.objects.scancode_harvests()) + self.assertEqual(1, len(harvests)) + harvest = harvests[0] + self.assertEqual(expected_harvest, harvest) + + def test_mappable(self): + definition_1 = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json') + definition_2 = CDitem.objects.create( + path='sourcearchive/mavencentral/io.quarkus/quarkus-jsonb/revision/0.26.1.json', + last_map_date=timezone.now(), + map_error='error' + ) + harvest = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json') + mappables = list(CDitem.objects.mappable()) + self.assertEqual(2, len(mappables)) + self.assertIn(definition_1, mappables) + self.assertIn(harvest, mappables) + + def 
test_mappable_definitions(self): + definition_1 = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json') + definition_2 = CDitem.objects.create( + path='sourcearchive/mavencentral/io.quarkus/quarkus-jsonb/revision/0.26.1.json', + last_map_date=timezone.now(), + map_error='error' + ) + harvest = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json') + mappables = list(CDitem.objects.mappable_definitions()) + self.assertEqual(1, len(mappables)) + definition = mappables[0] + self.assertEqual(definition_1, definition) + + def test_mappable_scancode_harvests(self): + harvest_1 = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json') + harvest_2 = CDitem.objects.create( + path='sourcearchive/mavencentral/io.cucumber/cucumber-core/revision/5.0.0-RC1/tool/scancode/3.2.2.json', + last_map_date=timezone.now(), + map_error='error' + ) + definition_1 = CDitem.objects.create(path='sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json') + mappables = list(CDitem.objects.mappable_scancode_harvests()) + self.assertEqual(1, len(mappables)) + harvest = mappables[0] + self.assertEqual(harvest_1, harvest) diff --git a/clearcode/tests/test_sync.py b/clearcode/tests/test_sync.py new file mode 100644 index 00000000..83bb7ce8 --- /dev/null +++ b/clearcode/tests/test_sync.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class SyncDbsaverTestCase(TestCase):
    """
    Check how many CDitem rows exist after `db_saver` stores content under an
    already stored path and under a new path.
    """

    def setUp(self):
        self.test_path = 'composer/packagist/yoast/wordpress-seo/revision/9.5-RC3.json'
        self.test_content = {'test': 'content'}

        # CDitem content is stored as gzipped JSON bytes.
        compressed_content = gzip.compress(json.dumps(self.test_content).encode('utf-8'))
        self.cditem0 = CDitem.objects.create(
            path=self.test_path,
            content=compressed_content,
        )

    def _save_and_count(self, blob_path):
        """
        Save the test content under `blob_path` and return the CDitem row count.
        """
        db_saver(content=self.test_content, blob_path=blob_path)
        return len(CDitem.objects.all())

    def test_db_saver_identical_path(self):
        row_count = self._save_and_count(self.test_path)
        self.assertEqual(1, row_count)

    def test_db_saver_different_path(self):
        row_count = self._save_and_count('new/blob/path.json')
        self.assertEqual(2, row_count)
def _join_unique(values):
    """
    Return the non-empty `values` joined with newlines, without duplicates and
    in first-seen order, or None when there is nothing left to join.
    """
    # dict.fromkeys dedupes while keeping insertion order. A set would make
    # the order of the joined lines vary from run to run.
    unique_values = list(dict.fromkeys(value for value in values if value))
    if not unique_values:
        return
    return '\n'.join(unique_values)


def get_resource_license_expressions(file_data):
    """
    Return a string that contains all the license_expression statements (deduped),
    with a newline separating each or None if there are no license_expression statements
    in the scan data.

    `file_data` is a mapping of scan data for one file. Its `license_expressions`
    entry may be missing or None. Statements are kept in their original order.
    """
    license_expressions = file_data.get('license_expressions', []) or []
    return _join_unique(license_expressions)


def get_resource_copyright_statements(file_data):
    """
    Return a string that contains all the copyright statements (deduped), with a newline
    separating each or None if there are no copyright statements in the scan data.

    `file_data` is a mapping of scan data for one file. Its `copyrights` entry
    may be missing or None; otherwise it is a list of mappings with a `value`.
    Entries without a `value` are skipped instead of failing the join.
    Statements are kept in their original order.
    """
    copyrights = file_data.get('copyrights', []) or []
    return _join_unique(entry.get('value') for entry in copyrights)
+ """ + fields = ( + 'type', + 'namespace', + 'name', + 'version', + 'qualifiers', + 'subpath', + 'primary_language', + 'description', + 'keywords', + 'homepage_url', + 'download_url', + 'size', + 'sha1', + 'md5', + 'sha256', + 'sha512', + 'bug_tracking_url', + 'code_view_url', + 'vcs_url', + 'copyright', + 'license_expression', + 'declared_license', + 'notice_text', + 'source_packages', + ) + + package_data = {field_name: package_scan.get(field_name) for field_name in fields} + + stringify_null_purl_fields(package_data) + + pkg_type = package_data.get('type') + namespace = package_data.get('namespace') + name = package_data.get('name') + version = package_data.get('version') + qualifiers = package_data.get('qualifiers') + subpath = package_data.get('subpath') + + download_url = package_data.get('download_url') + if not download_url: + logger.error('Null `download_url` value for `package_data`: {}'.format(package_data)) + return + + # This ugly block is needed until https://github.com/nexB/packagedb/issues/14 + # is complete. 
def create_from_harvest(package_scan=None, files_data=None, cditem_path=''):
    """
    Return a Package object, created or updated via a ScanCode-Toolkit "package" scan.

    `package_scan` is a mapping of package data and `files_data` a list of
    mappings of file data. Both default to empty. `cditem_path` is only used
    in history messages.

    An existing Package is looked up first by its Package URL fields and
    download URL, then by download URL alone, and is merged with the scan data
    without replacing existing values. Otherwise a new Package is created.
    A Resource is then created for each file whose path is not yet attached to
    the Package.

    Return None, after logging an error, when the scan has no `download_url`.
    """
    # None defaults: `{}` and `[]` defaults would be shared between calls.
    package_scan = package_scan or {}
    files_data = files_data or []

    fields = (
        'type',
        'namespace',
        'name',
        'version',
        'qualifiers',
        'subpath',
        'primary_language',
        'description',
        'keywords',
        'homepage_url',
        'download_url',
        'size',
        'sha1',
        'md5',
        'sha256',
        'sha512',
        'bug_tracking_url',
        'code_view_url',
        'vcs_url',
        'copyright',
        'license_expression',
        'declared_license',
        'notice_text',
        'source_packages',
    )

    package_data = {field_name: package_scan.get(field_name) for field_name in fields}

    stringify_null_purl_fields(package_data)

    download_url = package_data.get('download_url')
    if not download_url:
        logger.error('Null `download_url` value for `package_data`: {}'.format(package_data))
        return

    # This ugly block is needed until https://github.com/nexB/packagedb/issues/14
    # is complete.
    # Lookups are tried in order, from the most to the least specific.
    lookups = (
        dict(
            type=package_data.get('type'),
            namespace=package_data.get('namespace'),
            name=package_data.get('name'),
            version=package_data.get('version'),
            qualifiers=package_data.get('qualifiers'),
            subpath=package_data.get('subpath'),
            download_url=download_url,
        ),
        dict(download_url=download_url),
    )

    package = None
    for lookup in lookups:
        try:
            package = Package.objects.get(**lookup)
        except Package.DoesNotExist:
            continue
        break

    if package is None:
        package = Package.objects.create(**package_data)
        package.append_to_history('Created package from CDitem harvest: {}'.format(cditem_path))

        logger.info('Created package from scancode harvest: {}'.format(package))
    else:
        # Merge package records if it already exists
        merge_packages(
            existing_package=package,
            new_package_data=package_data,
            replace=False
        )
        package.append_to_history('Updated package from CDitem harvest: {}'.format(cditem_path))

        logger.info('Merged package data from scancode harvest: {}'.format(package))

    # Now, add resources to the Package.
    for f in files_data:
        path = f.get('path')
        is_file = f.get('type', '') == 'file'
        copyright = get_resource_copyright_statements(f)
        license_expression = get_resource_license_expressions(f)
        file_data = dict(
            package=package,
            path=path,
            size=f.get('size'),
            sha1=f.get('sha1'),
            md5=f.get('md5'),
            sha256=f.get('sha256'),
            git_sha1=f.get('git_sha1'),
            is_file=is_file,
            copyright=copyright,
            license_expression=license_expression,
        )

        # Ensure there will be no `path` collision
        try:
            Resource.objects.get(package=package, path=path)
        except Resource.DoesNotExist:
            Resource.objects.create(**file_data)

    return package
def map_scancode_harvest(cditem):
    """
    Return the number of created or merged Packages from a scancode harvest and create
    its Resources.

    Package entries without a `download_url` are recorded in `cditem.map_error`,
    skipped, and not counted. Return 0, after recording a map error, when
    reading `cditem.data` raises a ValueError.
    """
    with transaction.atomic():
        try:
            harvest_data = cditem.data
        except ValueError:
            err_msg = 'CDitemError: empty content field for CDitem: {}'.format(cditem.path)
            logger.error(err_msg)

            cditem.map_error = err_msg
            cditem.save()
            return 0

        content = harvest_data.get('content', {}) or {}
        files_data = content.get('files', []) or []
        summary = content.get('summary', {}) or {}
        packages = summary.get('packages', []) or []

        mapped_count = 0
        for package_scan in packages:
            # Check if there is a valid download url. Missing download_url values are
            # considered map_errors, as a Package object cannot have a `Null`
            # download_url value.
            download_url = package_scan.get('download_url')
            if not download_url:
                purl = package_scan.get('purl')
                err_msg = 'CDitemError: empty download_url for package: {}'.format(purl)
                logger.error(err_msg)

                cditem.map_error = err_msg
                cditem.save()
                continue

            # Package + Resource creation
            # pass the `path` of the CDitem for logging purposes
            package = create_from_harvest(package_scan, files_data, cditem.path)
            # create_from_harvest returns None when it could not map the scan:
            # count only the packages that were really created or merged.
            if package is not None:
                mapped_count += 1

        cditem.last_map_date = timezone.now()
        cditem.save()

    return mapped_count
+# + +import logging +import signal +import sys +import time + +from django.core.exceptions import ObjectDoesNotExist +from django.db import transaction +from django.db.utils import OperationalError +from django.utils import timezone + +from packagedcode import licensing +from packagedcode import maven +from packagedcode import npm +from packagedcode import nuget +from packagedcode import pypi +from packagedcode import rubygems +from packagedcode.models import Package as ScannedPackage + +from clearcode.models import CDitem +from clearindex import harvest +from minecode.management.commands import get_error_message +from minecode.management.commands import VerboseCommand +from minecode.management.commands.run_map import merge_packages +from minecode.utils import stringify_null_purl_fields +from packagedb.models import Package + + +TRACE = False + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout) +logger.setLevel(logging.INFO) + + +# sleep duration in seconds when the queue is empty +SLEEP_WHEN_EMPTY = 10 + +MUST_STOP = False + + +def stop_handler(*args, **kwargs): + """ + Signal handler to set global variable to True. + """ + global MUST_STOP + MUST_STOP = True + + +signal.signal(signal.SIGTERM, stop_handler) + +# number of mappable CDItem processed at once +MAP_BATCH_SIZE = 10 + + +PACKAGE_TYPES_BY_CD_TYPE = { + 'crate': 'cargo', + 'deb': 'deb', + 'debsrc': 'deb', + # Currently used only for maven packages + 'sourcearchive': 'maven', + 'maven': 'maven', + 'composer': 'composer', + # Currently used only for Github repo/packages + 'git': 'github', + 'pod': 'pod', + 'nuget': 'nuget', + 'pypi': 'pypi', + 'gem': 'gem', +} + + +# TODO: Update with more Package types when scancode-toolkit is updated +PACKAGE_TYPES_WITH_GET_URLS = { + 'maven': maven.get_urls, + 'npm': npm.get_urls, + 'pypi': pypi.get_pypi_urls, + 'gem': rubygems.get_urls, + 'nuget': nuget.get_urls, +} + + +class Command(VerboseCommand): + help = 'Run a mapping worker.' 
+ + def add_arguments(self, parser): + parser.add_argument( + '--exit-on-empty', + dest='exit_on_empty', + default=False, + action='store_true', + help='Do not loop forever. Exit when the queue is empty.') + + def handle(self, *args, **options): + """ + Get the next available CDitem and start the processing. + Loops forever and sleeps a short while if there are no CDitem left to map. + """ + global MUST_STOP + + logger.setLevel(self.get_verbosity(**options)) + exit_on_empty = options.get('exit_on_empty') + + sleeping = False + created_packages_count = 0 + + logger.info('Running ClearIndex') + while True: + if MUST_STOP: + logger.info('Graceful exit of the map loop.') + break + + mappable_definitions = CDitem.objects.mappable_definitions()[:MAP_BATCH_SIZE] + mappable_scancode_harvests = CDitem.objects.mappable_scancode_harvests()[:MAP_BATCH_SIZE] + + try: + if not mappable_definitions and not mappable_scancode_harvests: + if exit_on_empty: + logger.info('No mappable CDitem, exiting...') + break + + # Only log a single message when we go to sleep + if not sleeping: + sleeping = True + logger.info('No mappable CDitem, sleeping...') + + time.sleep(SLEEP_WHEN_EMPTY) + continue + + sleeping = False + + for cditem in mappable_definitions: + package = map_definition(cditem) + if not package: + continue + created_packages_count += 1 + + for cditem in mappable_scancode_harvests: + # scancode harvests may contain multiple package entries + package_count = harvest.map_scancode_harvest(cditem) + if isinstance(package_count, int): + created_packages_count += package_count + + except OperationalError as e: + logger.error(e) + break + + msg = '{}: {} Packages processed.' + msg = msg.format(timezone.now(), created_packages_count) + logger.info(msg) + + +def map_definition(cditem): + """ + Map a CD definition. Return the Package created from a mapped CD definition + or None if a Package could not be created or an Exception has occured. 
+ """ + try: + with transaction.atomic(): + # We create a new Package from a definition, if it does not exist in the PackageDB + package = get_or_create_package_from_cditem_definition(cditem) + if not package: + return + package.last_modified_date = timezone.now() + package.save() + cditem.last_map_date = timezone.now() + cditem.save() + return package + except Exception as e: + msg = 'Error: Failed to map while processing CDitem: {}\n'.format( + repr(cditem.path)) + msg += get_error_message(e) + logger.error(msg) + cditem.map_error = msg + cditem.save() + + +def get_coords_des_and_lic_from_def(definition): + return definition.get('coordinates', {}), definition.get('described', {}), definition.get('licensed', {}) + + +#CD_TYPES_WITH_SOURCE = ('debsrc', 'npm', 'sourcearchive',) + + +def get_or_create_package_from_cditem_definition(cditem): + """ + Create a Package from a CDitem definition or return a Package if it already exists + """ + definition = cditem.data + if not definition: + raise Exception('No data available for this definition') + coordinates, described, licensed = get_coords_des_and_lic_from_def(definition) + + download_url = described.get('urls', {}).get('download', '') + if not download_url: + # We use our data to create a Package in order to form the download_url, since we do not have the download_url for the Package + # We need to have a unique download URL for every Package + download_url = create_download_url_from_coords(coordinates) + if not download_url: + raise Exception('No download URL is available for this definition') + + if download_url.startswith('http://central.maven.org'): + split_download_url = download_url.rsplit('http://central.maven.org') + if len(split_download_url) == 2: + download_url = 'https://repo1.maven.org' + split_download_url[1] + + stringify_null_purl_fields(coordinates) + + namespace = coordinates.get('namespace') + namespace = namespace if namespace != '-' else '' + name = coordinates.get('name') + version = 
def get_or_create_package_from_cditem_definition(cditem):
    """
    Create a Package from a CDitem definition or return a Package if it already exists

    A Package with the same download URL is merged with the definition data,
    replacing existing values. Otherwise a Package is fetched or created from
    the definition fields. A history entry naming `cditem.path` is appended
    when the Package is created or updated.

    Raise an Exception when the definition is empty or when no download URL is
    found in, or can be built from, the definition.
    """
    definition = cditem.data
    if not definition:
        raise Exception('No data available for this definition')
    coordinates, described, licensed = get_coords_des_and_lic_from_def(definition)

    # `or {}` guards against a `urls` key that is present with a null value.
    download_url = (described.get('urls') or {}).get('download', '')
    if not download_url:
        # We use our data to create a Package in order to form the download_url, since we do not have the download_url for the Package
        # We need to have a unique download URL for every Package
        download_url = create_download_url_from_coords(coordinates)
    if not download_url:
        raise Exception('No download URL is available for this definition')

    # Point URLs of the old Maven Central host to the current one.
    old_maven_root = 'http://central.maven.org'
    if download_url.startswith(old_maven_root):
        download_url = 'https://repo1.maven.org' + download_url[len(old_maven_root):]

    stringify_null_purl_fields(coordinates)

    namespace = coordinates.get('namespace')
    # ClearlyDefined coordinates use "-" for an empty namespace
    namespace = namespace if namespace != '-' else ''
    package_type = coordinates.get('type')
    # TODO: Source packages need to be updated for clearlydefined, link source packages to binary packages
    hashes = described.get('hashes') or {}
    declared_license = licensed.get('declared')

    # Fields shared by the creation and by the merge of a Package.
    package_fields = dict(
        type=PACKAGE_TYPES_BY_CD_TYPE.get(package_type) or package_type,
        namespace=namespace,
        name=coordinates.get('name'),
        version=coordinates.get('revision'),
        download_url=download_url,
        homepage_url=described.get('projectWebsite'),
        sha1=hashes.get('sha1'),
        sha256=hashes.get('sha256'),
        release_date=described.get('releaseDate'),
        declared_license=declared_license,
        license_expression=licensing.get_normalized_expression(declared_license),
        copyright='\n'.join(get_parties_from_licensed(licensed)),
    )
    definition_mining_level = 0

    try:
        # FIXME: also consider the Package URL fields!!!
        existing_package = Package.objects.get(download_url=download_url)
    except ObjectDoesNotExist:
        existing_package = None

    if not existing_package:
        package, created = Package.objects.get_or_create(
            mining_level=definition_mining_level,
            **package_fields
        )
        # log history if package was created
        if created:
            package.append_to_history('Created package from CDitem definition: {}'.format(cditem.path))
        return package

    # TODO: This is temporary until we fold clearindex into minecode mapping
    # proper, otherwise we should base this decision off of mining level
    # if existing_package.mining_level < definition_mining_level:
    new_package_data = ScannedPackage(**package_fields).to_dict()
    merge_packages(
        existing_package=existing_package,
        new_package_data=new_package_data,
        replace=True
    )
    existing_package.append_to_history('Updated package from CDitem definition: {}'.format(cditem.path))

    return existing_package
def str2coord(s):
    """
    Return a mapping of CD coordinates from a `s` CD coordinates, URL or URN
    string.

    The mapping has the keys `type`, `provider`, `namespace`, `name` and
    `revision`. A key with no matching segment in `s` has a None value.

    Some example of the supported input strings are:
        URL: "cd:/gem/rubygems/-/mocha/1.7.0"
        URN: "urn:gem:rubygems:-:mocha:revision:1.7.0:tool:scancode:3.1.0"
        plain: "/gem/rubygems/foo/mocha/1.7.0"
    """
    # zip_longest is the Python 3 name of the Python 2 izip_longest
    from itertools import zip_longest

    is_urn = s.startswith('urn')
    is_url = s.startswith('cd:')
    splitter = ':' if is_urn else '/'
    segments = s.strip(splitter).split(splitter)
    if is_urn or is_url:
        # drop the leading "urn" or "cd:" scheme segment
        segments = segments[1:]
    if is_urn and len(segments) > 4 and segments[4] == 'revision':
        # a URN has a literal "revision" marker before the actual revision
        del segments[4]
    # ignore extra segments for now beyond the 5 first (such as the PR of a curation)
    segments = segments[:5]

    fields = ('type', 'provider', 'namespace', 'name', 'revision',)
    return dict(zip_longest(fields, segments))
def to_os_native_path(path):
    """
    Return `path` with POSIX and Windows path separators replaced by the
    separator of the current OS, and without any trailing separator.
    """
    for known_sep in (posixpath.sep, ntpath.sep):
        path = path.replace(known_sep, os.path.sep)
    return path.rstrip(os.path.sep)
--constraint requirements.txt" +DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable matchcode-toolkit --editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" +DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable matchcode-toolkit --editable .[docs] --constraint requirements.txt" # where we create a virtualenv VIRTUALENV_DIR=venv diff --git a/docs/ClearCode Introduction-June 2020.odp b/docs/ClearCode Introduction-June 2020.odp new file mode 100644 index 00000000..a1023454 Binary files /dev/null and b/docs/ClearCode Introduction-June 2020.odp differ diff --git a/docs/ClearCode Introduction-June 2020.pdf b/docs/ClearCode Introduction-June 2020.pdf new file mode 100644 index 00000000..25009c25 Binary files /dev/null and b/docs/ClearCode Introduction-June 2020.pdf differ diff --git a/etc/scripts/clearcode-api-backup.py b/etc/scripts/clearcode-api-backup.py new file mode 100644 index 00000000..d11c4d34 --- /dev/null +++ b/etc/scripts/clearcode-api-backup.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Script example to backup clearcode data using the clearcode API. 
+ +Pre-requisite: + - A local installation of Python + - The Python "requests" library, installed with "pip install requests". + + Run the backup script with: + python clearcode-api-backup.py YYYY-MM-DD + + A directory "clearcode_backup_" will be created in the same directory + that contains this script, and running this script will create one JSON backup + file. +""" + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import json +import os +import logging +import sys +from collections import defaultdict +from datetime import datetime +from os.path import abspath, dirname, exists, join + +try: + from urllib import urlencode +except ImportError: + from urllib.parse import urlencode + +try: + import requests +except ImportError: + print('The "requests" library is required by this script.\n' + 'Install it with: "pip install requests"') + sys.exit(1) + +logging.captureWarnings(True) + + +class ProgressBar(object): + progress_width = 75 + + def __init__(self, output, total_count): + self.output = output + self.total_count = total_count + self.prev_done = 0 + + def update(self, count): + if not self.output: + return + perc = count * 100 // self.total_count + done = perc * self.progress_width // 100 + if self.prev_done >= done: + return + self.prev_done = done + cr = '' if self.total_count == 1 else '\r' + self.output.write(cr + '[' + '.' * done + ' ' * (self.progress_width - done) + ']') + if done == self.progress_width: + self.output.write('\n') + self.output.flush() + + +def get_all_objects_from_endpoint(url, extra_payload=None, verbose=True): + """ + Return a list of all objects by calling the clearcode API endpoint `url` + with the provided request. Paginate as needed. 
+ """ + objects = [] + payload = {} + if extra_payload: + payload.update(extra_payload) + + output = sys.stdout + count_done = 0 + progress_bar = None + + next_url = '{}?{}'.format(url, urlencode(payload)) + while next_url: + response = requests.get(next_url) + if response.status_code == requests.codes.ok: + response_json = response.json() + if verbose and not progress_bar: + total_count = response_json.get('count') + if not total_count: + return [] + print('{} total'.format(total_count)) + progress_bar = ProgressBar(output, total_count) + results = response_json.get('results') + objects.extend(results) + if verbose: + count_done += len(results) + progress_bar.update(count_done) + next_url = response_json.get('next') + else: + print('Error. Please restart the script.') + sys.exit(1) + + return objects + + +def run_api_backup(api_root_url, extra_payload=None): + """ + Execute a backup of clearcode data objects to JSON files. + Given: + - an `api_root_url` clearcode API root URL and + this function: + - creates a new backup directory named "clearcode_backup_" + side-by-side with this script + - calls the clearcode API to collect the list of all objects for each + API endpoint + - writes JSON files named after each endpoint with these collected + objects in the backup directory + On errors, this function will exit Python with a return code of 1. + """ + endpoints = [ + 'cditems', + ] + + # Ensure all those dependencies are available in the backup file to feed the copy script. + # Not needed when --last_modified_date is not provided since all the objects + # for each endpoint will be collected. 
+ results = defaultdict(list) + + for endpoint_name in endpoints: + endpoint_url = '{}{}/'.format(api_root_url, endpoint_name) + + print('Collecting {}...'.format(endpoint_name)) + objects = get_all_objects_from_endpoint(endpoint_url, extra_payload=extra_payload) + print('{} {} collected.'.format(len(objects), endpoint_name)) + + # extra_payload defaults to None: it is only forwarded to + # get_all_objects_from_endpoint() above and must not be dereferenced + # here. + + results[endpoint_name] += objects + + timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + backup_dir = join(abspath(dirname(__file__)), 'clearcode_backup_{}'.format(timestamp)) + if not exists(backup_dir): + os.mkdir(backup_dir) + + for endpoint_name, objects in results.items(): + backup_file = join(backup_dir, '{}.json'.format(endpoint_name)) + assert not exists(backup_file) + with open(backup_file, 'w') as f: + f.write(json.dumps(objects, indent=2)) + + print('Backup location: {}'.format(backup_dir)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='clearcode data backup using the clearcode API', + ) + parser.add_argument( + '--api-root-url', + help='clearcode API endpoints root URL.', + default='http://127.0.0.1:8000/api/', + ) + parser.add_argument( + '--last-modified-date', + help='Limit the backup to object created/modified after that date. Format: "YYYY-MM-DD"', + required=True, + ) + args = parser.parse_args() + + extra_payload = {} + try: + datetime.strptime(args.last_modified_date, '%Y-%m-%d') + except ValueError: + print('Incorrect last_modified_date format.
Expected YYYY-MM-DD') + sys.exit(1) + extra_payload['last_modified_date'] = args.last_modified_date + + print('Starting backup from {}'.format(args.api_root_url)) + run_api_backup(args.api_root_url, extra_payload) + print('Backup completed.') + sys.exit(0) diff --git a/etc/scripts/clearcode-api-import.py b/etc/scripts/clearcode-api-import.py new file mode 100644 index 00000000..878507e8 --- /dev/null +++ b/etc/scripts/clearcode-api-import.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# +# ClearCode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Script to import clearcode data using the clearcode API. + +Pre-requisite: + - A local installation of Python + - The Python "requests" library, installed with "pip install requests". 
+ - a clearcode backup directory, (output of running clearcode-api-backup.py) + +After completion, the clearcode database will be updated with the items from the +clearcode backup +""" + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import json +import os +import logging +import sys +from datetime import datetime +from os.path import abspath, dirname, join +from collections import defaultdict + +try: + import requests +except ImportError: + print('The "requests" library is required by this script.\n' + 'Install it with: "pip install requests"') + sys.exit(1) + +logging.captureWarnings(True) + + +def run_api_copy(api_root_url, backup_directory): + headers = { + 'Content-type': 'application/json', + } + + endpoints = [ + 'cditems', + ] + + copy_results = {} + for endpoint in endpoints: + backup_file = os.path.join(backup_directory, '{}.json'.format(endpoint)) + + if not os.path.exists(backup_file): + print('{} backup file is not available, skipped.'.format(endpoint.title())) + continue + + with open(backup_file) as f: + source_objects = json.load(f) + + api_endpoint_url = '{}{}/'.format(api_root_url, endpoint) + if requests.get(api_endpoint_url, headers=headers).status_code != 200: + print('{} API endpoint not available.'.format(endpoint.title())) + continue + + print('Copying {} {}...'.format(len(source_objects), endpoint)) + endpoint_results = defaultdict(list) + for i, data in enumerate(source_objects): + if not (i % 10): + print('.', end='', flush=True) + object_api_url = '{}{}/'.format(api_endpoint_url, data['uuid']) + response = requests.get(object_api_url, headers=headers) + object_exists = response.status_code == 200 + + if object_exists: + put_response = requests.put(object_api_url, headers=headers, data=json.dumps(data)) + if put_response.status_code == 200: # Updated + endpoint_results['updated'].append(data) + else: + print('Update error:', put_response and 
put_response.json() or repr(put_response.content)) + endpoint_results['update_errors'].append({'data': data, 'error': put_response.json()}) + + else: + post_response = requests.post(api_endpoint_url, headers=headers, data=json.dumps(data)) + if post_response.status_code == 201: # Created + endpoint_results['created'].append(data) + else: + print('Create error:', post_response.json()) + endpoint_results['create_errors'].append({'data': data, 'error': post_response.json()}) + copy_results[endpoint] = endpoint_results + return copy_results + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='clearcode data import using the clearcode API', + ) + parser.add_argument( + '--clearcode-target-api-url', + help='clearcode target instance API endpoints root URL. http://hostname/api/', + default='http://127.0.0.1:8000/api/', + ) + parser.add_argument( + '--backup-directory', + help='Path of the backup directory created by clearcode-api-backup.py script', + required=True, + ) + args = parser.parse_args() + + if not args.clearcode_target_api_url: + print('A clearcode target instance API endpoints root URL is required.\n' + 'Provide one using the --clearcode-target-api-url argument.') + sys.exit(1) + + backup_directory = args.backup_directory + + if not backup_directory.startswith('/'): + cwd = os.getcwd() + backup_directory = abspath(join(cwd, backup_directory)) + + if not os.path.exists(backup_directory): + print('Directory "{}" does not exists.'.format(backup_directory)) + sys.exit(1) + + print('Importing objects from {} to {}'.format(backup_directory, args.clearcode_target_api_url)) + copy_results = run_api_copy(args.clearcode_target_api_url, backup_directory) + timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + output_file = join(abspath(dirname(__file__)), 'copy_results_{}.json'.format(timestamp)) + with open(output_file, 'w') as f: + f.write(json.dumps(copy_results, indent=2)) + print('Copy completed.') + print('Results saved in 
{}'.format(output_file)) + sys.exit(0) diff --git a/packagedb/manage.py b/manage.py old mode 100755 new mode 100644 similarity index 79% rename from packagedb/manage.py rename to manage.py index 5b68baf3..f9bdd906 --- a/packagedb/manage.py +++ b/manage.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # # Copyright (c) nexB Inc. and others. All rights reserved. # purldb is a trademark of nexB Inc. @@ -11,9 +11,9 @@ import os import sys -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "packagedbio.settings") +if __name__ == '__main__': from django.core.management import execute_from_command_line + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'purldb.settings') execute_from_command_line(sys.argv) diff --git a/minecode/AUTHORS.rst b/matchcode-toolkit/AUTHORS.rst similarity index 52% rename from minecode/AUTHORS.rst rename to matchcode-toolkit/AUTHORS.rst index c3f8bfc8..0fd6530e 100644 --- a/minecode/AUTHORS.rst +++ b/matchcode-toolkit/AUTHORS.rst @@ -1,7 +1,3 @@ The following organizations or individuals have contributed to this repo: -- nexB Inc. -- Jono Yang -- Philippe Ombredanne -- Li Ha -- Steven Esser \ No newline at end of file +- Jono Yang diff --git a/packagedb/MANIFEST.in b/matchcode-toolkit/MANIFEST.in similarity index 73% rename from packagedb/MANIFEST.in rename to matchcode-toolkit/MANIFEST.in index 613dbab5..ef3721e8 100644 --- a/packagedb/MANIFEST.in +++ b/matchcode-toolkit/MANIFEST.in @@ -1,6 +1,4 @@ -graft etc graft src -graft tests include *.LICENSE include NOTICE @@ -12,10 +10,6 @@ include setup.* include configure* include requirements* include .git* -include MANIFEST.in -include setup.cfg -include setup.py - global-exclude *.py[co] __pycache__ *.*~ diff --git a/matchcode-toolkit/NOTICE b/matchcode-toolkit/NOTICE new file mode 100644 index 00000000..65936b2b --- /dev/null +++ b/matchcode-toolkit/NOTICE @@ -0,0 +1,19 @@ +# +# Copyright (c) nexB Inc. and others. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Visit https://aboutcode.org and https://github.com/nexB/ for support and download. +# ScanCode is a trademark of nexB Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/matchcode-toolkit/README.rst b/matchcode-toolkit/README.rst new file mode 100644 index 00000000..aa5f165c --- /dev/null +++ b/matchcode-toolkit/README.rst @@ -0,0 +1,31 @@ +matchcode-toolkit +================= +This contains a scancode-toolkit post-scan plugin that fingerprints the +directories of a scan and queries those fingerprints against the matchcode API +to find package matches. + + +Usage +----- + +Ensure that the PurlDB server is up. 
Set the following environment variables: + * ``MATCHCODE_DIRECTORY_CONTENT_MATCHING_ENDPOINT`` + + * ``export MATCHCODE_DIRECTORY_CONTENT_MATCHING_ENDPOINT="http://127.0.0.1:8001/api/approximate_directory_content_index/match/"`` + + * ``MATCHCODE_DIRECTORY_STRUCTURE_MATCHING_ENDPOINT`` + + * ``export MATCHCODE_DIRECTORY_STRUCTURE_MATCHING_ENDPOINT="http://127.0.0.1:8001/api/approximate_directory_structure_index/match/"`` + +Install the matchcode-toolkit plugin into scancode-toolkit: + * Open a shell and enable the virtual environment of the scancode-toolkit instance you want to use + * Navigate to the matchcode-toolkit directory and run ``pip install -e .`` + +Run scancode with matching enabled: + * The ``--info`` option has to be enabled on the scan you are running: + + * ``scancode --info --match --json-pp -`` + + or on the scan you are importing: + + * ``scancode --from-scan --match --json-pp -`` diff --git a/minecode/apache-2.0.LICENSE b/matchcode-toolkit/apache-2.0.LICENSE similarity index 100% rename from minecode/apache-2.0.LICENSE rename to matchcode-toolkit/apache-2.0.LICENSE diff --git a/minecode/pyproject.toml b/matchcode-toolkit/pyproject.toml similarity index 66% rename from minecode/pyproject.toml rename to matchcode-toolkit/pyproject.toml index c43c8a91..87592532 100644 --- a/minecode/pyproject.toml +++ b/matchcode-toolkit/pyproject.toml @@ -1,7 +1,16 @@ +[project] +name = "matchcode-toolkit" +version = "0.0.1" + [build-system] -requires = ["setuptools >= 50", "wheel"] +requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] build-backend = "setuptools.build_meta" +[tool.setuptools_scm] +# this is used populated when creating a git archive +# and when there is .git dir and/or there is no git installed +fallback_version = "9999.$Format:%h-%cs$" + [tool.pytest.ini_options] norecursedirs = [ ".git", diff --git a/packagedb/src/packagedbio/static/.keep b/matchcode-toolkit/requirements-dev.txt similarity index 100% rename from 
packagedb/src/packagedbio/static/.keep rename to matchcode-toolkit/requirements-dev.txt diff --git a/matchcode-toolkit/requirements.txt b/matchcode-toolkit/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/setup.cfg b/matchcode-toolkit/setup.cfg similarity index 53% rename from packagedb/setup.cfg rename to matchcode-toolkit/setup.cfg index 26049997..95b018cf 100644 --- a/packagedb/setup.cfg +++ b/matchcode-toolkit/setup.cfg @@ -1,54 +1,49 @@ [metadata] -name = packagedb -version = 2.0.0 - -author = nexB. Inc. and others -author_email = info@nexb.com -license = license = Apache-2.0 AND CC-BY-SA-4.0 +name = matchcode-toolkit +version = 0.0.1 +license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 -description = A purl (Package URL) Database +description = matchcode-toolkit long_description = file:README.rst -url = https://github.com/nexB/purldb/packagedb +long_description_content_type = text/x-rst +url = https://github.com/nexB/purldb/matchcode-toolkit/ + +author = nexB. Inc. 
and others +author_email = info@aboutcode.org + classifiers = - Programming Language :: Python + Development Status :: 5 - Production/Stable + Intended Audience :: Developers Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Topic :: Software Development Topic :: Utilities keywords = - packagedb - scancode - purl - purldb + utilities license_files = apache-2.0.LICENSE - cc-by-sa-4.0.LICENSE - CHANGELOG.rst - README.rst - AUTHORS.rst NOTICE - + AUTHORS.rst + CHANGELOG.rst [options] -#setup_requires = setuptools_scm[toml] >= 4 - package_dir = =src packages = find: include_package_data = true zip_safe = false -install_requires = - Django>=4.1.2 - djangorestframework>=3.14.0 - django-environ>=0.8.0 - django-filter>=22.1 - psycopg2-binary>=2.9.3 - packageurl-python>=0.10.4 - natsort>=8.2.0 -python_requires = >=3.8.* +setup_requires = setuptools_scm[toml] >= 4 + +python_requires = >=3.6 +install_requires = + bitarray + commoncode + plugincode [options.packages.find] where = src @@ -58,13 +53,15 @@ where = src testing = pytest >= 6, != 7.0.0 pytest-xdist >= 2 - pytest-django aboutcode-toolkit >= 6.0.0 black - mock - docs = Sphinx >= 3.3.1 sphinx-rtd-theme >= 0.5.0 doc8 >= 0.8.1 + + +[options.entry_points] +scancode_post_scan = + match = matchcode_toolkit.plugin_match:Match diff --git a/minecode/setup.py b/matchcode-toolkit/setup.py similarity index 100% rename from minecode/setup.py rename to matchcode-toolkit/setup.py diff --git a/matchcode-toolkit/src/matchcode_toolkit/__init__.py b/matchcode-toolkit/src/matchcode_toolkit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py b/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py new file mode 100644 index 00000000..12e3f181 --- /dev/null +++ b/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py @@ -0,0 +1,118 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import binascii + +from matchcode_toolkit.halohash import BitAverageHaloHash + + +def _create_directory_fingerprint(inputs): + """ + Return a 128-bit BitAverageHaloHash fingerprint in hex from `inputs` + """ + inputs = [i.encode('utf-8') for i in inputs if i] + bah128 = BitAverageHaloHash(inputs, size_in_bits=128).hexdigest() + inputs_count = len(inputs) + inputs_count_hex_str = '%08x' % inputs_count + bah128 = bah128.decode('utf-8') + directory_fingerprint = inputs_count_hex_str + bah128 + return directory_fingerprint + + +def create_content_fingerprint(resources): + """ + Collect SHA1 strings from a list of Resources (`resources`) and create a + directory fingerprint from them + """ + features = [r.sha1 for r in resources if r.sha1] + return _create_directory_fingerprint(features) + + +def _get_resource_subpath(resource, top): + """ + Return the subpath of `resource` relative to `top` from `codebase` + + For example: + + top.path = 'foo/bar/' + resource.path = 'foo/bar/baz.c' + + The subpath returned would be 'baz.c' + """ + _, _, subpath = resource.path.partition(top.path) + subpath = subpath.lstrip('/') + return subpath + + +def create_structure_fingerprint(directory, children): + """ + Collect the subpaths of children Resources of Resource `directory` and + create a fingerprint from them + """ + features = [] + for child in children: + if not child.path: + continue + child_subpath = _get_resource_subpath(child, directory) + rounded_child_size = int(child.size / 10) * 10 + path_feature = str(rounded_child_size) + child_subpath + features.append(path_feature) + return _create_directory_fingerprint(features) + + +def compute_directory_fingerprints(codebase): + """ + Compute 
fingerprints for a directory from `codebase` + """ + for resource in codebase.walk(topdown=False): + if resource.is_file or not resource.path: + continue + children = [r for r in resource.walk(codebase) if r.is_file] + if len(children) == 1: + continue + resource.extra_data['directory_content'] = create_content_fingerprint(children) + resource.extra_data['directory_structure'] = create_structure_fingerprint(resource, children) + resource.save(codebase) + return codebase + + +def split_fingerprint(directory_fingerprint): + """ + Given a string `directory_fingerprint`, return the indexed elements count as + an integer and the bah128 fingerprint string + """ + indexed_elements_count_hash = directory_fingerprint[0:8] + indexed_elements_count = int(indexed_elements_count_hash, 16) + bah128 = directory_fingerprint[8:] + return indexed_elements_count, bah128 + + +def hexstring_to_binarray(hex_string): + """ + Convert a hex string to binary form, then store in a bytearray + """ + return bytearray(binascii.unhexlify(hex_string)) + + +def create_halohash_chunks(bah128): + """ + Given a 128-bit bah128 hash string, split it into 4 chunks and return those + chunks as bytearrays + """ + chunk1 = bah128[0:8] + chunk2 = bah128[8:16] + chunk3 = bah128[16:24] + chunk4 = bah128[24:32] + + chunk1 = hexstring_to_binarray(chunk1) + chunk2 = hexstring_to_binarray(chunk2) + chunk3 = hexstring_to_binarray(chunk3) + chunk4 = hexstring_to_binarray(chunk4) + + return chunk1, chunk2, chunk3, chunk4 diff --git a/matchcode-toolkit/src/matchcode_toolkit/halohash.py b/matchcode-toolkit/src/matchcode_toolkit/halohash.py new file mode 100644 index 00000000..61ecf2bf --- /dev/null +++ b/matchcode-toolkit/src/matchcode_toolkit/halohash.py @@ -0,0 +1,391 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import binascii + +from bitarray import bitarray +from bitarray.util import count_xor + +from commoncode import codec + +from matchcode_toolkit import hash as commoncode_hash + +""" +Halo is a family of hash functions that have the un-common property that mostly +similar -- but not identical -- inputs will hash to very similar outputs. This +type of hash function is sometimes called a locality-sensitive hash function, +because it is sensitive to the locality of the data being hashed. + +The purpose of these hashes is to quickly compare a large number of elements +that are likely to be similar to find candidates and then compute a more +comprehensive similarity only on the candidates. This includes goals such as +identifying near-duplicates of things or to group very similar things together +(a.k.a. clustering), as well as to detect similarities between inputs or perform +quick comparisons under a certain threshold. + +For a traditional 'good' hash function, small changes in the input will yield +very different hash outputs (through diffusion and avalanche effect). For +instance, cryptographic hashes such as SHA1 or MD5 behave this way. If you hash +two bit strings with a SHA1 function and there is only one bit of difference +between these two strings then the resulting hashes will be rather different. On +average, each time one bit is added to the input, good hash functions have half +of the output bits switched from 0 to 1. + +A Halo hash instead hashes similar inputs to the same hash or to a hash that +differs only by a few bits. The similarity between two hashes becomes an +approximation of the similarity between the two original inputs. This similarity +is computed using the hamming distance or number of non-matching bits between +two hash output bit strings.
This hamming distance is roughly proportional +to the similarity between the two original inputs and can be used to estimate +the similarity of inputs without having access to the full inputs. + +The Halo name is a play on what one of the hashing functions does: a halo is like +a fuzzy, halo'ish representation of the input. + +The bit average function resembles Charikar's algorithm by using each bit in an +array of hashes but does not use a TF/IDF resulting in a simpler procedure. +""" + + +class BitAverageHaloHash(object): + """ + A bit matrix averaging hash. + + The high level processing sketch looks like this: + For an input of: + ['this' ,'is', 'a', 'rose', 'great']: + + * we first hash each list item to get something like + [3, 14, 2, 12, 12] (for instance with a very short hash function of 4 bits output) + + or as bits this would be something like this: + + ['0011', + '1110', + '0010', + '1100', + '1100'] + + * we sum up each bit position/column together: + ['0011', + '1110', + '0010', + '1100', + '1100'] + ------- + 3331 + + or stated otherwise: pos1=3, pos2=3, pos3=3, pos4=1 + + * The mean value for a column is number of hashes/2 (2 because we use bits). + Here mean = 5 hashes/2 = 2.5 + + * We compare the sum of each position with the mean and yield a bit: + if pos sum > mean yield 1 else yield 0 + position1 = 3 > mean = 2.5 , then bit=1 + position2 = 3 > mean = 2.5 , then bit=1 + position3 = 3 > mean = 2.5 , then bit=1 + position4 = 1 < mean = 2.5 , then bit=0 + + * We build a hash by concatenating the resulting bits: + pos 1 + pos2 + pos3 + pos4 = '1110' + + In general, this hash seems to show a lower accuracy and higher sensitivity + with small string and small inputs variations than the bucket average hash. + But it works better on shorter inputs. + + Some usage examples: + + >>> z = b'''The value specified for size must be at + ...
least as large as for the smallest bit vector possible for intVal'''.split() + >>> a = BitAverageHaloHash(z, size_in_bits=256) + >>> len(a.digest()) + 32 + >>> z = b'''The value specified for size must be no + ... more larger than the smallest bit vector possible for intVal'''.split() + >>> b = BitAverageHaloHash(z, size_in_bits=256) + >>> a.distance(b) + 57 + >>> b.distance(a) + 57 + >>> a = BitAverageHaloHash(size_in_bits=160) + >>> z = [a.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible for intVal'''.split()] + >>> assert a.hexdigest() == b'2c10223104c43470e10b1157e6415b2f730057d0' + >>> b = BitAverageHaloHash(size_in_bits=160) + >>> z = [b.update(x) for x in b'''The value specified for size must be no + ... more larger than the smallest bit vector possible for intVal'''.split()] + >>> assert b.hexdigest() == b'2c912433c4c624e0b03b34576641df8fe00017d0' + >>> a.distance(b) + 29 + >>> a = BitAverageHaloHash(size_in_bits=128) + >>> z =[a.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible for intVal'''.split()] + >>> assert a.hexdigest() == b'028b1699c0c5310cd1b566a893d12f10' + >>> b = BitAverageHaloHash(size_in_bits=128) + >>> z = [b.update(x) for x in b'''The value specified for size must be no + ... more larger than the smallest bit vector possible for intVal'''.split()] + >>> assert b.hexdigest() == b'0002969060d5b344d1b7602cd9e127b0' + >>> a.distance(b) + 27 + >>> a = BitAverageHaloHash(size_in_bits=64) + >>> z = [a.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible for intVal'''.split()] + >>> assert a.hexdigest() == b'028b1699c0c5310c' + >>> b = BitAverageHaloHash(size_in_bits=64) + >>> z = [b.update(x) for x in b'''The value specified for size must be no + ... 
more larger than the smallest bit vector possible for intVal'''.split()] + >>> assert b.hexdigest() == b'0002969060d5b344' + >>> a.distance(b) + 14 + >>> a = BitAverageHaloHash(size_in_bits=32) + >>> z = [a.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible for intVal'''.split()] + >>> b = BitAverageHaloHash(size_in_bits=32) + >>> z = [b.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible by intVal'''.split()] + >>> a.distance(b) + 5 + >>> a = BitAverageHaloHash(size_in_bits=512) + >>> z = [a.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible for intVal'''.split()] + >>> b = BitAverageHaloHash(size_in_bits=512) + >>> z = [b.update(x) for x in b'''The value specified for size must be at + ... least as large as for the smallest bit vector possible by intVal'''.split()] + >>> a.distance(b) + 46 + """ + + # TODO: Keep state, keep 1 position per column + + # TODO: create method to aggregate multiple BitAverageHaloHashes together + # TODO: refactor this, don't keep all hashes + # TODO: keep only a list of columns + def __init__(self, msg=None, size_in_bits=128): + self.size_in_bits = size_in_bits + self.columns = [0] * size_in_bits + + # TODO: pick one hash module instead of selecting from multiple hash modules + self.hashmodule = lambda x: x + try: + # TODO: pick one hash algorithm + self.hashmodule = commoncode_hash.get_hasher(size_in_bits) + except: + raise Exception('No available hash module for the requested ' + 'hash size in bits: %(size_in_bits)d' % locals()) + self.update(msg) + + @property + def digest_size(self): + return self.size_in_bits // 8 + + def update(self, msg): + """ + Append a bytestring or sequence of bytestrings to the hash. 
+ """ + if not msg: + return + if isinstance(msg, (list, tuple,)): + for m in msg: + self.__hashup(m) + else: + self.__hashup(msg) + + def __hashup(self, msg): + assert isinstance(msg, bytes) + hsh = self.hashmodule(msg).digest() + bits = bitarray_from_bytes(hsh) + normalized = (-1 if v else 1 for v in bits) + for i, column in enumerate(normalized): + self.columns[i] += column + + def hexdigest(self): + """ + Return the hex-encoded hash value. + """ + return binascii.hexlify(self.digest()) + + def b64digest(self): + """ + Return a base64 "url safe"-encoded string representing this hash. + """ + return codec.b64encode(self.digest()) + + def digest(self): + """ + Return a binary string representing this hash. + """ + flattened = [1 if col > 0 else 0 for col in self.columns] + bits = bitarray(flattened) + return bits.tobytes() + + def distance(self, other): + """ + Return the bit Hamming distance between this hash and another hash. + """ + return int(count_xor(self.hash(), other.hash())) + + def hash(self): + return bitarray_from_bytes(self.digest()) + + @classmethod + def combine(cls, hashes): + """ + Return a new BitAverageHaloHash whose columns are the element-wise sums of + the columns of the BitAverageHaloHashes in `hashes`. All `hashes` must be + BitAverageHaloHash instances sharing the same `size_in_bits`. + """ + size_in_bits = hashes[0].size_in_bits + for h in hashes: + assert isinstance(h, cls), 'all hashes should be a BitAverageHaloHash, not {}'.format(type(h)) + assert h.size_in_bits == size_in_bits + + all_columns = [h.columns for h in hashes] + b = cls(size_in_bits=size_in_bits) + b.columns = [sum(col) for col in zip(*all_columns)] + return b + + +def bitarray_from_bytes(b): + """ + Return a bitarray built from a byte string b.
+ """ + a = bitarray() + a.frombytes(b) + return a + + +def byte_hamming_distance(b1, b2): + b1 = binascii.unhexlify(b1) + b2 = binascii.unhexlify(b2) + b1 = bitarray_from_bytes(b1) + b2 = bitarray_from_bytes(b2) + return hamming_distance(b1, b2) + + +def hamming_distance(bv1, bv2): + """ + Return the Hamming distance between `bv1` and `bv2` bitvectors as the + number of differing bits over all positions (i.e. the count of bits set to one + in an XOR between two bit strings). + + `bv1` and `bv2` are passed as-is to bitarray's count_xor(), so both must be + bitarray instances; for Halohash objects pass their hash() result instead. + + See http://en.wikipedia.org/wiki/Hamming_distance + + For example: + + >>> b1 = bitarray('0001010111100001111') + >>> b2 = bitarray('0001010111100001111') + >>> hamming_distance(b1, b2) + 0 + >>> b1 = bitarray('11110000') + >>> b2 = bitarray('00001111') + >>> hamming_distance(b1, b2) + 8 + >>> b1 = bitarray('11110000') + >>> b2 = bitarray('00110011') + >>> hamming_distance(b1, b2) + 4 + """ + return int(count_xor(bv1, bv2)) + + +def slices(s, size): + """ + Given a sequence s, return a sequence of non-overlapping slices of `size`. + Raise an AssertionError if the sequence length is not a multiple of `size`. + + For example: + >>> slices([1, 2, 3, 4, 5, 6], 2) + [(1, 2), (3, 4), (5, 6)] + >>> slices([1, 2, 3, 4, 5, 6], 3) + [(1, 2, 3), (4, 5, 6)] + >>> try: + ... slices([1, 2, 3, 4, 5, 6], 4) + ... except AssertionError: + ... pass + """ + length = len(s) + assert length % size == 0, 'Invalid slice size: len(%(s)r) is not a multiple of %(size)r' % locals() + # TODO: time alternative + # return [s[index:index + size] for index in range(0, length, size)] + chunks = [iter(s)] * size + return list(zip(*chunks)) + + +def common_chunks_from_hexdigest(h1, h2, chunk_bytes_length=4): + """ + Compute the number of common chunks of byte length `chunk_bytes_length` between two + strings h1 and h2, each representing a BitAverageHaloHash hexdigest value.
+ + For example: + + >>> a = '1f22c2c871cd70521211b138cd76fc04' + >>> b = '1f22c2c871cd7852121bbd38c576bc84' + >>> common_chunks_from_hexdigest(a, b, 32) + 1 + + Note: `a` and `b` start with the same 8 characters, where the next groups + of 8 have a few characters off + + >>> byte_hamming_distance(a, b) + 8 + """ + h1 = bitarray_from_bytes(bytes(binascii.unhexlify(h1))) + h2 = bitarray_from_bytes(bytes(binascii.unhexlify(h2))) + h1_slices = slices(h1, chunk_bytes_length) + h2_slices = slices(h2, chunk_bytes_length) + commons = (1 for h1s, h2s in zip(h1_slices, h2_slices) if h1s == h2s) + return sum(commons) + + +def common_chunks(h1, h2, chunk_bytes_length=4): + """ + Compute the number of common chunks of byte length `chunk_bytes_length` between to + hashes h1 and h2 using the digest. + + Note that chunks that are all set to zeroes are matched too: they are be + significant such as empty buckets of bucket hashes. + + For example: + + >>> m1 = b'The value specified for size must be at least as large'.split() + >>> m2 = b'The value specific for size must be at least as large'.split() + >>> a = BitAverageHaloHash(msg=m1, size_in_bits=256) + >>> b = BitAverageHaloHash(msg=m2, size_in_bits=256) + >>> common_chunks(a, b, 2) + 1 + >>> byte_hamming_distance(a.hexdigest(), b.hexdigest()) + 32 + """ + h1_slices = slices(h1.digest(), chunk_bytes_length) + h2_slices = slices(h2.digest(), chunk_bytes_length) + commons = (1 for h1s, h2s in zip(h1_slices, h2_slices) if h1s == h2s) + return sum(commons) + + +def bit_to_num(bits): + """ + Return an int (or long) for a bit array. + + For example: + TODO: test + """ + return int(bits.to01(), 2) + + +# TODO: add test! +def decode_vector(b64_str): + """ + Return a bit array from an encoded string representation. 
+ """ + decoded = codec.urlsafe_b64decode(b64_str) + return bitarray_from_bytes(decoded) diff --git a/matchcode-toolkit/src/matchcode_toolkit/hash.py b/matchcode-toolkit/src/matchcode_toolkit/hash.py new file mode 100644 index 00000000..fbed83b0 --- /dev/null +++ b/matchcode-toolkit/src/matchcode_toolkit/hash.py @@ -0,0 +1,116 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +# From https://github.com/nexB/scancode-toolkit-contrib + +import hashlib + +from commoncode.codec import bin_to_num +from commoncode.codec import urlsafe_b64encode +from commoncode import filetype + +""" +Hashes and checksums. + +Low level hash functions using standard crypto hashes used to construct hashes +of various lengths. Hashes that are smaller than 128 bits are based on a +truncated md5. Other length use SHA hashes. + +Checksums are operating on files. +""" + + +def _hash_mod(bitsize, hmodule): + """ + Return a hashing class returning hashes with a `bitsize` bit length. The + interface of this class is similar to the hash module API. 
+ """ + + class hasher(object): + + def __init__(self, msg=None): + self.digest_size = bitsize // 8 + self.h = msg and hmodule(msg).digest()[:self.digest_size] or None + + def digest(self): + return self.h + + def hexdigest(self): + return self.h and self.h.encode('hex') + + def b64digest(self): + return self.h and urlsafe_b64encode(self.h) + + def intdigest(self): + return self.h and bin_to_num(self.h) + + return hasher + + +# Base hashers for each bit size +_hashmodules_by_bitsize = { + # md5-based + 16: _hash_mod(16, hashlib.md5), + 32: _hash_mod(32, hashlib.md5), + 64: _hash_mod(64, hashlib.md5), + 128: _hash_mod(128, hashlib.md5), + # sha-based + 160: _hash_mod(160, hashlib.sha1), + 256: _hash_mod(256, hashlib.sha256), + 384: _hash_mod(384, hashlib.sha384), + 512: _hash_mod(512, hashlib.sha512) +} + + +def get_hasher(bitsize): + """ + Return a hasher for a given size in bits of the resulting hash. + """ + return _hashmodules_by_bitsize[bitsize] + + +def checksum(location, bitsize, base64=False): + """ + Return a checksum of `bitsize` length from the content of the file at + `location`. The checksum is a hexdigest or base64-encoded is `base64` is + True. 
+ """ + if not filetype.is_file(location): + return + hasher = get_hasher(bitsize) + + # fixme: we should read in chunks + with open(location, 'rb') as f: + hashable = f.read() + + hashed = hasher(hashable) + if base64: + return hashed.b64digest() + + return hashed.hexdigest() + + +def md5(location): + return checksum(location, bitsize=128, base64=False) + + +def sha1(location): + return checksum(location, bitsize=160, base64=False) + + +def b64sha1(location): + return checksum(location, bitsize=160, base64=True) + + +def sha256(location): + return checksum(location, bitsize=256, base64=False) + + +def sha512(location): + return checksum(location, bitsize=512, base64=False) diff --git a/matchcode-toolkit/src/matchcode_toolkit/plugin_match.py b/matchcode-toolkit/src/matchcode_toolkit/plugin_match.py new file mode 100644 index 00000000..e6da42e9 --- /dev/null +++ b/matchcode-toolkit/src/matchcode_toolkit/plugin_match.py @@ -0,0 +1,219 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from collections import defaultdict +import os + +import attr +import requests + +from commoncode.cliutils import PluggableCommandLineOption +from commoncode.cliutils import POST_SCAN_GROUP +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from plugincode.post_scan import post_scan_impl +from plugincode.post_scan import PostScanPlugin + +MATCHCODE_DIRECTORY_CONTENT_MATCHING_ENDPOINT = "http://127.0.0.1:8001/api/approximate_directory_content_index/match/" #os.environ.get('MATCHCODE_DIRECTORY_CONTENT_MATCHING_ENDPOINT') +MATCHCODE_DIRECTORY_STRUCTURE_MATCHING_ENDPOINT = "http://127.0.0.1:8001/api/approximate_directory_structure_index/match/" #os.environ.get('MATCHCODE_DIRECTORY_STRUCTURE_MATCHING_ENDPOINT') + + +def path_suffixes(path): + """ + Yield all the suffixes of `path`, starting from the longest (e.g. more segments). + """ + segments = path.strip('/').split('/') + suffixes = (segments[i:] for i in range(len(segments))) + for suffix in suffixes: + yield '/'.join(suffix) + + +class PackageInfo: + def __init__(self, packagedb_url): + self.packagedb_url = packagedb_url + self.package_resources = self.get_resources_from_packagedb(packagedb_url) + self.package_resource_by_paths = self.create_package_resource_by_paths() + + @classmethod + def get_resources_from_packagedb(cls, packagedb_url): + # Get package resources + package_resources = [] + response = requests.get(packagedb_url) + if response: + package_data = response.json() + resources_url = package_data.get('resources') + count = 1 + while True: + url = f'{resources_url}?page={count}' + response = requests.get(url) + if response: + package_resources.extend(response.json()) + count += 1 + else: + break + return package_resources + + def create_package_resource_by_paths(self): + return { + package_resource.get('path'): package_resource + for package_resource in self.package_resources + } + + +def check_resource_path(resource, package_resources_by_path): + """ + Check to see if 
`resource` exists in the set of package Resources + `package_resources_by_path` + """ + for path_suffix in path_suffixes(resource.path): + if not path_suffix in package_resources_by_path: + continue + package_resource = package_resources_by_path[path_suffix] + # Check to see if we have the same Resource + if ((resource.is_file == True + and package_resource.get('is_file') == True + and resource.sha1 == package_resource.get('sha1', '')) + or (resource.is_file == False + and package_resource.get('is_file') == False)): + return True + return False + + +def determine_best_package_match(directory, codebase, package_info_by_packagedb_url): + """ + For all potential package matches in `package_info_by_purl`, return the + package whose codebase structure matches ours the most. + """ + # Calculate the percent of package files found in codebase + packgedb_urls_by_match_ratio = {} + matched_codebase_paths_by_packagedb_url = defaultdict(list) + for matched_packagedb_url, package_info in package_info_by_packagedb_url.items(): + matched_codebase_paths = matched_codebase_paths_by_packagedb_url[matched_packagedb_url] + package_resource_by_paths = package_info.package_resource_by_paths + + # TODO: Theres a problem when try to match the directory with + # the name `package` because on the index side, we have the path + # `package` indexed, but the path suffixes function only returns + # paths that are at least two segments long + # + # We get around this by checking filetype (file or directory) in `check_resource_path` + if check_resource_path(directory, package_resource_by_paths): + matched_codebase_paths.append(directory.path) + + for child in directory.walk(codebase, topdown=True): + if check_resource_path(child, package_resource_by_paths): + matched_codebase_paths.append(child.path) + + matching_resources_count = len(matched_codebase_paths) + ratio = matching_resources_count / len(package_resource_by_paths) + packgedb_urls_by_match_ratio[ratio] = matched_packagedb_url + + 
highest_match_ratio = max(match_ratio for match_ratio, _ in packgedb_urls_by_match_ratio.items()) + best_package_match_packagedb_url = packgedb_urls_by_match_ratio[highest_match_ratio] + return best_package_match_packagedb_url, matched_codebase_paths_by_packagedb_url[best_package_match_packagedb_url] + + +def do_directory_matching(codebase, fingerprint_key, matching_endpoint): + for resource in codebase.walk(topdown=True): + # Collect directory fingerprints, if available + directory_fingerprint = resource.extra_data.get(fingerprint_key, '') + + # Skip resource if it is not a directory, does not contain directory + # fingerprints, or if it has already been matched + if (resource.is_file + or not directory_fingerprint + or resource.extra_data.get('matched', False)): + continue + + # Send fingerprint to matchcode for matching and get the purls of + # the matched packages + payload = { + 'fingerprint': [directory_fingerprint] + } + response = requests.get(matching_endpoint, params=payload) + if response: + results = response.json() + matched_packagedb_urls = [result.get('package', '') for result in results] + if not matched_packagedb_urls: + continue + + # Get the paths of the resources from matched packages + package_info_by_packagedb_url = {} + for packagedb_url in matched_packagedb_urls: + package_info_by_packagedb_url[packagedb_url] = PackageInfo(packagedb_url) + + # Calculate the percent of package files found in codebase + best_package_match_packagedb_url, matched_codebase_paths = determine_best_package_match( + resource, + codebase, + package_info_by_packagedb_url + ) + + # Query PackageDB for info on the best matched package + response = requests.get(best_package_match_packagedb_url) + if response: + # Create DiscoveredPackage for the best matched package + package_data = response.json() + if package_data not in codebase.attributes.matches: + codebase.attributes.matches.append(package_data) + best_package_match_purl = package_data['purl'] + + # Associate the 
package to the resource and its children + for matched_codebase_path in matched_codebase_paths: + #print('matched_codebase_path: ' + matched_codebase_path) + r = codebase.get_resource(matched_codebase_path) + if best_package_match_purl in r.matched_to: + continue + r.matched_to.append(best_package_match_purl) + r.extra_data['matched'] = True + r.save(codebase) + return codebase + + +@post_scan_impl +class Match(PostScanPlugin): + codebase_attributes = dict( + # a list of matches + matches=attr.ib(default=attr.Factory(list), repr=False), + ) + resource_attributes = dict( + # a list of purls of the packages that a file is a part of + matched_to=attr.ib(default=attr.Factory(list), repr=False), + ) + + sort_order = 6 + + options = [ + PluggableCommandLineOption( + ( + '-m', + '--match', + ), + is_flag=True, + default=False, + help='Scan for application package and dependency manifests, lockfiles and related data.', + help_group=POST_SCAN_GROUP, + sort_order=20, + ) + ] + + def is_enabled(self, match, **kwargs): + return match + + def process_codebase(self, codebase, **kwargs): + codebase = compute_directory_fingerprints(codebase) + codebase = do_directory_matching( + codebase, + 'directory_content', + MATCHCODE_DIRECTORY_CONTENT_MATCHING_ENDPOINT + ) + codebase = do_directory_matching( + codebase, + 'directory_structure', + MATCHCODE_DIRECTORY_STRUCTURE_MATCHING_ENDPOINT + ) diff --git a/matchcode-toolkit/tests/test_fingerprinting.py b/matchcode-toolkit/tests/test_fingerprinting.py new file mode 100644 index 00000000..761691f5 --- /dev/null +++ b/matchcode-toolkit/tests/test_fingerprinting.py @@ -0,0 +1,107 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import os + +from commoncode.resource import VirtualCodebase +from commoncode.testcase import FileBasedTesting + +from matchcode_toolkit.fingerprinting import _create_directory_fingerprint +from matchcode_toolkit.fingerprinting import _get_resource_subpath +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import create_content_fingerprint +from matchcode_toolkit.fingerprinting import create_halohash_chunks +from matchcode_toolkit.fingerprinting import create_structure_fingerprint +from matchcode_toolkit.fingerprinting import split_fingerprint + + +class Resource(): + def __init__(self, path='', size=0, sha1=''): + self.path = path + self.size = size + self.sha1 = sha1 + + +class TestFingerprintingFunctions(FileBasedTesting): + test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles/fingerprinting') + + def test__create_directory_fingerprint(self): + test_input = [ + 'package', + 'package/readme.txt', + 'package/index.js', + 'package/package.json', + ] + directory_fingerprint = _create_directory_fingerprint(test_input) + expected_directory_fingerprint = '0000000410d24471969646cb5402032288493126' + self.assertEqual(expected_directory_fingerprint, directory_fingerprint) + indexed_elements_count, _ = split_fingerprint(directory_fingerprint) + self.assertEqual(len(test_input), indexed_elements_count) + + def test_split_fingerprint(self): + directory_fingerprint = '0000000410d24471969646cb5402032288493126' + indexed_elements_count, bah128 = split_fingerprint(directory_fingerprint) + + expected_indexed_elements_count = 4 + self.assertEqual(expected_indexed_elements_count, indexed_elements_count) + + expected_bah128 = '10d24471969646cb5402032288493126' + self.assertEqual(expected_bah128, bah128) + + def test_create_content_fingerprint(self): + test_resources = [ + Resource(sha1='d4e4abbe8e2a8169d6a52907152c2c80ec884745'), + Resource(sha1='0c94f137f6e0536db8cb2622a9dc84253b91b90c'), + 
Resource(sha1='10cab45fe6f353b47b587a576c1077a96ce348f5'), + Resource(sha1='134f2b052b6e5f56b631be2eded70f89d44cf381'), + ] + fingerprint = create_content_fingerprint(test_resources) + expected_fingerprint = '00000004005b88c2800f0044044781ae05680419' + self.assertEqual(expected_fingerprint, fingerprint) + + def test__get_resource_subpath(self): + test_resource = Resource(path='foo/bar/baz/qux.c') + test_top_resource = Resource(path='foo/bar/') + subpath = _get_resource_subpath(test_resource, test_top_resource) + expected_subpath = 'baz/qux.c' + self.assertEqual(expected_subpath, subpath) + + def test_create_structure_fingerprint(self): + test_top_resource = Resource(path='package') + test_child_resources = [ + Resource(path='package/readme.txt', size=771), + Resource(path='package/index.js', size=608), + Resource(path='package/package.json', size=677), + ] + fingerprint = create_structure_fingerprint(test_top_resource, test_child_resources) + expected_fingerprint = '00000003ce72f4308a1bc1afb0fb47ed590b5c53' + self.assertEqual(expected_fingerprint, fingerprint) + + def test_create_halohash_chunks(self): + test_bah128 = 'ce72f4308a1bc1afb0fb47ed590b5c53' + chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(test_bah128) + expected_chunk1 = bytearray(b'\xcer\xf40') + expected_chunk2 = bytearray(b'\x8a\x1b\xc1\xaf') + expected_chunk3 = bytearray(b'\xb0\xfbG\xed') + expected_chunk4 = bytearray(b'Y\x0b\\S') + self.assertEqual(chunk1, expected_chunk1) + self.assertEqual(chunk2, expected_chunk2) + self.assertEqual(chunk3, expected_chunk3) + self.assertEqual(chunk4, expected_chunk4) + + def test_compute_directory_fingerprints(self): + scan_loc = self.get_test_loc('abbrev-1.0.3-i.json') + vc = VirtualCodebase(location=scan_loc) + vc = compute_directory_fingerprints(vc) + directory_content = vc.root.extra_data['directory_content'] + directory_structure = vc.root.extra_data['directory_structure'] + expected_directory_content = '0000000346ce04751a3c98f00086f16a91d9790b' + 
expected_directory_structure = '000000034f9bf110673bdf06197cd514a799a66c' + self.assertEqual(expected_directory_content, directory_content) + self.assertEqual(expected_directory_structure, directory_structure) diff --git a/matchcode-toolkit/tests/testfiles/fingerprinting/abbrev-1.0.3-i.json b/matchcode-toolkit/tests/testfiles/fingerprinting/abbrev-1.0.3-i.json new file mode 100644 index 00000000..2d418ac9 --- /dev/null +++ b/matchcode-toolkit/tests/testfiles/fingerprinting/abbrev-1.0.3-i.json @@ -0,0 +1,161 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.3.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011500.811761", + "end_timestamp": "2022-12-02T011501.090542", + "output_format_version": "2.0.0", + "duration": 0.2787973880767822, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 3 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 3, + "dirs_count": 1, + "size_count": 3358, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 277, + "date": "2011-03-24", + "sha1": "d61dc2c98ab10bf909b99f60e7bf584a7f7ead8c", + "md5": "8468753cba56d0075f6532a657ee5821", + "sha256": "5ab100bf0eb08adb175db170a1254d14e0be705ff1b563e5acddd3c8d03faee1", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": 
"2011-03-24", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 2582, + "scan_errors": [] + }, + { + "path": "package/lib/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 2582, + "date": "2011-03-24", + "sha1": "055ec01ac8b111bc948e498d87d9dc47f5e5acaa", + "md5": "06aebeadc85e52f4b8bf88eab6cd8b6c", + "sha256": "efd2c9b755dc4b2df3231222b5b6a63b7a1343472dfbc8807c5f15e1d28a0c75", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/__init__.py b/matchcode/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode/api.py b/matchcode/api.py new file mode 100644 index 00000000..f20dd166 --- /dev/null +++ b/matchcode/api.py @@ -0,0 +1,277 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from django.db.models import Q +from django.forms import widgets +from django.forms.fields import MultipleChoiceField +from django_filters.filters import MultipleChoiceFilter +from django_filters.rest_framework import FilterSet +from rest_framework.decorators import action +from rest_framework.response import Response +from rest_framework.serializers import CharField +from rest_framework.serializers import HyperlinkedRelatedField +from rest_framework.serializers import ModelSerializer +from rest_framework.serializers import ReadOnlyField +from rest_framework.serializers import Serializer +from rest_framework.viewsets import ReadOnlyModelViewSet + +from matchcode_toolkit.fingerprinting import create_halohash_chunks +from matchcode_toolkit.fingerprinting import hexstring_to_binarray +from matchcode_toolkit.fingerprinting import split_fingerprint +from matchcode.models import ExactFileIndex +from matchcode.models import ExactPackageArchiveIndex +from matchcode.models import ApproximateDirectoryContentIndex +from matchcode.models import ApproximateDirectoryStructureIndex + + +class BaseFileIndexSerializer(ModelSerializer): + sha1 = CharField(source='fingerprint') + package = HyperlinkedRelatedField( + view_name='api:package-detail', + lookup_field='uuid', + read_only=True + ) + + +class ExactFileIndexSerializer(BaseFileIndexSerializer): + class Meta: + model = ExactFileIndex + fields = ( + 'sha1', + 'package' + ) + + +class ExactPackageArchiveIndexSerializer(BaseFileIndexSerializer): + class Meta: + model = ExactPackageArchiveIndex + fields = ( + 'sha1', + 'package' + ) + + +class BaseDirectoryIndexSerializer(ModelSerializer): + fingerprint = ReadOnlyField() + package = 
HyperlinkedRelatedField( + view_name='api:package-detail', + lookup_field='uuid', + read_only=True + ) + +class ApproximateDirectoryContentIndexSerializer(BaseDirectoryIndexSerializer): + class Meta: + model = ApproximateDirectoryContentIndex + fields = ( + 'fingerprint', + 'package', + ) + + +class ApproximateDirectoryStructureIndexSerializer(BaseDirectoryIndexSerializer): + class Meta: + model = ApproximateDirectoryStructureIndex + fields = ( + 'fingerprint', + 'package', + ) + + +class BaseDirectoryIndexMatchSerializer(Serializer): + fingerprint = CharField() + matched_fingerprint = CharField() + package = HyperlinkedRelatedField( + view_name='api:package-detail', + lookup_field='uuid', + read_only=True + ) + + +class CharMultipleWidget(widgets.TextInput): + """ + Enables the support for `MultiValueDict` `?field=a&field=b` + reusing the `SelectMultiple.value_from_datadict()` but render as a `TextInput`. + """ + def value_from_datadict(self, data, files, name): + value = widgets.SelectMultiple().value_from_datadict(data, files, name) + if not value or value == ['']: + return '' + + return value + + def format_value(self, value): + """ + Return a value as it should appear when rendered in a template. + """ + return ', '.join(value) + + +class MultipleCharField(MultipleChoiceField): + """ + Overrides `MultipleChoiceField` to fit in `MultipleCharFilter`. + """ + widget = CharMultipleWidget + + def valid_value(self, value): + return True + + +class MultipleCharFilter(MultipleChoiceFilter): + """ + Filters on multiple values for a CharField type using `?field=a&field=b` URL syntax. 
+ """ + field_class = MultipleCharField + + +class MultipleSHA1Filter(MultipleCharFilter): + """ + Overrides `MultipleCharFilter.filter()` to convert the SHA1 + into a bytearray so it can be queried + """ + def filter(self, qs, value): + if not value: + return qs + + q = Q() + for val in value: + v = hexstring_to_binarray(val) + q.add(Q(sha1=v), Q.OR) + + return qs.filter(q) + + +class MultipleFingerprintFilter(MultipleCharFilter): + """ + Overrides `MultipleCharFilter.filter()` to process fingerprint from a single + string into multiple values used for querying. + + In the BaseDirectoryIndex model, the fingerprint is stored in four chunks of + equal size, not as a single field that contains the entire fingerprint. We + must process the fingerprint into the correct parts so we can use those + parts to query the different fields. + """ + def filter(self, qs, value): + if not value: + return qs + + q = Q() + for val in value: + indexed_elements_count, bah128 = split_fingerprint(val) + chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(bah128) + q.add( + Q( + indexed_elements_count=indexed_elements_count, + chunk1=chunk1, + chunk2=chunk2, + chunk3=chunk3, + chunk4=chunk4 + ), + Q.OR + ) + + return qs.filter(q) + + +class BaseFileIndexFilterSet(FilterSet): + sha1 = MultipleSHA1Filter() + + +class ExactFileIndexFilterSet(BaseFileIndexFilterSet): + class Meta: + model = ExactFileIndex + fields = ( + 'sha1', + ) + + +class ExactPackageArchiveFilterSet(BaseFileIndexFilterSet): + class Meta: + model = ExactPackageArchiveIndex + fields = ( + 'sha1', + ) + + +class BaseDirectoryIndexFilterSet(FilterSet): + fingerprint = MultipleFingerprintFilter() + + +class ApproximateDirectoryContentFilterSet(BaseDirectoryIndexFilterSet): + class Meta: + model = ApproximateDirectoryContentIndex + fields = ( + 'fingerprint', + ) + + +class ApproximateDirectoryStructureFilterSet(BaseDirectoryIndexFilterSet): + class Meta: + model = ApproximateDirectoryStructureIndex + fields = ( + 
'fingerprint', + ) + + +class BaseFileIndexViewSet(ReadOnlyModelViewSet): + lookup_field = 'sha1' + + +class ExactFileIndexViewSet(BaseFileIndexViewSet): + queryset = ExactFileIndex.objects.all() + serializer_class = ExactFileIndexSerializer + filterset_class = ExactFileIndexFilterSet + + +class ExactPackageArchiveIndexViewSet(BaseFileIndexViewSet): + queryset = ExactPackageArchiveIndex.objects.all() + serializer_class = ExactPackageArchiveIndexSerializer + filterset_class = ExactPackageArchiveFilterSet + + +class BaseDirectoryIndexViewSet(ReadOnlyModelViewSet): + lookup_field = 'fingerprint' + + @action(detail=False) + def match(self, request): + fingerprints = request.query_params.getlist('fingerprint') + if not fingerprints: + return Response() + + model_class = self.get_serializer().Meta.model + results = [] + unique_fingerprints = set(fingerprints) + for fingerprint in unique_fingerprints: + matches = model_class.match(fingerprint) + for match in matches: + results.append( + { + 'fingerprint': fingerprint, + 'matched_fingerprint': match.fingerprint(), + 'package': match.package, + } + ) + + serialized_match_results = BaseDirectoryIndexMatchSerializer( + results, + context={'request': request}, + many=True + ) + return Response(serialized_match_results.data) + + +class ApproximateDirectoryContentIndexViewSet(BaseDirectoryIndexViewSet): + queryset = ApproximateDirectoryContentIndex.objects.all() + serializer_class = ApproximateDirectoryContentIndexSerializer + filterset_class = ApproximateDirectoryContentFilterSet + + +class ApproximateDirectoryStructureIndexViewSet(BaseDirectoryIndexViewSet): + queryset = ApproximateDirectoryStructureIndex.objects.all() + serializer_class = ApproximateDirectoryStructureIndexSerializer + filterset_class = ApproximateDirectoryStructureFilterSet diff --git a/matchcode/indexing.py b/matchcode/indexing.py new file mode 100644 index 00000000..e4289031 --- /dev/null +++ b/matchcode/indexing.py @@ -0,0 +1,154 @@ +# +# Copyright (c) 
nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +import os +import sys + +from commoncode.resource import VirtualCodebase + +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode.models import ApproximateDirectoryContentIndex +from matchcode.models import ApproximateDirectoryStructureIndex +from matchcode.models import ExactPackageArchiveIndex +from matchcode.models import ExactFileIndex + + +TRACE = False + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout) +logger.setLevel(logging.INFO) + + +def index_package_archives(package): + """ + Index Package archives for matching + + Return True if an ExactPackageArchiveIndex has been created, + otherwise return False + """ + _, created = ExactPackageArchiveIndex.index( + sha1=package.sha1, + package=package, + ) + return created + + +def index_package_file(resource): + """ + Index Package files for matching + + Return a boolean, `created_exact_file_index`, which returns True if it has + been created, False otherwise. + """ + _, created_exact_file_index = ExactFileIndex.index( + sha1=resource.sha1, + package=resource.package + ) + return created_exact_file_index + + +def _create_virtual_codebase_from_package_resources(package): + """ + Return a VirtualCodebase from the resources of `package` + """ + # Create something that looks like a scancode scan so we can import it into a VirtualCodebase + # TODO: Evolve this into something more elaborate, e.g. 
+ # Codebase class methods can manipulate Resource table entries + package_resources = package.resources.order_by('path') + if not package_resources: + return + + files = [] + for resource in package_resources: + files.append( + { + 'path': resource.path, + 'size': resource.size, + 'sha1': resource.sha1, + 'md5': resource.md5, + 'type': resource.type, + } + ) + + make_new_root = False + sample_file_path = files[0].get('path', '') + root_dir = sample_file_path.split('/')[0] + for f in files: + file_path = f.get('path', '') + if not file_path.startswith(root_dir): + make_new_root = True + break + + if make_new_root: + new_root = '{}-{}'.format(package.name, package.version) + for f in files: + new_path = os.path.join(new_root, f.get('path', '')) + f['path'] = new_path + + # Create VirtualCodebase + mock_scan = dict(files=files) + return VirtualCodebase(location=mock_scan) + + +def index_directory_fingerprints(codebase, package): + """ + Compute fingerprints for a directory from `codebase` and index them to + ApproximateDirectoryContentIndex and ApproximateDirectoryStructureIndex + + Return a tuple of integers, `indexed_adci` and `indexed_adsi`, that + represent the number of indexed ApproximateDirectoryContentIndex and + ApproximateDirectoryStructureIndex created, respectivly. 
+ """ + indexed_adci = 0 + indexed_adsi = 0 + for resource in codebase.walk(topdown=False): + directory_content_fingerprint = resource.extra_data.get('directory_content', '') + directory_structure_fingerprint = resource.extra_data.get('directory_structure', '') + + if directory_content_fingerprint: + _, adci_created = ApproximateDirectoryContentIndex.index( + directory_fingerprint=directory_content_fingerprint, + resource_path=resource.path, + package=package, + ) + if adci_created: + indexed_adci += 1 + + if directory_structure_fingerprint: + _, adsi_created = ApproximateDirectoryStructureIndex.index( + directory_fingerprint=directory_structure_fingerprint, + resource_path=resource.path, + package=package, + ) + if adsi_created: + indexed_adsi += 1 + + return indexed_adci, indexed_adsi + + +def index_package_directories(package): + """ + Index the directories of `package` to ApproximateDirectoryContentIndex and + ApproximateDirectoryStructureIndex + + Return a tuple of integers, `indexed_adci` and `indexed_adsi`, that + represent the number of indexed ApproximateDirectoryContentIndex and + ApproximateDirectoryStructureIndex created, respectivly. + + Return 0, 0 if a VirtualCodebase cannot be created from the Resources of a + Package + """ + vc = _create_virtual_codebase_from_package_resources(package) + if not vc: + return 0, 0 + + vc = compute_directory_fingerprints(vc) + return index_directory_fingerprints(vc, package) diff --git a/matchcode/management/__init__.py b/matchcode/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode/management/commands/__init__.py b/matchcode/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode/management/commands/index_packages.py b/matchcode/management/commands/index_packages.py new file mode 100644 index 00000000..ebdd8d21 --- /dev/null +++ b/matchcode/management/commands/index_packages.py @@ -0,0 +1,73 @@ +# +# Copyright (c) nexB Inc. 
and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from datetime import datetime +import logging +import sys +import time + +from django.db import transaction + +from minecode.management.commands import VerboseCommand +from matchcode.indexing import index_package_archives +from matchcode.indexing import index_package_directories +from matchcode.indexing import index_package_file +from packagedb.models import Package +from packagedb.models import Resource + + +TRACE = False + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout) +logger.setLevel(logging.INFO) + + +class Command(VerboseCommand): + help = 'Index all Package SHA1 from PackageDB.' + + def handle(self, *args, **options): + # Stats to keep track of during indexing + total_indexed_package_archives = 0 + total_indexed_package_files = 0 + total_indexed_adci = 0 + total_indexed_adsi = 0 + + logger.setLevel(self.get_verbosity(**options)) + start = time.time() + + packages = Package.objects.filter(sha1__isnull=False) + for package in packages.iterator(): + with transaction.atomic(): + created_package_archive = index_package_archives(package) + if created_package_archive: + total_indexed_package_archives += 1 + + resources = Resource.objects.filter(sha1__isnull=False) + for resource in resources.iterator(): + with transaction.atomic(): + created_package_file = index_package_file(resource) + if created_package_file: + total_indexed_package_files += 1 + + for package in Package.objects.all().iterator(): + with transaction.atomic(): + indexed_adci, indexed_adsi = index_package_directories(package) + total_indexed_adci += indexed_adci + total_indexed_adsi += indexed_adsi + + # TODO: Format this better for viewing on 
terminal + print('Package indexing completed at: {}'.format(datetime.utcnow().isoformat())) + total_duration = int(time.time() - start) + print('Total run duration: {} seconds'.format(total_duration)) + print('Created:') + print('ExactPackageArchiveIndex: {}'.format(total_indexed_package_archives)) + print('ExactFileIndex: {}'.format(total_indexed_package_files)) + print('ApproximateDirectoryContentIndex: {}'.format(total_indexed_adci)) + print('ApproximateDirectoryStructureIndex: {}'.format(total_indexed_adsi)) diff --git a/matchcode/management/commands/match_scan.py b/matchcode/management/commands/match_scan.py new file mode 100644 index 00000000..68122ac0 --- /dev/null +++ b/matchcode/management/commands/match_scan.py @@ -0,0 +1,36 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import json + +from django.core.management.base import BaseCommand + +from matchcode import match + + +class Command(BaseCommand): + help = 'matches packages in a scancode fileinfo scan' + + def add_arguments(self, parser): + parser.add_argument('scancode_file_path', type=str) + parser.add_argument('outfile_path', type=str) + + def handle(self, *args, **options): + scancode_file = options['scancode_file_path'] + outfile = options['outfile_path'] + + # load up the scancode fileinfo json + with open(scancode_file) as f: + scan = json.load(f) + + results = match.match_packages(scan) + + # write new json results + with open(outfile, 'w') as f: + json.dump(results, f, indent=2) diff --git a/matchcode/match.py b/matchcode/match.py new file mode 100644 index 00000000..e78bc8e8 --- /dev/null +++ b/matchcode/match.py @@ -0,0 +1,227 @@ +# +# Copyright (c) nexB Inc. and others. 
All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from functools import reduce +from operator import or_ + +from django.db.models import Q + +from matchcode.models import ApproximateDirectoryContentIndex +from matchcode.models import ApproximateDirectoryStructureIndex +from matchcode.models import ExactFileIndex +from matchcode.models import ExactPackageArchiveIndex + + +# TODO: Refactor this file into functions/utilities used in +# a scanpipe pipeline. +EXACT_PACKAGE_ARCHIVE_MATCH = 0 +APPROXIMATE_DIRECTORY_STRUCTURE_MATCH = 1 +APPROXIMATE_DIRECTORY_CONTENT_MATCH = 2 +EXACT_FILE_MATCH = 3 + + +def get_matchers(): + MATCHERS_BY_MATCH_TYPE = { + EXACT_PACKAGE_ARCHIVE_MATCH: package_archive_match, + APPROXIMATE_DIRECTORY_CONTENT_MATCH: approximate_directory_content_match, + APPROXIMATE_DIRECTORY_STRUCTURE_MATCH: approximate_directory_structure_match, + EXACT_FILE_MATCH: individual_file_match, + } + return MATCHERS_BY_MATCH_TYPE + + +def do_match(codebase, match_type): + """ + Perform Package matching on `codebase` by running matching functions of `match_type` on `codebase` + + The total number of matches found is returned + """ + + matcher = get_matchers().get(match_type) + if not matcher: + raise Exception('Unknown match type: {}'.format(match_type)) + match_count = matcher(codebase) + return match_count + + +def package_archive_match(codebase): + """ + Update Matches from detected Package Archives in `codebase` + + Return the number of matches found in `codebase` + """ + match_count = 0 + for resource in codebase.walk(topdown=True): + if (resource.is_dir + or not resource.is_archive + or resource.extra_data.get('matched', False)): + continue + + archive_matches, match_type = get_archive_match(resource) + 
if not archive_matches: + continue + + match_count += len(archive_matches) + + # Tag matched Resource as `matched` as to not analyze it later + tag_matched_resources(resource, codebase, archive_matches, match_type) + return match_count + + +def approximate_directory_content_match(codebase): + """ + Update Matches from detected Package directories based on directory contents in `codebase` + + Return the number of matches found in `codebase` + """ + match_count = 0 + for resource in codebase.walk(topdown=True): + if resource.is_file or resource.extra_data.get('matched', False): + continue + + directory_matches, match_type = get_directory_content_match(resource) + if not directory_matches: + continue + + match_count += directory_matches.count() + tag_matched_resources(resource, codebase, directory_matches, match_type) + return match_count + + +def approximate_directory_structure_match(codebase): + """ + Update Matches from detected Package directories based on directory structure in `codebase` + + Return the number of matches found in `codebase` + """ + match_count = 0 + for resource in codebase.walk(topdown=True): + if resource.is_file or resource.extra_data.get('matched', False): + continue + + directory_matches, match_type = get_directory_structure_match(resource) + if not directory_matches: + continue + + match_count += directory_matches.count() + tag_matched_resources(resource, codebase, directory_matches, match_type) + return match_count + + +def individual_file_match(codebase): + """ + Update Matches from detected Package files in `codebase` + + Return the number of matches found in `codebase` + """ + match_count = 0 + for resource in codebase.walk(topdown=True): + if resource.is_dir or resource.extra_data.get('matched', False): + continue + + file_matches, match_type = get_file_match(resource) + if not file_matches: + continue + + match_count += len(file_matches) + tag_matched_resources(resource, codebase, file_matches, match_type) + return match_count + + 
+def get_directory_content_match(resource): + """ + Match a directory to a Package using its contents + """ + directory_content_fingerprint = resource.extra_data.get('directory_content', '') + matches = ApproximateDirectoryContentIndex.objects.none() + match_type = '' + if directory_content_fingerprint: + directory_matches = ApproximateDirectoryContentIndex.match(directory_content_fingerprint) + matches |= directory_matches + match_type = 'approximate-content' + return matches, match_type + + +# TODO: rename match_directory_structure +def get_directory_structure_match(resource): + """ + Match a directory to a Package using its structure + """ + directory_structure_fingerprint = resource.extra_data.get('directory_structure', '') + matches = ApproximateDirectoryStructureIndex.objects.none() + match_type = '' + if directory_structure_fingerprint: + directory_matches = ApproximateDirectoryStructureIndex.match(directory_structure_fingerprint) + matches |= directory_matches + match_type = 'approximate-structure' + return matches, match_type + + +def get_archive_match(resource): + """ + Match an Archive resource to a Package + """ + file_matches = ExactPackageArchiveIndex.match(resource.sha1) + return file_matches, 'exact-archive' + + +def get_file_match(resource): + """ + Match an individual file back to the Package it is from + """ + file_matches = ExactFileIndex.match(resource.sha1) + return file_matches, 'exact-file' + + +def tag_matched_resource(resource, codebase, purl): + """ + Set a resource to be flagged as matched, so it will not be considered in + subsequent matches once it has been matched + """ + if purl not in resource.matched_to: + resource.matched_to.append(purl) + resource.extra_data['matched'] = True + resource.save(codebase) + + +def tag_matched_resources(resource, codebase, matches, match_type): + """ + Tag this directory and other Resources under this directory so they are not + candidates for matching by checking to see if a Resource path from + 
`resource` or its children exists in the matched packages in `matches` + """ + for match in matches: + # Prep matched package data and append to `codebase` + matched_package_info = match.package.to_dict() + matched_package_info['match_type'] = match_type + codebase.attributes.matches.append(matched_package_info) + + purl = match.package.package_url + # Tag the Resource where we found a match + tag_matched_resource(resource, codebase, purl) + + # Find matching package child path for `resource` by creating all possible + # path suffixes from `child.path`, chaining them in Q objects (joined + # by or), then querying the matched packages resources to see if any of + # those suffixes match a package child resource path + for child in resource.walk(codebase): + query = reduce(or_, (Q(path=suffix) for suffix in path_suffixes(child.path)), Q()) + matched_child_resources = match.package.resources.filter(query) + if len(matched_child_resources) > 0: + tag_matched_resource(child, codebase, purl) + + +def path_suffixes(path): + """ + Yield all the suffixes of `path`, starting from the longest (e.g. more segments). 
+ """ + segments = path.strip('/').split('/') + suffixes = (segments[i:] for i in range(len(segments))) + for suffix in suffixes: + yield '/'.join(suffix) diff --git a/matchcode/migrations/0001_initial.py b/matchcode/migrations/0001_initial.py new file mode 100644 index 00000000..8acf39f3 --- /dev/null +++ b/matchcode/migrations/0001_initial.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.17 on 2021-03-25 23:54 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='ApproximateDirectoryContentIndex', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('indexed_elements_count', models.IntegerField(help_text='Number of elements that went into the fingerprint')), + ('chunk1', models.BinaryField(db_index=True, help_text='Binary form of the first 8 (0-7) hex digits of the fingerprint', max_length=4)), + ('chunk2', models.BinaryField(db_index=True, help_text='Binary form of the second 8 (8-15) hex digits of the fingerprint', max_length=4)), + ('chunk3', models.BinaryField(db_index=True, help_text='Binary form of the third 8 (16-23) hex digits of the fingerprint', max_length=4)), + ('chunk4', models.BinaryField(db_index=True, help_text='Binary form of the fourth 8 (24-32) hex digits of the fingerprint', max_length=4)), + ('path', models.CharField(help_text='The full path value of this directory', max_length=2000)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ApproximateDirectoryStructureIndex', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('indexed_elements_count', models.IntegerField(help_text='Number of elements that went into the fingerprint')), + ('chunk1', 
models.BinaryField(db_index=True, help_text='Binary form of the first 8 (0-7) hex digits of the fingerprint', max_length=4)), + ('chunk2', models.BinaryField(db_index=True, help_text='Binary form of the second 8 (8-15) hex digits of the fingerprint', max_length=4)), + ('chunk3', models.BinaryField(db_index=True, help_text='Binary form of the third 8 (16-23) hex digits of the fingerprint', max_length=4)), + ('chunk4', models.BinaryField(db_index=True, help_text='Binary form of the fourth 8 (24-32) hex digits of the fingerprint', max_length=4)), + ('path', models.CharField(help_text='The full path value of this directory', max_length=2000)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ExactFileIndex', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('sha1', models.BinaryField(db_index=True, help_text='Binary form of a SHA1 checksum in lowercase hex for a file', max_length=20)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ExactPackageArchiveIndex', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('sha1', models.BinaryField(db_index=True, help_text='Binary form of a SHA1 checksum in lowercase hex for a file', max_length=20)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='IndexablePackage', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uuid', models.UUIDField(db_index=True, help_text='The UUID of a Package on an instance of PackageDB', unique=True, verbose_name='UUID')), + ('last_indexed_date', models.DateTimeField(help_text='Timestamp set to the date of the last indexing. Used to track indexing status.')), + ('index_error', models.TextField(blank=True, help_text='Indexing errors messages. 
When present this means the indexing has failed.', null=True)), + ], + ), + migrations.AddField( + model_name='exactpackagearchiveindex', + name='package', + field=models.ForeignKey(help_text='The Package that this file is from', on_delete=django.db.models.deletion.CASCADE, to='matchcode.IndexablePackage'), + ), + migrations.AddField( + model_name='exactfileindex', + name='package', + field=models.ForeignKey(help_text='The Package that this file is from', on_delete=django.db.models.deletion.CASCADE, to='matchcode.IndexablePackage'), + ), + migrations.AddField( + model_name='approximatedirectorystructureindex', + name='package', + field=models.ForeignKey(help_text='The Package that this directory is a part of', on_delete=django.db.models.deletion.CASCADE, to='matchcode.IndexablePackage'), + ), + migrations.AddField( + model_name='approximatedirectorycontentindex', + name='package', + field=models.ForeignKey(help_text='The Package that this directory is a part of', on_delete=django.db.models.deletion.CASCADE, to='matchcode.IndexablePackage'), + ), + migrations.AlterUniqueTogether( + name='approximatedirectorystructureindex', + unique_together=set([('chunk1', 'chunk2', 'chunk3', 'chunk4', 'package', 'path')]), + ), + migrations.AlterUniqueTogether( + name='approximatedirectorycontentindex', + unique_together=set([('chunk1', 'chunk2', 'chunk3', 'chunk4', 'package', 'path')]), + ), + ] diff --git a/matchcode/migrations/0002_alter_approximatedirectorycontentindex_package_and_more.py b/matchcode/migrations/0002_alter_approximatedirectorycontentindex_package_and_more.py new file mode 100644 index 00000000..2d34ed5e --- /dev/null +++ b/matchcode/migrations/0002_alter_approximatedirectorycontentindex_package_and_more.py @@ -0,0 +1,54 @@ +# Generated by Django 4.1.2 on 2022-12-08 01:59 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("packagedb", 
"0052_package_index_error_package_last_indexed_date"), + ("matchcode", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="approximatedirectorycontentindex", + name="package", + field=models.ForeignKey( + help_text="The Package that this directory is a part of", + on_delete=django.db.models.deletion.CASCADE, + to="packagedb.package", + ), + ), + migrations.AlterField( + model_name="approximatedirectorystructureindex", + name="package", + field=models.ForeignKey( + help_text="The Package that this directory is a part of", + on_delete=django.db.models.deletion.CASCADE, + to="packagedb.package", + ), + ), + migrations.AlterField( + model_name="exactfileindex", + name="package", + field=models.ForeignKey( + help_text="The Package that this file is from", + on_delete=django.db.models.deletion.CASCADE, + to="packagedb.package", + ), + ), + migrations.AlterField( + model_name="exactpackagearchiveindex", + name="package", + field=models.ForeignKey( + help_text="The Package that this file is from", + on_delete=django.db.models.deletion.CASCADE, + to="packagedb.package", + ), + ), + migrations.DeleteModel( + name="IndexablePackage", + ), + ] diff --git a/matchcode/migrations/__init__.py b/matchcode/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode/models.py b/matchcode/models.py new file mode 100644 index 00000000..5321d16e --- /dev/null +++ b/matchcode/models.py @@ -0,0 +1,329 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from collections import defaultdict +from datetime import datetime +import binascii +import logging +import sys + +from django.db import models +from django.forms.models import model_to_dict +from django.utils.translation import gettext_lazy as _ + +from minecode.management.commands import get_error_message +from matchcode_toolkit.fingerprinting import create_halohash_chunks +from matchcode_toolkit.fingerprinting import hexstring_to_binarray +from matchcode_toolkit.fingerprinting import split_fingerprint +from matchcode_toolkit.halohash import byte_hamming_distance +from packagedb.models import Package + + +TRACE = False + +if TRACE: + level = logging.DEBUG +else: + level = logging.ERROR + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout) +logger.setLevel(level) + + +def logger_debug(*args): + return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + + +############################################################################### +# FILE MATCHING +############################################################################### +class BaseFileIndex(models.Model): + sha1 = models.BinaryField( + max_length=20, + db_index=True, + help_text='Binary form of a SHA1 checksum in lowercase hex for a file', + null=False, + blank=False, + ) + + package = models.ForeignKey( + Package, + help_text='The Package that this file is from', + null=False, + on_delete=models.CASCADE, + ) + + class Meta: + abstract = True + + @classmethod + def index(cls, sha1, package): + try: + sha1_bin = hexstring_to_binarray(sha1) + bfi, created = cls.objects.get_or_create( + package=package, + sha1=sha1_bin + ) + if created: + logger.info( + '{} - Inserted {} for Package {}:\t{}'.format( + datetime.utcnow().isoformat(), + bfi.__class__.__name__, + package.download_url, + sha1 + ) + ) + return bfi, created + except Exception as e: + msg = f'Error creating {bfi.__class__.__name__}:\n' + msg += get_error_message(e) + package.index_error = msg 
+ package.save() + logger.error(msg) + + @classmethod + def match(cls, sha1): + """ + Return a list of matched Packages that contains a file with a SHA1 value of `sha1` + """ + if TRACE: + logger_debug(cls.__name__, 'match:', 'sha1:', sha1) + + if not sha1: + return cls.objects.none() + + sha1_in_bin = hexstring_to_binarray(sha1) + matches = cls.objects.filter(sha1=sha1_in_bin) + if TRACE: + for match in matches: + package = match.package + dct = model_to_dict(package) + logger_debug(cls.__name__, 'match:', 'matched_file:', dct) + return matches + + def fingerprint(self): + return binascii.hexlify(self.sha1).decode('utf-8') + + +class ExactPackageArchiveIndex(BaseFileIndex): + pass + + +class ExactFileIndex(BaseFileIndex): + pass + + +################################################################################ +# DIRECTORY MATCHING +################################################################################ +def bah128_ranges(indexed_elements_count, range_ratio=0.05): + """ + Return a tuple of two integers, one smaller than `indexed_elements_count` by + `range_ratio` and one larger than `indexed_elements_count` by `range_ratio` + + This helps us match on directories with similar amounts of files. Directory + fingerprints become uncomparable if one fingerprint has more elements + indexed in it than another. 
+ """ + return ( + int(indexed_elements_count * (1 - range_ratio)), + int(indexed_elements_count * (1 + range_ratio)) + ) + + +class BaseDirectoryIndex(models.Model): + indexed_elements_count = models.IntegerField( + help_text='Number of elements that went into the fingerprint', + ) + + chunk1 = models.BinaryField( + max_length=4, + db_index=True, + help_text='Binary form of the first 8 (0-7) hex digits of the fingerprint', + null=False, + blank=False + ) + + chunk2 = models.BinaryField( + max_length=4, + db_index=True, + help_text='Binary form of the second 8 (8-15) hex digits of the fingerprint', + null=False, + blank=False + ) + + chunk3 = models.BinaryField( + max_length=4, + db_index=True, + help_text='Binary form of the third 8 (16-23) hex digits of the fingerprint', + null=False, + blank=False + ) + + chunk4 = models.BinaryField( + max_length=4, + db_index=True, + help_text='Binary form of the fourth 8 (24-32) hex digits of the fingerprint', + null=False, + blank=False + ) + + package = models.ForeignKey( + Package, + help_text='The Package that this directory is a part of', + null=False, + on_delete=models.CASCADE, + ) + + path = models.CharField( + max_length=2000, + help_text=_('The full path value of this directory'), + ) + + class Meta: + abstract = True + unique_together = ['chunk1', 'chunk2', 'chunk3', 'chunk4', 'package', 'path'] + + def __str__(self): + return self.fingerprint() + + @classmethod + def index(cls, directory_fingerprint, resource_path, package): + """ + Index the string `directory_fingerprint` into the BaseDirectoryIndex model + """ + try: + indexed_elements_count, fp = split_fingerprint(directory_fingerprint) + fp_chunk1, fp_chunk2, fp_chunk3, fp_chunk4 = create_halohash_chunks(fp) + bdi, created = cls.objects.get_or_create( + indexed_elements_count=indexed_elements_count, + chunk1=fp_chunk1, + chunk2=fp_chunk2, + chunk3=fp_chunk3, + chunk4=fp_chunk4, + path=resource_path, + package=package, + ) + if created: + logger.info( + '{} - 
Inserted {} for Package {}:\t{}'.format( + datetime.utcnow().isoformat(), + bdi.__class__.__name__, + package.download_url, + directory_fingerprint + ) + ) + return bdi, created + except Exception as e: + msg = f'Error creating {bdi.__class__.__name__}:\n' + msg += get_error_message(e) + package.index_error = msg + package.save() + logger.error(msg) + + @classmethod + def match(cls, directory_fingerprint): + """ + Return a list of matched Packages + """ + if TRACE: + logger_debug(cls.__name__, 'match:', 'directory_fingerprint:', directory_fingerprint) + + if not directory_fingerprint: + return cls.objects.none() + + # Step 1: find fingerprints with matching chunks + indexed_elements_count, bah128 = split_fingerprint(directory_fingerprint) + chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(bah128) + range = bah128_ranges(indexed_elements_count) + matches = cls.objects.filter( + models.Q( + indexed_elements_count__range=range, + chunk1=chunk1 + ) | + models.Q( + indexed_elements_count__range=range, + chunk2=chunk2 + ) | + models.Q( + indexed_elements_count__range=range, + chunk3=chunk3 + ) | + models.Q( + indexed_elements_count__range=range, + chunk4=chunk4 + ) + ) + + if TRACE: + for match in matches: + dct = model_to_dict(match) + logger_debug(cls.__name__, 'match:', 'matched_package:', dct) + + # Step 2: calculate Hamming distance of all matches + + # Store all close matches in a dictionary of querysets + matches_by_hamming_distance = defaultdict(cls.objects.none) + for match in matches: + # Get fingerprint from the match + fp = match.fingerprint() + _, match_bah128 = split_fingerprint(fp) + + # Perform Hamming distance calculation between the fingerprint we + # are looking up and a potential match fingerprint + hd = byte_hamming_distance(bah128, match_bah128) + + # TODO: try other thresholds if this is too restrictive + if hd < 8: + # Save match to `matches_by_hamming_distance` by adding the matched object to the queryset + matches_by_hamming_distance[hd] 
|= cls.objects.filter(pk=match.pk) + + if TRACE: + logger_debug(list(matches_by_hamming_distance.items())) + + # Step 3: order matches from lowest Hamming distance to highest Hamming distance + # TODO: consider limiting matches for brevity + good_matches = cls.objects.none() + for hamming_distance, match in sorted(matches_by_hamming_distance.items()): + if hamming_distance == 0: + # If we have an exact match, return and disregard others + good_matches |= match + break + else: + # If we don't have an exact match, add all close matches we have + good_matches |= match + + if TRACE: + for match in good_matches: + dct = model_to_dict(match) + logger_debug(cls.__name__, 'match:', 'good_matched_package:', dct) + + return good_matches + + def get_chunks(self): + chunk1 = binascii.hexlify(self.chunk1) + chunk2 = binascii.hexlify(self.chunk2) + chunk3 = binascii.hexlify(self.chunk3) + chunk4 = binascii.hexlify(self.chunk4) + return chunk1, chunk2, chunk3, chunk4 + + def fingerprint(self): + indexed_element_count_as_hex_bytes = b'%08x' % self.indexed_elements_count + chunk1, chunk2, chunk3, chunk4 = self.get_chunks() + fingerprint = indexed_element_count_as_hex_bytes + chunk1 + chunk2 + chunk3 + chunk4 + return fingerprint.decode('utf-8') + + +class ApproximateDirectoryStructureIndex(BaseDirectoryIndex): + pass + + +class ApproximateDirectoryContentIndex(BaseDirectoryIndex): + pass diff --git a/matchcode/tests/__init__.py b/matchcode/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/matchcode/tests/test_api.py b/matchcode/tests/test_api.py new file mode 100644 index 00000000..be971081 --- /dev/null +++ b/matchcode/tests/test_api.py @@ -0,0 +1,163 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.urls import reverse + +from packagedb.models import Package + +from matchcode.indexing import index_package_directories +from matchcode.utils import load_resources_from_scan +from matchcode.utils import MatchcodeTestCase + + +class ApproximateDirectoryStructureIndexAPITestCase(MatchcodeTestCase): + """ + Query the ``-list`` and ``-match`` API routes of both the approximate directory content index and the approximate directory structure index, using the two npm packages created and indexed in setUp. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + + def setUp(self): + # Execute the superclass' setUp method before creating our own + # DB objects + super().setUp() + + self.test_package1, _ = Package.objects.get_or_create( + filename='plugin-request-2.4.1.tgz', + sha1='7295749caddd3c52be472eef6623a7b441ed17d6', + size=7269, + name='plugin-request', + version='2.4.1', + download_url='https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz', + type='npm', + ) + load_resources_from_scan(self.get_test_loc('match/nested/plugin-request-2.4.1-ip.json'), self.test_package1) + index_package_directories(self.test_package1) + + self.test_package2, _ = Package.objects.get_or_create( + filename='underscore-1.10.9.tgz', + sha1='ba7a9cfc15873e67821611503a34a7c26bf7264f', + size=26569, + name='underscore', + version='1.10.9', + download_url='https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz', + type='npm', + ) + load_resources_from_scan(self.get_test_loc('match/nested/underscore-1.10.9-ip.json'), self.test_package2) + index_package_directories(self.test_package2) + + def test_api_approximate_directory_content_index_list_fingerprint_lookup(self): + test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' + response = self.client.get( + reverse('api:approximatedirectorycontentindex-list'), + data={'fingerprint': test_fingerprint} + ) + self.assertEqual(200, response.status_code) + results = response.data.get('results', []) + self.assertEqual(1, len(results)) + result = results[0] + expected_package = 'http://testserver' +
reverse('api:package-detail', args=[self.test_package1.uuid]) + expected_result = { + 'fingerprint': '00000007af7d63765c78fa516b5353f5ffa7df45', + 'package': expected_package + } + self.assertEqual(expected_result, result) + + def test_api_approximate_directory_structure_index_list_fingerprint_lookup(self): + test_fingerprint = '00000004d10982208810240820080a6a3e852486' + response = self.client.get( + reverse('api:approximatedirectorystructureindex-list'), + data={'fingerprint': test_fingerprint} + ) + self.assertEqual(200, response.status_code) + results = response.data.get('results', []) + self.assertEqual(1, len(results)) + result = results[0] + expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) + expected_result = { + 'fingerprint': '00000004d10982208810240820080a6a3e852486', + 'package': expected_package + } + self.assertEqual(expected_result, result) + + def test_api_approximate_directory_content_index_match_no_match(self): + # The match route is expected to return an empty result for this fingerprint + test_fingerprint = '000000020e1d2124040134564e1941a6a620db34' + response = self.client.get( + reverse('api:approximatedirectorycontentindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(0, len(results)) + + def test_api_approximate_directory_structure_index_match_no_match(self): + # The match route is expected to return an empty result for this fingerprint + test_fingerprint = '00000004d10982789010240876580a6a3e852485' + response = self.client.get( + reverse('api:approximatedirectorystructureindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(0, len(results)) + + def test_api_approximate_directory_content_index_match_close_match(self): + # This test fingerprint has a hamming distance of 7 from the expected fingerprint + test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7d000' + response = self.client.get( + reverse('api:approximatedirectorycontentindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(1,
len(results)) + result = results[0] + self.assertEqual(test_fingerprint, result['fingerprint']) + expected_matched_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' + self.assertEqual(expected_matched_fingerprint, result['matched_fingerprint']) + expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package1.uuid]) + self.assertEqual(expected_package, result['package']) + + def test_api_approximate_directory_structure_index_match_close_match(self): + # This test fingerprint has a hamming distance of 7 from the expected fingerprint + test_fingerprint = '00000004d10982208810240820080a6a3e800000' + response = self.client.get( + reverse('api:approximatedirectorystructureindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(test_fingerprint, result['fingerprint']) + expected_matched_fingerprint = '00000004d10982208810240820080a6a3e852486' + self.assertEqual(expected_matched_fingerprint, result['matched_fingerprint']) + expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) + self.assertEqual(expected_package, result['package']) + + def test_api_approximate_directory_content_index_match(self): + # Exact query: the returned matched_fingerprint must equal the query fingerprint itself + test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' + response = self.client.get( + reverse('api:approximatedirectorycontentindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(test_fingerprint, result['fingerprint']) + self.assertEqual(test_fingerprint, result['matched_fingerprint']) + expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package1.uuid]) + self.assertEqual(expected_package, result['package']) + + def test_api_approximate_directory_structure_index_match(self): + # Exact query: the returned matched_fingerprint must equal the query fingerprint itself + test_fingerprint =
'00000004d10982208810240820080a6a3e852486' + response = self.client.get( + reverse('api:approximatedirectorystructureindex-match'), + data={'fingerprint': test_fingerprint} + ) + results = response.data + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(test_fingerprint, result['fingerprint']) + self.assertEqual(test_fingerprint, result['matched_fingerprint']) + expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) + self.assertEqual(expected_package, result['package']) diff --git a/matchcode/tests/test_index_packages.py b/matchcode/tests/test_index_packages.py new file mode 100644 index 00000000..f6850d33 --- /dev/null +++ b/matchcode/tests/test_index_packages.py @@ -0,0 +1,201 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects.
+# + +import os + +from commoncode.resource import VirtualCodebase + +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import hexstring_to_binarray +from matchcode.indexing import _create_virtual_codebase_from_package_resources +from matchcode.indexing import index_directory_fingerprints +from matchcode.indexing import index_package_archives +from matchcode.indexing import index_package_directories +from matchcode.indexing import index_package_file +from matchcode.management.commands import index_packages +from matchcode.models import ApproximateDirectoryContentIndex +from matchcode.models import ApproximateDirectoryStructureIndex +from matchcode.models import create_halohash_chunks +from matchcode.models import ExactPackageArchiveIndex +from matchcode.models import ExactFileIndex +from matchcode.utils import load_resources_from_scan +from matchcode.utils import MatchcodeTestCase +from packagedb.models import Package +from packagedb.models import Resource + + +class IndexPackagesTestCase(MatchcodeTestCase): + """ + Check the index_packages management command and the individual matchcode.indexing helpers against a single maven test package whose resources are loaded from the match/scan1.json test file. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + maxDiff = None + + def setUp(self): + # Ensure database is empty before adding test packages + Package.objects.all().delete() + + # Single object, single source + self.test_package1, _ = Package.objects.get_or_create( + filename='abbot-0.12.3.jar', + sha1='51d28a27d919ce8690a40f4f335b9d591ceb16e9', + md5='38206e62a54b0489fb6baa4db5a06093', + size=689791, + name='abbot', + version='0.12.3', + download_url='http://repo1.maven.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar', + type='maven', + ) + self.scan1 = self.get_test_loc('match/scan1.json') + load_resources_from_scan(self.scan1, self.test_package1) + + def test_index_packages(self): + # Ensure ApproximateDirectoryStructureIndex, ExactPackageArchiveIndex, + # and ExactFileIndex tables are empty + self.assertFalse(ApproximateDirectoryStructureIndex.objects.all()) +
self.assertFalse(ExactPackageArchiveIndex.objects.all()) + self.assertFalse(ExactFileIndex.objects.all()) + + # Populate fingerprint tables from Package and Resources + package_indexer = index_packages.Command() + package_indexer.handle() + + # See if the tables have been populated properly + package_archive_sha1s = ExactPackageArchiveIndex.objects.all() + self.assertEqual(1, len(package_archive_sha1s)) + package_archive_sha1 = package_archive_sha1s[0] + expected_sha1 = self.test_package1.sha1 + self.assertEqual(expected_sha1, package_archive_sha1.fingerprint()) + self.assertEqual(self.test_package1, package_archive_sha1.package) + + vc = VirtualCodebase(location=self.scan1) + expected_resources = [r for r in vc.walk(topdown=True) if r.type == 'file'] + package_file_sha1s = ExactFileIndex.objects.all() + self.assertEqual(len(expected_resources), len(package_file_sha1s)) + # NOTE(review): the pairwise comparison below assumes ExactFileIndex rows come back in the same order as the codebase walk; no order_by is applied to the queryset here -- confirm the model defines a default ordering + for expected_resource, package_file_sha1 in zip(expected_resources, package_file_sha1s): + self.assertEqual(expected_resource.sha1, package_file_sha1.fingerprint()) + self.assertEqual(self.test_package1, package_file_sha1.package) + + directory_structure_fingerprints = ApproximateDirectoryStructureIndex.objects.filter(package=self.test_package1).order_by('path') + # Only one directory should be indexed since we do not create directory + # fingerprints for directories with only one file in them + self.assertEqual(1, len(directory_structure_fingerprints)) + + result_1 = directory_structure_fingerprints[0] + self.assertEqual('test', result_1.path) + self.assertEqual(self.test_package1, result_1.package) + r1_chunk1, r1_chunk2, r1_chunk3, r1_chunk4 = create_halohash_chunks('160440008028c38c24a8038040006040') + self.assertEqual(r1_chunk1, result_1.chunk1) + self.assertEqual(r1_chunk2, result_1.chunk2) + self.assertEqual(r1_chunk3, result_1.chunk3) + self.assertEqual(r1_chunk4, result_1.chunk4) + + def test_index_packages_index_directory_structure_fingerprints(self): +
index_packages.index_package_directories(self.test_package1) + directory_structure_fingerprints = ApproximateDirectoryStructureIndex.objects.filter(package=self.test_package1).order_by('path') + self.assertEqual(1, len(directory_structure_fingerprints)) + + result_1 = directory_structure_fingerprints[0] + self.assertEqual('test', result_1.path) + self.assertEqual(self.test_package1, result_1.package) + + expected_chunk1 = hexstring_to_binarray('16044000') + expected_chunk2 = hexstring_to_binarray('8028c38c') + expected_chunk3 = hexstring_to_binarray('24a80380') + expected_chunk4 = hexstring_to_binarray('40006040') + + self.assertEqual(expected_chunk1, result_1.chunk1) + self.assertEqual(expected_chunk2, result_1.chunk2) + self.assertEqual(expected_chunk3, result_1.chunk3) + self.assertEqual(expected_chunk4, result_1.chunk4) + + def test_index_package_archives(self): + # Ensure ExactPackageArchiveIndex table is empty + self.assertFalse(ExactPackageArchiveIndex.objects.all()) + + # Load ExactPackageArchiveIndex table + created = index_package_archives(self.test_package1) + + # Check to see if new ExactPackageArchiveIndex was created + self.assertTrue(created) + self.assertEqual(1, ExactPackageArchiveIndex.objects.all().count()) + + # Ensure the created ExactPackageArchiveIndex indexes the correct checksum and is related to the right Package + result = ExactPackageArchiveIndex.objects.all()[0] + + self.assertEqual(self.test_package1.sha1, result.fingerprint()) + self.assertEqual(self.test_package1, result.package) + + def test_index_package_file(self): + # Ensure ExactFileIndex is empty prior to test + self.assertFalse(ExactFileIndex.objects.all()) + + # Get one resource from test_package1 and index it + resource = self.test_package1.resources.filter(is_file=True)[0] + created_exact_file_index = index_package_file(resource) + + self.assertTrue(created_exact_file_index) + self.assertEqual(1, ExactFileIndex.objects.all().count()) + result =
ExactFileIndex.objects.all()[0] + + expected_fingerprint = '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + self.assertEqual(expected_fingerprint, result.fingerprint()) + self.assertEqual(self.test_package1, result.package) + + def test__create_virtual_codebase_from_package_resources(self): + vc = _create_virtual_codebase_from_package_resources(self.test_package1) + expected_vc = VirtualCodebase(location=self.scan1) + + # Ensure that at least the directory structure is the same + for expected_r, r in zip(expected_vc.walk(), vc.walk()): + self.assertEqual(expected_r.path, r.path) + + def test_index_directory_fingerprints(self): + vc = _create_virtual_codebase_from_package_resources(self.test_package1) + vc = compute_directory_fingerprints(vc) + + # Ensure tables are empty prior to indexing + self.assertFalse(ApproximateDirectoryContentIndex.objects.all()) + self.assertFalse(ApproximateDirectoryStructureIndex.objects.all()) + + indexed_adci, indexed_adsi = index_directory_fingerprints(vc, self.test_package1) + + # Check to see if anything has been indexed + self.assertEqual(1, indexed_adci) + self.assertEqual(1, indexed_adsi) + self.assertEqual(1, ApproximateDirectoryContentIndex.objects.all().count()) + self.assertEqual(1, ApproximateDirectoryStructureIndex.objects.all().count()) + + # Check to see if the correct values have been indexed + adci = ApproximateDirectoryContentIndex.objects.all()[0] + adsi = ApproximateDirectoryStructureIndex.objects.all()[0] + + expected_adci_fingerprint = '0000000288212131028101000400403044049614' + expected_adsi_fingerprint = '00000002160440008028c38c24a8038040006040' + self.assertEqual(expected_adci_fingerprint, adci.fingerprint()) + self.assertEqual(expected_adsi_fingerprint, adsi.fingerprint()) + + def test_index_package_directories(self): + # Ensure tables are empty prior to indexing + self.assertFalse(ApproximateDirectoryContentIndex.objects.all()) + self.assertFalse(ApproximateDirectoryStructureIndex.objects.all()) + +
indexed_adci, indexed_adsi = index_package_directories(self.test_package1) + + # Check to see if anything has been indexed + self.assertEqual(1, indexed_adci) + self.assertEqual(1, indexed_adsi) + self.assertEqual(1, ApproximateDirectoryContentIndex.objects.all().count()) + self.assertEqual(1, ApproximateDirectoryStructureIndex.objects.all().count()) + + # Check to see if the correct values have been indexed + adci = ApproximateDirectoryContentIndex.objects.all()[0] + adsi = ApproximateDirectoryStructureIndex.objects.all()[0] + + expected_adci_fingerprint = '0000000288212131028101000400403044049614' + expected_adsi_fingerprint = '00000002160440008028c38c24a8038040006040' + self.assertEqual(expected_adci_fingerprint, adci.fingerprint()) + self.assertEqual(expected_adsi_fingerprint, adsi.fingerprint()) diff --git a/matchcode/tests/test_match.py b/matchcode/tests/test_match.py new file mode 100644 index 00000000..0488edcb --- /dev/null +++ b/matchcode/tests/test_match.py @@ -0,0 +1,326 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects.
+# + +import os + +import attr +from commoncode.resource import VirtualCodebase +from packagedb.models import Package + +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode.management.commands.index_packages import index_package_directories +from matchcode.match import EXACT_PACKAGE_ARCHIVE_MATCH +from matchcode.match import APPROXIMATE_DIRECTORY_STRUCTURE_MATCH +from matchcode.match import APPROXIMATE_DIRECTORY_CONTENT_MATCH +from matchcode.match import EXACT_FILE_MATCH +from matchcode.match import do_match +from matchcode.match import path_suffixes +from matchcode.utils import index_package_files_sha1 +from matchcode.utils import index_packages_sha1 +from matchcode.utils import load_resources_from_scan +from matchcode.utils import MatchcodeTestCase + + +def run_do_match_from_scan(scan_file_location, match_type): + """ + Return a VirtualCodebase built from the scan at `scan_file_location` after computing its directory fingerprints and running do_match() on it with `match_type`. The codebase is created with a codebase-level `matches` list attribute and a per-resource `matched_to` list attribute, both defaulting to empty lists. + """ + vc = VirtualCodebase( + location=scan_file_location, + codebase_attributes=dict( + matches=attr.ib(default=attr.Factory(list)) + ), + resource_attributes=dict( + matched_to=attr.ib(default=attr.Factory(list)) + ) + ) + vc = compute_directory_fingerprints(vc) + do_match(vc, match_type) + return vc + + +class MatchPackagesTestCase(MatchcodeTestCase): + """ + Run do_match() with each of the four match types on the models/match-test.json scan and compare the resulting codebase with a stored expected JSON file. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + maxDiff = None + + def setUp(self): + # Execute the superclass' setUp method before creating our own + # DB objects + super(MatchPackagesTestCase, self).setUp() + + self.test_package1, _ = Package.objects.get_or_create( + filename='abbot-0.12.3.jar', + sha1='51d28a27d919ce8690a40f4f335b9d591ceb16e9', + md5='38206e62a54b0489fb6baa4db5a06093', + size=689791, + name='abbot', + version='0.12.3', + download_url='http://repo1.maven.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar', + type='maven', + ) + self.test_package1_metadata = self.test_package1.to_dict() + + self.test_package2, _ = Package.objects.get_or_create( + filename='dojoz-0.4.1-1.jar', + sha1='ae9d68fd6a29906606c2d9407d1cc0749ef84588', +
md5='508361a1c6273a4c2b8e4945618b509f', + size=876720, + name='dojoz', + version='0.4.1-1', + download_url='https://repo1.maven.org/maven2/org/zkoss/zkforge/dojoz/0.4.1-1/dojoz-0.4.1-1.jar', + type='maven', + ) + self.test_package2_metadata = self.test_package2.to_dict() + + self.test_package3, _ = Package.objects.get_or_create( + filename='acegi-security-0.51.jar', + sha1='ede156692b33872f5ee9465b7a06d6b2bc9e5e7f', + size=176954, + name='acegi-security', + version='0.51', + download_url='https://repo1.maven.org/maven2/acegisecurity/acegi-security/0.51/acegi-security-0.51.jar', + type='maven' + ) + self.test_package3_metadata = self.test_package3.to_dict() + + self.test_package4, _ = Package.objects.get_or_create( + filename='test.tar.gz', + sha1='deadbeef', + size=42589, + name='test', + version='0.01', + download_url='https://test.com/test.tar.gz', + type='maven' + ) + self.test_package4_metadata = self.test_package4.to_dict() + + # Populate ExactPackageArchiveIndexFingerprint table + index_packages_sha1() + + load_resources_from_scan(self.get_test_loc('models/match-test.json'), self.test_package4) + index_package_directories(self.test_package4) + index_package_files_sha1(self.test_package4, self.get_test_loc('models/match-test.json')) + + def test_do_match_package_archive_match(self): + input_file = self.get_test_loc('models/match-test.json') + vc = run_do_match_from_scan(input_file, EXACT_PACKAGE_ARCHIVE_MATCH) + expected = self.get_test_loc('models/match-test-exact-package-results.json') + self.check_codebase(vc, expected, regen=False) + + def test_do_match_approximate_directory_structure_match(self): + input_file = self.get_test_loc('models/match-test.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('models/match-test-approximate-directory-structure-results.json') + self.check_codebase(vc, expected, regen=False) + + def test_do_match_approximate_directory_content_match(self): + input_file =
self.get_test_loc('models/match-test.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_CONTENT_MATCH) + expected = self.get_test_loc('models/match-test-approximate-directory-content-results.json') + self.check_codebase(vc, expected, regen=False) + + def test_do_match_package_file_match(self): + input_file = self.get_test_loc('models/match-test.json') + vc = run_do_match_from_scan(input_file, EXACT_FILE_MATCH) + expected = self.get_test_loc('models/match-test-exact-file-results.json') + self.check_codebase(vc, expected, regen=False) + + +class MatchNestedPackagesTestCase(MatchcodeTestCase): + """ + Run approximate directory structure matching on the match/nested/nested.json scan after indexing the directories of two npm packages in setUp. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + maxDiff = None + + def setUp(self): + # Execute the superclass' setUp method before creating our own + # DB objects + super(MatchNestedPackagesTestCase, self).setUp() + + self.test_package1, _ = Package.objects.get_or_create( + filename='plugin-request-2.4.1.tgz', + sha1='7295749caddd3c52be472eef6623a7b441ed17d6', + size=7269, + name='plugin-request', + version='2.4.1', + download_url='https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz', + type='npm', + ) + load_resources_from_scan(self.get_test_loc('match/nested/plugin-request-2.4.1-ip.json'), self.test_package1) + index_package_directories(self.test_package1) + + self.test_package2, _ = Package.objects.get_or_create( + filename='underscore-1.10.9.tgz', + sha1='ba7a9cfc15873e67821611503a34a7c26bf7264f', + size=26569, + name='underscore', + version='1.10.9', + download_url='https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz', + type='npm', + ) + load_resources_from_scan(self.get_test_loc('match/nested/underscore-1.10.9-ip.json'), self.test_package2) + index_package_directories(self.test_package2) + + def test_do_match_approximate_directory_structure_match(self): + input_file = self.get_test_loc('match/nested/nested.json') + vc = run_do_match_from_scan(input_file,
APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/nested/nested-expected.json') + self.check_codebase(vc, expected, regen=False) + + +class MatchUtilityFunctionsTestCase(MatchcodeTestCase): + """ + Check the path_suffixes() helper imported from matchcode.match. + """ + def test_path_suffixes(self): + suffixes = list(path_suffixes('/foo/bar/baz/qux')) + expected = ['foo/bar/baz/qux', 'bar/baz/qux', 'baz/qux', 'qux'] + self.assertEqual(expected, suffixes) + + +class DirectoryMatchingTestCase(MatchcodeTestCase): + """ + Run approximate directory structure matching on the scans under match/directory-matching/ after indexing the directories of eight versions of the abbrev npm package (1.0.3 to 1.1.1) in setUp. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + maxDiff = None + + def setUp(self): + super(DirectoryMatchingTestCase, self).setUp() + + self.test_package1, _ = Package.objects.get_or_create( + filename='abbrev-1.0.3.tgz', + sha1='aa049c967f999222aa42e14434f0c562ef468241', + name='abbrev', + version='1.0.3', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.3.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.3-i.json'), self.test_package1) + index_package_directories(self.test_package1) + + self.test_package2, _ = Package.objects.get_or_create( + filename='abbrev-1.0.4.tgz', + sha1='bd55ae5e413ba1722ee4caba1f6ea10414a59ecd', + name='abbrev', + version='1.0.4', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.4.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.4-i.json'), self.test_package2) + index_package_directories(self.test_package2) + + self.test_package3, _ = Package.objects.get_or_create( + filename='abbrev-1.0.5.tgz', + sha1='5d8257bd9ebe435e698b2fa431afde4fe7b10b03', + name='abbrev', + version='1.0.5', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.5.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.5-i.json'), self.test_package3) + index_package_directories(self.test_package3) + + self.test_package4, _ = Package.objects.get_or_create( + filename='abbrev-1.0.6.tgz', +
sha1='b6d632b859b3fa2d6f7e4b195472461b9e32dc30', + name='abbrev', + version='1.0.6', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.6.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.6-i.json'), self.test_package4) + index_package_directories(self.test_package4) + + self.test_package5, _ = Package.objects.get_or_create( + filename='abbrev-1.0.7.tgz', + sha1='5b6035b2ee9d4fb5cf859f08a9be81b208491843', + name='abbrev', + version='1.0.7', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.7.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.7-i.json'), self.test_package5) + index_package_directories(self.test_package5) + + self.test_package6, _ = Package.objects.get_or_create( + filename='abbrev-1.0.9.tgz', + sha1='91b4792588a7738c25f35dd6f63752a2f8776135', + name='abbrev', + version='1.0.9', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.0.9.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.0.9-i.json'), self.test_package6) + index_package_directories(self.test_package6) + + self.test_package7, _ = Package.objects.get_or_create( + filename='abbrev-1.1.0.tgz', + sha1='d0554c2256636e2f56e7c2e5ad183f859428d81f', + name='abbrev', + version='1.1.0', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.1.0.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.1.0-i.json'), self.test_package7) + index_package_directories(self.test_package7) + + self.test_package8, _ = Package.objects.get_or_create( + filename='abbrev-1.1.1.tgz', + sha1='f8f2c887ad10bf67f634f005b6987fed3179aac8', + name='abbrev', + version='1.1.1', + type='npm', + download_url='https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz', + ) + load_resources_from_scan(self.get_test_loc('match/directory-matching/abbrev-1.1.1-i.json'), self.test_package8)
+ index_package_directories(self.test_package8) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_3(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.0.3-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.3-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_4(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.0.4-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.4-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_5(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.0.5-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.5-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_6(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.0.6-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.6-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_7(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.0.7-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.7-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_0_9(self): + input_file
= self.get_test_loc('match/directory-matching/abbrev-1.0.9-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.0.9-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_1_0(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.1.0-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.1.0-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_abbrev_1_1_1(self): + input_file = self.get_test_loc('match/directory-matching/abbrev-1.1.1-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/abbrev-1.1.1-i-expected.json') + self.check_codebase(vc, expected, regen=False) + + def test_match_ApproximateDirectoryStructureIndex_get_stdin_3_0_2(self): + # NOTE(review): get-stdin is not one of the packages indexed in setUp; presumably the expected file records how an unrelated package behaves against the abbrev index -- confirm against the fixture + input_file = self.get_test_loc('match/directory-matching/get-stdin-3.0.2-i.json') + vc = run_do_match_from_scan(input_file, APPROXIMATE_DIRECTORY_STRUCTURE_MATCH) + expected = self.get_test_loc('match/directory-matching/get-stdin-3.0.2-i-expected.json') + self.check_codebase(vc, expected, regen=False) diff --git a/matchcode/tests/test_models.py b/matchcode/tests/test_models.py new file mode 100644 index 00000000..ae6b9e06 --- /dev/null +++ b/matchcode/tests/test_models.py @@ -0,0 +1,224 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects.
+# + +import os + +from commoncode.resource import VirtualCodebase +from packagedb.models import Package +import attr + +from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import hexstring_to_binarray +from matchcode.management.commands.index_packages import index_package_directories +from matchcode.models import ApproximateDirectoryContentIndex +from matchcode.models import ApproximateDirectoryStructureIndex +from matchcode.models import create_halohash_chunks +from matchcode.models import ExactPackageArchiveIndex +from matchcode.models import ExactFileIndex +from matchcode.utils import index_packages_sha1 +from matchcode.utils import index_package_files_sha1 +from matchcode.utils import load_resources_from_scan +from matchcode.utils import MatchcodeTestCase + + +# NOTE(review): these four constants are not referenced by any test in this module; confirm whether they are still needed +EXACT_PACKAGE_ARCHIVE_MATCH = 0 +APPROXIMATE_DIRECTORY_STRUCTURE_MATCH = 1 +APPROXIMATE_DIRECTORY_CONTENT_MATCH = 2 +EXACT_FILE_MATCH = 3 + + +class BaseModelTest(MatchcodeTestCase): + """ + Shared fixture for the exact-match model tests: create four maven test packages, index their archive SHA1s, then load the models/match-test.json resources into the fourth package and index its directories and file SHA1s. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + maxDiff = None + + def setUp(self): + super(BaseModelTest, self).setUp() + + self.test_package1, _ = Package.objects.get_or_create( + filename='abbot-0.12.3.jar', + sha1='51d28a27d919ce8690a40f4f335b9d591ceb16e9', + md5='38206e62a54b0489fb6baa4db5a06093', + size=689791, + name='abbot', + version='0.12.3', + download_url='http://repo1.maven.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar', + type='maven', + ) + self.test_package1_metadata = self.test_package1.to_dict() + + self.test_package2, _ = Package.objects.get_or_create( + filename='dojoz-0.4.1-1.jar', + sha1='ae9d68fd6a29906606c2d9407d1cc0749ef84588', + md5='508361a1c6273a4c2b8e4945618b509f', + size=876720, + name='dojoz', + version='0.4.1-1', + download_url='https://repo1.maven.org/maven2/org/zkoss/zkforge/dojoz/0.4.1-1/dojoz-0.4.1-1.jar', + type='maven', + ) + self.test_package2_metadata = self.test_package2.to_dict() + +
self.test_package3, _ = Package.objects.get_or_create( + filename='acegi-security-0.51.jar', + sha1='ede156692b33872f5ee9465b7a06d6b2bc9e5e7f', + size=176954, + name='acegi-security', + version='0.51', + download_url='https://repo1.maven.org/maven2/acegisecurity/acegi-security/0.51/acegi-security-0.51.jar', + type='maven' + ) + self.test_package3_metadata = self.test_package3.to_dict() + + self.test_package4, _ = Package.objects.get_or_create( + filename='test.tar.gz', + sha1='deadbeef', + size=42589, + name='test', + version='0.01', + download_url='https://test.com/test.tar.gz', + type='maven' + ) + self.test_package4_metadata = self.test_package4.to_dict() + + # Populate ExactPackageArchiveIndexFingerprint table + index_packages_sha1() + + # Populate ExactFileIndexFingerprint table + load_resources_from_scan(self.get_test_loc('models/match-test.json'), self.test_package4) + index_package_directories(self.test_package4) + index_package_files_sha1(self.test_package4, self.get_test_loc('models/match-test.json')) + + +class ExactPackageArchiveIndexModelTestCase(BaseModelTest): + """ + Check ExactPackageArchiveIndex.match() for a SHA1 that belongs to exactly one indexed package. + """ + def test_ExactPackageArchiveIndex_single_sha1_single_match(self): + result = ExactPackageArchiveIndex.match('51d28a27d919ce8690a40f4f335b9d591ceb16e9') + result = [r.package.to_dict() for r in result] + expected = [self.test_package1_metadata] + self.assertEqual(expected, result) + + +class ExactFileIndexModelTestCase(BaseModelTest): + """ + Check ExactFileIndex.match() by matching every resource SHA1 of the models/match-test.json scan and comparing the annotated codebase with a stored expected JSON file. + """ + def test_ExactFileIndex_match(self): + scan_location = self.get_test_loc('models/match-test.json') + codebase = VirtualCodebase( + location=scan_location, + codebase_attributes=dict( + matches=attr.ib(default=attr.Factory(list)) + ), + resource_attributes=dict( + matched_to=attr.ib(default=attr.Factory(list)) + ) + ) + + # populate codebase with match results + for resource in codebase.walk(topdown=True): + matches = ExactFileIndex.match(resource.sha1) + for match in matches: + p = match.package.to_dict() + p['match_type'] = 'exact' +
codebase.attributes.matches.append(p) + resource.matched_to.append(p['purl']) + resource.save(codebase) + + expected = self.get_test_loc('models/exact-file-matching-standalone-test-results.json') + self.check_codebase(codebase, expected, regen=False) + + +class ApproximateDirectoryMatchingIndexModelTestCase(MatchcodeTestCase): + """ + Check ApproximateDirectoryStructureIndex.match() and ApproximateDirectoryContentIndex.match() on the directories of the async 0.2.9 scan, with async 0.2.10 and 0.2.9 indexed in setUp. + """ + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + + def setUp(self): + # Name this class (not MatchcodeTestCase) in super() so that the lookup starts at MatchcodeTestCase and its setUp is not skipped + super(ApproximateDirectoryMatchingIndexModelTestCase, self).setUp() + self.test_package1, _ = Package.objects.get_or_create( + filename='async-0.2.10.tgz', + sha1='b6bbe0b0674b9d719708ca38de8c237cb526c3d1', + md5='fd313a0e8cc2343569719e80cd7a67ac', + size=15772, + name='async', + version='0.2.10', + download_url='https://registry.npmjs.org/async/-/async-0.2.10.tgz', + type='npm', + ) + self.test_package1_metadata = self.test_package1.to_dict() + load_resources_from_scan(self.get_test_loc('models/directory-matching/async-0.2.10.tgz-i.json'), self.test_package1) + index_package_directories(self.test_package1) + + self.test_package2, _ = Package.objects.get_or_create( + filename='async-0.2.9.tgz', + sha1='df63060fbf3d33286a76aaf6d55a2986d9ff8619', + md5='895ac62ba7c61086cffdd50ab03c0447', + size=15672, + name='async', + version='0.2.9', + download_url='https://registry.npmjs.org/async/-/async-0.2.9.tgz', + type='npm', + ) + self.test_package2_metadata = self.test_package2.to_dict() + load_resources_from_scan(self.get_test_loc('models/directory-matching/async-0.2.9-i.json'), self.test_package2) + index_package_directories(self.test_package2) + + def test_ApproximateDirectoryStructureIndex_match_subdir(self): + scan_location = self.get_test_loc('models/directory-matching/async-0.2.9-i.json') + vc = VirtualCodebase( + location=scan_location, + resource_attributes=dict(packages=attr.ib(default=attr.Factory(list))) + ) + codebase = compute_directory_fingerprints(vc) + + # populate codebase with match results + for resource in codebase.walk(topdown=True): + if resource.is_file: + continue +
fp = resource.extra_data.get('directory_structure', '') + matches = ApproximateDirectoryStructureIndex.match(fp) + for match in matches: + p = match.package.to_dict() + p['match_type'] = 'approximate-directory-structure' + resource.packages.append(p) + resource.save(codebase) + + expected = self.get_test_loc('models/directory-matching/async-0.2.9-i-expected-structure.json') + self.check_codebase(codebase, expected, regen=False) + + def test_ApproximateDirectoryContentIndex_match_subdir(self): + scan_location = self.get_test_loc('models/directory-matching/async-0.2.9-i.json') + vc = VirtualCodebase( + location=scan_location, + resource_attributes=dict(packages=attr.ib(default=attr.Factory(list))) + ) + codebase = compute_directory_fingerprints(vc) + + # populate codebase with match results + for resource in codebase.walk(topdown=True): + if resource.is_file: + continue + fp = resource.extra_data.get('directory_content', '') + matches = ApproximateDirectoryContentIndex.match(fp) + for match in matches: + p = match.package.to_dict() + p['match_type'] = 'approximate-directory-content' + resource.packages.append(p) + resource.save(codebase) + + expected = self.get_test_loc('models/directory-matching/async-0.2.9-i-expected-content.json') + self.check_codebase(codebase, expected, regen=False) + + +class MatchcodeModelUtilsTestCase(MatchcodeTestCase): + """ + Check that create_halohash_chunks() splits a 32 hex character fingerprint into four chunks equal to hexstring_to_binarray() of each successive 8 character slice. + """ + def test_create_halohash_chunks(self): + fingerprint = '49280e141724c001e1080128621a4210' + chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(fingerprint) + expected_chunk1 = hexstring_to_binarray('49280e14') + expected_chunk2 = hexstring_to_binarray('1724c001') + expected_chunk3 = hexstring_to_binarray('e1080128') + expected_chunk4 = hexstring_to_binarray('621a4210') + self.assertEqual(expected_chunk1, chunk1) + self.assertEqual(expected_chunk2, chunk2) + self.assertEqual(expected_chunk3, chunk3) + self.assertEqual(expected_chunk4, chunk4) diff --git a/matchcode/tests/testfiles/api/scan2_match_results.json
b/matchcode/tests/testfiles/api/scan2_match_results.json new file mode 100644 index 00000000..90f7d263 --- /dev/null +++ b/matchcode/tests/testfiles/api/scan2_match_results.json @@ -0,0 +1,235 @@ +{ + "packages": [ + { + "type": "maven", + "namespace": "", + "name": "test", + "version": "0.01", + "qualifiers": "", + "subpath": "", + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://test.com/test.tar.gz", + "size": 42589, + "sha1": "deadbeef", + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": null, + "notice_text": null, + "root_path": null, + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "package_url": "pkg:maven/test@0.01", + "match_type": "approximate-directory" + } + ], + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": 
[], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + 
"dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "for_packages": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i-expected.json new file mode 100644 index 00000000..c8bb9dcb --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i-expected.json @@ -0,0 +1,144 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + 
"is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.3" + ], + "files_count": 3, + "dirs_count": 1, + "size_count": 3358, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 277, + "date": "2011-03-24", + "sha1": "d61dc2c98ab10bf909b99f60e7bf584a7f7ead8c", + "md5": "8468753cba56d0075f6532a657ee5821", + "sha256": "5ab100bf0eb08adb175db170a1254d14e0be705ff1b563e5acddd3c8d03faee1", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.3" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2011-03-24", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.3" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + 
"matched_to": [ + "pkg:npm/abbrev@1.0.3" + ], + "files_count": 1, + "dirs_count": 0, + "size_count": 2582, + "scan_errors": [] + }, + { + "path": "package/lib/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 2582, + "date": "2011-03-24", + "sha1": "055ec01ac8b111bc948e498d87d9dc47f5e5acaa", + "md5": "06aebeadc85e52f4b8bf88eab6cd8b6c", + "sha256": "efd2c9b755dc4b2df3231222b5b6a63b7a1343472dfbc8807c5f15e1d28a0c75", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.3" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i.json new file mode 100644 index 00000000..2d418ac9 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.3-i.json @@ -0,0 +1,161 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.3.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011500.811761", + "end_timestamp": "2022-12-02T011501.090542", + "output_format_version": "2.0.0", + "duration": 0.2787973880767822, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 3 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 3, + "dirs_count": 1, + "size_count": 3358, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 277, + "date": "2011-03-24", + "sha1": "d61dc2c98ab10bf909b99f60e7bf584a7f7ead8c", + "md5": "8468753cba56d0075f6532a657ee5821", + "sha256": "5ab100bf0eb08adb175db170a1254d14e0be705ff1b563e5acddd3c8d03faee1", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": 
"2011-03-24", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 2582, + "scan_errors": [] + }, + { + "path": "package/lib/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 2582, + "date": "2011-03-24", + "sha1": "055ec01ac8b111bc948e498d87d9dc47f5e5acaa", + "md5": "06aebeadc85e52f4b8bf88eab6cd8b6c", + "sha256": "efd2c9b755dc4b2df3231222b5b6a63b7a1343472dfbc8807c5f15e1d28a0c75", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i-expected.json new file mode 100644 index 00000000..eaf563c1 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i-expected.json @@ -0,0 +1,172 
@@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 4, + "dirs_count": 1, + "size_count": 4750, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1092, + "date": "2011-05-13", + "sha1": "4a1927e74796f06ac7e7a687ca6b44d39c65d8f4", + "md5": "b3245a33f2d41818f14c489bd33bc4a8", + "sha256": "ab95d69fc43021e0451b5021ff09fbc89ee6f9199354397f18fe7ce3f4b50554", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 390, + "date": "2013-01-09", + "sha1": "2832a2ddeaff7e4e7960926364afd12af09b24ba", + "md5": "91fb59a2c07fe8957b2ff90ccdf763dc", + "sha256": "e46d8ebcf41a23aa903a2562ae072e4dee4371924d8df21a9fa0ebff318f2b7d", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + 
"name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 1, + "dirs_count": 0, + "size_count": 2769, + "scan_errors": [] + }, + { + "path": "package/lib/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 2769, + "date": "2013-01-09", + "sha1": "3bbe6fc1ec9637b4b479944a2c07e56e5af781d4", + "md5": "603580683bf579f239e07d213da29a0b", + "sha256": "6dbd26a673dd839cd7e0dc0d9d28dd6a80373ca7d114400d47ca7aa68bb703bb", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i.json 
b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i.json new file mode 100644 index 00000000..f82e3c2f --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.4-i.json @@ -0,0 +1,186 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.4.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011502.218997", + "end_timestamp": "2022-12-02T011502.489555", + "output_format_version": "2.0.0", + "duration": 0.27057909965515137, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 4 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 1, + "size_count": 4750, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": 
"file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1092, + "date": "2011-05-13", + "sha1": "4a1927e74796f06ac7e7a687ca6b44d39c65d8f4", + "md5": "b3245a33f2d41818f14c489bd33bc4a8", + "sha256": "ab95d69fc43021e0451b5021ff09fbc89ee6f9199354397f18fe7ce3f4b50554", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 390, + "date": "2013-01-09", + "sha1": "2832a2ddeaff7e4e7960926364afd12af09b24ba", + "md5": "91fb59a2c07fe8957b2ff90ccdf763dc", + "sha256": "e46d8ebcf41a23aa903a2562ae072e4dee4371924d8df21a9fa0ebff318f2b7d", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + 
"extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 2769, + "scan_errors": [] + }, + { + "path": "package/lib/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 2769, + "date": "2013-01-09", + "sha1": "3bbe6fc1ec9637b4b479944a2c07e56e5af781d4", + "md5": "603580683bf579f239e07d213da29a0b", + "sha256": "6dbd26a673dd839cd7e0dc0d9d28dd6a80373ca7d114400d47ca7aa68bb703bb", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i-expected.json new file mode 100644 index 00000000..2d346605 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i-expected.json @@ -0,0 +1,200 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 6, + "dirs_count": 0, + "size_count": 4925, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + 
"type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, + "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": "a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1092, + "date": "2011-05-13", + "sha1": "4a1927e74796f06ac7e7a687ca6b44d39c65d8f4", + "md5": "b3245a33f2d41818f14c489bd33bc4a8", + "sha256": "ab95d69fc43021e0451b5021ff09fbc89ee6f9199354397f18fe7ce3f4b50554", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + 
"files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 378, + "date": "2014-04-17", + "sha1": "bb9785300f276c536e35e4050aa06e93fed3e0aa", + "md5": "cb4d65ccc150047accc4feab2875697f", + "sha256": "cd8af8b13b7f5f0072c3fcbef24a341fd314e667a8d3994d22553df14b1fb5f5", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2014-04-17", + "sha1": "69ec8e64451d6fbcc2d53717c5f3a630bd0432d8", + "md5": "245853103cf43465e31ac1b255edbfed", + "sha256": "34a78de842db494b0ff8ad4574a93cbbc4bf6e74cb064ab0f6bab6d11b80833a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + 
"is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.5" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i.json new file mode 100644 index 00000000..3eb790bc --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.5-i.json @@ -0,0 +1,211 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.5.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011503.577713", + "end_timestamp": "2022-12-02T011503.769749", + "output_format_version": "2.0.0", + "duration": 0.19205188751220703, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 6 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 6, + "dirs_count": 0, + "size_count": 4925, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, 
+ "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": "a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1092, + "date": "2011-05-13", + "sha1": "4a1927e74796f06ac7e7a687ca6b44d39c65d8f4", + "md5": "b3245a33f2d41818f14c489bd33bc4a8", + "sha256": "ab95d69fc43021e0451b5021ff09fbc89ee6f9199354397f18fe7ce3f4b50554", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 378, + "date": "2014-04-17", + "sha1": "bb9785300f276c536e35e4050aa06e93fed3e0aa", + "md5": "cb4d65ccc150047accc4feab2875697f", + "sha256": "cd8af8b13b7f5f0072c3fcbef24a341fd314e667a8d3994d22553df14b1fb5f5", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": 
"c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2014-04-17", + "sha1": "69ec8e64451d6fbcc2d53717c5f3a630bd0432d8", + "md5": "245853103cf43465e31ac1b255edbfed", + "sha256": "34a78de842db494b0ff8ad4574a93cbbc4bf6e74cb064ab0f6bab6d11b80833a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i-expected.json new file mode 100644 index 00000000..34255690 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i-expected.json @@ -0,0 +1,200 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 6, + "dirs_count": 0, + 
"size_count": 4511, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, + "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": "a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, 
+ "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 291, + "date": "2015-05-21", + "sha1": "e91829a4c5cbb009c7c36f3309c6b3471f216200", + "md5": "fc8be3703299c41fd19312cfd1996bea", + "sha256": "eec1ecc446cfe64288192e5a6ae6accc2c9d207085833ed69d951cbd1ce9a0ca", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2014-04-17", + "sha1": "69ec8e64451d6fbcc2d53717c5f3a630bd0432d8", + "md5": "245853103cf43465e31ac1b255edbfed", + "sha256": "34a78de842db494b0ff8ad4574a93cbbc4bf6e74cb064ab0f6bab6d11b80833a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": 
false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.6" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i.json new file mode 100644 index 00000000..ded28889 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.6-i.json @@ -0,0 +1,211 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.6.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011504.876898", + "end_timestamp": "2022-12-02T011505.035730", + "output_format_version": "2.0.0", + "duration": 0.15884780883789062, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 6 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 6, + "dirs_count": 0, + "size_count": 4511, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, 
+ "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": "a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 291, + "date": "2015-05-21", + "sha1": "e91829a4c5cbb009c7c36f3309c6b3471f216200", + "md5": "fc8be3703299c41fd19312cfd1996bea", + "sha256": "eec1ecc446cfe64288192e5a6ae6accc2c9d207085833ed69d951cbd1ce9a0ca", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": 
"c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2014-04-17", + "sha1": "69ec8e64451d6fbcc2d53717c5f3a630bd0432d8", + "md5": "245853103cf43465e31ac1b255edbfed", + "sha256": "34a78de842db494b0ff8ad4574a93cbbc4bf6e74cb064ab0f6bab6d11b80833a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i-expected.json new file mode 100644 index 00000000..12fdd2a0 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i-expected.json @@ -0,0 +1,256 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 8, + "dirs_count": 0, + 
"size_count": 4669, + "scan_errors": [] + }, + { + "path": "package/.npmignore", + "type": "file", + "name": ".npmignore", + "base_name": ".npmignore", + "extension": "", + "size": 45, + "date": "2015-05-30", + "sha1": "07cd07890babc5d03fd44e1ce9203b4f16c848e1", + "md5": "55fa175c873c423673dd58fae4232e9c", + "sha256": "7048571a371f162971b2d0890f86fd8e6d25f316c40f918f7149dbf3bc10aca8", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "GAS", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/.travis.yml", + "type": "file", + "name": ".travis.yml", + "base_name": ".travis", + "extension": ".yml", + "size": 60, + "date": "2015-05-30", + "sha1": "79213a95749ddb02aac8146f88c2e9ee705bec84", + "md5": "6b832410f2ba5cc9b136ee1e7fbaf1f1", + "sha256": "c6b8a08f01178dfb859bccdd8be0a71b00d494587d5cf1d60188ccb036ca2030", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": 
false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, + "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": "a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 344, + "date": "2015-05-30", + "sha1": "bf635dc9247c69abda92a42242524e74fd5eeec1", + "md5": "ac9892e616f40a638a2d93995ee1d506", + "sha256": "0b089a246155f2986ee01b6e4a67ab637139d7ce49a6790bf76c0a4fb10e824f", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + 
"is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2015-05-30", + "sha1": "4af3fea0290e02e4d82225ce75721b423c212483", + "md5": "f74996e9aaee131e03836bcb7853e248", + "sha256": "4ec8b1e563a76ec884ef9a13947c52b9dc528eab760ee89379924b23772f347d", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.7" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i.json new file mode 100644 index 00000000..c21d27e7 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.7-i.json @@ -0,0 +1,261 @@ +{ + 
"headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.7.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011506.139876", + "end_timestamp": "2022-12-02T011506.316847", + "output_format_version": "2.0.0", + "duration": 0.1769883632659912, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 8 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 8, + "dirs_count": 0, + "size_count": 4669, + "scan_errors": [] + }, + { + "path": "package/.npmignore", + "type": "file", + "name": ".npmignore", + "base_name": ".npmignore", + "extension": "", + "size": 45, + "date": "2015-05-30", + "sha1": "07cd07890babc5d03fd44e1ce9203b4f16c848e1", + "md5": "55fa175c873c423673dd58fae4232e9c", + "sha256": 
"7048571a371f162971b2d0890f86fd8e6d25f316c40f918f7149dbf3bc10aca8", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "GAS", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/.travis.yml", + "type": "file", + "name": ".travis.yml", + "base_name": ".travis", + "extension": ".yml", + "size": 60, + "date": "2015-05-30", + "sha1": "79213a95749ddb02aac8146f88c2e9ee705bec84", + "md5": "6b832410f2ba5cc9b136ee1e7fbaf1f1", + "sha256": "c6b8a08f01178dfb859bccdd8be0a71b00d494587d5cf1d60188ccb036ca2030", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/CONTRIBUTING.md", + "type": "file", + "name": "CONTRIBUTING.md", + "base_name": "CONTRIBUTING", + "extension": ".md", + "size": 123, + "date": "2013-02-12", + "sha1": "12ce7abccdd3aeebd1d093a30bb1768f120fb8cb", + "md5": "390ff2e45c4ab33b721d10a45c147c0c", + "sha256": 
"a654f59a0ffa0d17296a230c512790f66144a18e410222cab9bfe3a00fd8b7e8", + "mime_type": "text/html", + "file_type": "HTML document, ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 344, + "date": "2015-05-30", + "sha1": "bf635dc9247c69abda92a42242524e74fd5eeec1", + "md5": "ac9892e616f40a638a2d93995ee1d506", + "sha256": "0b089a246155f2986ee01b6e4a67ab637139d7ce49a6790bf76c0a4fb10e824f", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": 
"2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/test.js", + "type": "file", + "name": "test.js", + "base_name": "test", + "extension": ".js", + "size": 1069, + "date": "2015-05-30", + "sha1": "4af3fea0290e02e4d82225ce75721b423c212483", + "md5": "f74996e9aaee131e03836bcb7853e248", + "sha256": "4ec8b1e563a76ec884ef9a13947c52b9dc528eab760ee89379924b23772f347d", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i-expected.json new file mode 100644 index 00000000..6bee67a2 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i-expected.json @@ -0,0 +1,144 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.9" + ], + "files_count": 4, + "dirs_count": 0, + "size_count": 3406, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": 
"abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.9" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.9" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 378, + "date": "2016-06-15", + "sha1": "ca19f68ffb963539da6fc96e125db7e4d52d1a64", + "md5": "016e057514abfcc10ee95a865fa55b30", + "sha256": "a3c7abc0591b39d43f14e2016606433f0daba56006bf2927ee07718fdf9ee6ce", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.9" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 
0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.0.9" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i.json new file mode 100644 index 00000000..15fe6287 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.0.9-i.json @@ -0,0 +1,161 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.0.9.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011507.424965", + "end_timestamp": "2022-12-02T011507.581639", + "output_format_version": "2.0.0", + "duration": 0.156691312789917, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 4 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 0, + "size_count": 3406, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1764, + "date": "2014-04-17", + "sha1": "b75c6b10bbfac1092ef493079ae044cc89824dc0", + "md5": "7a15b8fe67321134796aa0efe08015d0", + "sha256": "17c7c4c5ba278eacdf05f8e62243edda7036c39f4b61448aa753c77b078a11ed", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + 
"sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 378, + "date": "2016-06-15", + "sha1": "ca19f68ffb963539da6fc96e125db7e4d52d1a64", + "md5": "016e057514abfcc10ee95a865fa55b30", + "sha256": "a3c7abc0591b39d43f14e2016606433f0daba56006bf2927ee07718fdf9ee6ce", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i-expected.json new file 
mode 100644 index 00000000..8648adba --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i-expected.json @@ -0,0 +1,144 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.0" + ], + "files_count": 4, + "dirs_count": 0, + "size_count": 3536, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1763, + "date": "2017-02-14", + "sha1": "e33940719dacc3ea04a0fb3efd7f5a57987b6257", + "md5": "295cdcca75c99f4bc11113aca4cc9dac", + "sha256": "77e68ed8bb552a11a5ece29800e0afe34bcc098d14a1b88dd44273f68be43943", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.0" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.0" + 
], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 509, + "date": "2017-02-14", + "sha1": "3d5bc8b4159c4a42b0ebfc9031178092c62157cb", + "md5": "31d03131e6a650c48c27e3e5fb03b6a4", + "sha256": "6cb52486d339e09b8c78e4161664b19845dff6d27950def4505f32452dde0a0e", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.0" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.0" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i.json new file mode 100644 index 00000000..44621e67 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.0-i.json @@ -0,0 +1,161 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": 
"./abbrev-1.1.0.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011508.681438", + "end_timestamp": "2022-12-02T011508.853336", + "output_format_version": "2.0.0", + "duration": 0.17191386222839355, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 4 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 0, + "size_count": 3536, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1763, + "date": "2017-02-14", + "sha1": "e33940719dacc3ea04a0fb3efd7f5a57987b6257", + "md5": "295cdcca75c99f4bc11113aca4cc9dac", + "sha256": "77e68ed8bb552a11a5ece29800e0afe34bcc098d14a1b88dd44273f68be43943", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", 
+ "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 765, + "date": "2015-05-21", + "sha1": "bb408e929caeb1731945b2ba54bc337edb87cc66", + "md5": "82703a69f6d7411dde679954c2fd9dca", + "sha256": "4ec3d4c66cd87f5c8d8ad911b10f99bf27cb00cdfcff82621956e379186b016b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 509, + "date": "2017-02-14", + "sha1": "3d5bc8b4159c4a42b0ebfc9031178092c62157cb", + "md5": "31d03131e6a650c48c27e3e5fb03b6a4", + "sha256": "6cb52486d339e09b8c78e4161664b19845dff6d27950def4505f32452dde0a0e", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, 
+ "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i-expected.json new file mode 100644 index 00000000..ede99667 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i-expected.json @@ -0,0 +1,144 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.1" + ], + "files_count": 4, + "dirs_count": 0, + "size_count": 4782, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": "file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1763, + "date": "2017-02-14", + "sha1": "e33940719dacc3ea04a0fb3efd7f5a57987b6257", + "md5": "295cdcca75c99f4bc11113aca4cc9dac", + "sha256": "77e68ed8bb552a11a5ece29800e0afe34bcc098d14a1b88dd44273f68be43943", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 2011, + "date": "2017-09-28", + "sha1": "34d4249a8ef23970810fd3018b9399b1268dc052", + "md5": 
"e9c0b639498fbe60d17b10099aba77c0", + "sha256": "9e0d5c7989f7e9f07d7c4b158aceff270f235eb7464ace41c5e7b200834a43e0", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 509, + "date": "2017-09-28", + "sha1": "bfc3606e605421f81936c77f156694df03ea1f55", + "md5": "09144e5559c19012a5ad2b1cb548f188", + "sha256": "5bcbdff71c063d5177f25fd3a5c7a6c2a9d565d968765ee3a1e73449dc0bc671", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:npm/abbrev@1.1.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i.json 
b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i.json new file mode 100644 index 00000000..46de30fb --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/abbrev-1.1.1-i.json @@ -0,0 +1,161 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package" + ], + "--info": true, + "--json-pp": "./abbrev-1.1.1.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T011509.963900", + "end_timestamp": "2022-12-02T011510.119020", + "output_format_version": "2.0.0", + "duration": 0.1551363468170166, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 4 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 0, + "size_count": 4782, + "scan_errors": [] + }, + { + "path": "package/abbrev.js", + "type": 
"file", + "name": "abbrev.js", + "base_name": "abbrev", + "extension": ".js", + "size": 1763, + "date": "2017-02-14", + "sha1": "e33940719dacc3ea04a0fb3efd7f5a57987b6257", + "md5": "295cdcca75c99f4bc11113aca4cc9dac", + "sha256": "77e68ed8bb552a11a5ece29800e0afe34bcc098d14a1b88dd44273f68be43943", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 2011, + "date": "2017-09-28", + "sha1": "34d4249a8ef23970810fd3018b9399b1268dc052", + "md5": "e9c0b639498fbe60d17b10099aba77c0", + "sha256": "9e0d5c7989f7e9f07d7c4b158aceff270f235eb7464ace41c5e7b200834a43e0", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 509, + "date": "2017-09-28", + "sha1": "bfc3606e605421f81936c77f156694df03ea1f55", + "md5": "09144e5559c19012a5ad2b1cb548f188", + "sha256": "5bcbdff71c063d5177f25fd3a5c7a6c2a9d565d968765ee3a1e73449dc0bc671", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": 
"README", + "extension": ".md", + "size": 499, + "date": "2010-10-04", + "sha1": "c520bc857ec612ed88e13d794c47882d5aed3286", + "md5": "96b93093abdfdfef1ef8a3e2d5ca7f71", + "sha256": "2581765d44e15c58a2b88ad7bc9cc5c9ee029b4b5013c06dc45d9e94e8cb2ba4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i-expected.json b/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i-expected.json new file mode 100644 index 00000000..dbfe47b0 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i-expected.json @@ -0,0 +1,108 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 3, + "dirs_count": 0, + "size_count": 1782, + "scan_errors": [] + }, + { + "path": "package/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 584, + "date": "2014-11-23", + "sha1": "7ccb344f96ee52fe55c74b68b9353b6f298334a7", + "md5": "2c54a36c455bbca429cac637bfe67062", + "sha256": "88e96e809108df09b61c5b6cbe9583301f4948ab9187c39807c73af7b331f047", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + 
"is_script": false, + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 627, + "date": "2014-11-23", + "sha1": "823909c69f1eef7b852501345601a2b7573f4e63", + "md5": "a8a5350c8c0efd5eab31a301fd8a500e", + "sha256": "da3c66ea4c17a9122c246b769fb8370e963bb19a8e334cc486a70ae40ccb6c80", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/readme.md", + "type": "file", + "name": "readme.md", + "base_name": "readme", + "extension": ".md", + "size": 571, + "date": "2014-08-17", + "sha1": "463253693f496ad1b7e3d7504489d24625a33859", + "md5": "66e9c35acf0e22691b69f94f59f99edb", + "sha256": "337197b8f15bbe4c358129613d4de52fa1ef94b1613fa9292c0cef9c167595ce", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i.json b/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i.json new file mode 100644 index 00000000..dce41217 --- /dev/null +++ b/matchcode/tests/testfiles/match/directory-matching/get-stdin-3.0.2-i.json @@ -0,0 +1,136 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "package/" + ], + "--info": true, + "--json-pp": 
"get-stdin-3.0.2-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-02T012403.760550", + "end_timestamp": "2022-12-02T012403.875478", + "output_format_version": "2.0.0", + "duration": 0.1149442195892334, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 3 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 3, + "dirs_count": 0, + "size_count": 1782, + "scan_errors": [] + }, + { + "path": "package/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 584, + "date": "2014-11-23", + "sha1": "7ccb344f96ee52fe55c74b68b9353b6f298334a7", + "md5": "2c54a36c455bbca429cac637bfe67062", + "sha256": "88e96e809108df09b61c5b6cbe9583301f4948ab9187c39807c73af7b331f047", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + 
"is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 627, + "date": "2014-11-23", + "sha1": "823909c69f1eef7b852501345601a2b7573f4e63", + "md5": "a8a5350c8c0efd5eab31a301fd8a500e", + "sha256": "da3c66ea4c17a9122c246b769fb8370e963bb19a8e334cc486a70ae40ccb6c80", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/readme.md", + "type": "file", + "name": "readme.md", + "base_name": "readme", + "extension": ".md", + "size": 571, + "date": "2014-08-17", + "sha1": "463253693f496ad1b7e3d7504489d24625a33859", + "md5": "66e9c35acf0e22691b69f94f59f99edb", + "sha256": "337197b8f15bbe4c358129613d4de52fa1ef94b1613fa9292c0cef9c167595ce", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/nested-expected.json b/matchcode/tests/testfiles/match/nested/nested-expected.json new file mode 100644 index 00000000..37868940 --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/nested-expected.json @@ -0,0 +1,650 @@ +{ + "files": [ + { + "path": "nested", + "type": "directory", + "name": "nested", + "base_name": "nested", + "extension": "", + "size": 0, + "date": null, + "sha1": 
null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 11, + "dirs_count": 5, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": "nested/underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 11, + "dirs_count": 4, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": "nested/underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + 
"mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + 
"url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "nested/underscore", + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40types/underscore@1.10.9", + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore" + } + ], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + 
"is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules", + "type": "directory", + "name": "node_modules", + "base_name": "node_modules", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [], + "files_count": 7, + "dirs_count": 3, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 7, + "dirs_count": 2, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/CHANGELOG.md", + "type": "file", + "name": "CHANGELOG.md", + "base_name": "CHANGELOG", + "extension": ".md", + "size": 3309, + "date": "1985-10-26", + "sha1": "79625a8ec840826bbab4e6658c53c11ea950fcd3", + "md5": "37600475d22e98ae0e8e1e606e2991e7", + "sha256": "33c8629e8ddd3817357bf31d68b76379cbea6f7588b63df152b0d57f7fdd393b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [ + 
"pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 771, + "date": "1985-10-26", + "sha1": "33f98f5756ed1098e7242883aaef2b74dfa06e8d", + "md5": "998d31468d34220e7daf05d41196767c", + "sha256": "15fa5153db3daa38c6bc1d5c5cb73d34064c1c0426cfccb91ea62735a95aedaa", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": "https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + "download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "nested/underscore/node_modules/package", + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:npm/umi-request", + "requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": 
"pkg:npm/umi", + "requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1", + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + "repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request" + } + ], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 307, + "date": "1985-10-26", + "sha1": "fac8486d1aef8658ae89d37c70b0edb332262109", + "md5": "a3cb10e52239977c38a699ef518dc8d4", + "sha256": "886cd0e0fafc00de3f8726a27920e449f09767dbf19aa14b4e65dc1bbdb168c7", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 13964, + "scan_errors": [] + }, 
+ { + "path": "nested/underscore/node_modules/package/lib/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 2453, + "date": "1985-10-26", + "sha1": "aecb2790e91e0eb8f1f8608c4ac7a689a9a28467", + "md5": "0a4e5a9b46acd86352121a86f2ad6ca0", + "sha256": "8f3e85de5fec26a6c76b17170fa3671f2f0ff05efe7751339ca41a2f90b9a1b0", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/lib/request.js", + "type": "file", + "name": "request.js", + "base_name": "request", + "extension": ".js", + "size": 11511, + "date": "1985-10-26", + "sha1": "e59e280693fc62d2a7e902475e5e13d3bf62d476", + "md5": "e00da7bc6c7649c392bc9e20cd779167", + "sha256": "9ae43cf04cc4babc3d2d6dd54f6584da5c2749018e64d396d1e06181d3c4b276", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + 
"is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 10081, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src/index.ts", + "type": "file", + "name": "index.ts", + "base_name": "index", + "extension": ".ts", + "size": 2138, + "date": "1985-10-26", + "sha1": "00216c5fd157602a28b45530c6badb388405820d", + "md5": "3fa996e7e53a7447bd5e8d3455ac29b6", + "sha256": "d1d94851002539609e3b20a36594d25f10a6a8d9bd42d2e7c6b3d4f243ab52e4", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src/request.ts", + "type": "file", + "name": "request.ts", + "base_name": "request", + "extension": ".ts", + "size": 7943, + "date": "1985-10-26", + "sha1": "c6b37045e7919219efffd83894df441e47c87cfb", + "md5": "7a77806ca3e1452cc7eb5d01a35f6264", + "sha256": "25bbbc1924dcb7a726142be254d12ace5379fbee9bf37e20a7ce300bb01030cb", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/nested.json b/matchcode/tests/testfiles/match/nested/nested.json new file mode 100644 index 00000000..c783810c --- /dev/null +++ 
b/matchcode/tests/testfiles/match/nested/nested.json @@ -0,0 +1,637 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.2.2rc3", + "options": { + "input": [ + "/tmp/test/nested/" + ], + "--info": true, + "--json-pp": "/tmp/test/nested.json", + "--package": true, + "--processes": "2" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2020-11-07T201514.714422", + "end_timestamp": "2020-11-07T201519.082743", + "duration": 4.36833381652832, + "message": null, + "errors": [], + "extra_data": { + "files_count": 11 + } + } + ], + "files": [ + { + "path": "nested", + "type": "directory", + "name": "nested", + "base_name": "nested", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 11, + "dirs_count": 5, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": "nested/underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 11, + "dirs_count": 4, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": 
"nested/underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + 
"namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": 
"nested/underscore", + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40types/underscore@1.10.9", + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore" + } + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules", + "type": "directory", + "name": "node_modules", + "base_name": "node_modules", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 7, + "dirs_count": 3, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + 
"file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 7, + "dirs_count": 2, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/CHANGELOG.md", + "type": "file", + "name": "CHANGELOG.md", + "base_name": "CHANGELOG", + "extension": ".md", + "size": 3309, + "date": "1985-10-26", + "sha1": "79625a8ec840826bbab4e6658c53c11ea950fcd3", + "md5": "37600475d22e98ae0e8e1e606e2991e7", + "sha256": "33c8629e8ddd3817357bf31d68b76379cbea6f7588b63df152b0d57f7fdd393b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 771, + "date": "1985-10-26", + "sha1": "33f98f5756ed1098e7242883aaef2b74dfa06e8d", + "md5": "998d31468d34220e7daf05d41196767c", + "sha256": "15fa5153db3daa38c6bc1d5c5cb73d34064c1c0426cfccb91ea62735a95aedaa", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": "https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + 
"download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "nested/underscore/node_modules/package", + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:npm/umi-request", + "requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:npm/umi", + "requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1", + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + "repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request" + } + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 307, + "date": "1985-10-26", + "sha1": "fac8486d1aef8658ae89d37c70b0edb332262109", + "md5": "a3cb10e52239977c38a699ef518dc8d4", + "sha256": "886cd0e0fafc00de3f8726a27920e449f09767dbf19aa14b4e65dc1bbdb168c7", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": 
false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 13964, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/lib/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 2453, + "date": "1985-10-26", + "sha1": "aecb2790e91e0eb8f1f8608c4ac7a689a9a28467", + "md5": "0a4e5a9b46acd86352121a86f2ad6ca0", + "sha256": "8f3e85de5fec26a6c76b17170fa3671f2f0ff05efe7751339ca41a2f90b9a1b0", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/lib/request.js", + "type": "file", + "name": "request.js", + "base_name": "request", + "extension": ".js", + "size": 11511, + "date": "1985-10-26", + "sha1": "e59e280693fc62d2a7e902475e5e13d3bf62d476", + "md5": "e00da7bc6c7649c392bc9e20cd779167", + "sha256": "9ae43cf04cc4babc3d2d6dd54f6584da5c2749018e64d396d1e06181d3c4b276", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + 
"is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 10081, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src/index.ts", + "type": "file", + "name": "index.ts", + "base_name": "index", + "extension": ".ts", + "size": 2138, + "date": "1985-10-26", + "sha1": "00216c5fd157602a28b45530c6badb388405820d", + "md5": "3fa996e7e53a7447bd5e8d3455ac29b6", + "sha256": "d1d94851002539609e3b20a36594d25f10a6a8d9bd42d2e7c6b3d4f243ab52e4", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "nested/underscore/node_modules/package/src/request.ts", + "type": "file", + "name": "request.ts", + "base_name": "request", + "extension": ".ts", + "size": 7943, + "date": "1985-10-26", + "sha1": "c6b37045e7919219efffd83894df441e47c87cfb", + "md5": "7a77806ca3e1452cc7eb5d01a35f6264", + "sha256": "25bbbc1924dcb7a726142be254d12ace5379fbee9bf37e20a7ce300bb01030cb", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": 
false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/new-nested-expected.json b/matchcode/tests/testfiles/match/nested/new-nested-expected.json new file mode 100644 index 00000000..260f5581 --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/new-nested-expected.json @@ -0,0 +1,691 @@ +{ + "files": [ + { + "path": "underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "packages": [], + "matched_to": [], + "files_count": 11, + "dirs_count": 4, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": "underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/LICENSE", + "type": "file", + 
"name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": 
"https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/%40types/underscore@1.10.9" + } + ], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + 
"scan_errors": [] + }, + { + "path": "underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "packages": [], + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules", + "type": "directory", + "name": "node_modules", + "base_name": "node_modules", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 7, + "dirs_count": 3, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "packages": 
[], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 7, + "dirs_count": 2, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/CHANGELOG.md", + "type": "file", + "name": "CHANGELOG.md", + "base_name": "CHANGELOG", + "extension": ".md", + "size": 3309, + "date": "1985-10-26", + "sha1": "79625a8ec840826bbab4e6658c53c11ea950fcd3", + "md5": "37600475d22e98ae0e8e1e606e2991e7", + "sha256": "33c8629e8ddd3817357bf31d68b76379cbea6f7588b63df152b0d57f7fdd393b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 771, + "date": "1985-10-26", + "sha1": "33f98f5756ed1098e7242883aaef2b74dfa06e8d", + "md5": "998d31468d34220e7daf05d41196767c", + "sha256": "15fa5153db3daa38c6bc1d5c5cb73d34064c1c0426cfccb91ea62735a95aedaa", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": 
"https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + "download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "extracted_requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:npm/umi-request", + "extracted_requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:npm/umi", + "extracted_requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + } + ], + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + "repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1" + } + ], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/README.md", + "type": "file", + "name": 
"README.md", + "base_name": "README", + "extension": ".md", + "size": 307, + "date": "1985-10-26", + "sha1": "fac8486d1aef8658ae89d37c70b0edb332262109", + "md5": "a3cb10e52239977c38a699ef518dc8d4", + "sha256": "886cd0e0fafc00de3f8726a27920e449f09767dbf19aa14b4e65dc1bbdb168c7", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 13964, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 2453, + "date": "1985-10-26", + "sha1": "aecb2790e91e0eb8f1f8608c4ac7a689a9a28467", + "md5": "0a4e5a9b46acd86352121a86f2ad6ca0", + "sha256": "8f3e85de5fec26a6c76b17170fa3671f2f0ff05efe7751339ca41a2f90b9a1b0", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text, with very long lines", + "programming_language": 
"JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib/request.js", + "type": "file", + "name": "request.js", + "base_name": "request", + "extension": ".js", + "size": 11511, + "date": "1985-10-26", + "sha1": "e59e280693fc62d2a7e902475e5e13d3bf62d476", + "md5": "e00da7bc6c7649c392bc9e20cd779167", + "sha256": "9ae43cf04cc4babc3d2d6dd54f6584da5c2749018e64d396d1e06181d3c4b276", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 2, + 
"dirs_count": 0, + "size_count": 10081, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src/index.ts", + "type": "file", + "name": "index.ts", + "base_name": "index", + "extension": ".ts", + "size": 2138, + "date": "1985-10-26", + "sha1": "00216c5fd157602a28b45530c6badb388405820d", + "md5": "3fa996e7e53a7447bd5e8d3455ac29b6", + "sha256": "d1d94851002539609e3b20a36594d25f10a6a8d9bd42d2e7c6b3d4f243ab52e4", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src/request.ts", + "type": "file", + "name": "request.ts", + "base_name": "request", + "extension": ".ts", + "size": 7943, + "date": "1985-10-26", + "sha1": "c6b37045e7919219efffd83894df441e47c87cfb", + "md5": "7a77806ca3e1452cc7eb5d01a35f6264", + "sha256": "25bbbc1924dcb7a726142be254d12ace5379fbee9bf37e20a7ce300bb01030cb", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "packages": [], + "matched_to": [ + "pkg:npm/plugin-request@2.4.1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/new-nested.json 
b/matchcode/tests/testfiles/match/nested/new-nested.json new file mode 100644 index 00000000..64ced839 --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/new-nested.json @@ -0,0 +1,866 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "underscore" + ], + "--info": true, + "--json-pp": "new-nested.json", + "--package": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-01T211637.050383", + "end_timestamp": "2022-12-01T211640.918285", + "output_format_version": "2.0.0", + "duration": 3.86792254447937, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 11 + } + } + ], + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "extracted_requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {}, + "dependency_uid": "pkg:npm/%40ahooksjs/use-request?uuid=5de47a99-bc15-4ba5-93f3-21049734572c", + "for_package_uid": "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4", + "datafile_path": "underscore/node_modules/package/package.json", + "datasource_id": "npm_package_json" + }, + { 
+ "purl": "pkg:npm/umi-request", + "extracted_requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {}, + "dependency_uid": "pkg:npm/umi-request?uuid=5dfea107-7408-4721-bdab-1b34606fb796", + "for_package_uid": "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4", + "datafile_path": "underscore/node_modules/package/package.json", + "datasource_id": "npm_package_json" + }, + { + "purl": "pkg:npm/umi", + "extracted_requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {}, + "dependency_uid": "pkg:npm/umi?uuid=7e759ff0-b8d0-4f4c-a3ce-225c658f5bb8", + "for_package_uid": "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4", + "datafile_path": "underscore/node_modules/package/package.json", + "datasource_id": "npm_package_json" + } + ], + "packages": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + 
{ + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "extra_data": {}, + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore", + "package_uid": "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194", + "datafile_paths": [ + "underscore/package.json" + ], + "datasource_ids": [ + "npm_package_json" + ], + "purl": "pkg:npm/%40types/underscore@1.10.9" + }, + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": "https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + "download_url": 
"https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "extra_data": {}, + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + "repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request", + "package_uid": "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4", + "datafile_paths": [ + "underscore/node_modules/package/package.json" + ], + "datasource_ids": [ + "npm_package_json" + ], + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1" + } + ], + "files": [ + { + "path": "underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 11, + "dirs_count": 4, + "size_count": 244503, + "scan_errors": [] + }, + { + "path": "underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + 
"programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + 
"parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": 
"https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/%40types/underscore@1.10.9" + } + ], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=420db78a-625f-4622-b1a0-93d1ea853194" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules", + "type": "directory", + "name": "node_modules", + "base_name": "node_modules", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 7, + "dirs_count": 3, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + 
"md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 7, + "dirs_count": 2, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/CHANGELOG.md", + "type": "file", + "name": "CHANGELOG.md", + "base_name": "CHANGELOG", + "extension": ".md", + "size": 3309, + "date": "1985-10-26", + "sha1": "79625a8ec840826bbab4e6658c53c11ea950fcd3", + "md5": "37600475d22e98ae0e8e1e606e2991e7", + "sha256": "33c8629e8ddd3817357bf31d68b76379cbea6f7588b63df152b0d57f7fdd393b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 771, + "date": "1985-10-26", + "sha1": "33f98f5756ed1098e7242883aaef2b74dfa06e8d", + "md5": "998d31468d34220e7daf05d41196767c", + "sha256": "15fa5153db3daa38c6bc1d5c5cb73d34064c1c0426cfccb91ea62735a95aedaa", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": 
"@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": "https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + "download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "extracted_requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:npm/umi-request", + "extracted_requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:npm/umi", + "extracted_requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + } + ], + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + "repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1" + } + ], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": 
"underscore/node_modules/package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 307, + "date": "1985-10-26", + "sha1": "fac8486d1aef8658ae89d37c70b0edb332262109", + "md5": "a3cb10e52239977c38a699ef518dc8d4", + "sha256": "886cd0e0fafc00de3f8726a27920e449f09767dbf19aa14b4e65dc1bbdb168c7", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 13964, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 2453, + "date": "1985-10-26", + "sha1": "aecb2790e91e0eb8f1f8608c4ac7a689a9a28467", + "md5": "0a4e5a9b46acd86352121a86f2ad6ca0", + "sha256": "8f3e85de5fec26a6c76b17170fa3671f2f0ff05efe7751339ca41a2f90b9a1b0", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + 
"is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/lib/request.js", + "type": "file", + "name": "request.js", + "base_name": "request", + "extension": ".js", + "size": 11511, + "date": "1985-10-26", + "sha1": "e59e280693fc62d2a7e902475e5e13d3bf62d476", + "md5": "e00da7bc6c7649c392bc9e20cd779167", + "sha256": "9ae43cf04cc4babc3d2d6dd54f6584da5c2749018e64d396d1e06181d3c4b276", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 10081, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src/index.ts", + "type": "file", + "name": "index.ts", + "base_name": "index", + "extension": ".ts", + "size": 2138, + "date": "1985-10-26", + "sha1": "00216c5fd157602a28b45530c6badb388405820d", + 
"md5": "3fa996e7e53a7447bd5e8d3455ac29b6", + "sha256": "d1d94851002539609e3b20a36594d25f10a6a8d9bd42d2e7c6b3d4f243ab52e4", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/node_modules/package/src/request.ts", + "type": "file", + "name": "request.ts", + "base_name": "request", + "extension": ".ts", + "size": 7943, + "date": "1985-10-26", + "sha1": "c6b37045e7919219efffd83894df441e47c87cfb", + "md5": "7a77806ca3e1452cc7eb5d01a35f6264", + "sha256": "25bbbc1924dcb7a726142be254d12ace5379fbee9bf37e20a7ce300bb01030cb", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40umijs/plugin-request@2.4.1?uuid=923ada0a-c8f6-41f1-8b9c-04e2a8537ef4" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/plugin-request-2.4.1-ip.json b/matchcode/tests/testfiles/match/nested/plugin-request-2.4.1-ip.json new file mode 100644 index 00000000..8eb09da8 --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/plugin-request-2.4.1-ip.json @@ -0,0 +1,353 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.2.2rc3", + "options": { + "input": [ + "/tmp/test/package/" + ], + "--info": true, + "--json-pp": "/tmp/test/plugin-request-2.4.1-ip.json", + "--package": true, + 
"--processes": "2" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2020-11-07T201424.972560", + "end_timestamp": "2020-11-07T201429.280526", + "duration": 4.30798077583313, + "message": null, + "errors": [], + "extra_data": { + "files_count": 7 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 7, + "dirs_count": 2, + "size_count": 28432, + "scan_errors": [] + }, + { + "path": "package/CHANGELOG.md", + "type": "file", + "name": "CHANGELOG.md", + "base_name": "CHANGELOG", + "extension": ".md", + "size": 3309, + "date": "1985-10-26", + "sha1": "79625a8ec840826bbab4e6658c53c11ea950fcd3", + "md5": "37600475d22e98ae0e8e1e606e2991e7", + "sha256": "33c8629e8ddd3817357bf31d68b76379cbea6f7588b63df152b0d57f7fdd393b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 771, + 
"date": "1985-10-26", + "sha1": "33f98f5756ed1098e7242883aaef2b74dfa06e8d", + "md5": "998d31468d34220e7daf05d41196767c", + "sha256": "15fa5153db3daa38c6bc1d5c5cb73d34064c1c0426cfccb91ea62735a95aedaa", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "@umijs", + "name": "plugin-request", + "version": "2.4.1", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "@umijs/plugin-request", + "release_date": null, + "parties": [], + "keywords": [ + "umi" + ], + "homepage_url": "https://github.com/umijs/plugins/tree/master/packages/plugin-request#readme", + "download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": "http://github.com/umijs/plugins/issues", + "code_view_url": null, + "vcs_url": "git+https://github.com/umijs/plugins", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "package", + "dependencies": [ + { + "purl": "pkg:npm/%40ahooksjs/use-request", + "requirement": "^2.0.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:npm/umi-request", + "requirement": "^1.2.14", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:npm/umi", + "requirement": "3.x", + "scope": "peerDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40umijs/plugin-request@2.4.1", + "repository_homepage_url": "https://www.npmjs.com/package/@umijs/plugin-request", + 
"repository_download_url": "https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz", + "api_data_url": "https://registry.npmjs.org/@umijs%2fplugin-request" + } + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 307, + "date": "1985-10-26", + "sha1": "fac8486d1aef8658ae89d37c70b0edb332262109", + "md5": "a3cb10e52239977c38a699ef518dc8d4", + "sha256": "886cd0e0fafc00de3f8726a27920e449f09767dbf19aa14b4e65dc1bbdb168c7", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 13964, + "scan_errors": [] + }, + { + "path": "package/lib/index.js", + "type": "file", + "name": "index.js", + "base_name": "index", + "extension": ".js", + "size": 2453, + "date": "1985-10-26", + "sha1": "aecb2790e91e0eb8f1f8608c4ac7a689a9a28467", + "md5": "0a4e5a9b46acd86352121a86f2ad6ca0", + "sha256": "8f3e85de5fec26a6c76b17170fa3671f2f0ff05efe7751339ca41a2f90b9a1b0", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + 
"is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib/request.js", + "type": "file", + "name": "request.js", + "base_name": "request", + "extension": ".js", + "size": 11511, + "date": "1985-10-26", + "sha1": "e59e280693fc62d2a7e902475e5e13d3bf62d476", + "md5": "e00da7bc6c7649c392bc9e20cd779167", + "sha256": "9ae43cf04cc4babc3d2d6dd54f6584da5c2749018e64d396d1e06181d3c4b276", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 10081, + "scan_errors": [] + }, + { + "path": "package/src/index.ts", + "type": "file", + "name": "index.ts", + "base_name": "index", + "extension": ".ts", + "size": 2138, + "date": "1985-10-26", + "sha1": "00216c5fd157602a28b45530c6badb388405820d", + "md5": "3fa996e7e53a7447bd5e8d3455ac29b6", + "sha256": "d1d94851002539609e3b20a36594d25f10a6a8d9bd42d2e7c6b3d4f243ab52e4", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + 
"size_count": 0, + "scan_errors": [] + }, + { + "path": "package/src/request.ts", + "type": "file", + "name": "request.ts", + "base_name": "request", + "extension": ".ts", + "size": 7943, + "date": "1985-10-26", + "sha1": "c6b37045e7919219efffd83894df441e47c87cfb", + "md5": "7a77806ca3e1452cc7eb5d01a35f6264", + "sha256": "25bbbc1924dcb7a726142be254d12ace5379fbee9bf37e20a7ce300bb01030cb", + "mime_type": "text/x-java", + "file_type": "Java source, UTF-8 Unicode text", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/underscore-1.10.9-ip.json b/matchcode/tests/testfiles/match/nested/underscore-1.10.9-ip.json new file mode 100644 index 00000000..e2a6011d --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/underscore-1.10.9-ip.json @@ -0,0 +1,260 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.2.2rc3", + "options": { + "input": [ + "/tmp/test/underscore/" + ], + "--info": true, + "--json-pp": "/tmp/test/underscore-ip.json", + "--package": true, + "--processes": "2" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2020-11-07T201338.623038", + "end_timestamp": "2020-11-07T201343.409115", + "duration": 4.78609824180603, + "message": null, + "errors": [], + "extra_data": { + "files_count": 4 + } + } + ], + "files": [ + { + "path": "underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 4, + "dirs_count": 0, + "size_count": 216071, + "scan_errors": [] + }, + { + "path": "underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": 
true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": 
"https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "underscore", + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:npm/%40types/underscore@1.10.9", + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore" + } + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + 
"scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/nested/underscore.json b/matchcode/tests/testfiles/match/nested/underscore.json new file mode 100644 index 00000000..3aca6658 --- /dev/null +++ b/matchcode/tests/testfiles/match/nested/underscore.json @@ -0,0 +1,392 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.2.2", + "options": { + "input": [ + "underscore" + ], + "--info": true, + "--json-pp": "underscore.json", + "--package": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-12-01T201941.639402", + "end_timestamp": "2022-12-01T202115.845138", + "output_format_version": "2.0.0", + "duration": 94.20575189590454, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.15.39-3-pve-x86_64-with-glibc2.35", + "platform_version": "#2 SMP PVE 5.15.39-3 (Wed, 27 Jul 2022 13:45:39 +0200)", + "python_version": "3.10.5 (main, Jul 30 2022, 06:09:26) [GCC 9.4.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 4 + } + } + ], + "dependencies": [], + "packages": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + 
}, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + { + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "extra_data": {}, + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore", + "package_uid": "pkg:npm/%40types/underscore@1.10.9?uuid=1cce1a48-ced8-4bec-a349-87d895e96973", + 
"datafile_paths": [ + "underscore/package.json" + ], + "datasource_ids": [ + "npm_package_json" + ], + "purl": "pkg:npm/%40types/underscore@1.10.9" + } + ], + "files": [ + { + "path": "underscore", + "type": "directory", + "name": "underscore", + "base_name": "underscore", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 4, + "dirs_count": 0, + "size_count": 216071, + "scan_errors": [] + }, + { + "path": "underscore/index.d.ts", + "type": "file", + "name": "index.d.ts", + "base_name": "index.d", + "extension": ".ts", + "size": 212101, + "date": "2020-07-15", + "sha1": "4f2b85857c3a162c5d536e342ba417fa6c03d40a", + "md5": "ae0acb15531b2efe7253a9bd1fee1b86", + "sha256": "92c3d4b6cf13af286895b484b680b314be34a62a41d1440a2d62d5d5cf0e93b3", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines", + "programming_language": "TypeScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=1cce1a48-ced8-4bec-a349-87d895e96973" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1141, + "date": "2020-07-15", + "sha1": "689ec0681815ecc32bee639c68e7740add7bd301", + "md5": "d4a904ca135bb7bc912156fee12726f0", + "sha256": "c2cfccb812fe482101a8f04597dfc5a9991a6b2748266c47ac91b6a5aae15383", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + 
"is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=1cce1a48-ced8-4bec-a349-87d895e96973" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 1940, + "date": "2020-07-15", + "sha1": "e751d5da4a71b9eaec5114e8a5b5eceef15d0b4d", + "md5": "a8b2dc907f99ba2e34a97954075d9b8f", + "sha256": "31e806866fe2800471fd3544fc17ed77f9b58885ff8f0590d3dc8d623b897206", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": "@types", + "name": "underscore", + "version": "1.10.9", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "TypeScript definitions for Underscore", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "contributor", + "name": "Boris Yankov", + "email": null, + "url": "https://github.com/borisyankov" + }, + { + "type": "person", + "role": "contributor", + "name": "Josh Baldwin", + "email": null, + "url": "https://github.com/jbaldwin" + }, + { + "type": "person", + "role": "contributor", + "name": "Christopher Currens", + "email": null, + "url": "https://github.com/ccurrens" + }, + { + "type": "person", + "role": "contributor", + "name": "Ard Timmerman", + "email": null, + "url": "https://github.com/confususs" + }, + { + "type": "person", + "role": "contributor", + "name": "Julian Gonggrijp", + "email": null, + "url": "https://github.com/jgonggrijp" + }, + { + "type": "person", + "role": "contributor", + "name": "Florian Keller", + "email": null, + "url": "https://github.com/ffflorian" + }, + 
{ + "type": "person", + "role": "contributor", + "name": "Regev Brody", + "email": null, + "url": "https://github.com/regevbr" + }, + { + "type": "person", + "role": "contributor", + "name": "Piotr B\u0142a\u017cejewicz", + "email": null, + "url": "https://github.com/peterblazejewicz" + }, + { + "type": "person", + "role": "contributor", + "name": "Michael Ness", + "email": null, + "url": "https://github.com/reubenrybnik" + } + ], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "git+https://github.com/DefinitelyTyped/DefinitelyTyped.git", + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": "https://www.npmjs.com/package/@types/underscore", + "repository_download_url": "https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz", + "api_data_url": "https://registry.npmjs.org/@types%2funderscore", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/%40types/underscore@1.10.9" + } + ], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=1cce1a48-ced8-4bec-a349-87d895e96973" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "underscore/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 889, + "date": "2020-07-15", + "sha1": "0a9a39ef14ee88de5e8c4ed6aaeef3ee64f9421f", + "md5": "c8223d72fe3e5f79ac77f9566e7e8e28", + "sha256": "abd4f29b3bf6e302720577ad706f15f68fa13ff227aeeb9e1b091d66e54bc5d9", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text, with very long lines, with CRLF line terminators", + 
"programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [ + "pkg:npm/%40types/underscore@1.10.9?uuid=1cce1a48-ced8-4bec-a349-87d895e96973" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/match/scan1.json b/matchcode/tests/testfiles/match/scan1.json new file mode 100644 index 00000000..ee0458e8 --- /dev/null +++ b/matchcode/tests/testfiles/match/scan1.json @@ -0,0 +1,135 @@ +{ + "scancode_notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "scancode_version": "2.9.1", + "scancode_options": { + "input": "/testfiles/test", + "--fingerprint": true, + "--info": true, + "--json": "/testfiles/out_test.json" + }, + "files_count": 2, + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + + "fingerprints": { + "merkle_bah128": "8f52dcee236ae3fedaf86ca5fb13a9f4", + "merkle_sha1": "9fa0275b959bae94f9607c7af6cb89bae231440f" + }, + "files_count": 2, + "dirs_count": 1, + "size_count": 2, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "file", + "name": "a", + "base_name": "a", + "extension": "", + "size": 1, + "date": "2018-05-08", + 
"sha1": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", + "md5": "0cc175b9c0f1b6a831c399e269772661", + "mime_type": "application/octet-stream", + "file_type": "very short file (no magic)", + "programming_language": "Text only", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + + "fingerprints": { + "bah128": "0cc175b9c0f1b6a831c399e269772661", + "hailstorm": null, + "sha1_git": "2e65efe2a145dda7ee51d1741299f848e5bf752e", + "sha256": "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb", + "sha512": "1f40fc92da241694750979ee6cf582f2d5d7d28e18335de05abc54d0560e0f5302860c652bf08d560252aa5e74210546f369fbbbce8c12cfc7957b2652fe9a75" + }, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/dir", + "type": "directory", + "name": "dir", + "base_name": "dir", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + + "fingerprints": { + "merkle_bah128": "dc0ae7e1387be9b795f5d6299e383759", + "merkle_sha1": "b035ee5669cac0d97397333030ab262caa966dea" + }, + "files_count": 1, + "dirs_count": 0, + "size_count": 1, + "scan_errors": [] + }, + { + "path": "test/dir/b", + "type": "file", + "name": "b", + "base_name": "b", + "extension": "", + "size": 1, + "date": "2018-05-08", + "sha1": "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98", + "md5": "92eb5ffee6ae2fec3ad71c777531578f", + "mime_type": "application/octet-stream", + "file_type": "very short file (no magic)", + "programming_language": "Text only", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + + "fingerprints": { + "bah128": "92eb5ffee6ae2fec3ad71c777531578f", + "hailstorm": null, + 
"sha1_git": "63d8dbd40c23542e740659a7168a0ce3138ea748", + "sha256": "3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d", + "sha512": "5267768822ee624d48fce15ec5ca79cbd602cb7f4c2157a516556991f22ef8c7b5ef7b18d1ff41c59370efb0858651d44a936c11b7b144c48fe04df3c6a3e8da" + }, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.10.tgz-i.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.10.tgz-i.json new file mode 100644 index 00000000..8410bc05 --- /dev/null +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.10.tgz-i.json @@ -0,0 +1,201 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.2.3", + "options": { + "input": [ + "package/" + ], + "--info": true, + "--json-pp": "async-0.2.10.tgz-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2021-02-17T021750.899853", + "end_timestamp": "2021-02-17T021751.010377", + "duration": 0.11053991317749023, + "message": null, + "errors": [], + "extra_data": { + "files_count": 5 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 5, + "dirs_count": 1, + "size_count": 74685, + "scan_errors": [] + }, + { + "path": "package/component.json", + "type": "file", + "name": "component.json", + "base_name": "component", + "extension": ".json", + "size": 276, + "date": "2013-05-28", + "sha1": "6615a8e63ada0d9d145cc38ff6e1421a1f857742", + "md5": "aec01451771f878693dda7bf040642ae", + "sha256": "658e07147ddb1458d81d21ff46271e8bd67cb36b1922701b11cf0dc4143c1eea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1058, + "date": "2013-05-28", + "sha1": "99e50b7dfe39753bccb8aabce3cd1f80e960713b", + "md5": "64a378b2b01424fe22d54bc626175994", + "sha256": "b04b9e208e566fa898c7429e4dd5b45ba3ba2f7391e5c009cf63c53d580fa9b4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + 
"is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 876, + "date": "2014-01-23", + "sha1": "17cc2a4f858056d0cba767c580ff3b05b61abee8", + "md5": "b9af078ffbadf94f89007b64419058ef", + "sha256": "d3c8c237e2eb2f97718b77e57103c6de5aab5d81ffad3b68f634b608d99b2b12", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 43074, + "date": "2014-01-23", + "sha1": "f9a4700b8390398540777021cc858f4efb2b4b02", + "md5": "c0900735f205ab79ef2d35762ee7d55c", + "sha256": "d0ee397ceb1814a74117d363934796392ea33448c4b3769579adbde82ebd2979", + "mime_type": "text/html", + "file_type": "HTML document, UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 29401, + "scan_errors": [] + }, + { + "path": "package/lib/async.js", + "type": "file", + "name": "async.js", + "base_name": 
"async", + "extension": ".js", + "size": 29401, + "date": "2014-01-23", + "sha1": "b557301146872e38bde36f81efddb1aa284cf239", + "md5": "fe09e04d0bd97bdc821e560491b10231", + "sha256": "665d0187f6a7c44135a6b9fceb2a234f960893a22f3ed632e53b2fe934c3e6eb", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json new file mode 100644 index 00000000..c7ce4ce2 --- /dev/null +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "", + "name": "async", + "version": "0.2.9", + "qualifiers": "", + "subpath": "", + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/async/-/async-0.2.9.tgz", + "size": 15672, + "md5": "895ac62ba7c61086cffdd50ab03c0447", + "sha1": "df63060fbf3d33286a76aaf6d55a2986d9ff8619", + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + 
"declared_license": null, + "notice_text": null, + "source_packages": [], + "extra_data": {}, + "api_data_url": null, + "datasource_id": null, + "purl": "pkg:npm/async@0.2.9", + "manifest_path": null, + "contains_source_code": null, + "file_references": [], + "dependencies": [], + "match_type": "approximate-directory-content" + } + ], + "files_count": 5, + "dirs_count": 1, + "size_count": 74218, + "scan_errors": [] + }, + { + "path": "package/component.json", + "type": "file", + "name": "component.json", + "base_name": "component", + "extension": ".json", + "size": 276, + "date": "2013-05-28", + "sha1": "6615a8e63ada0d9d145cc38ff6e1421a1f857742", + "md5": "aec01451771f878693dda7bf040642ae", + "sha256": "658e07147ddb1458d81d21ff46271e8bd67cb36b1922701b11cf0dc4143c1eea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1058, + "date": "2013-05-28", + "sha1": "99e50b7dfe39753bccb8aabce3cd1f80e960713b", + "md5": "64a378b2b01424fe22d54bc626175994", + "sha256": "b04b9e208e566fa898c7429e4dd5b45ba3ba2f7391e5c009cf63c53d580fa9b4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 875, + "date": "2013-05-28", + "sha1": "89ee3da95d9be9514d58eec01c1a2a5aa450b5e7", + "md5": 
"eae59bcc1a34df541efb423cee0653d1", + "sha256": "0a05706c3026dc78ee73eb92f1feb39bef2defef6f32f2aa79a631bfe3ebe2ea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 42716, + "date": "2013-05-28", + "sha1": "6ea922136bd1a91fe457a4238934820d84cf1093", + "md5": "374b909e5200bdf8a96e80ca5c65da21", + "sha256": "51405d87248ea2c2c5acd226281e77d4c2fb07eff2ccf640094cd2fbbfa4ab00", + "mime_type": "text/html", + "file_type": "HTML document, UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 29293, + "scan_errors": [] + }, + { + "path": "package/lib/async.js", + "type": "file", + "name": "async.js", + "base_name": "async", + "extension": ".js", + "size": 29293, + "date": "2013-05-28", + "sha1": "31ed74fe94a795e7beac5d904dd567196666a011", + "md5": "2ff89a2726f7745d1ad89393afe4f0ab", + "sha256": "df5a89ada496897b43a02ca698060bfd4f1d9852d5205348a65289d8aaacb85a", + "mime_type": "text/plain", + 
"file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json new file mode 100644 index 00000000..b830a2aa --- /dev/null +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [ + { + "type": "npm", + "namespace": "", + "name": "async", + "version": "0.2.9", + "qualifiers": "", + "subpath": "", + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/async/-/async-0.2.9.tgz", + "size": 15672, + "md5": "895ac62ba7c61086cffdd50ab03c0447", + "sha1": "df63060fbf3d33286a76aaf6d55a2986d9ff8619", + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": null, + "notice_text": null, + "source_packages": [], + "extra_data": {}, + "api_data_url": null, + "datasource_id": null, + "purl": "pkg:npm/async@0.2.9", + "manifest_path": null, + "contains_source_code": null, + "file_references": [], + 
"dependencies": [], + "match_type": "approximate-directory-structure" + } + ], + "files_count": 5, + "dirs_count": 1, + "size_count": 74218, + "scan_errors": [] + }, + { + "path": "package/component.json", + "type": "file", + "name": "component.json", + "base_name": "component", + "extension": ".json", + "size": 276, + "date": "2013-05-28", + "sha1": "6615a8e63ada0d9d145cc38ff6e1421a1f857742", + "md5": "aec01451771f878693dda7bf040642ae", + "sha256": "658e07147ddb1458d81d21ff46271e8bd67cb36b1922701b11cf0dc4143c1eea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1058, + "date": "2013-05-28", + "sha1": "99e50b7dfe39753bccb8aabce3cd1f80e960713b", + "md5": "64a378b2b01424fe22d54bc626175994", + "sha256": "b04b9e208e566fa898c7429e4dd5b45ba3ba2f7391e5c009cf63c53d580fa9b4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 875, + "date": "2013-05-28", + "sha1": "89ee3da95d9be9514d58eec01c1a2a5aa450b5e7", + "md5": "eae59bcc1a34df541efb423cee0653d1", + "sha256": "0a05706c3026dc78ee73eb92f1feb39bef2defef6f32f2aa79a631bfe3ebe2ea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + 
"is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 42716, + "date": "2013-05-28", + "sha1": "6ea922136bd1a91fe457a4238934820d84cf1093", + "md5": "374b909e5200bdf8a96e80ca5c65da21", + "sha256": "51405d87248ea2c2c5acd226281e77d4c2fb07eff2ccf640094cd2fbbfa4ab00", + "mime_type": "text/html", + "file_type": "HTML document, UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "packages": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 29293, + "scan_errors": [] + }, + { + "path": "package/lib/async.js", + "type": "file", + "name": "async.js", + "base_name": "async", + "extension": ".js", + "size": 29293, + "date": "2013-05-28", + "sha1": "31ed74fe94a795e7beac5d904dd567196666a011", + "md5": "2ff89a2726f7745d1ad89393afe4f0ab", + "sha256": "df5a89ada496897b43a02ca698060bfd4f1d9852d5205348a65289d8aaacb85a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "packages": [], + "files_count": 0, + "dirs_count": 0, + 
"size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i.json new file mode 100644 index 00000000..577beae4 --- /dev/null +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i.json @@ -0,0 +1,201 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.2.3.post38.b14ca7b2a.dirty.20210223014938", + "options": { + "input": [ + "package/" + ], + "--info": true, + "--json-pp": "async-0.2.9-i.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2021-02-23T232718.544871", + "end_timestamp": "2021-02-23T232718.773712", + "duration": 0.2288661003112793, + "message": null, + "errors": [], + "extra_data": { + "files_count": 5 + } + } + ], + "files": [ + { + "path": "package", + "type": "directory", + "name": "package", + "base_name": "package", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 5, + "dirs_count": 1, + "size_count": 74218, + "scan_errors": [] + }, + { + "path": "package/component.json", + "type": "file", + "name": "component.json", + "base_name": "component", + "extension": ".json", + "size": 276, + "date": "2013-05-28", + "sha1": "6615a8e63ada0d9d145cc38ff6e1421a1f857742", + "md5": 
"aec01451771f878693dda7bf040642ae", + "sha256": "658e07147ddb1458d81d21ff46271e8bd67cb36b1922701b11cf0dc4143c1eea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 1058, + "date": "2013-05-28", + "sha1": "99e50b7dfe39753bccb8aabce3cd1f80e960713b", + "md5": "64a378b2b01424fe22d54bc626175994", + "sha256": "b04b9e208e566fa898c7429e4dd5b45ba3ba2f7391e5c009cf63c53d580fa9b4", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 875, + "date": "2013-05-28", + "sha1": "89ee3da95d9be9514d58eec01c1a2a5aa450b5e7", + "md5": "eae59bcc1a34df541efb423cee0653d1", + "sha256": "0a05706c3026dc78ee73eb92f1feb39bef2defef6f32f2aa79a631bfe3ebe2ea", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/README.md", + "type": "file", + "name": "README.md", + "base_name": "README", + "extension": ".md", + "size": 42716, + "date": "2013-05-28", + "sha1": "6ea922136bd1a91fe457a4238934820d84cf1093", + "md5": "374b909e5200bdf8a96e80ca5c65da21", + "sha256": 
"51405d87248ea2c2c5acd226281e77d4c2fb07eff2ccf640094cd2fbbfa4ab00", + "mime_type": "text/html", + "file_type": "HTML document, UTF-8 Unicode text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "package/lib", + "type": "directory", + "name": "lib", + "base_name": "lib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 29293, + "scan_errors": [] + }, + { + "path": "package/lib/async.js", + "type": "file", + "name": "async.js", + "base_name": "async", + "extension": ".js", + "size": 29293, + "date": "2013-05-28", + "sha1": "31ed74fe94a795e7beac5d904dd567196666a011", + "md5": "2ff89a2726f7745d1ad89393afe4f0ab", + "sha256": "df5a89ada496897b43a02ca698060bfd4f1d9852d5205348a65289d8aaacb85a", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "JavaScript", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/exact-file-matching-standalone-test-results.json b/matchcode/tests/testfiles/models/exact-file-matching-standalone-test-results.json new file mode 100644 index 00000000..0e517fe9 --- /dev/null +++ b/matchcode/tests/testfiles/models/exact-file-matching-standalone-test-results.json @@ -0,0 +1,187 @@ +{ + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": 
"test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + 
"is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No 
newline at end of file diff --git a/matchcode/tests/testfiles/models/match-test-approximate-directory-content-results.json b/matchcode/tests/testfiles/models/match-test-approximate-directory-content-results.json new file mode 100644 index 00000000..4e1b2981 --- /dev/null +++ b/matchcode/tests/testfiles/models/match-test-approximate-directory-content-results.json @@ -0,0 +1,193 @@ +{ + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + 
"scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": 
"abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/match-test-approximate-directory-structure-results.json b/matchcode/tests/testfiles/models/match-test-approximate-directory-structure-results.json new file mode 100644 index 00000000..4e1b2981 --- /dev/null +++ b/matchcode/tests/testfiles/models/match-test-approximate-directory-structure-results.json @@ -0,0 +1,193 @@ +{ + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + 
"pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + 
"type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/match-test-exact-file-results.json b/matchcode/tests/testfiles/models/match-test-exact-file-results.json new file mode 100644 index 00000000..0e517fe9 --- /dev/null +++ b/matchcode/tests/testfiles/models/match-test-exact-file-results.json @@ -0,0 +1,187 @@ +{ + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + 
"type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": 
"508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/test@0.01" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/matchcode/tests/testfiles/models/match-test-exact-package-results.json b/matchcode/tests/testfiles/models/match-test-exact-package-results.json new file mode 100644 index 00000000..a78bf767 --- /dev/null +++ b/matchcode/tests/testfiles/models/match-test-exact-package-results.json @@ -0,0 +1,185 @@ +{ + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + 
"base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": 
false, + "is_script": false, + "matched_to": [ + "pkg:maven/acegi-security@0.51" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/dojoz@0.4.1-1" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "matched_to": [ + "pkg:maven/abbot@0.12.3" + ], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ 
No newline at end of file diff --git a/matchcode/tests/testfiles/models/match-test.json b/matchcode/tests/testfiles/models/match-test.json new file mode 100644 index 00000000..ef8fa09b --- /dev/null +++ b/matchcode/tests/testfiles/models/match-test.json @@ -0,0 +1,194 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "3.1.1.post554.72bef69a3", + "options": { + "input": [ + "/home/jono/Desktop/test" + ], + "--info": true, + "--json-pp": "/home/jono/Desktop/match-test.json" + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2020-06-19T230151.719124", + "end_timestamp": "2020-06-19T230151.855330", + "duration": 0.13622713088989258, + "message": null, + "errors": [], + "extra_data": { + "files_count": 4 + } + } + ], + "files": [ + { + "path": "test", + "type": "directory", + "name": "test", + "base_name": "test", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 2, + "size_count": 1743469, + "scan_errors": [] + }, + { + "path": "test/c", + "type": "file", + "name": "c", + "base_name": "c", + "extension": "", + "size": 4, + "date": "2020-06-19", + "sha1": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "md5": "098f6bcd4621d373cade4e832627b4f6", + "mime_type": "text/plain", + "file_type": "ASCII text, with no line terminators", + "programming_language": null, + "is_binary": false, + 
"is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a", + "type": "directory", + "name": "a", + "base_name": "a", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 2, + "dirs_count": 0, + "size_count": 1053674, + "scan_errors": [] + }, + { + "path": "test/a/acegi-security-0.51.jar", + "type": "file", + "name": "acegi-security-0.51.jar", + "base_name": "acegi-security-0.51", + "extension": ".jar", + "size": 176954, + "date": "2020-06-19", + "sha1": "ede156692b33872f5ee9465b7a06d6b2bc9e5e7f", + "md5": "19dad3908042b2bdc50cbfdaed7da200", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/a/dojoz-0.4.1-1.jar", + "type": "file", + "name": "dojoz-0.4.1-1.jar", + "base_name": "dojoz-0.4.1-1", + "extension": ".jar", + "size": 876720, + "date": "2020-06-19", + "sha1": "ae9d68fd6a29906606c2d9407d1cc0749ef84588", + "md5": "508361a1c6273a4c2b8e4945618b509f", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "test/b", + "type": "directory", + "name": "b", + "base_name": "b", + 
"extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 689791, + "scan_errors": [] + }, + { + "path": "test/b/abbot-0.12.3.jar", + "type": "file", + "name": "abbot-0.12.3.jar", + "base_name": "abbot-0.12.3", + "extension": ".jar", + "size": 689791, + "date": "2020-05-27", + "sha1": "51d28a27d919ce8690a40f4f335b9d591ceb16e9", + "md5": "38206e62a54b0489fb6baa4db5a06093", + "mime_type": "application/zip", + "file_type": "Zip archive data, at least v1.0 to extract", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} diff --git a/matchcode/utils.py b/matchcode/utils.py new file mode 100644 index 00000000..e2ddd278 --- /dev/null +++ b/matchcode/utils.py @@ -0,0 +1,148 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest import TestCase + +import json +import ntpath +import os +import posixpath + +from django.test import TestCase as DjangoTestCase + +from commoncode.resource import VirtualCodebase + +from matchcode_toolkit.fingerprinting import hexstring_to_binarray + + +############## TEST UTILITIES ############## +""" +The conventions used for the tests are: +- for tests that require files these are stored in the testfiles directory +- each test must use its own sub directory in testfiles. The is called the +'base' +- testfiles that are more than a few KB should be in a bzip2 tarball +""" + + +class BaseTestCase(TestCase): + BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') + + @classmethod + def get_test_loc(cls, path): + """ + Given a path relative to the test files directory, return the location + to a test file or directory for this path. No copy is done. + """ + path = to_os_native_path(path) + location = os.path.abspath(os.path.join(cls.BASE_DIR, path)) + return location + + +class CodebaseTester(object): + def check_codebase(self, codebase, expected_codebase_json_loc, + regen=False, remove_file_date=True): + """ + Check the Resources of the `codebase` Codebase objects are the same + as the data in the `expected_codebase_json_loc` JSON file location, + + If `regen` is True the expected_file WILL BE overwritten with the `codebase` + data. This is convenient for updating tests expectations. But use with + caution. + + if `remove_file_date` is True, the file.date attribute is removed. 
+ """ + + def serializer(r): + rd = r.to_dict(with_info=True) + if remove_file_date: + rd.pop('file_data', None) + return rd + + results = list(map(serializer, codebase.walk(topdown=True))) + if regen: + with open(expected_codebase_json_loc, 'w') as reg: + json.dump(dict(files=results), reg, indent=2, separators=(',', ': ')) + + expected_vc = VirtualCodebase(location=expected_codebase_json_loc) + expected = list(map(serializer, expected_vc.walk(topdown=True))) + + # NOTE we redump the JSON as a string for a more efficient display of the + # failures comparison/diff + expected = json.dumps(expected, indent=2, separators=(',', ': ')) + results = json.dumps(results, indent=2, separators=(',', ': ')) + self.assertEqual(expected, results) + + +class MatchcodeTestCase(CodebaseTester, BaseTestCase, DjangoTestCase): + databases = '__all__' + + +def to_os_native_path(path): + """ + Normalize a path to use the native OS path separator. + """ + path = path.replace(posixpath.sep, os.path.sep) + path = path.replace(ntpath.sep, os.path.sep) + path = path.rstrip(os.path.sep) + return path + + +def load_resources_from_scan(scan_location, package): + from packagedb.models import Resource + vc = VirtualCodebase( + location=scan_location, + ) + for resource in vc.walk(topdown=True): + created_resource, _ = Resource.objects.get_or_create( + package=package, + path=resource.path, + size=resource.size, + sha1=resource.sha1, + md5=resource.md5, + is_file=resource.type == 'file' + ) + + +def index_packages_sha1(): + """ + Reindex all the packages for exact sha1 matching. 
+ """ + from matchcode.models import ExactPackageArchiveIndex + from packagedb.models import Package + + for package in Package.objects.filter(sha1__isnull=False): + sha1_in_bin = hexstring_to_binarray(package.sha1) + _ = ExactPackageArchiveIndex.objects.create( + package=package, + sha1=sha1_in_bin + ) + + +def index_package_files_sha1(package, scan_location): + """ + Index for SHA1 the package files found in the JSON scan at scan_location + """ + from matchcode.models import ExactFileIndex + + resource_attributes = dict() + vc = VirtualCodebase( + location=scan_location, + resource_attributes=resource_attributes + ) + + for resource in vc.walk(topdown=True): + sha1 = resource.sha1 + if not sha1: + continue + sha1_in_bin = hexstring_to_binarray(sha1) + package_file, created = ExactFileIndex.objects.get_or_create( + sha1=sha1_in_bin, + package=package, + ) diff --git a/minecode/.gitignore b/minecode/.gitignore deleted file mode 100644 index 339dca50..00000000 --- a/minecode/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# Python compiled files -*.py[cod] - -# virtualenv and other misc bits -*.egg-info -/dist -/build -/bin -/lib -/scripts -/Scripts -/Lib -/pip-selfcheck.json -/tmp -/venv -.Python -/include -/Include -/local -*/local/* -/local/ -/share/ -/tcl/ -/.eggs/ - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.cache -.coverage -.coverage.* -nosetests.xml -htmlcov - -# Translations -*.mo - -# IDEs -.project -.pydevproject -.idea -org.eclipse.core.resources.prefs -.vscode -.vs - -# Sphinx -docs/_build -docs/bin -docs/build -docs/include -docs/Lib -doc/pyvenv.cfg -pyvenv.cfg - -# Various junk and temp files -.DS_Store -*~ -.*.sw[po] -.build -.ve -*.bak -/.cache/ - -# pyenv -/.python-version -/man/ -/.pytest_cache/ -lib64 -tcl - -# Ignore Jupyter Notebook related temp files -.ipynb_checkpoints/ diff --git a/minecode/CHANGELOG.rst b/minecode/CHANGELOG.rst deleted file mode 100644 index 7c378e5d..00000000 --- a/minecode/CHANGELOG.rst +++ /dev/null @@ 
-1,10 +0,0 @@ - -Release notes -============= - - - -Version v1.0.0 ----------------- - -Initial release \ No newline at end of file diff --git a/minecode/NOTICE b/minecode/NOTICE deleted file mode 100644 index dab28052..00000000 --- a/minecode/NOTICE +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 AND CC-BY-SA-4.0 -# purldb software is licensed under the Apache License version 2.0. -# purldb data is licensed collectively under CC-BY-SA-4.0. -# See https://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license text. -# -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/minecode/README.rst b/minecode/README.rst deleted file mode 100644 index 917c93a0..00000000 --- a/minecode/README.rst +++ /dev/null @@ -1,12 +0,0 @@ -minecode -========= - -MineCode is a Django web app that visits upstream package repositories and saves the package metadata to the PackageDB. 
- -Local setup -=========== - -* ./configure -* make postgres -* make envfile -* make run diff --git a/minecode/src/discovery/__init__.py b/minecode/__init__.py similarity index 88% rename from minecode/src/discovery/__init__.py rename to minecode/__init__.py index 9fbdcebb..8b9f47ea 100644 --- a/minecode/src/discovery/__init__.py +++ b/minecode/__init__.py @@ -10,10 +10,10 @@ import sys -from discovery import route +from minecode import route -default_app_config = 'discovery.apps.DiscoveryConfig' +default_app_config = 'minecode.apps.MinecodeConfig' sys_platform = str(sys.platform).lower() diff --git a/minecode/src/discovery/api.py b/minecode/api.py similarity index 94% rename from minecode/src/discovery/api.py rename to minecode/api.py index 1756a940..c6f2e92c 100644 --- a/minecode/src/discovery/api.py +++ b/minecode/api.py @@ -11,7 +11,7 @@ from rest_framework import serializers from rest_framework import viewsets -from discovery.models import ResourceURI +from minecode.models import ResourceURI class ResourceURISerializer(serializers.ModelSerializer): diff --git a/minecode/src/discovery/apps.py b/minecode/apps.py similarity index 82% rename from minecode/src/discovery/apps.py rename to minecode/apps.py index 8d2cfe29..478d1bee 100644 --- a/minecode/src/discovery/apps.py +++ b/minecode/apps.py @@ -12,6 +12,6 @@ from django.utils.translation import gettext_lazy as _ -class DiscoveryConfig(AppConfig): - name = 'discovery' - verbose_name = _('Discovery') +class MinecodeConfig(AppConfig): + name = 'minecode' + verbose_name = _('Minecode') diff --git a/minecode/src/discovery/bsd-new.LICENSE b/minecode/bsd-new.LICENSE similarity index 100% rename from minecode/src/discovery/bsd-new.LICENSE rename to minecode/bsd-new.LICENSE diff --git a/minecode/cc-by-sa-4.0.LICENSE b/minecode/cc-by-sa-4.0.LICENSE deleted file mode 100644 index e04b480f..00000000 --- a/minecode/cc-by-sa-4.0.LICENSE +++ /dev/null @@ -1,427 +0,0 @@ -Attribution-ShareAlike 4.0 International - 
-======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. 
More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution-ShareAlike 4.0 International Public -License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You -such rights in consideration of benefits the Licensor receives from -making the Licensed Material available under these terms and -conditions. - - -Section 1 -- Definitions. - - a. 
Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative - Commons as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name - of a Creative Commons Public License. 
The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - i. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - m. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. 
For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. Additional offer from the Licensor -- Adapted Material. - Every recipient of Adapted Material from You - automatically receives an offer from the Licensor to - exercise the Licensed Rights in the Adapted Material - under the conditions of the Adapter's License You apply. - - c. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. 
Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. 
indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - b. ShareAlike. - - In addition to the conditions in Section 3(a), if You Share - Adapted Material You produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. 
if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material, - - including for purposes of Section 3(b); and - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. 
The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. 
To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public -licenses. Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. 
diff --git a/minecode/src/discovery/command.py b/minecode/command.py similarity index 98% rename from minecode/src/discovery/command.py rename to minecode/command.py index 1f4a6d6c..bf924e44 100644 --- a/minecode/src/discovery/command.py +++ b/minecode/command.py @@ -13,7 +13,7 @@ import signal import subprocess -from discovery import ON_WINDOWS +from minecode import ON_WINDOWS logger = logging.getLogger(__name__) diff --git a/minecode/configure b/minecode/configure deleted file mode 100755 index b524a863..00000000 --- a/minecode/configure +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/ for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -set -e -#set -x - -################################ -# A configuration script to set things up: -# create a virtualenv and install or update thirdparty packages. -# Source this script for initial configuration -# Use configure --help for details -# -# NOTE: please keep in sync with Windows script configure.bat -# -# This script will search for a virtualenv.pyz app in etc/thirdparty/virtualenv.pyz -# Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default -################################ -CLI_ARGS=$1 - -################################ -# Defaults. Change these variables to customize this script -################################ - -# Requirement arguments passed to pip and used by default or with --dev. -REQUIREMENTS="--editable ../packagedb --editable . 
--constraint requirements.txt https://github.com/nexB/scancode-toolkit/archive/817161527c864aff937a235856ba9f4d40445c8a.zip" -DEV_REQUIREMENTS="--editable ../packagedb[testing] --editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt https://github.com/nexB/scancode-toolkit/archive/817161527c864aff937a235856ba9f4d40445c8a.zip" -DOCS_REQUIREMENTS="--editable ../packagedb[docs] --editable .[docs] --constraint requirements.txt https://github.com/nexB/scancode-toolkit/archive/817161527c864aff937a235856ba9f4d40445c8a.zip" - -# where we create a virtualenv -VIRTUALENV_DIR=venv - -# Cleanable files and directories to delete with the --clean option -CLEANABLE="build venv" - -# extra arguments passed to pip -PIP_EXTRA_ARGS=" " - -# the URL to download virtualenv.pyz if needed -VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz -################################ - - -################################ -# Current directory where this script lives -CFG_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -CFG_BIN_DIR=$CFG_ROOT_DIR/$VIRTUALENV_DIR/bin - - -################################ -# Install with or without and index. With "--no-index" this is using only local wheels -# This is an offline mode with no index and no network operations -# NO_INDEX="--no-index " -NO_INDEX="" - - -################################ -# Thirdparty package locations and index handling -# Find packages from the local thirdparty directory if present -THIRDPARDIR=$CFG_ROOT_DIR/thirdparty -if [[ "$(echo $THIRDPARDIR/*.whl)x" != "$THIRDPARDIR/*.whlx" ]]; then - PIP_EXTRA_ARGS="$NO_INDEX --find-links $THIRDPARDIR" -fi - - -################################ -# Set the quiet flag to empty if not defined -if [[ "$CFG_QUIET" == "" ]]; then - CFG_QUIET=" " -fi - - -################################ -# Find a proper Python to run -# Use environment variables or a file if available. -# Otherwise the latest Python by default. 
-find_python() { - if [[ "$PYTHON_EXECUTABLE" == "" ]]; then - # check for a file named PYTHON_EXECUTABLE - if [ -f "$CFG_ROOT_DIR/PYTHON_EXECUTABLE" ]; then - PYTHON_EXECUTABLE=$(cat "$CFG_ROOT_DIR/PYTHON_EXECUTABLE") - else - PYTHON_EXECUTABLE=python3 - fi - fi -} - - -################################ -create_virtualenv() { - # create a virtualenv for Python - # Note: we do not use the bundled Python 3 "venv" because its behavior and - # presence is not consistent across Linux distro and sometimes pip is not - # included either by default. The virtualenv.pyz app cures all these issues. - - VENV_DIR="$1" - if [ ! -f "$CFG_BIN_DIR/python" ]; then - - mkdir -p "$CFG_ROOT_DIR/$VENV_DIR" - - if [ -f "$CFG_ROOT_DIR/etc/thirdparty/virtualenv.pyz" ]; then - VIRTUALENV_PYZ="$CFG_ROOT_DIR/etc/thirdparty/virtualenv.pyz" - else - VIRTUALENV_PYZ="$CFG_ROOT_DIR/$VENV_DIR/virtualenv.pyz" - wget -O "$VIRTUALENV_PYZ" "$VIRTUALENV_PYZ_URL" 2>/dev/null || curl -o "$VIRTUALENV_PYZ" "$VIRTUALENV_PYZ_URL" - fi - - $PYTHON_EXECUTABLE "$VIRTUALENV_PYZ" \ - --wheel embed --pip embed --setuptools embed \ - --seeder pip \ - --never-download \ - --no-periodic-update \ - --no-vcs-ignore \ - $CFG_QUIET \ - "$CFG_ROOT_DIR/$VENV_DIR" - fi -} - - -################################ -install_packages() { - # install requirements in virtualenv - # note: --no-build-isolation means that pip/wheel/setuptools will not - # be reinstalled a second time and reused from the virtualenv and this - # speeds up the installation. - # We always have the PEP517 build dependencies installed already. - - "$CFG_BIN_DIR/pip" install \ - --upgrade \ - --no-build-isolation \ - $CFG_QUIET \ - $PIP_EXTRA_ARGS \ - $1 -} - - -################################ -cli_help() { - echo An initial configuration script - echo " usage: ./configure [options]" - echo - echo The default is to configure for regular use. Use --dev for development. - echo - echo The options are: - echo " --clean: clean built and installed files and exit." 
- echo " --dev: configure the environment for development." - echo " --help: display this help message and exit." - echo - echo By default, the python interpreter version found in the path is used. - echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to - echo configure another Python executable interpreter to use. If this is not - echo set, a file named PYTHON_EXECUTABLE containing a single line with the - echo path of the Python executable to use will be checked last. - set +e - exit -} - - -################################ -clean() { - # Remove cleanable file and directories and files from the root dir. - echo "* Cleaning ..." - for cln in $CLEANABLE; - do rm -rf "${CFG_ROOT_DIR:?}/${cln:?}"; - done - set +e - exit -} - - -################################ -# Main command line entry point -CFG_REQUIREMENTS=$REQUIREMENTS - -# We are using getopts to parse option arguments that start with "-" -while getopts :-: optchar; do - case "${optchar}" in - -) - case "${OPTARG}" in - help ) cli_help;; - clean ) find_python && clean;; - dev ) CFG_REQUIREMENTS="$DEV_REQUIREMENTS";; - docs ) CFG_REQUIREMENTS="$DOCS_REQUIREMENTS";; - esac;; - esac -done - - -PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS" - -find_python -create_virtualenv "$VIRTUALENV_DIR" -install_packages "$CFG_REQUIREMENTS" -. "$CFG_BIN_DIR/activate" - - -set +e diff --git a/minecode/configure.bat b/minecode/configure.bat deleted file mode 100644 index 41547cc5..00000000 --- a/minecode/configure.bat +++ /dev/null @@ -1,207 +0,0 @@ -@echo OFF -@setlocal - -@rem Copyright (c) nexB Inc. and others. All rights reserved. -@rem SPDX-License-Identifier: Apache-2.0 -@rem See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -@rem See https://github.com/nexB/ for support or download. -@rem See https://aboutcode.org for more information about nexB OSS projects. 
- - -@rem ################################ -@rem # A configuration script to set things up: -@rem # create a virtualenv and install or update thirdparty packages. -@rem # Source this script for initial configuration -@rem # Use configure --help for details - -@rem # NOTE: please keep in sync with POSIX script configure - -@rem # This script will search for a virtualenv.pyz app in etc\thirdparty\virtualenv.pyz -@rem # Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default -@rem ################################ - - -@rem ################################ -@rem # Defaults. Change these variables to customize this script -@rem ################################ - -@rem # Requirement arguments passed to pip and used by default or with --dev. -set "REQUIREMENTS=--editable . --constraint requirements.txt" -set "DEV_REQUIREMENTS=--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" -set "DOCS_REQUIREMENTS=--editable .[docs] --constraint requirements.txt" - -@rem # where we create a virtualenv -set "VIRTUALENV_DIR=venv" - -@rem # Cleanable files and directories to delete with the --clean option -set "CLEANABLE=build venv" - -@rem # extra arguments passed to pip -set "PIP_EXTRA_ARGS= " - -@rem # the URL to download virtualenv.pyz if needed -set VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz -@rem ################################ - - -@rem ################################ -@rem # Current directory where this script lives -set CFG_ROOT_DIR=%~dp0 -set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" - - -@rem ################################ -@rem # Thirdparty package locations and index handling -@rem # Find packages from the local thirdparty directory -if exist "%CFG_ROOT_DIR%\thirdparty" ( - set PIP_EXTRA_ARGS=--find-links "%CFG_ROOT_DIR%\thirdparty" -) - - -@rem ################################ -@rem # Set the quiet flag to empty if not defined -if not defined CFG_QUIET ( - set "CFG_QUIET= " -) - - -@rem 
################################ -@rem # Main command line entry point -set "CFG_REQUIREMENTS=%REQUIREMENTS%" - -:again -if not "%1" == "" ( - if "%1" EQU "--help" (goto cli_help) - if "%1" EQU "--clean" (goto clean) - if "%1" EQU "--dev" ( - set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" - ) - if "%1" EQU "--docs" ( - set "CFG_REQUIREMENTS=%DOCS_REQUIREMENTS%" - ) - shift - goto again -) - -set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS%" - - -@rem ################################ -@rem # Find a proper Python to run -@rem # Use environment variables or a file if available. -@rem # Otherwise the latest Python by default. -if not defined PYTHON_EXECUTABLE ( - @rem # check for a file named PYTHON_EXECUTABLE - if exist "%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" ( - set /p PYTHON_EXECUTABLE=<"%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" - ) else ( - set "PYTHON_EXECUTABLE=py" - ) -) - - -@rem ################################ -:create_virtualenv -@rem # create a virtualenv for Python -@rem # Note: we do not use the bundled Python 3 "venv" because its behavior and -@rem # presence is not consistent across Linux distro and sometimes pip is not -@rem # included either by default. The virtualenv.pyz app cures all these issues. 
- -if not exist "%CFG_BIN_DIR%\python.exe" ( - if not exist "%CFG_BIN_DIR%" ( - mkdir "%CFG_BIN_DIR%" - ) - - if exist "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ( - %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ^ - --wheel embed --pip embed --setuptools embed ^ - --seeder pip ^ - --never-download ^ - --no-periodic-update ^ - --no-vcs-ignore ^ - %CFG_QUIET% ^ - "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" - ) else ( - if not exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ( - curl -o "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" %VIRTUALENV_PYZ_URL% - - if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% - ) - ) - %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ^ - --wheel embed --pip embed --setuptools embed ^ - --seeder pip ^ - --never-download ^ - --no-periodic-update ^ - --no-vcs-ignore ^ - %CFG_QUIET% ^ - "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" - ) -) - -if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% -) - - -@rem ################################ -:install_packages -@rem # install requirements in virtualenv -@rem # note: --no-build-isolation means that pip/wheel/setuptools will not -@rem # be reinstalled a second time and reused from the virtualenv and this -@rem # speeds up the installation. -@rem # We always have the PEP517 build dependencies installed already. 
- -"%CFG_BIN_DIR%\pip" install ^ - --upgrade ^ - --no-build-isolation ^ - %CFG_QUIET% ^ - %PIP_EXTRA_ARGS% ^ - %CFG_REQUIREMENTS% - - -@rem ################################ -:create_bin_junction -@rem # Create junction to bin to have the same directory between linux and windows -if exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" ( - rmdir /s /q "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" -) -mklink /J "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" - -if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% -) - -exit /b 0 - - -@rem ################################ -:cli_help - echo An initial configuration script - echo " usage: configure [options]" - echo " " - echo The default is to configure for regular use. Use --dev for development. - echo " " - echo The options are: - echo " --clean: clean built and installed files and exit." - echo " --dev: configure the environment for development." - echo " --help: display this help message and exit." - echo " " - echo By default, the python interpreter version found in the path is used. - echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to - echo configure another Python executable interpreter to use. If this is not - echo set, a file named PYTHON_EXECUTABLE containing a single line with the - echo path of the Python executable to use will be checked last. - exit /b 0 - - -@rem ################################ -:clean -@rem # Remove cleanable file and directories and files from the root dir. -echo "* Cleaning ..." 
-for %%F in (%CLEANABLE%) do ( - rmdir /s /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1 - del /f /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1 -) -exit /b 0 diff --git a/minecode/src/discovery/debutils.py b/minecode/debutils.py similarity index 100% rename from minecode/src/discovery/debutils.py rename to minecode/debutils.py diff --git a/minecode/src/discovery/ls.py b/minecode/ls.py similarity index 100% rename from minecode/src/discovery/ls.py rename to minecode/ls.py diff --git a/minecode/manage.py b/minecode/manage.py deleted file mode 100755 index ba7386c7..00000000 --- a/minecode/manage.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import os -import sys - -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "minecodeio.settings.dev") - - from django.core.management import execute_from_command_line - - execute_from_command_line(sys.argv) diff --git a/minecode/src/discovery/management/__init__.py b/minecode/management/__init__.py similarity index 100% rename from minecode/src/discovery/management/__init__.py rename to minecode/management/__init__.py diff --git a/minecode/src/discovery/management/commands/__init__.py b/minecode/management/commands/__init__.py similarity index 100% rename from minecode/src/discovery/management/commands/__init__.py rename to minecode/management/commands/__init__.py diff --git a/minecode/src/discovery/management/commands/check_licenses.py b/minecode/management/commands/check_licenses.py similarity index 98% rename from minecode/src/discovery/management/commands/check_licenses.py rename to minecode/management/commands/check_licenses.py index 00c3049f..9fb0902c 100644 --- 
a/minecode/src/discovery/management/commands/check_licenses.py +++ b/minecode/management/commands/check_licenses.py @@ -20,7 +20,7 @@ from packagedb.models import Package -from discovery.management.commands import VerboseCommand +from minecode.management.commands import VerboseCommand """ Utility command to find license oddities. diff --git a/minecode/src/discovery/management/commands/check_uri.py b/minecode/management/commands/check_uri.py similarity index 94% rename from minecode/src/discovery/management/commands/check_uri.py rename to minecode/management/commands/check_uri.py index 7eebee2e..664fa3f7 100644 --- a/minecode/src/discovery/management/commands/check_uri.py +++ b/minecode/management/commands/check_uri.py @@ -16,12 +16,12 @@ # NOTE: mappers and visitors are Unused Import here: But importing the mappers # module triggers routes registration -from discovery import mappers # NOQA -from discovery import visitors # NOQA -from discovery import map_router -from discovery import visit_router -from discovery.models import ResourceURI -from discovery.route import NoRouteAvailable +from minecode import mappers # NOQA +from minecode import visitors # NOQA +from minecode import map_router +from minecode import visit_router +from minecode.models import ResourceURI +from minecode.route import NoRouteAvailable TRACE = False diff --git a/minecode/src/discovery/management/commands/dump_purls.py b/minecode/management/commands/dump_purls.py similarity index 100% rename from minecode/src/discovery/management/commands/dump_purls.py rename to minecode/management/commands/dump_purls.py diff --git a/minecode/src/discovery/management/commands/get_status.py b/minecode/management/commands/get_status.py similarity index 97% rename from minecode/src/discovery/management/commands/get_status.py rename to minecode/management/commands/get_status.py index a957d49e..9f24d12f 100644 --- a/minecode/src/discovery/management/commands/get_status.py +++ 
b/minecode/management/commands/get_status.py @@ -14,7 +14,7 @@ from django.core.management.base import BaseCommand -from discovery.models import ResourceURI +from minecode.models import ResourceURI from packagedb.models import Package logger = logging.getLogger(__name__) diff --git a/minecode/src/discovery/management/commands/remap.py b/minecode/management/commands/remap.py similarity index 97% rename from minecode/src/discovery/management/commands/remap.py rename to minecode/management/commands/remap.py index 8f84e297..97a08080 100644 --- a/minecode/src/discovery/management/commands/remap.py +++ b/minecode/management/commands/remap.py @@ -14,7 +14,7 @@ from django.core.management.base import BaseCommand from django.db.models import Q -from discovery.models import ResourceURI +from minecode.models import ResourceURI logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout) diff --git a/minecode/src/discovery/management/commands/run_map.py b/minecode/management/commands/run_map.py similarity index 98% rename from minecode/src/discovery/management/commands/run_map.py rename to minecode/management/commands/run_map.py index b0009fe4..08704e0f 100644 --- a/minecode/src/discovery/management/commands/run_map.py +++ b/minecode/management/commands/run_map.py @@ -25,21 +25,21 @@ # UnusedImport here! 
# But importing the mappers and visitors module triggers routes registration -from discovery import mappers # NOQA -from discovery import visitors # NOQA +from minecode import mappers # NOQA +from minecode import visitors # NOQA -from discovery import map_router -from discovery.models import ResourceURI +from minecode import map_router +from minecode.models import ResourceURI from packagedb.models import DependentPackage from packagedb.models import Package from packagedb.models import Party -from discovery.management.commands import get_error_message -from discovery.management.commands import VerboseCommand -from discovery.models import ScannableURI -from discovery.utils import stringify_null_purl_fields +from minecode.management.commands import get_error_message +from minecode.management.commands import VerboseCommand +from minecode.models import ScannableURI +from minecode.utils import stringify_null_purl_fields -TRACE = False +TRACE = True logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout) diff --git a/minecode/src/discovery/management/commands/run_visit.py b/minecode/management/commands/run_visit.py similarity index 97% rename from minecode/src/discovery/management/commands/run_visit.py rename to minecode/management/commands/run_visit.py index 54cd877d..b441ae1d 100644 --- a/minecode/src/discovery/management/commands/run_visit.py +++ b/minecode/management/commands/run_visit.py @@ -24,15 +24,15 @@ # UnusedImport here! 
# But importing the mappers and visitors module triggers routes registration -from discovery import mappers # NOQA -from discovery import visitors # NOQA -from discovery import visit_router +from minecode import mappers # NOQA +from minecode import visitors # NOQA +from minecode import visit_router -from discovery.management.commands import get_error_message -from discovery.management.commands import VerboseCommand +from minecode.management.commands import get_error_message +from minecode.management.commands import VerboseCommand -from discovery.models import ResourceURI -from discovery.route import NoRouteAvailable +from minecode.models import ResourceURI +from minecode.route import NoRouteAvailable logger = logging.getLogger(__name__) diff --git a/minecode/src/discovery/management/commands/seed.py b/minecode/management/commands/seed.py similarity index 93% rename from minecode/src/discovery/management/commands/seed.py rename to minecode/management/commands/seed.py index 4c3b18ae..48bfbb73 100644 --- a/minecode/src/discovery/management/commands/seed.py +++ b/minecode/management/commands/seed.py @@ -16,12 +16,12 @@ # UnusedImport here! 
# But importing the mappers and visitors module triggers routes registration -from discovery import mappers # NOQA -from discovery import visitors # NOQA +from minecode import mappers # NOQA +from minecode import visitors # NOQA -from discovery import seed -from discovery.models import ResourceURI -from discovery.management.commands import VerboseCommand +from minecode import seed +from minecode.models import ResourceURI +from minecode.management.commands import VerboseCommand logger = logging.getLogger(__name__) diff --git a/minecode/src/discovery/mappers/__init__.py b/minecode/mappers/__init__.py similarity index 100% rename from minecode/src/discovery/mappers/__init__.py rename to minecode/mappers/__init__.py diff --git a/minecode/src/discovery/mappers/debian.py b/minecode/mappers/debian.py similarity index 98% rename from minecode/src/discovery/mappers/debian.py rename to minecode/mappers/debian.py index 91eb35cd..10bcfa88 100644 --- a/minecode/src/discovery/mappers/debian.py +++ b/minecode/mappers/debian.py @@ -17,11 +17,11 @@ from packagedcode import models as scan_models from packageurl import PackageURL -from discovery import ls -from discovery import map_router -from discovery.mappers import Mapper -from discovery.utils import form_vcs_url -# from discovery import debutils +from minecode import ls +from minecode import map_router +from minecode.mappers import Mapper +from minecode.utils import form_vcs_url +# from minecode import debutils logger = logging.getLogger(__name__) @@ -385,7 +385,7 @@ def get_resourceuri_by_uri(uri): """ Return the Resource URI by searching with passing uri string value. 
""" - from discovery.models import ResourceURI + from minecode.models import ResourceURI uris = ResourceURI.objects.filter(uri=uri) if uris: return uris[0] diff --git a/minecode/src/discovery/mappers/fdroid.py b/minecode/mappers/fdroid.py similarity index 98% rename from minecode/src/discovery/mappers/fdroid.py rename to minecode/mappers/fdroid.py index 4b837f98..0460b118 100644 --- a/minecode/src/discovery/mappers/fdroid.py +++ b/minecode/mappers/fdroid.py @@ -12,8 +12,8 @@ from packagedcode.models import PackageData -from discovery import map_router -from discovery.mappers import Mapper +from minecode import map_router +from minecode.mappers import Mapper from packageurl import PackageURL from packagedcode.models import party_person from packagedcode.models import Party diff --git a/minecode/src/discovery/mappers/freebsd.py b/minecode/mappers/freebsd.py similarity index 94% rename from minecode/src/discovery/mappers/freebsd.py rename to minecode/mappers/freebsd.py index d462fe9d..cd3d89e9 100644 --- a/minecode/src/discovery/mappers/freebsd.py +++ b/minecode/mappers/freebsd.py @@ -14,9 +14,9 @@ from packagedcode.freebsd import CompactManifestHandler -from discovery import map_router -from discovery.mappers import Mapper -from discovery.utils import get_temp_dir +from minecode import map_router +from minecode.mappers import Mapper +from minecode.utils import get_temp_dir @map_router.route('https://pkg.freebsd.org/.*packagesite.txz') diff --git a/minecode/src/discovery/mappers/maven.py b/minecode/mappers/maven.py similarity index 96% rename from minecode/src/discovery/mappers/maven.py rename to minecode/mappers/maven.py index 10908b29..781cf218 100644 --- a/minecode/src/discovery/mappers/maven.py +++ b/minecode/mappers/maven.py @@ -16,10 +16,10 @@ from packagedcode.models import PackageData from packagedcode.maven import _parse -from discovery import map_router -from discovery.mappers import Mapper -from discovery.utils import parse_date -from 
discovery.visitors.maven import Artifact +from minecode import map_router +from minecode.mappers import Mapper +from minecode.utils import parse_date +from minecode.visitors.maven import Artifact TRACE = False diff --git a/minecode/src/discovery/mappers/npm.py b/minecode/mappers/npm.py similarity index 96% rename from minecode/src/discovery/mappers/npm.py rename to minecode/mappers/npm.py index 558a48d8..34d8f718 100644 --- a/minecode/src/discovery/mappers/npm.py +++ b/minecode/mappers/npm.py @@ -14,8 +14,8 @@ from packagedcode.npm import NpmPackageJsonHandler -from discovery import map_router -from discovery.mappers import Mapper +from minecode import map_router +from minecode.mappers import Mapper TRACE = False diff --git a/minecode/src/discovery/mappers/pypi.py b/minecode/mappers/pypi.py similarity index 98% rename from minecode/src/discovery/mappers/pypi.py rename to minecode/mappers/pypi.py index b708c980..fc423e1a 100644 --- a/minecode/src/discovery/mappers/pypi.py +++ b/minecode/mappers/pypi.py @@ -12,9 +12,9 @@ from packagedcode import models as scan_models -from discovery import map_router -from discovery.mappers import Mapper -from discovery.utils import parse_date +from minecode import map_router +from minecode.mappers import Mapper +from minecode.utils import parse_date @map_router.route('https://pypi.python.org/pypi/[^/]+/[^/]+/json') diff --git a/minecode/src/discovery/mappers/rubygems.py b/minecode/mappers/rubygems.py similarity index 98% rename from minecode/src/discovery/mappers/rubygems.py rename to minecode/mappers/rubygems.py index 45c9d437..9ae0513b 100644 --- a/minecode/src/discovery/mappers/rubygems.py +++ b/minecode/mappers/rubygems.py @@ -15,10 +15,10 @@ from packagedcode.models import DependentPackage from packagedcode.models import PackageData -from discovery import map_router -from discovery import saneyaml -from discovery.mappers import Mapper -from discovery.utils import parse_date +from minecode import map_router +from minecode import 
saneyaml +from minecode.mappers import Mapper +from minecode.utils import parse_date logger = logging.getLogger(__name__) handler = logging.StreamHandler() diff --git a/minecode/src/discovery/mappers/sourceforge.py b/minecode/mappers/sourceforge.py similarity index 98% rename from minecode/src/discovery/mappers/sourceforge.py rename to minecode/mappers/sourceforge.py index baf96f9a..77880176 100644 --- a/minecode/src/discovery/mappers/sourceforge.py +++ b/minecode/mappers/sourceforge.py @@ -11,8 +11,8 @@ from packagedcode import models as scan_models -from discovery import map_router -from discovery.mappers import Mapper +from minecode import map_router +from minecode.mappers import Mapper @map_router.route('https?://sourceforge.net/api/project/name/[a-z0-9.-]+/json', diff --git a/minecode/src/discovery/mappings/__init__.py b/minecode/mappings/__init__.py similarity index 100% rename from minecode/src/discovery/mappings/__init__.py rename to minecode/mappings/__init__.py diff --git a/minecode/src/discovery/mappings/pypi_trove.py b/minecode/mappings/pypi_trove.py similarity index 100% rename from minecode/src/discovery/mappings/pypi_trove.py rename to minecode/mappings/pypi_trove.py diff --git a/minecode/src/discovery/migrations/0001_initial.py b/minecode/migrations/0001_initial.py similarity index 100% rename from minecode/src/discovery/migrations/0001_initial.py rename to minecode/migrations/0001_initial.py diff --git a/minecode/src/discovery/migrations/0002_auto_20160707_1249.py b/minecode/migrations/0002_auto_20160707_1249.py similarity index 99% rename from minecode/src/discovery/migrations/0002_auto_20160707_1249.py rename to minecode/migrations/0002_auto_20160707_1249.py index b64a51a8..eae0b342 100644 --- a/minecode/src/discovery/migrations/0002_auto_20160707_1249.py +++ b/minecode/migrations/0002_auto_20160707_1249.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0001_initial'), + ('minecode', '0001_initial'), ] 
operations = [ diff --git a/minecode/src/discovery/migrations/0003_auto_20160712_1223.py b/minecode/migrations/0003_auto_20160712_1223.py similarity index 95% rename from minecode/src/discovery/migrations/0003_auto_20160712_1223.py rename to minecode/migrations/0003_auto_20160712_1223.py index 5de60e68..3d1e69bf 100644 --- a/minecode/src/discovery/migrations/0003_auto_20160712_1223.py +++ b/minecode/migrations/0003_auto_20160712_1223.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0002_auto_20160707_1249'), + ('minecode', '0002_auto_20160707_1249'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0004_auto_20160713_1731.py b/minecode/migrations/0004_auto_20160713_1731.py similarity index 90% rename from minecode/src/discovery/migrations/0004_auto_20160713_1731.py rename to minecode/migrations/0004_auto_20160713_1731.py index c2da43fe..a3c8fa9b 100644 --- a/minecode/src/discovery/migrations/0004_auto_20160713_1731.py +++ b/minecode/migrations/0004_auto_20160713_1731.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0003_auto_20160712_1223'), + ('minecode', '0003_auto_20160712_1223'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0005_resourceuri_metadata.py b/minecode/migrations/0005_resourceuri_metadata.py similarity index 92% rename from minecode/src/discovery/migrations/0005_resourceuri_metadata.py rename to minecode/migrations/0005_resourceuri_metadata.py index 4f796f6f..b32f1bd3 100644 --- a/minecode/src/discovery/migrations/0005_resourceuri_metadata.py +++ b/minecode/migrations/0005_resourceuri_metadata.py @@ -9,7 +9,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0004_auto_20160713_1731'), + ('minecode', '0004_auto_20160713_1731'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0006_remove_resourceuri_metadata.py b/minecode/migrations/0006_remove_resourceuri_metadata.py similarity index 87% 
rename from minecode/src/discovery/migrations/0006_remove_resourceuri_metadata.py rename to minecode/migrations/0006_remove_resourceuri_metadata.py index 6b028def..84b74e3c 100644 --- a/minecode/src/discovery/migrations/0006_remove_resourceuri_metadata.py +++ b/minecode/migrations/0006_remove_resourceuri_metadata.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0005_resourceuri_metadata'), + ('minecode', '0005_resourceuri_metadata'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0007_change_unique_constraints_on_resourceuri.py b/minecode/migrations/0007_change_unique_constraints_on_resourceuri.py similarity index 92% rename from minecode/src/discovery/migrations/0007_change_unique_constraints_on_resourceuri.py rename to minecode/migrations/0007_change_unique_constraints_on_resourceuri.py index 919f189e..3569334d 100644 --- a/minecode/src/discovery/migrations/0007_change_unique_constraints_on_resourceuri.py +++ b/minecode/migrations/0007_change_unique_constraints_on_resourceuri.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0006_remove_resourceuri_metadata'), + ('minecode', '0006_remove_resourceuri_metadata'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0008_change_default_sort_order_on_resourceuri.py b/minecode/migrations/0008_change_default_sort_order_on_resourceuri.py similarity index 85% rename from minecode/src/discovery/migrations/0008_change_default_sort_order_on_resourceuri.py rename to minecode/migrations/0008_change_default_sort_order_on_resourceuri.py index a0ab7ff7..521694e6 100644 --- a/minecode/src/discovery/migrations/0008_change_default_sort_order_on_resourceuri.py +++ b/minecode/migrations/0008_change_default_sort_order_on_resourceuri.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0007_change_unique_constraints_on_resourceuri'), + ('minecode', 
'0007_change_unique_constraints_on_resourceuri'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0009_resourceuri_source_uri.py b/minecode/migrations/0009_resourceuri_source_uri.py similarity index 91% rename from minecode/src/discovery/migrations/0009_resourceuri_source_uri.py rename to minecode/migrations/0009_resourceuri_source_uri.py index 387811d6..c3bda1ce 100644 --- a/minecode/src/discovery/migrations/0009_resourceuri_source_uri.py +++ b/minecode/migrations/0009_resourceuri_source_uri.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0008_change_default_sort_order_on_resourceuri'), + ('minecode', '0008_change_default_sort_order_on_resourceuri'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0010_resourceuri_mining_level.py b/minecode/migrations/0010_resourceuri_mining_level.py similarity index 91% rename from minecode/src/discovery/migrations/0010_resourceuri_mining_level.py rename to minecode/migrations/0010_resourceuri_mining_level.py index a45db70c..2a6d535a 100644 --- a/minecode/src/discovery/migrations/0010_resourceuri_mining_level.py +++ b/minecode/migrations/0010_resourceuri_mining_level.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0009_resourceuri_source_uri'), + ('minecode', '0009_resourceuri_source_uri'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0011_auto_20170807_0253.py b/minecode/migrations/0011_auto_20170807_0253.py similarity index 95% rename from minecode/src/discovery/migrations/0011_auto_20170807_0253.py rename to minecode/migrations/0011_auto_20170807_0253.py index 2358f4c5..489f6e0c 100644 --- a/minecode/src/discovery/migrations/0011_auto_20170807_0253.py +++ b/minecode/migrations/0011_auto_20170807_0253.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0010_resourceuri_mining_level'), + ('minecode', '0010_resourceuri_mining_level'), ] operations = [ 
diff --git a/minecode/src/discovery/migrations/0012_auto_20170807_0444.py b/minecode/migrations/0012_auto_20170807_0444.py similarity index 88% rename from minecode/src/discovery/migrations/0012_auto_20170807_0444.py rename to minecode/migrations/0012_auto_20170807_0444.py index c4fb1080..0fd6175d 100644 --- a/minecode/src/discovery/migrations/0012_auto_20170807_0444.py +++ b/minecode/migrations/0012_auto_20170807_0444.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0011_auto_20170807_0253'), + ('minecode', '0011_auto_20170807_0253'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0013_auto_20170807_0511.py b/minecode/migrations/0013_auto_20170807_0511.py similarity index 72% rename from minecode/src/discovery/migrations/0013_auto_20170807_0511.py rename to minecode/migrations/0013_auto_20170807_0511.py index 2c31a5ef..e1694bc5 100644 --- a/minecode/src/discovery/migrations/0013_auto_20170807_0511.py +++ b/minecode/migrations/0013_auto_20170807_0511.py @@ -8,16 +8,16 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0012_auto_20170807_0444'), + ('minecode', '0012_auto_20170807_0444'), ] operations = [ migrations.AddIndex( model_name='resourceuri', - index=models.Index(fields=['is_visitable', 'last_visit_date', 'wip_date'], name='discovery_r_is_visi_29fde2_idx'), + index=models.Index(fields=['is_visitable', 'last_visit_date', 'wip_date'], name='minecode_r_is_visi_29fde2_idx'), ), migrations.AddIndex( model_name='resourceuri', - index=models.Index(fields=['is_mappable', 'last_visit_date', 'wip_date', 'last_map_date', 'visit_error'], name='discovery_r_is_mapp_e362dc_idx'), + index=models.Index(fields=['is_mappable', 'last_visit_date', 'wip_date', 'last_map_date', 'visit_error'], name='minecode_r_is_mapp_e362dc_idx'), ), ] diff --git a/minecode/src/discovery/migrations/0014_auto_20170807_0529.py b/minecode/migrations/0014_auto_20170807_0529.py similarity index 81% rename from 
minecode/src/discovery/migrations/0014_auto_20170807_0529.py rename to minecode/migrations/0014_auto_20170807_0529.py index ba4130f2..b72923cc 100644 --- a/minecode/src/discovery/migrations/0014_auto_20170807_0529.py +++ b/minecode/migrations/0014_auto_20170807_0529.py @@ -8,12 +8,12 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0013_auto_20170807_0511'), + ('minecode', '0013_auto_20170807_0511'), ] operations = [ migrations.AddIndex( model_name='resourceuri', - index=models.Index(fields=['-priority', 'rank'], name='discovery_r_priorit_daf7b6_idx'), + index=models.Index(fields=['-priority', 'rank'], name='minecode_r_priorit_daf7b6_idx'), ), ] diff --git a/minecode/src/discovery/migrations/0015_auto_20180607_0851.py b/minecode/migrations/0015_auto_20180607_0851.py similarity index 92% rename from minecode/src/discovery/migrations/0015_auto_20180607_0851.py rename to minecode/migrations/0015_auto_20180607_0851.py index 14be2039..72a43a11 100644 --- a/minecode/src/discovery/migrations/0015_auto_20180607_0851.py +++ b/minecode/migrations/0015_auto_20180607_0851.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0014_auto_20170807_0529'), + ('minecode', '0014_auto_20170807_0529'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0016_remove_resourceuri_sig.py b/minecode/migrations/0016_remove_resourceuri_sig.py similarity index 87% rename from minecode/src/discovery/migrations/0016_remove_resourceuri_sig.py rename to minecode/migrations/0016_remove_resourceuri_sig.py index c2039720..23dd20f0 100644 --- a/minecode/src/discovery/migrations/0016_remove_resourceuri_sig.py +++ b/minecode/migrations/0016_remove_resourceuri_sig.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0015_auto_20180607_0851'), + ('minecode', '0015_auto_20180607_0851'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0017_auto_20180619_2236.py 
b/minecode/migrations/0017_auto_20180619_2236.py similarity index 96% rename from minecode/src/discovery/migrations/0017_auto_20180619_2236.py rename to minecode/migrations/0017_auto_20180619_2236.py index 282443e9..9f4a2ae5 100644 --- a/minecode/src/discovery/migrations/0017_auto_20180619_2236.py +++ b/minecode/migrations/0017_auto_20180619_2236.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0016_remove_resourceuri_sig'), + ('minecode', '0016_remove_resourceuri_sig'), ] operations = [ @@ -41,11 +41,11 @@ class Migration(migrations.Migration): ), migrations.AddIndex( model_name='scannableuri', - index=models.Index(fields=['scan_status', 'scan_request_date', 'last_status_poll_date'], name='discovery_s_scan_st_21b64d_idx'), + index=models.Index(fields=['scan_status', 'scan_request_date', 'last_status_poll_date'], name='minecode_s_scan_st_21b64d_idx'), ), migrations.AddIndex( model_name='scannableuri', - index=models.Index(fields=['-priority', 'rank'], name='discovery_s_priorit_5b5e01_idx'), + index=models.Index(fields=['-priority', 'rank'], name='minecode_s_priorit_5b5e01_idx'), ), migrations.AlterUniqueTogether( name='scannableuri', diff --git a/minecode/src/discovery/migrations/0018_scannableuri_package_id.py b/minecode/migrations/0018_scannableuri_package_id.py similarity index 90% rename from minecode/src/discovery/migrations/0018_scannableuri_package_id.py rename to minecode/migrations/0018_scannableuri_package_id.py index a2c440d0..4aa4f382 100644 --- a/minecode/src/discovery/migrations/0018_scannableuri_package_id.py +++ b/minecode/migrations/0018_scannableuri_package_id.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0017_auto_20180619_2236'), + ('minecode', '0017_auto_20180619_2236'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0019_auto_20180716_1648.py b/minecode/migrations/0019_auto_20180716_1648.py similarity index 91% rename from 
minecode/src/discovery/migrations/0019_auto_20180716_1648.py rename to minecode/migrations/0019_auto_20180716_1648.py index 9254f89e..18484a98 100644 --- a/minecode/src/discovery/migrations/0019_auto_20180716_1648.py +++ b/minecode/migrations/0019_auto_20180716_1648.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0018_scannableuri_package_id'), + ('minecode', '0018_scannableuri_package_id'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0020_resourceuri_package_url.py b/minecode/migrations/0020_resourceuri_package_url.py similarity index 91% rename from minecode/src/discovery/migrations/0020_resourceuri_package_url.py rename to minecode/migrations/0020_resourceuri_package_url.py index 247e1638..5a5776da 100644 --- a/minecode/src/discovery/migrations/0020_resourceuri_package_url.py +++ b/minecode/migrations/0020_resourceuri_package_url.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0019_auto_20180716_1648'), + ('minecode', '0019_auto_20180716_1648'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0021_auto_20181026_1635.py b/minecode/migrations/0021_auto_20181026_1635.py similarity index 93% rename from minecode/src/discovery/migrations/0021_auto_20181026_1635.py rename to minecode/migrations/0021_auto_20181026_1635.py index 8bd9d285..abf64f54 100644 --- a/minecode/src/discovery/migrations/0021_auto_20181026_1635.py +++ b/minecode/migrations/0021_auto_20181026_1635.py @@ -10,7 +10,7 @@ class Migration(migrations.Migration): dependencies = [ ('packagedb', '0023_package_source_packages'), - ('discovery', '0020_resourceuri_package_url'), + ('minecode', '0020_resourceuri_package_url'), ] operations = [ diff --git a/minecode/src/discovery/migrations/0022_auto_20190307_2332.py b/minecode/migrations/0022_auto_20190307_2332.py similarity index 69% rename from minecode/src/discovery/migrations/0022_auto_20190307_2332.py rename to 
minecode/migrations/0022_auto_20190307_2332.py index 22082d4a..f588b823 100644 --- a/minecode/src/discovery/migrations/0022_auto_20190307_2332.py +++ b/minecode/migrations/0022_auto_20190307_2332.py @@ -8,17 +8,17 @@ class Migration(migrations.Migration): dependencies = [ - ('discovery', '0021_auto_20181026_1635'), + ('minecode', '0021_auto_20181026_1635'), ] operations = [ migrations.RemoveIndex( model_name='scannableuri', - name='discovery_s_priorit_5b5e01_idx', + name='minecode_s_priorit_5b5e01_idx', ), migrations.RemoveIndex( model_name='resourceuri', - name='discovery_r_priorit_daf7b6_idx', + name='minecode_r_priorit_daf7b6_idx', ), migrations.RemoveField( model_name='resourceuri', @@ -30,10 +30,10 @@ class Migration(migrations.Migration): ), migrations.AddIndex( model_name='scannableuri', - index=models.Index(fields=['-priority'], name='discovery_s_priorit_2f397f_idx'), + index=models.Index(fields=['-priority'], name='minecode_s_priorit_2f397f_idx'), ), migrations.AddIndex( model_name='resourceuri', - index=models.Index(fields=['-priority'], name='discovery_r_priorit_b8bce2_idx'), + index=models.Index(fields=['-priority'], name='minecode_r_priorit_b8bce2_idx'), ), ] diff --git a/minecode/migrations/__init__.py b/minecode/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/minecode/src/discovery/models.py b/minecode/models.py similarity index 99% rename from minecode/src/discovery/models.py rename to minecode/models.py index f25fda5f..718dcb40 100644 --- a/minecode/src/discovery/models.py +++ b/minecode/models.py @@ -16,13 +16,13 @@ from django.utils import timezone -from discovery import map_router -from discovery import visit_router +from minecode import map_router +from minecode import visit_router # UnusedImport here! 
# But importing the mappers and visitors module triggers routes registration -from discovery import mappers # NOQA -from discovery import visitors # NOQA +from minecode import mappers # NOQA +from minecode import visitors # NOQA from packagedb.models import Package diff --git a/minecode/requirements-dev.txt b/minecode/requirements-dev.txt deleted file mode 100644 index 19627e69..00000000 --- a/minecode/requirements-dev.txt +++ /dev/null @@ -1,15 +0,0 @@ -aboutcode-toolkit==7.0.2 -black==22.10.0 -et-xmlfile==1.1.0 -execnet==1.9.0 -iniconfig==1.1.1 -mock==4.0.3 -mypy-extensions==0.4.3 -openpyxl==3.0.10 -pathspec==0.10.1 -platformdirs==2.5.2 -py==1.11.0 -pytest==7.1.3 -pytest-forked==1.4.0 -pytest-xdist==2.5.0 -tomli==2.0.1 \ No newline at end of file diff --git a/minecode/src/discovery/route.py b/minecode/route.py similarity index 100% rename from minecode/src/discovery/route.py rename to minecode/route.py diff --git a/minecode/src/discovery/rsync.py b/minecode/rsync.py similarity index 98% rename from minecode/src/discovery/rsync.py rename to minecode/rsync.py index c8b7016b..1e0971db 100644 --- a/minecode/src/discovery/rsync.py +++ b/minecode/rsync.py @@ -15,8 +15,8 @@ import arrow from dateutil import tz -from discovery import command -from discovery.utils import get_temp_file +from minecode import command +from minecode.utils import get_temp_file logger = logging.getLogger(__name__) # import sys diff --git a/minecode/src/discovery/saneyaml.py b/minecode/saneyaml.py similarity index 100% rename from minecode/src/discovery/saneyaml.py rename to minecode/saneyaml.py diff --git a/minecode/src/discovery/seed.py b/minecode/seed.py similarity index 97% rename from minecode/src/discovery/seed.py rename to minecode/seed.py index 0ba75823..8baccc2a 100644 --- a/minecode/src/discovery/seed.py +++ b/minecode/seed.py @@ -61,6 +61,6 @@ def get_configured_seeders(): Return Seeder class qualified names referenced as active in the settings or environment. 
""" - from discovery.management.commands import get_settings + from minecode.management.commands import get_settings # ACTIVE_VISITOR_SEEDS is a list of fully qualified Seeder subclass strings return get_settings('ACTIVE_SEEDERS') or [] diff --git a/minecode/setup.cfg b/minecode/setup.cfg deleted file mode 100644 index 5dc68b71..00000000 --- a/minecode/setup.cfg +++ /dev/null @@ -1,77 +0,0 @@ -[metadata] -name = minecode -version = 2.0.0 - -author = nexB. Inc. and others -author_email = info@nexb.com -license = license = Apache-2.0 AND CC-BY-SA-4.0 - -# description must be on ONE line https://github.com/pypa/setuptools/issues/1390 -description = MineCode - A purl mining tool -long_description = file:README.rst -url = https://github.com/nexB/purldb/minecode -classifiers = - Programming Language :: Python - Programming Language :: Python :: 3 - Topic :: Utilities - -keywords = - packagedb - scancode - purl - purldb - -license_files = - apache-2.0.LICENSE - cc-by-sa-4.0.LICENSE - CHANGELOG.rst - README.rst - AUTHORS.rst - NOTICE - - -[options] -#setup_requires = setuptools_scm[toml] >= 4 - -package_dir = - =src - -packages = find: -include_package_data = true -zip_safe = false -install_requires = - arrow==1.2.3 - debian-inspector==31.0.0 - Django==4.1.2 - django-filter==22.1 - djangorestframework==3.14.0 - ftputil==5.0.4 - jawa==2.2.0 - packagedb - psycopg2-binary==2.9.3 - PyGithub==1.56 - rubymarshal==1.0.3 - scancode-toolkit - reppy2==0.3.6 - urlpy==0.5 - -python_requires = >=3.8.* - - -[options.packages.find] -where = src - - -[options.extras_require] -testing = - pytest >= 6, != 7.0.0 - pytest-xdist >= 2 - aboutcode-toolkit >= 6.0.0 - black - mock - - -docs = - Sphinx >= 3.3.1 - sphinx-rtd-theme >= 0.5.0 - doc8 >= 0.8.1 diff --git a/minecode/src/minecodeio/settings/__init__.py b/minecode/src/minecodeio/settings/__init__.py deleted file mode 100644 index 060956c0..00000000 --- a/minecode/src/minecodeio/settings/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# 
Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import socket - - -PRODUCTION_HOSTNAME = 'TBD' - -hostname = socket.gethostname() - -if hostname.endswith(PRODUCTION_HOSTNAME): - from minecodeio.settings.production import * -else: - from minecodeio.settings.dev import * - -# DO NOT ADD ANYTHING MORE HERE diff --git a/minecode/src/minecodeio/settings/base.py b/minecode/src/minecodeio/settings/base.py deleted file mode 100644 index af080d1d..00000000 --- a/minecode/src/minecodeio/settings/base.py +++ /dev/null @@ -1,155 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - - -import os -from os.path import join, dirname, abspath - -""" -Django settings for minecodeio project. -""" - - -here = lambda *dirs: join(abspath(dirname(__file__)), *dirs) -BASE_DIR = here('..', '..') -root = lambda *dirs: join(abspath(BASE_DIR), *dirs) -root_parent = lambda *dirs: join(abspath(root('..')), *dirs) - - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
-BASE_DIR = os.path.dirname(os.path.dirname(__file__)) - - -INSTALLED_APPS = ( - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'rest_framework', - 'django_filters', - 'discovery', - 'packagedb', -) - -MIDDLEWARE = ( - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -) - -ROOT_URLCONF = 'minecodeio.urls' - -# Database -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', - 'NAME': 'packagedb', - 'USER': 'packagedb', - 'PASSWORD': 'packagedb', - 'HOST': '127.0.0.1', - 'PORT': '5432', - 'ATOMIC_REQUESTS': True, - }, -} - -REST_FRAMEWORK = { - 'DEFAULT_PAGINATION_CLASS': 'packagedb.api_custom.PageSizePagination', - 'DEFAULT_AUTHENTICATION_CLASSES': (), - 'DEFAULT_RENDERER_CLASSES': ( - 'rest_framework.renderers.BrowsableAPIRenderer', - 'rest_framework.renderers.JSONRenderer', - 'rest_framework.renderers.AdminRenderer', - ), - 'DEFAULT_FILTER_BACKENDS': ( - 'django_filters.rest_framework.DjangoFilterBackend', - 'rest_framework.filters.SearchFilter', - ), -} - -# Internationalization - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'US/Pacific' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - -# Static files (CSS, JavaScript, Images) - -STATIC_URL = '/static/' - -# Templates -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - # The cached template loader is a 
class-based loader that you - # configure with a list of other loaders that it should wrap. - # The wrapped loaders are used to locate unknown templates when - # they are first encountered. The cached loader then stores the - # compiled Template in memory. The cached Template instance is - # returned for subsequent requests to load the same template. - 'loaders': [ - ('django.template.loaders.cached.Loader', [ - 'django.template.loaders.app_directories.Loader', - ]), - ], - }, - }, -] - -# Cache - -CACHES = { - 'default': { - 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', - } -} - -# Security - -CSRF_COOKIE_SECURE = True - -SESSION_COOKIE_SECURE = True - -# Requests - -REQUESTS_ARGS = { - - # Configuration for Request to use Tor over a Privoxy proxy - # Comment-out for a direct access - # 'proxies': { - # 'http': 'http://127.0.0.1:8118', - # 'https': 'https://127.0.0.1:8118', - # } -} - -# Instead of sending out real emails the console backend just writes the emails -# that would be sent to the standard output. -EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' - -# Active seeders: each active seeder class need to be added explictly here -ACTIVE_SEEDERS = [ - 'discovery.visitors.npm.NpmSeed', -] diff --git a/minecode/src/minecodeio/settings/ci.py b/minecode/src/minecodeio/settings/ci.py deleted file mode 100644 index 715eaa21..00000000 --- a/minecode/src/minecodeio/settings/ci.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# - - -from os.path import join, dirname, abspath - -""" -Django settings for continuous integration -""" - -# Using exec() instead of "import *" to avoid any side effects -with open(join(dirname(abspath(__file__)), 'base.py')) as f: - exec(f.read()) - - -# Reset the custom Requests args in test mode -REQUESTS_ARGS = {} - -DATABASES['default']['USER'] = 'postgres' -DATABASES['default']['PASSWORD'] = 'postgres' - -SECRET_KEY = '-@j@p68cdzzxss3x8=i#*ml#@-k3$l=b7_cd440$36pn)mddam' diff --git a/minecode/src/minecodeio/settings/dev.py b/minecode/src/minecodeio/settings/dev.py deleted file mode 100644 index e4af7cd4..00000000 --- a/minecode/src/minecodeio/settings/dev.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - - -from os.path import join, dirname, abspath - -""" -Django settings for local development -""" - -# Using exec() instead of "import *" to avoid any side effects -with open(join(dirname(abspath(__file__)), 'base.py')) as f: - exec(f.read()) - - -DEBUG = True -TEMPLATES[0]['OPTIONS']['debug'] = True - -SECRET_KEY = '-@j@p68cdzzk2s3x8=i#*ml#@-k3$l=b7_cd440$36pn)msdam' - -EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' diff --git a/minecode/src/minecodeio/urls.py b/minecode/src/minecodeio/urls.py deleted file mode 100644 index 4323d5d7..00000000 --- a/minecode/src/minecodeio/urls.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. 
-# See https://aboutcode.org for more information about nexB OSS projects. -# - - -from django.conf.urls import include -from django.urls import re_path - -from packagedbio import urls as packagedb_urls - - -urlpatterns = [ - re_path(r'^api/', include((packagedb_urls.api_router.urls, 'api'))), -] diff --git a/minecode/src/minecodeio/wsgi.py b/minecode/src/minecodeio/wsgi.py deleted file mode 100644 index 080a2c69..00000000 --- a/minecode/src/minecodeio/wsgi.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - - -import os -from django.core.wsgi import get_wsgi_application - - -""" -WSGI config for minecodeio_site project. - -It exposes the WSGI callable as a module-level variable named ``application``. - -For more information on this file, see -https://docs.djangoproject.com/en/1.6/howto/deployment/wsgi/ -""" - - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "minecodeio.settings") - -application = get_wsgi_application() diff --git a/minecode/tests/discovery/__init__.py b/minecode/tests/discovery/__init__.py deleted file mode 100644 index 2eb8f9f0..00000000 --- a/minecode/tests/discovery/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# diff --git a/minecode/tests/discovery/test_command.py b/minecode/tests/test_command.py similarity index 89% rename from minecode/tests/discovery/test_command.py rename to minecode/tests/test_command.py index dab6ce22..a21f8d9e 100644 --- a/minecode/tests/discovery/test_command.py +++ b/minecode/tests/test_command.py @@ -10,9 +10,9 @@ import os -from discovery import command -from discovery import ON_WINDOWS -from discovery.utils_test import MiningTestCase +from minecode import command +from minecode import ON_WINDOWS +from minecode.utils_test import MiningTestCase class CommandTest(MiningTestCase): diff --git a/minecode/tests/discovery/test_debian.py b/minecode/tests/test_debian.py similarity index 98% rename from minecode/tests/discovery/test_debian.py rename to minecode/tests/test_debian.py index 73202033..54c3a117 100644 --- a/minecode/tests/discovery/test_debian.py +++ b/minecode/tests/test_debian.py @@ -16,12 +16,12 @@ from mock import patch from debian_inspector import debcon -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery import debutils -from discovery.mappers import debian as debian_mapper -from discovery.visitors import debian as debian_visitor +from minecode import debutils +from minecode.mappers import debian as debian_mapper +from minecode.visitors import debian as debian_visitor class BaseDebianTest(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_fdroid.py b/minecode/tests/test_fdroid.py similarity index 88% rename from minecode/tests/discovery/test_fdroid.py rename to minecode/tests/test_fdroid.py index 058fe61c..8a09509c 100644 --- a/minecode/tests/discovery/test_fdroid.py +++ b/minecode/tests/test_fdroid.py @@ -12,12 +12,12 @@ from mock import patch -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from 
minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery.mappers import fdroid as fdroid_mapper -from discovery.visitors import fdroid as fdroid_visitor -from discovery.visitors import URI +from minecode.mappers import fdroid as fdroid_mapper +from minecode.visitors import fdroid as fdroid_visitor +from minecode.visitors import URI class TestFdroidVisitor(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_freebsd.py b/minecode/tests/test_freebsd.py similarity index 93% rename from minecode/tests/discovery/test_freebsd.py rename to minecode/tests/test_freebsd.py index 9f7028ce..27fdda28 100644 --- a/minecode/tests/discovery/test_freebsd.py +++ b/minecode/tests/test_freebsd.py @@ -14,11 +14,11 @@ from mock import Mock from mock import patch -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery import mappers -from discovery.visitors import freebsd +from minecode import mappers +from minecode.visitors import freebsd class FreeBSDVistorTest(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_housekeeping.py b/minecode/tests/test_housekeeping.py similarity index 93% rename from minecode/tests/discovery/test_housekeeping.py rename to minecode/tests/test_housekeeping.py index 879fc081..4f92361b 100644 --- a/minecode/tests/discovery/test_housekeeping.py +++ b/minecode/tests/test_housekeeping.py @@ -19,14 +19,14 @@ import packagedb -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery.management.commands.check_licenses import find_ambiguous_packages -from discovery.management.commands.run_map import map_uri -from discovery.management.commands.run_visit import 
visit_uri +from minecode.management.commands.check_licenses import find_ambiguous_packages +from minecode.management.commands.run_map import map_uri +from minecode.management.commands.run_visit import visit_uri -from discovery.models import ResourceURI +from minecode.models import ResourceURI class PackageLicenseCheckTest(JsonBasedTesting, DjangoTestCase): diff --git a/minecode/tests/discovery/test_ls.py b/minecode/tests/test_ls.py similarity index 97% rename from minecode/tests/discovery/test_ls.py rename to minecode/tests/test_ls.py index 50ae4633..79d43d5d 100644 --- a/minecode/tests/discovery/test_ls.py +++ b/minecode/tests/test_ls.py @@ -12,8 +12,8 @@ import os -from discovery.utils_test import JsonBasedTesting -from discovery import ls +from minecode.utils_test import JsonBasedTesting +from minecode import ls class ParseDirectoryListingTest(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_maven.py b/minecode/tests/test_maven.py similarity index 98% rename from minecode/tests/discovery/test_maven.py rename to minecode/tests/test_maven.py index 29a5b927..a0367dd7 100644 --- a/minecode/tests/discovery/test_maven.py +++ b/minecode/tests/test_maven.py @@ -19,16 +19,16 @@ import packagedb -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting -from discovery.utils_test import model_to_dict +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting +from minecode.utils_test import model_to_dict -from discovery.management.commands.run_map import map_uri -from discovery.management.commands.run_visit import visit_uri +from minecode.management.commands.run_map import map_uri +from minecode.management.commands.run_visit import visit_uri -from discovery.mappers import maven as maven_mapper -from discovery.models import ResourceURI -from discovery.visitors import maven as maven_visitor +from minecode.mappers import maven as maven_mapper +from minecode.models import 
ResourceURI +from minecode.visitors import maven as maven_visitor # TODO: add tests from /maven-indexer/indexer-core/src/test/java/org/acche/maven/index/artifact diff --git a/minecode/tests/discovery/test_models.py b/minecode/tests/test_models.py similarity index 99% rename from minecode/tests/discovery/test_models.py rename to minecode/tests/test_models.py index ddce0896..f3ab4ef5 100644 --- a/minecode/tests/discovery/test_models.py +++ b/minecode/tests/test_models.py @@ -13,13 +13,13 @@ from django.test import TestCase from django.utils import timezone -from discovery import visitors -from discovery import mappers +from minecode import visitors +from minecode import mappers -from discovery.models import ResourceURI +from minecode.models import ResourceURI from packagedb.models import Package -from discovery.models import get_canonical -from discovery.models import ScannableURI +from minecode.models import get_canonical +from minecode.models import ScannableURI class ResourceURIModelTestCase(TestCase): diff --git a/minecode/tests/discovery/test_npm.py b/minecode/tests/test_npm.py similarity index 97% rename from minecode/tests/discovery/test_npm.py rename to minecode/tests/test_npm.py index d51d0576..34c56b12 100644 --- a/minecode/tests/discovery/test_npm.py +++ b/minecode/tests/test_npm.py @@ -16,13 +16,13 @@ from mock import Mock from mock import patch -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery import mappers -from discovery import route -from discovery.models import ResourceURI -from discovery.visitors import npm +from minecode import mappers +from minecode import route +from minecode.models import ResourceURI +from minecode.visitors import npm class TestNPMVisit(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_pypi.py b/minecode/tests/test_pypi.py similarity index 96% 
rename from minecode/tests/discovery/test_pypi.py rename to minecode/tests/test_pypi.py index ef6502a9..f5eb4396 100644 --- a/minecode/tests/discovery/test_pypi.py +++ b/minecode/tests/test_pypi.py @@ -19,15 +19,15 @@ from packagedb.models import Package -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting - -from discovery import mappers -from discovery import visitors -from discovery.visitors import URI -from discovery.models import ResourceURI -from discovery.route import Router -from discovery.management.commands.run_map import map_uri +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting + +from minecode import mappers +from minecode import visitors +from minecode.visitors import URI +from minecode.models import ResourceURI +from minecode.route import Router +from minecode.management.commands.run_map import map_uri class TestPypiVisit(JsonBasedTesting, DjangoTestCase): diff --git a/minecode/tests/discovery/test_route.py b/minecode/tests/test_route.py similarity index 99% rename from minecode/tests/discovery/test_route.py rename to minecode/tests/test_route.py index 56cfe3a2..0ff2660b 100644 --- a/minecode/tests/discovery/test_route.py +++ b/minecode/tests/test_route.py @@ -10,8 +10,8 @@ from django.test import TestCase -from discovery import route -from discovery.route import Rule +from minecode import route +from minecode.route import Rule class RouteTest(TestCase): diff --git a/minecode/tests/discovery/test_rsync.py b/minecode/tests/test_rsync.py similarity index 98% rename from minecode/tests/discovery/test_rsync.py rename to minecode/tests/test_rsync.py index e591d48c..21a72472 100644 --- a/minecode/tests/discovery/test_rsync.py +++ b/minecode/tests/test_rsync.py @@ -7,13 +7,13 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# -# +# from unittest import skipIf import os -from discovery import rsync -from discovery import ON_WINDOWS -from discovery.utils_test import MiningTestCase +from minecode import rsync +from minecode import ON_WINDOWS +from minecode.utils_test import MiningTestCase class RsyncTest(MiningTestCase): diff --git a/minecode/tests/discovery/test_rubygems.py b/minecode/tests/test_rubygems.py similarity index 92% rename from minecode/tests/discovery/test_rubygems.py rename to minecode/tests/test_rubygems.py index 7230b852..3989a284 100644 --- a/minecode/tests/discovery/test_rubygems.py +++ b/minecode/tests/test_rubygems.py @@ -20,23 +20,23 @@ from commoncode.fileutils import file_name from django.test import TestCase as DjangoTestCase -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting -from discovery.utils_test import model_to_dict +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting +from minecode.utils_test import model_to_dict -from discovery import mappers -from discovery import route -from discovery.models import ResourceURI -from discovery import visit_router -from discovery.mappers.rubygems import build_rubygem_packages_from_api_data -from discovery.mappers.rubygems import build_rubygem_packages_from_metadata -from discovery.mappers.rubygems import RubyGemsApiVersionsJsonMapper -from discovery.mappers.rubygems import RubyGemsPackageArchiveMetadataMapper +from minecode import mappers +from minecode import route +from minecode.models import ResourceURI +from minecode import visit_router +from minecode.mappers.rubygems import build_rubygem_packages_from_api_data +from minecode.mappers.rubygems import build_rubygem_packages_from_metadata +from minecode.mappers.rubygems import RubyGemsApiVersionsJsonMapper +from minecode.mappers.rubygems import RubyGemsPackageArchiveMetadataMapper -from discovery.visitors.rubygems import get_gem_metadata -from 
discovery.visitors.rubygems import RubyGemsApiManyVersionsVisitor -from discovery.visitors.rubygems import RubyGemsIndexVisitor -from discovery.visitors.rubygems import RubyGemsPackageArchiveMetadataVisitor +from minecode.visitors.rubygems import get_gem_metadata +from minecode.visitors.rubygems import RubyGemsApiManyVersionsVisitor +from minecode.visitors.rubygems import RubyGemsIndexVisitor +from minecode.visitors.rubygems import RubyGemsPackageArchiveMetadataVisitor # @@ -265,8 +265,8 @@ class RubyEnd2EndTest(JsonBasedTesting, DjangoTestCase): test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles') def test_visit_and_map_end2end(self): - from discovery.management.commands.run_visit import visit_uri - from discovery.management.commands.run_map import map_uri + from minecode.management.commands.run_visit import visit_uri + from minecode.management.commands.run_map import map_uri import packagedb uri = 'https://rubygems.org/downloads/sprockets-vendor_gems-0.1.3.gem' diff --git a/minecode/tests/discovery/test_run_map.py b/minecode/tests/test_run_map.py similarity index 98% rename from minecode/tests/discovery/test_run_map.py rename to minecode/tests/test_run_map.py index acf56369..ed250cba 100644 --- a/minecode/tests/discovery/test_run_map.py +++ b/minecode/tests/test_run_map.py @@ -18,14 +18,14 @@ import packagedb -from discovery.utils_test import MiningTestCase - -from discovery.management.commands.run_map import map_uri -from discovery.management.commands.run_map import merge_packages -from discovery.models import ResourceURI -from discovery.route import Router -from discovery.models import ScannableURI -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import MiningTestCase + +from minecode.management.commands.run_map import map_uri +from minecode.management.commands.run_map import merge_packages +from minecode.models import ResourceURI +from minecode.route import Router +from minecode.models import ScannableURI +from 
minecode.utils_test import JsonBasedTesting class RunMapTest(JsonBasedTesting, MiningTestCase): diff --git a/minecode/tests/discovery/test_run_visit.py b/minecode/tests/test_run_visit.py similarity index 98% rename from minecode/tests/discovery/test_run_visit.py rename to minecode/tests/test_run_visit.py index e53a1d02..9ff51287 100644 --- a/minecode/tests/discovery/test_run_visit.py +++ b/minecode/tests/test_run_visit.py @@ -15,11 +15,11 @@ from django.core import management from django.forms.models import model_to_dict -from discovery.utils_test import MiningTestCase -from discovery.management.commands.run_visit import visit_uri -from discovery.models import ResourceURI -from discovery.route import Router -from discovery.visitors import URI +from minecode.utils_test import MiningTestCase +from minecode.management.commands.run_visit import visit_uri +from minecode.models import ResourceURI +from minecode.route import Router +from minecode.visitors import URI class RunVisitWithCounterTest(MiningTestCase): diff --git a/minecode/tests/discovery/test_seed.py b/minecode/tests/test_seed.py similarity index 95% rename from minecode/tests/discovery/test_seed.py rename to minecode/tests/test_seed.py index 46c87d6c..7a59b0f4 100644 --- a/minecode/tests/discovery/test_seed.py +++ b/minecode/tests/test_seed.py @@ -16,11 +16,11 @@ from django.utils import timezone from mock import patch -from discovery.management.commands.seed import SEED_PRIORITY -from discovery.management.commands.seed import insert_seed_uris -from discovery.models import ResourceURI -from discovery import seed -from discovery.utils_test import MiningTestCase +from minecode.management.commands.seed import SEED_PRIORITY +from minecode.management.commands.seed import insert_seed_uris +from minecode.models import ResourceURI +from minecode import seed +from minecode.utils_test import MiningTestCase class RevisitSeedTest(MiningTestCase): @@ -137,7 +137,7 @@ def get_seeds(self): self.SampleSeed3 = SampleSeed3() 
self.SampleSeed4 = SampleSeed4() - @patch('discovery.seed.get_active_seeders') + @patch('minecode.seed.get_active_seeders') def test_seed_command(self, mock_get_active_seeders): output = StringIO() mock_get_active_seeders.return_value = [self.SampleSeed0] @@ -162,7 +162,7 @@ def test_seed_command(self, mock_get_active_seeders): self.assertEqual(3, len([s.is_visitable for s in seeded])) self.assertTrue(all(s.priority == SEED_PRIORITY for s in seeded)) - @patch('discovery.seed.get_active_seeders') + @patch('minecode.seed.get_active_seeders') def test_insert_seed_uris_inserts_uris_for_active_seeders_with_pattern(self, mock_get_active_seeders): mock_get_active_seeders.return_value = [self.SampleSeed1] before = list(ResourceURI.objects.all().values_list('id')) @@ -239,6 +239,6 @@ def test_get_active_seeders(self): def test_get_configured_seeders(self): seeders = seed.get_configured_seeders() expected = [ - 'discovery.visitors.npm.NpmSeed', + 'minecode.visitors.npm.NpmSeed', ] assert sorted(expected) == sorted(seeders) diff --git a/minecode/tests/test_skeleton_codestyle.py b/minecode/tests/test_skeleton_codestyle.py deleted file mode 100644 index 2eb6e558..00000000 --- a/minecode/tests/test_skeleton_codestyle.py +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import subprocess -import unittest -import configparser - - -class BaseTests(unittest.TestCase): - def test_skeleton_codestyle(self): - """ - This test shouldn't run in proliferated repositories. 
- """ - setup_cfg = configparser.ConfigParser() - setup_cfg.read("setup.cfg") - if setup_cfg["metadata"]["name"] != "skeleton": - return - - args = "venv/bin/black --check -l 100 setup.py etc tests" - try: - subprocess.check_output(args.split()) - except subprocess.CalledProcessError as e: - print("===========================================================") - print(e.output) - print("===========================================================") - raise Exception( - "Black style check failed; please format the code using:\n" - " python -m black -l 100 setup.py etc tests", - e.output, - ) from e diff --git a/minecode/tests/discovery/test_sourceforge.py b/minecode/tests/test_sourceforge.py similarity index 96% rename from minecode/tests/discovery/test_sourceforge.py rename to minecode/tests/test_sourceforge.py index ad78be41..9cd29bc1 100644 --- a/minecode/tests/discovery/test_sourceforge.py +++ b/minecode/tests/test_sourceforge.py @@ -12,11 +12,11 @@ from mock import patch -from discovery.utils_test import mocked_requests_get -from discovery.utils_test import JsonBasedTesting +from minecode.utils_test import mocked_requests_get +from minecode.utils_test import JsonBasedTesting -from discovery import mappers -from discovery.visitors import sourceforge +from minecode import mappers +from minecode.visitors import sourceforge class SourceforgeVisitorsTest(JsonBasedTesting): diff --git a/minecode/tests/discovery/test_utils.py b/minecode/tests/test_utils.py similarity index 93% rename from minecode/tests/discovery/test_utils.py rename to minecode/tests/test_utils.py index e08826a8..66e0dde9 100644 --- a/minecode/tests/discovery/test_utils.py +++ b/minecode/tests/test_utils.py @@ -14,9 +14,9 @@ from packagedcode import models as scan_models -from discovery.utils_test import JsonBasedTesting -from discovery.utils import is_int -from discovery.utils import stringify_null_purl_fields +from minecode.utils_test import JsonBasedTesting +from minecode.utils import is_int +from 
minecode.utils import stringify_null_purl_fields class UtilsTest(JsonBasedTesting, DjangoTestCase): diff --git a/minecode/tests/discovery/test_version.py b/minecode/tests/test_version.py similarity index 99% rename from minecode/tests/discovery/test_version.py rename to minecode/tests/test_version.py index c4005083..3e66fef6 100644 --- a/minecode/tests/discovery/test_version.py +++ b/minecode/tests/test_version.py @@ -13,7 +13,7 @@ import unittest from unittest.case import expectedFailure -from discovery.version import version_hint +from minecode.version import version_hint class VersionHintTestCase(unittest.TestCase): diff --git a/minecode/tests/testfiles/command/bar b/minecode/tests/testfiles/command/bar new file mode 100644 index 00000000..e69de29b diff --git a/minecode/tests/discovery/testfiles/command/foo/.gitignore b/minecode/tests/testfiles/command/foo/.gitignore similarity index 100% rename from minecode/tests/discovery/testfiles/command/foo/.gitignore rename to minecode/tests/testfiles/command/foo/.gitignore diff --git a/minecode/tests/discovery/testfiles/debian/copyright/basic_copyright b/minecode/tests/testfiles/debian/copyright/basic_copyright similarity index 100% rename from minecode/tests/discovery/testfiles/debian/copyright/basic_copyright rename to minecode/tests/testfiles/debian/copyright/basic_copyright diff --git a/minecode/tests/discovery/testfiles/debian/copyright/copyright_abiword_common_copyright-abiword_common_copyright.copyright b/minecode/tests/testfiles/debian/copyright/copyright_abiword_common_copyright-abiword_common_copyright.copyright similarity index 100% rename from minecode/tests/discovery/testfiles/debian/copyright/copyright_abiword_common_copyright-abiword_common_copyright.copyright rename to minecode/tests/testfiles/debian/copyright/copyright_abiword_common_copyright-abiword_common_copyright.copyright diff --git a/minecode/tests/discovery/testfiles/debian/copyright/invalid_copyright 
b/minecode/tests/testfiles/debian/copyright/invalid_copyright similarity index 100% rename from minecode/tests/discovery/testfiles/debian/copyright/invalid_copyright rename to minecode/tests/testfiles/debian/copyright/invalid_copyright diff --git a/minecode/tests/discovery/testfiles/debian/debian_lslrs_expected b/minecode/tests/testfiles/debian/debian_lslrs_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debian_lslrs_expected rename to minecode/tests/testfiles/debian/debian_lslrs_expected diff --git a/minecode/tests/discovery/testfiles/debian/debian_lslrs_on_ubuntu_expected b/minecode/tests/testfiles/debian/debian_lslrs_on_ubuntu_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debian_lslrs_on_ubuntu_expected rename to minecode/tests/testfiles/debian/debian_lslrs_on_ubuntu_expected diff --git a/minecode/tests/discovery/testfiles/debian/debian_sourceindex_expected b/minecode/tests/testfiles/debian/debian_sourceindex_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debian_sourceindex_expected rename to minecode/tests/testfiles/debian/debian_sourceindex_expected diff --git a/minecode/tests/discovery/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc b/minecode/tests/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc rename to minecode/tests/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc diff --git a/minecode/tests/discovery/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc-expected b/minecode/tests/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc-expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc-expected rename to minecode/tests/testfiles/debian/debutils/3dldf_2.0.3+dfsg-2.dsc-expected diff --git a/minecode/tests/discovery/testfiles/debian/debutils/control_basic 
b/minecode/tests/testfiles/debian/debutils/control_basic similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debutils/control_basic rename to minecode/tests/testfiles/debian/debutils/control_basic diff --git a/minecode/tests/discovery/testfiles/debian/debutils/control_invalid b/minecode/tests/testfiles/debian/debutils/control_invalid similarity index 100% rename from minecode/tests/discovery/testfiles/debian/debutils/control_invalid rename to minecode/tests/testfiles/debian/debutils/control_invalid diff --git a/minecode/tests/discovery/testfiles/debian/dsc/7kaa_2.14.3-1.dsc b/minecode/tests/testfiles/debian/dsc/7kaa_2.14.3-1.dsc similarity index 100% rename from minecode/tests/discovery/testfiles/debian/dsc/7kaa_2.14.3-1.dsc rename to minecode/tests/testfiles/debian/dsc/7kaa_2.14.3-1.dsc diff --git a/minecode/tests/discovery/testfiles/debian/dsc/description-expected.json b/minecode/tests/testfiles/debian/dsc/description-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/dsc/description-expected.json rename to minecode/tests/testfiles/debian/dsc/description-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/dsc/description.json b/minecode/tests/testfiles/debian/dsc/description.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/dsc/description.json rename to minecode/tests/testfiles/debian/dsc/description.json diff --git a/minecode/tests/discovery/testfiles/debian/dsc/description_expected.json b/minecode/tests/testfiles/debian/dsc/description_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/dsc/description_expected.json rename to minecode/tests/testfiles/debian/dsc/description_expected.json diff --git a/minecode/tests/discovery/testfiles/debian/dsc/invalid.dsc b/minecode/tests/testfiles/debian/dsc/invalid.dsc similarity index 100% rename from minecode/tests/discovery/testfiles/debian/dsc/invalid.dsc rename to 
minecode/tests/testfiles/debian/dsc/invalid.dsc diff --git a/minecode/tests/discovery/testfiles/debian/invalid_files/ls-lR.gz b/minecode/tests/testfiles/debian/invalid_files/ls-lR.gz similarity index 100% rename from minecode/tests/discovery/testfiles/debian/invalid_files/ls-lR.gz rename to minecode/tests/testfiles/debian/invalid_files/ls-lR.gz diff --git a/minecode/tests/discovery/testfiles/debian/lslr/ls-lR_debian.gz b/minecode/tests/testfiles/debian/lslr/ls-lR_debian.gz similarity index 100% rename from minecode/tests/discovery/testfiles/debian/lslr/ls-lR_debian.gz rename to minecode/tests/testfiles/debian/lslr/ls-lR_debian.gz diff --git a/minecode/tests/discovery/testfiles/debian/lslr/ls-lR_debian.gz-expected.json b/minecode/tests/testfiles/debian/lslr/ls-lR_debian.gz-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/lslr/ls-lR_debian.gz-expected.json rename to minecode/tests/testfiles/debian/lslr/ls-lR_debian.gz-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/lslr/ls-lR_ubuntu.gz b/minecode/tests/testfiles/debian/lslr/ls-lR_ubuntu.gz similarity index 100% rename from minecode/tests/discovery/testfiles/debian/lslr/ls-lR_ubuntu.gz rename to minecode/tests/testfiles/debian/lslr/ls-lR_ubuntu.gz diff --git a/minecode/tests/discovery/testfiles/debian/lslr/ls-lR_ubuntu.gz-expected.json b/minecode/tests/testfiles/debian/lslr/ls-lR_ubuntu.gz-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/lslr/ls-lR_ubuntu.gz-expected.json rename to minecode/tests/testfiles/debian/lslr/ls-lR_ubuntu.gz-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/packages/debian_Packages b/minecode/tests/testfiles/debian/packages/debian_Packages similarity index 100% rename from minecode/tests/discovery/testfiles/debian/packages/debian_Packages rename to minecode/tests/testfiles/debian/packages/debian_Packages diff --git 
a/minecode/tests/discovery/testfiles/debian/packages/debian_Packages-expected.json b/minecode/tests/testfiles/debian/packages/debian_Packages-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/packages/debian_Packages-expected.json rename to minecode/tests/testfiles/debian/packages/debian_Packages-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/packages/debian_Packages-visit-expected.json b/minecode/tests/testfiles/debian/packages/debian_Packages-visit-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/packages/debian_Packages-visit-expected.json rename to minecode/tests/testfiles/debian/packages/debian_Packages-visit-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/packages/ubuntu_Packages b/minecode/tests/testfiles/debian/packages/ubuntu_Packages similarity index 100% rename from minecode/tests/discovery/testfiles/debian/packages/ubuntu_Packages rename to minecode/tests/testfiles/debian/packages/ubuntu_Packages diff --git a/minecode/tests/discovery/testfiles/debian/packages/ubuntu_Packages-expected.json b/minecode/tests/testfiles/debian/packages/ubuntu_Packages-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/packages/ubuntu_Packages-expected.json rename to minecode/tests/testfiles/debian/packages/ubuntu_Packages-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/release/Release b/minecode/tests/testfiles/debian/release/Release similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/Release rename to minecode/tests/testfiles/debian/release/Release diff --git a/minecode/tests/discovery/testfiles/debian/release/Release_expected b/minecode/tests/testfiles/debian/release/Release_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/Release_expected rename to minecode/tests/testfiles/debian/release/Release_expected diff --git 
a/minecode/tests/discovery/testfiles/debian/release/Release_with_md5 b/minecode/tests/testfiles/debian/release/Release_with_md5 similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/Release_with_md5 rename to minecode/tests/testfiles/debian/release/Release_with_md5 diff --git a/minecode/tests/discovery/testfiles/debian/release/Release_with_md5_expected b/minecode/tests/testfiles/debian/release/Release_with_md5_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/Release_with_md5_expected rename to minecode/tests/testfiles/debian/release/Release_with_md5_expected diff --git a/minecode/tests/discovery/testfiles/debian/release/visited_Release b/minecode/tests/testfiles/debian/release/visited_Release similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/visited_Release rename to minecode/tests/testfiles/debian/release/visited_Release diff --git a/minecode/tests/discovery/testfiles/debian/release/visited_Release-expected.json b/minecode/tests/testfiles/debian/release/visited_Release-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/release/visited_Release-expected.json rename to minecode/tests/testfiles/debian/release/visited_Release-expected.json diff --git a/minecode/tests/discovery/testfiles/debian/sources/Sources.gz b/minecode/tests/testfiles/debian/sources/Sources.gz similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/Sources.gz rename to minecode/tests/testfiles/debian/sources/Sources.gz diff --git a/minecode/tests/discovery/testfiles/debian/sources/Sources.gz-expected.json b/minecode/tests/testfiles/debian/sources/Sources.gz-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/Sources.gz-expected.json rename to minecode/tests/testfiles/debian/sources/Sources.gz-expected.json diff --git 
a/minecode/tests/discovery/testfiles/debian/sources/debian_Sources b/minecode/tests/testfiles/debian/sources/debian_Sources similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/debian_Sources rename to minecode/tests/testfiles/debian/sources/debian_Sources diff --git a/minecode/tests/discovery/testfiles/debian/sources/debian_Sources_mapped-expected-packages.json b/minecode/tests/testfiles/debian/sources/debian_Sources_mapped-expected-packages.json similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/debian_Sources_mapped-expected-packages.json rename to minecode/tests/testfiles/debian/sources/debian_Sources_mapped-expected-packages.json diff --git a/minecode/tests/discovery/testfiles/debian/sources/debian_Sources_visit_expected b/minecode/tests/testfiles/debian/sources/debian_Sources_visit_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/debian_Sources_visit_expected rename to minecode/tests/testfiles/debian/sources/debian_Sources_visit_expected diff --git a/minecode/tests/discovery/testfiles/debian/sources/ubuntu_Sources b/minecode/tests/testfiles/debian/sources/ubuntu_Sources similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/ubuntu_Sources rename to minecode/tests/testfiles/debian/sources/ubuntu_Sources diff --git a/minecode/tests/discovery/testfiles/debian/sources/ubuntu_Sources_visit_expected b/minecode/tests/testfiles/debian/sources/ubuntu_Sources_visit_expected similarity index 100% rename from minecode/tests/discovery/testfiles/debian/sources/ubuntu_Sources_visit_expected rename to minecode/tests/testfiles/debian/sources/ubuntu_Sources_visit_expected diff --git a/minecode/tests/discovery/testfiles/debian/status/simple_status b/minecode/tests/testfiles/debian/status/simple_status similarity index 100% rename from minecode/tests/discovery/testfiles/debian/status/simple_status rename to 
minecode/tests/testfiles/debian/status/simple_status diff --git a/minecode/tests/discovery/testfiles/directories/find-ls b/minecode/tests/testfiles/directories/find-ls similarity index 100% rename from minecode/tests/discovery/testfiles/directories/find-ls rename to minecode/tests/testfiles/directories/find-ls diff --git a/minecode/tests/discovery/testfiles/directories/find-ls-apache-start b/minecode/tests/testfiles/directories/find-ls-apache-start similarity index 100% rename from minecode/tests/discovery/testfiles/directories/find-ls-apache-start rename to minecode/tests/testfiles/directories/find-ls-apache-start diff --git a/minecode/tests/discovery/testfiles/directories/find-ls-apache-start-expected.json b/minecode/tests/testfiles/directories/find-ls-apache-start-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/directories/find-ls-apache-start-expected.json rename to minecode/tests/testfiles/directories/find-ls-apache-start-expected.json diff --git a/minecode/tests/discovery/testfiles/directories/find-ls-expected.json b/minecode/tests/testfiles/directories/find-ls-expected.json similarity index 99% rename from minecode/tests/discovery/testfiles/directories/find-ls-expected.json rename to minecode/tests/testfiles/directories/find-ls-expected.json index 4d4940bb..147a0448 100644 --- a/minecode/tests/discovery/testfiles/directories/find-ls-expected.json +++ b/minecode/tests/testfiles/directories/find-ls-expected.json @@ -101,7 +101,7 @@ "path":"groovy/2.4.6/sources/apache-groovy-src-2.4.6.zip", "type":"f", "size":6907454, - "date":"2021-12", + "date":"2022-12", "target":null }, { diff --git a/minecode/tests/discovery/testfiles/directories/ls-lr b/minecode/tests/testfiles/directories/ls-lr similarity index 100% rename from minecode/tests/discovery/testfiles/directories/ls-lr rename to minecode/tests/testfiles/directories/ls-lr diff --git a/minecode/tests/discovery/testfiles/directories/ls-lr-expected.json 
b/minecode/tests/testfiles/directories/ls-lr-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/directories/ls-lr-expected.json rename to minecode/tests/testfiles/directories/ls-lr-expected.json diff --git a/minecode/tests/discovery/testfiles/directories/ls-lr-ubuntu b/minecode/tests/testfiles/directories/ls-lr-ubuntu similarity index 100% rename from minecode/tests/discovery/testfiles/directories/ls-lr-ubuntu rename to minecode/tests/testfiles/directories/ls-lr-ubuntu diff --git a/minecode/tests/discovery/testfiles/directories/ls-lr-ubuntu-expected.json b/minecode/tests/testfiles/directories/ls-lr-ubuntu-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/directories/ls-lr-ubuntu-expected.json rename to minecode/tests/testfiles/directories/ls-lr-ubuntu-expected.json diff --git a/minecode/tests/discovery/testfiles/fdroid/index-v2-expected-visit.json b/minecode/tests/testfiles/fdroid/index-v2-expected-visit.json similarity index 100% rename from minecode/tests/discovery/testfiles/fdroid/index-v2-expected-visit.json rename to minecode/tests/testfiles/fdroid/index-v2-expected-visit.json diff --git a/minecode/tests/discovery/testfiles/fdroid/index-v2-visited-expected-mapped.json b/minecode/tests/testfiles/fdroid/index-v2-visited-expected-mapped.json similarity index 100% rename from minecode/tests/discovery/testfiles/fdroid/index-v2-visited-expected-mapped.json rename to minecode/tests/testfiles/fdroid/index-v2-visited-expected-mapped.json diff --git a/minecode/tests/discovery/testfiles/fdroid/index-v2-visited.json b/minecode/tests/testfiles/fdroid/index-v2-visited.json similarity index 100% rename from minecode/tests/discovery/testfiles/fdroid/index-v2-visited.json rename to minecode/tests/testfiles/fdroid/index-v2-visited.json diff --git a/minecode/tests/discovery/testfiles/fdroid/index-v2.json b/minecode/tests/testfiles/fdroid/index-v2.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/fdroid/index-v2.json rename to minecode/tests/testfiles/fdroid/index-v2.json diff --git a/minecode/tests/discovery/testfiles/freebsd/FreeBSD-10-i386_release_0_.html b/minecode/tests/testfiles/freebsd/FreeBSD-10-i386_release_0_.html similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/FreeBSD-10-i386_release_0_.html rename to minecode/tests/testfiles/freebsd/FreeBSD-10-i386_release_0_.html diff --git a/minecode/tests/discovery/testfiles/freebsd/FreeBSD-10-i386_release_0_.html_expected b/minecode/tests/testfiles/freebsd/FreeBSD-10-i386_release_0_.html_expected similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/FreeBSD-10-i386_release_0_.html_expected rename to minecode/tests/testfiles/freebsd/FreeBSD-10-i386_release_0_.html_expected diff --git a/minecode/tests/discovery/testfiles/freebsd/FreeBSD.org.html b/minecode/tests/testfiles/freebsd/FreeBSD.org.html similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/FreeBSD.org.html rename to minecode/tests/testfiles/freebsd/FreeBSD.org.html diff --git a/minecode/tests/discovery/testfiles/freebsd/FreeBSD.org.html_expected b/minecode/tests/testfiles/freebsd/FreeBSD.org.html_expected similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/FreeBSD.org.html_expected rename to minecode/tests/testfiles/freebsd/FreeBSD.org.html_expected diff --git a/minecode/tests/discovery/testfiles/freebsd/indexfile_expected b/minecode/tests/testfiles/freebsd/indexfile_expected similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/indexfile_expected rename to minecode/tests/testfiles/freebsd/indexfile_expected diff --git a/minecode/tests/discovery/testfiles/freebsd/indexfile_expected_mapper.json b/minecode/tests/testfiles/freebsd/indexfile_expected_mapper.json similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/indexfile_expected_mapper.json rename to 
minecode/tests/testfiles/freebsd/indexfile_expected_mapper.json diff --git a/minecode/tests/discovery/testfiles/freebsd/mapper_input1 b/minecode/tests/testfiles/freebsd/mapper_input1 similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/mapper_input1 rename to minecode/tests/testfiles/freebsd/mapper_input1 diff --git a/minecode/tests/discovery/testfiles/freebsd/packagesite.txz b/minecode/tests/testfiles/freebsd/packagesite.txz similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/packagesite.txz rename to minecode/tests/testfiles/freebsd/packagesite.txz diff --git a/minecode/tests/discovery/testfiles/freebsd/pkg-devel_index b/minecode/tests/testfiles/freebsd/pkg-devel_index similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/pkg-devel_index rename to minecode/tests/testfiles/freebsd/pkg-devel_index diff --git a/minecode/tests/discovery/testfiles/freebsd/pkg-devel_index_mapper.json b/minecode/tests/testfiles/freebsd/pkg-devel_index_mapper.json similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/pkg-devel_index_mapper.json rename to minecode/tests/testfiles/freebsd/pkg-devel_index_mapper.json diff --git a/minecode/tests/discovery/testfiles/freebsd/squirrelmail-plugins-1.0_2.txz b/minecode/tests/testfiles/freebsd/squirrelmail-plugins-1.0_2.txz similarity index 100% rename from minecode/tests/discovery/testfiles/freebsd/squirrelmail-plugins-1.0_2.txz rename to minecode/tests/testfiles/freebsd/squirrelmail-plugins-1.0_2.txz diff --git a/minecode/tests/discovery/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom b/minecode/tests/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom rename to minecode/tests/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom diff --git 
a/minecode/tests/discovery/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom_search_expected.json b/minecode/tests/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom_search_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom_search_expected.json rename to minecode/tests/testfiles/housekeeping/bytejta-supports-0.5.0-ALPHA4.pom_search_expected.json diff --git a/minecode/tests/discovery/testfiles/housekeeping/declared_license_search_expected.json b/minecode/tests/testfiles/housekeeping/declared_license_search_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/declared_license_search_expected.json rename to minecode/tests/testfiles/housekeeping/declared_license_search_expected.json diff --git a/minecode/tests/discovery/testfiles/housekeeping/example_expected.json b/minecode/tests/testfiles/housekeeping/example_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/example_expected.json rename to minecode/tests/testfiles/housekeeping/example_expected.json diff --git a/minecode/tests/discovery/testfiles/housekeeping/ignore_upper_case_search_expected.json b/minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/ignore_upper_case_search_expected.json rename to minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json diff --git a/minecode/tests/discovery/testfiles/housekeeping/license_expression_search_expected.json b/minecode/tests/testfiles/housekeeping/license_expression_search_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/housekeeping/license_expression_search_expected.json rename to minecode/tests/testfiles/housekeeping/license_expression_search_expected.json diff --git 
a/minecode/tests/discovery/testfiles/maven/end2end/bytejta-supports-0.5.0-ALPHA4.pom b/minecode/tests/testfiles/maven/end2end/bytejta-supports-0.5.0-ALPHA4.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end/bytejta-supports-0.5.0-ALPHA4.pom rename to minecode/tests/testfiles/maven/end2end/bytejta-supports-0.5.0-ALPHA4.pom diff --git a/minecode/tests/discovery/testfiles/maven/end2end/expected_mapped_packages.json b/minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end/expected_mapped_packages.json rename to minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end/expected_visited_uris.json b/minecode/tests/testfiles/maven/end2end/expected_visited_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end/expected_visited_uris.json rename to minecode/tests/testfiles/maven/end2end/expected_visited_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end/test_uris.json b/minecode/tests/testfiles/maven/end2end/test_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end/test_uris.json rename to minecode/tests/testfiles/maven/end2end/test_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_index/expected_visited_increment_index.json b/minecode/tests/testfiles/maven/end2end_index/expected_visited_increment_index.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_index/expected_visited_increment_index.json rename to minecode/tests/testfiles/maven/end2end_index/expected_visited_increment_index.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_index/expected_visited_index.json b/minecode/tests/testfiles/maven/end2end_index/expected_visited_index.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/maven/end2end_index/expected_visited_index.json rename to minecode/tests/testfiles/maven/end2end_index/expected_visited_index.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_index/nexus-maven-repository-index.163.gz b/minecode/tests/testfiles/maven/end2end_index/nexus-maven-repository-index.163.gz similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_index/nexus-maven-repository-index.163.gz rename to minecode/tests/testfiles/maven/end2end_index/nexus-maven-repository-index.163.gz diff --git a/minecode/tests/discovery/testfiles/maven/end2end_index/nexus-maven-repository-index.properties b/minecode/tests/testfiles/maven/end2end_index/nexus-maven-repository-index.properties similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_index/nexus-maven-repository-index.properties rename to minecode/tests/testfiles/maven/end2end_index/nexus-maven-repository-index.properties diff --git a/minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-index-data.json b/minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-index-data.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-index-data.json rename to minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-index-data.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-pom-data.json b/minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-pom-data.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-pom-data.json rename to minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21-pom-data.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21.pom b/minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21.pom similarity 
index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21.pom rename to minecode/tests/testfiles/maven/end2end_multisteps/commons-jaxrs-1.21.pom diff --git a/minecode/tests/discovery/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json rename to minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json rename to minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json diff --git a/minecode/tests/discovery/testfiles/maven/end2end_unicode/commons-jaxrs-1.22.pom b/minecode/tests/testfiles/maven/end2end_unicode/commons-jaxrs-1.22.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_unicode/commons-jaxrs-1.22.pom rename to minecode/tests/testfiles/maven/end2end_unicode/commons-jaxrs-1.22.pom diff --git a/minecode/tests/discovery/testfiles/maven/end2end_unicode/expected_mapped_commons-jaxrs-1.22.json b/minecode/tests/testfiles/maven/end2end_unicode/expected_mapped_commons-jaxrs-1.22.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_unicode/expected_mapped_commons-jaxrs-1.22.json rename to minecode/tests/testfiles/maven/end2end_unicode/expected_mapped_commons-jaxrs-1.22.json diff --git 
a/minecode/tests/discovery/testfiles/maven/end2end_unicode/expected_visited_commons-jaxrs-1.22.json b/minecode/tests/testfiles/maven/end2end_unicode/expected_visited_commons-jaxrs-1.22.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/end2end_unicode/expected_visited_commons-jaxrs-1.22.json rename to minecode/tests/testfiles/maven/end2end_unicode/expected_visited_commons-jaxrs-1.22.json diff --git a/minecode/tests/discovery/testfiles/maven/html/app.html b/minecode/tests/testfiles/maven/html/app.html similarity index 100% rename from minecode/tests/discovery/testfiles/maven/html/app.html rename to minecode/tests/testfiles/maven/html/app.html diff --git a/minecode/tests/discovery/testfiles/maven/html/jcenter.bintray.com.html b/minecode/tests/testfiles/maven/html/jcenter.bintray.com.html similarity index 100% rename from minecode/tests/discovery/testfiles/maven/html/jcenter.bintray.com.html rename to minecode/tests/testfiles/maven/html/jcenter.bintray.com.html diff --git a/minecode/tests/discovery/testfiles/maven/html/stateframework-compiler.html b/minecode/tests/testfiles/maven/html/stateframework-compiler.html similarity index 100% rename from minecode/tests/discovery/testfiles/maven/html/stateframework-compiler.html rename to minecode/tests/testfiles/maven/html/stateframework-compiler.html diff --git a/minecode/tests/discovery/testfiles/maven/html/visitor_expected_app.html.json b/minecode/tests/testfiles/maven/html/visitor_expected_app.html.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/html/visitor_expected_app.html.json rename to minecode/tests/testfiles/maven/html/visitor_expected_app.html.json diff --git a/minecode/tests/discovery/testfiles/maven/html/visitor_expected_jcenter.bintray.com2.html.json b/minecode/tests/testfiles/maven/html/visitor_expected_jcenter.bintray.com2.html.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/maven/html/visitor_expected_jcenter.bintray.com2.html.json rename to minecode/tests/testfiles/maven/html/visitor_expected_jcenter.bintray.com2.html.json diff --git a/minecode/tests/discovery/testfiles/maven/html/visitor_expected_stateframework-compiler.html.json b/minecode/tests/testfiles/maven/html/visitor_expected_stateframework-compiler.html.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/html/visitor_expected_stateframework-compiler.html.json rename to minecode/tests/testfiles/maven/html/visitor_expected_stateframework-compiler.html.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/expected_artifacts-defaults.json b/minecode/tests/testfiles/maven/index/buggy/expected_artifacts-defaults.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/expected_artifacts-defaults.json rename to minecode/tests/testfiles/maven/index/buggy/expected_artifacts-defaults.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/expected_artifacts.json b/minecode/tests/testfiles/maven/index/buggy/expected_artifacts.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/expected_artifacts.json rename to minecode/tests/testfiles/maven/index/buggy/expected_artifacts.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/expected_entries.json b/minecode/tests/testfiles/maven/index/buggy/expected_entries.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/expected_entries.json rename to minecode/tests/testfiles/maven/index/buggy/expected_entries.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/expected_uris.json b/minecode/tests/testfiles/maven/index/buggy/expected_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/expected_uris.json rename to 
minecode/tests/testfiles/maven/index/buggy/expected_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/expected_visited_uris.json b/minecode/tests/testfiles/maven/index/buggy/expected_visited_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/expected_visited_uris.json rename to minecode/tests/testfiles/maven/index/buggy/expected_visited_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/index/buggy/nexus-maven-repository-index.gz b/minecode/tests/testfiles/maven/index/buggy/nexus-maven-repository-index.gz similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/buggy/nexus-maven-repository-index.gz rename to minecode/tests/testfiles/maven/index/buggy/nexus-maven-repository-index.gz diff --git a/minecode/tests/discovery/testfiles/maven/index/expected_artifacts-all-worthy.json b/minecode/tests/testfiles/maven/index/expected_artifacts-all-worthy.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/expected_artifacts-all-worthy.json rename to minecode/tests/testfiles/maven/index/expected_artifacts-all-worthy.json diff --git a/minecode/tests/discovery/testfiles/maven/index/expected_artifacts-defaults.json b/minecode/tests/testfiles/maven/index/expected_artifacts-defaults.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/expected_artifacts-defaults.json rename to minecode/tests/testfiles/maven/index/expected_artifacts-defaults.json diff --git a/minecode/tests/discovery/testfiles/maven/index/expected_artifacts.json b/minecode/tests/testfiles/maven/index/expected_artifacts.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/expected_artifacts.json rename to minecode/tests/testfiles/maven/index/expected_artifacts.json diff --git a/minecode/tests/discovery/testfiles/maven/index/expected_entries.json b/minecode/tests/testfiles/maven/index/expected_entries.json 
similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/expected_entries.json rename to minecode/tests/testfiles/maven/index/expected_entries.json diff --git a/minecode/tests/discovery/testfiles/maven/index/expected_uris.json b/minecode/tests/testfiles/maven/index/expected_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/expected_uris.json rename to minecode/tests/testfiles/maven/index/expected_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/expected_artifacts-defaults.json b/minecode/tests/testfiles/maven/index/increment/expected_artifacts-defaults.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/expected_artifacts-defaults.json rename to minecode/tests/testfiles/maven/index/increment/expected_artifacts-defaults.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/expected_artifacts.json b/minecode/tests/testfiles/maven/index/increment/expected_artifacts.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/expected_artifacts.json rename to minecode/tests/testfiles/maven/index/increment/expected_artifacts.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/expected_entries.json b/minecode/tests/testfiles/maven/index/increment/expected_entries.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/expected_entries.json rename to minecode/tests/testfiles/maven/index/increment/expected_entries.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/expected_properties_uris.json b/minecode/tests/testfiles/maven/index/increment/expected_properties_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/expected_properties_uris.json rename to minecode/tests/testfiles/maven/index/increment/expected_properties_uris.json diff --git 
a/minecode/tests/discovery/testfiles/maven/index/increment/expected_uris.json b/minecode/tests/testfiles/maven/index/increment/expected_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/expected_uris.json rename to minecode/tests/testfiles/maven/index/increment/expected_uris.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/nexus-maven-repository-index.445.gz b/minecode/tests/testfiles/maven/index/increment/nexus-maven-repository-index.445.gz similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/nexus-maven-repository-index.445.gz rename to minecode/tests/testfiles/maven/index/increment/nexus-maven-repository-index.445.gz diff --git a/minecode/tests/discovery/testfiles/maven/index/increment/nexus-maven-repository-index.properties b/minecode/tests/testfiles/maven/index/increment/nexus-maven-repository-index.properties similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment/nexus-maven-repository-index.properties rename to minecode/tests/testfiles/maven/index/increment/nexus-maven-repository-index.properties diff --git a/minecode/tests/discovery/testfiles/maven/index/increment2/expected_mini_package.json b/minecode/tests/testfiles/maven/index/increment2/expected_mini_package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment2/expected_mini_package.json rename to minecode/tests/testfiles/maven/index/increment2/expected_mini_package.json diff --git a/minecode/tests/discovery/testfiles/maven/index/increment2/expected_uris.json b/minecode/tests/testfiles/maven/index/increment2/expected_uris.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment2/expected_uris.json rename to minecode/tests/testfiles/maven/index/increment2/expected_uris.json diff --git 
a/minecode/tests/discovery/testfiles/maven/index/increment2/nexus-maven-repository-index.457.gz b/minecode/tests/testfiles/maven/index/increment2/nexus-maven-repository-index.457.gz similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/increment2/nexus-maven-repository-index.457.gz rename to minecode/tests/testfiles/maven/index/increment2/nexus-maven-repository-index.457.gz diff --git a/minecode/tests/discovery/testfiles/maven/index/nexus-maven-repository-index.gz b/minecode/tests/testfiles/maven/index/nexus-maven-repository-index.gz similarity index 100% rename from minecode/tests/discovery/testfiles/maven/index/nexus-maven-repository-index.gz rename to minecode/tests/testfiles/maven/index/nexus-maven-repository-index.gz diff --git a/minecode/tests/discovery/testfiles/maven/mapper/ant-1.6.5.pom b/minecode/tests/testfiles/maven/mapper/ant-1.6.5.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/ant-1.6.5.pom rename to minecode/tests/testfiles/maven/mapper/ant-1.6.5.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/ant-1.6.5.pom.json b/minecode/tests/testfiles/maven/mapper/ant-1.6.5.pom.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/ant-1.6.5.pom.json rename to minecode/tests/testfiles/maven/mapper/ant-1.6.5.pom.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/axis-1.4.pom b/minecode/tests/testfiles/maven/mapper/axis-1.4.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/axis-1.4.pom rename to minecode/tests/testfiles/maven/mapper/axis-1.4.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/axis-1.4.pom.package.json b/minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/axis-1.4.pom.package.json rename to minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json diff --git 
a/minecode/tests/discovery/testfiles/maven/mapper/commons-jaxrs-1.21.pom b/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/commons-jaxrs-1.21.pom rename to minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json b/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json rename to minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/commons-pool-1.5.7.pom b/minecode/tests/testfiles/maven/mapper/commons-pool-1.5.7.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/commons-pool-1.5.7.pom rename to minecode/tests/testfiles/maven/mapper/commons-pool-1.5.7.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/commons-pool-1.5.7.pom.package.json b/minecode/tests/testfiles/maven/mapper/commons-pool-1.5.7.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/commons-pool-1.5.7.pom.package.json rename to minecode/tests/testfiles/maven/mapper/commons-pool-1.5.7.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/depgraph-view-0.1.pom b/minecode/tests/testfiles/maven/mapper/depgraph-view-0.1.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/depgraph-view-0.1.pom rename to minecode/tests/testfiles/maven/mapper/depgraph-view-0.1.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom b/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom rename to 
minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json b/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json rename to minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom b/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom rename to minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json b/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json rename to minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/struts-menu-2.4.2.pom b/minecode/tests/testfiles/maven/mapper/struts-menu-2.4.2.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/struts-menu-2.4.2.pom rename to minecode/tests/testfiles/maven/mapper/struts-menu-2.4.2.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/struts-menu-2.4.2.pom.package.json b/minecode/tests/testfiles/maven/mapper/struts-menu-2.4.2.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/struts-menu-2.4.2.pom.package.json rename to minecode/tests/testfiles/maven/mapper/struts-menu-2.4.2.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/mapper/xbean-jmx-2.0.pom 
b/minecode/tests/testfiles/maven/mapper/xbean-jmx-2.0.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/mapper/xbean-jmx-2.0.pom rename to minecode/tests/testfiles/maven/mapper/xbean-jmx-2.0.pom diff --git a/minecode/tests/discovery/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json b/minecode/tests/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json similarity index 94% rename from minecode/tests/discovery/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json rename to minecode/tests/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json index c11da554..8135ecbd 100644 --- a/minecode/tests/discovery/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json +++ b/minecode/tests/testfiles/maven/mapper/xbean-jmx-2.0.pom.package.json @@ -36,7 +36,7 @@ "scope": "compile", "is_runtime": false, "is_optional": true, - "is_resolved": null, + "is_resolved": false, "resolved_package": {}, "extra_data": {} }, @@ -46,7 +46,7 @@ "scope": "compile", "is_runtime": false, "is_optional": true, - "is_resolved": null, + "is_resolved": false, "resolved_package": {}, "extra_data": {} }, @@ -56,7 +56,7 @@ "scope": "compile", "is_runtime": false, "is_optional": true, - "is_resolved": null, + "is_resolved": false, "resolved_package": {}, "extra_data": {} }, @@ -66,7 +66,7 @@ "scope": "compile", "is_runtime": false, "is_optional": true, - "is_resolved": null, + "is_resolved": false, "resolved_package": {}, "extra_data": {} } diff --git a/minecode/tests/discovery/testfiles/maven/maven-metadata/expected_maven_xml.json b/minecode/tests/testfiles/maven/maven-metadata/expected_maven_xml.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/maven-metadata/expected_maven_xml.json rename to minecode/tests/testfiles/maven/maven-metadata/expected_maven_xml.json diff --git a/minecode/tests/discovery/testfiles/maven/maven-metadata/maven-metadata.xml b/minecode/tests/testfiles/maven/maven-metadata/maven-metadata.xml similarity index 100% rename 
from minecode/tests/discovery/testfiles/maven/maven-metadata/maven-metadata.xml rename to minecode/tests/testfiles/maven/maven-metadata/maven-metadata.xml diff --git a/minecode/tests/discovery/testfiles/maven/parsing/empty/common-object-1.0.2.pom b/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/empty/common-object-1.0.2.pom rename to minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json b/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json rename to minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom b/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom rename to minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json b/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json rename to minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom b/minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom rename to 
minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.7.0.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom b/minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom rename to minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/argus-webservices-2.8.0.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom b/minecode/tests/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom rename to minecode/tests/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom.package.json rename to 
minecode/tests/testfiles/maven/parsing/loop/coreplugin-1.0.0.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom b/minecode/tests/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom rename to minecode/tests/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/jacuzzi-annotations-0.2.1.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom b/minecode/tests/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom rename to minecode/tests/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/jacuzzi-database-0.2.1.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom b/minecode/tests/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom rename to 
minecode/tests/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/ojcms-beans-0.1-beta.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom b/minecode/tests/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom rename to minecode/tests/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom.package.json b/minecode/tests/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom.package.json rename to minecode/tests/testfiles/maven/parsing/loop/pkg-2.0.13.1005.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/jds-2.17.0718b.pom b/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/jds-2.17.0718b.pom rename to minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json rename to minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json diff --git 
a/minecode/tests/discovery/testfiles/maven/parsing/parse/jds-3.0.1.pom b/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/jds-3.0.1.pom rename to minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json rename to minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom b/minecode/tests/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom rename to minecode/tests/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom.package.json rename to minecode/tests/testfiles/maven/parsing/parse/maven-javanet-plugin-1.7.pom.package.json diff --git a/minecode/tests/discovery/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom b/minecode/tests/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom rename to minecode/tests/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom diff --git 
a/minecode/tests/discovery/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json similarity index 99% rename from minecode/tests/discovery/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json rename to minecode/tests/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json index 3a278602..16fd3ab3 100644 --- a/minecode/tests/discovery/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json +++ b/minecode/tests/testfiles/maven/parsing/parse/springmvc-rest-docs-maven-plugin-1.0-RC1.pom.package.json @@ -36,7 +36,7 @@ "scope": "compile", "is_runtime": false, "is_optional": true, - "is_resolved": null, + "is_resolved": false, "resolved_package": {}, "extra_data": {} }, diff --git a/minecode/tests/discovery/testfiles/maven/pom/classworlds-1.1-alpha-2.pom b/minecode/tests/testfiles/maven/pom/classworlds-1.1-alpha-2.pom similarity index 100% rename from minecode/tests/discovery/testfiles/maven/pom/classworlds-1.1-alpha-2.pom rename to minecode/tests/testfiles/maven/pom/classworlds-1.1-alpha-2.pom diff --git a/minecode/tests/discovery/testfiles/npm/0flux.json b/minecode/tests/testfiles/npm/0flux.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/0flux.json rename to minecode/tests/testfiles/npm/0flux.json diff --git a/minecode/tests/discovery/testfiles/npm/0flux_npm_expected.json b/minecode/tests/testfiles/npm/0flux_npm_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/0flux_npm_expected.json rename to minecode/tests/testfiles/npm/0flux_npm_expected.json diff --git a/minecode/tests/discovery/testfiles/npm/1000_records.json b/minecode/tests/testfiles/npm/1000_records.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/1000_records.json rename to 
minecode/tests/testfiles/npm/1000_records.json diff --git a/minecode/tests/discovery/testfiles/npm/2112.json b/minecode/tests/testfiles/npm/2112.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/2112.json rename to minecode/tests/testfiles/npm/2112.json diff --git a/minecode/tests/discovery/testfiles/npm/29_record_expected.json b/minecode/tests/testfiles/npm/29_record_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/29_record_expected.json rename to minecode/tests/testfiles/npm/29_record_expected.json diff --git a/minecode/tests/discovery/testfiles/npm/554_record_expected.json b/minecode/tests/testfiles/npm/554_record_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/554_record_expected.json rename to minecode/tests/testfiles/npm/554_record_expected.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_1000_records.json b/minecode/tests/testfiles/npm/expected_1000_records.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_1000_records.json rename to minecode/tests/testfiles/npm/expected_1000_records.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_doclimit_visitor.json b/minecode/tests/testfiles/npm/expected_doclimit_visitor.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_doclimit_visitor.json rename to minecode/tests/testfiles/npm/expected_doclimit_visitor.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_npmdownload_data_vistor.json b/minecode/tests/testfiles/npm/expected_npmdownload_data_vistor.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_npmdownload_data_vistor.json rename to minecode/tests/testfiles/npm/expected_npmdownload_data_vistor.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_npmdownloadvistor.json b/minecode/tests/testfiles/npm/expected_npmdownloadvistor.json similarity 
index 100% rename from minecode/tests/discovery/testfiles/npm/expected_npmdownloadvistor.json rename to minecode/tests/testfiles/npm/expected_npmdownloadvistor.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_npmindexvisitor.json b/minecode/tests/testfiles/npm/expected_npmindexvisitor.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_npmindexvisitor.json rename to minecode/tests/testfiles/npm/expected_npmindexvisitor.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_over_limit.json b/minecode/tests/testfiles/npm/expected_over_limit.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_over_limit.json rename to minecode/tests/testfiles/npm/expected_over_limit.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_ticket_439.json b/minecode/tests/testfiles/npm/expected_ticket_439.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_ticket_439.json rename to minecode/tests/testfiles/npm/expected_ticket_439.json diff --git a/minecode/tests/discovery/testfiles/npm/expected_ticket_440.json b/minecode/tests/testfiles/npm/expected_ticket_440.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/expected_ticket_440.json rename to minecode/tests/testfiles/npm/expected_ticket_440.json diff --git a/minecode/tests/discovery/testfiles/npm/grunticon-sass.json b/minecode/tests/testfiles/npm/grunticon-sass.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/grunticon-sass.json rename to minecode/tests/testfiles/npm/grunticon-sass.json diff --git a/minecode/tests/discovery/testfiles/npm/jsonp-filter-expected.json b/minecode/tests/testfiles/npm/jsonp-filter-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/jsonp-filter-expected.json rename to minecode/tests/testfiles/npm/jsonp-filter-expected.json diff --git 
a/minecode/tests/discovery/testfiles/npm/jsonp-filter.json b/minecode/tests/testfiles/npm/jsonp-filter.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/jsonp-filter.json rename to minecode/tests/testfiles/npm/jsonp-filter.json diff --git a/minecode/tests/discovery/testfiles/npm/mapper/index.expected.json b/minecode/tests/testfiles/npm/mapper/index.expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/mapper/index.expected.json rename to minecode/tests/testfiles/npm/mapper/index.expected.json diff --git a/minecode/tests/discovery/testfiles/npm/mapper/index.json b/minecode/tests/testfiles/npm/mapper/index.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/mapper/index.json rename to minecode/tests/testfiles/npm/mapper/index.json diff --git a/minecode/tests/discovery/testfiles/npm/microdata-node_expected.json b/minecode/tests/testfiles/npm/microdata-node_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/microdata-node_expected.json rename to minecode/tests/testfiles/npm/microdata-node_expected.json diff --git a/minecode/tests/discovery/testfiles/npm/microdata.json b/minecode/tests/testfiles/npm/microdata.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/microdata.json rename to minecode/tests/testfiles/npm/microdata.json diff --git a/minecode/tests/discovery/testfiles/npm/npm_2112_expected.json b/minecode/tests/testfiles/npm/npm_2112_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/npm_2112_expected.json rename to minecode/tests/testfiles/npm/npm_2112_expected.json diff --git a/minecode/tests/discovery/testfiles/npm/over_limit.json b/minecode/tests/testfiles/npm/over_limit.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/over_limit.json rename to minecode/tests/testfiles/npm/over_limit.json diff --git 
a/minecode/tests/discovery/testfiles/npm/replicate_doc1.json b/minecode/tests/testfiles/npm/replicate_doc1.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/replicate_doc1.json rename to minecode/tests/testfiles/npm/replicate_doc1.json diff --git a/minecode/tests/discovery/testfiles/npm/ticket_439.json b/minecode/tests/testfiles/npm/ticket_439.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/ticket_439.json rename to minecode/tests/testfiles/npm/ticket_439.json diff --git a/minecode/tests/discovery/testfiles/npm/ticket_440_records.json b/minecode/tests/testfiles/npm/ticket_440_records.json similarity index 100% rename from minecode/tests/discovery/testfiles/npm/ticket_440_records.json rename to minecode/tests/testfiles/npm/ticket_440_records.json diff --git a/minecode/tests/discovery/testfiles/pypi/boolean.py-2.0.dev3.json b/minecode/tests/testfiles/pypi/boolean.py-2.0.dev3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/boolean.py-2.0.dev3.json rename to minecode/tests/testfiles/pypi/boolean.py-2.0.dev3.json diff --git a/minecode/tests/discovery/testfiles/pypi/boolean.py.json b/minecode/tests/testfiles/pypi/boolean.py.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/boolean.py.json rename to minecode/tests/testfiles/pypi/boolean.py.json diff --git a/minecode/tests/discovery/testfiles/pypi/cage.json b/minecode/tests/testfiles/pypi/cage.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/cage.json rename to minecode/tests/testfiles/pypi/cage.json diff --git a/minecode/tests/discovery/testfiles/pypi/cage_1.1.2.json b/minecode/tests/testfiles/pypi/cage_1.1.2.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/cage_1.1.2.json rename to minecode/tests/testfiles/pypi/cage_1.1.2.json diff --git a/minecode/tests/discovery/testfiles/pypi/cage_1.1.3.json b/minecode/tests/testfiles/pypi/cage_1.1.3.json 
similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/cage_1.1.3.json rename to minecode/tests/testfiles/pypi/cage_1.1.3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected-CAGE-1.1.2.json b/minecode/tests/testfiles/pypi/expected-CAGE-1.1.2.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected-CAGE-1.1.2.json rename to minecode/tests/testfiles/pypi/expected-CAGE-1.1.2.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected-CAGE-1.1.3.json b/minecode/tests/testfiles/pypi/expected-CAGE-1.1.3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected-CAGE-1.1.3.json rename to minecode/tests/testfiles/pypi/expected-CAGE-1.1.3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected-boolean.py-2.0.dev3.json b/minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected-boolean.py-2.0.dev3.json rename to minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected-lxml-3.2.0.json b/minecode/tests/testfiles/pypi/expected-lxml-3.2.0.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected-lxml-3.2.0.json rename to minecode/tests/testfiles/pypi/expected-lxml-3.2.0.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_data-boolean.py-2.0.dev3.json b/minecode/tests/testfiles/pypi/expected_data-boolean.py-2.0.dev3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_data-boolean.py-2.0.dev3.json rename to minecode/tests/testfiles/pypi/expected_data-boolean.py-2.0.dev3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_data-cage_1.1.2.json b/minecode/tests/testfiles/pypi/expected_data-cage_1.1.2.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_data-cage_1.1.2.json rename to 
minecode/tests/testfiles/pypi/expected_data-cage_1.1.2.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_data-cage_1.1.3.json b/minecode/tests/testfiles/pypi/expected_data-cage_1.1.3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_data-cage_1.1.3.json rename to minecode/tests/testfiles/pypi/expected_data-cage_1.1.3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uri_visitor1.json b/minecode/tests/testfiles/pypi/expected_uri_visitor1.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uri_visitor1.json rename to minecode/tests/testfiles/pypi/expected_uri_visitor1.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uri_visitor2.json b/minecode/tests/testfiles/pypi/expected_uri_visitor2.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uri_visitor2.json rename to minecode/tests/testfiles/pypi/expected_uri_visitor2.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uris-boolean.py-2.0.dev3.json b/minecode/tests/testfiles/pypi/expected_uris-boolean.py-2.0.dev3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uris-boolean.py-2.0.dev3.json rename to minecode/tests/testfiles/pypi/expected_uris-boolean.py-2.0.dev3.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uris-boolean.py.json b/minecode/tests/testfiles/pypi/expected_uris-boolean.py.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uris-boolean.py.json rename to minecode/tests/testfiles/pypi/expected_uris-boolean.py.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uris-cage.json b/minecode/tests/testfiles/pypi/expected_uris-cage.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uris-cage.json rename to minecode/tests/testfiles/pypi/expected_uris-cage.json diff --git 
a/minecode/tests/discovery/testfiles/pypi/expected_uris-cage_1.1.2.json b/minecode/tests/testfiles/pypi/expected_uris-cage_1.1.2.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uris-cage_1.1.2.json rename to minecode/tests/testfiles/pypi/expected_uris-cage_1.1.2.json diff --git a/minecode/tests/discovery/testfiles/pypi/expected_uris-cage_1.1.3.json b/minecode/tests/testfiles/pypi/expected_uris-cage_1.1.3.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/expected_uris-cage_1.1.3.json rename to minecode/tests/testfiles/pypi/expected_uris-cage_1.1.3.json diff --git a/minecode/tests/discovery/testfiles/pypi/lxml-3.2.0.json b/minecode/tests/testfiles/pypi/lxml-3.2.0.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/lxml-3.2.0.json rename to minecode/tests/testfiles/pypi/lxml-3.2.0.json diff --git a/minecode/tests/discovery/testfiles/pypi/map/3to2-1.1.1.json b/minecode/tests/testfiles/pypi/map/3to2-1.1.1.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/map/3to2-1.1.1.json rename to minecode/tests/testfiles/pypi/map/3to2-1.1.1.json diff --git a/minecode/tests/discovery/testfiles/pypi/map/expected-3to2-1.1.1.json b/minecode/tests/testfiles/pypi/map/expected-3to2-1.1.1.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/map/expected-3to2-1.1.1.json rename to minecode/tests/testfiles/pypi/map/expected-3to2-1.1.1.json diff --git a/minecode/tests/discovery/testfiles/pypi/pypiindexvisitor-expected.json b/minecode/tests/testfiles/pypi/pypiindexvisitor-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/pypi/pypiindexvisitor-expected.json rename to minecode/tests/testfiles/pypi/pypiindexvisitor-expected.json diff --git a/minecode/tests/discovery/testfiles/rsync/rsync_dev.dir b/minecode/tests/testfiles/rsync/rsync_dev.dir similarity index 100% rename from 
minecode/tests/discovery/testfiles/rsync/rsync_dev.dir rename to minecode/tests/testfiles/rsync/rsync_dev.dir diff --git a/minecode/tests/testfiles/rsync/rsync_dir/bar/that/baz b/minecode/tests/testfiles/rsync/rsync_dir/bar/that/baz new file mode 100644 index 00000000..e69de29b diff --git a/minecode/tests/testfiles/rsync/rsync_dir/bar/this b/minecode/tests/testfiles/rsync/rsync_dir/bar/this new file mode 100644 index 00000000..e69de29b diff --git a/minecode/tests/testfiles/rsync/rsync_dir/foo b/minecode/tests/testfiles/rsync/rsync_dir/foo new file mode 100644 index 00000000..e69de29b diff --git a/minecode/tests/discovery/testfiles/rsync/rsync_modules b/minecode/tests/testfiles/rsync/rsync_modules similarity index 100% rename from minecode/tests/discovery/testfiles/rsync/rsync_modules rename to minecode/tests/testfiles/rsync/rsync_modules diff --git a/minecode/tests/discovery/testfiles/rsync/rsync_v3.0.9_protocol30.dir b/minecode/tests/testfiles/rsync/rsync_v3.0.9_protocol30.dir similarity index 100% rename from minecode/tests/discovery/testfiles/rsync/rsync_v3.0.9_protocol30.dir rename to minecode/tests/testfiles/rsync/rsync_v3.0.9_protocol30.dir diff --git a/minecode/tests/discovery/testfiles/rsync/rsync_v3.1.0_protocol31.dir b/minecode/tests/testfiles/rsync/rsync_v3.1.0_protocol31.dir similarity index 100% rename from minecode/tests/discovery/testfiles/rsync/rsync_v3.1.0_protocol31.dir rename to minecode/tests/testfiles/rsync/rsync_v3.1.0_protocol31.dir diff --git a/minecode/tests/discovery/testfiles/rsync/rsync_wicket.dir b/minecode/tests/testfiles/rsync/rsync_wicket.dir similarity index 100% rename from minecode/tests/discovery/testfiles/rsync/rsync_wicket.dir rename to minecode/tests/testfiles/rsync/rsync_wicket.dir diff --git a/minecode/tests/discovery/testfiles/rubygems/0mq-0.4.1.gem.metadata b/minecode/tests/testfiles/rubygems/0mq-0.4.1.gem.metadata similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/0mq-0.4.1.gem.metadata rename 
to minecode/tests/testfiles/rubygems/0mq-0.4.1.gem.metadata diff --git a/minecode/tests/discovery/testfiles/rubygems/0mq-0.4.1.gem.package.json b/minecode/tests/testfiles/rubygems/0mq-0.4.1.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/0mq-0.4.1.gem.package.json rename to minecode/tests/testfiles/rubygems/0mq-0.4.1.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem b/minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem rename to minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem.metadata b/minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem.metadata similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem.metadata rename to minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem.metadata diff --git a/minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem.package.json b/minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/a_okay-0.1.0.gem.package.json rename to minecode/tests/testfiles/rubygems/a_okay-0.1.0.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/action_tracker-1.0.2.gem b/minecode/tests/testfiles/rubygems/action_tracker-1.0.2.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/action_tracker-1.0.2.gem rename to minecode/tests/testfiles/rubygems/action_tracker-1.0.2.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/action_tracker-1.0.2.gem.package.json b/minecode/tests/testfiles/rubygems/action_tracker-1.0.2.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/action_tracker-1.0.2.gem.package.json rename to 
minecode/tests/testfiles/rubygems/action_tracker-1.0.2.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/0xffffff.api.json b/minecode/tests/testfiles/rubygems/apiv1/0xffffff.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/0xffffff.api.json rename to minecode/tests/testfiles/rubygems/apiv1/0xffffff.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/0xffffff.api.package.json b/minecode/tests/testfiles/rubygems/apiv1/0xffffff.api.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/0xffffff.api.package.json rename to minecode/tests/testfiles/rubygems/apiv1/0xffffff.api.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.json b/minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.json rename to minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.mapped.json b/minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.mapped.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.mapped.json rename to minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.mapped.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.package.json b/minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.package.json rename to minecode/tests/testfiles/rubygems/apiv1/a1630ty_a1630ty.api.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/action_tracker.api.json b/minecode/tests/testfiles/rubygems/apiv1/action_tracker.api.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/rubygems/apiv1/action_tracker.api.json rename to minecode/tests/testfiles/rubygems/apiv1/action_tracker.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/action_tracker.api.package.json b/minecode/tests/testfiles/rubygems/apiv1/action_tracker.api.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/action_tracker.api.package.json rename to minecode/tests/testfiles/rubygems/apiv1/action_tracker.api.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/expected_0xffffff.api.json b/minecode/tests/testfiles/rubygems/apiv1/expected_0xffffff.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/expected_0xffffff.api.json rename to minecode/tests/testfiles/rubygems/apiv1/expected_0xffffff.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/expected_a1630ty_a1630ty.api.json b/minecode/tests/testfiles/rubygems/apiv1/expected_a1630ty_a1630ty.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/expected_a1630ty_a1630ty.api.json rename to minecode/tests/testfiles/rubygems/apiv1/expected_a1630ty_a1630ty.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/expected_zuck.api.json b/minecode/tests/testfiles/rubygems/apiv1/expected_zuck.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/expected_zuck.api.json rename to minecode/tests/testfiles/rubygems/apiv1/expected_zuck.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/zuck.api.json b/minecode/tests/testfiles/rubygems/apiv1/zuck.api.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/zuck.api.json rename to minecode/tests/testfiles/rubygems/apiv1/zuck.api.json diff --git a/minecode/tests/discovery/testfiles/rubygems/apiv1/zuck.api.package.json 
b/minecode/tests/testfiles/rubygems/apiv1/zuck.api.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/apiv1/zuck.api.package.json rename to minecode/tests/testfiles/rubygems/apiv1/zuck.api.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/archive-tar-minitar-0.5.2.gem b/minecode/tests/testfiles/rubygems/archive-tar-minitar-0.5.2.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/archive-tar-minitar-0.5.2.gem rename to minecode/tests/testfiles/rubygems/archive-tar-minitar-0.5.2.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/archive-tar-minitar-0.5.2.gem.package.json b/minecode/tests/testfiles/rubygems/archive-tar-minitar-0.5.2.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/archive-tar-minitar-0.5.2.gem.package.json rename to minecode/tests/testfiles/rubygems/archive-tar-minitar-0.5.2.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/blankslate-3.1.3.gem b/minecode/tests/testfiles/rubygems/blankslate-3.1.3.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/blankslate-3.1.3.gem rename to minecode/tests/testfiles/rubygems/blankslate-3.1.3.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/blankslate-3.1.3.gem.package.json b/minecode/tests/testfiles/rubygems/blankslate-3.1.3.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/blankslate-3.1.3.gem.package.json rename to minecode/tests/testfiles/rubygems/blankslate-3.1.3.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/gemspec/address_standardization.gemspec b/minecode/tests/testfiles/rubygems/gemspec/address_standardization.gemspec similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/gemspec/address_standardization.gemspec rename to minecode/tests/testfiles/rubygems/gemspec/address_standardization.gemspec diff --git 
a/minecode/tests/discovery/testfiles/rubygems/gemspec/arel.gemspec b/minecode/tests/testfiles/rubygems/gemspec/arel.gemspec similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/gemspec/arel.gemspec rename to minecode/tests/testfiles/rubygems/gemspec/arel.gemspec diff --git a/minecode/tests/discovery/testfiles/rubygems/index/latest_specs.4.8.gz b/minecode/tests/testfiles/rubygems/index/latest_specs.4.8.gz similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/index/latest_specs.4.8.gz rename to minecode/tests/testfiles/rubygems/index/latest_specs.4.8.gz diff --git a/minecode/tests/discovery/testfiles/rubygems/index/latest_specs.4.8.gz.expected.json b/minecode/tests/testfiles/rubygems/index/latest_specs.4.8.gz.expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/index/latest_specs.4.8.gz.expected.json rename to minecode/tests/testfiles/rubygems/index/latest_specs.4.8.gz.expected.json diff --git a/minecode/tests/discovery/testfiles/rubygems/m2r-2.1.0.gem b/minecode/tests/testfiles/rubygems/m2r-2.1.0.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/m2r-2.1.0.gem rename to minecode/tests/testfiles/rubygems/m2r-2.1.0.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/m2r-2.1.0.gem.package.json b/minecode/tests/testfiles/rubygems/m2r-2.1.0.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/m2r-2.1.0.gem.package.json rename to minecode/tests/testfiles/rubygems/m2r-2.1.0.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem b/minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem rename to minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem diff --git 
a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.mapped.json b/minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.mapped.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.mapped.json rename to minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.mapped.json diff --git a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.metadata b/minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.metadata similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.metadata rename to minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.metadata diff --git a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.package.json b/minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.package.json rename to minecode/tests/testfiles/rubygems/mysmallidea-address_standardization-0.4.1.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem b/minecode/tests/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem rename to minecode/tests/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem.package.json b/minecode/tests/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem.package.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem.package.json rename to minecode/tests/testfiles/rubygems/mysmallidea-mad_mimi_mailer-0.0.9.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/ng-rails-csrf-0.1.0.gem b/minecode/tests/testfiles/rubygems/ng-rails-csrf-0.1.0.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/ng-rails-csrf-0.1.0.gem rename to minecode/tests/testfiles/rubygems/ng-rails-csrf-0.1.0.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/ng-rails-csrf-0.1.0.gem.package.json b/minecode/tests/testfiles/rubygems/ng-rails-csrf-0.1.0.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/ng-rails-csrf-0.1.0.gem.package.json rename to minecode/tests/testfiles/rubygems/ng-rails-csrf-0.1.0.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/small-0.2.gem b/minecode/tests/testfiles/rubygems/small-0.2.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/small-0.2.gem rename to minecode/tests/testfiles/rubygems/small-0.2.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/small-0.2.gem.package.json b/minecode/tests/testfiles/rubygems/small-0.2.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/small-0.2.gem.package.json rename to minecode/tests/testfiles/rubygems/small-0.2.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/small_wonder-0.1.10.gem b/minecode/tests/testfiles/rubygems/small_wonder-0.1.10.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/small_wonder-0.1.10.gem rename to minecode/tests/testfiles/rubygems/small_wonder-0.1.10.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/small_wonder-0.1.10.gem.package.json b/minecode/tests/testfiles/rubygems/small_wonder-0.1.10.gem.package.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/rubygems/small_wonder-0.1.10.gem.package.json rename to minecode/tests/testfiles/rubygems/small_wonder-0.1.10.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem rename to minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem diff --git a/minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json rename to minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json diff --git a/minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.package.json b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.package.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.package.json rename to minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.package.json diff --git a/minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.visited.json b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.visited.json similarity index 100% rename from minecode/tests/discovery/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.visited.json rename to minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.visited.json diff --git a/minecode/tests/discovery/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json similarity index 100% rename from 
minecode/tests/discovery/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json rename to minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json diff --git a/minecode/tests/discovery/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json rename to minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json diff --git a/minecode/tests/discovery/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json rename to minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json diff --git a/minecode/tests/discovery/testfiles/run_map/test_merge_packages_no_replace-expected.json b/minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/run_map/test_merge_packages_no_replace-expected.json rename to minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json diff --git a/minecode/tests/discovery/testfiles/run_map/test_merge_packages_with_replace-expected.json b/minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/run_map/test_merge_packages_with_replace-expected.json rename to 
minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/a-vitkus_profile.html b/minecode/tests/testfiles/sourceforge/a-vitkus_profile.html similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/a-vitkus_profile.html rename to minecode/tests/testfiles/sourceforge/a-vitkus_profile.html diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_heanet_rsync_dir.json b/minecode/tests/testfiles/sourceforge/expected_heanet_rsync_dir.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_heanet_rsync_dir.json rename to minecode/tests/testfiles/sourceforge/expected_heanet_rsync_dir.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_netwiki.json b/minecode/tests/testfiles/sourceforge/expected_netwiki.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_netwiki.json rename to minecode/tests/testfiles/sourceforge/expected_netwiki.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_dir_index.json b/minecode/tests/testfiles/sourceforge/expected_sf_dir_index.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_dir_index.json rename to minecode/tests/testfiles/sourceforge/expected_sf_dir_index.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_dir_page.json b/minecode/tests/testfiles/sourceforge/expected_sf_dir_page.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_dir_page.json rename to minecode/tests/testfiles/sourceforge/expected_sf_dir_page.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_project.json b/minecode/tests/testfiles/sourceforge/expected_sf_project.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_project.json rename to 
minecode/tests/testfiles/sourceforge/expected_sf_project.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap.json b/minecode/tests/testfiles/sourceforge/expected_sf_sitemap.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap.json rename to minecode/tests/testfiles/sourceforge/expected_sf_sitemap.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_new.json b/minecode/tests/testfiles/sourceforge/expected_sf_sitemap_new.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_new.json rename to minecode/tests/testfiles/sourceforge/expected_sf_sitemap_new.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_page.json b/minecode/tests/testfiles/sourceforge/expected_sf_sitemap_page.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_page.json rename to minecode/tests/testfiles/sourceforge/expected_sf_sitemap_page.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_page_new.json b/minecode/tests/testfiles/sourceforge/expected_sf_sitemap_page_new.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sf_sitemap_page_new.json rename to minecode/tests/testfiles/sourceforge/expected_sf_sitemap_page_new.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/expected_sitemap-6.json b/minecode/tests/testfiles/sourceforge/expected_sitemap-6.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/expected_sitemap-6.json rename to minecode/tests/testfiles/sourceforge/expected_sitemap-6.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/filezilla.json b/minecode/tests/testfiles/sourceforge/filezilla.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/filezilla.json rename to 
minecode/tests/testfiles/sourceforge/filezilla.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/mapper_niftyphp_expected.json b/minecode/tests/testfiles/sourceforge/mapper_niftyphp_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/mapper_niftyphp_expected.json rename to minecode/tests/testfiles/sourceforge/mapper_niftyphp_expected.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/mapper_odanur_expected.json b/minecode/tests/testfiles/sourceforge/mapper_odanur_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/mapper_odanur_expected.json rename to minecode/tests/testfiles/sourceforge/mapper_odanur_expected.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/mapper_omonoql_expected.json b/minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/mapper_omonoql_expected.json rename to minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/mapper_openstunts_expected.json b/minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/mapper_openstunts_expected.json rename to minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/monoql.json b/minecode/tests/testfiles/sourceforge/monoql.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/monoql.json rename to minecode/tests/testfiles/sourceforge/monoql.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/netwiki.json b/minecode/tests/testfiles/sourceforge/netwiki.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/netwiki.json rename to minecode/tests/testfiles/sourceforge/netwiki.json diff 
--git a/minecode/tests/discovery/testfiles/sourceforge/niftyphp.json b/minecode/tests/testfiles/sourceforge/niftyphp.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/niftyphp.json rename to minecode/tests/testfiles/sourceforge/niftyphp.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/odanur.json b/minecode/tests/testfiles/sourceforge/odanur.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/odanur.json rename to minecode/tests/testfiles/sourceforge/odanur.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/openstunts.json b/minecode/tests/testfiles/sourceforge/openstunts.json similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/openstunts.json rename to minecode/tests/testfiles/sourceforge/openstunts.json diff --git a/minecode/tests/discovery/testfiles/sourceforge/rsync_heanet_sfnet.dir b/minecode/tests/testfiles/sourceforge/rsync_heanet_sfnet.dir similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/rsync_heanet_sfnet.dir rename to minecode/tests/testfiles/sourceforge/rsync_heanet_sfnet.dir diff --git a/minecode/tests/discovery/testfiles/sourceforge/sitemap-1.xml b/minecode/tests/testfiles/sourceforge/sitemap-1.xml similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/sitemap-1.xml rename to minecode/tests/testfiles/sourceforge/sitemap-1.xml diff --git a/minecode/tests/discovery/testfiles/sourceforge/sitemap-6.xml b/minecode/tests/testfiles/sourceforge/sitemap-6.xml similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/sitemap-6.xml rename to minecode/tests/testfiles/sourceforge/sitemap-6.xml diff --git a/minecode/tests/discovery/testfiles/sourceforge/sitemap.xml b/minecode/tests/testfiles/sourceforge/sitemap.xml similarity index 100% rename from minecode/tests/discovery/testfiles/sourceforge/sitemap.xml rename to 
minecode/tests/testfiles/sourceforge/sitemap.xml diff --git a/minecode/src/discovery/utils.py b/minecode/utils.py similarity index 99% rename from minecode/src/discovery/utils.py rename to minecode/utils.py index c2e88ed0..1bcd023b 100644 --- a/minecode/src/discovery/utils.py +++ b/minecode/utils.py @@ -25,7 +25,7 @@ from commoncode.fileutils import create_dir from extractcode.extract import extract -from discovery.management.commands import get_settings +from minecode.management.commands import get_settings logger = logging.getLogger(__name__) # import sys diff --git a/minecode/src/discovery/utils_test.py b/minecode/utils_test.py similarity index 98% rename from minecode/src/discovery/utils_test.py rename to minecode/utils_test.py index f4afe266..121d821b 100644 --- a/minecode/src/discovery/utils_test.py +++ b/minecode/utils_test.py @@ -23,7 +23,7 @@ from commoncode.testcase import FileBasedTesting from scancode.cli_test_utils import purl_with_fake_uuid -from discovery.utils import get_temp_dir +from minecode.utils import get_temp_dir """ @@ -38,9 +38,6 @@ """ -DISCOVERY_TEST_BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') - - class BaseMiningTestCase(TestCase): BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') diff --git a/minecode/src/discovery/utils_test.py.ABOUT b/minecode/utils_test.py.ABOUT similarity index 100% rename from minecode/src/discovery/utils_test.py.ABOUT rename to minecode/utils_test.py.ABOUT diff --git a/minecode/src/discovery/utils_test.py.NOTICE b/minecode/utils_test.py.NOTICE similarity index 100% rename from minecode/src/discovery/utils_test.py.NOTICE rename to minecode/utils_test.py.NOTICE diff --git a/minecode/src/discovery/version.py b/minecode/version.py similarity index 100% rename from minecode/src/discovery/version.py rename to minecode/version.py diff --git a/minecode/src/discovery/visitors/__init__.py b/minecode/visitors/__init__.py similarity index 99% rename from 
minecode/src/discovery/visitors/__init__.py rename to minecode/visitors/__init__.py index 94e0e965..587ded60 100644 --- a/minecode/src/discovery/visitors/__init__.py +++ b/minecode/visitors/__init__.py @@ -15,8 +15,8 @@ import pkgutil import tempfile -from discovery.utils import fetch_http -from discovery.utils import get_temp_file +from minecode.utils import fetch_http +from minecode.utils import get_temp_file # FIXME: use attr or use a plain ResourceURI object insteaad diff --git a/minecode/src/discovery/visitors/debian.py b/minecode/visitors/debian.py similarity index 97% rename from minecode/src/discovery/visitors/debian.py rename to minecode/visitors/debian.py index 6d4fbb9a..94bbe068 100644 --- a/minecode/src/discovery/visitors/debian.py +++ b/minecode/visitors/debian.py @@ -18,12 +18,12 @@ from debian_inspector import copyright as debcopy from packageurl import PackageURL -from discovery import ls -from discovery import seed -from discovery import visit_router -from discovery.visitors import HttpVisitor -from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import ls +from minecode import seed +from minecode import visit_router +from minecode.visitors import HttpVisitor +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI logger = logging.getLogger(__name__) diff --git a/minecode/src/discovery/visitors/fdroid.py b/minecode/visitors/fdroid.py similarity index 90% rename from minecode/src/discovery/visitors/fdroid.py rename to minecode/visitors/fdroid.py index d05776a1..4d171d8c 100644 --- a/minecode/src/discovery/visitors/fdroid.py +++ b/minecode/visitors/fdroid.py @@ -13,13 +13,13 @@ from packageurl import PackageURL -from discovery import seed -from discovery import visit_router -from discovery.utils import get_temp_file -from discovery.visitors import HttpJsonVisitor -from discovery.visitors import URI -from discovery.visitors import Visitor -from discovery.visitors 
import NonPersistentHttpVisitor +from minecode import seed +from minecode import visit_router +from minecode.utils import get_temp_file +from minecode.visitors import HttpJsonVisitor +from minecode.visitors import URI +from minecode.visitors import Visitor +from minecode.visitors import NonPersistentHttpVisitor """ Visitors for F-Droid package repositories. diff --git a/minecode/src/discovery/visitors/freebsd.py b/minecode/visitors/freebsd.py similarity index 91% rename from minecode/src/discovery/visitors/freebsd.py rename to minecode/visitors/freebsd.py index 90d49be6..fe9e9edf 100644 --- a/minecode/src/discovery/visitors/freebsd.py +++ b/minecode/visitors/freebsd.py @@ -13,12 +13,12 @@ from bs4 import BeautifulSoup -from discovery import seed -from discovery import visit_router -from discovery.utils import extract_file -from discovery.visitors import HttpVisitor -from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import seed +from minecode import visit_router +from minecode.utils import extract_file +from minecode.visitors import HttpVisitor +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI logger = logging.getLogger(__name__) handler = logging.StreamHandler() diff --git a/minecode/src/discovery/visitors/java_stream.LICENSE b/minecode/visitors/java_stream.LICENSE similarity index 100% rename from minecode/src/discovery/visitors/java_stream.LICENSE rename to minecode/visitors/java_stream.LICENSE diff --git a/minecode/src/discovery/visitors/java_stream.py b/minecode/visitors/java_stream.py similarity index 100% rename from minecode/src/discovery/visitors/java_stream.py rename to minecode/visitors/java_stream.py diff --git a/minecode/src/discovery/visitors/java_stream.py.ABOUT b/minecode/visitors/java_stream.py.ABOUT similarity index 100% rename from minecode/src/discovery/visitors/java_stream.py.ABOUT rename to minecode/visitors/java_stream.py.ABOUT diff --git 
a/minecode/src/discovery/visitors/maven.py b/minecode/visitors/maven.py similarity index 99% rename from minecode/src/discovery/visitors/maven.py rename to minecode/visitors/maven.py index 404b7c96..dd14fd4f 100644 --- a/minecode/src/discovery/visitors/maven.py +++ b/minecode/visitors/maven.py @@ -24,12 +24,12 @@ from packagedcode.maven import build_filename from packagedcode.maven import build_url -from discovery import seed -from discovery import visit_router -from discovery.visitors import java_stream -from discovery.visitors import HttpVisitor -from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import seed +from minecode import visit_router +from minecode.visitors import java_stream +from minecode.visitors import HttpVisitor +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI """ This module handles the Maven repositories such as central and other diff --git a/minecode/src/discovery/visitors/npm.py b/minecode/visitors/npm.py similarity index 95% rename from minecode/src/discovery/visitors/npm.py rename to minecode/visitors/npm.py index a568d25c..afcc342b 100644 --- a/minecode/src/discovery/visitors/npm.py +++ b/minecode/visitors/npm.py @@ -16,10 +16,10 @@ from packagedcode.npm import npm_api_url from packagedcode.npm import split_scoped_package_name -from discovery import seed -from discovery import visit_router -from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import seed +from minecode import visit_router +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI """ diff --git a/minecode/src/discovery/visitors/pypi.py b/minecode/visitors/pypi.py similarity index 95% rename from minecode/src/discovery/visitors/pypi.py rename to minecode/visitors/pypi.py index d4563855..f83b9c1d 100644 --- a/minecode/src/discovery/visitors/pypi.py +++ b/minecode/visitors/pypi.py @@ -14,12 +14,12 
@@ from packageurl import PackageURL -from discovery import seed -from discovery import visit_router -from discovery.utils import get_temp_file -from discovery.visitors import HttpJsonVisitor -from discovery.visitors import URI -from discovery.visitors import Visitor +from minecode import seed +from minecode import visit_router +from minecode.utils import get_temp_file +from minecode.visitors import HttpJsonVisitor +from minecode.visitors import URI +from minecode.visitors import Visitor """ diff --git a/minecode/src/discovery/visitors/rubygems.py b/minecode/visitors/rubygems.py similarity index 95% rename from minecode/src/discovery/visitors/rubygems.py rename to minecode/visitors/rubygems.py index a7119cc6..999d21ea 100644 --- a/minecode/src/discovery/visitors/rubygems.py +++ b/minecode/visitors/rubygems.py @@ -18,12 +18,12 @@ from rubymarshal.classes import UsrMarshal from packageurl import PackageURL -from discovery import seed -from discovery import visit_router -from discovery.utils import extract_file -from discovery.visitors import HttpJsonVisitor -from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import seed +from minecode import visit_router +from minecode.utils import extract_file +from minecode.visitors import HttpJsonVisitor +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI logger = logging.getLogger(__name__) diff --git a/minecode/src/discovery/visitors/sourceforge.py b/minecode/visitors/sourceforge.py similarity index 92% rename from minecode/src/discovery/visitors/sourceforge.py rename to minecode/visitors/sourceforge.py index 49a266eb..8ab0d306 100644 --- a/minecode/src/discovery/visitors/sourceforge.py +++ b/minecode/visitors/sourceforge.py @@ -14,12 +14,12 @@ from packageurl import PackageURL -from discovery import seed -from discovery import visit_router -from discovery.visitors import HttpJsonVisitor -from discovery.visitors import HttpVisitor 
-from discovery.visitors import NonPersistentHttpVisitor -from discovery.visitors import URI +from minecode import seed +from minecode import visit_router +from minecode.visitors import HttpJsonVisitor +from minecode.visitors import HttpVisitor +from minecode.visitors import NonPersistentHttpVisitor +from minecode.visitors import URI logger = logging.getLogger(__name__) diff --git a/packagedb/.gitattributes b/packagedb/.gitattributes deleted file mode 100644 index 96c89ceb..00000000 --- a/packagedb/.gitattributes +++ /dev/null @@ -1,3 +0,0 @@ -# Ignore all Git auto CR/LF line endings conversions -* -text -pyproject.toml export-subst diff --git a/packagedb/.gitignore b/packagedb/.gitignore deleted file mode 100644 index 4c2316fc..00000000 --- a/packagedb/.gitignore +++ /dev/null @@ -1,74 +0,0 @@ -# Python compiled files -*.py[cod] - -# virtualenv and other misc bits -*.egg-info -/dist -/build -/bin -/lib -/scripts -/Scripts -/Lib -/pip-selfcheck.json -/tmp -/venv -.Python -/include -/Include -/local -*/local/* -/local/ -/share/ -/tcl/ -/.eggs/ - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.cache -.coverage -.coverage.* -nosetests.xml -htmlcov - -# Translations -*.mo - -# IDEs -.project -.pydevproject -.idea -org.eclipse.core.resources.prefs -.vscode -.vs - -# Sphinx -docs/_build -docs/bin -docs/build -docs/include -docs/Lib -doc/pyvenv.cfg -pyvenv.cfg - -# Various junk and temp files -.DS_Store -*~ -.*.sw[po] -.build -.ve -*.bak -/.cache/ - -# pyenv -/.python-version -/man/ -/.pytest_cache/ -lib64 -tcl - -# Ignore Jupyter Notebook related temp files -.ipynb_checkpoints/ -/.env diff --git a/packagedb/AUTHORS.rst b/packagedb/AUTHORS.rst deleted file mode 100644 index 58c0afe6..00000000 --- a/packagedb/AUTHORS.rst +++ /dev/null @@ -1,7 +0,0 @@ -The following organizations or individuals have contributed to this repo: - -- nexB Inc. 
-- Jono Yang @JonoYang -- Philippe Ombredanne -- Li Ha -- Steven Esser @majurg \ No newline at end of file diff --git a/packagedb/CHANGELOG.rst b/packagedb/CHANGELOG.rst deleted file mode 100644 index 7c378e5d..00000000 --- a/packagedb/CHANGELOG.rst +++ /dev/null @@ -1,10 +0,0 @@ - -Release notes -============= - - - -Version v1.0.0 ----------------- - -Initial release \ No newline at end of file diff --git a/packagedb/Makefile b/packagedb/Makefile deleted file mode 100644 index 6a8bfe83..00000000 --- a/packagedb/Makefile +++ /dev/null @@ -1,115 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -# Python version can be specified with `$ PYTHON_EXE=python3.x make conf` -PYTHON_EXE?=python3 -VENV=venv -MANAGE=${VENV}/bin/python manage.py -ACTIVATE?=. ${VENV}/bin/activate; -VIRTUALENV_PYZ=../etc/thirdparty/virtualenv.pyz -# Do not depend on Python to generate the SECRET_KEY -GET_SECRET_KEY=`base64 /dev/urandom | head -c50` -# Customize with `$ make envfile ENV_FILE=/etc/purldb/.env` -ENV_FILE=.env -# Customize with `$ make postgres PACKAGEDB_DB_PASSWORD=YOUR_PASSWORD` -PACKAGEDB_DB_PASSWORD=packagedb - -# Use sudo for postgres, but only on Linux -UNAME := $(shell uname) -ifeq ($(UNAME), Linux) - SUDO_POSTGRES=sudo -u postgres -else - SUDO_POSTGRES= -endif - -virtualenv: - @echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}" - @${PYTHON_EXE} ${VIRTUALENV_PYZ} --never-download --no-periodic-update ${VENV} - -conf: virtualenv - @echo "-> Install dependencies" - @${ACTIVATE} pip install -e . 
-c requirements.txt - -dev: virtualenv - @echo "-> Configure and install development dependencies" - @${ACTIVATE} pip install -e .[dev] -c requirements.txt - -envfile: - @echo "-> Create the .env file and generate a secret key" - @if test -f ${ENV_FILE}; then echo ".env file exists already"; exit 1; fi - @mkdir -p $(shell dirname ${ENV_FILE}) && touch ${ENV_FILE} - @echo SECRET_KEY=\"${GET_SECRET_KEY}\" > ${ENV_FILE} - -isort: - @echo "-> Apply isort changes to ensure proper imports ordering" - ${VENV}/bin/isort . - -black: - @echo "-> Apply black code formatter" - ${VENV}/bin/black . - -doc8: - @echo "-> Run doc8 validation" - @${ACTIVATE} doc8 --max-line-length 100 --ignore-path docs/_build/ --quiet docs/ - -valid: isort black - -check: - @echo "-> Run pycodestyle (PEP8) validation" - @${ACTIVATE} pycodestyle --max-line-length=100 --exclude=venv,lib,thirdparty,docs,migrations,settings.py . - @echo "-> Run isort imports ordering validation" - @${ACTIVATE} isort --check-only . - @echo "-> Run black validation" - @${ACTIVATE} black --check ${BLACK_ARGS} - -clean: - @echo "-> Clean the Python env" - rm -rf ${VENV} build/ dist/ packagedb.egg-info/ docs/_build/ pip-selfcheck.json - find . 
-type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete - -migrate: - @echo "-> Apply database migrations" - ${MANAGE} migrate - -postgres: - @echo "-> Configure PostgreSQL database" - @echo "-> Create database user 'packagedb'" - ${SUDO_POSTGRES} createuser --no-createrole --no-superuser --login --inherit --createdb packagedb || true - ${SUDO_POSTGRES} psql -c "alter user packagedb with encrypted password '${PACKAGEDB_DB_PASSWORD}';" || true - @echo "-> Drop 'packagedb' database" - ${SUDO_POSTGRES} dropdb packagedb || true - @echo "-> Create 'packagedb' database" - ${SUDO_POSTGRES} createdb --encoding=utf-8 --owner=packagedb packagedb - @$(MAKE) migrate - -run: - ${MANAGE} runserver 8001 --insecure - -test: - @echo "-> Run the test suite" - ${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs - -bump: - @echo "-> Bump the version" - bin/bumpver update --no-fetch --patch - -docs: - rm -rf docs/_build/ - @${ACTIVATE} sphinx-build docs/ docs/_build/ - -docker-images: - @echo "-> Build Docker services" - docker-compose build - @echo "-> Pull service images" - docker-compose pull - @echo "-> Save the service images to a compressed tar archive in the dist/ directory" - @mkdir -p dist/ - @docker save postgres packagedb_packagedb nginx | gzip > dist/packagedb-images-`git describe --tags`.tar.gz - -.PHONY: virtualenv conf dev envfile install check valid isort clean migrate postgres sqlite run test bump docs docker-images diff --git a/packagedb/NOTICE b/packagedb/NOTICE deleted file mode 100644 index dab28052..00000000 --- a/packagedb/NOTICE +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 AND CC-BY-SA-4.0 -# purldb software is licensed under the Apache License version 2.0. -# purldb data is licensed collectively under CC-BY-SA-4.0. -# See https://www.apache.org/licenses/LICENSE-2.0 for the license text. 
-# See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license text. -# -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/packagedb/README.rst b/packagedb/README.rst deleted file mode 100644 index 1796724b..00000000 --- a/packagedb/README.rst +++ /dev/null @@ -1,12 +0,0 @@ -packagedb -========= - -PackageDB is a Django web app that provides the model for storing package metadata. - -Local setup -=========== - -* ./configure -* make postgres -* make envfile -* make run diff --git a/minecode/src/minecodeio/__init__.py b/packagedb/__init__.py similarity index 100% rename from minecode/src/minecodeio/__init__.py rename to packagedb/__init__.py diff --git a/packagedb/apache-2.0.LICENSE b/packagedb/apache-2.0.LICENSE deleted file mode 100644 index 261eeb9e..00000000 --- a/packagedb/apache-2.0.LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/packagedb/src/packagedb/api.py b/packagedb/api.py similarity index 100% rename from packagedb/src/packagedb/api.py rename to packagedb/api.py diff --git a/packagedb/src/packagedb/api_custom.py b/packagedb/api_custom.py similarity index 100% rename from packagedb/src/packagedb/api_custom.py rename to packagedb/api_custom.py diff --git a/packagedb/cc-by-sa-4.0.LICENSE b/packagedb/cc-by-sa-4.0.LICENSE deleted file mode 100644 index e04b480f..00000000 --- a/packagedb/cc-by-sa-4.0.LICENSE +++ /dev/null @@ -1,427 +0,0 @@ -Attribution-ShareAlike 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. 
- -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. 
- Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution-ShareAlike 4.0 International Public -License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You -such rights in consideration of benefits the Licensor receives from -making the Licensed Material available under these terms and -conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative - Commons as essentially the equivalent of this Public License. - - d. 
Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name - of a Creative Commons Public License. The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - i. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. 
Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - m. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. 
The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. Additional offer from the Licensor -- Adapted Material. - Every recipient of Adapted Material from You - automatically receives an offer from the Licensor to - exercise the Licensed Rights in the Adapted Material - under the conditions of the Adapter's License You apply. - - c. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. 
Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. 
You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - b. ShareAlike. - - In addition to the conditions in Section 3(a), if You Share - Adapted Material You produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material, - - including for purposes of Section 3(b); and - c. 
You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. 
However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. 
No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public -licenses. Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. diff --git a/packagedb/configure.bat b/packagedb/configure.bat deleted file mode 100644 index 41547cc5..00000000 --- a/packagedb/configure.bat +++ /dev/null @@ -1,207 +0,0 @@ -@echo OFF -@setlocal - -@rem Copyright (c) nexB Inc. and others. All rights reserved. -@rem SPDX-License-Identifier: Apache-2.0 -@rem See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -@rem See https://github.com/nexB/ for support or download. 
-@rem See https://aboutcode.org for more information about nexB OSS projects. - - -@rem ################################ -@rem # A configuration script to set things up: -@rem # create a virtualenv and install or update thirdparty packages. -@rem # Source this script for initial configuration -@rem # Use configure --help for details - -@rem # NOTE: please keep in sync with POSIX script configure - -@rem # This script will search for a virtualenv.pyz app in etc\thirdparty\virtualenv.pyz -@rem # Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default -@rem ################################ - - -@rem ################################ -@rem # Defaults. Change these variables to customize this script -@rem ################################ - -@rem # Requirement arguments passed to pip and used by default or with --dev. -set "REQUIREMENTS=--editable . --constraint requirements.txt" -set "DEV_REQUIREMENTS=--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" -set "DOCS_REQUIREMENTS=--editable .[docs] --constraint requirements.txt" - -@rem # where we create a virtualenv -set "VIRTUALENV_DIR=venv" - -@rem # Cleanable files and directories to delete with the --clean option -set "CLEANABLE=build venv" - -@rem # extra arguments passed to pip -set "PIP_EXTRA_ARGS= " - -@rem # the URL to download virtualenv.pyz if needed -set VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz -@rem ################################ - - -@rem ################################ -@rem # Current directory where this script lives -set CFG_ROOT_DIR=%~dp0 -set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" - - -@rem ################################ -@rem # Thirdparty package locations and index handling -@rem # Find packages from the local thirdparty directory -if exist "%CFG_ROOT_DIR%\thirdparty" ( - set PIP_EXTRA_ARGS=--find-links "%CFG_ROOT_DIR%\thirdparty" -) - - -@rem ################################ -@rem # Set the quiet flag to empty 
if not defined -if not defined CFG_QUIET ( - set "CFG_QUIET= " -) - - -@rem ################################ -@rem # Main command line entry point -set "CFG_REQUIREMENTS=%REQUIREMENTS%" - -:again -if not "%1" == "" ( - if "%1" EQU "--help" (goto cli_help) - if "%1" EQU "--clean" (goto clean) - if "%1" EQU "--dev" ( - set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" - ) - if "%1" EQU "--docs" ( - set "CFG_REQUIREMENTS=%DOCS_REQUIREMENTS%" - ) - shift - goto again -) - -set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS%" - - -@rem ################################ -@rem # Find a proper Python to run -@rem # Use environment variables or a file if available. -@rem # Otherwise the latest Python by default. -if not defined PYTHON_EXECUTABLE ( - @rem # check for a file named PYTHON_EXECUTABLE - if exist "%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" ( - set /p PYTHON_EXECUTABLE=<"%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" - ) else ( - set "PYTHON_EXECUTABLE=py" - ) -) - - -@rem ################################ -:create_virtualenv -@rem # create a virtualenv for Python -@rem # Note: we do not use the bundled Python 3 "venv" because its behavior and -@rem # presence is not consistent across Linux distro and sometimes pip is not -@rem # included either by default. The virtualenv.pyz app cures all these issues. 
- -if not exist "%CFG_BIN_DIR%\python.exe" ( - if not exist "%CFG_BIN_DIR%" ( - mkdir "%CFG_BIN_DIR%" - ) - - if exist "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ( - %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ^ - --wheel embed --pip embed --setuptools embed ^ - --seeder pip ^ - --never-download ^ - --no-periodic-update ^ - --no-vcs-ignore ^ - %CFG_QUIET% ^ - "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" - ) else ( - if not exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ( - curl -o "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" %VIRTUALENV_PYZ_URL% - - if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% - ) - ) - %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ^ - --wheel embed --pip embed --setuptools embed ^ - --seeder pip ^ - --never-download ^ - --no-periodic-update ^ - --no-vcs-ignore ^ - %CFG_QUIET% ^ - "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" - ) -) - -if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% -) - - -@rem ################################ -:install_packages -@rem # install requirements in virtualenv -@rem # note: --no-build-isolation means that pip/wheel/setuptools will not -@rem # be reinstalled a second time and reused from the virtualenv and this -@rem # speeds up the installation. -@rem # We always have the PEP517 build dependencies installed already. 
- -"%CFG_BIN_DIR%\pip" install ^ - --upgrade ^ - --no-build-isolation ^ - %CFG_QUIET% ^ - %PIP_EXTRA_ARGS% ^ - %CFG_REQUIREMENTS% - - -@rem ################################ -:create_bin_junction -@rem # Create junction to bin to have the same directory between linux and windows -if exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" ( - rmdir /s /q "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" -) -mklink /J "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" - -if %ERRORLEVEL% neq 0 ( - exit /b %ERRORLEVEL% -) - -exit /b 0 - - -@rem ################################ -:cli_help - echo An initial configuration script - echo " usage: configure [options]" - echo " " - echo The default is to configure for regular use. Use --dev for development. - echo " " - echo The options are: - echo " --clean: clean built and installed files and exit." - echo " --dev: configure the environment for development." - echo " --help: display this help message and exit." - echo " " - echo By default, the python interpreter version found in the path is used. - echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to - echo configure another Python executable interpreter to use. If this is not - echo set, a file named PYTHON_EXECUTABLE containing a single line with the - echo path of the Python executable to use will be checked last. - exit /b 0 - - -@rem ################################ -:clean -@rem # Remove cleanable file and directories and files from the root dir. -echo "* Cleaning ..." 
-for %%F in (%CLEANABLE%) do ( - rmdir /s /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1 - del /f /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1 -) -exit /b 0 diff --git a/packagedb/src/packagedb/migrations/0001_initial.py b/packagedb/migrations/0001_initial.py similarity index 100% rename from packagedb/src/packagedb/migrations/0001_initial.py rename to packagedb/migrations/0001_initial.py diff --git a/packagedb/src/packagedb/migrations/0002_auto_20160707_1018.py b/packagedb/migrations/0002_auto_20160707_1018.py similarity index 100% rename from packagedb/src/packagedb/migrations/0002_auto_20160707_1018.py rename to packagedb/migrations/0002_auto_20160707_1018.py diff --git a/packagedb/src/packagedb/migrations/0003_auto_20160708_1513.py b/packagedb/migrations/0003_auto_20160708_1513.py similarity index 100% rename from packagedb/src/packagedb/migrations/0003_auto_20160708_1513.py rename to packagedb/migrations/0003_auto_20160708_1513.py diff --git a/packagedb/src/packagedb/migrations/0004_auto_20160713_0022.py b/packagedb/migrations/0004_auto_20160713_0022.py similarity index 100% rename from packagedb/src/packagedb/migrations/0004_auto_20160713_0022.py rename to packagedb/migrations/0004_auto_20160713_0022.py diff --git a/packagedb/src/packagedb/migrations/0005_auto_20170217_0309.py b/packagedb/migrations/0005_auto_20170217_0309.py similarity index 100% rename from packagedb/src/packagedb/migrations/0005_auto_20170217_0309.py rename to packagedb/migrations/0005_auto_20170217_0309.py diff --git a/packagedb/src/packagedb/migrations/0006_package_mining_level.py b/packagedb/migrations/0006_package_mining_level.py similarity index 100% rename from packagedb/src/packagedb/migrations/0006_package_mining_level.py rename to packagedb/migrations/0006_package_mining_level.py diff --git a/packagedb/src/packagedb/migrations/0007_auto_20180713_0144.py b/packagedb/migrations/0007_auto_20180713_0144.py similarity index 100% rename from packagedb/src/packagedb/migrations/0007_auto_20180713_0144.py rename 
to packagedb/migrations/0007_auto_20180713_0144.py diff --git a/packagedb/src/packagedb/migrations/0008_package_package_url.py b/packagedb/migrations/0008_package_package_url.py similarity index 100% rename from packagedb/src/packagedb/migrations/0008_package_package_url.py rename to packagedb/migrations/0008_package_package_url.py diff --git a/packagedb/src/packagedb/migrations/0009_auto_20180918_1225.py b/packagedb/migrations/0009_auto_20180918_1225.py similarity index 100% rename from packagedb/src/packagedb/migrations/0009_auto_20180918_1225.py rename to packagedb/migrations/0009_auto_20180918_1225.py diff --git a/packagedb/src/packagedb/migrations/0010_auto_20180919_1740.py b/packagedb/migrations/0010_auto_20180919_1740.py similarity index 100% rename from packagedb/src/packagedb/migrations/0010_auto_20180919_1740.py rename to packagedb/migrations/0010_auto_20180919_1740.py diff --git a/packagedb/src/packagedb/migrations/0011_auto_20180921_1129.py b/packagedb/migrations/0011_auto_20180921_1129.py similarity index 100% rename from packagedb/src/packagedb/migrations/0011_auto_20180921_1129.py rename to packagedb/migrations/0011_auto_20180921_1129.py diff --git a/packagedb/src/packagedb/migrations/0012_auto_20181001_1120.py b/packagedb/migrations/0012_auto_20181001_1120.py similarity index 100% rename from packagedb/src/packagedb/migrations/0012_auto_20181001_1120.py rename to packagedb/migrations/0012_auto_20181001_1120.py diff --git a/packagedb/src/packagedb/migrations/0013_auto_20181001_1209.py b/packagedb/migrations/0013_auto_20181001_1209.py similarity index 100% rename from packagedb/src/packagedb/migrations/0013_auto_20181001_1209.py rename to packagedb/migrations/0013_auto_20181001_1209.py diff --git a/packagedb/src/packagedb/migrations/0014_remove_package_package_url.py b/packagedb/migrations/0014_remove_package_package_url.py similarity index 100% rename from packagedb/src/packagedb/migrations/0014_remove_package_package_url.py rename to 
packagedb/migrations/0014_remove_package_package_url.py diff --git a/packagedb/src/packagedb/migrations/0015_remove_package_download_checksums.py b/packagedb/migrations/0015_remove_package_download_checksums.py similarity index 100% rename from packagedb/src/packagedb/migrations/0015_remove_package_download_checksums.py rename to packagedb/migrations/0015_remove_package_download_checksums.py diff --git a/packagedb/src/packagedb/migrations/0016_auto_20181023_1211.py b/packagedb/migrations/0016_auto_20181023_1211.py similarity index 100% rename from packagedb/src/packagedb/migrations/0016_auto_20181023_1211.py rename to packagedb/migrations/0016_auto_20181023_1211.py diff --git a/packagedb/src/packagedb/migrations/0017_auto_20181023_1211.py b/packagedb/migrations/0017_auto_20181023_1211.py similarity index 100% rename from packagedb/src/packagedb/migrations/0017_auto_20181023_1211.py rename to packagedb/migrations/0017_auto_20181023_1211.py diff --git a/packagedb/src/packagedb/migrations/0018_auto_20181023_1212.py b/packagedb/migrations/0018_auto_20181023_1212.py similarity index 100% rename from packagedb/src/packagedb/migrations/0018_auto_20181023_1212.py rename to packagedb/migrations/0018_auto_20181023_1212.py diff --git a/packagedb/src/packagedb/migrations/0019_auto_20181023_1212.py b/packagedb/migrations/0019_auto_20181023_1212.py similarity index 100% rename from packagedb/src/packagedb/migrations/0019_auto_20181023_1212.py rename to packagedb/migrations/0019_auto_20181023_1212.py diff --git a/packagedb/src/packagedb/migrations/0020_package_download_sha256.py b/packagedb/migrations/0020_package_download_sha256.py similarity index 100% rename from packagedb/src/packagedb/migrations/0020_package_download_sha256.py rename to packagedb/migrations/0020_package_download_sha256.py diff --git a/packagedb/src/packagedb/migrations/0021_package_download_sha512.py b/packagedb/migrations/0021_package_download_sha512.py similarity index 100% rename from 
packagedb/src/packagedb/migrations/0021_package_download_sha512.py rename to packagedb/migrations/0021_package_download_sha512.py diff --git a/packagedb/src/packagedb/migrations/0022_package_manifest_path.py b/packagedb/migrations/0022_package_manifest_path.py similarity index 100% rename from packagedb/src/packagedb/migrations/0022_package_manifest_path.py rename to packagedb/migrations/0022_package_manifest_path.py diff --git a/packagedb/src/packagedb/migrations/0023_package_source_packages.py b/packagedb/migrations/0023_package_source_packages.py similarity index 100% rename from packagedb/src/packagedb/migrations/0023_package_source_packages.py rename to packagedb/migrations/0023_package_source_packages.py diff --git a/packagedb/src/packagedb/migrations/0024_auto_20181030_1817.py b/packagedb/migrations/0024_auto_20181030_1817.py similarity index 100% rename from packagedb/src/packagedb/migrations/0024_auto_20181030_1817.py rename to packagedb/migrations/0024_auto_20181030_1817.py diff --git a/packagedb/src/packagedb/migrations/0025_auto_20181030_1817.py b/packagedb/migrations/0025_auto_20181030_1817.py similarity index 100% rename from packagedb/src/packagedb/migrations/0025_auto_20181030_1817.py rename to packagedb/migrations/0025_auto_20181030_1817.py diff --git a/packagedb/src/packagedb/migrations/0026_auto_20181030_1824.py b/packagedb/migrations/0026_auto_20181030_1824.py similarity index 100% rename from packagedb/src/packagedb/migrations/0026_auto_20181030_1824.py rename to packagedb/migrations/0026_auto_20181030_1824.py diff --git a/packagedb/src/packagedb/migrations/0027_auto_20181030_1825.py b/packagedb/migrations/0027_auto_20181030_1825.py similarity index 100% rename from packagedb/src/packagedb/migrations/0027_auto_20181030_1825.py rename to packagedb/migrations/0027_auto_20181030_1825.py diff --git a/packagedb/src/packagedb/migrations/0028_auto_20181127_0224.py b/packagedb/migrations/0028_auto_20181127_0224.py similarity index 100% rename from 
packagedb/src/packagedb/migrations/0028_auto_20181127_0224.py rename to packagedb/migrations/0028_auto_20181127_0224.py diff --git a/packagedb/src/packagedb/migrations/0029_auto_20181127_0246.py b/packagedb/migrations/0029_auto_20181127_0246.py similarity index 100% rename from packagedb/src/packagedb/migrations/0029_auto_20181127_0246.py rename to packagedb/migrations/0029_auto_20181127_0246.py diff --git a/packagedb/src/packagedb/migrations/0030_auto_20190107_1616.py b/packagedb/migrations/0030_auto_20190107_1616.py similarity index 100% rename from packagedb/src/packagedb/migrations/0030_auto_20190107_1616.py rename to packagedb/migrations/0030_auto_20190107_1616.py diff --git a/packagedb/src/packagedb/migrations/0031_auto_20190110_2354.py b/packagedb/migrations/0031_auto_20190110_2354.py similarity index 100% rename from packagedb/src/packagedb/migrations/0031_auto_20190110_2354.py rename to packagedb/migrations/0031_auto_20190110_2354.py diff --git a/packagedb/src/packagedb/migrations/0032_auto_20190125_0019.py b/packagedb/migrations/0032_auto_20190125_0019.py similarity index 100% rename from packagedb/src/packagedb/migrations/0032_auto_20190125_0019.py rename to packagedb/migrations/0032_auto_20190125_0019.py diff --git a/packagedb/src/packagedb/migrations/0033_auto_20190128_2056.py b/packagedb/migrations/0033_auto_20190128_2056.py similarity index 100% rename from packagedb/src/packagedb/migrations/0033_auto_20190128_2056.py rename to packagedb/migrations/0033_auto_20190128_2056.py diff --git a/packagedb/src/packagedb/migrations/0034_auto_20200407_2232.py b/packagedb/migrations/0034_auto_20200407_2232.py similarity index 100% rename from packagedb/src/packagedb/migrations/0034_auto_20200407_2232.py rename to packagedb/migrations/0034_auto_20200407_2232.py diff --git a/packagedb/src/packagedb/migrations/0035_auto_20200408_2126.py b/packagedb/migrations/0035_auto_20200408_2126.py similarity index 100% rename from 
packagedb/src/packagedb/migrations/0035_auto_20200408_2126.py rename to packagedb/migrations/0035_auto_20200408_2126.py diff --git a/packagedb/src/packagedb/migrations/0036_auto_20200416_2131.py b/packagedb/migrations/0036_auto_20200416_2131.py similarity index 100% rename from packagedb/src/packagedb/migrations/0036_auto_20200416_2131.py rename to packagedb/migrations/0036_auto_20200416_2131.py diff --git a/packagedb/src/packagedb/migrations/0037_auto_20200423_1242.py b/packagedb/migrations/0037_auto_20200423_1242.py similarity index 100% rename from packagedb/src/packagedb/migrations/0037_auto_20200423_1242.py rename to packagedb/migrations/0037_auto_20200423_1242.py diff --git a/packagedb/src/packagedb/migrations/0038_add_index_for_filter_fields.py b/packagedb/migrations/0038_add_index_for_filter_fields.py similarity index 100% rename from packagedb/src/packagedb/migrations/0038_add_index_for_filter_fields.py rename to packagedb/migrations/0038_add_index_for_filter_fields.py diff --git a/packagedb/src/packagedb/migrations/0039_packageurl_python_field_updates.py b/packagedb/migrations/0039_packageurl_python_field_updates.py similarity index 100% rename from packagedb/src/packagedb/migrations/0039_packageurl_python_field_updates.py rename to packagedb/migrations/0039_packageurl_python_field_updates.py diff --git a/packagedb/src/packagedb/migrations/0040_add_root_path.py b/packagedb/migrations/0040_add_root_path.py similarity index 100% rename from packagedb/src/packagedb/migrations/0040_add_root_path.py rename to packagedb/migrations/0040_add_root_path.py diff --git a/packagedb/src/packagedb/migrations/0041_update_ordering_to_id.py b/packagedb/migrations/0041_update_ordering_to_id.py similarity index 100% rename from packagedb/src/packagedb/migrations/0041_update_ordering_to_id.py rename to packagedb/migrations/0041_update_ordering_to_id.py diff --git a/packagedb/src/packagedb/migrations/0042_update_fields_to_django3_standards.py 
b/packagedb/migrations/0042_update_fields_to_django3_standards.py similarity index 100% rename from packagedb/src/packagedb/migrations/0042_update_fields_to_django3_standards.py rename to packagedb/migrations/0042_update_fields_to_django3_standards.py diff --git a/packagedb/src/packagedb/migrations/0043_lowercase_purl_fields.py b/packagedb/migrations/0043_lowercase_purl_fields.py similarity index 100% rename from packagedb/src/packagedb/migrations/0043_lowercase_purl_fields.py rename to packagedb/migrations/0043_lowercase_purl_fields.py diff --git a/packagedb/src/packagedb/migrations/0044_add_history_field.py b/packagedb/migrations/0044_add_history_field.py similarity index 100% rename from packagedb/src/packagedb/migrations/0044_add_history_field.py rename to packagedb/migrations/0044_add_history_field.py diff --git a/packagedb/src/packagedb/migrations/0045_relax_license_expression_max_length_for_resources.py b/packagedb/migrations/0045_relax_license_expression_max_length_for_resources.py similarity index 100% rename from packagedb/src/packagedb/migrations/0045_relax_license_expression_max_length_for_resources.py rename to packagedb/migrations/0045_relax_license_expression_max_length_for_resources.py diff --git a/packagedb/src/packagedb/migrations/0046_add_extra_data_to_package.py b/packagedb/migrations/0046_add_extra_data_to_package.py similarity index 100% rename from packagedb/src/packagedb/migrations/0046_add_extra_data_to_package.py rename to packagedb/migrations/0046_add_extra_data_to_package.py diff --git a/packagedb/src/packagedb/migrations/0047_add_search_vector_field_to_package.py b/packagedb/migrations/0047_add_search_vector_field_to_package.py similarity index 100% rename from packagedb/src/packagedb/migrations/0047_add_search_vector_field_to_package.py rename to packagedb/migrations/0047_add_search_vector_field_to_package.py diff --git a/packagedb/src/packagedb/migrations/0048_add_gin_index_to_search_vector_field.py 
b/packagedb/migrations/0048_add_gin_index_to_search_vector_field.py similarity index 100% rename from packagedb/src/packagedb/migrations/0048_add_gin_index_to_search_vector_field.py rename to packagedb/migrations/0048_add_gin_index_to_search_vector_field.py diff --git a/packagedb/src/packagedb/migrations/0049_alter_resource_extra_data.py b/packagedb/migrations/0049_alter_resource_extra_data.py similarity index 100% rename from packagedb/src/packagedb/migrations/0049_alter_resource_extra_data.py rename to packagedb/migrations/0049_alter_resource_extra_data.py diff --git a/packagedb/src/packagedb/migrations/0050_alter_resource_extra_data.py b/packagedb/migrations/0050_alter_resource_extra_data.py similarity index 100% rename from packagedb/src/packagedb/migrations/0050_alter_resource_extra_data.py rename to packagedb/migrations/0050_alter_resource_extra_data.py diff --git a/packagedb/src/packagedb/migrations/0051_package_api_data_url_package_datasource_id_and_more.py b/packagedb/migrations/0051_package_api_data_url_package_datasource_id_and_more.py similarity index 100% rename from packagedb/src/packagedb/migrations/0051_package_api_data_url_package_datasource_id_and_more.py rename to packagedb/migrations/0051_package_api_data_url_package_datasource_id_and_more.py diff --git a/packagedb/migrations/0052_package_index_error_package_last_indexed_date.py b/packagedb/migrations/0052_package_index_error_package_last_indexed_date.py new file mode 100644 index 00000000..bafd588e --- /dev/null +++ b/packagedb/migrations/0052_package_index_error_package_last_indexed_date.py @@ -0,0 +1,31 @@ +# Generated by Django 4.1.2 on 2022-12-08 01:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("packagedb", "0051_package_api_data_url_package_datasource_id_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="package", + name="index_error", + field=models.TextField( + blank=True, + help_text="Indexing 
errors messages. When present this means the indexing has failed.", + null=True, + ), + ), + migrations.AddField( + model_name="package", + name="last_indexed_date", + field=models.DateTimeField( + blank=True, + help_text="Timestamp set to the date of the last indexing. Used to track indexing status.", + null=True, + ), + ), + ] diff --git a/packagedb/migrations/__init__.py b/packagedb/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/src/packagedb/models.py b/packagedb/models.py similarity index 98% rename from packagedb/src/packagedb/models.py rename to packagedb/models.py index e62481ae..8f31923e 100644 --- a/packagedb/src/packagedb/models.py +++ b/packagedb/models.py @@ -427,7 +427,16 @@ class Package( help_text=_('A list of source package URLs (aka. "purl") for this package. ' 'For instance an SRPM is the "source package" for a binary RPM.'), ) - + last_indexed_date = models.DateTimeField( + null=True, + blank=True, + help_text='Timestamp set to the date of the last indexing. Used to track indexing status.' + ) + index_error = models.TextField( + null=True, + blank=True, + help_text='Indexing errors messages. When present this means the indexing has failed.', + ) search_vector = SearchVectorField(null=True) objects = PackageQuerySet.as_manager() @@ -487,8 +496,6 @@ def get_latest_version(self): return sorted_versions[-1] -# TODO: Sync with DejaCode and insure that DejaCode and MineCode use the same definition -# and same case for everything. 
We will need to check organization.models.Owner's OWNER_TYPE_CHOICES party_person = 'person' # often loosely defined party_project = 'project' diff --git a/packagedb/requirements-dev.txt b/packagedb/requirements-dev.txt deleted file mode 100644 index ec140d37..00000000 --- a/packagedb/requirements-dev.txt +++ /dev/null @@ -1,26 +0,0 @@ -aboutcode-toolkit==7.0.2 -attrs==22.1.0 -black==22.10.0 -boolean.py==4.0 -certifi==2022.9.24 -click==8.1.3 -et-xmlfile==1.1.0 -execnet==1.9.0 -iniconfig==1.1.1 -jinja2==3.1.2 -license-expression==30.0.0 -markupsafe==2.1.1 -mypy-extensions==0.4.3 -openpyxl==3.0.10 -packaging==21.3 -pathspec==0.10.1 -platformdirs==2.5.2 -pluggy==1.0.0 -py==1.11.0 -pyparsing==3.0.9 -pytest==7.1.3 -pytest-forked==1.4.0 -pytest-xdist==2.5.0 -pyyaml==6.0 -saneyaml==0.5.2 -tomli==2.0.1 \ No newline at end of file diff --git a/packagedb/requirements.txt b/packagedb/requirements.txt deleted file mode 100644 index 20ae3a76..00000000 --- a/packagedb/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -asgiref==3.5.2 -Django==4.1.2 -django-filter==22.1 -djangorestframework==3.14.0 -django-environ==0.8.1 -natsort==8.2.0 -packageurl-python==0.10.4 -psycopg2-binary==2.9.3 -pytz==2022.5 -setuptools==65.3.0 -sqlparse==0.4.3 diff --git a/packagedb/src/packagedb/serializers.py b/packagedb/serializers.py similarity index 100% rename from packagedb/src/packagedb/serializers.py rename to packagedb/serializers.py diff --git a/packagedb/src/packagedb/signals.py b/packagedb/signals.py similarity index 100% rename from packagedb/src/packagedb/signals.py rename to packagedb/signals.py diff --git a/packagedb/src/packagedb/__init__.py b/packagedb/src/packagedb/__init__.py deleted file mode 100644 index 2eb8f9f0..00000000 --- a/packagedb/src/packagedb/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. 
-# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/packagedb/src/packagedbio/__init__.py b/packagedb/src/packagedbio/__init__.py deleted file mode 100644 index 2eb8f9f0..00000000 --- a/packagedb/src/packagedbio/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/packagedb/tests/packagedb/__init__.py b/packagedb/tests/packagedb/__init__.py deleted file mode 100644 index 2eb8f9f0..00000000 --- a/packagedb/tests/packagedb/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# diff --git a/packagedb/tests/packagedb/tests/__init__.py b/packagedb/tests/packagedb/tests/__init__.py deleted file mode 100644 index 2eb8f9f0..00000000 --- a/packagedb/tests/packagedb/tests/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# diff --git a/packagedb/tests/packagedb/tests/test_api.py b/packagedb/tests/test_api.py similarity index 100% rename from packagedb/tests/packagedb/tests/test_api.py rename to packagedb/tests/test_api.py diff --git a/packagedb/tests/packagedb/tests/test_models.py b/packagedb/tests/test_models.py similarity index 100% rename from packagedb/tests/packagedb/tests/test_models.py rename to packagedb/tests/test_models.py diff --git a/packagedb/tests/test_skeleton_codestyle.py b/packagedb/tests/test_skeleton_codestyle.py deleted file mode 100644 index 2eb6e558..00000000 --- a/packagedb/tests/test_skeleton_codestyle.py +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import subprocess -import unittest -import configparser - - -class BaseTests(unittest.TestCase): - def test_skeleton_codestyle(self): - """ - This test shouldn't run in proliferated repositories. 
- """ - setup_cfg = configparser.ConfigParser() - setup_cfg.read("setup.cfg") - if setup_cfg["metadata"]["name"] != "skeleton": - return - - args = "venv/bin/black --check -l 100 setup.py etc tests" - try: - subprocess.check_output(args.split()) - except subprocess.CalledProcessError as e: - print("===========================================================") - print(e.output) - print("===========================================================") - raise Exception( - "Black style check failed; please format the code using:\n" - " python -m black -l 100 setup.py etc tests", - e.output, - ) from e diff --git a/packagedb/tests/packagedb/tests/testfiles/ant-commons-logging-1.6.1.jar b/packagedb/tests/testfiles/ant-commons-logging-1.6.1.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/ant-commons-logging-1.6.1.jar rename to packagedb/tests/testfiles/ant-commons-logging-1.6.1.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.0.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.0.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.0.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.0.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.1.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.1.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.1.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.1.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.2.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.2.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.2.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.2.RELEASE.jar diff --git 
a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.3.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.3.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.3.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.3.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.4.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.4.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.4.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.4.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.5.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.5.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.5.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.5.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.6.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.6.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.6.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.6.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.7.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.7.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.7.RELEASE.jar rename to packagedb/tests/testfiles/index/spring-boot-1.3.7.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.8.RELEASE.jar b/packagedb/tests/testfiles/index/spring-boot-1.3.8.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/index/spring-boot-1.3.8.RELEASE.jar rename to 
packagedb/tests/testfiles/index/spring-boot-1.3.8.RELEASE.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/match_jars/ant-commons-logging-1.6.1.jar b/packagedb/tests/testfiles/match_jars/ant-commons-logging-1.6.1.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/match_jars/ant-commons-logging-1.6.1.jar rename to packagedb/tests/testfiles/match_jars/ant-commons-logging-1.6.1.jar diff --git a/packagedb/tests/packagedb/tests/testfiles/match_jars/spring-boot-1.3.8.RELEASE.jar b/packagedb/tests/testfiles/match_jars/spring-boot-1.3.8.RELEASE.jar similarity index 100% rename from packagedb/tests/packagedb/tests/testfiles/match_jars/spring-boot-1.3.8.RELEASE.jar rename to packagedb/tests/testfiles/match_jars/spring-boot-1.3.8.RELEASE.jar diff --git a/purldb/__init__.py b/purldb/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/src/packagedbio/settings.py b/purldb/settings.py similarity index 80% rename from packagedb/src/packagedbio/settings.py rename to purldb/settings.py index 3df50929..f953218a 100644 --- a/packagedb/src/packagedbio/settings.py +++ b/purldb/settings.py @@ -12,12 +12,14 @@ import environ -PROJECT_DIR = Path(__file__).resolve().parent.parent + +PROJECT_DIR = Path(__file__).resolve().parent ROOT_DIR = PROJECT_DIR.parent + # Environment -ENV_FILE = "/etc/packagedb/.env" +ENV_FILE = "/etc/purldb/.env" if not Path(ENV_FILE).exists(): ENV_FILE = ROOT_DIR / ".env" @@ -31,26 +33,30 @@ ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=[".localhost", "127.0.0.1", "[::1]"]) # SECURITY WARNING: do not run with debug turned on in production -DEBUG = env.bool("PACKAGEDB_DEBUG", default=False) +DEBUG = env.bool("PURLDB_DEBUG", default=False) -PACKAGEDB_REQUIRE_AUTHENTICATION = env.bool( - "PACKAGEDB_REQUIRE_AUTHENTICATION", default=False +PURLDB_REQUIRE_AUTHENTICATION = env.bool( + "PURLDB_REQUIRE_AUTHENTICATION", default=False ) # SECURITY WARNING: do not run with debug turned on in production 
-DEBUG_TOOLBAR = env.bool("PACKAGEDB_DEBUG_TOOLBAR", default=False) +DEBUG_TOOLBAR = env.bool("PURLDB_DEBUG_TOOLBAR", default=False) -PACKAGEDB_PASSWORD_MIN_LENGTH = env.int("PACKAGEDB_PASSWORD_MIN_LENGTH", default=14) +PURLDB_PASSWORD_MIN_LENGTH = env.int("PURLDB_PASSWORD_MIN_LENGTH", default=14) -# PackageDB +# PurlDB -PACKAGEDB_LOG_LEVEL = env.str("PACKAGEDB_LOG_LEVEL", "INFO") +PURLDB_LOG_LEVEL = env.str("PURLDB_LOG_LEVEL", "INFO") # Application definition INSTALLED_APPS = ( # Local apps # Must come before Third-party apps for proper templates override + 'clearcode', + 'clearindex', + 'minecode', + 'matchcode', 'packagedb', # Django built-in "django.contrib.auth", @@ -75,21 +81,21 @@ 'django.middleware.clickjacking.XFrameOptionsMiddleware', ) -ROOT_URLCONF = 'packagedbio.urls' +ROOT_URLCONF = 'purldb.urls' -WSGI_APPLICATION = "packagedbio.wsgi.application" +WSGI_APPLICATION = "purldb.wsgi.application" # Database DATABASES = { - "default": { - "ENGINE": env.str("PACKAGEDB_DB_ENGINE", "django.db.backends.postgresql"), - "HOST": env.str("PACKAGEDB_DB_HOST", "localhost"), - "NAME": env.str("PACKAGEDB_DB_NAME", "packagedb"), - "USER": env.str("PACKAGEDB_DB_USER", "packagedb"), - "PASSWORD": env.str("PACKAGEDB_DB_PASSWORD", "packagedb"), - "PORT": env.str("PACKAGEDB_DB_PORT", "5432"), - "ATOMIC_REQUESTS": True, + 'default': { + 'ENGINE': env.str('PACKAGEDB_DB_ENGINE', 'django.db.backends.postgresql'), + 'HOST': env.str('PACKAGEDB_DB_HOST', 'localhost'), + 'NAME': env.str('PACKAGEDB_DB_NAME', 'packagedb'), + 'USER': env.str('PACKAGEDB_DB_USER', 'packagedb'), + 'PASSWORD': env.str('PACKAGEDB_DB_PASSWORD', 'packagedb'), + 'PORT': env.str('PACKAGEDB_DB_PORT', '5432'), + 'ATOMIC_REQUESTS': True, } } @@ -128,7 +134,7 @@ { "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", "OPTIONS": { - "min_length": PACKAGEDB_PASSWORD_MIN_LENGTH, + "min_length": PURLDB_PASSWORD_MIN_LENGTH, }, }, { @@ -176,16 +182,16 @@ "loggers": { "scanpipe": { "handlers": ["null"] 
if IS_TESTS else ["console"], - "level": PACKAGEDB_LOG_LEVEL, + "level": PURLDB_LOG_LEVEL, "propagate": False, }, "django": { "handlers": ["null"] if IS_TESTS else ["console"], "propagate": False, }, - # Set PACKAGEDB_LOG_LEVEL=DEBUG to display all SQL queries in the console. + # Set PURLDB_LOG_LEVEL=DEBUG to display all SQL queries in the console. "django.db.backends": { - "level": PACKAGEDB_LOG_LEVEL, + "level": PURLDB_LOG_LEVEL, }, }, } @@ -204,10 +210,10 @@ STATIC_URL = '/static/' -STATIC_ROOT = env.str("PACKAGEDB_STATIC_ROOT", "./") +STATIC_ROOT = '/var/purldb/static/' STATICFILES_DIRS = [ - str(PROJECT_DIR / "packagedbio" / "static"), + PROJECT_DIR / 'static', ] # Third-party apps @@ -231,7 +237,7 @@ "PAGE_SIZE": 10, } -if not PACKAGEDB_REQUIRE_AUTHENTICATION: +if not PURLDB_REQUIRE_AUTHENTICATION: REST_FRAMEWORK["DEFAULT_PERMISSION_CLASSES"] = ( "rest_framework.permissions.AllowAny", ) @@ -261,3 +267,8 @@ INTERNAL_IPS = [ "127.0.0.1", ] + +# Active seeders: each active seeder class need to be added explictly here +ACTIVE_SEEDERS = [ + 'minecode.visitors.npm.NpmSeed', +] diff --git a/purldb/static/.keep b/purldb/static/.keep new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/src/packagedbio/urls.py b/purldb/urls.py similarity index 52% rename from packagedb/src/packagedbio/urls.py rename to purldb/urls.py index 9c7ae058..b6260007 100644 --- a/packagedb/src/packagedbio/urls.py +++ b/purldb/urls.py @@ -10,15 +10,25 @@ from django.conf.urls import include from django.urls import re_path -from rest_framework import routers - +from clearcode.api import CDitemViewSet from packagedb.api import PackageViewSet from packagedb.api import ResourceViewSet +from rest_framework import routers + +from matchcode.api import ApproximateDirectoryContentIndexViewSet +from matchcode.api import ApproximateDirectoryStructureIndexViewSet +from matchcode.api import ExactFileIndexViewSet +from matchcode.api import ExactPackageArchiveIndexViewSet api_router = 
routers.DefaultRouter() api_router.register(r'packages', PackageViewSet) api_router.register(r'resources', ResourceViewSet) +api_router.register(r'approximate_directory_content_index', ApproximateDirectoryContentIndexViewSet) +api_router.register(r'approximate_directory_structure_index', ApproximateDirectoryStructureIndexViewSet) +api_router.register(r'exact_file_index', ExactFileIndexViewSet) +api_router.register(r'exact_package_archive_index', ExactPackageArchiveIndexViewSet) +api_router.register(r'cditems', CDitemViewSet, 'cditems') urlpatterns = [ re_path(r'^api/', include((api_router.urls, 'api'))), diff --git a/packagedb/src/packagedbio/wsgi.py b/purldb/wsgi.py similarity index 81% rename from packagedb/src/packagedbio/wsgi.py rename to purldb/wsgi.py index c386bb6f..51b91d61 100644 --- a/packagedb/src/packagedbio/wsgi.py +++ b/purldb/wsgi.py @@ -12,12 +12,12 @@ """ -WSGI config for packagedbio project. +WSGI config for matchcodeio project. It exposes the WSGI callable as a module-level variable named ``application``. 
""" -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'packagedbio.settings.production') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'matchcodeio.settings') application = get_wsgi_application() diff --git a/packagedb/pyproject.toml b/pyproject.toml similarity index 87% rename from packagedb/pyproject.toml rename to pyproject.toml index b7906615..cde79074 100644 --- a/packagedb/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,5 @@ [build-system] -requires = ["setuptools >= 50", "wheel"] -# requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] - +requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..b40431ee --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,15 @@ +aboutcode-toolkit==7.2.0 +black==22.12.0 +et-xmlfile==1.1.0 +exceptiongroup==1.0.4 +execnet==1.9.0 +iniconfig==1.1.1 +mock==4.0.3 +mypy-extensions==0.4.3 +openpyxl==3.0.10 +pathspec==0.10.3 +platformdirs==2.6.0 +pytest==7.2.0 +pytest-django==4.5.2 +pytest-xdist==3.1.0 +tomli==2.0.1 \ No newline at end of file diff --git a/minecode/requirements.txt b/requirements.txt similarity index 75% rename from minecode/requirements.txt rename to requirements.txt index 438c577a..8171b4a9 100644 --- a/minecode/requirements.txt +++ b/requirements.txt @@ -4,19 +4,21 @@ attrs==22.1.0 banal==1.0.6 beautifulsoup4==4.11.1 binaryornot==0.4.4 +bitarray==2.6.0 boolean.py==4.0 certifi==2022.9.24 cffi==1.15.1 -chardet==5.0.0 +chardet==5.1.0 charset-normalizer==2.1.1 click==8.1.3 -colorama==0.4.5 -commoncode==31.0.0 +colorama==0.4.6 +commoncode @ https://github.com/nexB/commoncode/archive/refs/heads/48-correctly-assign-codebase-attributes.zip container-inspector==32.0.1 -cryptography==38.0.1 +cryptography==38.0.4 debian-inspector==31.0.0 Deprecated==1.2.13 Django==4.1.2 +django-environ==0.9.0 django-filter==22.1 djangorestframework==3.14.0 
dockerfile-parse==1.2.0 @@ -29,10 +31,10 @@ fingerprints==1.0.3 ftfy==6.1.1 ftputil==5.0.4 future==0.18.2 -gemfileparser2==0.9.0 +gemfileparser2==0.9.3 html5lib==1.1 idna==3.4 -importlib-metadata==5.0.0 +importlib-metadata==5.1.0 intbitset==3.0.1 isodate==0.6.1 jaraco.functools==3.5.2 @@ -49,7 +51,7 @@ normality==2.4.0 packageurl-python==0.10.4 packaging==21.3 parameter-expansion-patched==0.3.1 -pdfminer.six==20220524 +pdfminer.six==20221105 pefile==2022.5.30 pip==22.2.2 pip-requirements-parser==31.2.0 @@ -58,26 +60,27 @@ pluggy==1.0.0 plugincode==31.0.0 ply==3.11 psycopg2==2.9.3 +psycopg2-binary==2.9.3 publicsuffix2==2.20191221 pyahocorasick==2.0.0b1 pycparser==2.21 PyGithub==1.56 -pygmars==0.7.0 +pygmars==0.8.0 Pygments==2.13.0 PyJWT==2.6.0 pymaven-patch==0.3.0 PyNaCl==1.5.0 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.5 +pytz==2022.6 PyYAML==6.0 rdflib==6.2.0 reppy2==0.3.6 requests==2.28.1 rubymarshal==1.0.3 saneyaml==0.5.2 -scancode-toolkit -setuptools==65.3.0 +scancode-toolkit @ https://github.com/nexB/scancode-toolkit/archive/refs/heads/maven-pom-parse-dep-backport.zip +setuptools==57.0.0 six==1.16.0 soupsieve==2.3.2.post1 spdx-tools==0.7.0rc0 @@ -86,11 +89,11 @@ text-unidecode==1.3 toml==0.10.2 typecode==30.0.0 typecode-libmagic==5.39.210531 -urllib3==1.26.12 +urllib3==1.26.13 urlpy==0.5 wcwidth==0.2.5 webencodings==0.5.1 wheel==0.37.1 wrapt==1.14.1 xmltodict==0.13.0 -zipp==3.9.0 +zipp==3.11.0 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..73fcf9fb --- /dev/null +++ b/setup.cfg @@ -0,0 +1,77 @@ +[metadata] +name = purldb +version = 2.0.0 +license_files = + LICENSE + AUTHORS.rst + CHANGELOG.rst +author = nexB. Inc. and others +author_email = info@aboutcode.org +license = Apache-2.0 AND CC-BY-SA-4.0 + +# description must be on ONE line https://github.com/pypa/setuptools/issues/1390 +description = A Django-based utility to collect Package data and Resources for collection and matching. 
+long_description = file:README.rst +long_description_content_type = text/x-rst +url = https://github.com/nexB/purldb + +classifiers = + Intended Audience :: Developers + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Topic :: Utilities + +keywords = + matchcode + packagedb + scancode + purl + purldb + clearcode + + +[options] +packages = find: +include_package_data = true +zip_safe = false +install_requires = + arrow == 1.2.3 + bitarray == 2.6.0 + debian-inspector == 31.0.0 + commoncode == 31.0.0 + Django == 4.1.2 + django-environ == 0.9.0 + djangorestframework == 3.14.0 + django-filter == 22.1 + ftputil == 5.0.4 + jawa == 2.2.0 + natsort == 8.2.0 + packageurl-python == 0.10.4 + psycopg2-binary == 2.9.3 + psycopg2 == 2.9.3 + PyGithub == 1.56 + reppy2 == 0.3.6 + rubymarshal == 1.0.3 + scancode-toolkit == 31.2.2 + urlpy == 0.5 + matchcode-toolkit +setup_requires = setuptools_scm[toml] >= 4 + +python_requires = >=3.8.* + +[options.packages.find] +where = src + +[options.extras_require] +testing = + pytest >= 6, != 7.0.0 + pytest-xdist >= 2 + pytest-django + aboutcode-toolkit >= 6.0.0 + black + mock + +docs= + Sphinx>=3.3.1 + sphinx-rtd-theme>=0.5.0 + doc8>=0.8.1 diff --git a/packagedb/setup.py b/setup.py similarity index 100% rename from packagedb/setup.py rename to setup.py