Skip to content

Commit

Permalink
Merge pull request #360 from nexB/batch-3-4-purl2sym
Browse files Browse the repository at this point in the history
purl2sym: Support indexing of Batch3 and Batch4 packages
  • Loading branch information
JonoYang authored Apr 2, 2024
2 parents c4f0e3c + 707c61f commit 5f6316c
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 15 deletions.
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ Once the prerequisites have been installed, set up PurlDB with the following com
make postgres
make postgres_matchcodeio

Indexing some PURLs requires a GitHub API token. Please add your GitHub API token to the `.env` file:
::

GH_TOKEN=your-github-api


Once PurlDB and the database have been set up, run tests to ensure functionality:
::

Expand Down
23 changes: 21 additions & 2 deletions minecode/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,31 @@ def test_map_generic_package(self):
self.assertEqual('1.0.0', package.version)
self.assertEqual('http://example.com/test.tar.gz', package.download_url)

def test_process_request_dir_listed(self):
def test_map_fetchcode_supported_package(self):
    """
    Map a fetchcode-supported generic PURL and check the resulting Package.

    Starting from zero Package rows, mapping the udhcp 0.9.1 PURL is
    expected to return an empty error string and leave exactly one Package
    whose name, version and download URL match that PURL.
    """
    package_count = Package.objects.all().count()
    self.assertEqual(0, package_count)

    # The PURL names udhcp 0.9.1, which is the name and version asserted
    # on the created Package below.
    purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1")
    error_msg = generic.map_fetchcode_supported_package(purl)

    # An empty string means that no error was reported while mapping.
    self.assertEqual('', error_msg)
    package_count = Package.objects.all().count()
    self.assertEqual(1, package_count)

    package = Package.objects.first()
    self.assertEqual("udhcp", package.name)
    self.assertEqual("0.9.1", package.version)
    self.assertEqual(
        "https://web.archive.org/web/20021209021312/http://udhcp.busybox.net/source//udhcp-0.9.1.tar.gz",
        package.download_url,
    )

def test_process_request_fetchcode_generic(self):
package_count = Package.objects.all().count()
self.assertEqual(0, package_count)

purl = "pkg:generic/[email protected]"
error_msg = generic.process_request_dir_listed(purl)
error_msg = generic.process_request_fetchcode_generic(purl)

self.assertEqual(None, error_msg)
package_count = Package.objects.all().count()
Expand Down
26 changes: 18 additions & 8 deletions minecode/visitors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_directory_listed_package(package_url):
def map_fetchcode_supported_package(package_url):
"""
Add a directory listed `package_url` to the PackageDB.
Add a `package_url` supported by fetchcode to the PackageDB.
Return an error string if any errors are encountered during the process
"""
Expand Down Expand Up @@ -121,7 +121,7 @@ def map_directory_listed_package(package_url):
return error


DIR_SUPPORTED_PURLS = [
GENERIC_FETCHCODE_SUPPORTED_PURLS = [
"pkg:generic/busybox@.*",
"pkg:generic/bzip2@.*",
"pkg:generic/dnsmasq@.*",
Expand All @@ -137,16 +137,26 @@ def map_directory_listed_package(package_url):
"pkg:generic/samba@.*",
"pkg:generic/syslinux@.*",
"pkg:generic/toybox@.*",
"pkg:generic/uclibc@@.*",
"pkg:generic/uclibc@.*",
"pkg:generic/uclibc-ng@.*",
"pkg:generic/util-linux@.*",
"pkg:generic/wpa_supplicant@.*",
"pkg:generic/ipkg@.*",
"pkg:generic/linux@.*",
"pkg:generic/mtd-utils@.*",
"pkg:generic/barebox@.*",
"pkg:generic/e2fsprogs@.*",
"pkg:generic/udhcp@.*",
"pkg:generic/miniupnpc@.*",
"pkg:generic/miniupnpd@.*",
"pkg:generic/minissdpd@.*",
"pkg:generic/erofs-utils@.*",
]


@priority_router.route(*DIR_SUPPORTED_PURLS)
def process_request_dir_listed(purl_str):
# Indexing some generic PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS)
def process_request_fetchcode_generic(purl_str):
"""
Process `priority_resource_uri` containing a generic Package URL (PURL)
supported by fetchcode.
Expand All @@ -161,7 +171,7 @@ def process_request_dir_listed(purl_str):
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_directory_listed_package(package_url)
error_msg = map_fetchcode_supported_package(package_url)

if error_msg:
return error_msg
25 changes: 25 additions & 0 deletions minecode/visitors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
from github.Download import Download
from packageurl import PackageURL

from minecode import priority_router
from minecode import visit_router, seed
from minecode.visitors import HttpJsonVisitor
from minecode.visitors import URI
from minecode.visitors.generic import map_fetchcode_supported_package


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -179,3 +181,26 @@ def json_serial_date_obj(obj):
"""JSON serializer for date object"""
if obj and isinstance(obj, (datetime, date)):
return obj.isoformat()


# Indexing GitHub PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route('pkg:github/.*')
def process_request_dir_listed(purl_str):
    """
    Handle a `priority_resource_uri` whose Package URL (PURL) is a GitHub PURL.

    The PURL string is parsed and handed to `map_fetchcode_supported_package`,
    which obtains Package information using https://github.com/nexB/fetchcode
    and uses it to create a new PackageDB entry. The package is then added
    to the scan queue afterwards.

    Return an error string if `purl_str` cannot be parsed or if the mapping
    step reports an error; otherwise return None.
    """
    try:
        parsed_purl = PackageURL.from_string(purl_str)
    except ValueError as parse_error:
        return f"error occurred when parsing {purl_str}: {parse_error}"

    # A falsy mapping result means "no error"; report that as None.
    return map_fetchcode_supported_package(parsed_purl) or None
4 changes: 2 additions & 2 deletions minecode/visitors/gnu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from packageurl import PackageURL

from minecode import priority_router
from minecode.visitors.generic import map_directory_listed_package
from minecode.visitors.generic import map_fetchcode_supported_package

logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
Expand All @@ -35,7 +35,7 @@ def process_request(purl_str):
if not package_url.version:
return

error_msg = map_directory_listed_package(package_url)
error_msg = map_fetchcode_supported_package(package_url)

if error_msg:
return error_msg
25 changes: 25 additions & 0 deletions minecode/visitors/openssl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from commoncode import fileutils
from packageurl import PackageURL

from minecode import priority_router
from minecode import seed
from minecode import visit_router
from minecode.utils import is_int
from minecode.visitors import HttpVisitor
from minecode.visitors import URI
from minecode.visitors.generic import map_fetchcode_supported_package


class OpenSSLSeed(seed.Seeder):
Expand Down Expand Up @@ -88,3 +90,26 @@ def get_uris(self, content):
yield URI(uri=url, source_uri=self.uri, package_url=package_url, date=date, file_name=file_name, size=size)
else:
yield URI(uri=url, source_uri=self.uri, date=date, size=size)

# Indexing OpenSSL PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route('pkg:openssl/openssl@.*')
def process_request_dir_listed(purl_str):
    """
    Handle a `priority_resource_uri` whose Package URL (PURL) is an OpenSSL
    PURL supported by fetchcode.

    The PURL string is parsed and handed to `map_fetchcode_supported_package`,
    which obtains Package information using https://github.com/nexB/fetchcode
    and uses it to create a new PackageDB entry. The package is then added
    to the scan queue afterwards.

    Return an error string if `purl_str` cannot be parsed or if the mapping
    step reports an error; otherwise return None.
    """
    try:
        parsed_purl = PackageURL.from_string(purl_str)
    except ValueError as parse_error:
        return f"error occurred when parsing {purl_str}: {parse_error}"

    # A falsy mapping result means "no error"; report that as None.
    return map_fetchcode_supported_package(parsed_purl) or None
2 changes: 1 addition & 1 deletion packagedb/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ def _reindex_package(package, reindexed_packages):
reindexed_packages = []
requeued_packages = []

supported_ecosystems = ['maven', 'npm', 'deb']
supported_ecosystems = ['maven', 'npm', 'deb', 'generic', 'gnu', 'openssl', 'github', 'conan']

unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages(packages, supported_ecosystems)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ extractcode==31.0.0
extractcode-7z==16.5.210531
extractcode-libarchive==3.5.1.210531
fasteners==0.19
fetchcode==0.4.0
fetchcode==0.5.1
fetchcode-container==1.2.3.210512
fingerprints==1.2.3
fontawesomefree==6.5.1
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ install_requires =
djangorestframework == 3.15.0
django-filter == 24.1
drf-spectacular == 0.26.5
fetchcode == 0.4.0
fetchcode == 0.5.1
gunicorn == 21.2.0
ftputil == 5.0.4
jawa == 2.2.0
Expand Down

0 comments on commit 5f6316c

Please sign in to comment.