Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for custom maven repository URLs #164

Merged
merged 2 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions minecode/tests/test_maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,19 @@ def test_map_maven_package(self):
expected_purl_str = 'pkg:maven/classworlds/[email protected]'
self.assertEqual(expected_purl_str, package.purl)

def test_map_maven_package_custom_repo_url(self):
    # Mapping a purl that carries a `repository_url` qualifier must create
    # exactly one Package whose download_url is rooted at that repository.
    packages = packagedb.models.Package.objects

    # Precondition: nothing has been stored yet.
    self.assertEqual(0, packages.all().count())

    purl = PackageURL.from_string(
        "pkg:maven/org.eclipse.core/runtime@20070801?repository_url=https://packages.atlassian.com/mvn/maven-atlassian-external/"
    )
    content_type = packagedb.models.PackageContentType.BINARY
    maven_visitor.map_maven_package(purl, content_type)

    # Exactly one package is expected after mapping.
    self.assertEqual(1, packages.all().count())

    # NOTE: the qualifier value above ends with a slash and the expected
    # download URL below has `//` at that same spot.
    expected_repo_url = 'https://packages.atlassian.com/mvn/maven-atlassian-external//org/eclipse/core/runtime/20070801/runtime-20070801.jar'
    created = packages.all().first()
    self.assertEqual(expected_repo_url, created.download_url)


def test_process_request(self):
purl_str = 'pkg:maven/org.apache.twill/twill-core@0.12.0'
download_url = 'https://repo1.maven.org/maven2/org/apache/twill/twill-core/0.12.0/twill-core-0.12.0.jar'
Expand Down
38 changes: 27 additions & 11 deletions minecode/visitors/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
logger.setLevel(logging.DEBUG)


MAVEN_BASE_URL = 'https://repo1.maven.org/maven2'


class GzipFileWithTrailing(gzip.GzipFile):
"""
A subclass of gzip.GzipFile supporting files with trailing garbage. Ignore
Expand Down Expand Up @@ -104,7 +107,7 @@ def get_seeds(self):
# also has a npm mirrors: https://maven-eu.nuxeo.org/nexus/#view-repositories;npmjs~browsestorage


def get_pom_text(namespace, name, version, qualifiers={}):
def get_pom_text(namespace, name, version, qualifiers={}, base_url=MAVEN_BASE_URL):
"""
Return the contents of the POM file of the package described by the purl
field arguments in a string.
Expand All @@ -116,7 +119,8 @@ def get_pom_text(namespace, name, version, qualifiers={}):
namespace=namespace,
name=name,
version=version,
qualifiers=qualifiers
qualifiers=qualifiers,
base_url=base_url,
)
# Get and parse POM info
pom_url = urls['api_data_url']
Expand Down Expand Up @@ -151,7 +155,7 @@ def get_package_sha1(package):
return sha1


def fetch_parent(pom_text):
def fetch_parent(pom_text, base_url=MAVEN_BASE_URL):
"""
Return the parent pom text of `pom_text`, or None if `pom_text` has no parent.
"""
Expand All @@ -171,20 +175,21 @@ def fetch_parent(pom_text):
namespace=parent_namespace,
name=parent_name,
version=parent_version,
qualifiers={}
qualifiers={},
base_url=base_url,
)
return parent_pom_text


def get_ancestry(pom_text):
def get_ancestry(pom_text, base_url=MAVEN_BASE_URL):
"""
Return a list of pom text of the ancestors of `pom`. The list is ordered
from oldest ancestor to newest. The list is empty if there is no parent pom.
"""
ancestors = []
has_parent = True
while has_parent:
parent_pom_text = fetch_parent(pom_text)
parent_pom_text = fetch_parent(pom_text=pom_text, base_url=base_url)
if not parent_pom_text:
has_parent = False
else:
Expand All @@ -193,7 +198,7 @@ def get_ancestry(pom_text):
return reversed(ancestors)


def get_merged_ancestor_package_from_maven_package(package):
def get_merged_ancestor_package_from_maven_package(package, base_url=MAVEN_BASE_URL):
"""
Merge package details of a package with its ancestor pom
and return the merged package.
Expand All @@ -205,6 +210,7 @@ def get_merged_ancestor_package_from_maven_package(package):
namespace=package.namespace,
version=package.version,
qualifiers=package.qualifiers,
base_url=base_url,
)
merged_package = merge_ancestors(
ancestor_pom_texts=get_ancestry(pom_text),
Expand Down Expand Up @@ -279,11 +285,17 @@ def map_maven_package(package_url, package_content):
db_package = None
error = ''

if "repository_url" in package_url.qualifiers:
base_url = package_url.qualifiers["repository_url"]
else:
base_url = MAVEN_BASE_URL

pom_text = get_pom_text(
namespace=package_url.namespace,
name=package_url.name,
version=package_url.version,
qualifiers=package_url.qualifiers
qualifiers=package_url.qualifiers,
base_url=base_url,
)
if not pom_text:
msg = f'Package does not exist on maven: {package_url}'
Expand All @@ -295,18 +307,22 @@ def map_maven_package(package_url, package_content):
'maven_pom',
'maven',
'Java',
text=pom_text
text=pom_text,
base_url=base_url,
)
ancestor_pom_texts = get_ancestry(pom_text)
ancestor_pom_texts = get_ancestry(pom_text=pom_text, base_url=base_url)
package = merge_ancestors(
ancestor_pom_texts=ancestor_pom_texts,
package=package
)


urls = get_urls(
namespace=package_url.namespace,
name=package_url.name,
version=package_url.version,
qualifiers=package_url.qualifiers
qualifiers=package_url.qualifiers,
base_url=base_url,
)
# In the case of looking up a maven package with qualifiers of
# `classifiers=sources`, the purl of the package created from the pom does
Expand Down
Loading