Skip to content

Commit

Permalink
Support parsing metadata filenames for purls
Browse files Browse the repository at this point in the history
Supports parsing .dsc, copyright and changelog files,
typically present in the debian package/metadata archives
for name and version.

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jan 30, 2024
1 parent 240e24a commit 743841c
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
25 changes: 23 additions & 2 deletions src/debian_inspector/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def from_filename(cls, filename):
return cls(
name=name,
version=version,
original_filename=filename)
original_filename=filename
)

def to_dict(self):
data = {}
Expand All @@ -110,16 +111,36 @@ def to_tuple(self):
return tuple(v for v in self.to_dict().values() if v != 'original_filename')


@attrs
class CodeMetadata(CodeArchive):
    """
    A .dsc, copyright or changelog file that is present in a Debian
    package/metadata archive and that carries package information in its
    filename or in its file contents.
    For example in ./changelogs/main/d/diffutils/ there are
    files such as:
    - diffutils_3.7-5_copyright
    - diffutils_3.7-5_changelog
    And in ./pool/main/b/base-files/ there are files such as:
    - base-files_11.1+deb11u8.dsc
    """


def get_nva(filename):
"""
Return a tuple of (name string, Version object, architecture string or
None) parsed from the `filename` of a .deb, .udeb, .orig or .debian archive,
or of a .dsc, _changelog or _copyright metadata file.
"""
is_known = False
if filename.endswith(('.deb', '.udeb')):
if filename.endswith(('.deb', '.udeb', '.dsc')):
basename, _extension = path.splitext(filename)
is_known = True

elif filename.endswith(('_changelog', '_copyright')):
basename, _, _ = filename.rpartition("_")
is_known = True

elif filename.endswith(('.tar.gz', '.tar.xz', '.tar.bz2', '.tar.lzma')):
# A Format: 3.0 archive.
# Note that we ignore the legacy .diff.gz files for Format: 1.0
Expand Down
18 changes: 18 additions & 0 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,24 @@ def test_CodeArchive_from_filename(self):
original_filename=fn)
assert debarch == expected

def test_CodeMetadata_from_filename_dsc(self):
    # The name and version are expected to be recovered from the part of
    # the .dsc filename that precedes the extension.
    dsc_filename = 'base-files_11.1+deb11u8.dsc'
    parsed = package.CodeMetadata.from_filename(dsc_filename)
    expected_version = version.Version(
        epoch=0,
        upstream='11.1+deb11u8',
        revision='0',
    )
    assert parsed == package.CodeMetadata(
        name='base-files',
        version=expected_version,
        original_filename=dsc_filename,
    )

def test_CodeMetadata_from_filename_copyright(self):
    # The name and version are expected to be recovered from the part of
    # the filename that precedes the trailing "_copyright" marker.
    copyright_filename = 'bash_4.1-3+deb6u2_copyright'
    parsed = package.CodeMetadata.from_filename(copyright_filename)
    expected_version = version.Version(
        epoch=0,
        upstream='4.1',
        revision='3+deb6u2',
    )
    assert parsed == package.CodeMetadata(
        name='bash',
        version=expected_version,
        original_filename=copyright_filename,
    )

def test_CodeArchive_from_filename_supports_tar_gz_bz2_and_xz(self):
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.orig.tar.gz')
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.debian.tar.gz')
Expand Down

0 comments on commit 743841c

Please sign in to comment.