Skip to content

Commit

Permalink
fix: Continue parsing Python RECORD files when bad lines encountered (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
kzantow authored Oct 29, 2022
1 parent dd89461 commit c489c37
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 13 deletions.
5 changes: 1 addition & 4 deletions syft/pkg/cataloger/python/package_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,7 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metada
defer internal.CloseAndLogError(recordContents, recordPath)

// parse the record contents
records, err := parseWheelOrEggRecord(recordContents)
if err != nil {
return nil, nil, err
}
records := parseWheelOrEggRecord(recordContents)

files = append(files, records...)
}
Expand Down
33 changes: 32 additions & 1 deletion syft/pkg/cataloger/python/package_cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,37 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
},
},
},
{
name: "malformed-record",
fixtures: []string{
"test-fixtures/malformed-record/dist-info/METADATA",
"test-fixtures/malformed-record/dist-info/RECORD",
},
expectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
FoundBy: "python-package-cataloger",
MetadataType: pkg.PythonPackageMetadataType,
Metadata: pkg.PythonPackageMetadata{
Name: "Pygments",
Version: "2.6.1",
License: "BSD License",
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "[email protected]",
SitePackagesRootPath: "test-fixtures/malformed-record",
Files: []pkg.PythonFileRecord{
{Path: "flask/json/tag.py", Digest: &pkg.PythonFileDigest{"sha256", "9ehzrmt5k7hxf7ZEK0NOs3swvQyU9fWNe-pnYe69N60"}, Size: "8223"},
{Path: "../../Scripts/flask.exe", Digest: &pkg.PythonFileDigest{"sha256", "mPrbVeZCDX20himZ_bRai1nCs_tgr7jHIOGZlcgn-T4"}, Size: "93063"},
{Path: "../../Scripts/flask.exe", Size: "89470", Digest: &pkg.PythonFileDigest{"sha256", "jvqh4N3qOqXLlq40i6ZOLCY9tAOwfwdzIpLDYhRjoqQ"}},
{Path: "Flask-1.0.2.dist-info/INSTALLER", Size: "4", Digest: &pkg.PythonFileDigest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}},
},
},
},
},
{
// in cases where the metadata file is available and the record is not we should still record there is a package
// additionally empty top_level.txt files should not result in an error
Expand Down Expand Up @@ -171,7 +202,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
t.Fatalf("unexpected number of packages: %d", len(actual))
}

for _, d := range deep.Equal(actual[0], test.expectedPackage) {
for _, d := range deep.Equal(test.expectedPackage, actual[0]) {
t.Errorf("diff: %+v", d)
}
})
Expand Down
10 changes: 6 additions & 4 deletions syft/pkg/cataloger/python/parse_wheel_egg_record.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (

// parseWheelOrEggRecord takes a Python Egg or Wheel RECORD file (which share the same format and values for our purposes),
// returning all Python file records listed.
func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
func parseWheelOrEggRecord(reader io.Reader) []pkg.PythonFileRecord {
var records []pkg.PythonFileRecord
r := csv.NewReader(reader)

Expand All @@ -24,11 +24,13 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
break
}
if err != nil {
return nil, fmt.Errorf("unable to read python record file: %w", err)
log.Warnf("unable to read python record file: %w", err)
continue
}

if len(recordList) != 3 {
return nil, fmt.Errorf("python record an unexpected length=%d: %q", len(recordList), recordList)
log.Warnf("python record an unexpected length=%d: %q", len(recordList), recordList)
continue
}

var record pkg.PythonFileRecord
Expand Down Expand Up @@ -59,7 +61,7 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
records = append(records, record)
}

return records, nil
return records
}

func parseInstalledFiles(reader io.Reader, location, sitePackagesRootPath string) ([]pkg.PythonFileRecord, error) {
Expand Down
5 changes: 1 addition & 4 deletions syft/pkg/cataloger/python/parse_wheel_egg_record_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,7 @@ func TestParseWheelEggRecord(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err)
}

actual, err := parseWheelOrEggRecord(fixture)
if err != nil {
t.Fatalf("failed to parse: %+v", err)
}
actual := parseWheelOrEggRecord(fixture)

for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
t.Errorf("diff: %+v", d)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
Metadata-Version: 2.1
Name: Pygments
Version: 2.6.1
Summary: Pygments is a syntax highlighting package written in Python.
Home-page: https://pygments.org/
Author: Georg Brandl
Author-email: georg@python.org
License: BSD License
Keywords: syntax highlighting
Platform: any
Classifier: License :: OSI Approved :: BSD License
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: Intended Audience :: System Administrators
Classifier: Development Status :: 6 - Mature
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Operating System :: OS Independent
Classifier: Topic :: Text Processing :: Filters
Classifier: Topic :: Utilities
Requires-Python: >=3.5


Pygments
~~~~~~~~

Pygments is a syntax highlighting package written in Python.

It is a generic syntax highlighter suitable for use in code hosting, forums,
wikis or other applications that need to prettify source code. Highlights
are:

* a wide range of over 500 languages and other text formats is supported
* special attention is paid to details, increasing quality by a fair amount
* support for new languages and formats are added easily
* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image formats that PIL supports and ANSI sequences
* it is usable as a command-line tool and as a library

:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
flask/json/tag.py,sha256=9ehzrmt5k7hxf7ZEK0NOs3swvQyU9fWNe-pnYe69N60,8223
<<<<<<< HEAD
../../Scripts/flask.exe,sha256=mPrbVeZCDX20himZ_bRai1nCs_tgr7jHIOGZlcgn-T4,93063
=======
../../Scripts/flask.exe,sha256=jvqh4N3qOqXLlq40i6ZOLCY9tAOwfwdzIpLDYhRjoqQ,89470
>>>>>>> 69c24e18ea630ec869a32ad7b0b0d9647714cd5d
Flask-1.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
app/venv/Lib/site-packages/Flask-1.0.2.dist-info/WHEEL0100777000000000000000000000015613752730745020435 0ustar0000000000000000Wheel-Version: 1.0

0 comments on commit c489c37

Please sign in to comment.