Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(license): stop splitting a long license text #7336

Merged
merged 18 commits into from
Sep 5, 2024
Merged
2 changes: 1 addition & 1 deletion pkg/dependency/parser/python/packaging/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc
}

if license == "" && h.Get("License-File") != "" {
license = "file://" + h.Get("License-File")
license = licensing.LicenseFilePrefix + h.Get("License-File")
}

return []ftypes.Package{
Expand Down
4 changes: 2 additions & 2 deletions pkg/fanal/analyzer/language/python/packaging/packaging.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application
// Parser adds `file://` prefix to filepath from `License-File` field
// We need to read this file to find licenses
// Otherwise, this is the name of the license
if !strings.HasPrefix(lic, "file://") {
if !strings.HasPrefix(lic, licensing.LicenseFilePrefix) {
licenses = append(licenses, lic)
continue
}
licenseFilePath := path.Base(strings.TrimPrefix(lic, "file://"))
licenseFilePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix))

findings, err := classifyLicense(app.FilePath, licenseFilePath, a.licenseClassifierConfidenceLevel, fsys)
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ var mapping = map[string]string{
"PUBLIC DOMAIN": Unlicense,
}

// Markers used to tag license strings that are not plain license names.
const (
// LicenseTextPrefix is prepended by SplitLicenses when the whole input
// looks like a license text (see isLicenseText) instead of a list of names.
LicenseTextPrefix = "text://"
// LicenseFilePrefix marks a value that is a path to a license file rather
// than a license name (the Python packaging parser prepends it to the
// `License-File` header value).
LicenseFilePrefix = "file://"
// CustomLicensePrefix is the leading part of the display name generated for
// a license that was detected as a license text.
CustomLicensePrefix = "CUSTOM License"
)

// pythonLicenseExceptions contains licenses that we cannot separate correctly using our logic.
// first word after separator (or/and) => license name
var pythonLicenseExceptions = map[string]string{
Expand All @@ -179,6 +185,39 @@ var pythonLicenseExceptions = map[string]string{

// licenseSplitRegexp matches the separators between license names in a
// multi-license string. It matches either:
//   - an optional comma, one or more spaces/underscores, the lowercase word
//     "or" or "and", and one or more spaces/underscores; or
//   - a comma followed by zero or more spaces.
//
// The pattern has no case-insensitive flag, so "OR"/"AND" are not separators.
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")

// licenseTextKeywords lists substrings that typically appear in a full license
// text rather than in a license name. isLicenseText performs a plain,
// case-sensitive substring match against these entries, and SplitLicenses
// lowercases its input before calling it, so every entry must be lowercase.
// NOTE(review): short entries such as ";" and "using" match any string that
// merely contains them, so they are the most likely source of false positives.
var licenseTextKeywords = []string{
"http://",
"https://",
"(c)",
"as-is",
";",
"hereby",
"permission to use",
"permission is",
"use in source",
"use, copy, modify",
"using",
}

// isLicenseText reports whether str contains at least one of the substrings
// listed in licenseTextKeywords. The comparison is case-sensitive, so callers
// are expected to pass an already lowercased string.
func isLicenseText(str string) bool {
	matched := false
	// Stop scanning as soon as the first keyword is found.
	for idx := 0; idx < len(licenseTextKeywords) && !matched; idx++ {
		matched = strings.Contains(str, licenseTextKeywords[idx])
	}
	return matched
}

// TrimLicenseText shortens a license text to a short, single-line preview:
// its first three whitespace-separated words joined by single spaces and
// followed by "...".
//
// Words are split on any run of whitespace (spaces, tabs, newlines), so
// multi-line license texts and texts with leading or repeated spaces do not
// leak blank tokens or line breaks into the preview. If the text has fewer
// than three words, all of them are kept. The "..." suffix is always appended,
// so an empty text yields "...".
func TrimLicenseText(text string) string {
	const maxWords = 3

	words := strings.Fields(text)
	if len(words) > maxWords {
		words = words[:maxWords]
	}
	return strings.Join(words, " ") + "..."
}

func Normalize(name string) string {
name = strings.TrimSpace(name)
if l, ok := mapping[strings.ToUpper(name)]; ok {
Expand All @@ -191,6 +230,12 @@ func SplitLicenses(str string) []string {
if str == "" {
return nil
}
if isLicenseText(strings.ToLower(str)) {
return []string{
LicenseTextPrefix + str,
}
}

var licenses []string
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
lower := strings.ToLower(maybeLic)
Expand Down
7 changes: 7 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ func TestSplitLicenses(t *testing.T) {
"Historical Permission Notice and Disclaimer (HPND)",
},
},
{
name: "License text",
license: "* Permission to use this software in any way is granted without",
licenses: []string{
"text://* Permission to use this software in any way is granted without",
},
},
}

for _, tt := range tests {
Expand Down
2 changes: 2 additions & 0 deletions pkg/rpc/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ func ConvertFromRPCDetectedLicenses(rpcLicenses []*common.DetectedLicense) []typ
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Text: l.Text,
Confidence: float64(l.Confidence),
Link: l.Link,
})
Expand Down Expand Up @@ -983,6 +984,7 @@ func ConvertToRPCLicenses(licenses []types.DetectedLicense) []*common.DetectedLi
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Text: l.Text,
Confidence: float32(l.Confidence),
Link: l.Link,
})
Expand Down
4 changes: 4 additions & 0 deletions pkg/rpc/convert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand All @@ -771,6 +772,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand Down Expand Up @@ -806,6 +808,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand All @@ -817,6 +820,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand Down
48 changes: 29 additions & 19 deletions pkg/scanner/local/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,7 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var osPkgLicenses []types.DetectedLicense
for _, pkg := range target.Packages {
for _, license := range pkg.Licenses {
category, severity := scanner.Scan(license)
osPkgLicenses = append(osPkgLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: pkg.Name,
Name: license,
Confidence: 1.0,
})
osPkgLicenses = append(osPkgLicenses, toDetectedLicense(scanner, license, pkg.Name, ""))
}
}
results = append(results, types.Result{
Expand All @@ -282,17 +275,11 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var langLicenses []types.DetectedLicense
for _, lib := range app.Packages {
for _, license := range lib.Licenses {
category, severity := scanner.Scan(license)
langLicenses = append(langLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: lib.Name,
Name: license,
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
FilePath: lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath),
Confidence: 1.0,
})
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
filePath := lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath)

langLicenses = append(langLicenses, toDetectedLicense(scanner, license, lib.Name, filePath))
}
}

Expand Down Expand Up @@ -390,6 +377,29 @@ func toDetectedMisconfiguration(res ftypes.MisconfResult, defaultSeverity dbType
}
}

// toDetectedLicense builds a types.DetectedLicense for a single license string
// of a package. Confidence is always set to 1.0.
//
// If license does not start with licensing.LicenseTextPrefix, it is treated as
// a license name: category and severity come from scanner.Scan and Text is
// left empty.
//
// Otherwise it is treated as a license text: the prefix is stripped and the
// remainder is stored in Text, category and severity are set to unknown, and
// Name becomes licensing.CustomLicensePrefix + ": " followed by the preview
// returned by licensing.TrimLicenseText.
func toDetectedLicense(scanner licensing.Scanner, license, pkgName, filePath string) types.DetectedLicense {
	// License name: let the scanner classify it.
	if !strings.HasPrefix(license, licensing.LicenseTextPrefix) {
		cat, sev := scanner.Scan(license)
		return types.DetectedLicense{
			Severity:   sev,
			Category:   cat,
			PkgName:    pkgName,
			FilePath:   filePath,
			Name:       license,
			Confidence: 1.0,
		}
	}

	// License text: it cannot be classified, so report it as unknown and
	// use a shortened preview of the text as the display name.
	text := strings.TrimPrefix(license, licensing.LicenseTextPrefix)
	displayName := licensing.CustomLicensePrefix + ": " + licensing.TrimLicenseText(text)
	return types.DetectedLicense{
		Severity:   dbTypes.SeverityUnknown.String(),
		Category:   ftypes.CategoryUnknown,
		PkgName:    pkgName,
		FilePath:   filePath,
		Name:       displayName,
		Text:       text,
		Confidence: 1.0,
	}
}

// ShouldScanMisconfigOrRbac returns the result of scanners.AnyEnabled for the
// misconfiguration scanner and the RBAC scanner.
func ShouldScanMisconfigOrRbac(scanners types.Scanners) bool {
	enabled := scanners.AnyEnabled(types.MisconfigScanner, types.RBACScanner)
	return enabled
}
Expand Down
38 changes: 38 additions & 0 deletions pkg/scanner/local/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ var (
},
Licenses: []string{"MIT"},
}
python39min = ftypes.Package{
Name: "python3.9-minimal",
Version: "3.9.1",
FilePath: "/usr/lib/python/site-packages/python3.9-minimal/METADATA",
Layer: ftypes.Layer{
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
},
Licenses: []string{"text://Redistribution and use in source and binary forms, with or without"},
}
menuinstPkg = ftypes.Package{
Name: "menuinst",
Version: "2.0.2",
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
Layer: ftypes.Layer{
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
},
Licenses: []string{"text://(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved"},
}

laravelPkg = ftypes.Package{
Name: "laravel/framework",
Version: "6.0.0",
Expand Down Expand Up @@ -225,6 +244,7 @@ func TestScanner_Scan(t *testing.T) {
},
Packages: []ftypes.Package{
muslPkg,
python39min,
},
Applications: []ftypes.Application{
{
Expand All @@ -239,6 +259,7 @@ func TestScanner_Scan(t *testing.T) {
FilePath: "",
Packages: []ftypes.Package{
urllib3Pkg,
menuinstPkg,
},
},
},
Expand All @@ -257,6 +278,14 @@ func TestScanner_Scan(t *testing.T) {
Name: "MIT",
Confidence: 1,
},
{
Severity: "UNKNOWN",
Category: "unknown",
PkgName: python39min.Name,
Name: "CUSTOM License: Redistribution and use...",
Text: "Redistribution and use in source and binary forms, with or without",
Confidence: 1,
},
},
},
{
Expand Down Expand Up @@ -286,6 +315,15 @@ func TestScanner_Scan(t *testing.T) {
Name: "MIT",
Confidence: 1,
},
{
Severity: "UNKNOWN",
Category: "unknown",
PkgName: menuinstPkg.Name,
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
Name: "CUSTOM License: (c) 2016 Continuum...",
Text: "(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved",
Confidence: 1,
},
},
},
{
Expand Down
3 changes: 3 additions & 0 deletions pkg/types/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ type DetectedLicense struct {
// Name holds a detected license name
Name string

// Text holds a long license text if Trivy detects a license name as a license text
Text string

// Confidence is level of the match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch
Confidence float64
Expand Down
Loading