Skip to content

Commit

Permalink
🐛 Fix regex for matching RPM packages with unorthodox vendor name (#4726
Browse files Browse the repository at this point in the history
)

* Modify regex pattern for SuSE vendor name.

Signed-off-by: Vasil Sirakov <[email protected]>

* Trim out the HTML tag altogether and update the unit tests.

Signed-off-by: Vasil Sirakov <[email protected]>

* Fix unit test name.

Signed-off-by: Vasil Sirakov <[email protected]>

* Cleanup vendor name also in static query code path.

Signed-off-by: Vasil Sirakov <[email protected]>

---------

Signed-off-by: Vasil Sirakov <[email protected]>
  • Loading branch information
VasilSirakov authored Oct 29, 2024
1 parent 4e1406a commit d6d1f67
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 4 deletions.
22 changes: 18 additions & 4 deletions providers/os/resources/packages/rpm_packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const (
RpmPkgFormat = "rpm"
)

var RPM_REGEX = regexp.MustCompile(`^([\w-+]*)\s(\d*|\(none\)):([\w\d-+.:]+)\s([\w\d]*|\(none\))__([\w\d\s,\.]+)__(.*)$`)
// RPM_REGEX matches a single line of the rpm query output described on ParseRpmPackages below.
// Capture groups, in the order given by that queryformat:
//  1. package name
//  2. epoch (digits or the literal "(none)")
//  3. version-release
//  4. architecture (or the literal "(none)")
//  5. vendor, enclosed in double underscores; besides word characters, whitespace, commas and dots it
//     also accepts `/`, `<`, `>` and `:` so that vendors carrying a URL tag still match
//  6. summary (rest of the line)
var RPM_REGEX = regexp.MustCompile(`^([\w-+]*)\s(\d*|\(none\)):([\w\d-+.:]+)\s([\w\d]*|\(none\))__([\w\d\s,/<>:\.]+)__(.*)$`)

// ParseRpmPackages parses output from:
// rpm -qa --queryformat '%{NAME} %{EPOCHNUM}:%{VERSION}-%{RELEASE} %{ARCH}__%{VENDOR}__%{SUMMARY}\n'
Expand Down Expand Up @@ -58,7 +58,10 @@ func ParseRpmPackages(pf *inventory.Platform, input io.Reader) []Package {
if arch == "(none)" {
arch = ""
}
pkg := newRpmPackage(pf, name, version, arch, epoch, m[5], m[6])

vendor := cleanupVendorName(m[5])

pkg := newRpmPackage(pf, name, version, arch, epoch, vendor, m[6])
pkg.FilesAvailable = PkgFilesAsync // when we use commands we need to fetch the files async
pkgs = append(pkgs, pkg)

Expand Down Expand Up @@ -97,8 +100,19 @@ func newRpmPackage(pf *inventory.Platform, name, version, arch, epoch, vendor, d
}
}

// CLEANUP_VENDOR_REGEX matches an angle-bracketed section of a vendor string.
// The match is greedy: it spans from the first `<` to the last `>` in the input,
// so several tags (and anything between them) are consumed as a single match.
var CLEANUP_VENDOR_REGEX = regexp.MustCompile(`<.*>`)

// cleanupVendorName strips the angle-bracketed part from an RPM vendor name and
// drops the spaces left dangling at the end of the result.
//
// Some vendors (e.g. SUSE) append a tag that wraps a website to their name. The
// tag carries no useful information and gets in the way of the CPE generation
// later on, so it is removed here. This relies on angle brackets not appearing
// anywhere else in vendor names.
//
// Only trailing space characters are trimmed; leading whitespace is kept as is.
func cleanupVendorName(vendor string) string {
	return strings.TrimRight(CLEANUP_VENDOR_REGEX.ReplaceAllString(vendor, ""), " ")
}

// RpmPkgManager is the package manager for Redhat, CentOS, Oracle, Photon and Suse
// it support two modes: runtime where the rpm command is available and static analysis for images (e.g. container tar)
// it supports two modes: runtime where the rpm command is available and static analysis for images (e.g. container tar)
// If the RpmPkgManager is used in static mode, it extracts the rpm database from the system and copies it to the local
// filesystem to run a local rpm command to extract the data. The static analysis is always slower than using the running
// one since more data needs to be copied. Therefore the runtime check should be preferred over the static analysis
Expand Down Expand Up @@ -278,7 +292,7 @@ func (rpm *RpmPkgManager) staticList() ([]Package, error) {
version = version + "-" + pkg.Release
}

rpmPkg := newRpmPackage(rpm.platform, pkg.Name, version, pkg.Arch, epoch, pkg.Vendor, pkg.Summary)
rpmPkg := newRpmPackage(rpm.platform, pkg.Name, version, pkg.Arch, epoch, cleanupVendorName(pkg.Vendor), pkg.Summary)

// determine all files attached
records := []FileRecord{}
Expand Down
85 changes: 85 additions & 0 deletions providers/os/resources/packages/rpm_packages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,88 @@ func TestPhoton4ImageParser(t *testing.T) {
}
assert.Equal(t, p, findPkg(m, p.Name), p.Name)
}

// TestSuSEParser ensures that a SuSE-style vendor name carrying an angle-bracketed
// URL tag is accepted by ParseRpmPackages and that the tag is stripped from the
// resulting package, so the vendor and the generated CPEs only contain "SUSE LLC".
func TestSuSEParser(t *testing.T) {
	epoch := int(0)
	pkgList := []*rpmdb.PackageInfo{
		{
			Name:    "grep",
			Epoch:   &epoch,
			Version: "3.1",
			Release: "150000.4.6.1",
			Arch:    "x86_64",
			Vendor:  "SUSE LLC <https://www.suse.com/>",
			Summary: "Print lines matching a pattern",
		},
	}

	// Render every package as one "NAME EPOCH:VERSION-RELEASE ARCH__VENDOR__SUMMARY" line,
	// which is the textual input handed to the parser below.
	var packageList bytes.Buffer
	for _, pkg := range pkgList {
		fmt.Fprintf(&packageList, "%s %d:%s-%s %s__%s__%s\n", pkg.Name, pkg.EpochNum(), pkg.Version, pkg.Release, pkg.Arch, pkg.Vendor, pkg.Summary)
	}

	pf := &inventory.Platform{
		Name:    "suse",
		Version: "15.6",
		Arch:    "x86_64",
		Family:  []string{"linux", "unix", "os"},
		Labels: map[string]string{
			"distro-id": "suse",
		},
	}

	m := ParseRpmPackages(pf, &packageList)
	// require (not assert) stops the test here on a wrong count, so the m[0]
	// access at the end cannot panic with an index-out-of-range.
	require.Len(t, m, 1, "detected the right amount of packages")

	p := Package{
		Name:    "grep",
		Version: "3.1-150000.4.6.1",
		// Note that the tag <https://www.suse.com/> has been removed.
		Vendor:      "SUSE LLC",
		Arch:        "x86_64",
		Description: "Print lines matching a pattern",
		PUrl:        "pkg:rpm/suse/grep@3.1-150000.4.6.1?arch=x86_64&distro=suse-15.6",
		CPEs: []string{
			"cpe:2.3:a:suse_llc:grep:3.1-150000.4.6.1:*:*:*:*:*:x86_64:*",
			"cpe:2.3:a:suse_llc:grep:3.1-150000.4:*:*:*:*:*:x86_64:*",
			"cpe:2.3:a:suse_llc:grep:3.1:*:*:*:*:*:x86_64:*",
			"cpe:2.3:a:suse_llc:grep:3.1-150000.4.6.1:*:*:*:*:*:*:*",
			"cpe:2.3:a:suse_llc:grep:3.1-150000.4:*:*:*:*:*:*:*",
			"cpe:2.3:a:suse_llc:grep:3.1:*:*:*:*:*:*:*",
		},
		Format:         RpmPkgFormat,
		FilesAvailable: PkgFilesAsync,
	}
	assert.Equal(t, p, m[0], p.Name)
}

func TestVendorNameCleanup(t *testing.T) {
vendorFromRpm := "SUSE LLC"
actual := cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC<https://suse.com/>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC <https://suse.com/>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC <https://suse.com/>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC <abc><def>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC <>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)

vendorFromRpm = "SUSE LLC <<>>"
actual = cleanupVendorName(vendorFromRpm)
require.Equal(t, "SUSE LLC", actual)
}

0 comments on commit d6d1f67

Please sign in to comment.