Skip to content

Commit

Permalink
fix: optional python dependencies
Browse files Browse the repository at this point in the history
Optional Python dependencies are being over connected.

When building Python dep-graphs the current algorithm does not consider
whether a transitive should be traversed based on 'extra' definitions.

When a Python dependency uses extras, this means that dependency is
optional and will only be installed when the installer asks.

For example if 'my-package' has the following METADATA snippet:

```
Requires-Dist: logger>=1.0
Provides-Extra: test
Requires-Dist: tester>=0.5.0; extra == 'test'
```

This means 'logger' is always installed, but 'tester' is only installed
if 'my-package' is installed with extra 'test' dependencies, like so:

```
my-package[test]==1.2.3
```

The current algorithm sometimes gets away with this lack of accuracy
because the dependency is skipped if the METADATA file doesn't even
exist. However in many cases what's optional in one transitive line is
not in another and this can mean the METADATA file does exists. This
results in the current algorithm accidentally associating potentially
large sub-graphs to transitive lines that should be terminated when
extras are not being used.

The first change here introduces code that firstly parses out these
extra definitions in both requirements.txt and METADATA files. Then also
parsers that pick out the Provides-Extra and Requires-Dist extra
environment markers.

The second change adapts the pip dep-graph builder to take into account
extra properties to decide whether a dependency should be traversed or
not.
  • Loading branch information
gitphill committed Nov 14, 2024
1 parent 6d31066 commit b71e613
Show file tree
Hide file tree
Showing 47 changed files with 9,277 additions and 14 deletions.
31 changes: 29 additions & 2 deletions lib/analyzer/applications/python/pip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,47 @@ class PythonDepGraphBuilder {
if (!metadata) {
return;
}
const nodeId = `${metadata.name}@${metadata.version}`;
const extrasId = req.extras?.length ? `:${req.extras}` : "";
const nodeId = `${metadata.name}@${metadata.version}${extrasId}`;
if (!this.visitedMap.has(nodeId)) {
this.visitedMap.add(nodeId);
this.builder.addPkgNode(
{ name: metadata.name, version: metadata.version },
nodeId,
);
for (const dep of metadata.dependencies) {
await this.addDependenciesToDepGraph(nodeId, dep);
if (this.shouldTraverse(req, dep)) {
await this.addDependenciesToDepGraph(nodeId, dep);
}
}
}
this.builder.connectDep(root, nodeId);
}

// test extras and environment markers to determine whether a dependency is optional
// if it is optional only traverse if the requirement asked for those optionals
private shouldTraverse(
req: PythonRequirement,
dep: PythonRequirement,
): boolean {
// always traverse deps with no extra environment markers (they're non-optional)
if (!dep.extraEnvMarkers || dep.extraEnvMarkers.length === 0) {
return true;
}

// determine if dep was required with extras, and those extras match the deps env markers
const intersection = req.extras?.filter((i) =>
dep.extraEnvMarkers?.includes(i),
);

// yes! this is an optional dependency that was asked for
if (intersection && intersection.length > 0) {
return true;
}

return false; // no! stop here we don't want to traverse optional dependencies
}

// find the best match for a dependency in found metadata files
private findMetadata(dep: PythonRequirement): PythonPackage | null {
const nameMatches = this.metadata[dep.name.toLowerCase()];
Expand Down
19 changes: 19 additions & 0 deletions lib/python-parser/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,22 @@ export function compareVersions(version1: string, version2: string) {

return compareArrays(v1, v2);
}

/**
* Parse extra names inside extras_list
* there can be multiple extras separated by comma
* see https://peps.python.org/pep-0508
*
* @param extras the contents of an extra list, inside (but not including) the square brackets
* @returns string array of names
*/
export function parseExtraNames(extras = ""): string[] {
const extraNames = new Set<string>();
for (const extra of extras.split(",")) {
const name = extra.trim();
if (name) {
extraNames.add(name);
}
}
return Array.from(extraNames);
}
49 changes: 41 additions & 8 deletions lib/python-parser/metadata-parser.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import * as semver from "semver";
import { specifierValidRange } from "./common";
import { PythonPackage, PythonRequirement } from "./types";
import { parseExtraNames, specifierValidRange } from "./common";
import { findProvidesExtras } from "./provides-extra";
import type { PythonPackage, PythonRequirement } from "./types";

const PACKAGE_NAME = "Name: ";
const PACKAGE_VERSION = "Version: ";
const PACKAGE_DEPS = "Requires-Dist: ";
const DEP_PARSE_REGEX =
/^(?<name>[\w.-]+)(\s?\(?(?<specifier><|<=|!=|==|>=|>|~=|===)(?<version>[\w.]+)\)?)?/;
/^(?<name>[\w.-]+)((\[(?<extras>.*)\])?)(\s?\(?(?<specifier><|<=|!=|==|>=|>|~=|===)(?<version>[\w.]+)\)?)?/;

export function getPackageInfo(fileContent: string): PythonPackage {
const lines = fileContent.split("\n");
const providesExtras = findProvidesExtras(lines);
let name = "";
let version = "";
const dependencies: PythonRequirement[] = [];
Expand All @@ -24,6 +27,7 @@ export function getPackageInfo(fileContent: string): PythonPackage {
} else if (line.startsWith(PACKAGE_DEPS)) {
const pythonPackage = parseDependency(
line.substring(PACKAGE_DEPS.length),
providesExtras,
);
if (pythonPackage) {
dependencies.push(pythonPackage);
Expand All @@ -38,22 +42,51 @@ export function getPackageInfo(fileContent: string): PythonPackage {
} as PythonPackage;
}

// parse a line containing a dependency package name and (optional) specifier + version
function parseDependency(packageDependency: string): PythonRequirement | null {
// parse a line containing a dependency package name & extras and (optional) specifier + version
export function parseDependency(
packageDependency: string,
providesExtras: string[],
): PythonRequirement | null {
packageDependency = packageDependency.trim();

const parsedDep = DEP_PARSE_REGEX.exec(packageDependency);
if (!parsedDep?.groups) {
return null;
}
const { name, version, specifier } = parsedDep.groups;
const correctedSpecifier = specifierValidRange(specifier, version);
const { name, extras, version, specifier } = parsedDep.groups;

return {
name: name.toLowerCase(),
version,
specifier: correctedSpecifier,
specifier: specifierValidRange(specifier, version),
extras: parseExtraNames(extras),
extraEnvMarkers: parseExtraEnvMarkers(providesExtras, packageDependency),
} as PythonRequirement;
}

// parse extra environment markers located after the quoted marker
// see https://peps.python.org/pep-0508
function parseExtraEnvMarkers(
providesExtras: string[],
requiresDist?: string,
): string[] {
const extraNames = new Set<string>();

// search string after quoted_marker ;
const quotedMarker = requiresDist?.split(";");
if (quotedMarker && quotedMarker.length > 1) {
for (const extra of providesExtras) {
// search for extra env markers for given provides extras
const re = new RegExp(`.*extra.*("|')(?<extra>${extra})("|').*`);
if (re.exec(quotedMarker[1])) {
extraNames.add(extra);
}
}
}

return Array.from(extraNames);
}

function getParseableVersion(versionString: string): string {
const validVersion = semver.coerce(versionString);
if (!validVersion) {
Expand Down
15 changes: 15 additions & 0 deletions lib/python-parser/provides-extra.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// https://packaging.python.org/en/latest/specifications/core-metadata/#provides-extra-multiple-use
const PROVIDES_EXTRA = "Provides-Extra:";

// given the lines of a METADATA file, find all 'Provides-Extra' names
export function findProvidesExtras(lines: string[]): string[] {
const extras = new Set<string>();
for (const line of lines) {
const l = line.trim();
if (l.startsWith(PROVIDES_EXTRA)) {
const extra = l.substring(PROVIDES_EXTRA.length).trim();
extras.add(extra);
}
}
return Array.from(extras);
}
7 changes: 4 additions & 3 deletions lib/python-parser/requirements-parser.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { specifierValidRange } from "./common";
import { parseExtraNames, specifierValidRange } from "./common";
import { PythonRequirement } from "./types";

// This looks like a crazy regex, but it's long because of the named capture groups
// which make the result easier to read. It essentially breaks each line into name,
// specifier and version, where only the name is mandatory
const VERSION_PARSE_REGEX =
/^(?<name>[\w.-]+)((?<specifier><|<=|!=|==|>=|>|~=|===)(?<version>[\w.]*))?/;
/^(?<name>[\w.-]+)((\[(?<extras>.*)\])?)((?<specifier><|<=|!=|==|>=|>|~=|===)(?<version>[\w.]*))?/;

export function getRequirements(fileContent: string): PythonRequirement[] {
const lines = fileContent.split("\n");
Expand All @@ -22,11 +22,12 @@ function parseLine(line: string): PythonRequirement | null {
if (!parsedLine?.groups) {
return null;
}
const { name, specifier, version } = parsedLine.groups;
const { name, extras, specifier, version } = parsedLine.groups;
const correctedSpecifier = specifierValidRange(specifier, version);
return {
name: name.toLowerCase(),
specifier: correctedSpecifier,
version,
extras: parseExtraNames(extras),
} as PythonRequirement;
}
2 changes: 2 additions & 0 deletions lib/python-parser/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ export interface PythonRequirement {
name: string;
version?: string;
specifier?: string;
extras?: string[];
extraEnvMarkers?: string[];
}

export interface PythonPackage {
Expand Down
Loading

0 comments on commit b71e613

Please sign in to comment.