Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache results of maven http calls #644

Merged
merged 2 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion gatsby-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const {
} = require("./src/components/util/pretty-platform")
const { sortableName } = require("./src/components/util/sortable-name")
const { extensionSlug, extensionSlugFromCoordinates } = require("./src/components/util/extension-slugger")
const { generateMavenInfo } = require("./src/maven/maven-info")
const { generateMavenInfo, initMavenCache } = require("./src/maven/maven-info")
const { createRemoteFileNode } = require("gatsby-source-filesystem")
const { rewriteGuideUrl } = require("./src/components/util/guide-url-rewriter")
const ESLintPlugin = require("eslint-webpack-plugin")
Expand Down Expand Up @@ -183,6 +183,11 @@ exports.sourceNodes = async ({
return Promise.all(secondPromises)
}

exports.onPreBootstrap = async () => {
await initMavenCache()
}


exports.createPages = async ({ graphql, actions, reporter }) => {
const { createPage } = actions

Expand Down
2 changes: 1 addition & 1 deletion plugins/github-enricher/gatsby-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
const { getCache } = require("gatsby/dist/utils/get-cache")
const { createRemoteFileNode } = require("gatsby-source-filesystem")
const { labelExtractor } = require("./labelExtractor")
const PersistableCache = require("./persistable-cache")
const PersistableCache = require("../../src/persistable-cache")
const {
findSponsor,
clearCaches,
Expand Down Expand Up @@ -487,7 +487,7 @@
: ""

// Tolerate scm urls ending in .git, but don't try and turn them into issues urls without patching
const topLevelIssuesUrl = (scmUrl + "/issues").replace("\.git/issues", "/issues")

Check warning on line 490 in plugins/github-enricher/gatsby-node.js

View workflow job for this annotation

GitHub Actions / build

Unnecessary escape character: \.
const issuesUrl = labels
? encodeUrl(
scmUrl +
Expand Down
2 changes: 1 addition & 1 deletion plugins/github-enricher/sponsorFinder.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const PersistableCache = require("./persistable-cache")
const PersistableCache = require("../../src/persistable-cache")
const { queryGraphQl, queryRest } = require("./github-helper")

// We store the raw(ish) data in the cache, to avoid repeating the same request multiple times and upsetting the github rate limiter
Expand Down
75 changes: 57 additions & 18 deletions src/maven/maven-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@
const axios = require("axios")
const promiseRetry = require("promise-retry")
const { readPom } = require("./pom-reader")
const PersistableCache = require("../persistable-cache")

const DAY_IN_SECONDS = 60 * 60 * 24

let timestampCache, latestVersionCache

/**
 * Creates the two module-level maven caches and waits until each reports ready.
 *
 * Assigns fresh PersistableCache instances to `timestampCache` and
 * `latestVersionCache`, each with a stdTTL of five days, then awaits
 * `ready()` on each and logs the value of `size()`.
 * NOTE(review): judging by the log messages, ready() loads previously
 * persisted entries - confirm against PersistableCache.
 *
 * Troubleshooting tip: if the cached data misbehaves, temporarily adding
 * something like latestVersionCache.flushAll() on a main-branch build
 * (and removing it again on the next build) works well.
 *
 * @returns {Promise<void>}
 */
const initMavenCache = async () => {
  // Both caches share the same lifetime. For the latest-version cache, the worst
  // that happens if an entry is out of date is one extra query to read the pom.
  const cacheLifetime = 5 * DAY_IN_SECONDS

  timestampCache = new PersistableCache({
    key: "maven-api-for-timestamps",
    stdTTL: cacheLifetime,
  })
  latestVersionCache = new PersistableCache({
    key: "maven-api-for-latest-versions",
    stdTTL: cacheLifetime,
  })

  // Report on each cache as soon as it is ready, latest-versions first
  await latestVersionCache.ready()
  const artifactCount = latestVersionCache.size()
  console.log("Ingested cached maven information for", artifactCount, "artifacts.")

  await timestampCache.ready()
  const timestampCount = timestampCache.size()
  console.log("Ingested cached timestamp information for", timestampCount, "maven artifacts.")
}

/**
 * Empties both maven caches by calling flushAll() on each.
 *
 * A cache that is still null/undefined (for example because
 * initMavenCache() has not run yet) is skipped.
 */
const clearMavenCache = () => {
  for (const cache of [timestampCache, latestVersionCache]) {
    cache?.flushAll()
  }
}

const getTimestampFromMavenArtifactsListing = async maven => {
const mavenArtifactsUrl = await createMavenArtifactsUrlFromCoordinates(maven)
Expand All @@ -14,7 +43,7 @@
const lastModified = listingHeaders.headers["last-modified"]
return Date.parse(lastModified)
} else {
throw "Artifact url did not exist (probably temporarily)."

Check warning on line 46 in src/maven/maven-info.js

View workflow job for this annotation

GitHub Actions / build

Expected an error object to be thrown
}
}

Expand Down Expand Up @@ -86,6 +115,7 @@

const generateMavenInfo = async artifact => {
const maven = parse(artifact)

const mavenUrl = await createMavenUrlFromCoordinates(maven)

if (mavenUrl) {
Expand All @@ -94,7 +124,7 @@

//

const latestVersion = await tolerantlyGetLatestVersionFromMavenSearch(maven)
const latestVersion = await latestVersionCache.getOrSet(artifact, () => tolerantlyGetLatestVersionFromMavenSearch(maven))
// If the latest version of an artifact is also its current version, there's unlikely to be a relocation on it
if (latestVersion && latestVersion !== maven.version) {
const latestPomUrl = await createMavenArtifactsUrlFromCoordinates({
Expand All @@ -103,11 +133,16 @@
version: latestVersion
})

const response = await axios.get(
latestPomUrl,
{}
const data = await latestVersionCache.getOrSet(latestPomUrl, async () => {
const options = {
retries: 6,
factor: 3,
minTimeout: 4 * 1000,
}
const response = await promiseRetry(async () => axios.get(latestPomUrl, {}), options)
return response.data
}
)
const { data } = response

const processed = await readPom(data)

Expand All @@ -125,20 +160,24 @@

}

let timestamp
// This will be slow because we need to need hit the endpoint too fast and we need to back off; we perhaps should batch, but that's hard to implement with our current model
// We should perhaps also consider a soft-cache locally for when we fail completely
try {
timestamp = await getTimestampFromMavenArtifactsListing(maven)
} catch (e) {
console.log(
"Could not get timestamp from repository folder, querying maven directly."
)
console.log("Error is:", e)
timestamp = tolerantlyGetTimestampFromMavenSearch(maven)
}
let timestamp = await timestampCache.getOrSet(artifact, async () => {
// This will be slow because we need to not hit the endpoint too fast and we need to back off; we perhaps should batch, but that's hard to implement with our current model
let thing
try {
thing = await getTimestampFromMavenArtifactsListing(maven)
} catch (e) {
console.log(
"Could not get timestamp from repository folder, querying maven directly."
)
console.log("Error is:", e)
thing = await tolerantlyGetTimestampFromMavenSearch(maven)
}
return thing
})

maven.timestamp = await timestamp


return maven
}
module.exports = { generateMavenInfo }
module.exports = { generateMavenInfo, clearMavenCache, initMavenCache }
18 changes: 17 additions & 1 deletion src/maven/maven-info.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { generateMavenInfo } from "./maven-info"
import { generateMavenInfo, initMavenCache } from "./maven-info"
import { clearCaches } from "../../plugins/github-enricher/sponsorFinder"

jest.mock("./maven-url")
const axios = require("axios")
Expand Down Expand Up @@ -30,6 +31,11 @@ axios.head.mockReturnValue({
describe("the maven information generator", () => {
const artifact = "io.quarkus:quarkus-vertx::jar:3.0.0.Alpha1"

beforeEach(async () => {
clearCaches()
await initMavenCache()
})

describe("when the repo listing is working well", () => {
it("adds a maven url", async () => {
const mavenInfo = await generateMavenInfo(artifact)
Expand All @@ -45,6 +51,16 @@ describe("the maven information generator", () => {
const mavenInfo = await generateMavenInfo(artifact)
expect(mavenInfo.timestamp).toBe(1675955892000)
})

it("uses the cache on subsequent calls", async () => {
const startingCallCount = axios.get.mock.calls.length
// Warm the cache
await generateMavenInfo(artifact)
expect(axios.get.mock.calls.length).toBe(startingCallCount + 1)
// Now go again
await generateMavenInfo(artifact)
expect(axios.get.mock.calls.length).toBe(startingCallCount + 1)
})
})

describe("when the repository listing has errors", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class PersistableCache {
return this.get(key)
} else {
const answer = await functionThatReturnsAPromise()
console.log("caching ", answer)
this.set(key, answer)
return answer
}
Expand Down
Loading