diff --git a/lib/modules/datasource/rubygems/index.ts b/lib/modules/datasource/rubygems/index.ts index 2f44fc6ab5965f..670cd75bbe6ea8 100644 --- a/lib/modules/datasource/rubygems/index.ts +++ b/lib/modules/datasource/rubygems/index.ts @@ -1,6 +1,7 @@ import { Marshal } from '@qnighy/marshal'; import type { ZodError } from 'zod'; import { logger } from '../../../logger'; +import { cache } from '../../../util/cache/package/decorator'; import { HttpError } from '../../../util/http'; import { AsyncResult, Result } from '../../../util/result'; import { getQueryString, joinUrlParts, parseUrl } from '../../../util/url'; @@ -46,6 +47,16 @@ export class RubyGemsDatasource extends Datasource { private readonly versionsEndpointCache: VersionsEndpointCache; + @cache({ + namespace: `datasource-${RubyGemsDatasource.id}`, + key: ({ packageName, registryUrl }: GetReleasesConfig) => + // TODO: types (#22198) + `releases:${registryUrl!}:${packageName}`, + cacheable: ({ registryUrl }: GetReleasesConfig) => { + const registryHostname = parseUrl(registryUrl)?.hostname; + return registryHostname === 'rubygems.org'; + }, + }) async getReleases({ packageName, registryUrl, diff --git a/lib/modules/datasource/rubygems/metadata-cache.spec.ts b/lib/modules/datasource/rubygems/metadata-cache.spec.ts index 7b376ac6eb60ca..be203610a91ab5 100644 --- a/lib/modules/datasource/rubygems/metadata-cache.spec.ts +++ b/lib/modules/datasource/rubygems/metadata-cache.spec.ts @@ -8,17 +8,18 @@ jest.mock('../../../util/cache/package'); const packageCache = mocked(_packageCache); describe('modules/datasource/rubygems/metadata-cache', () => { - const cache: Map = new Map(); + const packageCacheMock: Map = new Map(); beforeEach(() => { - cache.clear(); + packageCacheMock.clear(); packageCache.get.mockImplementation( - (ns, key) => Promise.resolve(cache.get(`${ns}::${key}`)) as never + (ns, key) => + Promise.resolve(packageCacheMock.get(`${ns}::${key}`)) as never ); packageCache.set.mockImplementation((ns, key, 
value) => { - cache.set(`${ns}::${key}`, value); + packageCacheMock.set(`${ns}::${key}`, value); return Promise.resolve() as never; }); }); @@ -64,7 +65,11 @@ describe('modules/datasource/rubygems/metadata-cache', () => { homepage_uri: 'https://example.com', }); - const res = await cache.getRelease('https://rubygems.org', 'foobar', []); + const res = await cache.getRelease('https://rubygems.org', 'foobar', [ + '1.0.0', + '2.0.0', + '3.0.0', + ]); expect(res).toEqual({ changelogUrl: 'https://example.com/changelog', @@ -93,6 +98,110 @@ describe('modules/datasource/rubygems/metadata-cache', () => { }); }); + it('handles inconsistent data between versions and endpoint', async () => { + const cache = new MetadataCache(new Http('test')); + + httpMock + .scope('https://rubygems.org') + .get('/api/v1/versions/foobar.json') + .reply(200, [ + { number: '1.0.0', created_at: '2021-01-01' }, + { number: '2.0.0', created_at: '2022-01-01' }, + { number: '3.0.0', created_at: '2023-01-01' }, + ]) + .get('/api/v1/gems/foobar.json') + .reply(200, { + name: 'foobar', + created_at: '2023-01-01', + changelog_uri: 'https://example.com/changelog', + source_code_uri: 'https://example.com/source', + homepage_uri: 'https://example.com', + }); + + const res = await cache.getRelease('https://rubygems.org', 'foobar', [ + '1.0.0', + '2.0.0', + '3.0.0', + '4.0.0', + ]); + + expect(res).toEqual({ + releases: [ + { version: '1.0.0' }, + { version: '2.0.0' }, + { version: '3.0.0' }, + { version: '4.0.0' }, + ], + }); + }); + + it('handles inconsistent data between cache and endpoint', async () => { + packageCacheMock.set( + 'datasource-rubygems::metadata-cache:https://rubygems.org:foobar', + { + hash: '123', + createdAt: '2021-01-01', + data: { + releases: [ + { version: '1.0.0' }, + { version: '2.0.0' }, + { version: '3.0.0' }, + ], + }, + } + ); + const cache = new MetadataCache(new Http('test')); + + httpMock + .scope('https://rubygems.org') + .get('/api/v1/versions/foobar.json') + .reply(200, [ 
+ { number: '1.0.0', created_at: '2021-01-01' }, + { number: '2.0.0', created_at: '2022-01-01' }, + { number: '3.0.0', created_at: '2023-01-01' }, + ]) + .get('/api/v1/gems/foobar.json') + .reply(200, { + name: 'foobar', + created_at: '2023-01-01', + changelog_uri: 'https://example.com/changelog', + source_code_uri: 'https://example.com/source', + homepage_uri: 'https://example.com', + }); + + const res = await cache.getRelease('https://rubygems.org', 'foobar', [ + '1.0.0', + '2.0.0', + '3.0.0', + '4.0.0', + ]); + + expect(res).toEqual({ + releases: [ + { version: '1.0.0' }, + { version: '2.0.0' }, + { version: '3.0.0' }, + ], + }); + expect(packageCache.set).toHaveBeenCalledWith( + 'datasource-rubygems', + 'metadata-cache:https://rubygems.org:foobar', + { + createdAt: '2021-01-01', + data: { + releases: [ + { version: '1.0.0' }, + { version: '2.0.0' }, + { version: '3.0.0' }, + ], + }, + hash: '123', + isFallback: true, + }, + 24 * 60 + ); + }); + it('returns cached data', async () => { const cache = new MetadataCache(new Http('test')); diff --git a/lib/modules/datasource/rubygems/metadata-cache.ts b/lib/modules/datasource/rubygems/metadata-cache.ts index 41860faf0fb3a6..834b8cdc4cdb92 100644 --- a/lib/modules/datasource/rubygems/metadata-cache.ts +++ b/lib/modules/datasource/rubygems/metadata-cache.ts @@ -1,3 +1,4 @@ +import { logger } from '../../../logger'; import * as packageCache from '../../../util/cache/package'; import { toSha256 } from '../../../util/hash'; import type { Http } from '../../../util/http'; @@ -9,8 +10,26 @@ import { getV1Releases } from './common'; interface CacheRecord { hash: string; data: ReleaseResult; + isFallback?: true; } +function hashVersions(versions: string[]): string { + return toSha256(versions.sort().join(',')); +} + +function hashReleases(releases: ReleaseResult): string { + return hashVersions(releases.releases.map((release) => release.version)); +} + +type CacheNotFoundError = { type: 'cache-not-found' }; +type 
CacheStaleError = { + type: 'cache-stale'; + cache: CacheRecord; +}; +type CacheInvalidError = { type: 'cache-invalid' }; +type CacheLoadError = CacheNotFoundError | CacheStaleError; +type CacheError = CacheNotFoundError | CacheStaleError | CacheInvalidError; + export class MetadataCache { constructor(private readonly http: Http) {} @@ -21,44 +40,74 @@ export class MetadataCache { ): Promise { const cacheNs = `datasource-rubygems`; const cacheKey = `metadata-cache:${registryUrl}:${packageName}`; - const hash = toSha256(versions.join('')); + const versionsHash = hashVersions(versions); - const loadCache = (): AsyncResult> => - Result.wrapNullable( + const loadCache = (): AsyncResult => + Result.wrapNullable( packageCache.get(cacheNs, cacheKey), - 'cache-not-found' as const + { type: 'cache-not-found' } ).transform((cache) => { - return hash === cache.hash + return versionsHash === cache.hash ? Result.ok(cache.data) - : Result.err('cache-outdated' as const); + : Result.err({ type: 'cache-stale', cache }); }); - const saveCache = async (data: ReleaseResult): Promise => { + const saveCache = async ( + cache: CacheRecord, + ttlMinutes = 100 * 24 * 60, + ttlDelta = 10 * 24 * 60 + ): Promise => { const registryHostname = parseUrl(registryUrl)?.hostname; if (registryHostname === 'rubygems.org') { - const newCache: CacheRecord = { hash, data }; - const ttlMinutes = 100 * 24 * 60; - const ttlRandomDelta = Math.floor(Math.random() * 10 * 24 * 60); - await packageCache.set( - cacheNs, - cacheKey, - newCache, - ttlMinutes + ttlRandomDelta - ); + const ttlRandomDelta = Math.floor(Math.random() * ttlDelta); + const ttl = ttlMinutes + ttlRandomDelta; + await packageCache.set(cacheNs, cacheKey, cache, ttl); } - - return data; }; return await loadCache() - .catch(() => - getV1Releases(this.http, registryUrl, packageName).transform(saveCache) - ) - .catch(() => - Result.ok({ - releases: versions.map((version) => ({ version })), - }) + .catch((err) => + getV1Releases(this.http, 
registryUrl, packageName).transform( + async ( + data: ReleaseResult + ): Promise> => { + const dataHash = hashReleases(data); + if (dataHash === versionsHash) { + await saveCache({ + hash: dataHash, + data, + }); + return Result.ok(data); + } + + /** + * Return stale cache for 24 hours, + * if metadata is inconsistent with versions list. + */ + if (err.type === 'cache-stale') { + const staleCache = err.cache; + if (!staleCache.isFallback) { + await saveCache( + { ...staleCache, isFallback: true }, + 24 * 60, + 0 + ); + } + return Result.ok(staleCache.data); + } + + return Result.err({ type: 'cache-invalid' }); + } + ) ) + .catch((err) => { + logger.debug( + { err }, + 'Rubygems: error fetching rubygems data, falling back to versions-only result' + ); + const releases = versions.map((version) => ({ version })); + return Result.ok({ releases } as ReleaseResult); + }) .unwrapOrThrow(); } } diff --git a/lib/modules/datasource/rubygems/readme.md b/lib/modules/datasource/rubygems/readme.md new file mode 100644 index 00000000000000..a9078b5563dc05 --- /dev/null +++ b/lib/modules/datasource/rubygems/readme.md @@ -0,0 +1,66 @@ +# Rubygems datasource + +Datasource query order depends on the registry. + +## Querying `rubygems.org` + +Rubygems rate limits are easy to hit, so we need to be careful with the queries. +This is implemented with two-level cache: + +- First, we query `https://rubygems.org/versions` endpoint for current versions for all packages. + + Either full or delta sync is performed, depending on the cache state. + + All the data of this layer is stored in-memory as the mapping `packageName -> version[]`. 
+ + ```mermaid + stateDiagram-v2 + [*] --> Empty + + state "Empty" as Empty + Empty --> FullSync: getPkgReleases() + + state "Synced" as Synced + Synced --> DeltaSync + + state "Unsupported" as Unsupported + Unsupported --> [*] + + state "Full sync" as FullSync : GET /versions (~20Mb) + state full_sync_result <<choice>> + FullSync --> full_sync_result: Response + full_sync_result --> Synced: (1) Status 200 + full_sync_result --> Unsupported: (2) Status 404 + full_sync_result --> Empty: (3) Status other than 200 or 404\n Clear cache and throw ExternalHostError + + state "Delta sync" as DeltaSync: GET /versions with "Range" header + state delta_sync_result <<choice>> + DeltaSync --> delta_sync_result: Successful response + delta_sync_result --> Synced: (1) Status other than 206\nFull data is received, extract and replace old cache\n (as if it is the full sync) + delta_sync_result --> FullSync: (2) The head of response doesn't match\n the tail of the previously fetched data + delta_sync_result --> Synced: (3) The head of response matches\n the tail of the previously fetched data + + state delta_sync_error <<choice>> + DeltaSync --> delta_sync_error: Error response + delta_sync_error --> FullSync: (1) Status 416 should not happen\nbut moves to full sync + delta_sync_error --> Unsupported: (2) Status 404 + delta_sync_error --> Empty: (3) Status other than 404 or 416 + ``` + +- Then, more data is obtained from `https://rubygems.org/api/v1/versions/<package>.json` and `https://rubygems.org/api/v1/gems/<package>.json`. + + From the previous layer, the cache key is formed from the `packageName`, and the list of versions is additionally hashed and stored to ensure consistency, so that we reach these API endpoints only when the key has expired or when the list of versions has changed. + + The data for this cache layer is persisted in the longer-term package cache.
+ +## Querying `rubygems.pkg.github.com` or `gitlab.com` + +These particular registries are queried using the obsolete API: + +- `/api/v1/dependencies` + +## Other registries + +- Fetch from `/api/v1/versions/<package>.json` +- Fall back to `/info/<package>`, if the above fails +- Fall back to the obsolete `/api/v1/dependencies`, if the above fails