Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop entries from the refcache that have status 4XX #3506

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Drop entries from the refcache that have status 4XX
chalin committed Nov 6, 2023

Verified

This commit was signed with the committer’s verified signature.
strider Gaël Chamoulaud
commit 2013a902c5cbfe1172feb189f148f61f6feffe0a
92 changes: 72 additions & 20 deletions gulp-src/prune.js
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ Prune entries from ${refcacheFile} file that meet one of following conditions:
- Status 4XX, unless the --keep-4xx option is specified
- The oldest entries, optionally before the date specified by --before <date>

Use --num <n> to limit the number of pruned entries.
Use --num <n> to limit the number of entries pruned by date.
`;

// The refcacheFile is a JSON map from URL to entry; each entry has the following form, e.g.:
@@ -43,54 +43,79 @@ async function pruneTask() {
'Keep all refcache entries with StatusCode in the 400 range. Default is to prune them regardless of the last seen date.',
default: false,
},
list: {
type: 'boolean',
description: 'List the <num> + 1 oldest entries. No entries are pruned.',
},
}).argv;

const n = argv.num > 0 ? argv.num : n_default;
const beforeDate = argv.before ? new Date(argv.before) : null;
const beforeDate = argv.before
? new Date(argv.before)
: new Date('9999-12-31');
const prune4xx = !argv['keep-4xx'];
const list = argv['list'];

if (argv.info) {
// Info about options was already displayed by yargs.help().
console.log(info);
return;
}

// Deletes (prunes) entries whose StatusCode is in the 400-499 range from
// `entries`, mutating the given map in place. Pruning only happens when the
// enclosing `prune4xx` flag is true (i.e. --keep-4xx was not given).
//
// Returns the number of entries actually deleted: 0 when 4XX pruning is
// disabled or when there are no 4XX entries.
function prune4xxEntriesAndReturnCount(entries) {
  // When 4XX entries are kept nothing is deleted, so report 0 rather than the
  // number of 4XX entries present; callers use the count to decide whether
  // anything changed.
  if (!prune4xx) return 0;

  const keysWith4xxStatus = Object.keys(entries).filter((url) => {
    const { StatusCode: statusCode } = entries[url];
    return 400 <= statusCode && statusCode <= 499;
  });

  if (keysWith4xxStatus.length > 0) {
    console.log(
      `INFO: pruning ${keysWith4xxStatus.length} entries with 4XX status.`,
    );
    keysWith4xxStatus.forEach((key) => delete entries[key]);
  }
  return keysWith4xxStatus.length;
}

try {
const json = await fs.readFile(refcacheFile, 'utf8');
const entries = JSON.parse(json);

// Create array of entries of prune candidates only, sorted by LastSeen:
const sortedEntriesOfPruneCandidates = Object.keys(entries)
if (list) {
listOldest(entries, n + 1);
return;
}

const numEntriesWith4xxStatus = prune4xxEntriesAndReturnCount(entries);

// Create array of entries of prune candidates by date, sorted by LastSeen:
const pruneCandidatesByDate__sorted = Object.keys(entries)
.map((url) => [url, entries[url].LastSeen, entries[url].StatusCode])
.filter(
(
[url, date, statusCode], // True for prune candidates:
) =>
// Include entry if pruning 4xx and status code is in 4xx
(prune4xx && 400 <= statusCode && statusCode <= 499) ||
// Or if it is before the given date
(beforeDate ? new Date(date) < beforeDate : true),
)
.filter(([url, date, statusCode]) => new Date(date) < beforeDate)
.sort((a, b) => new Date(a[1]) - new Date(b[1]));

if (sortedEntriesOfPruneCandidates.length === 0) {
console.log('INFO: no entries to prune under given options.');
if (pruneCandidatesByDate__sorted.length === 0) {
console.log('INFO: no entries to prune for given date.');
return;
} else {
console.log(
`INFO: ${sortedEntriesOfPruneCandidates.length} entries as prune candidates under given options.`,
`INFO: ${pruneCandidatesByDate__sorted.length} entries as prune candidates for given date.`,
);
}

if (!n) {
if (n == 0) {
console.log(
`WARN: num is ${n} so nothing will be pruned. Specify number of entries to prune as --num <n>.`,
`WARN: num is ${n} so no entries will be pruned by date. Specify number of entries to prune as --num <n>.`,
);
return;
if (numEntriesWith4xxStatus == 0) return;
}

// Get keys of at most n entries to prune
const keysToPrune = sortedEntriesOfPruneCandidates
const keysToPrune = pruneCandidatesByDate__sorted
.slice(0, n)
.map((item) => item[0]);
keysToPrune.forEach((key) => delete entries[key]);
@@ -103,6 +128,33 @@ async function pruneTask() {
}
}

/**
 * Logs the oldest entries of the refcache map, ordered by ascending
 * `LastSeen`, one line per entry in the form `<date> <time> for <url>`.
 * Dates and times are rendered in the local time zone. Nothing is logged
 * when there are no entries. `entries` is not modified.
 *
 * @param {Object} entries Map from URL to a record with a `LastSeen` date string.
 * @param {number} numberOfEntries Maximum number of entries to list.
 */
function listOldest(entries, numberOfEntries) {
  // Parse each LastSeen once, rather than on every sort comparison.
  const oldestEntries = Object.keys(entries)
    .map((url) => [url, new Date(entries[url].LastSeen)])
    .sort(([, a], [, b]) => a - b)
    .slice(0, numberOfEntries);

  if (oldestEntries.length === 0) return;

  // Report how many entries are actually listed; the map may hold fewer
  // entries than were requested.
  console.log(`Listing oldest ${oldestEntries.length} entries:`);

  oldestEntries.forEach(([url, date]) => {
    const formattedDate = date
      .toLocaleDateString('en-CA', {
        year: 'numeric',
        month: '2-digit',
        day: '2-digit',
      })
      // en-CA normally yields YYYY-MM-DD already; normalize any slashes.
      .replace(/\//g, '-');
    const formattedTime = date.toLocaleTimeString('en-CA', {
      hour: '2-digit',
      minute: '2-digit',
      // Pin a 00-23 clock: `hour12: false` can render midnight as hour 24
      // with some engine/locale combinations.
      hourCycle: 'h23',
    });
    console.log(` ${formattedDate} ${formattedTime} for ${url}`);
  });
}

// Attach a one-line summary to the task function; it points users at --info
// for the full explanation. (Presumably surfaced by gulp's task listing —
// confirm against the gulp version in use.)
pruneTask.description = `Prune --num <n> entries from ${refcacheFile} file. For details, use --info.`;

// Register the task with gulp under the name 'prune'.
gulp.task('prune', pruneTask);
8 changes: 0 additions & 8 deletions static/refcache.json
Original file line number Diff line number Diff line change
@@ -2303,10 +2303,6 @@
"StatusCode": 200,
"LastSeen": "2023-10-17T15:13:11.067528+02:00"
},
"https://github.com/metrico/qryn-collector": {
"StatusCode": 404,
"LastSeen": "2023-10-17T15:10:07.758268+02:00"
},
"https://github.com/mhausenblas": {
"StatusCode": 200,
"LastSeen": "2023-06-30T09:39:03.127776-04:00"
@@ -6415,10 +6411,6 @@
"StatusCode": 200,
"LastSeen": "2023-06-29T18:43:02.782058-04:00"
},
"https://www.zocdoc.com/": {
"StatusCode": 403,
"LastSeen": "2023-06-30T16:27:32.038092-04:00"
},
"https://youtu.be/9iaGG-YZw5I": {
"StatusCode": 200,
"LastSeen": "2023-06-01T17:03:14.742262-04:00"