Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop entries from the refcache that have status 4XX #3506

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 72 additions & 20 deletions gulp-src/prune.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Prune entries from ${refcacheFile} file that meet one of following conditions:
- Status 4XX, unless the --keep-4xx option is specified
- The oldest entries, optionally before the date specified by --before <date>

Use --num <n> to limit the number of pruned entries.
Use --num <n> to limit the number of entries pruned by date.
`;

// The refcacheFile is a JSON map with each map entry of the form, e.g.:
Expand Down Expand Up @@ -43,54 +43,79 @@ async function pruneTask() {
'Keep all refcache entries with StatusCode in the 400 range. Default is to prune them regardless of the last seen date.',
default: false,
},
list: {
type: 'boolean',
description: 'List the <num> + 1 oldest entries. No entries are pruned.',
},
}).argv;

const n = argv.num > 0 ? argv.num : n_default;
const beforeDate = argv.before ? new Date(argv.before) : null;
const beforeDate = argv.before
? new Date(argv.before)
: new Date('9999-12-31');
const prune4xx = !argv['keep-4xx'];
const list = argv['list'];

if (argv.info) {
// Info about options was already displayed by yargs.help().
console.log(info);
return;
}

/**
 * Deletes (prunes) every entry with a 4XX status code from `entries`, in place.
 *
 * Pruning only happens when `prune4xx` (read from the enclosing scope) is
 * truthy; otherwise `entries` is left untouched.
 *
 * @param {Object} entries - Map from URL to an object with a `StatusCode` property.
 * @returns {number} The number of entries actually deleted. This is 0 when
 *   pruning of 4XX entries is disabled or when no entry has a 4XX status, so
 *   callers can use the result to tell whether `entries` was modified.
 */
function prune4xxEntriesAndReturnCount(entries) {
  // Nothing is deleted when 4XX pruning is turned off, so report 0 deletions
  // rather than the number of 4XX entries that merely exist.
  if (!prune4xx) return 0;

  const keysWith4xxStatus = Object.keys(entries).filter((url) => {
    const statusCode = entries[url].StatusCode;
    return 400 <= statusCode && statusCode <= 499;
  });
  if (keysWith4xxStatus.length === 0) return 0;

  console.log(
    `INFO: pruning ${keysWith4xxStatus.length} entries with 4XX status.`,
  );
  keysWith4xxStatus.forEach((key) => delete entries[key]);
  return keysWith4xxStatus.length;
}

try {
const json = await fs.readFile(refcacheFile, 'utf8');
const entries = JSON.parse(json);

// Create array of entries of prune candidates only, sorted by LastSeen:
const sortedEntriesOfPruneCandidates = Object.keys(entries)
if (list) {
listOldest(entries, n + 1);
return;
}

const numEntriesWith4xxStatus = prune4xxEntriesAndReturnCount(entries);

// Create array of entries of prune candidates by date, sorted by LastSeen:
const pruneCandidatesByDate__sorted = Object.keys(entries)
.map((url) => [url, entries[url].LastSeen, entries[url].StatusCode])
.filter(
(
[url, date, statusCode], // True for prune candidates:
) =>
// Include entry if pruning 4xx and status code is in 4xx
(prune4xx && 400 <= statusCode && statusCode <= 499) ||
// Or if it is before the given date
(beforeDate ? new Date(date) < beforeDate : true),
)
.filter(([url, date, statusCode]) => new Date(date) < beforeDate)
.sort((a, b) => new Date(a[1]) - new Date(b[1]));

if (sortedEntriesOfPruneCandidates.length === 0) {
console.log('INFO: no entries to prune under given options.');
if (pruneCandidatesByDate__sorted.length === 0) {
console.log('INFO: no entries to prune for given date.');
return;
} else {
console.log(
`INFO: ${sortedEntriesOfPruneCandidates.length} entries as prune candidates under given options.`,
`INFO: ${pruneCandidatesByDate__sorted.length} entries as prune candidates for given date.`,
);
}

if (!n) {
if (n == 0) {
console.log(
`WARN: num is ${n} so nothing will be pruned. Specify number of entries to prune as --num <n>.`,
`WARN: num is ${n} so no entries will be pruned by date. Specify number of entries to prune as --num <n>.`,
);
return;
if (numEntriesWith4xxStatus == 0) return;
}

// Get keys of at most n entries to prune
const keysToPrune = sortedEntriesOfPruneCandidates
const keysToPrune = pruneCandidatesByDate__sorted
.slice(0, n)
.map((item) => item[0]);
keysToPrune.forEach((key) => delete entries[key]);
Expand All @@ -103,6 +128,33 @@ async function pruneTask() {
}
}

/**
 * Prints the oldest entries of `entries`, ordered by `LastSeen` (oldest
 * first), to the console. Nothing is printed when there are no entries.
 *
 * Each entry is printed as `YYYY-MM-DD HH:MM for <url>`, using the local time
 * zone and a 24-hour clock (hours 00-23).
 *
 * @param {Object} entries - Map from URL to an object with a `LastSeen`
 *   date string.
 * @param {number} numberOfEntries - Maximum number of entries to list.
 * @returns {void}
 */
function listOldest(entries, numberOfEntries) {
  const pad = (value, width = 2) => String(value).padStart(width, '0');

  // Build the timestamp from the date fields directly instead of relying on
  // locale formatting: the result does not depend on the runtime's locale
  // data, and midnight is always rendered as 00:xx (never 24:xx).
  const formatLocalTimestamp = (date) => {
    if (Number.isNaN(date.getTime())) return 'Invalid Date';
    const day = [
      pad(date.getFullYear(), 4),
      pad(date.getMonth() + 1), // getMonth() is 0-based
      pad(date.getDate()),
    ].join('-');
    const time = `${pad(date.getHours())}:${pad(date.getMinutes())}`;
    return `${day} ${time}`;
  };

  const oldestEntries = Object.keys(entries)
    .map((url) => [url, new Date(entries[url].LastSeen)])
    .sort(([, a], [, b]) => a - b)
    .slice(0, numberOfEntries);

  if (oldestEntries.length === 0) return;

  // Report how many entries are actually listed, which can be fewer than
  // requested when the cache is small.
  console.log(`Listing oldest ${oldestEntries.length} entries:`);
  for (const [url, lastSeen] of oldestEntries) {
    console.log(` ${formatLocalTimestamp(lastSeen)} for ${url}`);
  }
}

// Short description attached to the task function; it names the refcache file
// and points to the --info option for the full details.
pruneTask.description = `Prune --num <n> entries from ${refcacheFile} file. For details, use --info.`;

// Register the task function with gulp under the name 'prune'.
gulp.task('prune', pruneTask);
8 changes: 0 additions & 8 deletions static/refcache.json
Original file line number Diff line number Diff line change
Expand Up @@ -2303,10 +2303,6 @@
"StatusCode": 200,
"LastSeen": "2023-10-17T15:13:11.067528+02:00"
},
"https://github.com/metrico/qryn-collector": {
"StatusCode": 404,
"LastSeen": "2023-10-17T15:10:07.758268+02:00"
},
"https://github.com/mhausenblas": {
"StatusCode": 200,
"LastSeen": "2023-06-30T09:39:03.127776-04:00"
Expand Down Expand Up @@ -6439,10 +6435,6 @@
"StatusCode": 200,
"LastSeen": "2023-06-29T18:43:02.782058-04:00"
},
"https://www.zocdoc.com/": {
"StatusCode": 403,
"LastSeen": "2023-06-30T16:27:32.038092-04:00"
},
"https://youtu.be/9iaGG-YZw5I": {
"StatusCode": 200,
"LastSeen": "2023-06-01T17:03:14.742262-04:00"
Expand Down