Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: cache file signature detection #1238

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/console/progressBar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const ProgressBarSymbol = {
// Candidates
GENERATING: chalk.cyan('Σ'),
FILTERING: chalk.cyan('∆'),
EXTENSION_CORRECTION: chalk.cyan('.'),
HASHING: chalk.cyan('#'),
VALIDATING: chalk.cyan(process.platform === 'win32' ? '?' : '≟'),
COMBINING_ALL: chalk.cyan(process.platform === 'win32' ? 'U' : '∪'),
Expand Down
15 changes: 5 additions & 10 deletions src/modules/candidateExtensionCorrector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import DAT from '../types/dats/dat.js';
import Parent from '../types/dats/parent.js';
import ROM from '../types/dats/rom.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import FileSignature from '../types/files/fileSignature.js';
import FileCache from '../types/files/fileCache.js';
import Options, { FixExtension } from '../types/options.js';
import OutputFactory from '../types/outputFactory.js';
import ReleaseCandidate from '../types/releaseCandidate.js';
Expand Down Expand Up @@ -52,7 +52,7 @@ export default class CandidateExtensionCorrector extends Module {
.filter((romWithFiles) => this.romNeedsCorrecting(romWithFiles))
.length;
this.progressBar.logTrace(`${dat.getNameShort()}: correcting ${romsThatNeedCorrecting.toLocaleString()} output file extension${romsThatNeedCorrecting !== 1 ? 's' : ''}`);
await this.progressBar.setSymbol(ProgressBarSymbol.HASHING);
await this.progressBar.setSymbol(ProgressBarSymbol.EXTENSION_CORRECTION);
await this.progressBar.reset(romsThatNeedCorrecting);

const correctedParentsToCandidates = await this.correctExtensions(dat, parentsToCandidates);
Expand Down Expand Up @@ -148,21 +148,16 @@ export default class CandidateExtensionCorrector extends Module {
this.progressBar.logTrace(`${dat.getNameShort()}: ${parent.getName()}: correcting extension for: ${romWithFiles.getInputFile()
.toString()}`);

await romWithFiles.getInputFile().createReadStream(async (stream) => {
const romSignature = await FileSignature.signatureFromFileStream(stream);
if (!romSignature) {
// No signature was found, so we can't perform any correction
return;
}

const romSignature = await FileCache.getOrComputeFileSignature(romWithFiles.getInputFile());
if (romSignature) {
// ROM file signature found, use the appropriate extension
const { dir, name } = path.parse(correctedRom.getName());
const correctedRomName = path.format({
dir,
name: name + romSignature.getExtension(),
});
correctedRom = correctedRom.withName(correctedRomName);
});
}

this.progressBar.removeWaitingMessage(waitingMessage);
await this.progressBar.incrementDone();
Expand Down
13 changes: 7 additions & 6 deletions src/types/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,17 @@ export default class Cache<V> {
* Delete a key in the cache.
*/
public async delete(key: string | RegExp): Promise<void> {
let keys: string[];
let keysToDelete: string[];
if (key instanceof RegExp) {
keys = [...this.keys().keys()].filter((k) => k.match(key));
keysToDelete = [...this.keys().keys()].filter((k) => k.match(key));
} else {
keys = [key];
keysToDelete = [key];
}

await Promise.all(keys.map(async (k) => {
await this.lockKey(k, () => this.deleteUnsafe(k));
}));
// Note: avoiding lockKey() because it could get expensive with many keys to delete
await this.keyMutexesMutex.runExclusive(() => {
keysToDelete.forEach((k) => this.deleteUnsafe(k));
});
}

private deleteUnsafe(key: string): void {
Expand Down
61 changes: 51 additions & 10 deletions src/types/files/fileCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Archive from './archives/archive.js';
import ArchiveEntry, { ArchiveEntryProps } from './archives/archiveEntry.js';
import File, { FileProps } from './file.js';
import { ChecksumBitmask } from './fileChecksums.js';
import FileSignature from './fileSignature.js';
import ROMHeader from './romHeader.js';

interface CacheValue {
Expand All @@ -14,11 +15,15 @@ interface CacheValue {
value: FileProps | ArchiveEntryProps<Archive>[] | string | undefined,
}

enum ValueType {
FILE_CHECKSUMS = 'F',
ARCHIVE_CHECKSUMS = 'A',
FILE_HEADER = 'H',
}
/**
 * Value-type suffixes for cache keys: each cache key ends with `|<suffix>`, so one file path can
 * hold several independent cached values (checksums, ROM header, file signature).
 *
 * The header and signature suffixes embed a count read from {@link ROMHeader} and
 * {@link FileSignature} when this module is loaded, so the suffix string itself changes whenever
 * those counts change.
 */
const ValueType = {
FILE_CHECKSUMS: 'F',
ARCHIVE_CHECKSUMS: 'A',
// ROM headers and file signatures may not be found for files, and that is a valid result that
// gets cached. But when the list of known headers or signatures changes, we may be able to find
// a non-undefined result. So these dynamic values help with cache busting.
ROM_HEADER: `H${ROMHeader.getKnownHeaderCount()}`,
FILE_SIGNATURE: `S${FileSignature.getKnownSignatureCount()}`,
};

export default class FileCache {
private static readonly VERSION = 3;
Expand All @@ -45,7 +50,8 @@ export default class FileCache {
const keyRegex = new RegExp(`^V${prevVersion}\\|`);
return this.cache.delete(keyRegex);
}));
// await this.cache.delete(new RegExp(`\\|[^${Object.values(ValueType).join()}]$`));
// Delete keys from old value types
await this.cache.delete(new RegExp(`\\|(?!(${Object.values(ValueType).join('|')}))[^|]+$`));

// Delete keys for deleted files
const disks = FsPoly.disksSync();
Expand Down Expand Up @@ -198,7 +204,7 @@ export default class FileCache {
static async getOrComputeFileHeader(file: File): Promise<ROMHeader | undefined> {
// NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
const stats = await FsPoly.stat(file.getFilePath());
const cacheKey = this.getCacheKey(file.toString(), ValueType.FILE_HEADER);
const cacheKey = this.getCacheKey(file.toString(), ValueType.ROM_HEADER);

const cachedValue = await this.cache.getOrCompute(
cacheKey,
Expand All @@ -214,10 +220,11 @@ export default class FileCache {
},
(cached) => {
if (cached.fileSize !== stats.size || cached.modifiedTimeMillis !== stats.mtimeMs) {
// File has changed since being cached
// Recompute if the file has changed since being cached
return true;
}
return false;
// Recompute if the cached value isn't known
return typeof cached.value === 'string' && !ROMHeader.headerFromName(cached.value);
},
);

Expand All @@ -228,7 +235,41 @@ export default class FileCache {
return ROMHeader.headerFromName(cachedHeaderName);
}

private static getCacheKey(filePath: string, valueType: ValueType): string {
/**
 * Look up the {@link FileSignature} of a file, reading the file's contents only when the cache
 * has no usable entry for it.
 *
 * A cached entry is recomputed when the file's size or modified time no longer match the
 * values stored with it, or when it holds a signature name that {@link FileSignature} can no
 * longer resolve. "No signature found" is cached as a valid result.
 *
 * @param file the file whose signature should be detected
 * @returns the detected signature, or `undefined` when none was found
 */
static async getOrComputeFileSignature(file: File): Promise<FileSignature | undefined> {
  // NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
  const stats = await FsPoly.stat(file.getFilePath());
  const { size: currentSize, mtimeMs: currentModifiedTimeMillis } = stats;

  const cachedValue = await this.cache.getOrCompute(
    this.getCacheKey(file.toString(), ValueType.FILE_SIGNATURE),
    // Cache miss (or stale entry): sniff the signature from the file's contents
    async () => {
      const detected = await file.createReadStream(
        async (stream) => FileSignature.signatureFromFileStream(stream),
      );
      return {
        fileSize: currentSize,
        modifiedTimeMillis: currentModifiedTimeMillis,
        value: detected?.getName(),
      };
    },
    // Decide whether an existing entry has to be thrown away and recomputed
    (cached) => {
      const fileUnchanged = cached.fileSize === currentSize
        && cached.modifiedTimeMillis === currentModifiedTimeMillis;
      // A changed file always recomputes; otherwise recompute only when the cached name is a
      // string that no longer maps to a known signature
      return !fileUnchanged
        || (typeof cached.value === 'string' && !FileSignature.signatureFromName(cached.value));
    },
  );

  const signatureName = cachedValue.value as string | undefined;
  return signatureName ? FileSignature.signatureFromName(signatureName) : undefined;
}

/**
 * Build the cache key for one kind of cached value of one file.
 *
 * The key has the form `V<version>|<filePath>|<valueType>`.
 *
 * @param filePath string form of the file the value belongs to
 * @param valueType suffix identifying which kind of value is cached
 * @returns the pipe-delimited cache key
 */
private static getCacheKey(filePath: string, valueType: string): string {
  const versionSegment = `V${FileCache.VERSION}`;
  return [versionSegment, filePath, valueType].join('|');
}
}
4 changes: 1 addition & 3 deletions src/types/files/fileFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ export default class FileFactory {
let signature: FileSignature | undefined;
try {
const file = await File.fileOf({ filePath });
signature = await file.createReadStream(
async (stream) => FileSignature.signatureFromFileStream(stream),
);
signature = await FileCache.getOrComputeFileSignature(file);
} catch {
// Fail silently on assumed I/O errors
return undefined;
Expand Down
Loading
Loading