feat(core): expose tar header reader in @cotar/core
blacha committed Apr 7, 2021
1 parent 19a088b commit 07693ea
Showing 6 changed files with 133 additions and 77 deletions.
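
In short, the tar header parsing that previously lived inline in the CLI is now exported from @cotar/core as TarReader (an iterate generator plus the Type enum). Below is a minimal sketch of how the exported reader can be consumed, modelled on the CLI and test changes in this commit; the listFiles wrapper and the fs wiring are illustrative, not part of the commit:

import { TarReader } from '@cotar/core';
import { promises as fs } from 'fs';

async function listFiles(tarPath: string): Promise<void> {
  const fd = await fs.open(tarPath, 'r');
  const buf = Buffer.alloc(512);
  // Read callback: resolving to null (fewer bytes available than requested) stops the iterator.
  const readBytes = async (offset: number, count: number): Promise<Buffer | null> => {
    const res = await fd.read(buf, 0, count, offset);
    return res.bytesRead < count ? null : buf;
  };

  for await (const { header, offset } of TarReader.iterate(readBytes)) {
    if (header.type !== TarReader.Type.File) continue;
    console.log(header.path, offset, header.size);
  }
  await fd.close();
}
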
88 changes: 19 additions & 69 deletions packages/cli/src/create/tar.to.ttiles.ts
@@ -1,91 +1,41 @@
-import { bp } from 'binparse';
+import { TarReader } from '@cotar/core';
 import { createWriteStream, promises as fs } from 'fs';
 import type { Logger } from 'pino';

-export enum TarType {
-  File = 0,
-  HardLink = 1,
-  SymLink = 2,
-  CharDeviceNode = 3,
-  BlockDeviceNode = 4,
-  Directory = 5,
-  FifoNode = 6,
-  Reserved = 7,
-}
-const tar = bp.object('TarHeader', {
-  path: bp.string(100),
-  mode: bp.string(8),
-  uid: bp.string(8),
-  gid: bp.string(8),
-  size: bp.bytes(12).refine((val) => parseInt(val.toString(), 8)),
-  mtime: bp.bytes(12),
-  unk1: bp.bytes(8),
-  type: bp.string(1).refine(Number),
-  linkName: bp.string(100),
-  magic: bp.string(6),
-  version: bp.bytes(2),
-  uname: bp.string(32),
-  gname: bp.string(32),
-  devMajog: bp.bytes(8),
-  devMinor: bp.bytes(8),
-  prefix: bp.bytes(155),
-  unk3: bp.bytes(12),
-});
-
-function alignOffsetToBlock(ctx: { offset: number }): void {
-  let size = ctx.offset & 511;
-  while (size !== 0) {
-    ctx.offset += 512 - size;
-    size = ctx.offset & 511;
-  }
-}
-
 export async function toTarTilesIndex(filename: string, indexFileName: string, logger: Logger): Promise<void> {
   const fd = await fs.open(filename, 'r');

   const stat = await fd.stat();

-  const ctx = { offset: 0, startOffset: 0 };
-
-  const Files: Record<string, { o: number; s: number }> = {};
   let fileCount = 0;
   const headBuffer = Buffer.alloc(512);
   logger.info({ index: indexFileName }, 'Cotar.Index:Start');
   const outputBuffer = createWriteStream(indexFileName);
   outputBuffer.write(`[\n`);

   const startTime = Date.now();
-  let currentTime = startTime;
-  while (ctx.offset < stat.size) {
-    alignOffsetToBlock(ctx);
-
-    const headData = await fd.read(headBuffer, 0, 512, ctx.offset);
-    ctx.offset += 512;
-    if (headData.bytesRead < 512) throw new Error('Failed to read header data');
-    const head = tar.raw(headBuffer);
-
-    if (head.path === '') break;
-    if (TarType[head.type] == null) throw new Error('Unknown header');
+  async function readBytes(offset: number, count: number): Promise<Buffer | null> {
+    const res = await fd.read(headBuffer, 0, count, offset);
+    if (res.bytesRead < count) return null;
+    return headBuffer;
+  }

-    if (head.type === TarType.File) {
-      if (fileCount > 0) outputBuffer.write(',\n');
-      outputBuffer.write(JSON.stringify([head.path, ctx.offset, head.size]));
-      Files[head.path] = { o: ctx.offset, s: head.size };
-      fileCount++;
-      if (fileCount % 25_000 === 0) {
-        const duration = Date.now() - currentTime;
-        currentTime = Date.now();
-        const percent = ((ctx.offset / stat.size) * 100).toFixed(2);
-        logger.debug({ current: fileCount, percent, duration }, 'Cotar.Index:Write');
-      }
+  let currentTime = startTime;
+  for await (const ctx of TarReader.iterate(readBytes)) {
+    if (ctx.header.type !== TarReader.Type.File) continue;
+    if (fileCount > 0) outputBuffer.write(',\n');
+    outputBuffer.write(JSON.stringify([ctx.header.path, ctx.offset, ctx.header.size]));
+
+    fileCount++;
+    if (fileCount % 25_000 === 0) {
+      const duration = Date.now() - currentTime;
+      currentTime = Date.now();
+      const percent = ((ctx.offset / stat.size) * 100).toFixed(2);
+      logger.debug({ current: fileCount, percent, duration }, 'Cotar.Index:Write');
+    }
     }

-    ctx.offset += head.size;
   }

   await new Promise<void>((r) => outputBuffer.write('\n]', () => r()));
-  logger.info(
-    { index: indexFileName, count: Object.keys(Files).length, duration: Date.now() - startTime },
-    'Cotar.Index:Created',
-  );
+  logger.info({ index: indexFileName, count: fileCount, duration: Date.now() - startTime }, 'Cotar.Index:Created');
 }
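
For reference, the index file written by toTarTilesIndex above is a single JSON array of [path, offset, size] tuples, one per file entry in the tar; the entries below use made-up paths and numbers purely for illustration:

[
["tiles/6/40/22.pbf.gz",123904,1024],
["tiles/6/40/23.pbf.gz",125440,998]
]
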
5 changes: 4 additions & 1 deletion packages/core/package.json
@@ -14,5 +14,8 @@
     "build/src",
     "bin",
     "*.png"
-  ]
+  ],
+  "devDependencies": {
+    "@types/node": "^14.14.37"
+  }
 }
33 changes: 33 additions & 0 deletions packages/core/src/__test__/tar.test.ts
@@ -0,0 +1,33 @@
import o from 'ospec';
import * as cp from 'child_process';
import * as path from 'path';
import { promises as fs } from 'fs';
import { FileHandle } from 'fs/promises';
import { TarFileHeader, TarReader } from '../tar';

o.spec('TarReader', () => {
// Create a Tar file of the built source
o.before(() => {
cp.execSync(`tar cf ${tarFilePath} tar.test.*`, { cwd: __dirname });
});
const tarFilePath = path.join(__dirname, 'test.tar');

let fd: FileHandle | null;
const headBuffer = Buffer.alloc(512);
async function readBytes(offset: number, count: number): Promise<Buffer | null> {
if (fd == null) throw new Error('File is closed');
const res = await fd.read(headBuffer, 0, count, offset);
if (res.bytesRead < count) return null;
return headBuffer;
}
o.beforeEach(async () => {
fd = await fs.open(tarFilePath, 'r');
});
o.afterEach(() => fd?.close());

o('should iterate files', async () => {
const files: TarFileHeader[] = [];
for await (const file of TarReader.iterate(readBytes)) files.push(file);
o(files.map((c) => c.header.path)).deepEquals(['tar.test.d.ts', 'tar.test.d.ts.map', 'tar.test.js']);
});
});
1 change: 1 addition & 0 deletions packages/core/src/index.ts
@@ -1,2 +1,3 @@
export { Cotar } from './cotar';
export { TarIndex, TarIndexRecord } from './tar.index';
export { TarHeader, TarReader } from './tar';
71 changes: 71 additions & 0 deletions packages/core/src/tar.ts
@@ -0,0 +1,71 @@
import { bp, StrutInfer, toHex } from 'binparse';

export interface MinimalBuffer {
readonly [n: number]: number;
length: number;
slice(start: number, end: number): MinimalBuffer;
}

export type AsyncFileRead = (readCount: number, byteCount: number) => Promise<MinimalBuffer | null>;
export interface TarFileHeader {
offset: number;
header: StrutInfer<typeof TarHeader>;
}

export enum TarType {
File = 0,
HardLink = 1,
SymLink = 2,
CharDeviceNode = 3,
BlockDeviceNode = 4,
Directory = 5,
FifoNode = 6,
Reserved = 7,
}
export const TarHeader = bp.object('TarHeader', {
path: bp.string(100),
mode: bp.string(8),
uid: bp.string(8),
gid: bp.string(8),
size: bp.bytes(12).refine((val) => parseInt(val.toString(), 8)),
mtime: bp.bytes(12),
unk1: bp.bytes(8),
type: bp.string(1).refine(Number),
linkName: bp.string(100),
magic: bp.string(6),
version: bp.bytes(2),
uName: bp.string(32),
gName: bp.string(32),
devMajor: bp.bytes(8),
devMinor: bp.bytes(8),
prefix: bp.bytes(155),
padding: bp.bytes(12),
});

function alignOffsetToBlock(ctx: { offset: number }): void {
let size = ctx.offset & 511;
while (size !== 0) {
ctx.offset += 512 - size;
size = ctx.offset & 511;
}
}
async function* iterateTarFiles(getBytes: AsyncFileRead): AsyncGenerator<TarFileHeader> {
const ctx = { offset: 0, startOffset: 0 };

while (true) {
alignOffsetToBlock(ctx);
const headData = await getBytes(ctx.offset, 512);
if (headData == null) return;
const head = TarHeader.raw(headData);
if (isNaN(head.size)) return;
ctx.offset += head.size + 512;

if (TarType[head.type] == null) throw new Error('Unknown header @ ' + toHex(ctx.offset));
if (head.type === TarType.File) yield { header: head, offset: ctx.offset };
}
}

export const TarReader = {
Type: TarType,
iterate: iterateTarFiles,
};
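
Every tar header and file body is padded to a 512-byte boundary, which is why the iterator above calls alignOffsetToBlock before each header read. A quick worked example of that alignment (the numbers are illustrative and the snippet restates the module-private helper inline):

// Suppose the running offset lands at 1300 after skipping a file body.
const ctx = { offset: 1300 };
const remainder = ctx.offset & 511; // 1300 & 511 === 276, i.e. 1300 mod 512
if (remainder !== 0) ctx.offset += 512 - remainder; // advance by 236 bytes
// ctx.offset is now 1536 (3 * 512), the start of the next header block.
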
12 changes: 5 additions & 7 deletions yarn.lock
@@ -1020,6 +1020,11 @@
   resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.35.tgz#42c953a4e2b18ab931f72477e7012172f4ffa313"
   integrity sha512-Lt+wj8NVPx0zUmUwumiVXapmaLUcAk3yPuHCFVXras9k5VT9TdhJqKqGVUQCD60OTMCl0qxJ57OiTL0Mic3Iag==

+"@types/node@^14.14.37":
+  version "14.14.37"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.37.tgz#a3dd8da4eb84a996c36e331df98d82abd76b516e"
+  integrity sha512-XYmBiy+ohOR4Lh5jE379fV2IU+6Jn4g5qASinhitfyO71b/sCo6MKsMLF5tc7Zf2CE8hViVQyYSobJNke8OvUw==
+
 "@types/normalize-package-data@^2.4.0":
   version "2.4.0"
   resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
@@ -1058,13 +1063,6 @@
   dependencies:
     "@types/node" "*"

-"@types/tar-stream@^2.2.0":
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/@types/tar-stream/-/tar-stream-2.2.0.tgz#2778ef8e328a520959a39681c15c83c53553426f"
-  integrity sha512-sRTpT180sVigzD4SiCWJQQrqcdkWnmscWvx+cXvAoPtXbLFC5+QmKi2xwRcPe4iRu0GcVl1qTeJKUTS5hULfrw==
-  dependencies:
-    "@types/node" "*"
-
 "@typescript-eslint/eslint-plugin@^4.17.0":
   version "4.19.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.19.0.tgz#56f8da9ee118fe9763af34d6a526967234f6a7f0"
