feat(core): expose tar header reader in @cotar/core
Showing 6 changed files with 133 additions and 77 deletions.
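
As a quick orientation (not part of the diff itself), the newly exposed reader can be consumed roughly as in the sketch below. The function name, tar file name and the fs-backed readBytes helper are illustrative; TarReader.iterate, TarReader.Type and the { header, offset } records it yields are taken from the files changed in this commit.

import { promises as fs } from 'fs';
import { TarReader } from '@cotar/core';

async function listTarFiles(tarFileName: string): Promise<void> {
  const fd = await fs.open(tarFileName, 'r');
  const buffer = Buffer.alloc(512);

  // Adapter for the reader: fetch `count` bytes starting at `offset`,
  // returning null once the end of the archive is reached.
  async function readBytes(offset: number, count: number): Promise<Buffer | null> {
    const res = await fd.read(buffer, 0, count, offset);
    if (res.bytesRead < count) return null;
    return buffer;
  }

  for await (const file of TarReader.iterate(readBytes)) {
    // Each record carries the parsed tar header and its offset within the archive.
    console.log(file.header.path, file.header.size, file.offset);
  }
  await fd.close();
}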
@@ -1,91 +1,41 @@
import { bp } from 'binparse';
import { TarReader } from '@cotar/core';
import { createWriteStream, promises as fs } from 'fs';
import type { Logger } from 'pino';

export enum TarType {
  File = 0,
  HardLink = 1,
  SymLink = 2,
  CharDeviceNode = 3,
  BlockDeviceNode = 4,
  Directory = 5,
  FifoNode = 6,
  Reserved = 7,
}
const tar = bp.object('TarHeader', {
  path: bp.string(100),
  mode: bp.string(8),
  uid: bp.string(8),
  gid: bp.string(8),
  size: bp.bytes(12).refine((val) => parseInt(val.toString(), 8)),
  mtime: bp.bytes(12),
  unk1: bp.bytes(8),
  type: bp.string(1).refine(Number),
  linkName: bp.string(100),
  magic: bp.string(6),
  version: bp.bytes(2),
  uname: bp.string(32),
  gname: bp.string(32),
  devMajog: bp.bytes(8),
  devMinor: bp.bytes(8),
  prefix: bp.bytes(155),
  unk3: bp.bytes(12),
});

function alignOffsetToBlock(ctx: { offset: number }): void {
  let size = ctx.offset & 511;
  while (size !== 0) {
    ctx.offset += 512 - size;
    size = ctx.offset & 511;
  }
}

export async function toTarTilesIndex(filename: string, indexFileName: string, logger: Logger): Promise<void> {
  const fd = await fs.open(filename, 'r');

  const stat = await fd.stat();

  const ctx = { offset: 0, startOffset: 0 };

  const Files: Record<string, { o: number; s: number }> = {};
  let fileCount = 0;
  const headBuffer = Buffer.alloc(512);
  logger.info({ index: indexFileName }, 'Cotar.Index:Start');
  const outputBuffer = createWriteStream(indexFileName);
  outputBuffer.write(`[\n`);

  const startTime = Date.now();
  let currentTime = startTime;
  while (ctx.offset < stat.size) {
    alignOffsetToBlock(ctx);

    const headData = await fd.read(headBuffer, 0, 512, ctx.offset);
    ctx.offset += 512;
    if (headData.bytesRead < 512) throw new Error('Failed to read header data');
    const head = tar.raw(headBuffer);

    if (head.path === '') break;
    if (TarType[head.type] == null) throw new Error('Unknown header');
  async function readBytes(offset: number, count: number): Promise<Buffer | null> {
    const res = await fd.read(headBuffer, 0, count, offset);
    if (res.bytesRead < count) return null;
    return headBuffer;
  }

    if (head.type === TarType.File) {
      if (fileCount > 0) outputBuffer.write(',\n');
      outputBuffer.write(JSON.stringify([head.path, ctx.offset, head.size]));
      Files[head.path] = { o: ctx.offset, s: head.size };
      fileCount++;
      if (fileCount % 25_000 === 0) {
        const duration = Date.now() - currentTime;
        currentTime = Date.now();
        const percent = ((ctx.offset / stat.size) * 100).toFixed(2);
        logger.debug({ current: fileCount, percent, duration }, 'Cotar.Index:Write');
      }
  let currentTime = startTime;
  for await (const ctx of TarReader.iterate(readBytes)) {
    if (ctx.header.type !== TarReader.Type.File) continue;
    if (fileCount > 0) outputBuffer.write(',\n');
    outputBuffer.write(JSON.stringify([ctx.header.path, ctx.offset, ctx.header.size]));

    fileCount++;
    if (fileCount % 25_000 === 0) {
      const duration = Date.now() - currentTime;
      currentTime = Date.now();
      const percent = ((ctx.offset / stat.size) * 100).toFixed(2);
      logger.debug({ current: fileCount, percent, duration }, 'Cotar.Index:Write');
    }

    ctx.offset += head.size;
  }

  await new Promise<void>((r) => outputBuffer.write('\n]', () => r()));
  logger.info(
    { index: indexFileName, count: Object.keys(Files).length, duration: Date.now() - startTime },
    'Cotar.Index:Created',
  );
  logger.info({ index: indexFileName, count: fileCount, duration: Date.now() - startTime }, 'Cotar.Index:Created');
}
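
For clarity, the index file written by toTarTilesIndex above is a single JSON array of [path, offset, size] records, one per file entry found in the tar. A made-up illustration of that shape (the type alias and values below are hypothetical, not the package's TarIndexRecord definition):

// Illustrative only: path, byte offset within the tar, and file size in bytes.
type IndexEntry = [path: string, offset: number, size: number];
const exampleIndex: IndexEntry[] = [
  ['tiles/0/0/0.pbf', 512, 1024],
  ['tiles/1/1/0.pbf', 2048, 987],
];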
@@ -14,5 +14,8 @@
    "build/src",
    "bin",
    "*.png"
  ]
  ],
  "devDependencies": {
    "@types/node": "^14.14.37"
  }
}
@@ -0,0 +1,33 @@
import o from 'ospec';
import * as cp from 'child_process';
import * as path from 'path';
import { promises as fs } from 'fs';
import { FileHandle } from 'fs/promises';
import { TarFileHeader, TarReader } from '../tar';

o.spec('TarReader', () => {
  // Create a Tar file of the built source
  o.before(() => {
    cp.execSync(`tar cf ${tarFilePath} tar.test.*`, { cwd: __dirname });
  });
  const tarFilePath = path.join(__dirname, 'test.tar');

  let fd: FileHandle | null;
  const headBuffer = Buffer.alloc(512);
  async function readBytes(offset: number, count: number): Promise<Buffer | null> {
    if (fd == null) throw new Error('File is closed');
    const res = await fd.read(headBuffer, 0, count, offset);
    if (res.bytesRead < count) return null;
    return headBuffer;
  }
  o.beforeEach(async () => {
    fd = await fs.open(tarFilePath, 'r');
  });
  o.afterEach(() => fd?.close());

  o('should iterate files', async () => {
    const files: TarFileHeader[] = [];
    for await (const file of TarReader.iterate(readBytes)) files.push(file);
    o(files.map((c) => c.header.path)).deepEquals(['tar.test.d.ts', 'tar.test.d.ts.map', 'tar.test.js']);
  });
});
@@ -1,2 +1,3 @@
export { Cotar } from './cotar';
export { TarIndex, TarIndexRecord } from './tar.index';
export { TarHeader, TarReader } from './tar';
@@ -0,0 +1,71 @@
import { bp, StrutInfer, toHex } from 'binparse';

export interface MinimalBuffer {
  readonly [n: number]: number;
  length: number;
  slice(start: number, end: number): MinimalBuffer;
}

export type AsyncFileRead = (readCount: number, byteCount: number) => Promise<MinimalBuffer | null>;
export interface TarFileHeader {
  offset: number;
  header: StrutInfer<typeof TarHeader>;
}

export enum TarType {
  File = 0,
  HardLink = 1,
  SymLink = 2,
  CharDeviceNode = 3,
  BlockDeviceNode = 4,
  Directory = 5,
  FifoNode = 6,
  Reserved = 7,
}
export const TarHeader = bp.object('TarHeader', {
  path: bp.string(100),
  mode: bp.string(8),
  uid: bp.string(8),
  gid: bp.string(8),
  size: bp.bytes(12).refine((val) => parseInt(val.toString(), 8)),
  mtime: bp.bytes(12),
  unk1: bp.bytes(8),
  type: bp.string(1).refine(Number),
  linkName: bp.string(100),
  magic: bp.string(6),
  version: bp.bytes(2),
  uName: bp.string(32),
  gName: bp.string(32),
  devMajor: bp.bytes(8),
  devMinor: bp.bytes(8),
  prefix: bp.bytes(155),
  padding: bp.bytes(12),
});

function alignOffsetToBlock(ctx: { offset: number }): void {
  let size = ctx.offset & 511;
  while (size !== 0) {
    ctx.offset += 512 - size;
    size = ctx.offset & 511;
  }
}
async function* iterateTarFiles(getBytes: AsyncFileRead): AsyncGenerator<TarFileHeader> {
  const ctx = { offset: 0, startOffset: 0 };

  while (true) {
    alignOffsetToBlock(ctx);
    const headData = await getBytes(ctx.offset, 512);
    if (headData == null) return;
    const head = TarHeader.raw(headData);
    if (isNaN(head.size)) return;
    ctx.offset += head.size + 512;

    if (TarType[head.type] == null) throw new Error('Unknown header @ ' + toHex(ctx.offset));
    if (head.type === TarType.File) yield { header: head, offset: ctx.offset };
  }
}

export const TarReader = {
  Type: TarType,
  iterate: iterateTarFiles,
};
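
Two notes on the new reader, in case they help review. Tar archives lay headers and file data out in 512-byte blocks, which is why alignOffsetToBlock rounds the offset up to the next multiple of 512 before each header read. And because all reads go through the AsyncFileRead callback rather than a file handle, the iterator is agnostic to where the bytes come from; a minimal sketch (not part of this commit) of satisfying that contract from an in-memory Buffer, e.g. for tests:

// Hypothetical helper: serve reads from a Buffer already holding the whole tar.
// Returning null when a read would run past the end stops the iteration.
function fromBuffer(buf: Buffer) {
  return async (offset: number, count: number): Promise<Buffer | null> => {
    if (offset + count > buf.length) return null;
    return buf.slice(offset, offset + count);
  };
}

// Usage: for await (const file of TarReader.iterate(fromBuffer(tarBuffer))) { ... }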