Skip to content

Commit

Permalink
Update to generic-filehandle2
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Dec 12, 2024
1 parent af6ed4a commit fc27a45
Show file tree
Hide file tree
Showing 13 changed files with 497 additions and 494 deletions.
8 changes: 0 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
## [4.0.1](https://github.com/GMOD/bam-js/compare/v4.0.0...v4.0.1) (2024-11-12)



# [4.0.0](https://github.com/GMOD/bam-js/compare/v3.0.3...v4.0.0) (2024-11-12)



## [3.0.3](https://github.com/GMOD/bam-js/compare/v3.0.0...v3.0.3) (2024-11-11)



## [3.0.2](https://github.com/GMOD/bam-js/compare/v3.0.0...v3.0.2) (2024-11-11)



- republish v3.0.1 since it got tagged on a deleted branch

## [3.0.1](https://github.com/GMOD/bam-js/compare/v3.0.0...v3.0.1) (2024-11-11)
Expand Down
18 changes: 7 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ var records = await t.getRecordsForRange('ctgA', 0, 50000)
```

The `bamPath` argument only works on nodejs. In the browser, you should pass
`bamFilehandle` with a generic-filehandle e.g. `RemoteFile`
`bamFilehandle` with a generic-filehandle2 filehandle, e.g. `RemoteFile`.

```typescript
const { RemoteFile } = require('generic-filehandle')
const { RemoteFile } = require('generic-filehandle2')
const bam = new BamFile({
bamFilehandle: new RemoteFile('yourfile.bam'), // or a full http url
baiFilehandle: new RemoteFile('yourfile.bam.bai'), // or a full http url
Expand Down Expand Up @@ -76,9 +76,10 @@ The BAM class constructor accepts arguments
yielding

Note: filehandles implement the Filehandle interface from
https://www.npmjs.com/package/generic-filehandle. This module offers the path
and url arguments as convenience methods for supplying the LocalFile and
RemoteFile
https://www.npmjs.com/package/generic-filehandle2.

This module offers the path and url arguments as convenient shortcuts for
supplying a LocalFile or RemoteFile.

### async getRecordsForRange(refName, start, end, opts)

Expand Down Expand Up @@ -112,7 +113,7 @@ for await (const chunk of file.streamRecordsForRange(
The `getRecordsForRange` method simply wraps this process by concatenating the
chunks into an array.

### async getHeader(opts: {....anything to pass to generic-filehandle opts})
### async getHeader(opts: {....anything to pass to generic-filehandle2 opts})

This obtains the header from `HtsgetFile` or `BamFile`. Retrieves BAM file and
BAI/CSI header if applicable, or API request for refnames from htsget
Expand Down Expand Up @@ -157,11 +158,6 @@ feature.flags // flags
feature.template_length // TLEN
```

#### Note

The reason that we hide the data behind this ".get" function is that we lazily
decode records on demand, which can reduce memory consumption.

## License

MIT © [Colin Diesh](https://github.com/cmdcolin)
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
"test": "vitest",
"lint": "eslint --report-unused-disable-directives --max-warnings 0",
"clean": "rimraf dist esm",
"build:esm": "tsc --target es2018 --outDir esm",
"build:es5": "tsc --target es2015 --module commonjs --outDir dist",
"build:esm": "tsc --outDir esm",
"build:es5": "tsc --module commonjs --outDir dist",
"build": "npm run build:esm && npm run build:es5",
"prebuild": "npm run clean && npm run lint",
"preversion": "npm run lint && npm test run && npm run build",
Expand All @@ -39,10 +39,9 @@
],
"dependencies": {
"@gmod/abortable-promise-cache": "^2.0.0",
"@gmod/bgzf-filehandle": "^1.4.4",
"buffer": "^6.0.3",
"@gmod/bgzf-filehandle": "^2.0.0",
"crc": "^4.3.2",
"generic-filehandle": "^3.0.0",
"generic-filehandle2": "^0.0.1",
"long": "^4.0.0",
"quick-lru": "^4.0.0"
},
Expand All @@ -52,6 +51,7 @@
"@typescript-eslint/eslint-plugin": "^8.1.0",
"@typescript-eslint/parser": "^8.1.0",
"@vitest/coverage-v8": "^2.0.5",
"buffer": "^6.0.3",
"eslint": "^9.9.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
Expand Down
2 changes: 1 addition & 1 deletion src/bai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export default class BAI extends IndexFile {
}

// fetch and parse the index
async _parse(opts?: BaseOpts) {
async _parse(_opts?: BaseOpts) {
const bytes = await this.filehandle.readFile()
const dataView = new DataView(bytes.buffer)

Expand Down
2 changes: 1 addition & 1 deletion src/bamFile.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import crc32 from 'crc/crc32'
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle2'
import AbortablePromiseCache from '@gmod/abortable-promise-cache'
import QuickLRU from 'quick-lru'

Expand Down
2 changes: 1 addition & 1 deletion src/chunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import VirtualOffset from './virtualOffset'

// little class representing a chunk in the index
export default class Chunk {
public buffer?: Buffer
public buffer?: Uint8Array

constructor(
public minv: VirtualOffset,
Expand Down
27 changes: 18 additions & 9 deletions src/htsget.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { unzip } from '@gmod/bgzf-filehandle'
import { Buffer } from 'buffer'
import { BaseOpts, BamOpts } from './util'
import { BaseOpts, BamOpts, concatUint8Array } from './util'
import BamFile, { BAM_MAGIC } from './bamFile'
import Chunk from './chunk'
import { parseHeaderText } from './sam'
Expand All @@ -14,7 +13,8 @@ async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
arr.map(async chunk => {
const { url, headers } = chunk
if (url.startsWith('data:')) {
return Buffer.from(url.split(',')[1], 'base64')
// @ts-expect-error
return Uint8Array.fromBase64(url.split(',')[1], 'base64') as Uint8Array
} else {
//remove referer header, it is not even allowed to be specified
// @ts-expect-error
Expand All @@ -29,12 +29,12 @@ async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
`HTTP ${res.status} fetching ${url}: ${await res.text()}`,
)
}
return Buffer.from(await res.arrayBuffer())
return new Uint8Array(await res.arrayBuffer())
}
}),
)

return Buffer.concat(await Promise.all(res.map(elt => unzip(elt))))
return concatUint8Array(await Promise.all(res.map(elt => unzip(elt))))
}

export default class HtsgetFile extends BamFile {
Expand Down Expand Up @@ -108,11 +108,17 @@ export default class HtsgetFile extends BamFile {
}
}

// @ts-expect-error
async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
if (!chunk.buffer) {
throw new Error('expected chunk.buffer in htsget')
}
return { data: chunk.buffer, cpositions: [], dpositions: [], chunk }
return {
data: chunk.buffer,
cpositions: [],
dpositions: [],
chunk,
}
}

async getHeader(opts: BaseOpts = {}) {
Expand All @@ -125,12 +131,15 @@ export default class HtsgetFile extends BamFile {
}
const data = await result.json()
const uncba = await concat(data.htsget.urls, opts)
const dataView = new DataView(uncba.buffer)

if (uncba.readInt32LE(0) !== BAM_MAGIC) {
if (dataView.getInt32(0, true) !== BAM_MAGIC) {
throw new Error('Not a BAM file')
}
const headLen = uncba.readInt32LE(4)
const headerText = uncba.toString('utf8', 8, 8 + headLen)
const headLen = dataView.getInt32(4, true)

const decoder = new TextDecoder('utf8')
const headerText = decoder.decode(uncba.subarray(8, 8 + headLen))
const samHeader = parseHeaderText(headerText)

// use the @SQ lines in the header to figure out the
Expand Down
2 changes: 1 addition & 1 deletion src/indexFile.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GenericFilehandle } from 'generic-filehandle'
import { GenericFilehandle } from 'generic-filehandle2'
import Chunk from './chunk'
import { BaseOpts } from './util'

Expand Down
2 changes: 1 addition & 1 deletion src/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interface Bytes {
export default class BamRecord {
public fileOffset: number
private bytes: Bytes
private #dataView: DataView
#dataView: DataView

constructor(args: { bytes: Bytes; fileOffset: number }) {
this.bytes = args.bytes
Expand Down
17 changes: 17 additions & 0 deletions src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,20 @@ export function parseNameBytes(
}
return { refNameToId, refIdToName }
}

/**
 * Adds up the lengths of every byte array in the list.
 *
 * @param array - the byte arrays whose lengths are totalled
 * @returns the combined length of all entries; 0 for an empty list
 */
export function sum(array: Uint8Array[]) {
  return array.reduce((total, chunk) => total + chunk.length, 0)
}
/**
 * Joins several byte arrays end to end into one newly allocated array.
 *
 * @param args - the byte arrays to join, in order
 * @returns a new Uint8Array holding the bytes of each entry back to back
 */
export function concatUint8Array(args: Uint8Array[]) {
  // size the destination up front from the combined length of all inputs
  const joined = new Uint8Array(sum(args))
  let cursor = 0
  args.forEach(chunk => {
    // copy this chunk in at the current write position, then advance past it
    joined.set(chunk, cursor)
    cursor += chunk.length
  })
  return joined
}
2 changes: 1 addition & 1 deletion test/bai.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect, test } from 'vitest'
import { BAI, BamFile, BamRecord } from '../src'

import { LocalFile } from 'generic-filehandle'
import { LocalFile } from 'generic-filehandle2'
import FakeRecord from './fakerecord'

test('loads BAI volvox-sorted.bam.bai', async () => {
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"include": ["src"],
"compilerOptions": {
"outDir": "dist",
"target": "es2018",
"target": "es2022",
"types": ["node"],
"lib": ["dom", "esnext"],
"declaration": true,
Expand Down
Loading

0 comments on commit fc27a45

Please sign in to comment.