Skip to content

Commit

Permalink
chore(shapefile): Typed ShapefileLoader
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgreen committed Oct 30, 2023
1 parent af8e56f commit ff98a58
Show file tree
Hide file tree
Showing 24 changed files with 309 additions and 178 deletions.
35 changes: 29 additions & 6 deletions modules/shapefile/src/dbf-loader.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
import type {Loader, LoaderWithParser} from '@loaders.gl/loader-utils';
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {ObjectRowTable} from '@loaders.gl/schema';
// import type {DBFResult} from './lib/parsers/parse-dbf';
import {parseDBF, parseDBFInBatches} from './lib/parsers/parse-dbf';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/** Options for loading DBF files */
export type DBFLoaderOptions = LoaderOptions & {
/** DBF-specific options */
dbf?: {
/** Shape of returned table (only 'object-row-table' is supported by this loader) */
shape?: 'object-row-table';
/** Encoding of strings in table */
encoding?: string;
};
};

/**
* DBFLoader - DBF files are used to contain non-geometry columns in Shapefiles
*/
export const DBFWorkerLoader: Loader = {
export const DBFWorkerLoader: Loader<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
name: 'DBF',
id: 'dbf',
module: 'shapefile',
Expand All @@ -19,17 +34,25 @@ export const DBFWorkerLoader: Loader = {
mimeTypes: ['application/x-dbf'],
options: {
dbf: {
shape: 'object-row-table',
encoding: 'latin1'
}
}
};

/** DBF file loader */
export const DBFLoader: LoaderWithParser = {
export const DBFLoader: LoaderWithParser<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
...DBFWorkerLoader,
parse: async (arrayBuffer, options) => parseDBF(arrayBuffer, options),
parse: async (arrayBuffer: ArrayBuffer, options?: DBFLoaderOptions) => {
const dbfOptions = {...DBFLoader.options, ...options?.dbf};
return parseDBF(arrayBuffer, dbfOptions);
},

Check failure on line 49 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (16)

',' expected.

Check failure on line 49 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (18)

',' expected.

Check failure on line 49 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (20)

',' expected.
parseSync: parseDBF,

Check failure on line 50 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (16)

',' expected.

Check failure on line 50 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (18)

',' expected.

Check failure on line 50 in modules/shapefile/src/dbf-loader.ts

View workflow job for this annotation

GitHub Actions / test (20)

',' expected.
parseInBatches(arrayBufferIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>, options) {
return parseDBFInBatches(arrayBufferIterator, options);
parseInBatches(
arrayBufferIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
options?: DBFLoaderOptions
): AsyncIterableIterator<ObjectRowTable> {
const dbfOptions = {...DBFLoader.options, ...options?.dbf};
return parseDBFInBatches(arrayBufferIterator, dbfOptions);
}
};
3 changes: 3 additions & 0 deletions modules/shapefile/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

export {ShapefileLoader} from './shapefile-loader';
export {DBFLoader, DBFWorkerLoader} from './dbf-loader';
export {SHPLoader, SHPWorkerLoader} from './shp-loader';
Expand Down
132 changes: 95 additions & 37 deletions modules/shapefile/src/lib/parsers/parse-dbf.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,53 @@
import {Field, ObjectRowTable} from '@loaders.gl/schema';
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {Field, ObjectRowTable, ObjectRowTableBatch} from '@loaders.gl/schema';
import {Schema} from '@loaders.gl/schema';
import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
import {
DBFLoaderOptions,
DBFResult,
DBFTableOutput,
DBFHeader,
DBFRowsOutput,
DBFField
} from './types';

/** Internal options accepted by the DBF parsing functions */
type DBFParserOptions = {
/** Shape of returned table (only 'object-row-table' is supported) */
shape?: 'object-row-table';
/** Text encoding used to decode string fields — assumes a TextDecoder-supported label; TODO confirm */
encoding?: string;
};

/** Accumulated state/result of an (incremental) DBF parse */
export type DBFResult = {
/** Rows parsed so far; drained by the batched parser between yields */
data: {[key: string]: unknown[]}[];
/** Table schema, available once the field descriptors have been parsed */
schema?: Schema;
/** Set if parsing failed */
error?: string;
/** Binary header of the DBF file, available once parsed */
dbfHeader?: DBFHeader;
/** Field descriptors, available once parsed */
dbfFields?: DBFField[];
/** Parsing progress counters */
progress: {
bytesUsed: number;
rowsTotal: number;
rows: number;
};
};

/** Binary header stored in DBF file */
export type DBFHeader = {
/** Last updated date - year */
year: number;
/** Last updated date - month */
month: number;
/** Last updated date - day */
day: number;
/** Number of records in data file */
nRecords: number;
/** Length of header in bytes */
headerLength: number;
/** Length of each record */
recordLength: number;
/** Not clear if this is usually set */
languageDriver: number;
};

/** Field descriptor (one column of the DBF table) */
export type DBFField = {
/** Column name */
name: string;
/** Single-character DBF data type code (e.g. 'C', 'N', 'D') — TODO confirm full set handled */
dataType: string;
/** Field length in bytes within each record */
fieldLength: number;
/** Number of decimal places (numeric fields) */
decimal: number;
};

const LITTLE_ENDIAN = true;
const DBF_HEADER_SIZE = 32;
Expand All @@ -25,7 +65,12 @@ class DBFParser {
textDecoder: TextDecoder;
state = STATE.START;
result: DBFResult = {
data: []
data: [],
progress: {
bytesUsed: 0,
rowsTotal: 0,
rows: 0
}
};

constructor(options: {encoding: string}) {
Expand Down Expand Up @@ -62,18 +107,15 @@ class DBFParser {
* @param options
* @returns DBFTable or rows
*/
export function parseDBF(
arrayBuffer: ArrayBuffer,
options: DBFLoaderOptions = {}
): DBFRowsOutput | DBFTableOutput | ObjectRowTable {
export function parseDBF(arrayBuffer: ArrayBuffer, options: DBFParserOptions = {}): ObjectRowTable {
const {encoding = 'latin1'} = options.dbf || {};

const dbfParser = new DBFParser({encoding});
dbfParser.write(arrayBuffer);
dbfParser.end();

const {data, schema} = dbfParser.result;
const shape = options?.dbf?.shape;
const shape = options?.shape || 'object-row-table';
switch (shape) {
case 'object-row-table': {
const table: ObjectRowTable = {
Expand All @@ -83,49 +125,74 @@ export function parseDBF(
};
return table;
}
case 'table':
return {schema, rows: data};
case 'rows':
default:
return data;
throw new Error(shape);
}
const table: ObjectRowTable = {
shape: 'object-row-table',
schema,
data
};
return table;
}

/**
* @param asyncIterator
* @param options
*/
export async function* parseDBFInBatches(
asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
options: DBFLoaderOptions = {}
): AsyncIterable<DBFHeader | DBFRowsOutput | DBFTableOutput> {
const {encoding = 'latin1'} = options.dbf || {};
options: DBFParserOptions = {}
): AsyncIterableIterator<ObjectRowTableBatch> {
const {encoding = 'latin1'} = options;

const parser = new DBFParser({encoding});
let headerReturned = false;
for await (const arrayBuffer of asyncIterator) {
parser.write(arrayBuffer);
if (!headerReturned && parser.result.dbfHeader) {
headerReturned = true;
yield parser.result.dbfHeader;
yield {
batchType: 'metadata',
shape: 'object-row-table',
data: [],
length: 0,
// Additional data
dbfHeader: parser.result.dbfHeader
};
}

if (parser.result.data.length > 0) {
yield parser.result.data;
const data = parser.result.data;
parser.result.data = [];
yield {
batchType: 'data',
shape: 'object-row-table',
data,
length: data.length
};
}
}
parser.end();
if (parser.result.data.length > 0) {
yield parser.result.data;
const data = parser.result.data;
yield {
batchType: 'data',
shape: 'object-row-table',
data,
length: data.length
};
}
}

/**
* https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
* State machine for DBF parsing
* @param state
* @param result
* @param binaryReader
* @param textDecoder
* @returns
* @see https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
*/
/* eslint-disable complexity, max-depth */
function parseState(
Expand Down Expand Up @@ -161,8 +228,7 @@ function parseState(
case STATE.FIELD_DESCRIPTORS:
// Parse DBF field descriptors (schema)
const fieldDescriptorView = binaryReader.getDataView(
// @ts-ignore
result.dbfHeader.headerLength - DBF_HEADER_SIZE
result.dbfHeader!.headerLength - DBF_HEADER_SIZE
);
if (!fieldDescriptorView) {
return state;
Expand Down Expand Up @@ -191,10 +257,8 @@ function parseState(
// Note: Avoid actually reading the last byte, which may not be present
binaryReader.skip(1);

// @ts-ignore
const row = parseRow(recordView, result.dbfFields, textDecoder);
const row = parseRow(recordView, result.dbfFields || [], textDecoder);
result.data.push(row);
// @ts-ignore
result.progress.rows = result.data.length;
}
state = STATE.END;
Expand All @@ -218,17 +282,12 @@ function parseState(
*/
function parseDBFHeader(headerView: DataView): DBFHeader {
  // DBF stores the last-update year as an offset from 1900
  const year = headerView.getUint8(1) + 1900;
  const month = headerView.getUint8(2);
  const day = headerView.getUint8(3);
  // Number of records in the data file
  const nRecords = headerView.getUint32(4, LITTLE_ENDIAN);
  // Length of header in bytes
  const headerLength = headerView.getUint16(8, LITTLE_ENDIAN);
  // Length of each record in bytes
  const recordLength = headerView.getUint16(10, LITTLE_ENDIAN);
  // Not clear if this is usually set
  const languageDriver = headerView.getUint8(29);
  return {year, month, day, nRecords, headerLength, recordLength, languageDriver};
}
Expand Down Expand Up @@ -266,7 +325,6 @@ function parseRows(binaryReader, fields, nRecords, recordLength, textDecoder) {
for (let i = 0; i < nRecords; i++) {
const recordView = binaryReader.getDataView(recordLength - 1);
binaryReader.skip(1);
// @ts-ignore
rows.push(parseRow(recordView, fields, textDecoder));
}
return rows;
Expand Down
Loading

0 comments on commit ff98a58

Please sign in to comment.