Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(shapefile): Typed ShapefileLoader #2611

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 50 additions & 3 deletions modules/shapefile/src/dbf-loader.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
import type {Loader, LoaderWithParser} from '@loaders.gl/loader-utils';
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {ObjectRowTable} from '@loaders.gl/schema';
// import type {DBFResult} from './lib/parsers/parse-dbf';
import {parseDBF, parseDBFInBatches} from './lib/parsers/parse-dbf';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/** Loader options accepted by the DBF loaders */
export type DBFLoaderOptions = LoaderOptions & {
  /** Options that apply specifically to DBF parsing */
  dbf?: {
    /** Text encoding used for string values in the table */
    encoding?: string;
    /** Shape of the returned table */
    shape?: 'object-row-table';
  };
};

/**
* DBFLoader - DBF files are used to contain non-geometry columns in Shapefiles
*/
export const DBFWorkerLoader: Loader = {
export const DBFWorkerLoader: Loader<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
name: 'DBF',
id: 'dbf',
module: 'shapefile',
Expand All @@ -19,17 +34,49 @@
mimeTypes: ['application/x-dbf'],
options: {
dbf: {
shape: 'object-row-table',
encoding: 'latin1'
}
}
};

/**
 * DBF file loader
 *
 * Extends DBFWorkerLoader with `parse`, `parseSync` and `parseInBatches`.
 * Each parser merges the loader's default `dbf` options with the caller-supplied
 * `options.dbf` and forwards the flattened result to the DBF parser functions.
 */
export const DBFLoader: LoaderWithParser<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
  ...DBFWorkerLoader,

  /**
   * Parses a complete DBF file.
   * @param arrayBuffer binary contents of the DBF file
   * @param options loader options; only `options.dbf` is forwarded to the parser
   * @returns the table produced by parseDBF
   */
  parse: async (arrayBuffer: ArrayBuffer, options?: DBFLoaderOptions) => {
    // Defaults live under `options.dbf`, so merge at that level (not at the top level)
    const dbfOptions = {...DBFLoader.options?.dbf, ...options?.dbf};
    return parseDBF(arrayBuffer, dbfOptions);
  },

  /**
   * Synchronous variant of `parse`.
   * @param arrayBuffer binary contents of the DBF file
   * @param options loader options; only `options.dbf` is forwarded to the parser
   * @returns the table produced by parseDBF
   */
  parseSync: (arrayBuffer: ArrayBuffer, options?: DBFLoaderOptions) => {
    const dbfOptions = {...DBFLoader.options?.dbf, ...options?.dbf};
    return parseDBF(arrayBuffer, dbfOptions);
  },

  /**
   * Parses a DBF file delivered as a sequence of chunks.
   * @param arrayBufferIterator sync or async iterable of binary chunks
   * @param options loader options; only `options.dbf` is forwarded to the parser
   * @returns async iterator over the batches produced by parseDBFInBatches
   */
  parseInBatches(
    arrayBufferIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
    options?: DBFLoaderOptions
  ): AsyncIterableIterator<ObjectRowTable> {
    const dbfOptions = {...DBFLoader.options?.dbf, ...options?.dbf};
    return parseDBFInBatches(arrayBufferIterator, dbfOptions);
  }
};
3 changes: 3 additions & 0 deletions modules/shapefile/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

export {ShapefileLoader} from './shapefile-loader';
export {DBFLoader, DBFWorkerLoader} from './dbf-loader';
export {SHPLoader, SHPWorkerLoader} from './shp-loader';
Expand Down
148 changes: 113 additions & 35 deletions modules/shapefile/src/lib/parsers/parse-dbf.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,53 @@
import {Field, ObjectRowTable} from '@loaders.gl/schema';
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {Field, ObjectRowTable, ObjectRowTableBatch} from '@loaders.gl/schema';
import {Schema} from '@loaders.gl/schema';
import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
import {
DBFLoaderOptions,
DBFResult,
DBFTableOutput,
DBFHeader,
DBFRowsOutput,
DBFField
} from './types';

/** Flattened options consumed directly by the DBF parser functions */
type DBFParserOptions = {
  /** Text encoding for strings; the parser functions fall back to 'latin1' when omitted */
  encoding?: string;
  /** Requested shape of the returned table */
  shape?: 'object-row-table';
};

/** Mutable result object that the DBF parser fills in while it consumes input */
export type DBFResult = {
  /** Rows parsed so far */
  data: Array<Record<string, unknown[]>>;
  /** Table schema, when available */
  schema?: Schema;
  /** Error message, when set */
  error?: string;
  /** Parsed binary file header, when available */
  dbfHeader?: DBFHeader;
  /** Parsed field descriptors, when available */
  dbfFields?: DBFField[];
  /** Progress counters maintained by the parser */
  progress: {
    // NOTE(review): presumably the number of input bytes consumed so far - confirm
    bytesUsed: number;
    // NOTE(review): presumably the record count announced by the header - confirm
    rowsTotal: number;
    /** Number of rows currently held in `data` */
    rows: number;
  };
};

/** Values decoded from the binary header at the start of a DBF file */
export type DBFHeader = {
  /** Year of the last update */
  year: number;
  /** Month of the last update */
  month: number;
  /** Day of the last update */
  day: number;
  /** Count of records contained in the data file */
  nRecords: number;
  /** Size of the header, in bytes */
  headerLength: number;
  /** Size of a single record */
  recordLength: number;
  /** Language driver value; it is unclear whether files usually set this */
  languageDriver: number;
};

/** Descriptor for a single field (column) of a DBF table */
export type DBFField = {
  /** Field name */
  name: string;
  // NOTE(review): presumably the DBF type code of the field - confirm against the descriptor parser
  dataType: string;
  // NOTE(review): presumably the width of the field inside a record, in bytes - confirm
  fieldLength: number;
  // NOTE(review): presumably the number of decimal places - confirm
  decimal: number;
};

const LITTLE_ENDIAN = true;
const DBF_HEADER_SIZE = 32;
Expand All @@ -25,7 +65,12 @@
textDecoder: TextDecoder;
state = STATE.START;
result: DBFResult = {
data: []
data: [],
progress: {
bytesUsed: 0,
rowsTotal: 0,
rows: 0
}
};

constructor(options: {encoding: string}) {
Expand Down Expand Up @@ -62,70 +107,112 @@
* @param options
* @returns DBFTable or rows
*/
export function parseDBF(arrayBuffer: ArrayBuffer, options: DBFParserOptions = {}): ObjectRowTable {
  // Options arrive already flattened (no `dbf` nesting); default to latin1 text decoding
  const {encoding = 'latin1'} = options;

  // Feed the whole buffer through the streaming parser in a single pass
  const dbfParser = new DBFParser({encoding});
  dbfParser.write(arrayBuffer);
  dbfParser.end();

  const {data, schema} = dbfParser.result;
  const shape = options.shape ?? 'object-row-table';
  switch (shape) {
    case 'object-row-table': {
      const table: ObjectRowTable = {shape: 'object-row-table', schema, data};
      return table;
    }
    default:
      // Any other shape is unsupported; the requested shape is used as the error message
      throw new Error(shape);
  }
}

/**
 * Parses a DBF file that arrives as a sequence of binary chunks.
 *
 * Yields one 'metadata' batch (carrying the parsed DBF header) as soon as the header
 * is available, then a 'data' batch whenever the parser has accumulated rows.
 *
 * @param asyncIterator sync or async iterable of binary chunks
 * @param options parser options; `encoding` defaults to 'latin1'
 * @returns async iterator of object-row-table batches
 */
export async function* parseDBFInBatches(
  asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
  options: DBFParserOptions = {}
): AsyncIterableIterator<ObjectRowTableBatch> {
  const {encoding = 'latin1'} = options;

  const parser = new DBFParser({encoding});
  let headerReturned = false;
  for await (const arrayBuffer of asyncIterator) {
    parser.write(arrayBuffer);

    // Emit the header exactly once, as an empty metadata batch
    if (!headerReturned && parser.result.dbfHeader) {
      headerReturned = true;
      yield {
        batchType: 'metadata',
        shape: 'object-row-table',
        data: [],
        length: 0,
        // Additional data
        dbfHeader: parser.result.dbfHeader
      };
    }

    if (parser.result.data.length > 0) {
      const data = parser.result.data;
      // Hand the accumulated rows to the consumer and start a fresh array,
      // so the same rows are not yielded again with the next chunk
      parser.result.data = [];
      yield {
        batchType: 'data',
        shape: 'object-row-table',
        data,
        length: data.length
      };
    }
  }

  // Flush any rows still held by the parser once the input is exhausted
  parser.end();
  if (parser.result.data.length > 0) {
    const data = parser.result.data;
    yield {
      batchType: 'data',
      shape: 'object-row-table',
      data,
      length: data.length
    };
  }
}

/**
* https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
* State machine for DBF parsing
* @param state
* @param result
* @param binaryReader
* @param textDecoder
* @returns
* @see https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
*/
/* eslint-disable complexity, max-depth */
function parseState(
Expand Down Expand Up @@ -161,8 +248,7 @@
case STATE.FIELD_DESCRIPTORS:
// Parse DBF field descriptors (schema)
const fieldDescriptorView = binaryReader.getDataView(
// @ts-ignore
result.dbfHeader.headerLength - DBF_HEADER_SIZE
result.dbfHeader!.headerLength - DBF_HEADER_SIZE
);
if (!fieldDescriptorView) {
return state;
Expand Down Expand Up @@ -191,10 +277,8 @@
// Note: Avoid actually reading the last byte, which may not be present
binaryReader.skip(1);

// @ts-ignore
const row = parseRow(recordView, result.dbfFields, textDecoder);
const row = parseRow(recordView, result.dbfFields || [], textDecoder);
result.data.push(row);
// @ts-ignore
result.progress.rows = result.data.length;
}
state = STATE.END;
Expand All @@ -218,17 +302,12 @@
*/
function parseDBFHeader(headerView: DataView): DBFHeader {
  // Last-updated date: byte 1 stores the year as an offset from 1900
  const year = 1900 + headerView.getUint8(1);
  const month = headerView.getUint8(2);
  const day = headerView.getUint8(3);

  // Multi-byte values are read with the LITTLE_ENDIAN flag
  const nRecords = headerView.getUint32(4, LITTLE_ENDIAN);
  const headerLength = headerView.getUint16(8, LITTLE_ENDIAN);
  const recordLength = headerView.getUint16(10, LITTLE_ENDIAN);

  // Not sure if this is usually set
  const languageDriver = headerView.getUint8(29);

  return {year, month, day, nRecords, headerLength, recordLength, languageDriver};
}
Expand Down Expand Up @@ -266,7 +345,6 @@
for (let i = 0; i < nRecords; i++) {
const recordView = binaryReader.getDataView(recordLength - 1);
binaryReader.skip(1);
// @ts-ignore
rows.push(parseRow(recordView, fields, textDecoder));
}
return rows;
Expand Down
Loading
Loading