// Reviewer commentary. This text sits ahead of the first `diff --git` header
// and is not part of any hunk.
//
// lib/reader.js, decodeSchema(schemaElements):
//   Replaces the index-driven `for` loop, which built nested groups by calling
//   `decodeSchema(schemaElements.slice(idx + 1, idx + 1 + num_children))` and
//   then advanced `idx` by `(num_children || 0) + 1`, with a single
//   `schemaElements.forEach` pass that moves a `schema` pointer down and up.
//   - Group branch: `fields` is created with `Object.create({}, {...})` and
//     carries two non-enumerable properties: `parent` (the object that was
//     being filled when the group was met) and `num_children` (copied from the
//     element). `schema` is then pointed at that `fields` object, so the
//     elements that follow are stored as its children.
//   - After every element: while `schema.parent` is truthy and
//     `Object.keys(schema).length === schema.num_children`, `schema` is moved
//     back to `schema.parent`. The two bookkeeping properties are
//     non-enumerable so that `Object.keys` counts only the children added.
//   - The function returns whatever `schema` points at when the pass ends.
//   Only three hunks are visible here; the lines between them (including the
//   condition that selects the group branch over the `else` branch) are not.
//
// test/decodeSchema.js (new file):
//   Builds a `metadata` object whose `schema` array lists nine elements in
//   order: 'root' (num_children 1), 'a' (2), 'b' (2), 'c' (1), 'd', 'e' (2),
//   'f', 'g', 'h'; the elements without children have `num_children: null`.
//   It constructs `new parquet.ParquetReader(metadata, {})` and asserts that
//   `reader.schema.fields` deep-equals `expected`, whose nesting is
//   a -> { b -> { c -> { d }, e -> { f, g } }, h }.
//   `expected` starts at 'a' and its nodes carry keys such as `path`,
//   `repetitionType`, `rLevelMax` and `fieldCount` that the visible hunks do
//   not set - presumably the 'root' element is dropped and those keys are
//   added by code outside this patch; confirm against the reader/schema code.
//
// NOTE(review): the `parent` / `num_children` descriptors specify only
//   `value` and `enumerable`, so both properties are non-writable. A child
//   element literally named `parent` or `num_children` could therefore not be
//   stored by `schema[schemaElement.name] = ...` on a `fields` object (the
//   write is ignored, or throws a TypeError if this module runs in strict
//   mode - confirm). Keeping the bookkeeping outside `fields` would avoid the
//   name clash.
// NOTE(review): `return schema` hands back the moving pointer. It is the
//   top-level object only if every group reached its declared `num_children`;
//   if a count is never met (for example a truncated element list, or two
//   children sharing a name so the key count stalls) a nested `fields` object
//   would be returned instead - verify whether such input can reach here.
// NOTE(review): the line breaks of this patch appear to have been collapsed
//   into spaces. The hunk headers (`-482,8 +482,7`, `-506,9 +505,20`,
//   `-528,9 +538,11`, `-0,0 +1,234`) describe a multi-line layout, so the file
//   as stored is unlikely to apply until the original line structure is
//   restored from the upstream commit.
diff --git a/lib/reader.js b/lib/reader.js index 43e78d9d..aa301477 100644 --- a/lib/reader.js +++ b/lib/reader.js @@ -482,8 +482,7 @@ function decodeDataPageV2(cursor, header, opts) { function decodeSchema(schemaElements) { let schema = {}; - for (let idx = 0; idx < schemaElements.length; ) { - const schemaElement = schemaElements[idx]; + schemaElements.forEach(schemaElement => { let repetitionType = parquet_util.getThriftEnum( parquet_thrift.FieldRepetitionType, @@ -506,9 +505,20 @@ function decodeSchema(schemaElements) { schema[schemaElement.name] = { optional: optional, repeated: repeated, - fields: decodeSchema( - schemaElements.slice(idx + 1, idx + 1 + schemaElement.num_children)) + fields: Object.create({},{ + /* define parent and num_children as non-enumerable */ + parent: { + value: schema, + enumerable: false + }, + num_children: { + value: schemaElement.num_children, + enumerable: false + } + }) }; + /* move the schema pointer to the children */ + schema = schema[schemaElement.name].fields; } else { let logicalType = parquet_util.getThriftEnum( parquet_thrift.Type, @@ -528,9 +538,11 @@ function decodeSchema(schemaElements) { }; } - idx += (schemaElement.num_children || 0) + 1; - } - + /* if we have processed all children we move schema pointer to parent again */ + while (schema.parent && Object.keys(schema).length === schema.num_children) { + schema = schema.parent; + } + }); return schema; } diff --git a/test/decodeSchema.js b/test/decodeSchema.js new file mode 100644 index 00000000..64994440 --- /dev/null +++ b/test/decodeSchema.js @@ -0,0 +1,234 @@ +'use strict'; +const chai = require('chai'); +const assert = chai.assert; +const parquet = require('../parquet.js'); + +describe('ParquetSchema', function() { + it('should handle complex nesting', function() { + var metadata = { + version: 1, + schema: [ + { type: null, + type_length: null, + repetition_type: null, + name: 'root', + num_children: 1, + converted_type: null, + scale: null, + precision: null, 
+ field_id: null + }, { + type: null, + type_length: null, + repetition_type: 0, + name: 'a', + num_children: 2, + converted_type: null, + scale: null, + precision: null, + field_id: null + }, { + type: null, + type_length: null, + repetition_type: 0, + name: 'b', + num_children: 2, + converted_type: null, + scale: null, + precision: null, + field_id: null + }, { + type: null, + type_length: null, + repetition_type: 0, + name: 'c', + num_children: 1, + converted_type: null, + scale: null, + precision: null, + field_id: null + }, { + type: 6, + type_length: null, + repetition_type: 0, + name: 'd', + num_children: null, + converted_type: 0, + scale: null, + precision: null, + field_id: null + }, { + type: null, + type_length: null, + repetition_type: 0, + name: 'e', + num_children: 2, + converted_type: null, + scale: null, + precision: null, + field_id: null + }, { + type: 6, + type_length: null, + repetition_type: 0, + name: 'f', + num_children: null, + converted_type: 0, + scale: null, + precision: null, + field_id: null + }, { + type: 6, + type_length: null, + repetition_type: 0, + name: 'g', + num_children: null, + converted_type: 0, + scale: null, + precision: null, + field_id: null + }, { + type: 6, + type_length: null, + repetition_type: 0, + name: 'h', + num_children: null, + converted_type: 0, + scale: null, + precision: null, + field_id: null + } + ] + }; + + const expected = { + "a": { + "name": "a", + "path": [ + "a" + ], + "repetitionType": "REQUIRED", + "rLevelMax": 0, + "dLevelMax": 0, + "isNested": true, + "fieldCount": 2, + "fields": { + "b": { + "name": "b", + "path": [ + "a", + "b" + ], + "repetitionType": "REQUIRED", + "rLevelMax": 0, + "dLevelMax": 0, + "isNested": true, + "fieldCount": 2, + "fields": { + "c": { + "name": "c", + "path": [ + "a", + "b", + "c" + ], + "repetitionType": "REQUIRED", + "rLevelMax": 0, + "dLevelMax": 0, + "isNested": true, + "fieldCount": 1, + "fields": { + "d": { + "name": "d", + "primitiveType": "BYTE_ARRAY", 
+ "originalType": "UTF8", + "path": [ + "a", + "b", + "c", + "d" + ], + "repetitionType": "REQUIRED", + "typeLength": undefined, + "encoding": "PLAIN", + "compression": "UNCOMPRESSED", + "rLevelMax": 0, + "dLevelMax": 0 + } + } + }, + "e": { + "name": "e", + "path": [ + "a", + "b", + "e" + ], + "repetitionType": "REQUIRED", + "rLevelMax": 0, + "dLevelMax": 0, + "isNested": true, + "fieldCount": 2, + "fields": { + "f": { + "name": "f", + "primitiveType": "BYTE_ARRAY", + "originalType": "UTF8", + "path": [ + "a", + "b", + "e", + "f" + ], + "repetitionType": "REQUIRED", + "typeLength": undefined, + "encoding": "PLAIN", + "compression": "UNCOMPRESSED", + "rLevelMax": 0, + "dLevelMax": 0 + }, + "g": { + "name": "g", + "primitiveType": "BYTE_ARRAY", + "originalType": "UTF8", + "path": [ + "a", + "b", + "e", + "g" + ], + "repetitionType": "REQUIRED", + "typeLength": undefined, + "encoding": "PLAIN", + "compression": "UNCOMPRESSED", + "rLevelMax": 0, + "dLevelMax": 0 + } + } + } + } + }, + "h": { + "name": "h", + "primitiveType": "BYTE_ARRAY", + "originalType": "UTF8", + "path": [ + "a", + "h" + ], + "repetitionType": "REQUIRED", + "typeLength": undefined, + "encoding": "PLAIN", + "compression": "UNCOMPRESSED", + "rLevelMax": 0, + "dLevelMax": 0 + } + } + } + }; + + const reader = new parquet.ParquetReader(metadata,{}); + assert.deepEqual(reader.schema.fields,expected); + }); + +});