Skip to content

Commit

Permalink
get datatype from parquet file
Browse files Browse the repository at this point in the history
  • Loading branch information
nmcardoso committed Sep 29, 2024
1 parent 153459f commit ec3bb6d
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 18 deletions.
1 change: 1 addition & 0 deletions app/@types/configContext.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ interface ITableConfig extends IterableInterface {
decIndex: number | null,
state: 'unloaded' | 'loading' | 'success' | 'positionNotFound' | 'error',
isSameFile: boolean,
dataTypes?: string[],
}

interface IGrid extends IterableInterface {
Expand Down
1 change: 0 additions & 1 deletion app/components/appbar/DownloadTableButton.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ const DownloadModal = ({ show, onHide }: any) => {
}
return column.getColDef().headerName
}
console.log('colKeys:', getColKeys(), 'class', getClassFilter())
tcState.grid.api.exportDataAsCsv({
suppressQuotes: true,
columnKeys: getColKeys(),
Expand Down
3 changes: 3 additions & 0 deletions app/components/setup/FileInputTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ export default function FileInputTab() {
selectedColumnsId: [summary.raIndex, summary.decIndex],
raIndex: summary.raIndex,
decIndex: summary.decIndex,
dataTypes: summary.dataTypes,
status: 'success',
file,
isSameFile,
Expand All @@ -286,6 +287,7 @@ export default function FileInputTab() {
})
}
}).catch(err => {
console.log(err)
tcDispatch({
type: ContextActions.USER_FILE_INPUT,
payload: {
Expand Down Expand Up @@ -329,6 +331,7 @@ export default function FileInputTab() {
selectedColumnsId: [summary.raIndex, summary.decIndex],
raIndex: summary.raIndex,
decIndex: summary.decIndex,
dataTypes: summary.dataTypes,
status: 'success',
url,
isSameFile,
Expand Down
4 changes: 0 additions & 4 deletions app/components/table/AIGrid.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ export default function AIGrid() {
tcState.cols.classification.enabled
) {
prevClass = tcState.grid.data.map((e) => ({ 'ai:class': e['ai:class'] }))
console.log('prev class', prevClass)
}
console.log('isSameFile', tcState.table.isSameFile)

let data
if (tcState.table.type === 'local') {
Expand All @@ -91,11 +89,9 @@ export default function AIGrid() {
const { colDef, initVal } = TableHelper.getColDefs(tcState)

data = data?.map((e, i, _) => ({ ...e, ...initVal, 'ai:id': String(i + 1) }))
console.log('data', data)

if (prevClass) {
data = data?.map((e, i, _) => ({ ...e, ...prevClass[i] }))
console.log('data with prev class', data)
}

setLoading(false)
Expand Down
3 changes: 2 additions & 1 deletion app/contexts/XTableConfigContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { ContextActions } from '@/interfaces/contextActions'
import TableHelper from '@/lib/TableHelper'


export const SCHEMA_VERSION: number = 14 // 11
export const SCHEMA_VERSION: number = 15

const getInitialState = (): IState => ({
schemaVersion: SCHEMA_VERSION,
Expand All @@ -18,6 +18,7 @@ const getInitialState = (): IState => ({
decIndex: null,
state: 'unloaded',
isSameFile: false,
dataTypes: undefined,
},
grid: {
data: undefined,
Expand Down
16 changes: 10 additions & 6 deletions app/lib/TableHelper.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import AsyncTextCell from '@/components/table/AsyncTextCell'
import ClassCell from '@/components/table/ClassCell'
import imageCellFactory from '@/components/table/ImageCell'
import { IState } from '@/contexts/XTableConfigContext'
import { ColDef } from '@ag-grid-community/core'
import Papa, { ParseResult } from 'papaparse'
import TableReader from './io'
import { queuedState } from './states'
import { findIndex } from './utils'
import TableReader from './io'


interface ITableSummary {
Expand Down Expand Up @@ -78,12 +76,15 @@ const sdssCatalogColDefFactory = (table: string, col: string): ColDef => {
}
}

const userTableColDefFactory = (colName: string): ColDef => {
/**
 * Builds the grid column definition for one column of the user-supplied table.
 *
 * @param colName - Column name as it appears in the source table. It is used
 *   verbatim in the `tab:`-prefixed `field` key and lower-cased for the header.
 * @param dtype - Optional value for the column's `cellDataType`. When it is
 *   omitted (or an empty string) the `cellDataType` key is left out of the
 *   returned definition entirely.
 * @returns The column definition for `colName`.
 */
const userTableColDefFactory = (colName: string, dtype?: string): ColDef => {
  // Spread an empty object when no type is known so the key is absent rather
  // than present with an `undefined` value.
  const cellDtype = dtype ? { cellDataType: dtype } : {}
  return {
    field: `tab:${colName}`,
    flex: 1,
    headerName: colName.toLowerCase(),
    filter: true,
    ...cellDtype,
  }
}

Expand Down Expand Up @@ -111,8 +112,10 @@ class TableHelper {
async getTableSummary(file: File | string) {
const reader = new TableReader(file)
const cols = await reader.getColumns()
const dataTypes = await reader.getDataTypes()
console.log(dataTypes)
if (!!cols) {
return this.getHeaderSummary(cols)
return {...this.getHeaderSummary(cols), dataTypes}
} else {
return undefined
}
Expand All @@ -132,7 +135,8 @@ class TableHelper {
if (!!tcState.table.selectedColumnsId) {
for (const colId of tcState.table.selectedColumnsId) {
const colName = tcState.table.columns[colId]
defs.push(userTableColDefFactory(colName))
const dtype = tcState.table.dataTypes?.[colId]
defs.push(userTableColDefFactory(colName, dtype))
}
}

Expand Down
66 changes: 63 additions & 3 deletions app/lib/io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ const readParquet = (file: string | File) => {

/**
 * Reads the column names of a local Parquet file from its metadata.
 *
 * The file is read into memory with `File.arrayBuffer()` and handed to
 * `parquetMetadata`; the names of the resulting schema entries are returned
 * in schema order.
 *
 * @param file - The Parquet file selected by the user.
 * @returns The schema entry names, excluding any entry named `schema`.
 */
const getParquetColumnsFromFile = async (file: File) => {
  const meta = parquetMetadata(await file.arrayBuffer())
  // NOTE(review): entries named `schema` are dropped — presumably this targets
  // the root schema element; confirm for files whose root has another name.
  return meta.schema.filter((e) => e.name !== 'schema').map((e) => e.name)
}

Expand All @@ -147,6 +148,55 @@ const getParquetColumns = (file: string | File) => {




/** The subset of a Parquet schema element that data-type detection looks at. */
interface IParquetSchemaField {
  name: string
  type?: string
  converted_type?: string
  logical_type?: { type?: string }
}

/**
 * Maps an upper-cased Parquet logical or physical type name to the cell data
 * type passed to the grid. Names that are not listed resolve to `undefined`.
 */
const PARQUET_CELL_DATA_TYPES: ReadonlyMap<string, string> = new Map([
  ['BOOLEAN', 'boolean'],
  ['INT32', 'number'],
  ['INT64', 'number'],
  ['INT96', 'number'],
  ['FLOAT', 'number'],
  ['DOUBLE', 'number'],
  ['STRING', 'text'],
  ['BYTE_ARRAY', 'object'],
  ['FIXED_LEN_BYTE_ARRAY', 'object'],
])

/**
 * Converts a Parquet schema into one cell data type per schema entry.
 *
 * Entries named `schema` are skipped, using the same filter as the column
 * name readers so that positions in the result line up with column indices.
 * For every remaining entry the logical type name wins over the physical
 * type; an entry whose type name is unknown (or missing) yields `undefined`.
 *
 * @param schema - Schema elements taken from the Parquet file metadata.
 * @returns Cell data types in schema order, `undefined` where none applies.
 */
const parquetSchemaToDataTypes = (
  schema: readonly IParquetSchemaField[],
): (string | undefined)[] =>
  schema
    .filter((field) => field.name !== 'schema')
    .map((field) => {
      // Files that only carry the legacy `converted_type: UTF8` annotation
      // (no `logical_type`) hold strings, so treat them like STRING instead of
      // falling through to the raw BYTE_ARRAY physical type.
      const physicalName = field.converted_type === 'UTF8' ? 'STRING' : field.type
      const typeName =
        field.logical_type?.type?.toUpperCase() || physicalName?.toUpperCase() || 'DEFAULT'
      return PARQUET_CELL_DATA_TYPES.get(typeName)
    })

/**
 * Detects the cell data types of a local Parquet file.
 *
 * @param file - The Parquet file selected by the user; it is read fully into
 *   memory with `File.arrayBuffer()`.
 * @returns One entry per column, `undefined` where no type could be mapped.
 */
const getParquetDataTypesFromFile = async (file: File) => {
  const meta = parquetMetadata(await file.arrayBuffer())
  return parquetSchemaToDataTypes(meta.schema)
}

/**
 * Detects the cell data types of a remote Parquet file.
 *
 * @param url - Location of the Parquet file.
 * @returns One entry per column, `undefined` where no type could be mapped.
 */
const getParquetDataTypesFromUrl = async (url: string) => {
  const meta = await parquetMetadataAsync(await asyncBufferFromUrl(url))
  return parquetSchemaToDataTypes(meta.schema)
}

/**
 * Detects the cell data types of a Parquet table given either as a URL or as
 * a local file.
 *
 * @param file - A URL string or a `File` object.
 * @returns A promise of one entry per column.
 */
const getParquetDataTypes = (file: string | File) => {
  // Boxed `String` objects are treated as URLs too.
  const isUrl = typeof file === 'string' || file instanceof String
  return isUrl
    ? getParquetDataTypesFromUrl(file as string)
    : getParquetDataTypesFromFile(file as File)
}


export default class TableReader {
file: File | string
ext?: string
Expand All @@ -172,14 +222,14 @@ export default class TableReader {
}

async read() {
if (this.getFileExt() === 'csv') {
if (['csv', 'tsv', 'dat', 'txt'].includes(this.getFileExt() || '')) {
return await readCsv(this.file)
} else if (this.getFileExt() === 'parquet') {
} else if (['parquet', 'parq', 'par', 'pq'].includes(this.getFileExt() || '')) {
const data = await readParquet(this.file)
const cols = await this.getColumns()
if (!!cols) {
return data.map(e => Object.fromEntries(
Object.entries(e).map(([key, value]) => [`tab:${cols[key]}`, value])
Object.entries(e).map(([key, value]) => [`tab:${cols[key as unknown as number]}`, value])
))
} else {
return data
Expand All @@ -195,4 +245,14 @@ export default class TableReader {
}
return this.columns
}

/**
 * Resolves the per-column data types of the table, when the file format
 * carries them.
 *
 * @returns The result of `getParquetDataTypes` for Parquet extensions, and
 *   `undefined` for delimited-text extensions or any other extension.
 */
async getDataTypes() {
  const extensionIsOneOf = (candidates: string[]) =>
    candidates.includes(this.getFileExt() || '')

  // No data types are reported for delimited-text files.
  if (extensionIsOneOf(['csv', 'tsv', 'dat', 'txt'])) {
    return undefined
  }

  if (extensionIsOneOf(['parquet', 'parq', 'par', 'pq'])) {
    return await getParquetDataTypes(this.file)
  }

  // Unrecognised extension: nothing to report.
  return undefined
}
}
3 changes: 0 additions & 3 deletions app/services/sdss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ export default class SdssService {
})
return resp.data
} catch (e) {
console.log(typeof (e))
console.log(e)
}
}
Expand All @@ -170,7 +169,6 @@ export default class SdssService {
WHERE vc.viewname='${table}'
`.trim()
}
console.log(sql)
const url = `${SQL_URL}?cmd=${encodeURIComponent(sql)}&format=json`
const resp = await queryClient.fetchQuery({
queryKey: ['sdss-service-columns', table],
Expand Down Expand Up @@ -202,7 +200,6 @@ export class SdssSpectra extends SdssService implements IResourceFetch {

async fetch() {
const specObjId = await this.getObjSpecId(this.ra, this.dec)
console.log(specObjId)
if (!specObjId) return undefined

return await queryClient.fetchQuery({
Expand Down

0 comments on commit ec3bb6d

Please sign in to comment.