Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add summary generation to schema-prototype #1481

Merged
merged 13 commits into from
Jul 8, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bids-validator/src/schema/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ export class BIDSContext implements Context {
file: BIDSFile
suffix: string
extension: string
entities: object
entities: Record<string, string>
dataset: ContextDataset
subject: ContextSubject
datatype: string
modality: string
sidecar: object
associations: ContextAssociations
columns: object
json: object
json: Record<string, any>
nifti_header: ContextNiftiHeader

constructor(fileTree: FileTree, file: BIDSFile, issues: DatasetIssues) {
Expand Down
71 changes: 71 additions & 0 deletions bids-validator/src/summary/collectSubjectMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { SubjectMetadata } from '../types/validation-result.ts'
/** Key under which the `participant_id` column is exposed in the output. */
const PARTICIPANT_ID = 'participantId'

/** Columns copied from participants.tsv into each record, in output order. */
const TARGET_KEYS = [PARTICIPANT_ID, 'age', 'sex', 'group']

/**
 * Convert one raw TSV cell into the value stored on the subject record.
 *
 * - participant id: the leading `sub-` prefix is stripped ('sub-01' -> '01');
 *   a missing cell becomes the empty string instead of throwing
 * - age: parsed as a base-10 integer (NaN when the cell is missing or not numeric)
 * - any other column: passed through unchanged (undefined when the row is short)
 */
const normalizeCell = (
  key: string,
  cell: string | undefined,
): string | number | undefined => {
  if (key === PARTICIPANT_ID) {
    return (cell ?? '').replace(/^sub-/, '')
  }
  if (key === 'age') {
    return parseInt(cell ?? '', 10)
  }
  return cell
}

/**
 * Go from the raw bytes of a participants.tsv file, with participant_id as a
 * required header, to an array of the form
 * [
 *   {
 *     participantId: '01',
 *     age: 25,
 *     ...
 *   },
 *   {
 *     participantId: '02',
 *     age: 31,
 *     ...
 *   },
 *   ...
 * ]
 *
 * Only the `participant_id`, `age`, `sex` and `group` columns are kept, and
 * only those that actually appear in the header row. Blank lines are ignored
 * and both LF and CRLF line endings are accepted.
 *
 * @param participantsTsvContent Raw bytes of the TSV file
 * @returns One record per data row; an empty array when the content is
 *   missing, has no header row, or has no `participant_id` column
 */
export const collectSubjectMetadata = (
  participantsTsvContent: Uint8Array,
): SubjectMetadata[] => {
  if (!participantsTsvContent) {
    return []
  }

  const [snakeCaseHeaders, ...subjectRows] = new TextDecoder()
    .decode(participantsTsvContent)
    .split(/\r?\n/)
    .filter((row) => row !== '')
    .map((row) => row.split('\t'))

  // An empty or blank-only file produces no rows at all, so there is no header
  if (snakeCaseHeaders === undefined) {
    return []
  }

  const headers = snakeCaseHeaders.map((header) =>
    header === 'participant_id' ? PARTICIPANT_ID : header,
  )
  const targetKeys = TARGET_KEYS.map((key) => ({
    key,
    index: headers.indexOf(key),
  })).filter(({ index }) => index !== -1)

  // Without a participant id column the rows cannot be attributed to subjects
  if (!targetKeys.some(({ key }) => key === PARTICIPANT_ID)) {
    return []
  }

  // Build a fresh object per row rather than rewriting the parsed cells in place
  return subjectRows.map((row) =>
    targetKeys.reduce(
      (subject, { key, index }) => ({
        ...subject,
        [key]: normalizeCell(key, row[index]),
      }),
      {},
    ),
  ) as SubjectMetadata[]
}
132 changes: 132 additions & 0 deletions bids-validator/src/summary/summary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { collectSubjectMetadata } from './collectSubjectMetadata.ts'
import { readAll, readerFromStreamReader } from '../deps/stream.ts'
import { Summary } from '../types/validation-result.ts'
import { BIDSContext } from '../schema/context.ts'

/**
 * Per-modality file tallies, all starting at zero.
 *
 * The order of the names below is the key order of the resulting object.
 */
const modalitiesCount: Record<string, number> = Object.fromEntries(
  ['mri', 'pet', 'meg', 'eeg', 'ieeg', 'microscopy'].map(
    (modality): [string, number] => [modality, 0],
  ),
)

/**
 * Per-secondary-modality file tallies, all starting at zero.
 *
 * The order of the names below is the key order of the resulting object.
 */
const secondaryModalitiesCount: Record<string, number> = Object.fromEntries(
  [
    'MRI_Diffusion',
    'MRI_Structural',
    'MRI_Functional',
    'MRI_Perfusion',
    'PET_Static',
    'PET_Dynamic',
    'iEEG_ECoG',
    'iEEG_SEEG',
  ].map((name): [string, number] => [name, 0]),
)

/**
 * Display names for modality keys. Keys that are not listed here are
 * reported unchanged by computeModalities.
 */
const modalityPrettyLookup: Record<string, string> = {
  mri: 'MRI',
  pet: 'PET',
  meg: 'MEG',
  eeg: 'EEG',
  ieeg: 'iEEG',
  micro: 'Microscopy',
  // The tally table in this module counts microscopy files under the key
  // `microscopy`, so that spelling needs a display name as well.
  microscopy: 'Microscopy',
}

/**
 * Maps a lookup key to the secondary modality it is counted under.
 *
 * NOTE(review): updateSummary indexes this table with `context.datatype`;
 * `bold` looks like a suffix rather than a datatype, so the functional entry
 * may never match - confirm against the values the context actually produces.
 */
const secondaryLookup: Record<string, string> = {
  dwi: 'MRI_Diffusion',
  anat: 'MRI_Structural',
  bold: 'MRI_Functional',
  perf: 'MRI_Perfusion',
}

/**
 * List the modalities that have at least one matching file, most frequent
 * first, using their display names where one is defined.
 *
 * On equal counts MRI is placed after the other modality; any other tie keeps
 * the key order of `modalities`.
 *
 * @param modalities File count per modality key
 * @returns Display names ordered by descending file count
 */
function computeModalities(modalities: Record<string, number>): string[] {
  // The counts are keyed by lowercase names ('mri'), while display names are
  // uppercase ('MRI'); accept either so the tie-break really applies.
  const isMri = (key: string): boolean => key.toLowerCase() === 'mri'

  return Object.keys(modalities)
    .filter((key) => modalities[key] !== 0)
    .sort((a, b) => {
      const byCount = modalities[b] - modalities[a]
      if (byCount !== 0) {
        return byCount
      }
      // On a tie, hand it to the non-MRI modality. The comparison is symmetric
      // so the result does not depend on the order the engine compares in.
      return Number(isMri(a)) - Number(isMri(b))
    })
    .map((key) =>
      // Own-property check so keys such as 'constructor' are not resolved
      // through the prototype chain
      Object.prototype.hasOwnProperty.call(modalityPrettyLookup, key)
        ? modalityPrettyLookup[key]
        : key,
    )
}

/**
 * List the secondary modalities that have a non-zero count, ordered from the
 * highest count to the lowest.
 *
 * @param secondary File count per secondary modality name
 * @returns Names with a non-zero count, most frequent first
 */
function computeSecondaryModalities(
  secondary: Record<string, number>,
): string[] {
  const present: string[] = []
  for (const name of Object.keys(secondary)) {
    if (secondary[name] !== 0) {
      present.push(name)
    }
  }
  present.sort((left, right) => secondary[right] - secondary[left])
  return present
}

/**
 * Shared, module-level summary object.
 *
 * updateSummary mutates it in place. Nothing in this module resets it, so the
 * collected values persist for as long as the module stays loaded.
 */
export const summary: Summary = {
dataProcessed: false,
// NOTE(review): starts at -1 rather than 0, so the first counted file brings
// the total to 0 - confirm this offset is intentional.
totalFiles: -1,
size: 0,
sessions: new Set(),
subjects: new Set(),
subjectMetadata: [],
tasks: new Set(),
pet: {},
// Recomputed on every access from the module-level modalitiesCount tallies
get modalities() {
return computeModalities(modalitiesCount)
},
// Recomputed on every access from the module-level secondaryModalitiesCount tallies
get secondaryModalities() {
return computeSecondaryModalities(secondaryModalitiesCount)
},
}

/**
 * Fold one file's context into the module-level summary.
 *
 * Files whose path starts with `/derivatives` are ignored. For every other
 * file this updates the file count and total size, records the `sub` and
 * `ses` entities and the sidecar `TaskName` when present, bumps the modality
 * and secondary-modality tallies, and parses the subject metadata when the
 * path contains `participants.tsv`.
 *
 * @param context Context of the file being visited
 * @returns A promise that resolves once the summary has been updated
 */
export async function updateSummary(context: BIDSContext): Promise<void> {
  if (context.file.path.startsWith('/derivatives')) {
    return
  }

  summary.totalFiles++
  // Resolve the size first: `summary.size += await ...` reads the old total
  // before suspending, so overlapping calls would overwrite each other.
  const fileSize = await context.file.size
  summary.size += fileSize

  if ('sub' in context.entities) {
    summary.subjects.add(context.entities.sub)
  }
  if ('ses' in context.entities) {
    summary.sessions.add(context.entities.ses)
  }
  if ('TaskName' in context.json) {
    summary.tasks.add(context.json.TaskName)
  }
  if (context.modality) {
    // Start unseen modalities at zero; `undefined++` would store NaN
    modalitiesCount[context.modality] =
      (modalitiesCount[context.modality] ?? 0) + 1
  }

  if (context.datatype in secondaryLookup) {
    const key = secondaryLookup[context.datatype]
    secondaryModalitiesCount[key]++
  } else if (context.datatype === 'pet' && 'rec' in context.entities) {
    // PET recordings are split into static and dynamic by their `rec` label
    if (['acstat', 'nacstat'].includes(context.entities.rec)) {
      secondaryModalitiesCount.PET_Static++
    } else if (['acdyn', 'nacdyn'].includes(context.entities.rec)) {
      secondaryModalitiesCount.PET_Dynamic++
    }
  }

  // NOTE(review): this matches any path containing 'participants.tsv', not
  // only the top-level file - confirm that is intended.
  if (context.file.path.includes('participants.tsv')) {
    const stream = await context.file.stream
    const streamReader = stream.getReader()
    const denoReader = readerFromStreamReader(streamReader)
    const fileBuffer = await readAll(denoReader)
    summary.subjectMetadata = collectSubjectMetadata(fileBuffer)
  }
}
6 changes: 5 additions & 1 deletion bids-validator/src/tests/local/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@ import { FileTree } from '../../types/filetree.ts'
import { validate } from '../../validators/bids.ts'
import { ValidationResult } from '../../types/validation-result.ts'
import { DatasetIssues } from '../../issues/datasetIssues.ts'
import { summary } from '../../summary/summary.ts'

export async function validatePath(
t: Deno.TestContext,
path: string,
): Promise<{ tree: FileTree; result: ValidationResult }> {
let tree: FileTree = new FileTree('', '')
let result: ValidationResult = { issues: new DatasetIssues(), summary: {} }
let result: ValidationResult = {
issues: new DatasetIssues(),
summary: summary,
}

await t.step('file tree is read', async () => {
tree = await readFileTree(path)
Expand Down
2 changes: 1 addition & 1 deletion bids-validator/src/tests/local/valid_headers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Deno.test('valid_headers dataset', async (t) => {
})

await t.step('summary has correct dataProcessed', () => {
assertEquals(result.summary.dataProcessed, ['rhyme judgment'])
assertEquals(result.summary.dataProcessed, false)
})

await t.step('summary has correct dataProcessed', () => {
Expand Down
29 changes: 28 additions & 1 deletion bids-validator/src/types/validation-result.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,36 @@
import { DatasetIssues } from '../issues/datasetIssues.ts'

/**
 * Per-subject metadata extracted from participants.tsv.
 *
 * The property names match the keys of the objects built by
 * collectSubjectMetadata, which stores the participant id under
 * `participantId`.
 */
export interface SubjectMetadata {
  /** Participant label with the leading `sub-` removed */
  participantId: string
  /** Age parsed as an integer */
  age: number
  sex: string
  group: string
}
/*
BodyPart: {},
ScannerManufacturer: {},
ScannerManufacturersModelName: {},
TracerName: {},
TracerRadionuclide: {},
*/

/**
 * Dataset-level summary reported alongside the validation issues.
 */
export interface Summary {
  // Overall counters
  dataProcessed: boolean
  totalFiles: number
  size: number

  // Distinct labels seen across the dataset
  subjects: Set<string>
  sessions: Set<string>
  tasks: Set<string>

  // Modality names, as ordered lists
  modalities: string[]
  secondaryModalities: string[]

  // Per-subject records and PET details
  subjectMetadata: SubjectMetadata[]
  pet: Record<string, any>
}

/**
 * The output of a validation run
 */
export interface ValidationResult {
  /** Issues collected while validating the dataset */
  issues: DatasetIssues
  /** Dataset summary; declared once, with the structured Summary type */
  summary: Summary
}
3 changes: 2 additions & 1 deletion bids-validator/src/validators/bids.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ import {
} from './filenames.ts'
import { DatasetIssues } from '../issues/datasetIssues.ts'
import { ValidationResult } from '../types/validation-result.ts'
import { summary, updateSummary } from '../summary/summary.ts'

/**
* Full BIDS schema validation entrypoint
*/
export async function validate(fileTree: FileTree): Promise<ValidationResult> {
const issues = new DatasetIssues()
// TODO - summary should be implemented in pure schema mode
const summary = {}
const schema = await loadSchema()
for await (const context of walkFileTree(fileTree, issues)) {
// TODO - Skip ignored files for now (some tests may reference ignored files)
Expand All @@ -33,6 +33,7 @@ export async function validate(fileTree: FileTree): Promise<ValidationResult> {
checkLabelFormat(schema, context)
}
applyRules(schema, context)
updateSummary(context)
rwblair marked this conversation as resolved.
Show resolved Hide resolved
}
return {
issues,
Expand Down