Skip to content

Commit

Permalink
Merge pull request #1481 from rwblair/schema/summary
Browse files Browse the repository at this point in the history
Add summary generation to schema-prototype
  • Loading branch information
rwblair authored Jul 8, 2022
2 parents c22c3a0 + ac2aa22 commit f4ec159
Show file tree
Hide file tree
Showing 13 changed files with 335 additions and 19 deletions.
2 changes: 1 addition & 1 deletion bids-validator/src/files/deno.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export async function _readFileTree(
const tree = new FileTreeDeno(relativePath, name, parent, rootPath)

for await (const dirEntry of Deno.readDir(join(rootPath, relativePath))) {
if (dirEntry.isFile) {
if (dirEntry.isFile || dirEntry.isSymlink) {
const file = new BIDSFileDeno(
rootPath,
join(relativePath, dirEntry.name),
Expand Down
3 changes: 3 additions & 0 deletions bids-validator/src/files/ignore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export async function readBidsIgnore(file: BIDSFile) {
}
}

// Ignore patterns that FileIgnoreRulesDeno adds to every rule set before the
// caller-supplied config.
const defaultIgnores = ['.git**', '.datalad']

/**
* Deno implementation of .bidsignore style rules
*/
Expand All @@ -20,6 +22,7 @@ export class FileIgnoreRulesDeno implements FileIgnoreRules {

/**
 * Creates the underlying matcher and loads it with `defaultIgnores` followed
 * by the caller's rules.
 *
 * @param config rule strings added after the defaults (presumably the lines
 *   of a .bidsignore file; confirm against the caller)
 */
constructor(config: string[]) {
this.#ignore = ignore({ allowRelativePaths: true })
this.#ignore.add(defaultIgnores)
this.#ignore.add(config)
}

Expand Down
2 changes: 1 addition & 1 deletion bids-validator/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ async function main() {
const schemaResult = await validate(tree)

if (options.schemaOnly) {
inspect(schemaResult.issues.issues)
inspect(schemaResult)
// TODO - generate a summary without the old validator
} else {
const output = schemaResult.issues.formatOutput()
Expand Down
11 changes: 7 additions & 4 deletions bids-validator/src/schema/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@ export class BIDSContext implements Context {
file: BIDSFile
suffix: string
extension: string
entities: object
entities: Record<string, string>
dataset: ContextDataset
subject: ContextSubject
datatype: string
modality: string
sidecar: object
associations: ContextAssociations
columns: object
json: object
nifti_header: ContextNiftiHeader

constructor(fileTree: FileTree, file: BIDSFile, issues: DatasetIssues) {
Expand All @@ -43,10 +42,14 @@ export class BIDSContext implements Context {
this.sidecar = {}
this.associations = {} as ContextAssociations
this.columns = {}
this.json = {}
this.nifti_header = {} as ContextNiftiHeader
}

/**
 * Reads this file's text and parses it as JSON on every access.
 *
 * Resolves to the parsed value when it is a JSON object or array. When the
 * file cannot be read, is not valid JSON, or parses to `null` or a primitive,
 * resolves to an empty object instead, so the promise always honours its
 * declared `Record` type and callers can use `in` / property access without
 * guarding against `undefined`.
 */
get json(): Promise<Record<string, any>> {
  return this.file
    .text()
    .then((text): Record<string, any> => {
      const parsed: unknown = JSON.parse(text)
      return typeof parsed === 'object' && parsed !== null
        ? (parsed as Record<string, any>)
        : {}
    })
    // Best effort: unreadable or malformed files are treated as empty JSON.
    .catch((): Record<string, any> => ({}))
}
/** Returns the value of this instance's `datasetPath` property. */
get path(): string {
return this.datasetPath
}
Expand Down
70 changes: 70 additions & 0 deletions bids-validator/src/summary/collectSubjectMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { SubjectMetadata } from '../types/validation-result.ts'
const PARTICIPANT_ID = 'participantId'
/**
 * Extract per-subject metadata from the text of a participants.tsv file.
 *
 * The first non-empty line is the header row. Only the `participant_id`,
 * `age`, `sex` and `group` columns are read; every other column is dropped.
 * `participant_id` is exposed under the camelCase key `participantId`:
 * [
 *   { participantId: '01', age: 25, sex: 'F', ... },
 *   { participantId: '02', age: 31, sex: 'M', ... },
 *   ...
 * ]
 *
 * Value handling:
 *  - a leading `sub-` is stripped from the participant id ('sub-01' -> '01')
 *  - age is parsed as a base-10 integer and is NaN when it is not numeric
 *  - a cell missing because its row is shorter than the header yields '' for
 *    the participant id and undefined for the other columns
 *
 * Returns an empty array when the content is empty, contains no non-empty
 * line, or the header has no participant_id column.
 *
 * @param participantsTsvContent raw text of participants.tsv
 * @returns one entry per non-empty data row, in file order
 */
export const collectSubjectMetadata = (
  participantsTsvContent: string,
): SubjectMetadata[] => {
  if (!participantsTsvContent) {
    return []
  }

  const contentTable = participantsTsvContent
    .split(/\r?\n/)
    .filter((row) => row !== '')
    .map((row) => row.split('\t'))
  // Input made up solely of line breaks leaves no header row to destructure.
  if (contentTable.length === 0) {
    return []
  }

  const [snakeCaseHeaders, ...subjectData] = contentTable
  const headers = snakeCaseHeaders.map((header) =>
    header === 'participant_id' ? PARTICIPANT_ID : header,
  )
  // Column position of every wanted key that is actually present in the file.
  const targetKeys = [PARTICIPANT_ID, 'age', 'sex', 'group']
    .map((key) => ({ key, index: headers.indexOf(key) }))
    .filter(({ index }) => index !== -1)
  if (!targetKeys.some(({ key }) => key === PARTICIPANT_ID)) {
    return []
  }

  // Converts one raw cell to the value stored under `key`.
  const normalizeCell = (
    key: string,
    cell: string | undefined,
  ): string | number | undefined => {
    if (key === PARTICIPANT_ID) {
      // strip subject ids to match metadata.subjects: 'sub-01' -> '01'
      return (cell ?? '').replace(/^sub-/, '')
    }
    if (key === 'age') {
      return parseInt(cell ?? '', 10)
    }
    return cell
  }

  return subjectData.map((data) =>
    // extract all target metadata for each subject
    targetKeys.reduce(
      (subject, { key, index }) => ({
        ...subject,
        [key]: normalizeCell(key, data[index]),
      }),
      {},
    ),
  ) as SubjectMetadata[]
}
15 changes: 15 additions & 0 deletions bids-validator/src/summary/summary.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { computeModalities, modalityPrettyLookup, Summary } from './summary.ts'
import { assertEquals, assertObjectMatch } from '../deps/asserts.ts'

// Smoke tests for the Summary class and the modality ordering helper.
Deno.test('Summary class and helper functions', async (t) => {
  await t.step('Constructor succeeds', () => {
    new Summary()
  })

  await t.step('computeModalities properly sorts modality counts', () => {
    // pet, mri and ieeg tie on 6 files; mri is expected last among the tied.
    const counts = { eeg: 5, pet: 6, mri: 6, ieeg: 6 }
    const expectedOrder = ['pet', 'ieeg', 'mri', 'eeg']
    const expectedLabels = expectedOrder.map(
      (modality) => modalityPrettyLookup[modality],
    )
    const actualLabels = computeModalities(counts)
    assertEquals(actualLabels, expectedLabels)
  })
})
166 changes: 166 additions & 0 deletions bids-validator/src/summary/summary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import { collectSubjectMetadata } from './collectSubjectMetadata.ts'
import { readAll, readerFromStreamReader } from '../deps/stream.ts'
import { SummaryOutput, SubjectMetadata } from '../types/validation-result.ts'
import { BIDSContext } from '../schema/context.ts'

/**
 * Display labels for modality keys. Keys without an entry are reported
 * unchanged by computeModalities.
 *
 * NOTE(review): Summary seeds a `microscopy` counter while this table is
 * keyed `micro`; confirm which key the validation context actually reports.
 */
export const modalityPrettyLookup: Record<string, string> = {
  mri: 'MRI',
  pet: 'PET',
  meg: 'MEG',
  eeg: 'EEG',
  ieeg: 'iEEG',
  micro: 'Microscopy',
}

/** Maps a datatype to the secondary-modality counter it contributes to. */
const secondaryLookup: Record<string, string> = {
  dwi: 'MRI_Diffusion',
  anat: 'MRI_Structural',
  func: 'MRI_Functional',
  perf: 'MRI_Perfusion',
}

/**
 * Lists the modalities that have at least one matching file, ordered by file
 * count (highest first) and translated to their display labels.
 *
 * When two modalities have the same count, `mri` is placed after the other
 * one; ties between non-MRI modalities keep the key order of the input.
 *
 * @param modalities file count per modality key
 * @returns display labels, most frequent modality first; empty when every
 *   count is zero
 */
export function computeModalities(
  modalities: Record<string, number>,
): string[] {
  // Order by matching file count
  const nonZero = Object.keys(modalities).filter((a) => modalities[a] !== 0)
  if (nonZero.length === 0) {
    return []
  }
  const sortedModalities = nonZero.sort((a, b) => {
    if (modalities[b] === modalities[a]) {
      // On a tie, hand it to the non-MRI modality. The answer has to be
      // symmetric in (a, b): a comparator that only checks one argument makes
      // the resulting order depend on how the engine calls it.
      const aIsMri = a === 'mri'
      const bIsMri = b === 'mri'
      if (aIsMri !== bIsMri) {
        return aIsMri ? 1 : -1
      }
      return 0
    }
    return modalities[b] - modalities[a]
  })
  return sortedModalities.map((mod) =>
    mod in modalityPrettyLookup ? modalityPrettyLookup[mod] : mod,
  )
}

/**
 * Lists the secondary modalities whose count is not zero, ordered from the
 * highest count to the lowest.
 *
 * @param secondary file count per secondary-modality key
 * @returns the keys with a non-zero count, most frequent first
 */
export function computeSecondaryModalities(
  secondary: Record<string, number>,
): string[] {
  const present: string[] = []
  for (const name of Object.keys(secondary)) {
    if (secondary[name] !== 0) {
      present.push(name)
    }
  }
  present.sort((left, right) => secondary[right] - secondary[left])
  return present
}

/**
 * Accumulates dataset-level statistics one file at a time.
 *
 * Call `update` with the context of every file, then `formatOutput` to get a
 * plain-object snapshot. Files whose path starts with `/derivatives` are
 * ignored entirely.
 */
export class Summary {
  sessions: Set<string>
  subjects: Set<string>
  subjectMetadata: SubjectMetadata[]
  tasks: Set<string>
  /** Number of files counted so far; -1 until the first file is counted. */
  totalFiles: number
  size: number
  dataProcessed: boolean
  pet: Record<string, any>
  modalitiesCount: Record<string, number>
  secondaryModalitiesCount: Record<string, number>
  datatypes: Set<string>

  constructor() {
    this.dataProcessed = false
    this.totalFiles = -1
    this.size = 0
    this.sessions = new Set()
    this.subjects = new Set()
    this.subjectMetadata = []
    this.tasks = new Set()
    this.pet = {}
    this.datatypes = new Set()
    this.modalitiesCount = {
      mri: 0,
      pet: 0,
      meg: 0,
      eeg: 0,
      ieeg: 0,
      microscopy: 0,
    }
    this.secondaryModalitiesCount = {
      MRI_Diffusion: 0,
      MRI_Structural: 0,
      MRI_Functional: 0,
      MRI_Perfusion: 0,
      PET_Static: 0,
      PET_Dynamic: 0,
      iEEG_ECoG: 0,
      iEEG_SEEG: 0,
    }
  }

  /** Modalities with at least one file, ordered by computeModalities. */
  get modalities(): string[] {
    return computeModalities(this.modalitiesCount)
  }

  /** Secondary modalities with at least one file, most frequent first. */
  get secondaryModalities(): string[] {
    return computeSecondaryModalities(this.secondaryModalitiesCount)
  }

  /**
   * Folds one file into the summary.
   *
   * @param context validation context of the file to account for
   */
  async update(context: BIDSContext): Promise<void> {
    if (context.file.path.startsWith('/derivatives')) {
      return
    }

    // totalFiles starts at the -1 "nothing counted yet" sentinel, so the
    // first file has to move it to 1; a plain increment is always one short.
    this.totalFiles = this.totalFiles < 0 ? 1 : this.totalFiles + 1
    this.size += await context.file.size

    if ('sub' in context.entities) {
      this.subjects.add(context.entities.sub)
    }
    if ('ses' in context.entities) {
      this.sessions.add(context.entities.ses)
    }

    if (context.datatype.length) {
      this.datatypes.add(context.datatype)
    }

    if (context.extension === '.json') {
      const parsedJson = await context.json
      // `in` throws on undefined, null and primitives, all of which an
      // unreadable or non-object JSON file can produce.
      if (
        typeof parsedJson === 'object' &&
        parsedJson !== null &&
        'TaskName' in parsedJson
      ) {
        this.tasks.add(parsedJson.TaskName)
      }
    }

    if (context.modality) {
      // Start from 0 for modalities that were not pre-seeded in the
      // constructor; `undefined++` would store NaN and corrupt the ordering.
      this.modalitiesCount[context.modality] =
        (this.modalitiesCount[context.modality] ?? 0) + 1
    }

    if (context.datatype in secondaryLookup) {
      const key = secondaryLookup[context.datatype]
      this.secondaryModalitiesCount[key]++
    } else if (context.datatype === 'pet' && 'rec' in context.entities) {
      if (['acstat', 'nacstat'].includes(context.entities.rec)) {
        this.secondaryModalitiesCount.PET_Static++
      } else if (['acdyn', 'nacdyn'].includes(context.entities.rec)) {
        this.secondaryModalitiesCount.PET_Dynamic++
      }
    }

    // Only the dataset-root participants.tsv describes the subjects. A suffix
    // match would let e.g. /sourcedata/participants.tsv or
    // /foo_participants.tsv overwrite the metadata.
    if (context.file.path.replace(/^\/+/, '') === 'participants.tsv') {
      const tsvContents = await context.file.text()
      this.subjectMetadata = collectSubjectMetadata(tsvContents)
    }
  }

  /** Returns a plain-object snapshot of the summary, with sets as arrays. */
  formatOutput(): SummaryOutput {
    return {
      sessions: Array.from(this.sessions),
      subjects: Array.from(this.subjects),
      subjectMetadata: this.subjectMetadata,
      tasks: Array.from(this.tasks),
      modalities: this.modalities,
      secondaryModalities: this.secondaryModalities,
      totalFiles: this.totalFiles,
      size: this.size,
      dataProcessed: this.dataProcessed,
      pet: this.pet,
      datatypes: Array.from(this.datatypes),
    }
  }
}
7 changes: 6 additions & 1 deletion bids-validator/src/tests/local/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ import { FileTree } from '../../types/filetree.ts'
import { validate } from '../../validators/bids.ts'
import { ValidationResult } from '../../types/validation-result.ts'
import { DatasetIssues } from '../../issues/datasetIssues.ts'
import { Summary } from '../../summary/summary.ts'

export async function validatePath(
t: Deno.TestContext,
path: string,
): Promise<{ tree: FileTree; result: ValidationResult }> {
let tree: FileTree = new FileTree('', '')
let result: ValidationResult = { issues: new DatasetIssues(), summary: {} }
let summary = new Summary()
let result: ValidationResult = {
issues: new DatasetIssues(),
summary: summary.formatOutput(),
}

await t.step('file tree is read', async () => {
tree = await readFileTree(path)
Expand Down
2 changes: 1 addition & 1 deletion bids-validator/src/tests/local/valid_headers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Deno.test('valid_headers dataset', async (t) => {
})

await t.step('summary has correct tasks', () => {
assertEquals(result.summary.tasks, ['rhyme judgment'])
assertEquals(Array.from(result.summary.tasks), ['rhyme judgment'])
})

await t.step('summary has correct dataProcessed', () => {
Expand Down
30 changes: 29 additions & 1 deletion bids-validator/src/types/validation-result.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
import { DatasetIssues } from '../issues/datasetIssues.ts'

/**
 * Metadata for one subject, as extracted from participants.tsv by
 * collectSubjectMetadata.
 *
 * NOTE(review): `age`, `sex` and `group` are only present on the produced
 * objects when participants.tsv has the corresponding column.
 */
export interface SubjectMetadata {
  /**
   * Subject label with the `sub-` prefix removed. The property is named
   * `participantId` because that is the key collectSubjectMetadata writes;
   * `PARTICIPANT_ID` is only the name of the constant holding that key.
   */
  participantId: string
  age: number
  sex: string
  group: string
}
/*
BodyPart: {},
ScannerManufacturer: {},
ScannerManufacturersModelName: {},
TracerName: {},
TracerRadionuclide: {},
*/

/**
 * Plain-object form of a dataset summary, as returned by
 * Summary.formatOutput().
 */
export interface SummaryOutput {
// Distinct `ses` entity values seen.
sessions: string[]
// Distinct `sub` entity values seen.
subjects: string[]
// Entries produced by collectSubjectMetadata from participants.tsv.
subjectMetadata: SubjectMetadata[]
// Distinct `TaskName` values found in JSON files.
tasks: string[]
// Modality labels ordered by file count (see computeModalities).
modalities: string[]
// Secondary modality keys ordered by file count.
secondaryModalities: string[]
// File counter kept by Summary.update; Summary initialises it to -1.
totalFiles: number
// Sum of the sizes of the counted files.
size: number
// NOTE(review): the visible Summary code only ever sets this to false.
dataProcessed: boolean
// NOTE(review): the visible Summary code only ever stores an empty object.
pet: Record<string, any>
// Distinct non-empty datatype values seen.
datatypes: string[]
}

/**
 * The output of a validation run
 */
export interface ValidationResult {
  /** Issues collected during the run. */
  issues: DatasetIssues
  /**
   * Dataset summary. Typed as SummaryOutput only: the earlier
   * `Record<string, any>` declaration of this member is dropped, since two
   * declarations of `summary` with different types cannot coexist.
   */
  summary: SummaryOutput
}
Loading

0 comments on commit f4ec159

Please sign in to comment.