-
Notifications
You must be signed in to change notification settings - Fork 111
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1481 from rwblair/schema/summary
Add summary generation to schema-prototype
- Loading branch information
Showing
13 changed files
with
335 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import { SubjectMetadata } from '../types/validation-result.ts' | ||
const PARTICIPANT_ID = 'participantId'

/**
 * Convert the text of a participants.tsv file into one metadata record per
 * data row.
 *
 * The `participant_id` column is required. It is exposed under the key
 * `participantId` with any leading `sub-` prefix removed ('sub-01' -> '01').
 * Of the remaining columns only `age`, `sex` and `group` are kept, and only
 * when they appear in the header row:
 * [
 *   { participantId: '01', age: 25, sex: 'F' },
 *   { participantId: '02', age: 30, sex: 'M' },
 *   ...
 * ]
 *
 * `age` is parsed as a base-10 integer, so a non-numeric or missing cell
 * yields NaN. A missing `sex` or `group` cell yields undefined, and a missing
 * participant id cell yields an empty string.
 *
 * @param participantsTsvContent Raw text of participants.tsv.
 * @returns One record per non-empty data row, or an empty array when the
 *   content is empty, has no header row, or has no `participant_id` column.
 */
export const collectSubjectMetadata = (
  participantsTsvContent: string,
): SubjectMetadata[] => {
  if (!participantsTsvContent) {
    return []
  }

  const contentTable = participantsTsvContent
    .split(/\r?\n/)
    .filter((row) => row !== '')
    .map((row) => row.split('\t'))
  const [snakeCaseHeaders, ...subjectData] = contentTable
  // Content made only of line breaks leaves no header row to inspect.
  if (snakeCaseHeaders === undefined) {
    return []
  }

  const headers = snakeCaseHeaders.map((header) =>
    header === 'participant_id' ? PARTICIPANT_ID : header,
  )
  // Column positions of the fields we report, skipping absent columns.
  const targetKeys = [PARTICIPANT_ID, 'age', 'sex', 'group']
    .map((key) => ({ key, index: headers.indexOf(key) }))
    .filter(({ index }) => index !== -1)
  if (!targetKeys.some(({ key }) => key === PARTICIPANT_ID)) {
    return []
  }

  const subjects = subjectData.map((row) => {
    const subject: Record<string, string | number | undefined> = {}
    for (const { key, index } of targetKeys) {
      // Rows may be shorter than the header, so a cell can be undefined.
      const cell = row[index]
      if (key === PARTICIPANT_ID) {
        // strip subject ids to match metadata.subjects: 'sub-01' -> '01'
        subject[key] = (cell ?? '').replace(/^sub-/, '')
      } else if (key === 'age') {
        // make age an integer
        subject[key] = parseInt(cell ?? '', 10)
      } else {
        subject[key] = cell
      }
    }
    return subject as object
  })
  return subjects as SubjectMetadata[]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import { computeModalities, modalityPrettyLookup, Summary } from './summary.ts' | ||
import { assertEquals, assertObjectMatch } from '../deps/asserts.ts' | ||
|
||
// Smoke-tests the Summary constructor and checks how computeModalities orders
// modalities that share the same file count.
Deno.test('Summary class and helper functions', async (t) => {
  await t.step('Constructor succeeds', () => {
    new Summary()
  })

  await t.step('computeModalities properly sorts modality counts', () => {
    const counts = { eeg: 5, pet: 6, mri: 6, ieeg: 6 }
    // Expected order: highest count first, with MRI last among the ties.
    const expected: string[] = []
    for (const name of ['pet', 'ieeg', 'mri', 'eeg']) {
      expected.push(modalityPrettyLookup[name])
    }
    assertEquals(computeModalities(counts), expected)
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
import { collectSubjectMetadata } from './collectSubjectMetadata.ts' | ||
import { readAll, readerFromStreamReader } from '../deps/stream.ts' | ||
import { SummaryOutput, SubjectMetadata } from '../types/validation-result.ts' | ||
import { BIDSContext } from '../schema/context.ts' | ||
|
||
/**
 * Display names for the primary modalities reported in the summary.
 *
 * NOTE(review): the Summary class seeds its counts with the key `microscopy`,
 * while this table uses `micro` - confirm which key the modality value
 * actually carries.
 */
export const modalityPrettyLookup: Record<string, string> = {
  mri: 'MRI',
  pet: 'PET',
  meg: 'MEG',
  eeg: 'EEG',
  ieeg: 'iEEG',
  micro: 'Microscopy',
}

/** Maps a datatype name to the secondary-modality counter it increments. */
const secondaryLookup: Record<string, string> = {
  dwi: 'MRI_Diffusion',
  anat: 'MRI_Structural',
  func: 'MRI_Functional',
  perf: 'MRI_Perfusion',
}

/**
 * Order modalities by descending file count and return their display names.
 *
 * Modalities with a count of zero are dropped. On a tie the non-MRI modality
 * is listed first; other ties keep the key order of the input object.
 * Names without an entry in `modalityPrettyLookup` are returned unchanged.
 *
 * @param modalities File count per modality key.
 * @returns Display names, most frequent modality first.
 */
export function computeModalities(
  modalities: Record<string, number>,
): string[] {
  const ranked = Object.entries(modalities)
    .filter(([, count]) => count !== 0)
    .sort(([nameA, countA], [nameB, countB]) => {
      // Order by matching file count
      if (countA !== countB) return countB - countA
      if (nameA === nameB) return 0
      // On a tie, hand it to the non-MRI modality. Both directions must be
      // answered so the comparator stays consistent for any call order.
      if (nameA === 'mri') return 1
      if (nameB === 'mri') return -1
      return 0
    })
  return ranked.map(([name]) =>
    // Own-property check: `in` would also match inherited keys like `toString`.
    Object.prototype.hasOwnProperty.call(modalityPrettyLookup, name)
      ? modalityPrettyLookup[name]
      : name,
  )
}
|
||
/**
 * List the secondary modalities that were seen at least once, most frequent
 * first.
 *
 * @param secondary File count per secondary-modality key.
 * @returns Keys whose count is not zero, ordered by descending count.
 */
export function computeSecondaryModalities(
  secondary: Record<string, number>,
): string[] {
  const present: string[] = []
  for (const name of Object.keys(secondary)) {
    if (secondary[name] !== 0) {
      present.push(name)
    }
  }
  present.sort((first, second) => secondary[second] - secondary[first])
  return present
}
|
||
/**
 * Accumulates dataset-level summary information, one file context at a time
 * via `update`, and renders it as a plain object via `formatOutput`.
 */
export class Summary {
  /** Distinct `ses` entity values seen. */
  sessions: Set<string>
  /** Distinct `sub` entity values seen. */
  subjects: Set<string>
  /** Records parsed from the most recently seen participants.tsv. */
  subjectMetadata: SubjectMetadata[]
  /** Distinct `TaskName` values found in JSON files. */
  tasks: Set<string>
  totalFiles: number
  /** Sum of the sizes of all counted files. */
  size: number
  dataProcessed: boolean
  pet: Record<string, any>
  /** File count per primary modality. */
  modalitiesCount: Record<string, number>
  /** File count per secondary modality. */
  secondaryModalitiesCount: Record<string, number>
  /** Distinct non-empty datatypes seen. */
  datatypes: Set<string>

  constructor() {
    this.dataProcessed = false
    // NOTE(review): starting at -1 means N calls to update() report N - 1
    // files; confirm this offset is intended before changing it.
    this.totalFiles = -1
    this.size = 0
    this.sessions = new Set()
    this.subjects = new Set()
    this.subjectMetadata = []
    this.tasks = new Set()
    this.pet = {}
    this.datatypes = new Set()
    this.modalitiesCount = {
      mri: 0,
      pet: 0,
      meg: 0,
      eeg: 0,
      ieeg: 0,
      microscopy: 0,
    }
    this.secondaryModalitiesCount = {
      MRI_Diffusion: 0,
      MRI_Structural: 0,
      MRI_Functional: 0,
      MRI_Perfusion: 0,
      PET_Static: 0,
      PET_Dynamic: 0,
      iEEG_ECoG: 0,
      iEEG_SEEG: 0,
    }
  }

  /** Display names of the modalities seen, most frequent first. */
  get modalities() {
    return computeModalities(this.modalitiesCount)
  }

  /** Secondary modalities seen, most frequent first. */
  get secondaryModalities() {
    return computeSecondaryModalities(this.secondaryModalitiesCount)
  }

  /**
   * Fold one file into the summary. Files whose path starts with
   * `/derivatives` are ignored entirely.
   *
   * @param context Context of the file being processed.
   */
  async update(context: BIDSContext): Promise<void> {
    if (context.file.path.startsWith('/derivatives')) {
      return
    }

    this.totalFiles++
    // Await first, then add: `this.size += await x` reads `this.size` before
    // suspending, so overlapping update() calls would overwrite each other.
    const fileSize = await context.file.size
    this.size += fileSize

    if ('sub' in context.entities) {
      this.subjects.add(context.entities.sub)
    }
    if ('ses' in context.entities) {
      this.sessions.add(context.entities.ses)
    }

    if (context.datatype.length) {
      this.datatypes.add(context.datatype)
    }

    if (context.extension === '.json') {
      const parsedJson = await context.json
      // The `in` operator throws on null and primitives, which a JSON file
      // is free to contain at its top level.
      if (
        typeof parsedJson === 'object' &&
        parsedJson !== null &&
        'TaskName' in parsedJson
      ) {
        this.tasks.add(parsedJson.TaskName)
      }
    }
    if (context.modality) {
      // Start unseen modalities at zero; `undefined++` would store NaN.
      this.modalitiesCount[context.modality] =
        (this.modalitiesCount[context.modality] ?? 0) + 1
    }

    if (context.datatype in secondaryLookup) {
      const key = secondaryLookup[context.datatype]
      this.secondaryModalitiesCount[key]++
    } else if (context.datatype === 'pet' && 'rec' in context.entities) {
      if (['acstat', 'nacstat'].includes(context.entities.rec)) {
        this.secondaryModalitiesCount.PET_Static++
      } else if (['acdyn', 'nacdyn'].includes(context.entities.rec)) {
        this.secondaryModalitiesCount.PET_Dynamic++
      }
    }

    if (context.file.path.endsWith('participants.tsv')) {
      const tsvContents = await context.file.text()
      this.subjectMetadata = collectSubjectMetadata(tsvContents)
    }
  }

  /** Snapshot the accumulated state, converting sets to arrays. */
  formatOutput(): SummaryOutput {
    return {
      sessions: Array.from(this.sessions),
      subjects: Array.from(this.subjects),
      subjectMetadata: this.subjectMetadata,
      tasks: Array.from(this.tasks),
      modalities: this.modalities,
      secondaryModalities: this.secondaryModalities,
      totalFiles: this.totalFiles,
      size: this.size,
      dataProcessed: this.dataProcessed,
      pet: this.pet,
      datatypes: Array.from(this.datatypes),
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,37 @@ | ||
import { DatasetIssues } from '../issues/datasetIssues.ts' | ||
|
||
/**
 * Per-subject metadata extracted from participants.tsv.
 *
 * The identifier is stored under `participantId`, the key that
 * collectSubjectMetadata writes (the subject label without the `sub-` prefix).
 * NOTE(review): `age`, `sex` and `group` are only populated when the matching
 * column exists in participants.tsv - consider marking them optional.
 */
export interface SubjectMetadata {
  participantId: string
  age: number
  sex: string
  group: string
}
/* | ||
BodyPart: {}, | ||
ScannerManufacturer: {}, | ||
ScannerManufacturersModelName: {}, | ||
TracerName: {}, | ||
TracerRadionuclide: {}, | ||
*/ | ||
|
||
/**
 * Plain-object form of the dataset summary, as produced by
 * Summary.formatOutput().
 */
export interface SummaryOutput {
  /** Distinct session labels. */
  sessions: string[]
  /** Distinct subject labels. */
  subjects: string[]
  /** Records parsed from participants.tsv. */
  subjectMetadata: SubjectMetadata[]
  /** Distinct task names. */
  tasks: string[]
  /** Primary modalities, most frequent first. */
  modalities: string[]
  /** Secondary modalities, most frequent first. */
  secondaryModalities: string[]
  totalFiles: number
  /** Combined size of the counted files. */
  size: number
  dataProcessed: boolean
  pet: Record<string, any>
  /** Distinct datatypes. */
  datatypes: string[]
}
|
||
/**
 * The output of a validation run
 */
export interface ValidationResult {
  /** Issues collected while validating the dataset. */
  issues: DatasetIssues
  /** Dataset summary in its serialisable form. */
  summary: SummaryOutput
}
Oops, something went wrong.