diff --git a/package.json b/package.json
index e77dec423..da399c136 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "scroll-cli",
-  "version": "163.1.0",
+  "version": "163.2.0",
   "description": "A language for scientists of all ages. A curated collection of tools for refining and sharing thoughts.",
   "main": "scroll.js",
   "engines": {
diff --git a/parsers/root.parsers b/parsers/root.parsers
index f2c4e62ba..af5b1c1bb 100644
--- a/parsers/root.parsers
+++ b/parsers/root.parsers
@@ -340,7 +340,7 @@ scrollParser
   }
   get scrollVersion() {
    // currently manually updated
-   return "163.1.0"
+   return "163.2.0"
   }
   // Use the first paragraph for the description
   // todo: add a particle method version of get that gets you the first particle. (actulaly make get return array?)
diff --git a/parsers/tables.parsers b/parsers/tables.parsers
index 93a5765d2..f2d27c289 100644
--- a/parsers/tables.parsers
+++ b/parsers/tables.parsers
@@ -676,3 +676,64 @@ tableSearchParser
   buildInstance() {
    return ""
   }
+
+scrollSummarizeParser
+ extends abstractTableTransformParser
+ description Generate summary statistics for each column.
+ cue summarize
+ example
+  table data.csv
+   summarize
+    printTable
+ javascript
+  // Returns one summary row per column of the parent table, or [] when the parent has no rows.
+  // count, uniqueCount, and mode cover every cell that is not undefined or null;
+  // sum, median, mean, min, and max cover only the cells that are non-NaN numbers.
+  get coreTable() {
+   const sourceData = this.parent.coreTable
+   if (!sourceData.length) return []
+   return this.parent.columnNames.map(colName => {
+    const values = sourceData.map(row => row[colName]).filter(val => val !== undefined && val !== null)
+    const numericValues = values.filter(val => typeof val === "number" && !Number.isNaN(val))
+    const sorted = [...numericValues].sort((a, b) => a - b)
+    const numericCount = sorted.length
+    // Tally in a Map: a plain object would turn every key into a string and would
+    // miscount values such as "constructor" that collide with Object.prototype.
+    const frequency = new Map()
+    values.forEach(val => frequency.set(val, (frequency.get(val) || 0) + 1))
+    // Mode is the most frequent value; on a tie the value seen first wins.
+    let mode
+    let modeCount = 0
+    frequency.forEach((timesSeen, val) => {
+     if (timesSeen <= modeCount) return
+     mode = val
+     modeCount = timesSeen
+    })
+    // Median is the middle of the sorted numbers, or the average of the two middle ones.
+    const middle = Math.floor(numericCount / 2)
+    let median = null
+    if (numericCount) median = numericCount % 2 === 0 ? (sorted[middle - 1] + sorted[middle]) / 2 : sorted[middle]
+    const sum = numericCount ? numericValues.reduce((a, b) => a + b, 0) : null
+    // The column type is taken from its first non-empty cell.
+    const theType = typeof values[0]
+    // Divide by the number of numeric cells so mean agrees with sum; "" marks columns without a mean.
+    const mean = theType === "number" && numericCount ? sum / numericCount : ""
+    return {
+     name: colName,
+     type: theType,
+     incompleteCount: sourceData.length - values.length,
+     uniqueCount: new Set(values).size,
+     count: values.length,
+     sum,
+     median,
+     mean,
+     min: numericCount ? sorted[0] : null,
+     max: numericCount ? sorted[numericCount - 1] : null,
+     mode
+    }
+   })
+  }
+  // Column order of the rows built by coreTable.
+  get columnNames() {
+   return ["name", "type", "incompleteCount", "uniqueCount", "count", "sum", "median", "mean", "min", "max", "mode"]
+  }
\ No newline at end of file
diff --git a/releaseNotes.scroll b/releaseNotes.scroll
index 48a95b5e6..7c500040d 100644
--- a/releaseNotes.scroll
+++ b/releaseNotes.scroll
@@ -22,6 +22,9 @@ ciBadges.scroll
 br
 thinColumns
 
+📦 163.2.0 12/15/2024
+🎉 added `summarize` parser
+
 📦 163.1.0 12/15/2024
 🎉 column names in table particles now try to match users intent (case insensitive and close match).
 🎉 new `assertIgnoreBelowErrorsParser` for automated testing purposes
diff --git a/tests/tables.scroll b/tests/tables.scroll
index f3cccc8f5..4bdda9adc 100644
--- a/tests/tables.scroll
+++ b/tests/tables.scroll
@@ -3,7 +3,7 @@ buildTxt
 
 theme gazette
 
-thinColumns 1
+container 800px
 
 # Simple sparkline with inline data
 sparkline 5 7 27 87 300 17 10 5
@@ -132,3 +132,10 @@ iris
  printTable
  select species
   printTable
+
+---
+
+# Summarize
+iris
+ summarize
+  printTable
\ No newline at end of file