From 3882e4d3146b864cca9e5ac7fac5d718dcf9f964 Mon Sep 17 00:00:00 2001 From: Sriramajeyam Sugumaran <153843+yesoreyeram@users.noreply.github.com> Date: Mon, 27 Mar 2023 07:39:30 +0100 Subject: [PATCH] pivot --- CHANGELOG.md | 4 + package.json | 2 +- src/grammar/grammar.ne | 4 + src/grammar/grammar.ts | 6 ++ src/grammar/tests/grammer.pivot.test.ts | 102 ++++++++++++++++++++++++ src/parser/evaluate.ts | 3 + src/parser/parsers.ts | 1 + src/parser/pivot/pivot.test.ts | 45 +++++++++++ src/parser/pivot/pivot.ts | 55 +++++++++++++ src/parser/summarize/summarize.ts | 2 +- src/types.ts | 10 ++- 11 files changed, 231 insertions(+), 3 deletions(-) create mode 100644 src/grammar/tests/grammer.pivot.test.ts create mode 100644 src/parser/pivot/pivot.test.ts create mode 100644 src/parser/pivot/pivot.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index d90b146..cac236d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Change history of the project. All the feature updates, bug fixes, breaking changes will be documented here. 
+## [0.0.19] + +- Feature: new root level command `pivot` added + ## [0.0.18] - Feature: new method `atob` and `btoa` added diff --git a/package.json b/package.json index c87db86..6dedf8e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uql", - "version": "0.0.18", + "version": "0.0.19", "description": "UQL - Unstructured Query Language", "author": "Sriramajeyam Sugumaran", "license": "Apache-2.0", diff --git a/src/grammar/grammar.ne b/src/grammar/grammar.ne index 753218a..0256081 100644 --- a/src/grammar/grammar.ne +++ b/src/grammar/grammar.ne @@ -89,6 +89,7 @@ command | command_distinct {% d => ({ type: "distinct", value: d[0] })%} | command_mv_expand {% d => ({ type: "mv-expand", value: d[0] })%} | command_summarize {% d => ({ type: "summarize", value: d[0] })%} + | command_pivot {% d => ({ type: "pivot", value: d[0] })%} | command_range {% d => ({ type: "range", value: d[0] })%} | "jsonata" __ str {% d => ({ type: "jsonata", expression: d[2] }) %} # Command Function @@ -287,6 +288,9 @@ parse_arg -> %dash %dash %identifier __ str {% d => ({ identifier: d[2].value, value: d[4] }) %} | %dash %dash %identifier __ str_type {% d => ({ identifier: d[2].value, value: d[4].value }) %} | %dash %dash %identifier __ %identifier {% d => ({ identifier: d[2].value, value: d[4].value }) %} +# Command : Pivot +command_pivot + -> "pivot" __ summarize_assignment __ ",":* __ ref_types:* {% d => ({ metric : d[2], fields : d[6] !== undefined && d[6].length > 0 ? 
d[6][0]: [] })%} # Command : Summarize command_summarize -> summarize_item {% pick(0) %} diff --git a/src/grammar/grammar.ts b/src/grammar/grammar.ts index aff1573..d189243 100644 --- a/src/grammar/grammar.ts +++ b/src/grammar/grammar.ts @@ -143,6 +143,7 @@ const grammar: Grammar = { {"name": "command", "symbols": ["command_distinct"], "postprocess": d => ({ type: "distinct", value: d[0] })}, {"name": "command", "symbols": ["command_mv_expand"], "postprocess": d => ({ type: "mv-expand", value: d[0] })}, {"name": "command", "symbols": ["command_summarize"], "postprocess": d => ({ type: "summarize", value: d[0] })}, + {"name": "command", "symbols": ["command_pivot"], "postprocess": d => ({ type: "pivot", value: d[0] })}, {"name": "command", "symbols": ["command_range"], "postprocess": d => ({ type: "range", value: d[0] })}, {"name": "command", "symbols": [{"literal":"jsonata"}, "__", "str"], "postprocess": d => ({ type: "jsonata", expression: d[2] })}, {"name": "function_assignments", "symbols": ["function_assignment"], "postprocess": as_array(0)}, @@ -330,6 +331,11 @@ const grammar: Grammar = { {"name": "parse_arg", "symbols": [(oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("identifier") ? {type: "identifier"} : identifier), "__", "str"], "postprocess": d => ({ identifier: d[2].value, value: d[4] })}, {"name": "parse_arg", "symbols": [(oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("identifier") ? {type: "identifier"} : identifier), "__", "str_type"], "postprocess": d => ({ identifier: d[2].value, value: d[4].value })}, {"name": "parse_arg", "symbols": [(oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("dash") ? {type: "dash"} : dash), (oqlLexer.has("identifier") ? {type: "identifier"} : identifier), "__", (oqlLexer.has("identifier") ? 
{type: "identifier"} : identifier)], "postprocess": d => ({ identifier: d[2].value, value: d[4].value })}, + {"name": "command_pivot$ebnf$1", "symbols": []}, + {"name": "command_pivot$ebnf$1", "symbols": ["command_pivot$ebnf$1", {"literal":","}], "postprocess": (d) => d[0].concat([d[1]])}, + {"name": "command_pivot$ebnf$2", "symbols": []}, + {"name": "command_pivot$ebnf$2", "symbols": ["command_pivot$ebnf$2", "ref_types"], "postprocess": (d) => d[0].concat([d[1]])}, + {"name": "command_pivot", "symbols": [{"literal":"pivot"}, "__", "summarize_assignment", "__", "command_pivot$ebnf$1", "__", "command_pivot$ebnf$2"], "postprocess": d => ({ metric : d[2], fields : d[6] !== undefined && d[6].length > 0 ? d[6][0]: [] })}, {"name": "command_summarize", "symbols": ["summarize_item"], "postprocess": pick(0)}, {"name": "command_summarize$ebnf$1", "symbols": []}, {"name": "command_summarize$ebnf$1", "symbols": ["command_summarize$ebnf$1", "summarize_args"], "postprocess": (d) => d[0].concat([d[1]])}, diff --git a/src/grammar/tests/grammer.pivot.test.ts b/src/grammar/tests/grammer.pivot.test.ts new file mode 100644 index 0000000..f6210a6 --- /dev/null +++ b/src/grammar/tests/grammer.pivot.test.ts @@ -0,0 +1,102 @@ +import { Parser, Grammar } from "nearley"; +import grammar from "../grammar"; + +const oqlGrammar = Grammar.fromCompiled(grammar); + +const get = (input: string): unknown[] => { + const oqlParser = new Parser(oqlGrammar); + oqlParser.feed(input); + return oqlParser.results; +}; + +const tests: [string, { query: string; expected: unknown }][] = [ + [ + "pivot - with default arguments", + { + query: `pivot count()`, + expected: [ + { + type: "pivot", + value: { + metric: { alias: undefined, args: [], operator: "count" }, + fields: [], + }, + }, + ], + }, + ], + [ + "pivot - with just aggregation", + { + query: `pivot sum("quantity")`, + expected: [ + { + type: "pivot", + value: { + metric: { alias: undefined, args: [{ type: "ref", value: "quantity" }], operator: 
"sum" }, + fields: [], + }, + }, + ], + }, + ], + [ + "pivot - with aggregation and col", + { + query: `pivot sum("quantity"), "fruit"`, + expected: [ + { + type: "pivot", + value: { + metric: { alias: undefined, args: [{ type: "ref", value: "quantity" }], operator: "sum" }, + fields: [{ type: "ref", value: "fruit" }], + }, + }, + ], + }, + ], + [ + "pivot - with aggregation and col and row", + { + query: `pivot sum("quantity"), "fruit", "size"`, + expected: [ + { + type: "pivot", + value: { + metric: { alias: undefined, args: [{ type: "ref", value: "quantity" }], operator: "sum" }, + fields: [ + { type: "ref", value: "fruit" }, + { type: "ref", value: "size" }, + ], + }, + }, + ], + }, + ], + [ + "pivot - with aggregation and col and row with alias", + { + query: `pivot "qty"=sum("quantity"), "fruit", "size"`, + expected: [ + { + type: "pivot", + value: { + metric: { alias: "qty", args: [{ type: "ref", value: "quantity" }], operator: "sum" }, + fields: [ + { type: "ref", value: "fruit" }, + { type: "ref", value: "size" }, + ], + }, + }, + ], + }, + ], +]; + +describe("grammar pivot", () => { + it.each(tests)("%s", (_, test) => { + const { query, expected } = test as { query: string; expected: unknown }; + const results = get(query as string); + expect(results[0]).toStrictEqual(expected); + }); +}); diff --git a/src/parser/evaluate.ts b/src/parser/evaluate.ts index 8232567..e622322 100644 --- a/src/parser/evaluate.ts +++ b/src/parser/evaluate.ts @@ -62,6 +62,9 @@ export const evaluate = (commands: Command[], options?: { data?: any }): Promise case "summarize": previousValue = parsers.summarize(previousValue, currentCommand); break; + case "pivot": + previousValue = parsers.pivot(previousValue, currentCommand); + break; case "parse-json": previousValue = parsers.parseJson(previousValue, currentCommand); break; diff --git a/src/parser/parsers.ts b/src/parser/parsers.ts index 02c7a8c..d80b006 100644 --- a/src/parser/parsers.ts +++ b/src/parser/parsers.ts @@ -21,6 
+21,7 @@ export * from "./project-away/project-away"; export * from "./project-reorder/project-reorder"; export * from "./extend/extend"; export * from "./summarize/summarize"; +export * from "./pivot/pivot"; export * from "./mv-expand/mv-expand"; export * from "./parse-json/parse-json"; diff --git a/src/parser/pivot/pivot.test.ts b/src/parser/pivot/pivot.test.ts new file mode 100644 index 0000000..5cb0ff1 --- /dev/null +++ b/src/parser/pivot/pivot.test.ts @@ -0,0 +1,45 @@ +import { uql } from "../index"; + +describe("pivot", () => { + const data = { + fruits: [ + { fruit: "apple", size: "sm", qty: 1 }, + { fruit: "apple", size: "md", qty: 2 }, + { fruit: "apple", size: "lg", qty: 3 }, + { fruit: "banana", size: "sm", qty: 1 }, + { fruit: "banana", size: "lg", qty: 6 }, + { fruit: "banana", size: "xl", qty: 5 }, + ], + }; + describe("basic", () => { + it("with default arguments", async () => { + expect(await uql(`pivot count()`, { data: data.fruits })).toStrictEqual(6); + }); + it("with custom operator", async () => { + expect(await uql(`pivot sum("qty")`, { data: data.fruits })).toStrictEqual(18); + expect(await uql(`pivot max("qty")`, { data: data.fruits })).toStrictEqual(6); + }); + it("with custom operator with row", async () => { + expect(await uql(`pivot sum("qty"), "fruit"`, { data: data.fruits })).toStrictEqual([ + { fruit: "apple", value: 6 }, + { fruit: "banana", value: 12 }, + ]); + expect(await uql(`pivot count("qty"), "size"`, { data: data.fruits })).toStrictEqual([ + { size: "sm", value: 2 }, + { size: "md", value: 1 }, + { size: "lg", value: 2 }, + { size: "xl", value: 1 }, + ]); + }); + it("with custom operator with row and col", async () => { + expect(await uql(`pivot sum("qty"), "fruit", "size"`, { data: data.fruits })).toStrictEqual([ + { fruit: "apple", sm: 1, md: 2, lg: 3, xl: 0 }, + { fruit: "banana", sm: 1, md: 0, lg: 6, xl: 5 }, + ]); + expect(await uql(`pivot max("qty"), "fruit", "size"`, { data: data.fruits })).toStrictEqual([ + { fruit: 
"apple", sm: 1, md: 2, lg: 3, xl: null }, + { fruit: "banana", sm: 1, md: null, lg: 6, xl: 5 }, + ]); + }); + }); +}); diff --git a/src/parser/pivot/pivot.ts b/src/parser/pivot/pivot.ts new file mode 100644 index 0000000..a9052d5 --- /dev/null +++ b/src/parser/pivot/pivot.ts @@ -0,0 +1,55 @@ +import { isArray, uniq } from "lodash"; +import { UQLsummarize } from "./../summarize/summarize"; +import { Command, CommandResult } from "../../types"; + +export const pivot = (pv: CommandResult, cv: Extract): CommandResult => { + let input = pv.output; + if (input == null || !isArray(input)) { + return { ...pv, output: null }; + } + let item = cv.value; + let rows: string[] = []; + if (item && item.fields && item.fields.length > 0) { + rows = uniq(input.map((u) => (item && item.fields ? u[item.fields[0].value] : ""))).filter((v) => v !== ""); + } + let cols: string[] = []; + if (item && item.fields && item.fields.length > 1) { + cols = uniq(input.map((u) => (item && item.fields ? u[item.fields[1].value] : ""))).filter((v) => v !== ""); + } + if (item.fields?.length === 2) { + let out: any[] = []; + rows.forEach((r) => { + let rowName = item && item.fields ? item.fields[0].value : ""; + let colName = item && item.fields ? item.fields[1].value : ""; + let outValue: Record = { [rowName]: r }; + cols.forEach((c) => { + let currentItems = (input as any[]).filter((ins) => ins[rowName] === r && ins[colName] === c) || []; + if (currentItems.length === 0) { + outValue[c] = item.metric.operator === "count" || item.metric.operator === "dcount" || item.metric.operator === "sum" ? 0 : null; + } else { + let v: any = UQLsummarize({}, [{ ...item.metric, alias: item.metric.operator }], currentItems); + outValue[c] = v[item.metric.operator]; + } + }); + out.push(outValue); + }); + return { ...pv, output: out }; + } + if (item.fields?.length === 1) { + let out: any[] = []; + rows.forEach((r) => { + let rowName = item && item.fields ? 
item.fields[0].value : ""; + let outValue = { [rowName]: r }; + let v: any = UQLsummarize( + {}, + [{ ...item.metric, alias: item.metric.operator }], + (input as any[]).filter((ins) => ins[rowName] === r) + ); + outValue["value"] = v[item.metric.operator]; + out.push(outValue); + }); + return { ...pv, output: out }; + } + let v: any = UQLsummarize({}, [{ ...item.metric, alias: item.metric.operator }], input); + return { ...pv, output: v[item.metric.operator] }; +}; diff --git a/src/parser/summarize/summarize.ts b/src/parser/summarize/summarize.ts index b2310b2..0cc556a 100644 --- a/src/parser/summarize/summarize.ts +++ b/src/parser/summarize/summarize.ts @@ -42,7 +42,7 @@ const IsConditionalSummaryMetric = (i: type_summarize_assignment): i is Extract< return false; }; -const UQLsummarize = (o: object, metrics: type_summarize_assignment[], pi: unknown[]): object => { +export const UQLsummarize = (o: object, metrics: type_summarize_assignment[], pi: unknown[]): object => { metrics.forEach((i) => { if (IsConditionalSummaryMetric(i)) { const input: any[] = filterData(pi, i.condition); diff --git a/src/types.ts b/src/types.ts index a7067e9..e34a465 100644 --- a/src/types.ts +++ b/src/types.ts @@ -9,7 +9,7 @@ export type type_function = { alias?: string; operator: FunctionName; args: type export type type_orderby_arg = { field: string; direction: "asc" | "desc" }; export type type_summarize_arg = type_str_type; export type type_summarize_function = { operator: FunctionName; args: type_summarize_arg[] } | { operator: ConditionalFunctionName; condition: type_where_arg[]; ref: type_ref_type }; -export type type_summarize_assignment = { alias?: string } & type_summarize_function; +export type type_summarize_assignment = ({ alias?: string } & type_summarize_function) | { alias?: string; operator: Operator; args: type_summarize_arg[] }; export type type_summarize_item = { metrics: type_summarize_assignment[]; by: type_summarize_arg[] }; export type type_parse_arg = { 
identifier: string; value: string }; @@ -128,6 +128,7 @@ export type CommandType = | "project-reorder" | "extend" | "summarize" + | "pivot" | "range" | "scope" | "where" @@ -187,6 +188,12 @@ type CommandExtend = { type CommandSummarize = { value: type_summarize_item; } & CommandBase<"summarize">; +type CommandPivot = { + value: { + metric: type_summarize_assignment; + fields?: type_ref_type[]; + }; +} & CommandBase<"pivot">; type CommandRange = { value: { start: number; end: number; step: number } | { start: string; end: number; step: string }; } & CommandBase<"range">; @@ -213,6 +220,7 @@ export type Command = | CommandParseYAML | CommandExtend | CommandSummarize + | CommandPivot | CommandRange; export type CommandResult = { context: Record; output: unknown };