-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement bivariate aggregate functions (#1593)
- Loading branch information
Showing
9 changed files
with
623 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
v3/src/models/formula/functions/bivariate-stats-functions.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import { evaluate } from "../test-utils/formula-test-utils" | ||
import { UNDEF_RESULT } from "./function-utils" | ||
|
||
// Note that aggregate functions require formula-test-utils since they use the custom MathJS scope API to support | ||
// caching. Therefore, they cannot be simply tested using basic MathJS evaluation, similar to arithmetic functions. | ||
|
||
// Most of the tests use attributes from the Mammals dataset, comparing v3 results with v2. | ||
|
||
// Expected results per bivariate aggregate function for two attribute pairs.
// Every function is checked with exactly the same three formulas, so the
// expectations are tabulated and the suites are generated from the table.
const bivariateExpectations = [
  { fnName: "correlation", lifeSpanVsSpeed: -0.059392, heightVsMass: 0.684623 },
  { fnName: "linRegrIntercept", lifeSpanVsSpeed: 50.722887, heightVsMass: -516.767727 },
  { fnName: "linRegrSESlope", lifeSpanVsSpeed: 0.252991, heightVsMass: 155.171375 },
  { fnName: "linRegrSlope", lifeSpanVsSpeed: -0.070601, heightVsMass: 728.730807 },
  { fnName: "rSquared", lifeSpanVsSpeed: 0.003527, heightVsMass: 0.468709 }
]

bivariateExpectations.forEach(({ fnName, lifeSpanVsSpeed, heightVsMass }) => {
  describe(fnName, () => {
    it("returns correct value", () => {
      // Pairing LifeSpan with Order is expected to produce the undefined result
      // (presumably because Order is not a numeric attribute — confirm against the dataset).
      expect(evaluate(`${fnName}(LifeSpan, Order)`)).toBe(UNDEF_RESULT)
      // Numeric pairs are compared to 6 decimal places.
      expect(evaluate(`${fnName}(LifeSpan, Speed)`)).toBeCloseTo(lifeSpanVsSpeed, 6)
      expect(evaluate(`${fnName}(Height, Mass)`)).toBeCloseTo(heightVsMass, 6)
    })
  })
})
62 changes: 62 additions & 0 deletions
62
v3/src/models/formula/functions/bivariate-stats-functions.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import { | ||
correlation, linRegrIntercept, linRegrStdErrSlopeAndIntercept, linRegrSlope, rSquared | ||
} from "../../../utilities/stats-utils" | ||
import { IFormulaMathjsFunction } from "../formula-types" | ||
import { aggregateBivariateNumericFnWithFilterFactory, cachedAggregateFnFactory } from "./aggregate-functions" | ||
|
||
/**
 * Formula functions that aggregate over pairs of values.
 *
 * Every entry has the same shape: two required arguments, flagged as an
 * aggregate, cached through `cachedAggregateFnFactory`, and evaluated by handing
 * the collected x/y values to the matching helper from `stats-utils` via
 * `aggregateBivariateNumericFnWithFilterFactory`.
 */
export const bivariateStatsFunctions: Record<string, IFormulaMathjsFunction> = {

  // correlation(x, y)
  correlation: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(points => correlation(points))
  },

  // linRegrIntercept(x, y)
  linRegrIntercept: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(points => linRegrIntercept(points))
  },

  // linRegrSEIntercept(x, y): the `stdErrIntercept` part of the combined std-error helper
  linRegrSEIntercept: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(
      points => linRegrStdErrSlopeAndIntercept(points).stdErrIntercept
    )
  },

  // linRegrSESlope(x, y): the `stdErrSlope` part of the combined std-error helper
  linRegrSESlope: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(
      points => linRegrStdErrSlopeAndIntercept(points).stdErrSlope
    )
  },

  // linRegrSlope(x, y)
  linRegrSlope: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(points => linRegrSlope(points))
  },

  // rSquared(x, y)
  rSquared: {
    numOfRequiredArguments: 2,
    isAggregate: true,
    cachedEvaluateFactory: cachedAggregateFnFactory,
    evaluateRaw: aggregateBivariateNumericFnWithFilterFactory(points => rSquared(points))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/**
 * The 11 observations of the NoInt1 dataset: x runs from 60 to 70 and y from
 * 130 to 140, both in steps of 1, so the points are generated rather than listed.
 */
export const data = Array.from({ length: 11 }, (_, offset) => ({
  x: 60 + offset,
  y: 130 + offset
}))
|
||
/** Certified regression statistics for the no-intercept model y = B1*x + e. */
export const certifiedResults = {
  /** Certified estimate of B1. */
  slope: 2.07438016528926,
  /** Certified standard deviation of the B1 estimate. */
  sdSlope: 0.0165289256198347,
  /** Certified residual standard deviation. */
  sdResiduals: 3.56753034006338,
  /** Certified R-squared. */
  rSquared: 0.999365492298663
}
|
||
// http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat | ||
|
||
/* | ||
NIST/ITL StRD | ||
Dataset Name: NoInt1 (NoInt1.dat) | ||
File Format: ASCII | ||
Certified Values (lines 31 to 44) | ||
Data (lines 61 to 71) | ||
Procedure: Linear Least Squares Regression | ||
Reference: Eberhardt, K., NIST. | ||
Data: 1 Response Variable (y) | ||
1 Predictor Variable (x) | ||
11 Observations | ||
Average Level of Difficulty | ||
Generated Data | ||
Model: Linear Class | ||
1 Parameter (B1) | ||
y = B1*x + e | ||
Certified Regression Statistics | ||
Standard Deviation | ||
Parameter Estimate of Estimate | ||
B1 2.07438016528926 0.165289256198347E-01 | ||
Residual | ||
Standard Deviation 3.56753034006338 | ||
R-Squared 0.999365492298663 | ||
Certified Analysis of Variance Table | ||
Source of Degrees of Sums of Mean | ||
Variation Freedom Squares Squares F Statistic | ||
Regression 1 200457.727272727 200457.727272727 15750.2500000000 | ||
Residual 10 127.272727272727 12.7272727272727 | ||
Data: y x | ||
130 60 | ||
131 61 | ||
132 62 | ||
133 63 | ||
134 64 | ||
135 65 | ||
136 66 | ||
137 67 | ||
138 68 | ||
139 69 | ||
140 70 | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
// Observations as [y, x] tuples, in the same column order and row order as the
// original object list.
const yxPairs: ReadonlyArray<readonly [number, number]> = [
  [0.1, 0.2],
  [338.8, 337.4],
  [118.1, 118.2],
  [888.0, 884.6],
  [9.2, 10.1],
  [228.1, 226.5],
  [668.5, 666.3],
  [998.5, 996.3],
  [449.1, 448.6],
  [778.9, 777.0],
  [559.2, 558.2],
  [0.3, 0.4],
  [0.1, 0.6],
  [778.1, 775.5],
  [668.8, 666.9],
  [339.3, 338.0],
  [448.9, 447.5],
  [10.8, 11.6],
  [557.7, 556.0],
  [228.3, 228.1],
  [998.0, 995.8],
  [888.8, 887.6],
  [119.6, 120.2],
  [0.3, 0.3],
  [0.6, 0.3],
  [557.6, 556.8],
  [339.3, 339.1],
  [888.0, 887.2],
  [998.5, 999.0],
  [778.9, 779.0],
  [10.2, 11.1],
  [117.6, 118.3],
  [228.9, 229.2],
  [668.4, 669.1],
  [449.2, 448.9],
  [0.2, 0.5]
]

/** The 36 observations as `{ y, x }` records. */
export const data = yxPairs.map(([y, x]) => ({ y, x }))
|
||
/** Certified regression statistics for the model y = B0 + B1*x + e. */
export const certifiedResults = {
  /** Number of observations. */
  count: 36,
  /** Certified estimate of B0. */
  intercept: -0.262323073774029,
  /** Certified estimate of B1. */
  slope: 1.00211681802045,
  /** Certified standard deviation of the B0 estimate. */
  sdIntercept: 0.232818234301152,
  /** Certified standard deviation of the B1 estimate. */
  sdSlope: 0.000429796848199937,
  /** Certified residual standard deviation. */
  sdResiduals: 0.884796396144373,
  /** Certified R-squared. */
  rSquared: 0.999993745883712
}
|
||
// http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat | ||
|
||
/* | ||
NIST/ITL StRD | ||
Dataset Name: Norris (Norris.dat) | ||
File Format: ASCII | ||
Certified Values (lines 31 to 46) | ||
Data (lines 61 to 96) | ||
Procedure: Linear Least Squares Regression | ||
Reference: Norris, J., NIST. | ||
Calibration of Ozone Monitors. | ||
Data: 1 Response Variable (y) | ||
1 Predictor Variable (x) | ||
36 Observations | ||
Lower Level of Difficulty | ||
Observed Data | ||
Model: Linear Class | ||
2 Parameters (B0,B1) | ||
y = B0 + B1*x + e | ||
Certified Regression Statistics | ||
Standard Deviation | ||
Parameter Estimate of Estimate | ||
B0 -0.262323073774029 0.232818234301152 | ||
B1 1.00211681802045 0.429796848199937E-03 | ||
Residual | ||
Standard Deviation 0.884796396144373 | ||
R-Squared 0.999993745883712 | ||
Certified Analysis of Variance Table | ||
Source of Degrees of Sums of Mean | ||
Variation Freedom Squares Squares F Statistic | ||
Regression 1 4255954.13232369 4255954.13232369 5436385.54079785 | ||
Residual 34 26.6173985294224 0.782864662630069 | ||
Data: y x | ||
0.1 0.2 | ||
338.8 337.4 | ||
118.1 118.2 | ||
888.0 884.6 | ||
9.2 10.1 | ||
228.1 226.5 | ||
668.5 666.3 | ||
998.5 996.3 | ||
449.1 448.6 | ||
778.9 777.0 | ||
559.2 558.2 | ||
0.3 0.4 | ||
0.1 0.6 | ||
778.1 775.5 | ||
668.8 666.9 | ||
339.3 338.0 | ||
448.9 447.5 | ||
10.8 11.6 | ||
557.7 556.0 | ||
228.3 228.1 | ||
998.0 995.8 | ||
888.8 887.6 | ||
119.6 120.2 | ||
0.3 0.3 | ||
0.6 0.3 | ||
557.6 556.8 | ||
339.3 339.1 | ||
888.0 887.2 | ||
998.5 999.0 | ||
778.9 779.0 | ||
10.2 11.1 | ||
117.6 118.3 | ||
228.9 229.2 | ||
668.4 669.1 | ||
449.2 448.9 | ||
0.2 0.5 | ||
*/ |
Oops, something went wrong.