From a17f279c7c75e6a083ca922133303153bb288123 Mon Sep 17 00:00:00 2001 From: Kun Jinkao <45487685+Snoopy1866@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:29:41 +0800 Subject: [PATCH] feat --- .gitignore | 1 + cspell.json | 3 +- main.py | 8 + src/pystatpower/basic.py | 148 +++++++ src/pystatpower/procedures/two_proportion.py | 381 +++++++++++++++++++ tests/test_basic.py | 107 ++++++ 6 files changed, 647 insertions(+), 1 deletion(-) create mode 100644 main.py create mode 100644 src/pystatpower/basic.py create mode 100644 src/pystatpower/procedures/two_proportion.py create mode 100644 tests/test_basic.py diff --git a/.gitignore b/.gitignore index 68bc17f..8e25ea7 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +/.vscode diff --git a/cspell.json b/cspell.json index 2e6c441..2c54c99 100644 --- a/cspell.json +++ b/cspell.json @@ -13,6 +13,7 @@ "brenth", "proportion", "nullproportion", - "ospp" + "ospp", + "unpooled" ] } diff --git a/main.py b/main.py new file mode 100644 index 0000000..f9c409e --- /dev/null +++ b/main.py @@ -0,0 +1,8 @@ +from pystatpower.basic import * + + +class TestEnum(Enum): + Test1 = 1 + + +Alternative(TestEnum.Test1) diff --git a/src/pystatpower/basic.py b/src/pystatpower/basic.py new file mode 100644 index 0000000..4ab8383 --- /dev/null +++ b/src/pystatpower/basic.py @@ -0,0 +1,148 @@ +from abc import ABC, abstractmethod +from enum import Enum +from math import inf +from numbers import Real + +from pystatpower.interval import Interval + + +class Param(ABC): + """抽象参数基类""" + + domain = None + + @abstractmethod + def __init__(self, value): + pass + + @classmethod + @abstractmethod + def _check(cls, domain, value): + pass + + +class NumericParam(Param): + """数值参数基类""" + + domain = Interval(-inf, inf) + + def __init__(self, value: Real): + cls = type(self) + cls._check(value) + self._value = value + + @property + def value(self): + return self._value + + @classmethod + def _check(cls, value: Real): + domain = cls.domain + if not isinstance(value, Real): + raise TypeError(f"{value} is not a real number") + if value not in domain: + raise ValueError(f"{value} is not in {domain}") + + +class OptionalParam(Param): + """选项参数基类""" + + class EmptyEnum(Enum): + pass + + domain = EmptyEnum + + def __init__(self, value: Enum | str): + cls = type(self) + self._value = cls._check(value) + + @property + def value(self): + return self._value + + @classmethod + def _check(cls, value: Enum | str) -> Enum: + domain = cls.domain + + if isinstance(value, str): + try: + value = domain[value.upper()] + except KeyError: + raise ValueError(f"No such option '{value}' in {domain.__name__}") + elif not isinstance(value, domain): + raise TypeError(f"{value} is not a {domain.__name__}") + + return value + + +class Alpha(NumericParam): + """显著性水平""" + + domain = Interval(0, 1) + + +class Power(NumericParam): + """检验效能""" + + domain = Interval(0, 1) + + +class Mean(NumericParam): + """均值""" + + domain = Interval(-inf, inf) + + +class STD(NumericParam): + """标准差""" + + domain = Interval(0, inf) + + +class Proportion(NumericParam): + """率""" + + domain = Interval(0, 1) + + +class Percent(NumericParam): + """百分比""" + + domain = Interval(0, 1) + + +class Ratio(NumericParam): + """比例""" + + domain = Interval(0, inf) + + +class Size(NumericParam): + """样本量""" + + domain = Interval(0, inf) + + +class DropOutRate(NumericParam): + """脱落率""" + + domain = Interval(0, 1, lower_inclusive=True) + + +class Alternative(OptionalParam): + """备择假设类型 + + Attributes + ---------- + TWO_SIDED : (int) + 双侧检验 + ONE_SIDED : (int) + 单侧检验 + """ + + class AlternativeEnum(Enum): + + TWO_SIDED = 1 + ONE_SIDED = 2 + + domain = AlternativeEnum diff --git a/src/pystatpower/procedures/two_proportion.py b/src/pystatpower/procedures/two_proportion.py new file mode 100644 index 0000000..e166e81 --- /dev/null +++ b/src/pystatpower/procedures/two_proportion.py @@ -0,0 +1,381 @@ +"""两独立样本差异性检验""" + +from abc import abstractmethod +from enum import Enum +from math import sqrt +from numbers import Real + +from scipy.stats import norm +from scipy.optimize import brenth + +from pystatpower.basic import Alternative, OptionalParam, Percent, Proportion, Ratio + + +class TestType(OptionalParam): + """检验类型 + + Attributes + ---------- + Z_TEST_POOLED : (int) + Z 检验(合并方差) + Z_TEST_UNPOOLED : (int) + Z 检验(独立方差) + Z_TEST_CC_POOLED : (int) + Z 检验(连续性校正,合并方差) + Z_TEST_CC_UNPOOLED : (int) + Z 检验(连续性校正,独立方差) + """ + + class EnumTestType(Enum): + + Z_TEST_POOLED = 1 + Z_TEST_UNPOOLED = 2 + Z_TEST_CC_POOLED = 3 + Z_TEST_CC_UNPOOLED = 4 + + domain = EnumTestType + + +class GroupAllocationType: + """样本量分配类型""" + + class SolveForSize(OptionalParam): + """样本量分配类型(求解目标:样本量) + + Attributes + ---------- + EQUAL : (int) + 等量分配 + SIZE_OF_TREATMENT : (int) + 试验组样本量 + SIZE_OF_REFERENCE : (int) + 对照组样本量 + RATIO_OF_TREATMENT_TO_REFERENCE : (int) + 试验组与对照组样本量比例 + RATIO_OF_REFERENCE_TO_TREATMENT : (int) + 对照组与试验组样本量比例 + PERCENT_OF_TREATMENT : (int) + 试验组样本百分比 + PERCENT_OF_REFERENCE : (int) + 对照组样本百分比 + """ + + class EnumGroupAllocationType(Enum): + + EQUAL = 1 + SIZE_OF_TREATMENT = 2 + SIZE_OF_REFERENCE = 3 + RATIO_OF_TREATMENT_TO_REFERENCE = 4 + RATIO_OF_REFERENCE_TO_TREATMENT = 5 + PERCENT_OF_TREATMENT = 6 + PERCENT_OF_REFERENCE = 7 + + domain = EnumGroupAllocationType + + +def fun_power( + alpha: float, + treatment_n: float, + reference_n: float, + treatment_proportion: float, + reference_proportion: float, + alternative: Alternative.AlternativeEnum, + test_type: TestType.EnumTestType, +): + n1 = treatment_n + n2 = reference_n + p1 = treatment_proportion + p2 = reference_proportion + + # 计算标准误 + match test_type: + case TestType.EnumTestType.Z_TEST_POOLED | TestType.EnumTestType.Z_TEST_CC_POOLED: + p_hat = (n1 * p1 + n2 * p2) / (n1 + n2) + se = sqrt(p_hat * (1 - p_hat) * (1 / n1 + 1 / n2)) + case TestType.EnumTestType.Z_TEST_UNPOOLED | TestType.EnumTestType.Z_TEST_CC_UNPOOLED: + se = sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2) + case _: + assert False, "未知的检验类型" + + # 连续性校正 + c = 0 + if test_type in [TestType.EnumTestType.Z_TEST_CC_POOLED, TestType.EnumTestType.Z_TEST_CC_UNPOOLED]: + c = (1 / 2) * (1 / n1 + 1 / n2) + + # 计算检验效能 + match alternative: + case Alternative.AlternativeEnum.TWO_SIDED: + z_alpha = norm.ppf(1 - alpha / 2) + z_stat = [(p1 - p2 - c) / se, (p1 - p2 + c) / se] + power = norm.cdf(-z_alpha - z_stat[0]) + 1 - norm.cdf(z_alpha - z_stat[1]) + case Alternative.AlternativeEnum.ONE_SIDED: + z_alpha = norm.ppf(1 - alpha) + if p1 > p2: + z_stat = (p1 - p2 + c) / se + power = 1 - norm.cdf(z_alpha - z_stat) + elif p1 <= p2: + z_stat = (p1 - p2 - c) / se + power = norm.cdf(-z_alpha - z_stat) + case _: + assert False, "未知的备择假设类型" + + return power + + +class TwoProportion: + + @abstractmethod + def solve(self): + pass + + +class GroupAllocation: + def __init__( + self, + group_allocation_type: ( + GroupAllocationType.SolveForSize.EnumGroupAllocationType | str + ) = GroupAllocationType.SolveForSize.EnumGroupAllocationType.EQUAL, + size_of_treatment: Real | None = None, + size_of_reference: Real | None = None, + ratio_of_treatment_to_reference: Real | None = None, + ratio_of_reference_to_treatment: Real | None = None, + percent_of_treatment: Real | None = None, + percent_of_reference: Real | None = None, + ): + self.group_allocation_type = GroupAllocationType.SolveForSize(group_allocation_type) + self.size_of_treatment = Proportion(size_of_treatment) + self.size_of_reference = Proportion(size_of_reference) + self.ratio_of_treatment_to_reference = Ratio(ratio_of_treatment_to_reference) + self.ratio_of_reference_to_treatment = Ratio(ratio_of_reference_to_treatment) + self.percent_of_treatment = Percent(percent_of_treatment) + self.percent_of_reference = Percent(percent_of_reference) + + match group_allocation_type.value: + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.EQUAL: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: n + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.SIZE_OF_TREATMENT: + self.treatment_size_formula = lambda n: self.size_of_treatment.value + self.reference_size_formula = lambda n: n + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.SIZE_OF_REFERENCE: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: self.size_of_reference.value + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.RATIO_OF_TREATMENT_TO_REFERENCE: + self.treatment_size_formula = lambda n: self.ratio_of_treatment_to_reference.value * n + self.reference_size_formula = lambda n: n + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.RATIO_OF_REFERENCE_TO_TREATMENT: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: self.ratio_of_reference_to_treatment.value * n + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.PERCENT_OF_TREATMENT: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = ( + lambda n: (1 - self.percent_of_treatment.value) / self.percent_of_treatment.value * n + ) + case GroupAllocationType.SolveForSize.EnumGroupAllocationType.PERCENT_OF_REFERENCE: + self.treatment_size_formula = ( + lambda n: (1 - self.percent_of_reference.value) / self.percent_of_reference.value * n + ) + self.reference_size_formula = lambda n: n + + +class TwoProportionSolveForAlpha(TwoProportion): + def __init__( + self, + power: Real, + alternative: str, + test_type: str, + treatment_proportion: Real, + reference_proportion: Real, + group_allocation: GroupAllocation, + ): + pass + + +# solve for sample size +class EnumTwoProportionSolveForSizeGroupAllocation(Enum): + EQUAL = 1 + SIZE_OF_TREATMENT = 2 + SIZE_OF_REFERENCE = 3 + RATIO_OF_TREATMENT_TO_REFERENCE = 4 + RATIO_OF_REFERENCE_TO_TREATMENT = 5 + PERCENT_OF_TREATMENT = 6 + PERCENT_OF_REFERENCE = 7 + + +class TwoProportionSolveForSizeGroupAllocator: + def __init__( + self, + group_allocation_option: EnumTwoProportionSolveForSizeGroupAllocation = EnumTwoProportionSolveForSizeGroupAllocation.EQUAL, + treatment_size: float = None, + reference_size: float = None, + ratio_of_treatment_to_reference: float = None, + ratio_of_reference_to_treatment: float = None, + percent_of_treatment: float = None, + percent_of_reference: float = None, + ): + match group_allocation_option: + case EnumTwoProportionSolveForSizeGroupAllocation.EQUAL: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: n + case EnumTwoProportionSolveForSizeGroupAllocation.SIZE_OF_TREATMENT: + self.treatment_size_formula = lambda n: treatment_size + self.reference_size_formula = lambda n: n + case EnumTwoProportionSolveForSizeGroupAllocation.SIZE_OF_REFERENCE: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: reference_size + case EnumTwoProportionSolveForSizeGroupAllocation.RATIO_OF_TREATMENT_TO_REFERENCE: + self.treatment_size_formula = lambda n: ratio_of_treatment_to_reference * n + self.reference_size_formula = lambda n: n + case EnumTwoProportionSolveForSizeGroupAllocation.RATIO_OF_REFERENCE_TO_TREATMENT: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: ratio_of_reference_to_treatment * n + case EnumTwoProportionSolveForSizeGroupAllocation.PERCENT_OF_TREATMENT: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: (1 - percent_of_treatment) / percent_of_treatment * n + case EnumTwoProportionSolveForSizeGroupAllocation.PERCENT_OF_REFERENCE: + self.treatment_size_formula = lambda n: (1 - percent_of_reference) / percent_of_reference * n + self.reference_size_formula = lambda n: n + + +class TwoProportionSolveForSize(TwoProportion): + + def __init__( + self, + alpha: float = 0.05, + power: float = 0.80, + alternative: EnumAlternative = EnumAlternative.TWO_SIDED, + test_type: EnumTestType = EnumTestType.Z_TEST_POOLED, + treatment_proportion: float = 0.95, + reference_proportion: float = 0.80, + group_allocation: TwoProportionSolveForSizeGroupAllocator = TwoProportionSolveForSizeGroupAllocator(), + ): + self._alpha = alpha + self._power = power + self._alternative = alternative + self._test_type = test_type + self._treatment_proportion = treatment_proportion + self._reference_proportion = reference_proportion + self._group_allocation = group_allocation + + @property + def alpha(self) -> float: + return self._alpha + + @alpha.setter + def alpha(self, alpha: float): + self._alpha = alpha + + @property + def power(self) -> float: + return self._power + + @power.setter + def power(self, power: float): + self._power = power + + @property + def alternative(self) -> EnumAlternative: + return self._alternative + + @alternative.setter + def alternative(self, alternative: EnumAlternative): + self._alternative = alternative + + @property + def test_type(self) -> EnumTestType: + return self._test_type + + @test_type.setter + def test_type(self, test_type: EnumTestType): + self._test_type = test_type + + @property + def treatment_proportion(self) -> float: + return self._treatment_proportion + + @treatment_proportion.setter + def treatment_proportion(self, treatment_proportion: float): + self._treatment_proportion = treatment_proportion + + @property + def reference_proportion(self) -> float: + return self._reference_proportion + + @reference_proportion.setter + def reference_proportion(self, reference_proportion: float): + self._reference_proportion = reference_proportion + + @property + def group_allocation(self) -> TwoProportionSolveForSizeGroupAllocator: + return self._group_allocation + + @group_allocation.setter + def group_allocation(self, group_allocation: TwoProportionSolveForSizeGroupAllocator): + self._group_allocation = group_allocation + + def solve(self): + if None in ( + self._alpha, + self._power, + self._alternative, + self._test_type, + self._treatment_proportion, + self._reference_proportion, + self._group_allocation, + ): + raise ValueError("所有参数必须在调用 solve 之前设置") + self._eval = ( + lambda n: fun_power( + self._alpha, + self._group_allocation.treatment_size_formula(n), + self._group_allocation.reference_size_formula(n), + self._treatment_proportion, + self._reference_proportion, + self._alternative, + self._test_type, + ) + - self._power + ) + try: + n = brenth(self._eval, 1, 1e10) + except ValueError as e: + raise ValueError("无法求解样本量") from e + self._treatment_n = self._group_allocation.treatment_size_formula(n) + self._reference_n = self._group_allocation.reference_size_formula(n) + + +# solve for alpha +class EnumTwoProportionSolveForAlphaGroupAllocation(Enum): + EQUAL = 1 + INDIVIDUAL = 2 + TREATMENT_SIZE_AND_RATIO_OF_TREATMENT_TO_REFERENCE = 3 + TREATMENT_SIZE_AND_RATIO_OF_REFERENCE_TO_TREATMENT = 4 + REFERENCE_SIZE_AND_RATIO_OF_TREATMENT_TO_REFERENCE = 5 + REFERENCE_SIZE_AND_RATIO_OF_REFERENCE_TO_TREATMENT = 6 + TOTAL_SIZE_AND_TREATMENT_PERCENT = 7 + TOTAL_SIZE_AND_REFERENCE_PERCENT = 8 + TOTAL_SIZE_AND_RATIO_OF_TREATMENT_TO_REFERENCE = 9 + TOTAL_SIZE_AND_RATIO_OF_REFERENCE_TO_TREATMENT = 10 + TREATMENT_SIZE_AND_TREATMENT_PERCENT = 11 + TREATMENT_SIZE_AND_REFERENCE_PERCENT = 12 + REFERENCE_SIZE_AND_TREATMENT_PERCENT = 13 + REFERENCE_SIZE_AND_REFERENCE_PERCENT = 14 + + +class TwoProportionSolveForAlphaGroupAllocator: + def __init__( + self, + group_allocation_option: EnumTwoProportionSolveForAlphaGroupAllocation = EnumTwoProportionSolveForAlphaGroupAllocation.EQUAL, + total_size: float = None, + treatment_size: float = None, + reference_size: float = None, + ratio_of_treatment_to_reference: float = None, + ratio_of_reference_to_treatment: float = None, + percent_of_treatment: float = None, + percent_of_reference: float = None, + ): + + match group_allocation_option: + case EnumTwoProportionSolveForAlphaGroupAllocation.EQUAL: + self.treatment_size_formula = lambda n: n + self.reference_size_formula = lambda n: n diff --git a/tests/test_basic.py b/tests/test_basic.py new file mode 100644 index 0000000..b8fbbe6 --- /dev/null +++ b/tests/test_basic.py @@ -0,0 +1,107 @@ +import pytest +from pystatpower.basic import * + + +class ErrorEnum(Enum): + Error = 1 + + +def test_alpha(): + assert Alpha(0.05).value == 0.05 + assert Alpha(0.001).value == 0.001 + + with pytest.raises(ValueError): + Alpha(-1) + with pytest.raises(ValueError): + Alpha(0) + with pytest.raises(ValueError): + Alpha(1) + with pytest.raises(ValueError): + Alpha(2) + + +def test_power(): + assert Power(0.05).value == 0.05 + assert Power(0.001).value == 0.001 + + with pytest.raises(ValueError): + Power(-1) + with pytest.raises(ValueError): + Power(0) + with pytest.raises(ValueError): + Power(1) + with pytest.raises(ValueError): + Power(2) + + +def test_mean(): + assert Mean(-10).value == -10 + assert Mean(0).value == 0 + assert Mean(10).value == 10 + + with pytest.raises(ValueError): + Mean(-inf) + with pytest.raises(ValueError): + Mean(inf) + + +def test_std(): + assert STD(10).value == 10 + + with pytest.raises(ValueError): + STD(-10) + with pytest.raises(ValueError): + STD(0) + with pytest.raises(ValueError): + STD(inf) + + +def test_proportion(): + assert Proportion(0.5).value == 0.5 + + with pytest.raises(ValueError): + Proportion(-1) + with pytest.raises(ValueError): + Proportion(0) + with pytest.raises(ValueError): + Proportion(1) + with pytest.raises(ValueError): + Proportion(2) + + +def test_size(): + assert Size(20).value == 20 + assert Size(20.142857).value == 20.142857 + + with pytest.raises(ValueError): + Size(-1) + with pytest.raises(ValueError): + Size(0) + with pytest.raises(ValueError): + Size(inf) + + +def test_dropout_rate(): + assert DropOutRate(0).value == 0 + assert DropOutRate(0.5).value == 0.5 + + with pytest.raises(ValueError): + DropOutRate(-1) + with pytest.raises(ValueError): + DropOutRate(1) + with pytest.raises(ValueError): + DropOutRate(2) + + +def test_alternative(): + assert Alternative("two_sided").value == Alternative.AlternativeEnum.TWO_SIDED + assert Alternative("one_sided").value == Alternative.AlternativeEnum.ONE_SIDED + + assert Alternative(Alternative.AlternativeEnum.TWO_SIDED).value == Alternative.AlternativeEnum.TWO_SIDED + assert Alternative(Alternative.AlternativeEnum.ONE_SIDED).value == Alternative.AlternativeEnum.ONE_SIDED + + with pytest.raises(ValueError): + Alternative("greater") + + with pytest.raises(TypeError): + Alternative(ErrorEnum.Error)