Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix processing bugs after file update #6

Merged
merged 7 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .coverage
Binary file not shown.
155 changes: 79 additions & 76 deletions coverage.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" ?>
<coverage version="7.3.1" timestamp="1701986374806" lines-valid="162" lines-covered="144" line-rate="0.8889" branches-valid="54" branches-covered="49" branch-rate="0.9074" complexity="0">
<coverage version="7.3.1" timestamp="1703026094115" lines-valid="165" lines-covered="147" line-rate="0.8909" branches-valid="54" branches-covered="49" branch-rate="0.9074" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
<sources>
<source>/Users/jvivian/Library/CloudStorage/[email protected]/My Drive/projects/covid19-drDFM/covid19_drdfm</source>
<source>/home/jvivian/covid19-drDFM/covid19_drdfm</source>
</sources>
<packages>
<package name="." line-rate="0.8889" branch-rate="0.9074" complexity="0">
<package name="." line-rate="0.8909" branch-rate="0.9074" complexity="0">
<classes>
<class name="cli.py" filename="cli.py" complexity="0" line-rate="1" branch-rate="1">
<methods/>
Expand Down Expand Up @@ -42,7 +42,7 @@
<line number="147" hits="1"/>
</lines>
</class>
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9118" branch-rate="0.7917">
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9143" branch-rate="0.7917">
<methods/>
<lines>
<line number="7" hits="1"/>
Expand All @@ -52,67 +52,69 @@
<line number="12" hits="1"/>
<line number="13" hits="1"/>
<line number="15" hits="1"/>
<line number="18" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="19" hits="1"/>
<line number="16" hits="1"/>
<line number="19" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="25" hits="1"/>
<line number="27" hits="1"/>
<line number="30" hits="1"/>
<line number="40" hits="1"/>
<line number="42" hits="1"/>
<line number="43" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="44" hits="1"/>
<line number="22" hits="1"/>
<line number="26" hits="1"/>
<line number="28" hits="1"/>
<line number="31" hits="1"/>
<line number="41" hits="1"/>
<line number="43" hits="1"/>
<line number="45" hits="1"/>
<line number="46" hits="1"/>
<line number="47" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="48" hits="1"/>
<line number="49" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="60" hits="1"/>
<line number="61" hits="1"/>
<line number="62" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="63" hits="1"/>
<line number="50" hits="1"/>
<line number="53" hits="1"/>
<line number="62" hits="1"/>
<line number="63" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="64" hits="1"/>
<line number="65" hits="1"/>
<line number="66" hits="1"/>
<line number="66" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="67" hits="1"/>
<line number="69" hits="1"/>
<line number="81" hits="1"/>
<line number="82" hits="1"/>
<line number="84" hits="1"/>
<line number="70" hits="1"/>
<line number="73" hits="1"/>
<line number="85" hits="1"/>
<line number="86" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="88" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="89"/>
<line number="89" hits="0"/>
<line number="90" hits="0"/>
<line number="92" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="95" hits="1"/>
<line number="96" hits="0"/>
<line number="97" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="98,99"/>
<line number="98" hits="0"/>
<line number="99" hits="0"/>
<line number="101" hits="1"/>
<line number="102" hits="1"/>
<line number="86" hits="1"/>
<line number="88" hits="1"/>
<line number="89" hits="1"/>
<line number="90" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="92" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="93"/>
<line number="93" hits="0"/>
<line number="94" hits="0"/>
<line number="96" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="1"/>
<line number="99" hits="1"/>
<line number="100" hits="0"/>
<line number="101" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="102,103"/>
<line number="102" hits="0"/>
<line number="103" hits="0"/>
<line number="105" hits="1"/>
<line number="113" hits="1"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="116" hits="1"/>
<line number="106" hits="1"/>
<line number="109" hits="1"/>
<line number="117" hits="1"/>
<line number="118" hits="1"/>
<line number="119" hits="1"/>
<line number="120" hits="1"/>
<line number="121" hits="1"/>
<line number="131" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="136"/>
<line number="132" hits="1"/>
<line number="133" hits="1"/>
<line number="135" hits="1"/>
<line number="136" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="122" hits="1"/>
<line number="125" hits="1"/>
<line number="135" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="140"/>
<line number="136" hits="1"/>
<line number="137" hits="1"/>
<line number="138" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="139" hits="1"/>
<line number="140" hits="1"/>
<line number="141" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="exit"/>
<line number="140" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="141" hits="1"/>
<line number="142" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="143" hits="1"/>
<line number="144" hits="1"/>
<line number="145" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="exit"/>
<line number="146" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="147" hits="1"/>
</lines>
</class>
<class name="processing.py" filename="processing.py" complexity="0" line-rate="1" branch-rate="1">
Expand All @@ -134,48 +136,49 @@
<line number="33" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="34" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="35" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="53" hits="1"/>
<line number="55" hits="1"/>
<line number="56" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="57" hits="1"/>
<line number="58" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="59" hits="1"/>
<line number="60" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="61" hits="1"/>
<line number="62" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="63" hits="1"/>
<line number="65" hits="1"/>
<line number="68" hits="1"/>
<line number="74" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="75" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="78" hits="1"/>
<line number="87" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="98" hits="1"/>
<line number="107" hits="1"/>
<line number="66" hits="1"/>
<line number="72" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="73" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="76" hits="1"/>
<line number="85" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="96" hits="1"/>
<line number="105" hits="1"/>
<line number="106" hits="1"/>
<line number="107" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="108" hits="1"/>
<line number="109" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="109" hits="1"/>
<line number="110" hits="1"/>
<line number="111" hits="1"/>
<line number="111" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="112" hits="1"/>
<line number="113" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="118" hits="1"/>
<line number="127" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="128" hits="1"/>
<line number="129" hits="1"/>
<line number="132" hits="1"/>
<line number="141" hits="1"/>
<line number="144" hits="1"/>
<line number="113" hits="1"/>
<line number="116" hits="1"/>
<line number="125" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="126" hits="1"/>
<line number="127" hits="1"/>
<line number="130" hits="1"/>
<line number="139" hits="1"/>
<line number="142" hits="1"/>
<line number="155" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="157" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="159" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="161" hits="1"/>
<line number="162" hits="1"/>
<line number="165" hits="1"/>
<line number="159" hits="1"/>
<line number="160" hits="1"/>
<line number="163" hits="1"/>
<line number="172" hits="1"/>
<line number="174" hits="1"/>
<line number="176" hits="1"/>
<line number="177" hits="1"/>
<line number="178" hits="1"/>
<line number="179" hits="1"/>
<line number="180" hits="1"/>
<line number="181" hits="1"/>
</lines>
</class>
<class name="scm.py" filename="scm.py" complexity="0" line-rate="0" branch-rate="1">
Expand Down
2 changes: 1 addition & 1 deletion covid19_drdfm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"Demand_5": "Cons5",
"Demand_6": "Employment1",
"Demand_7": "Employment2",
"Supply_1": "GDP",
"GDP": "GDP",
"Supply_2": "UI",
"Supply_3": "PartR",
"Supply_4": "UR",
Expand Down
6 changes: 5 additions & 1 deletion covid19_drdfm/dfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from statsmodels.tsa.stattools import adfuller

from covid19_drdfm.constants import FACTORS
from covid19_drdfm.processing import normalize


@dataclass
Expand Down Expand Up @@ -40,9 +41,12 @@ def state_process(df: pd.DataFrame, state: str) -> pd.DataFrame:
df = df[df.State == state]
#! The truncation will be removed when data is updated in OCT - A.C.
df = df[:-12]
#! Test double-norm
df = normalize(df).fillna(0)
#! TEST REMOVE
const_cols = [x for x in df.columns if is_constant(df[x])]
pprint(f"Constant Columns...dropping\n{const_cols}")
df = df.drop(columns=const_cols)
df = df.drop(columns=const_cols).set_index("Time", drop=True)
return df


Expand Down
7 changes: 3 additions & 4 deletions covid19_drdfm/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def get_df() -> pd.DataFrame:
dfs = [pd.read_csv(x) for x in paths]
return (
reduce(lambda x, y: pd.merge(x, y, on=["State", "Year", "Period"], how="left"), dfs)
.fillna(0)
.drop(columns=["Monetary_1_x", "Monetary_11_x"])
.rename(columns={"Monetary_1_y": "Monetary_1", "Monetary_11_y": "Monetary_11"})
.drop(
Expand All @@ -46,7 +45,6 @@ def get_df() -> pd.DataFrame:
.pipe(adjust_pandemic_response)
.pipe(diff_vars, cols=DIFF_COLS)
.pipe(diff_vars, cols=LOG_DIFF_COLS, log=True)
.fillna(0)
.pipe(normalize)
.drop(index=0) # Drop first row with NaNs from diff
)
Expand Down Expand Up @@ -90,7 +88,7 @@ def adjust_inflation(df: pd.DataFrame) -> pd.DataFrame:
.assign(Demand_3=lambda x: x.Demand_3.div(x.Monetary_3 / 100))
.assign(Demand_4=lambda x: x.Demand_4.div(x.Monetary_3 / 100))
.assign(Demand_5=lambda x: x.Demand_5.div(x.Monetary_3 / 100))
.assign(Supply_1=lambda x: x.Supply_1.div(x.Monetary_3 / 100))
.assign(GDP=lambda x: x.GDP.div(x.Monetary_3 / 100))
.assign(Supply_6=lambda x: x.Supply_6.div(x.Monetary_3 / 100))
)

Expand Down Expand Up @@ -171,11 +169,12 @@ def normalize(df: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: Normalized and stationary DataFrame
"""
meta_cols = df[["State", "Time"]]
meta_cols = df[["State", "Time"]].copy().reset_index(drop=True)
# df = df.drop(columns=["Time"]) if "Time" in df.columns else df
df = df.drop(columns=["State", "Time"])
# Normalize data
scaler = MinMaxScaler()
new = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
new["State"] = meta_cols["State"]
new["Time"] = meta_cols["Time"]
return new
21 changes: 10 additions & 11 deletions covid19_drdfm/streamlit/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from rich import print as pprint
from sklearn.preprocessing import MinMaxScaler

from covid19_drdfm.constants import FACTORS
from covid19_drdfm.dfm import state_process
from covid19_drdfm.processing import get_df, get_factors
from covid19_drdfm.processing import NAME_MAP
from covid19_drdfm.processing import NAME_MAP, get_df, normalize

st.set_page_config(layout="wide")
pio.templates.default = "plotly_white"

DEFAULTS = {
"Uncat": ["Monetary_5", "Monetary_9", "Monetary_10", "Supply_1", "Supply_7"],
"Uncat": ["Monetary_5", "Monetary_9", "Monetary_10", "GDP", "Supply_7"],
"Consumption": ["Demand_3", "Demand_4", "Demand_5"],
"Response": [
"Pandemic_Response_1",
Expand All @@ -32,8 +32,8 @@
"Inflation": ["Monetary_2", "Monetary_3", "Monetary_1"],
"Pandemic": ["Pandemic_1", "Pandemic_2", "Pandemic_6", "Pandemic_9", "Pandemic_7", "Pandemic_10"],
}
DEFAULTS = {NAME_MAP[x]: [NAME_MAP[z] for z in y] for x, y in DEFAULTS.items() if x in NAME_MAP in NAME_MAP}
print(DEFAULTS)
DEFAULTS = {x: [NAME_MAP[z] for z in y] for x, y in DEFAULTS.items()}
# st.write(DEFAULTS)


def center_title(text):
Expand All @@ -55,10 +55,9 @@ def run_parameterized_model(

"""
# Factors and input data
factors = get_factors()
factor_multiplicities = {"Global": global_multiplier}
df = state_process(df, state)
columns = list(columns) + ["State", "Time"]
columns = list(columns) # + ["State", "Time"]
columns = [x for x in columns if x in df.columns]
new = df[columns]
variables = list(factors.keys())
Expand All @@ -73,9 +72,9 @@ def run_parameterized_model(
# Run Model
if (out / "model.csv").exists():
return
model = sm.tsa.DynamicFactorMQ(new, factors=factors, factor_multiplicities=factor_multiplicities)
model = sm.tsa.DynamicFactorMQ(new, factors=FACTORS, factor_multiplicities=factor_multiplicities)
try:
results = model.fit(disp=10, maxiter=5_000)
results = model.fit(disp=10, maxiter=10_000)
except Exception as e:
with open(outdir / "failed.txt", "a") as f:
f.write(f"{state}\t{e}\n")
Expand All @@ -86,7 +85,7 @@ def run_parameterized_model(
f.write(results.summary().as_csv())
filtered = results.factors["filtered"]
filtered["State"] = state
filtered.to_csv(out / "filtered-factors.csv")
filtered.to_csv(out / "filtered-factors.csv", index=None)
return model


Expand All @@ -97,7 +96,7 @@ def get_data():

df = get_df()
sub = pd.Series([x for x in df.columns if x not in ["State", "Time"]], name="Variables").to_frame()
factors = get_factors()
factors = FACTORS.copy()
factor_vars = list(factors.keys())
_ = [factors.pop(x) for x in factor_vars if x not in df.columns]
sub["Group"] = [factors[x][1] for x in sub.Variables if x in df.columns]
Expand Down
7 changes: 4 additions & 3 deletions tests/test_dfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# TODO: write test output to a dedicated directory instead of scattering files across the working directory
def test_run_model():
df = get_df()
run_model(df, "NY", Path("./testdir"))
assert Path("./testdir/NY/model.csv").exists()
assert Path("./testdir/NY/results.csv").exists()
state = "SD"
run_model(df, state, Path("./testdir"))
assert Path("./testdir/SD/model.csv").exists()
assert Path("./testdir/SD/results.csv").exists()
shutil.rmtree("./testdir")