Skip to content

Commit

Permalink
Feature flask implement random forest (#116)
Browse files Browse the repository at this point in the history
* Create util file

Move util functions to util file to be shared by all models

* Add Notebook for RF modelling

* Add notebook for SARIMAX and Randomforest

Organize the util functions and add SARIMAX and Random Forest models
  • Loading branch information
mjsaeed authored Nov 29, 2023
1 parent 040338b commit 45f446d
Show file tree
Hide file tree
Showing 9 changed files with 2,375 additions and 37 deletions.
45 changes: 8 additions & 37 deletions neo_dolfin/ai/savings/SavingPredAI.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,51 +9,22 @@
import csv
import itertools
import datetime
import SavingPredAIUtil as util

#read input file
transfile = "neo_dolfin/static/modified_transactions_data.csv"
df = pd.read_csv(transfile)
print("File read into df")
print(df.head(5))

# Resample the DataFrame to a daily frequency and forward-fill missing values
def data_resample(df):
    """Resample the transactions to a daily frequency, forward-filling gaps.

    The ``postDate`` column is parsed with ``pd.to_datetime`` and used as the
    index for resampling; afterwards it is restored as a regular column.

    The input DataFrame is not modified, so the function can safely be called
    more than once on the same frame.

    Args:
        df: DataFrame containing a ``postDate`` column that
            ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame resampled with the ``'D'`` rule and forward-filled,
        with ``postDate`` as its first column.
    """
    # assign() works on a copy, so the caller keeps its raw postDate column
    # and its original index.
    indexed = df.assign(postDate=pd.to_datetime(df['postDate'])).set_index('postDate')
    df2 = indexed.resample('D').ffill()
    # Reset the index to have 'postDate' as a regular column again
    print("resampling done.. resting the index")
    return df2.reset_index()

data= data_resample(df)
transfile = "neo_dolfin/static/data/modified_transactions_data.csv"
df = util.read_file(path = transfile )

#resample the DataFrame with daily frequency and forward-fill missing values
data= util.data_resample(df)

# Split the data into train and test
def train_testsplit(df, trainsize):
    """Split the ``balance`` column into leading (train) and trailing (test) parts.

    The split is positional: rows are taken in the order they appear in
    ``df`` and are not re-sorted. The input DataFrame is not modified, so the
    function can be called repeatedly on the same frame.

    Args:
        df: DataFrame with ``postDate`` and ``balance`` columns.
        trainsize: Fraction of rows assigned to the training part (e.g. 0.8).

    Returns:
        Tuple ``(traindata, testdata)`` of ``balance`` Series indexed by
        ``postDate``. ``traindata`` holds the first
        ``int(len(df) * trainsize)`` rows and ``testdata`` the remainder.
    """
    # set_index without inplace returns a new frame; the caller's df keeps
    # postDate as a regular column.
    balance = df.set_index('postDate')['balance']
    train_size = int(len(df) * trainsize)
    traindata = balance.iloc[:train_size]
    testdata = balance.iloc[train_size:]
    return traindata, testdata

train_data,test_data = train_testsplit(data,0.8)
train_data,test_data = util.train_testsplit(data,0.8)
print("train sample:\n", train_data.head(3))
print("test sample:\n", test_data.head(3))

# checking stationarity
def ad_test(dataset):
    """Run an Augmented Dickey-Fuller test on ``dataset`` and print a report.

    Prints the test statistic, p-value, number of lags, number of
    observations and the critical values returned by ``adfuller``, followed
    by exactly one verdict line based on a 0.05 p-value threshold.

    Args:
        dataset: One-dimensional series of values passed straight to
            ``adfuller``.

    Returns:
        None. The results are only printed.
    """
    dftest = adfuller(dataset, autolag='AIC')
    print("1. ADF : ", dftest[0])
    print("2. P-Value : ", dftest[1])
    print("3. Num Of Lags : ", dftest[2])
    print("4. Num Of Observations Used For ADF Regression:", dftest[3])
    print("5. Critical Values :")
    for key, val in dftest[4].items():
        print("\t", key, ": ", val)
    # The two verdicts are mutually exclusive; without the else branch the
    # "stationary" line was printed even when p > 0.05.
    if dftest[1] > 0.05:
        print("Data is not stationary")  # if p>0.05; Data is not stationary
    else:
        print("Data is stationary")
ad_test(train_data)
util.ad_test(train_data)

print("Stationarity check performed.")

Expand Down
44 changes: 44 additions & 0 deletions neo_dolfin/ai/savings/SavingPredAIUtil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas as pd
from statsmodels.tsa.stattools import adfuller

#read input file
def read_file(path):
    """Load the CSV file at ``path`` into a DataFrame and print a preview.

    Args:
        path: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The DataFrame read from the file.
    """
    frame = pd.read_csv(path)
    print("File read into df")
    # Show the first ten rows as a quick sanity check of the load
    preview = frame.head(10)
    print(preview)
    return frame

# Resample the DataFrame to a daily frequency and forward-fill missing values
def data_resample(df):
    """Resample the transactions to a daily frequency, forward-filling gaps.

    The ``postDate`` column is parsed with ``pd.to_datetime`` and used as the
    index for resampling; afterwards it is restored as a regular column.

    The input DataFrame is not modified, so the function can safely be called
    more than once on the same frame.

    Args:
        df: DataFrame containing a ``postDate`` column that
            ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame resampled with the ``'D'`` rule and forward-filled,
        with ``postDate`` as its first column.
    """
    # assign() works on a copy, so the caller keeps its raw postDate column
    # and its original index.
    indexed = df.assign(postDate=pd.to_datetime(df['postDate'])).set_index('postDate')
    df2 = indexed.resample('D').ffill()
    # Reset the index to have 'postDate' as a regular column again
    print("resampling done.. resting the index")
    return df2.reset_index()

# Split the data into train and test
def train_testsplit(df, trainsize):
    """Split the ``balance`` column into leading (train) and trailing (test) parts.

    The split is positional: rows are taken in the order they appear in
    ``df`` and are not re-sorted. The input DataFrame is not modified, so the
    function can be called repeatedly on the same frame.

    Args:
        df: DataFrame with ``postDate`` and ``balance`` columns.
        trainsize: Fraction of rows assigned to the training part (e.g. 0.8).

    Returns:
        Tuple ``(traindata, testdata)`` of ``balance`` Series indexed by
        ``postDate``. ``traindata`` holds the first
        ``int(len(df) * trainsize)`` rows and ``testdata`` the remainder.
    """
    # set_index without inplace returns a new frame; the caller's df keeps
    # postDate as a regular column.
    balance = df.set_index('postDate')['balance']
    train_size = int(len(df) * trainsize)
    traindata = balance.iloc[:train_size]
    testdata = balance.iloc[train_size:]
    return traindata, testdata

# checking stationarity
def ad_test(dataset):
    """Run an Augmented Dickey-Fuller test on ``dataset`` and print a report.

    Prints the test statistic, p-value, number of lags, number of
    observations and the critical values returned by ``adfuller``, followed
    by exactly one verdict line based on a 0.05 p-value threshold.

    Args:
        dataset: One-dimensional series of values passed straight to
            ``adfuller``.

    Returns:
        None. The results are only printed.
    """
    dftest = adfuller(dataset, autolag='AIC')
    print("1. ADF : ", dftest[0])
    print("2. P-Value : ", dftest[1])
    print("3. Num Of Lags : ", dftest[2])
    print("4. Num Of Observations Used For ADF Regression:", dftest[3])
    print("5. Critical Values :")
    for key, val in dftest[4].items():
        print("\t", key, ": ", val)
    # The two verdicts are mutually exclusive; without the else branch the
    # "stationary" line was printed even when p > 0.05.
    if dftest[1] > 0.05:
        print("Data is not stationary")  # if p>0.05; Data is not stationary
    else:
        print("Data is stationary")



18 changes: 18 additions & 0 deletions neo_dolfin/ai/savings/SavingPredRF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import SavingPredAIUtil as util

# Load the transactions CSV through the shared util helper
transfile = "neo_dolfin/static/data/modified_transactions_data.csv"
df = util.read_file(transfile)

# Resample to a daily frequency, forward-filling missing values
data = util.data_resample(df)

# Hold out the trailing 20% of the balance series as the test set
train_data, test_data = util.train_testsplit(df=data, trainsize=0.8)
print("train sample:\n", train_data.head(10))
print("test sample:\n", test_data.head(10))

142 changes: 142 additions & 0 deletions neo_dolfin/ai/savings/SavingPredRF_NB.ipynb

Large diffs are not rendered by default.

357 changes: 357 additions & 0 deletions neo_dolfin/ai/savings/Transactions_2022Q1.csv

Large diffs are not rendered by default.

359 changes: 359 additions & 0 deletions neo_dolfin/ai/savings/Transactions_2022Q2.csv

Large diffs are not rendered by default.

310 changes: 310 additions & 0 deletions neo_dolfin/ai/savings/Transactions_2022Q3.csv

Large diffs are not rendered by default.

256 changes: 256 additions & 0 deletions neo_dolfin/ai/savings/Transactions_2022Q4.csv

Large diffs are not rendered by default.

Loading

0 comments on commit 45f446d

Please sign in to comment.