-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature flask implement random forest (#116)
* Create util file: move util functions to a util file to be shared by all models
* Add notebook for RF modelling
* Add notebook for SARIMAX and Randomforest: organize the util functions, add SARIMAX and Randomforest models
- Loading branch information
Showing
9 changed files
with
2,375 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import pandas as pd | ||
from statsmodels.tsa.stattools import adfuller | ||
|
||
#read input file | ||
def read_file(path):
    """Load the CSV file at ``path`` into a DataFrame.

    A confirmation message and the first ten rows are printed before the
    loaded frame is returned.
    """
    frame = pd.read_csv(path)
    print("File read into df")
    preview = frame.head(10)
    print(preview)
    return frame
|
||
# Then, resample the DataFrame with daily frequency and forward-fill missing values | ||
def data_resample(df):
    """Resample ``df`` to daily frequency, forward-filling the gaps.

    The ``postDate`` column is parsed with ``pd.to_datetime`` and used as the
    index for a daily (``'D'``) resample with forward-fill. The result is
    returned with ``postDate`` restored as a regular column.

    The input frame is left untouched: the work is done on a copy, so the
    caller's ``df`` keeps its original ``postDate`` column and the function
    can safely be called more than once on the same frame.

    Raises:
        KeyError: if ``df`` has no ``postDate`` column.
    """
    # ``assign`` returns a new frame, so the caller's data is never modified.
    indexed = df.assign(postDate=pd.to_datetime(df['postDate'])).set_index('postDate')
    df2 = indexed.resample('D').ffill()
    # Reset the index to have 'postDate' as a regular column again
    print("resampling done.. resting the index")
    return df2.reset_index()
|
||
# Split the data into train and test | ||
def train_testsplit(df, trainsize):
    """Split the ``balance`` series of ``df`` into train and test portions.

    The frame is indexed by its ``postDate`` column, and the first
    ``int(len(df) * trainsize)`` rows of ``balance`` form the training
    series; the remaining rows form the test series.

    The input frame is not modified: indexing happens on a copy, so the
    caller's ``df`` keeps ``postDate`` as a column and the function can be
    called repeatedly (e.g. with different ``trainsize`` values).

    Args:
        df: frame with ``postDate`` and ``balance`` columns.
        trainsize: fraction of rows assigned to the training series.

    Returns:
        ``(traindata, testdata)`` — two Series indexed by ``postDate``.

    Raises:
        KeyError: if ``postDate`` or ``balance`` is missing from ``df``.
    """
    # Non-inplace set_index returns a new frame instead of mutating the input.
    indexed = df.set_index('postDate')
    train_size = int(len(indexed) * trainsize)
    balance = indexed['balance']
    traindata = balance.iloc[:train_size]
    testdata = balance.iloc[train_size:]
    return traindata, testdata
|
||
# checking stationarity | ||
def ad_test(dataset):
    """Run the Augmented Dickey-Fuller test on ``dataset`` and print a report.

    Calls ``adfuller`` with ``autolag='AIC'`` and prints the first five
    entries of its result (labelled ADF statistic, p-value, number of lags,
    number of observations and critical values), followed by a verdict:
    the data is reported as non-stationary when the p-value is above 0.05
    and as stationary otherwise.
    """
    dftest = adfuller(dataset, autolag='AIC')
    print("1. ADF : ",dftest[0])
    print("2. P-Value : ", dftest[1])
    print("3. Num Of Lags : ", dftest[2])
    print("4. Num Of Observations Used For ADF Regression:", dftest[3])
    print("5. Critical Values :")
    for key, val in dftest[4].items():
        print("\t",key, ": ", val)
    # Exactly one verdict must be printed; the stationary message belongs in
    # the else branch rather than being emitted unconditionally.
    if dftest[1] > 0.05:
        print("Data is not stationary")  # if p>0.05; Data is not stationary
    else:
        print("Data is stationary")
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import statsmodels.api as sm | ||
import matplotlib.pyplot as plt | ||
import SavingPredAIUtil as util | ||
|
||
# Load the transactions CSV into a DataFrame.
transfile = "neo_dolfin/static/data/modified_transactions_data.csv"
df = util.read_file(path=transfile)

# Resample to daily frequency, forward-filling missing values.
data = util.data_resample(df)

# Split into train and test series, then show the first rows of each.
train_data, test_data = util.train_testsplit(data, 0.8)
print("train sample:\n", train_data.head(10))
print("test sample:\n", test_data.head(10))
|
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.