-
Notifications
You must be signed in to change notification settings - Fork 0
/
NN_input.py
33 lines (31 loc) · 1.63 KB
/
NN_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import numpy as np
def _make_windows(data, period):
    """Turn a (days, companies) array into sliding-window features and labels.

    Row ``i`` of the feature matrix is days ``i .. i + period - 1`` flattened
    in row-major order (all companies for day ``i``, then day ``i + 1``, ...).
    Row ``i`` of the label matrix is day ``i + period``.
    """
    # The number of windows must depend on `period` (not a fixed 5) so that
    # the feature and label matrices always have the same number of rows.
    n_windows = data.shape[0] - period
    X = np.array([data[start:start + period, :].flatten()
                  for start in range(n_windows)])
    Y = np.array([data[day, :].flatten()
                  for day in range(period, data.shape[0])])
    return X, Y


def get_data(sector='total', period=5):
    """Load closing prices and build sliding-window train/test matrices.

    Training data is read from ``csvFiles/sp500_<sector>_close.csv`` and test
    data from ``csvFilesTest/<sector>_test.csv``; both are read with the first
    CSV column as the index. Training columns that contain any NaN are
    dropped, and the test data is restricted to the surviving column names
    (the test data itself is not NaN-filtered).

    Each feature row holds ``period`` consecutive rows of the data, flattened
    row-major; the matching label row is the row that immediately follows
    that window.

    Args:
        sector: Name fragment used to build both CSV file names.
        period: Number of consecutive rows used as features for one sample.

    Returns:
        A 7-tuple ``(X, Y, n_samples, n_features, n_labels, X_test, Y_test)``
        where ``n_samples == X.shape[0]``, ``n_features == X.shape[1]`` and
        ``n_labels == Y.shape[1]``.
    """
    train_frame = pd.read_csv('csvFiles/sp500_%s_close.csv' % sector, index_col=0)
    train = np.array(train_frame)

    # Keep only the columns that have no missing values in the training data.
    keep = ~np.isnan(train).any(axis=0)
    train = train[:, keep]
    cols = np.array(train_frame.columns)[keep]

    X, Y = _make_windows(train, period)

    test_frame = pd.read_csv('csvFilesTest/%s_test.csv' % sector, index_col=0)
    # Select the same columns (by name) that survived the training NaN filter.
    test = np.array(test_frame[cols])
    X_test, Y_test = _make_windows(test, period)

    return X, Y, X.shape[0], X.shape[1], Y.shape[1], X_test, Y_test
if __name__ == '__main__':
    # Smoke check: load the default dataset and print the shape of the
    # training features, training labels, test features and test labels.
    features, labels, _, _, _, test_features, test_labels = get_data()
    for matrix in (features, labels, test_features, test_labels):
        print(matrix.shape)