forked from philip-nomad/stock-prediction-model
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prediction.py
190 lines (153 loc) · 7.36 KB
/
prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import csv
import os
import warnings
import numpy as np
import pandas as pd
import yfinance as yf
from dateutil.relativedelta import relativedelta
import lstm_calculator
import news_contents_sentimental_analysis
warnings.filterwarnings('ignore', category=FutureWarning)
import tensorflow as tf
# Fixed blend weights for the three scores; together they sum to 1.
WEIGHT_FOR_LSTM_VALUE = 0.6 # weight a: LSTM weight
WEIGHT_FOR_EMOTIONAL_ANALYSIS_VALUE = 0.3 # weight b: sentiment-analysis score weight
WEIGHT_FOR_PER_VALUE = 0.1 # weight c: PER score weight
# Base directory for the relative paths used in this module. Changing into
# "./" leaves the current working directory where it already is.
PATH = "./"
os.chdir(PATH)
# Name of the directory that receives the per-company score CSV files.
DIR = 'prediction_score'
# Presumably the directory holding locally stored stock CSV files; no active
# code visible in this file reads it.
STOCK_DIR = 'stock'
def mkdir(company_code):
    """Create the output directory ``./{DIR}/{company_code}`` if it is missing.

    Calling this repeatedly for the same company is safe: an already existing
    directory is left untouched.

    Args:
        company_code: Stock code used as the name of the sub-directory.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True replaces the former "check, then create" pair, which could
    # fail when another process created the directory between the two calls.
    os.makedirs(f"./{DIR}/{company_code}", exist_ok=True)
def _read_per_score(company_code):
    """Return the PER score ``1 - company PER / same-sector PER``.

    The values come from columns 2 and 3 of the last data row of
    ``./per_data/csv/<company_code>.csv``. The score is 0 when that file does
    not exist or when either PER is not strictly positive.
    """
    company_per = 0
    same_category_per = 0
    try:
        with open('./per_data/csv/' + company_code + '.csv', 'r', encoding='utf-8') as lines:
            next(lines)  # skip the first line (assumed to be the header)
            for line in csv.reader(lines):
                company_per = float(line[2])
                same_category_per = float(line[3])
    except FileNotFoundError:
        # No PER data for this company: fall through to a score of 0.
        company_per = 0
        same_category_per = 0
    # Only compute the score when both the company PER and the same-sector
    # PER are positive.
    if company_per > 0 and same_category_per > 0:
        return 1 - company_per / same_category_per
    return 0


def _write_daily_scores(company_code, learning_date):
    """Write one score row per day for the 30 days before *learning_date*.

    Rows go to ``./{DIR}/{company_code}/{company_code}.csv``; the file is
    recreated (with a header) on the first day and appended to afterwards.
    """
    output_path = f"./{DIR}/{company_code}/{company_code}.csv"
    columns = ["LearningDate", "LstmScore", "EmotionalScore", "PerScore", "PreviousClosingPrice"]
    # The PER file does not depend on the day, so it is read once up front.
    per_score = _read_per_score(company_code)

    day = learning_date - relativedelta(days=30)
    for i in range(30):
        lstm_path = f"./{lstm_calculator.DIR}/{company_code}/{company_code}_{day}.csv"
        with open(lstm_path, 'r', encoding='utf-8') as lines:
            next(lines)  # skip the first line (assumed to be the header)
            # NOTE(review): if the file has no data rows, the values below are
            # carried over from the previous day (or are undefined on the
            # first day) - confirm whether that is intended.
            for line in csv.reader(lines):
                lstm_prediction = round(float(line[0]))
                previous_closing_price = round(float(line[1]))
                # (predicted next-day close - today's close) / today's close
                lstm_value = (lstm_prediction - previous_closing_price) / previous_closing_price

        # Load the sentiment-analysis value for this day.
        emotional_score = news_contents_sentimental_analysis.calculate_two_weeks(company_code, day)

        row = pd.DataFrame(
            {
                'LearningDate': [day],
                'LstmScore': [float(lstm_value)],
                'EmotionalScore': [emotional_score],
                'PerScore': [per_score],
                'PreviousClosingPrice': [previous_closing_price],
            },
            columns=columns,
        )
        row.to_csv(output_path, index=False, mode='w' if i == 0 else 'a', header=(i == 0))
        day = day + relativedelta(days=1)


def _actual_daily_closes(company_code, learning_date):
    """Return 30 actual closing prices, one per calendar day before *learning_date*.

    Prices are downloaded with yfinance for ``<company_code>.KS``. Days
    without a quote repeat the most recent earlier close, and days before the
    first quote inside the 30-day window use the close found at (or just
    before) the start of the window.

    Raises:
        ValueError: If the download contains no quote at or before the start
            of the 30-day window.
    """
    window_start = learning_date - relativedelta(days=30)
    download_start = learning_date - relativedelta(days=40)
    stock_info = yf.download(company_code + '.KS', start=download_start, end=learning_date)

    def quotes_around(day):
        # Label slice on the date index covering `day` and the following day.
        return stock_info.loc[day.strftime("%Y-%m-%d"):(day + relativedelta(days=1)).strftime("%Y-%m-%d")]

    # Walk backwards from the start of the window until a quote is found.
    anchor = window_start
    while quotes_around(anchor).empty:
        anchor -= relativedelta(days=1)
        # Past this point the two-day slice lies entirely before the download
        # range, so stop instead of stepping back indefinitely.
        if anchor < download_start - relativedelta(days=1):
            raise ValueError(
                f"no price data for {company_code}.KS between {download_start} and {window_start}")
    # .iloc makes the positional access explicit on the date-indexed column.
    start_closing_price = quotes_around(anchor)["Close"].iloc[0]

    stock_info = stock_info.loc[window_start.strftime("%Y-%m-%d"):learning_date.strftime("%Y-%m-%d")]
    trading_days = list(stock_info.index)
    # One month of actual closing prices, selected by column name. Builtin
    # float replaces the np.float alias that newer NumPy no longer provides.
    prices = stock_info["Close"].values.astype(float)

    closes = []
    previous_day = None
    previous_price = 0
    for day, price in zip(trading_days, prices):
        if previous_day is not None:
            # Repeat the earlier close once per calendar day up to this quote.
            closes.extend([previous_price] * (day - previous_day).days)
        previous_day, previous_price = day, price
    if trading_days:
        # Carry the last close forward up to (not including) learning_date.
        closes.extend([previous_price] * (learning_date - trading_days[-1].date()).days)
    # Pad the front so the series has 30 entries; a non-positive count adds nothing.
    return [start_closing_price] * (30 - len(closes)) + closes


def _fit_weights(lstm_x, emotional_x, per_x, previous_x, today_y, steps=5, learning_rate=1e-9):
    """Fit the score weights by gradient descent and return ``(w1, w2, w3)``.

    The model is ``((x1*w1 + x2*w2 + x3*w3) / 10 + 1) * x4`` with
    ``w3 = 1 - (w1 + w2)``, trained on the mean squared error against
    *today_y*.
    """
    # A private graph keeps repeated calls from piling nodes onto the
    # process-wide default graph.
    with tf.Graph().as_default():
        x1 = tf.placeholder(tf.float32, shape=[None])  # lstm score
        x2 = tf.placeholder(tf.float32, shape=[None])  # sentimental score
        x3 = tf.placeholder(tf.float32, shape=[None])  # per score
        x4 = tf.placeholder(tf.float32, shape=[None])  # previous day close
        y = tf.placeholder(tf.float32, shape=[None])  # today close

        w1 = tf.Variable(0.65, dtype=tf.float32, name='w1', constraint=lambda x: tf.clip_by_value(x, 0, 1))
        w2 = tf.Variable(0.25, dtype=tf.float32, name='w2', constraint=lambda x: tf.clip_by_value(x, 0, 1))
        w3 = 1 - (w1 + w2)
        weight_sum = w1 + w2 + w3

        init_op = tf.global_variables_initializer()
        hypothesis = ((x1 * w1 + x2 * w2 + (x3 * w3)) / 10 + 1) * x4
        cost = tf.reduce_mean(tf.square(hypothesis - y))
        train = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

        feed = {x1: lstm_x, x2: emotional_x, x3: per_x, x4: previous_x, y: today_y}
        with tf.Session() as sess:
            sess.run(init_op)
            for _ in range(steps):
                sess.run(train, feed_dict=feed)
            final_w1, final_w2, final_w3, weight_total = sess.run([w1, w2, w3, weight_sum])

    print(f"\nW1: {final_w1} W2: {final_w2} W3: {final_w3} Sum: {weight_total}")
    return final_w1, final_w2, final_w3


def start(company_code, learning_date):
    """Learn the weights of the LSTM, sentiment and PER scores for one company.

    Steps:
      1. Ensure the output directory exists and write 30 daily score rows to
         ``./{DIR}/{company_code}/{company_code}.csv``.
      2. Download the actual closing prices and expand them to one value per
         calendar day of the same 30-day window.
      3. Read the score file back and fit the weights against those prices.

    Side effects: creates a directory, writes a CSV file, downloads quotes
    through yfinance and prints the final weights.

    Args:
        company_code: Stock code; ``.KS`` is appended for the download.
        learning_date: End of the 30-day window (exclusive). It must support
            date arithmetic with ``relativedelta`` and ``strftime``.

    Returns:
        Tuple ``(final_w1, final_w2, final_w3)``: the fitted weights of the
        LSTM, sentiment-analysis and PER scores, in that order.

    Raises:
        ValueError: If no price data is available at or before the start of
            the window.
    """
    mkdir(company_code)
    _write_daily_scores(company_code, learning_date)
    today_y = _actual_daily_closes(company_code, learning_date)

    xy = pd.read_csv(f"./{DIR}/{company_code}/{company_code}.csv")
    return _fit_weights(
        lstm_x=xy.iloc[:, 1],
        emotional_x=xy.iloc[:, 2],
        per_x=xy.iloc[:, 3],
        previous_x=xy.iloc[:, 4],
        today_y=today_y,
    )
"""
if 부분이 학습을 모두 끝내고 출력하는 거에여 사실 가중치들만 출력하면 되는데 일단 혹시 몰라서 학습내용도 다 출력 시켰습니다.
final_W1, final_W2, final_W3를 return 하면 최종 가중치들 입니다. 일단 주석 처리 해놓을꼐여
이거 return해서 각각 순서대로 lstm, 감성분석, per에 넣어서 계산하면 됩니다.
"""