forked from hengtaoguo/MachineLearning_Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogisticRegression.py
154 lines (119 loc) · 5.12 KB
/
LogisticRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#-*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
from matplotlib.font_manager import FontProperties
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14) # 解决windows环境下画图汉字乱码问题
def LogisticRegression():
data = loadtxtAndcsv_data("data2.txt", ",", np.float64)
X = data[:,0:-1]
y = data[:,-1]
plot_data(X,y) # 作图
X = mapFeature(X[:,0],X[:,1]) #映射为多项式
initial_theta = np.zeros((X.shape[1],1))#初始化theta
initial_lambda = 0.1 #初始化正则化系数,一般取0.01,0.1,1.....
J = costFunction(initial_theta,X,y,initial_lambda) #计算一下给定初始化的theta和lambda求出的代价J
print(J) #输出一下计算的值,应该为0.693147
#result = optimize.fmin(costFunction, initial_theta, args=(X,y,initial_lambda)) #直接使用最小化的方法,效果不好
'''调用scipy中的优化算法fmin_bfgs(拟牛顿法Broyden-Fletcher-Goldfarb-Shanno)
- costFunction是自己实现的一个求代价的函数,
- initial_theta表示初始化的值,
- fprime指定costFunction的梯度
- args是其余测参数,以元组的形式传入,最后会将最小化costFunction的theta返回
'''
result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient, args=(X,y,initial_lambda))
p = predict(X, result) #预测
print(u'在训练集上的准确度为%f%%'%np.mean(np.float64(p==y)*100)) # 与真实值比较,p==y返回True,转化为float
X = data[:,0:-1]
y = data[:,-1]
plotDecisionBoundary(result,X,y) #画决策边界
# 加载txt和csv文件
def loadtxtAndcsv_data(fileName,split,dataType):
return np.loadtxt(fileName,delimiter=split,dtype=dataType)
# 加载npy文件
def loadnpy_data(fileName):
return np.load(fileName)
# 显示二维图形
def plot_data(X,y):
pos = np.where(y==1) #找到y==1的坐标位置
neg = np.where(y==0) #找到y==0的坐标位置
#作图
plt.figure(figsize=(15,12))
plt.plot(X[pos,0],X[pos,1],'ro') # red o
plt.plot(X[neg,0],X[neg,1],'bo') # blue o
plt.title(u"两个类别散点图",fontproperties=font)
plt.show()
# 映射为多项式
def mapFeature(X1,X2):
degree = 2; # 映射的最高次方
out = np.ones((X1.shape[0],1)) # 映射后的结果数组(取代X)
'''
这里以degree=2为例,映射为1,x1,x2,x1^2,x1,x2,x2^2
'''
for i in np.arange(1,degree+1):
for j in range(i+1):
temp = X1**(i-j)*(X2**j) #矩阵直接乘相当于matlab中的点乘.*
out = np.hstack((out, temp.reshape(-1,1)))
return out
# 代价函数
def costFunction(initial_theta,X,y,inital_lambda):
m = len(y)
J = 0
h = sigmoid(np.dot(X,initial_theta)) # 计算h(z)
theta1 = initial_theta.copy() # 因为正则化j=1从1开始,不包含0,所以复制一份,前theta(0)值为0
theta1[0] = 0
temp = np.dot(np.transpose(theta1),theta1)
J = (-np.dot(np.transpose(y),np.log(h))-np.dot(np.transpose(1-y),np.log(1-h))+temp*inital_lambda/2)/m # 正则化的代价方程
return J
# 计算梯度
def gradient(initial_theta,X,y,inital_lambda):
m = len(y)
grad = np.zeros((initial_theta.shape[0]))
h = sigmoid(np.dot(X,initial_theta))# 计算h(z)
theta1 = initial_theta.copy()
theta1[0] = 0
grad = np.dot(np.transpose(X),h-y)/m+inital_lambda/m*theta1 #正则化的梯度
return grad
# S型函数
def sigmoid(z):
h = np.zeros((len(z),1)) # 初始化,与z的长度一置
h = 1.0/(1.0+np.exp(-z))
return h
#画决策边界
def plotDecisionBoundary(theta,X,y):
pos = np.where(y==1) #找到y==1的坐标位置
neg = np.where(y==0) #找到y==0的坐标位置
#作图
plt.figure(figsize=(15,12))
plt.plot(X[pos,0],X[pos,1],'ro') # red o
plt.plot(X[neg,0],X[neg,1],'bo') # blue o
plt.title(u"决策边界",fontproperties=font)
#u = np.linspace(30,100,100)
#v = np.linspace(30,100,100)
u = np.linspace(-1,1.5,50) #根据具体的数据,这里需要调整
v = np.linspace(-1,1.5,50)
z = np.zeros((len(u),len(v)))
for i in range(len(u)):
for j in range(len(v)):
z[i,j] = np.dot(mapFeature(u[i].reshape(1,-1),v[j].reshape(1,-1)),theta) # 计算对应的值,需要map
z = np.transpose(z)
plt.contour(u,v,z,[0,0.01],linewidth=2.0) # 画等高线,范围在[0,0.01],即近似为决策边界
#plt.legend()
plt.show()
# 预测
def predict(X,theta):
m = X.shape[0]
p = np.zeros((m,1))
p = sigmoid(np.dot(X,theta)) # 预测的结果,是个概率值
for i in range(m):
if p[i] > 0.5: #概率大于0.5预测为1,否则预测为0
p[i] = 1
else:
p[i] = 0
return p
# 测试逻辑回归函数
def testLogisticRegression():
LogisticRegression()
if __name__ == "__main__":
testLogisticRegression()