oracles.py
import numpy as np
import scipy
import scipy.sparse
from scipy.special import expit


class BaseSmoothOracle(object):
    """
    Base class for implementation of oracles.
    """
    def func(self, x):
        """
        Computes the value of function at point x.
        """
        raise NotImplementedError('Func oracle is not implemented.')

    def grad(self, x):
        """
        Computes the gradient at point x.
        """
        raise NotImplementedError('Grad oracle is not implemented.')

    def hess(self, x):
        """
        Computes the Hessian matrix at point x.
        """
        raise NotImplementedError('Hessian oracle is not implemented.')

    def func_directional(self, x, d, alpha):
        """
        Computes phi(alpha) = f(x + alpha*d).
        """
        return np.squeeze(self.func(x + alpha * d))

    def grad_directional(self, x, d, alpha):
        """
        Computes phi'(alpha) = (f(x + alpha*d))'_{alpha}.
        """
        return np.squeeze(self.grad(x + alpha * d).dot(d))


class QuadraticOracle(BaseSmoothOracle):
    """
    Oracle for quadratic function:
        func(x) = 1/2 x^T A x - b^T x.
    """

    def __init__(self, A, b):
        # np.allclose cannot be applied to sparse matrices, so the symmetry
        # check is skipped for sparse diagonal matrices.
        if not scipy.sparse.isspmatrix_dia(A) and not np.allclose(A, A.T):
            raise ValueError('A should be a symmetric matrix.')
        self.A = A
        self.b = b

    def func(self, x):
        return 0.5 * np.dot(self.A.dot(x), x) - self.b.dot(x)

    def grad(self, x):
        return self.A.dot(x) - self.b

    def hess(self, x):
        return self.A
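

# Illustrative usage sketch (not part of the original module): a tiny symmetric
# positive definite system, made up for demonstration, showing how the oracle
# interface is queried.
def _example_quadratic_oracle():
    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    b = np.array([1.0, -1.0])
    oracle = QuadraticOracle(A, b)
    x = np.zeros(2)
    print(oracle.func(x))   # 0.0, since func(0) = 0
    print(oracle.grad(x))   # [-1.  1.], since grad(0) = -b
    print(oracle.hess(x))   # the matrix A itself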


class LogRegL2Oracle(BaseSmoothOracle):
    """
    Oracle for logistic regression with l2 regularization:
        func(x) = 1/m sum_i log(1 + exp(-b_i * a_i^T x)) + regcoef / 2 ||x||_2^2,
    where A and b are the parameters of the logistic regression (feature
    matrix and labels vector respectively).

    For a user-friendly interface use create_log_reg_oracle().

    Parameters
    ----------
    matvec_Ax : function
        Computes the matrix-vector product Ax, where x is a vector of size n.
    matvec_ATx : function
        Computes the matrix-vector product A^T x, where x is a vector of size m.
    matmat_ATsA : function
        Computes the matrix product A^T * Diag(s) * A, where s is a vector of size m.
    """

    def __init__(self, matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef):
        self.matvec_Ax = matvec_Ax
        self.matvec_ATx = matvec_ATx
        self.matmat_ATsA = matmat_ATsA
        self.b = b
        self.regcoef = regcoef

    def func(self, x):
        # Average logistic loss over the m objects plus the l2 penalty.
        return (np.mean(np.logaddexp(0, -self.b * self.matvec_Ax(x)))
                + 0.5 * self.regcoef * x.dot(x))

    def grad(self, x):
        # grad f(x) = -1/m * A^T (sigma(-b * Ax) * b) + regcoef * x,
        # where sigma is the logistic sigmoid.
        m = self.b.shape[0]
        return ((-1.0 / m) * self.matvec_ATx(expit(-self.b * self.matvec_Ax(x)) * self.b)
                + self.regcoef * x)

    def hess(self, x):
        # hess f(x) = 1/m * A^T Diag(sigma(b * Ax) * sigma(-b * Ax)) A + regcoef * I.
        m = self.b.shape[0]
        Ax = self.matvec_Ax(x)
        s = expit(self.b * Ax) * expit(-self.b * Ax)
        return (1.0 / m) * self.matmat_ATsA(s) + self.regcoef * np.identity(x.shape[0])


class LogRegL2OptimizedOracle(LogRegL2Oracle):
    """
    Oracle for logistic regression with l2 regularization
    with optimized *_directional methods (used in line_search).

    For an explanation see LogRegL2Oracle.
    """

    def __init__(self, matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef):
        super().__init__(matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef)

    def func_directional(self, x, d, alpha):
        # TODO: Implement optimized version with pre-computation of Ax and Ad.
        # Falls back to the generic implementation until then.
        return super().func_directional(x, d, alpha)

    def grad_directional(self, x, d, alpha):
        # TODO: Implement optimized version with pre-computation of Ax and Ad.
        # Falls back to the generic implementation until then.
        return super().grad_directional(x, d, alpha)
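

# A possible shape for the TODOs above (an illustrative sketch, not the
# assignment's reference solution): cache Ax and Ad so that evaluating
# phi(alpha) = f(x + alpha*d) for many values of alpha during line search
# costs only vector arithmetic instead of a fresh matrix-vector product.
# The class name and cache attributes below are made up for demonstration.
class _CachedDirectionalLogRegSketch(LogRegL2Oracle):
    def __init__(self, matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef):
        super().__init__(matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef)
        self._last_x, self._last_d = None, None
        self._Ax, self._Ad = None, None

    def _update_cache(self, x, d):
        # Recompute A*x and A*d only when the point or the direction changes.
        if self._last_x is None or not np.array_equal(x, self._last_x):
            self._Ax, self._last_x = self.matvec_Ax(x), np.copy(x)
        if self._last_d is None or not np.array_equal(d, self._last_d):
            self._Ad, self._last_d = self.matvec_Ax(d), np.copy(d)

    def func_directional(self, x, d, alpha):
        self._update_cache(x, d)
        z = x + alpha * d
        margins = -self.b * (self._Ax + alpha * self._Ad)  # = -b * A(x + alpha*d)
        return np.mean(np.logaddexp(0, margins)) + 0.5 * self.regcoef * z.dot(z)

    def grad_directional(self, x, d, alpha):
        self._update_cache(x, d)
        z = x + alpha * d
        margins = -self.b * (self._Ax + alpha * self._Ad)
        # phi'(alpha) = grad f(x + alpha*d)^T d; the data term only needs Ad.
        return -np.mean(expit(margins) * self.b * self._Ad) + self.regcoef * z.dot(d)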


def create_log_reg_oracle(A, b, regcoef, oracle_type='usual'):
    """
    Auxiliary function for creating logistic regression oracles.
    `oracle_type` must be either 'usual' or 'optimized'.
    """
    def matvec_Ax(x):
        return A.dot(x)

    def matvec_ATx(x):
        return A.T.dot(x)

    def matmat_ATsA(s):
        # Note: a sparse matrix is densified here; for large sparse A a
        # sparse-aware product would be preferable.
        if isinstance(A, scipy.sparse.csr_matrix):
            B = A.toarray()
            return B.T.dot(B * s.reshape(-1, 1))
        else:
            return A.T.dot(A * s.reshape(-1, 1))

    if oracle_type == 'usual':
        oracle = LogRegL2Oracle
    elif oracle_type == 'optimized':
        oracle = LogRegL2OptimizedOracle
    else:
        raise ValueError('Unknown oracle_type=%s' % oracle_type)
    return oracle(matvec_Ax, matvec_ATx, matmat_ATsA, b, regcoef)
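

# Usage sketch with made-up data (not part of the original module): build a
# logistic-regression oracle for a tiny dataset and query it at x = 0.
def _example_log_reg_oracle():
    A = np.array([[1.0, 2.0], [-1.0, 0.5], [0.3, -2.0]])  # 3 objects, 2 features
    b = np.array([1.0, -1.0, 1.0])                        # labels in {-1, +1}
    oracle = create_log_reg_oracle(A, b, regcoef=0.1)
    x = np.zeros(2)
    print(oracle.func(x))   # log(2), since every margin is zero at x = 0
    print(oracle.grad(x))
    print(oracle.hess(x))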


def grad_finite_diff(func, x, eps=1e-8):
    """
    Returns approximation of the gradient using finite differences:
        result_i := (f(x + eps * e_i) - f(x)) / eps,
    where e_i are coordinate vectors:
        e_i = (0, 0, ..., 0, 1, 0, ..., 0)
                            >> i <<
    """
    result = np.zeros(x.shape)
    basis = np.eye(x.shape[0])
    f = func(x)
    for i in range(x.shape[0]):
        result[i] = (func(x + eps * basis[i]) - f) / eps
    return result
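

# Sanity-check sketch (illustrative, not part of the original module): the
# finite-difference gradient should agree with the analytic one up to roughly
# sqrt(eps) for a smooth oracle such as QuadraticOracle.
def _example_grad_check():
    oracle = QuadraticOracle(np.array([[2.0, 0.5], [0.5, 1.0]]), np.array([1.0, 2.0]))
    x = np.array([0.3, -0.7])
    print(np.linalg.norm(grad_finite_diff(oracle.func, x) - oracle.grad(x)))  # small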


def hess_finite_diff(func, x, eps=1e-5):
    """
    Returns approximation of the Hessian using finite differences:
        result_{ij} := (f(x + eps*e_i + eps*e_j) - f(x + eps*e_i)
                        - f(x + eps*e_j) + f(x)) / eps**2.
    """
    n = x.shape[0]
    result = np.zeros((n, n))
    basis = np.eye(n)
    f = func(x)
    for i in range(n):
        for j in range(n):
            result[i][j] = (func(x + eps * basis[i] + eps * basis[j]) -
                            func(x + eps * basis[i]) - func(x + eps * basis[j]) + f) / (eps ** 2)
    return result
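

# Sanity-check sketch (illustrative, not part of the original module): the
# finite-difference Hessian is less accurate than the gradient check because
# of the eps**2 in the denominator, but should still be close for quadratics.
def _example_hess_check():
    oracle = QuadraticOracle(np.array([[2.0, 0.5], [0.5, 1.0]]), np.array([1.0, 2.0]))
    x = np.array([0.3, -0.7])
    print(np.linalg.norm(hess_finite_diff(oracle.func, x) - oracle.hess(x)))  # small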