-
Notifications
You must be signed in to change notification settings - Fork 0
/
firstTests.py
78 lines (59 loc) · 1.9 KB
/
firstTests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 22 14:48:30 2018
First attemts at sparse statistics
@author: heiko
"""
import numpy as np
import scipy
import sklearn.linear_model as lm
import matplotlib.pyplot as plt
def sparse_testset(n=1000,p=2,q=100,beta=1):
"""
generates a sparse linear regression dataset with
n datapoints
p true predictors and
q possible predictors
"""
assert p<=q, 'need at least as many possible as true predictors'
X = np.random.randn(n,q)
y = beta*np.matmul(X[:,0:p],np.ones((p,1)))+np.random.randn(n,1)
y = np.ravel(y)
return (y,X)
data = sparse_testset()
lm1 = lm.LinearRegression()
lm1.fit(data[1],data[0])
lasso = lm.Lasso(alpha=0.1,tol=10**-10)
lasso.fit(data[1],data[0])
lassoCV = lm.LassoCV(cv = 10)
lassoCV.fit(data[1],data[0])
#lP = lm.lasso_path(data[1],data[0])
alphas_lasso, coefs_lasso, _ = lm.lasso_path(data[1], data[0], 0.01, fit_intercept=True)
colors = ['b', 'r', 'g', 'c', 'k']
neg_log_alphas_lasso = -np.log10(alphas_lasso)
for coef_l, c in zip(coefs_lasso, colors):
l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
lassoCV = lm.LassoCV(cv = 10)
nNonZero = np.zeros(1000)
for irepeat in range(1000):
data = sparse_testset()
lassoCV.fit(data[1],data[0])
nNonZero[irepeat]=np.sum(lassoCV.coef_!=0)
plt.figure()
ax = plt.subplot(111)
ax.hist(nNonZero,bins=np.arange(1,50,1))
plt.xlabel('# of non-zero coefficients')
plt.ylabel('# of occurences')
plt.title('Lasso regression 2 strong predictors')
# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# Only show ticks on the left and bottom spines
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
X_norm = data[1]-np.mean(data[1],axis=0)
y_norm = data[0]-np.mean(data[0])
import cProfile
cProfile.run('lasso_cd(X_norm,y_norm,0.1)')
lasso_cd(X_norm,y_norm,0.1)