-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathhousing.jl
64 lines (59 loc) · 2.21 KB
/
housing.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
This example uses the Housing dataset from the UCI Machine Learning
Repository to demonstrate a linear regression model. The dataset has
housing related information for 506 neighborhoods in Boston from
1978. Each neighborhood has 14 attributes; the goal is to use the
first 13, such as the average number of rooms per house or the distance
to employment centers, to predict the 14th attribute: the median dollar
value of the houses.
To run the demo, simply `include("housing.jl")` and run `Housing.train()`.
The dataset will be automatically downloaded. You can provide the
initial weights as an optional argument, which should be a two-element
array (e.g. `Any[w, b]`; it is updated in place) holding a
1x13 weight matrix and a scalar bias. `train` also accepts the
following keyword arguments: `lr` specifies the learning rate,
`epochs` gives number of epochs, and `seed` specifies the random
number seed. The quadratic loss for the train and test sets will be
printed at every epoch and optimized parameters will be returned.
"""
module Housing
using AutoGrad
using Main
"""
    loss(wb, x=xtrn, ygold=ytrn)

Return the quadratic loss of the linear model `w*x .+ b` against `ygold`:
the sum of squared residuals divided by `size(ygold, 2)`.

`wb` is destructured into `(w, b)`. When `x` or `ygold` are omitted, the
module globals `xtrn` / `ytrn` are used, so those must already be defined
in that case.
"""
function loss(wb, x=xtrn, ygold=ytrn)
    weights, bias = wb
    residual = (weights * x .+ bias) - ygold
    # Normalize by the size of the second dimension of `ygold`.
    return sum(abs2, residual) / size(ygold, 2)
end
"""
    train(w=Any[0.1*randn(1,13), 0.0]; lr=.1, epochs=20, seed=nothing)

Fit the linear model by gradient descent on the whole training set and
return `w`.

# Arguments
- `w`: indexable, mutable container of parameters (by default a random 1x13
  weight matrix scaled by 0.1 and a `0.0` bias). Its slots are reassigned
  on every epoch, so the caller's container is modified; the same object is
  returned.

# Keywords
- `lr`: step size applied to each gradient component.
- `epochs`: number of gradient steps.
- `seed`: when not `nothing`, the RNG is reseeded and the dataset is
  reloaded (which also redraws the train/test split).

Before the first step and after every step, a tuple
`(epoch, :trnloss, value, :tstloss, value)` is printed.

Note: the default `w` is constructed before the function body runs, i.e.
before the reseeding below. NOTE(review): confirm whether the initial
weights were meant to be covered by `seed`.
"""
function train(w=Any[0.1*randn(1,13), 0.0]; lr=.1, epochs=20, seed=nothing)
    # Identity comparison: `==` can be overloaded by the seed's type,
    # `!==` cannot.
    if seed !== nothing
        # NOTE(review): `srand` is the pre-0.7 spelling (later versions use
        # `Random.seed!`) — confirm the Julia version this file targets.
        srand(seed)
        loaddata()
    end
    # Load the data lazily on first use.
    isdefined(Housing, :xtrn) || loaddata()
    gradfun = grad(loss)
    println((0, :trnloss, loss(w,xtrn,ytrn), :tstloss, loss(w,xtst,ytst)))
    for epoch in 1:epochs
        g = gradfun(w, xtrn, ytrn)
        # `w[i] -= ...` rebinds slot `i` of `w` to the updated value.
        for i in eachindex(w)
            w[i] -= lr * g[i]
        end
        println((epoch, :trnloss, loss(w,xtrn,ytrn), :tstloss, loss(w,xtst,ytst)))
    end
    return w
end
"""
    loaddata()

Populate the module globals `xtrn`, `ytrn`, `xtst`, `ytst` from the Housing
data file.

The file is looked up at `AutoGrad.dir("data", "housing.data")` and is
downloaded from the UCI repository only when it is not already present.
Rows 1-13 of the transposed table become the inputs and row 14 the target.
Each input row is standardized (mean subtracted, divided by the standard
deviation, both taken along dimension 2). The columns are then shuffled with
`randperm`; the first 400 go to the training set and the rest to the test
set.

The split depends on the current RNG state, so reseeding before the call
changes it.

NOTE(review): `info`, `readdlm` without `DelimitedFiles`, and `mean(x,2)` /
`std(x,2)` are pre-0.7 spellings — confirm the Julia version this file
targets.
"""
function loaddata()
    global xtrn, ytrn, xtst, ytst
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
    file = AutoGrad.dir("data", "housing.data")
    if !isfile(file)
        # Make sure the cache directory exists so the download has somewhere
        # to write; this is a no-op when the directory is already there.
        mkpath(dirname(file))
        info("Downloading $url to $file")
        download(url, file)
    end
    data = readdlm(file)'
    @show size(data) # (14,506)
    x = data[1:13,:]
    y = data[14:14,:]   # range index keeps `y` as a 1-row matrix
    x = (x .- mean(x,2)) ./ std(x,2) # Data normalization
    r = randperm(size(x,2)) # trn/tst split
    xtrn = x[:,r[1:400]]
    ytrn = y[:,r[1:400]]
    xtst = x[:,r[401:end]]
    # Last expression: the function evaluates to `ytst`, as before.
    ytst = y[:,r[401:end]]
end
end # module Housing