# bigdata.py
# Forked from jostmey/NakedTensor.
# License: See LICENSE
# Fit a straight line, of the form y=m*x+b
import tensorflow as tf
import numpy as np
# Synthetic dataset: 8 million evenly spaced x values on [0, 8], each paired
# with a label scattered around the line y = 0.3*x - 0.8 by Gaussian noise
# (standard deviation 0.25).
xs = np.linspace(start=0.0, stop=8.0, num=8000000)
ys = (0.3 * xs - 0.8) + np.random.normal(loc=0.0, scale=0.25, size=xs.shape[0])

# Starting values for the slope and intercept; deliberately far from the
# true values so that TensorFlow has something to refine.
m_initial, b_initial = -0.5, 1.0
# Free parameters of the line y = m*x + b, seeded with the initial guesses.
# NOTE: everything below uses the TensorFlow 1.x graph API.
m = tf.Variable(initial_value=m_initial)
b = tf.Variable(initial_value=b_initial)

# The dataset is fed in small pieces: each placeholder holds exactly _BATCH
# float32 values that are supplied at run time through a feed dictionary.
_BATCH = 8
xs_placeholder = tf.placeholder(dtype=tf.float32, shape=[_BATCH])
ys_placeholder = tf.placeholder(dtype=tf.float32, shape=[_BATCH])

# Model prediction for the current batch (element-wise over the vector) and
# the sum of squared differences between the labels and that prediction.
ys_model = m * xs_placeholder + b
residuals = ys_placeholder - ys_model
total_error = tf.reduce_sum(residuals ** 2)

# Running this operation once performs a single gradient-descent step
# on total_error with a learning rate of 0.001.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
optimizer_operation = optimizer.minimize(total_error)

# Operation that assigns the variables their initial values; it must be run
# in the session before any training step.
initializer_operation = tf.global_variables_initializer()
# Graph operations only execute inside a session.
with tf.Session() as session:
    session.run(initializer_operation)

    # Despite the name, this counts gradient steps rather than full passes
    # over the data: every step trains on one random batch of _BATCH points.
    _EPOCHS = 10000
    for step in range(_EPOCHS):
        # Draw _BATCH indices uniformly at random (with replacement).
        batch_indices = np.random.randint(len(xs), size=_BATCH)
        batch_feed = {
            xs_placeholder: xs[batch_indices],
            ys_placeholder: ys[batch_indices],
        }
        session.run(optimizer_operation, feed_dict=batch_feed)

    # Report the fitted parameters once training has finished.
    slope, intercept = session.run((m, b))
    print('Slope:', slope, 'Intercept:', intercept)