-
Notifications
You must be signed in to change notification settings - Fork 9
/
getting_started.py
77 lines (57 loc) · 2.38 KB
/
getting_started.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Getting started with The Cannon and APOGEE
"""
import os

import matplotlib.pyplot as plt
import numpy as np
from astropy.table import Table

import AnniesLasso as tc
# Where the data live.  PATH is hard-coded, so point it at your own copy of
# the catalogue and memory-mapped spectra before running this script.
PATH = "/Users/arc/research/apogee/"
CATALOG = "apogee-rg.fits"
FILE_FORMAT = "apogee-rg-custom-normalization-{}.memmap"

# Catalogue of labelled stars.
labelled_set = Table.read(os.path.join(PATH, CATALOG))

# All three memmap files share one naming template; only the "{}" slot
# differs ("dispersion", "flux" or "ivar").
memmap_template = os.path.join(PATH, FILE_FORMAT)

# The dispersion array is opened read-only.
dispersion = np.memmap(
    memmap_template.format("dispersion"), mode="r", dtype=float)

# Fluxes are opened copy-on-write (mode="c"), then folded into a 2-D array
# with one row per entry in the labelled set.
flat_flux = np.memmap(memmap_template.format("flux"), mode="c", dtype=float)
normalized_flux = flat_flux.reshape((len(labelled_set), -1))

# Inverse variances are opened the same way and given the flux array's shape.
flat_ivar = np.memmap(memmap_template.format("ivar"), mode="c", dtype=float)
normalized_ivar = flat_ivar.reshape(normalized_flux.shape)
# The labelled set includes ~14000 stars.  Give every star a random digit in
# 0-9 and keep two of the ten groups, so that the training set and the
# validation set each hold roughly 1,400 stars and never overlap.
np.random.seed(888)  # Fixed seed, so the split is reproducible.
q = np.random.randint(0, 10, size=len(labelled_set))  # upper bound exclusive
train_set = (q == 1)
validate_set = (q == 0)
# Build a Cannon model from the training subset only.  threads=-1 requests
# all available threads.
training_flux = normalized_flux[train_set]
training_ivar = normalized_ivar[train_set]
model = tc.L1RegularizedCannonModel(
    labelled_set[train_set], training_flux, training_ivar,
    dispersion=dispersion, threads=-1)

# Switch regularization off.
model.regularization = 0

# Describe the label vector: polynomial terms built from TEFF, LOGG and FE_H.
# (The trailing 2 is presumably the polynomial order -- confirm against the
# AnniesLasso documentation.)
label_vector_terms = tc.vectorizer.polynomial.terminator(
    ["TEFF", "LOGG", "FE_H"], 2)
model.vectorizer = tc.vectorizer.NormalizedPolynomialVectorizer(
    labelled_set[train_set], label_vector_terms)

readable_terms = model.vectorizer.get_human_readable_label_vector()
print("Vectorizer terms: {0}".format(" + ".join(readable_terms)))

# First training pass.
model.train()

# Reset the scatter of each pixel so that the mean chi-squared value over the
# training set is 1, then train a second time with that scatter in place.
model._set_s2_by_hogg_heuristic()
model.train()
# Use the trained model to fit the stars in the validation set.
validation_set_labels = model.fit(
    normalized_flux[validate_set], normalized_ivar[validate_set])

# Compare the fitted labels with the catalogue labels, one figure per label.
# `plt` is matplotlib.pyplot, imported at the top of the file.
# NOTE(review): the figures are neither shown nor saved by this script; call
# plt.show() or fig.savefig(...) if you need to keep them.
for i, label_name in enumerate(model.vectorizer.label_names):
    fig, ax = plt.subplots()

    # x: catalogue value; y: fitted value.  Column i of the fit result is
    # assumed to correspond to label_names[i] -- confirm against model.fit.
    x = labelled_set[label_name][validate_set]
    y = validation_set_labels[:, i]
    abs_diff = np.abs(y - x)

    ax.scatter(x, y, facecolor="k")

    # Give both axes the same range so the one-to-one line is the diagonal.
    limits = np.array([ax.get_xlim(), ax.get_ylim()])
    ax.set_xlim(limits.min(), limits.max())
    ax.set_ylim(limits.min(), limits.max())

    # Mean absolute difference, built once and used for title and console.
    summary = "{0}: {1:.2f}".format(label_name, np.mean(abs_diff))
    ax.set_title(summary)
    print(summary)