Skip to content

Commit

Permalink
Added titles to some figures
Browse files Browse the repository at this point in the history
  • Loading branch information
Stefan committed Dec 8, 2023
1 parent d613917 commit 0eae692
Show file tree
Hide file tree
Showing 13 changed files with 98 additions and 105 deletions.
18 changes: 7 additions & 11 deletions experiments/precision_recall.ipynb

Large diffs are not rendered by default.

159 changes: 80 additions & 79 deletions experiments/searchspace_and_greedy_optimality.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,82 +16,83 @@
# Candidate score values per feature. The exhaustive search below enumerates
# every assignment with product(scoreset, repeat=X.shape[1]); the value 0 is
# removed again before the set is handed to ProbabilisticScoringList.
scoreset = [0, 1]

# NOTE: the lines below are the pre-change ("-") side of the diff hunk; the
# leading indentation was lost when the page was rendered to text, so the
# nesting under this guard is not visible here.
if __name__ == "__main__":
dataset = "thorax"
df = pd.read_csv(f"../../data/{dataset}.csv")
# First CSV column is used as the target, all remaining columns as features.
X = df.iloc[:, 1:].to_numpy()
y = df.iloc[:, 0].to_numpy()
# Mean of the target column (presumably the positive-class rate — confirm y is 0/1).
wpos = y.mean()

# opt selects the loss handed to the cascade, meas the metric that is plotted.
opt, meas = "BACC", "BACC"
loss = get_loss(opt, wpos)
# perf is the negated loss; note that get_loss(meas, wpos) is re-invoked on every call.
perf = lambda y, pred: -get_loss(meas, wpos)(y, pred)


# Fit one _ClassifierAtK for a single score vector and return
# (score vector, in-sample perf). Reads X, y and perf from the enclosing scope;
# from_scorevec is defined outside this view.
def fit_predict(scores_):
features, scores = from_scorevec(scores_)
# NOTE(review): np.full_like takes its dtype from `features`; if that is
# integer-typed the .5 fill value is truncated to 0 — confirm this is intended.
clf = _ClassifierAtK(features, scores=list(scores),
initial_feature_thresholds=np.full_like(features, .5),
threshold_optimizer=None)
clf.fit(X, y)
y_prob = clf.predict_proba(X)[:, 1]

return scores_, perf(y, y_prob)
# return accuracy_score(y, y_prob>.5)


# Search-space graph: one node per (number of non-zero scores, metric) pair.
G = nx.Graph()
# Maps each score vector (as a tuple) to the node id it was assigned.
node = dict()

# Evaluate all len(scoreset) ** X.shape[1] score vectors with 12 workers.
with Pool(12) as p:
for scores, metric in tqdm(p.imap(fit_predict, product(scoreset, repeat=X.shape[1])),
total=len(scoreset) ** X.shape[1]):

id_ = np.count_nonzero(scores), metric
node[tuple(scores)] = id_

for index in np.nonzero(scores)[0]:
# get previous nodes by removing one non-zero feature
from_ = np.array(scores)
from_[index] = 0

# The lookup requires that the predecessor was already stored in `node`,
# i.e. that results arrive in enumeration order.
G.add_edge(node[tuple(from_)], id_)

# Node ids are (x, y) tuples and double as their own plot coordinates.
pos = {p: p for p in G.nodes}

sns.set(font_scale=1.5, rc={'text.usetex': True})
sns.set_style("whitegrid")
plt.rc('font', **{'family': 'serif'})
fig, ax = plt.subplots()
fig.set_size_inches(13, 4)
ax.set_ylabel("Balanced Accuracy in sample")
ax.set_xlabel("")

print("drawing nodes")
nx.draw_networkx_nodes(G, pos, node_color="#000000", node_size=10, ax=ax, alpha=.1)
print("drawing edges")
nx.draw_networkx_edges(G, pos, edge_color="#000000", ax=ax, width=0.5, node_size=10, alpha=.1)

# highlight cascade
print("fitting cascade")
psl = ProbabilisticScoringList(score_set=set(scoreset) - {0},
stage_loss=loss,
lookahead=LOOKAHEAD).fit(X, y)
# One (stage index, perf) tuple per stage classifier of the fitted cascade.
cascade = [(i, perf(y, clf.predict_proba(X)[:, 1]))
for i, clf in enumerate(psl.stage_clfs)]

# G is rebound to a new graph holding only the cascade path; it is drawn with
# the same `pos` mapping, so each cascade tuple is looked up there.
G = nx.Graph()
for u, v in zip(cascade, cascade[1:]):
G.add_edge(u, v)
print("drawing cascade")
nx.draw_networkx_nodes(G, pos, node_color="#1f78b4", node_size=50, ax=ax)
nx.draw_networkx_edges(G, pos, edge_color="#1f78b4", ax=ax, width=1, node_size=10)

# plt.box(False)
ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
# One x tick per integer value (x is the count of non-zero scores).
ax.xaxis.set_major_locator(MultipleLocator(1))
ax.set_xlim(-0.2, X.shape[1] + 0.2)

print("generating file")
# Output file name encodes the optimisation loss and the plotted metric.
code = f"local{opt}_globalSUM_metr{meas}"
fig.savefig(f"../../fig/{dataset}_{code}.pdf")
# fig.savefig(f"../../fig/{dataset}_{code}.png", dpi=300)
# For each dataset, plot the in-sample balanced accuracy of every possible
# score assignment (grey graph) and overlay the path taken by the greedy
# ProbabilisticScoringList cascade (blue).
#
# NOTE(review): everything below deliberately stays at module scope. Assuming
# `Pool` is multiprocessing.Pool, `fit_predict` is pickled by name and reads the
# globals X, y and perf, which only works when the workers are forked from this
# process — confirm before running under the "spawn" start method.

# Global plot styling does not depend on the dataset, so it is applied once.
sns.set(font_scale=1.5, rc={'text.usetex': True})
sns.set_style("whitegrid")
plt.rc('font', **{'family': 'serif'})

for dataset in ["thorax", "covid"]:
    # First CSV column is the target, all remaining columns are features.
    df = pd.read_csv(f"../data/{dataset}.csv")
    X = df.iloc[:, 1:].to_numpy()
    y = df.iloc[:, 0].to_numpy()
    wpos = y.mean()
    n_features = X.shape[1]

    # opt selects the loss optimised by the cascade, meas the plotted metric.
    opt, meas = "BACC", "BACC"
    loss = get_loss(opt, wpos)
    # Built once per dataset instead of on every perf() call.
    metric_loss = get_loss(meas, wpos)

    def perf(y_true, pred):
        """Return the negated `meas` loss, so that larger values are better."""
        return -metric_loss(y_true, pred)

    def fit_predict(scores_):
        """Fit one classifier for a score vector.

        Returns the tuple ``(scores_, perf)`` where ``perf`` is the in-sample
        performance of the fitted classifier on the current dataset.
        """
        features, scores = from_scorevec(scores_)
        # NOTE(review): np.full_like takes its dtype from `features`; if that
        # is integer-typed the .5 fill value is truncated to 0 — confirm.
        clf = _ClassifierAtK(features, scores=list(scores),
                             initial_feature_thresholds=np.full_like(features, .5),
                             threshold_optimizer=None)
        clf.fit(X, y)
        y_prob = clf.predict_proba(X)[:, 1]
        return scores_, perf(y, y_prob)

    # Search-space graph: one node per (number of non-zero scores, metric).
    search_graph = nx.Graph()
    node_ids = {}

    with Pool(12) as pool:
        results = pool.imap(fit_predict, product(scoreset, repeat=n_features))
        for scores, metric in tqdm(results, total=len(scoreset) ** n_features):
            id_ = np.count_nonzero(scores), metric
            node_ids[tuple(scores)] = id_

            for index in np.nonzero(scores)[0]:
                # get previous nodes by removing one non-zero feature
                from_ = np.array(scores)
                from_[index] = 0
                # The predecessor must already be stored, i.e. results have
                # to arrive in enumeration order.
                search_graph.add_edge(node_ids[tuple(from_)], id_)

    # Node ids are (x, y) tuples and double as their own plot coordinates.
    pos = {n: n for n in search_graph.nodes}

    fig, ax = plt.subplots()
    fig.set_size_inches(13, 4)
    ax.set_ylabel("Balanced Accuracy in sample")
    ax.set_xlabel("Model Complexity")

    print("drawing nodes")
    nx.draw_networkx_nodes(search_graph, pos, node_color="#000000",
                           node_size=10, ax=ax, alpha=.1)
    print("drawing edges")
    nx.draw_networkx_edges(search_graph, pos, edge_color="#000000", ax=ax,
                           width=0.5, node_size=10, alpha=.1)

    # highlight cascade
    print("fitting cascade")
    psl = ProbabilisticScoringList(score_set=set(scoreset) - {0},
                                   stage_loss=loss,
                                   lookahead=LOOKAHEAD).fit(X, y)
    # One (stage index, perf) tuple per stage classifier of the cascade.
    cascade = [(i, perf(y, clf.predict_proba(X)[:, 1]))
               for i, clf in enumerate(psl.stage_clfs)]

    # Path graph over consecutive stages, drawn with the same `pos` mapping,
    # so each cascade tuple is looked up among the search-space nodes.
    cascade_graph = nx.Graph()
    for u, v in zip(cascade, cascade[1:]):
        cascade_graph.add_edge(u, v)
    print("drawing cascade")
    nx.draw_networkx_nodes(cascade_graph, pos, node_color="#1f78b4",
                           node_size=50, ax=ax)
    nx.draw_networkx_edges(cascade_graph, pos, edge_color="#1f78b4", ax=ax,
                           width=1, node_size=10)

    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    # One x tick per integer value (x is the count of non-zero scores).
    ax.xaxis.set_major_locator(MultipleLocator(1))
    ax.set_xlim(-0.2, n_features + 0.2)

    print("generating file")
    # Output file name encodes the optimisation loss and the plotted metric.
    code = f"local{opt}_globalSUM_metr{meas}"
    fig.suptitle(dataset.title())
    fig.savefig(f"../fig/{dataset}_{code}.pdf", bbox_inches="tight")
    # fig.savefig(f"../fig/{dataset}_{code}.png", dpi=300)
    # Release the figure so figures do not accumulate across datasets.
    plt.close(fig)
Binary file modified fig/all_opt_method_vs_bacc.pdf
Binary file not shown.
Binary file modified fig/all_opt_method_vs_cp.pdf
Binary file not shown.
Binary file modified fig/covid_localBACC_globalSUM_metrBACC.pdf
Binary file not shown.
Binary file modified fig/covid_opt_method_vs_bacc.pdf
Binary file not shown.
Binary file modified fig/covid_optimality_of_greedy_(optmethod=bacc, eval=bacc).pdf
Binary file not shown.
Binary file modified fig/thorax_localBACC_globalSUM_metrBACC.pdf
Binary file not shown.
Binary file modified fig/thorax_opt_exp_entr_varios_metrics.pdf
Binary file not shown.
Binary file modified fig/thorax_opt_method_vs_bacc.pdf
Binary file not shown.
Binary file modified fig/thorax_optimality_of_greedy_(optmethod=bacc, eval=bacc).pdf
Binary file not shown.
Binary file modified fig/thorax_precision_recall_curve_thorax_example.pdf
Binary file not shown.
26 changes: 11 additions & 15 deletions plots.ipynb

Large diffs are not rendered by default.

0 comments on commit 0eae692

Please sign in to comment.