# launch_runtime_sec.py (forked from devpouya/FastSpectralClustering)
import csv
import os
import subprocess

import matplotlib.pyplot as plt
# Global settings: each measurement is repeated num_runs times and the median
# is reported. With num_runs = 3, index 1 of the sorted list is the median.
num_runs = 3
median_idx = 1
# # First Benchmark: growing dim
# test = "growing_dim"
# output_filename = "c_code_"
# dataset_path = os.getcwd() + "/benchmarks/datasets/6c_5000n_growing_dim/"
# output_path = os.getcwd() + "/benchmarks/scikit/measurements/growing_dim/"
# k = 6
# n = 5000
# params = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
test = "growing_n_"
output_filename = "c_code_"
dataset = "8c_300d_growing_n_normalized"
dataset_path = os.getcwd() + "/benchmarks/datasets/"+dataset+"/"
output_path = os.getcwd() + "/benchmarks/scikit/measurements/growing_n/"+dataset+"/"
k = 8 #??
dim = 300 #??
params = [n for n in range(100, 6100, 100)]
# # Third benchmark: growing n (72 clusters)
# test = "growing n"
# output_filename = "vec_8.txt"
# dataset_path = os.getcwd() + "/benchmarks/datasets/72c_300d_growing_n/"
# output_path = os.getcwd() + "/benchmarks/spectral_clustering/measurements/"
# k = 72
# dim = 300
# params = range(100, 6000, 100)
# Rebuild the clustering binary from scratch; abort if the build fails.
subprocess.run(["make", "clean"], check=True)
# subprocess.run(["make", "countops"])
subprocess.run(["make"], check=True)
directory = os.fsencode(dataset_path)
runtimes_median = []      # median runtime per parameter value
performances_median = []  # median performance per parameter value (only with countops)
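# For each parameter value n, find the matching dataset file, time the
# clustering binary num_runs times, and record the median runtime.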
for par in params:
    print(test + " | parameter = " + str(par))
    for file in sorted(os.listdir(directory)):  # when generating data, use plain numbers as filenames for simplicity
        filename = os.fsdecode(file)
        if filename == str(par) + ".txt":
            # run num_runs times and take the median
            runtimes = []
            performances = []
            for i in range(num_runs):
                clustering = subprocess.check_output(
                    ["./clustering", dataset_path + filename, str(k), "out.txt"],
                    universal_newlines=True).split("\n")
                # the clustering binary prints the runtime on its first output line
                runtime = clustering[0]
                runtimes.append(float(runtime))  # parse to float so the sort below is numeric
                # countops = subprocess.check_output(
                #     ["./countops", dataset_path + filename, str(k), "out.txt"],
                #     universal_newlines=True).split("\n")
                # flops = countops[1]
                # performances.append(float(runtime) / float(flops))
            # sort the runs and pick the median
            runtimes.sort()
            # performances.sort()
            runtimes_median.append(runtimes[median_idx])
            # performances_median.append(performances[median_idx])
            print("runtime: " + str(runtimes[median_idx]))
            # + " (cycles), performance: " + str(performances[median_idx]) + " (flops/cycle)")
with open(output_path + output_filename + test + "graph_runtime_sec", 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(zip(params, runtimes_median))
# with open(output_path + output_filename + "_perf", 'w', newline='') as f:
#     writer = csv.writer(f, delimiter='\t')
#     writer.writerows(zip(params, performances_median))
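
# A minimal plotting sketch of the results just written. This is an assumption
# about intended use (matplotlib is imported but never used above), not part of
# the original measurement pipeline; the output filename is hypothetical.
plt.plot(params, runtimes_median, marker="o")
plt.xlabel("n (number of points)")
plt.ylabel("runtime")
plt.title(test.strip("_") + " benchmark, k = " + str(k))
plt.savefig(output_path + output_filename + test + "runtime.png")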