-
Notifications
You must be signed in to change notification settings - Fork 2
/
finddup.cpp
136 lines (120 loc) · 3.42 KB
/
finddup.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// cl /EHsc /O2 /openmp matcher.cpp Landmark.cpp Database.cpp lib/WavReader.cpp lib/Timing.cpp lib/ReadAudio.cpp lib/BmpReader.cpp lib/Signal.cpp lib/utils.cpp lib/Sound.cpp .\PeakFinder.cpp .\PeakFinderDejavu.cpp Analyzer.cpp
#include <stdio.h>
#include <cstdint>
#include <algorithm>
#include <string>
#include <stdexcept>
#include <fstream>
#include <sstream>
#include <omp.h>
#include <mpi.h>
#include "lib/Timing.hpp"
#include "Landmark.hpp"
#include "lib/utils.hpp"
#include "Database.hpp"
#include "PeakFinderDejavu.hpp"
int processQuery(
std::string name,
LandmarkBuilder &builder,
const Database &db,
int songid,
int blacklist,
std::ostream &fout
) {
Timing tm;
int oneread = 10000;
std::vector<Peak> peaks(oneread);
FILE *fin = fopen(name.c_str(), "rb");
if (fin == NULL) {
return -1;
}
int nread;
int total = 0;
while ((nread = fread(&peaks[total], sizeof(Peak), oneread, fin)) > 0) {
total += nread;
peaks.resize(total + oneread);
}
peaks.resize(total);
int max_t = 0;
for (Peak peak : peaks) {
if (peak.time > max_t) max_t = peak.time;
}
int nsongs = db.songList.size();
int ptr = 0;
std::vector<match_t> scores(nsongs);
for (int t = 0; t < max_t; t += 8000 * 10 / 512) {
std::vector<Peak> sub_peaks;
while (ptr < peaks.size() && peaks[ptr].time < t + 8000 * 10 / 512) {
sub_peaks.push_back(peaks[ptr]);
ptr++;
}
std::vector<Landmark> lms = builder.peaks_to_landmarks(sub_peaks);
db.query_landmarks(lms, scores.data());
std::vector<int> song_rank;
for (int i = 0; i < nsongs; i++) {
if (i != blacklist) song_rank.push_back(i);
}
std::sort(song_rank.begin(), song_rank.end(), [&](int a, int b){
return scores[a].score > scores[b].score;
});
fout << songid << ',' << t;
for (int rank = 0; rank < 10 && rank < song_rank.size(); rank++) {
int which = song_rank[rank];
fout << ',' << which << ',' << scores[which].score << ',' << scores[which].offset;
}
fout << '\n';
}
fout.flush();
return 0;
}
/// Entry point: distributes the peak files listed in argv[1] over the MPI
/// ranks (rank r handles entries r, r + nprocs, ...), matches each of them
/// against the database loaded from argv[2] and writes the results of this
/// rank to "<argv[3]>-pid-<rank>".
///
/// The position of a file in the list is used both as its song id and as the
/// database index excluded from its own ranking.
///
/// @return 0 on success, 1 on a usage error or when the list, database or
///         result file cannot be opened.
int main(int argc, char *argv[]) {
    int nprocs, pid;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);

    // Every exit path below calls MPI_Finalize: MPI requires it once
    // MPI_Init has succeeded.
    if (argc < 4) {
        if (pid == 0)
            printf("Usage: ./finddup <peak file list> <database dir> <result file>\n");
        MPI_Finalize();
        return 1;
    }

    Timing timing;
    std::ifstream flist(argv[1]);
    if (!flist) {
        printf("cannot read peak list!\n");
        MPI_Finalize();
        return 1;
    }

    // One log per rank; snprintf bounds the write to the buffer size.
    char namebuf[100];
    snprintf(namebuf, sizeof(namebuf), "finddup-pid-%d", pid);
    init_logger(namebuf);

    // Lines are kept verbatim (including empty ones) because the line index
    // doubles as the song id.
    std::string line;
    std::vector<std::string> queryList;
    while (std::getline(flist, line)) {
        queryList.push_back(line);
    }
    flist.close();
    LOG_DEBUG("read peak list %.3fs", timing.getRunTime() * 0.001);

    Database db;
    if (db.load(argv[2])) {
        LOG_FATAL("cannot load database");
        MPI_Finalize();
        return 1;
    }

    // Each rank writes to its own result file to avoid concurrent writers.
    const std::string resultPath = std::string(argv[3]) + "-pid-" + std::to_string(pid);
    std::ofstream fout(resultPath);
    if (!fout) {
        LOG_FATAL("cannot write result!");
        MPI_Finalize();
        return 1;
    }
    LOG_DEBUG("load database %.3fs", timing.getRunTime() * 0.001);

    LandmarkBuilder builder;
    const int nQueries = static_cast<int>(queryList.size());
    for (int i = pid; i < nQueries; i += nprocs) {
        const std::string &name = queryList[i];
        LOG_INFO("File: %s", name.c_str());
        if (processQuery(name, builder, db, i, i, fout) != 0) {
            // Keep going with the remaining files, but make the failure visible.
            LOG_INFO("cannot process peak file: %s", name.c_str());
        }
    }
    fout.close();

    LOG_INFO("Total time: %.3fs", timing.getRunTime() * 0.001);
    MPI_Finalize();
    return 0;
}