-
Notifications
You must be signed in to change notification settings - Fork 2
/
ExportDatasets.java
76 lines (64 loc) · 2.68 KB
/
ExportDatasets.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package NewApproaches;
import BlockBuilding.StandardBlocking;
import DataModel.AbstractBlock;
import DataModel.BilateralBlock;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.List;
/**
 * Stand-alone utility that computes the purged blocks for a pair of entity
 * collections and dumps them to a {@code blocks.txt} file placed in the
 * dataset's own directory.
 *
 * <p>One line is written per block, in the form
 * {@code <blockIndex> TAB <id>#<id>#...;<id>#<id>#... LF}:
 * <ul>
 *   <li>{@code blockIndex} is a 1-based running counter;</li>
 *   <li>the segment before {@code ;} lists the entity ids returned by
 *       {@code getIndex1Entities()};</li>
 *   <li>the segment after {@code ;} lists the ids returned by
 *       {@code getIndex2Entities()}, each re-encoded as {@code -id - 1};</li>
 *   <li>every id, including the last one of a segment, is followed by
 *       {@code #}.</li>
 * </ul>
 *
 * @author G.A.P. II
 */
public class ExportDatasets {

    /** Separates the first-collection ids from the second-collection ids within a line. */
    private static final String SEGMENT_DELIMITER = ";";

    /** Terminates every entity id within a segment. */
    private static final String VALUE_DELIMITER = "#";

    /**
     * Exports the blocks of every configured dataset pair.
     *
     * <p>The three arrays below are parallel: entry {@code i} of each one
     * describes the same dataset pair. Entries are enabled or disabled by
     * (un)commenting them, so all three arrays must be edited together.
     *
     * @param args ignored
     * @throws IllegalStateException if the three configuration arrays do not
     *         have the same length
     * @throws ClassCastException if a returned block is not a {@link BilateralBlock}
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws FileNotFoundException if {@code blocks.txt} cannot be opened for writing
     * @throws IOException if writing or closing the output file fails
     */
    public static void main(String[] args) throws UnsupportedEncodingException, FileNotFoundException, IOException {
//        String mainDirectory = "/home/gpapadakis/newData/";
        String mainDirectory = "C:\\Users\\VASILIS\\Documents\\OAEI_Datasets\\";

        String[] datasetsPaths = {
//            mainDirectory + "restaurant/",
            mainDirectory + "rexa-dblp\\",
//            mainDirectory + "yago_imdb/",
//            mainDirectory + "bbcMusic\\"
        };

        String[] d1Datasets = {
//            "restaurant1Profiles",
            "rexaProfiles",
//            "yagoProfiles",
//            "bbc-musicNewNoRdfProfiles"
        };

        String[] d2Datasets = {
//            "restaurant2Profiles",
            "swetodblp_april_2008Profiles",
//            "imdbProfiles",
//            "dbpedia37processedNewNoSameAsNoWikipediaSortedProfiles"
        };

        // Fail fast if the arrays were toggled inconsistently, instead of
        // crashing half-way through a run or silently skipping datasets.
        if (datasetsPaths.length != d1Datasets.length || d2Datasets.length != d1Datasets.length) {
            throw new IllegalStateException("datasetsPaths (" + datasetsPaths.length
                    + "), d1Datasets (" + d1Datasets.length
                    + ") and d2Datasets (" + d2Datasets.length
                    + ") must contain the same number of entries");
        }

        for (int datasetIndex = 0; datasetIndex < d1Datasets.length; datasetIndex++) {
            String datasetPath = datasetsPaths[datasetIndex];

            // try-with-resources guarantees the file is flushed and closed even
            // when block building or writing throws.
            try (Writer out = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(datasetPath + "blocks.txt"), "UTF-8"))) {
                Preprocessing preprocessing = new Preprocessing(datasetPath + d1Datasets[datasetIndex],
                        datasetPath + d2Datasets[datasetIndex]);

                // NOTE(review): the blocking method is fixed to RexaDBLPTokenBlocking
                // for every dataset in the loop - confirm this is intended before
                // enabling the other dataset entries.
                final List<AbstractBlock> valueBlocks = preprocessing.getPurgedBlocks(new RexaDBLPTokenBlocking());

                writeBlocks(out, valueBlocks);
            }
        }
    }

    /**
     * Writes one line per block to {@code out} in the format described in the
     * class documentation.
     *
     * @param out    destination; not closed by this method
     * @param blocks blocks to serialise; each one must be a {@link BilateralBlock}
     * @throws IOException if writing fails
     */
    private static void writeBlocks(Writer out, List<AbstractBlock> blocks) throws IOException {
        int index = 0;
        for (AbstractBlock block : blocks) {
            index++;
            out.write(index + "\t");

            BilateralBlock bBlock = (BilateralBlock) block;
            for (int entityId : bBlock.getIndex1Entities()) {
                out.write(entityId + VALUE_DELIMITER);
            }

            out.write(SEGMENT_DELIMITER);

            for (int entityId : bBlock.getIndex2Entities()) {
                // Maps id k to -(k + 1): 0 -> -1, 1 -> -2, ... so non-negative
                // second-collection ids are written as negative numbers.
                int newId = -entityId - 1;
                out.write(newId + VALUE_DELIMITER);
            }

            out.write("\n");
        }
    }
}