This repository has been archived by the owner on Dec 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathMachine.scala
126 lines (100 loc) · 4.04 KB
/
Machine.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
//Machine.scala
//This is the machine to be included in the main program, that does calculations and matching
import IO.File
import scala.collection.mutable.ArrayBuffer
/**
 * Contract for a row-matching machine that compares the rows of a "source"
 * table against the rows of a "comparator" table.
 */
trait Machine {
  /** ABSTRACT: returns true if source row `i` matches comparator row `j`. */
  def matchRow(i: Int, j: Int): Boolean

  /** For the `i`th row in source, returns the list of matching row indices in comparator. */
  def rowInComparator(i: Int): List[Int]

  /**
   * Computes, for the whole source, the mapping from each source row to its matches.
   * The result type is Unit, so implementations publish the mapping as a side effect.
   */
  def calculateMatchingRows: Unit

  /**
   * Computes the indices of the source rows that have at least one match in the comparator.
   * The result type is Unit, so implementations publish the list as a side effect.
   */
  def calculateRowsWithMatches: Unit
}
/**
 * Base machine that eagerly computes, at construction time, which comparator
 * rows match each source row. The matching rule itself is `matchRow`, which
 * this class stubs out (nothing matches) and subclasses override.
 *
 * @param source     table whose rows are looked up
 * @param comparator table whose rows are searched for matches
 */
class RunableMachine(source: File, comparator: File) extends Machine {
  /** `arrayOfMatches(i)` holds the comparator row indices that match source row `i`, ascending. */
  var arrayOfMatches: Array[List[Int]] = new Array[List[Int]](source.rowCount)

  /** Indices of the source rows that have at least one match, ascending. */
  var rowsWithMatches: List[Int] = Nil

  // Both results are filled in while the object is being constructed. These
  // calls dispatch to the `matchRow` override of the concrete subclass, so
  // the order of the two statements (and their position after the fields
  // above) matters.
  calculateMatchingRows
  calculateRowsWithMatches

  /** Default matching rule: no pair of rows ever matches. Subclasses override this. */
  def matchRow(i: Int, j: Int): Boolean = false

  /** For the `i`th row in source, returns the matching comparator row indices in ascending order. */
  def rowInComparator(i: Int): List[Int] =
    (0 until comparator.rowCount).filter(j => matchRow(i, j)).toList

  /** Fills `arrayOfMatches` with the matches of every source row. */
  def calculateMatchingRows: Unit = {
    for (i <- 0 until source.rowCount)
      arrayOfMatches(i) = rowInComparator(i)
  }

  /**
   * Rebuilds `rowsWithMatches` from `arrayOfMatches`; expects
   * `calculateMatchingRows` to have run first.
   */
  def calculateRowsWithMatches: Unit = {
    // Built in one pass: appending to an immutable List inside a loop would
    // copy the list on every append.
    rowsWithMatches =
      (0 until source.rowCount).filter(i => arrayOfMatches(i).nonEmpty).toList
  }
}
//The implemented machine to be used
/**
 * Machine whose rows match when at least `percentage` percent of the source
 * columns hold a value that also occurs somewhere in the comparator row
 * (in any column).
 *
 * @param source     table whose rows are looked up
 * @param comparator table whose rows are searched for matches
 * @param percentage share of the source columns (0-100) that must be found
 *                   in the comparator row
 */
class PercentageEqualityMachine(source: File, comparator: File, percentage: Int) extends RunableMachine(source, comparator) {
  /**
   * Returns true if enough values of source row `i` occur in comparator row `j`.
   *
   * The required number of values is ceil(percentage * columnCount / 100).
   * A threshold of zero or less is satisfied without looking at the rows.
   */
  override def matchRow(i: Int, j: Int): Boolean = {
    val sourceColumns = source.columnCount
    // Exact integer ceiling. Doing this in Float can overshoot by one when
    // the product is a whole number: 60% of 25 columns evaluates to
    // 15.00000095f, whose ceiling is 16 instead of 15.
    val threshold: Int =
      java.lang.Math.floorDiv(percentage.toLong * sourceColumns + 99L, 100L).toInt

    if (threshold <= 0) true
    else {
      val sourceRow = source.row(i)
      val comparatorRow = comparator.row(j)
      val comparatorColumns = comparator.columnCount

      var count = 0
      var k = 0
      // Stop as soon as the threshold is reached; the remaining columns
      // cannot change the outcome.
      while (count < threshold && k < sourceColumns) {
        val cell = sourceRow(k)
        var found = false
        var l = 0
        while (!found && l < comparatorColumns) {
          found = cell == comparatorRow(l)
          l += 1
        }
        if (found) count += 1
        k += 1
      }
      count >= threshold
    }
  }
}
/**
 * Machine whose rows match when they share the same "distribution", i.e. the
 * same pattern of repeated values regardless of what the values are. For
 * example the rows (a, b, a, c) and (x, y, x, z) both have the distribution
 * (0, 1, 0, 2).
 *
 * @param source     table whose rows are looked up
 * @param comparator table whose rows are searched for matches
 */
class DistributionMachine(source: File, comparator: File) extends RunableMachine(source, comparator) {
  /**
   * Returns true if source row `i` and comparator row `j` have the same
   * distribution over the first `source.columnCount` columns.
   *
   * If the comparator has fewer columns than the source the rows cannot be
   * compared position by position, so they are reported as not matching.
   * Extra comparator columns are ignored.
   */
  override def matchRow(i: Int, j: Int): Boolean = {
    val row1 = source.row(i)
    val row2 = comparator.row(j)
    val sourceD = new Array[Int](source.columnCount)
    val compD = new Array[Int](comparator.columnCount)
    calcD(row1, sourceD)
    calcD(row2, compD)
    // The length guard prevents indexing past the end of compD.
    sourceD.length <= compD.length &&
      sourceD.indices.forall(col => sourceD(col) == compD(col))
  }

  /**
   * Writes the distribution of `arow` into `dis`.
   *
   * Each distinct value gets a label in order of first appearance (0, 1, 2,
   * ...), and every position receives the label of the value it holds. Only
   * the first `dis.length` positions of `arow` are processed, so `arow` must
   * have at least that many elements.
   *
   * @param arow row of values to analyse
   * @param dis  output array, overwritten in place
   */
  def calcD(arow: ArrayBuffer[String], dis: Array[Int]): Unit = {
    var nextLabel = 0
    for (pos <- dis.indices) {
      // The first occurrence is never after pos, so its label is already set.
      val firstSeen = arow.indexOf(arow(pos))
      if (firstSeen == pos) {
        dis(pos) = nextLabel
        nextLabel += 1
      } else {
        dis(pos) = dis(firstSeen)
      }
    }
  }
}
//The implemented machine to be used
/**
 * Percentage machine fixed at 100 percent: the rows match only when the
 * percentage rule is satisfied for all of the source columns.
 *
 * @param source     table whose rows are looked up
 * @param comparator table whose rows are searched for matches
 */
class EqualityMachine(source: File, comparator: File)
    extends PercentageEqualityMachine(source, comparator, percentage = 100)
//The implemented machine to be used
/**
 * Machine whose rows match when at least one value of the source row also
 * occurs somewhere in the comparator row.
 *
 * The percentage handed to the parent is the integer share that one column
 * represents. It is kept for compatibility, but an integer percentage cannot
 * express "exactly one column" for every width: with more than 100 columns
 * `100 / columnCount` is 0, which would make every pair of rows match. The
 * rule is therefore implemented directly in `matchRow`.
 *
 * @param source     table whose rows are looked up
 * @param comparator table whose rows are searched for matches
 */
class SingleEqualityMachine(source: File, comparator: File)
    extends PercentageEqualityMachine(
      source,
      comparator,
      // Guard the divisor so a table without columns does not throw here.
      100 / math.max(1, source.columnCount)
    ) {
  /**
   * Returns true if some value of source row `i` equals some value of
   * comparator row `j`. Returns false when either table has no columns.
   */
  override def matchRow(i: Int, j: Int): Boolean = {
    val sourceColumns = source.columnCount
    val comparatorColumns = comparator.columnCount
    if (sourceColumns <= 0 || comparatorColumns <= 0) false
    else {
      val sourceRow = source.row(i)
      val comparatorRow = comparator.row(j)
      (0 until sourceColumns).exists { k =>
        val cell = sourceRow(k)
        (0 until comparatorColumns).exists(l => cell == comparatorRow(l))
      }
    }
  }
}