-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpellcheck.java
232 lines (212 loc) · 7.37 KB
/
Spellcheck.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
public class Spellcheck {
private static final String dictionaryPath = "/usr/share/dict/words";
// Using a map allows us to group the original dictionary by the first
// letter.
private Map<Character, LinkedHashSet<String>> dictionary = new HashMap<Character, LinkedHashSet<String>>();
private Set<Character> vowels = new HashSet<Character>(5);
// Indicates the number of differences between the input word and the tested
// dictionary word.
private int differences;
private int minDifferences;
// Singleton istance.
private static Spellcheck instance = null;
public static Spellcheck getInstance() {
if (instance == null) {
instance = new Spellcheck();
}
return instance;
}
// Initialize vowel set and load dictionary.
private Spellcheck() {
Collections.addAll(vowels, 'a', 'e', 'i', 'o', 'u');
try {
loadDictionary();
} catch (IOException e) {
e.printStackTrace();
}
}
private void loadDictionary() throws IOException {
BufferedReader br = new BufferedReader(new FileReader(dictionaryPath));
String line;
// Use ASCII code to put each letter of the alphabet as a map key.
for (int i = 0; i < 26; i++) {
dictionary.put(((char) (i + 97)), new LinkedHashSet<String>());
}
// Fill the map values without case-sensitivity.
while ((line = br.readLine()) != null) {
dictionary.get(Character.toLowerCase(line.charAt(0))).add(line);
}
br.close();
}
// As no English word is using three identical letters in a row, correctSyntax replaces
// every triplet or more by a pair.
private String correctSyntax(String word) {
boolean isDouble = false;
char lastChar = ' ';
String cleanWord = "";
for (char c : word.toCharArray()) {
if (Character.toLowerCase(lastChar) == Character.toLowerCase(c) && isDouble)
lastChar = c;
else if (Character.toLowerCase(lastChar) == Character.toLowerCase(c)) {
isDouble = true;
cleanWord += c;
lastChar = c;
} else {
isDouble = false;
cleanWord += c;
lastChar = c;
}
}
return cleanWord;
}
// Browse the given dictionary and keep the best match between the input
// word and all dictionary words.
private String findBestMatch(String word, LinkedHashSet<String> dictionary) {
String bestMatch = null;
int errorsMin = -1;
for (String currentDictWord : dictionary) {
// Any dictionary word longer than the input word suggest an error
// case we dont deal with.
if (currentDictWord.length() <= word.length()) {
boolean match = matches(word, currentDictWord);
// Replace the old bestmatch by the new if it's better. A match
// is based on the number of differences between the two words.
if (match == true && (errorsMin == -1 || differences < errorsMin)) {
bestMatch = currentDictWord;
errorsMin = differences;
minDifferences = differences;
}
}
}
return bestMatch;
}
// Evaluates the differences between the two words considering every given
// combinations of conflicts.
public boolean matches(String word, String dictWord) {
int wordLength = word.length();
int dictWordLength = dictWord.length();
// Correspond to the given error cases to treat in the exercise.
int correctableErrors = 0;
int pairCounter = 0;
differences = 0;
int i;
char previousDictWordChar = ' ';
for (i = 0; i < wordLength; i++) {
// Allows to count only one difference if the character is an
// uppercase and not a vowel nor a part of a pair.
boolean upper = false;
char wordChar = word.charAt(i);
char dictWordChar;
if (i - pairCounter < dictWordLength) {
// Ignores the second character of a pair if pair is detected as
// a difference.
dictWordChar = dictWord.charAt(i - pairCounter);
} else {
// Allows to continue even if the dictionnary word is too small.
dictWordChar = dictWord.charAt(dictWordLength - 1);
}
if (wordChar != dictWordChar) {
// Checks separately if the character is uppercase because it
// can be both upper and vowel or part of a pair.
if (Character.toUpperCase(dictWordChar) == Character.toUpperCase(wordChar)) {
correctableErrors++;
differences++;
upper = true;
}
// Checks if the character is a vowel without case
// sensitivity.
if (vowels.contains(Character.toLowerCase(dictWordChar))
&& vowels.contains(Character.toLowerCase(wordChar))
&& Character.toLowerCase(dictWordChar)
!= Character.toLowerCase(wordChar)) {
if (i > 0
&& Character.toLowerCase(wordChar)
== Character.toLowerCase(word.charAt(i - 1))
&& Character.toLowerCase(wordChar)
== Character.toLowerCase(previousDictWordChar)
&& wordLength - i
!= dictWordLength - (i - pairCounter)) {
correctableErrors++;
differences++;
pairCounter++;
} else {
correctableErrors++;
differences++;
}
// Checks if the current difference is caused by a pair of
// the same letter.
} else if (i > 0
&& Character.toLowerCase(wordChar) == Character.toLowerCase(word.charAt(i - 1))
&& upper == false) {
pairCounter++;
if (i >= pairCounter && dictWordLength > i - pairCounter) {
if (Character.toLowerCase(wordChar) == Character.toLowerCase(dictWord.charAt(i - pairCounter))) {
correctableErrors++;
differences++;
}
}
} else if (upper == false) {
// If the difference is not managed.
differences++;
}
}
previousDictWordChar = dictWordChar;
}
// Counts additional letters between a possible match and the actual
// dictionary word as differences.
if (dictWordLength >= i - pairCounter) {
while (dictWordLength > i - pairCounter) {
differences++;
pairCounter--;
}
}
return correctableErrors == differences;
}
// Reduces the complexity by using only the needed dictionary part.
private String correct(String word) {
// If the first letter is a vowel the matching word could be found in
// each word of the dictionary starting with a vowel.
if (vowels.contains(Character.toLowerCase(word.charAt(0)))) {
int bestDifferences = -1;
String bestMatch = null;
for (Character c : vowels) {
String correctedWord = findBestMatch(word, dictionary.get(c));
// Uses the same system as in findBestMatch to keep only the
// best match.
if (correctedWord != null
&& (bestDifferences == -1 || minDifferences < bestDifferences))
bestMatch = correctedWord;
}
return bestMatch;
// Otherwise the only dictionary part to consider is the one of the
// first letter of our input word.
} else {
return findBestMatch(word,
dictionary.get(Character.toLowerCase(word.charAt(0))));
}
}
public static void main(String[] args) {
while (true) {
System.out.print(">");
Scanner user_input = new Scanner(System.in);
String word = user_input.next();
System.out.println(word);
String correctedWord = Spellcheck.getInstance().correctSyntax(word);
correctedWord = Spellcheck.getInstance().correct(correctedWord);
if (correctedWord == null) {
System.out.println("NO SUGGESTIONS");
} else
System.out.println(correctedWord);
}
}
}