Skip to content

Commit

Permalink
fuzzy completion wip
Browse files Browse the repository at this point in the history
  • Loading branch information
cichli committed Dec 31, 2014
1 parent e32d952 commit 352f42c
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 10 deletions.
2 changes: 2 additions & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
:license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies []
:source-paths ["src/clj"]
:java-source-paths ["src/java"]
:profiles {:dev {:dependencies [[org.clojure/clojure "1.5.1"]
[org.clojure/clojurescript "0.0-2202"]
[org.clojure/core.async "0.1.303.0-886421-alpha"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
(ns cljs-tooling.complete
"Standalone auto-complete library based on cljs analyzer state"
(:require [cljs-tooling.util.analysis :as a]
[cljs-tooling.util.misc :as u]))
[cljs-tooling.util.misc :as u])
(:import [cljs_tooling DamerauLevenshtein LiquidMetal]))

;;; TODO
(defn ns-classes
Expand Down Expand Up @@ -61,14 +62,47 @@
(scoped-completions env sym context-ns)
(unscoped-completions env context-ns)))

;; Shared scorer configured with unit costs: the constructor arguments are the
;; delete, insert, replace and swap costs, in that order.
;; The class type hint lets the `.execute` call below be resolved at compile
;; time instead of through reflection on every candidate.
(def ^:private ^cljs_tooling.DamerauLevenshtein damerau-levenshtein-scorer
  (cljs_tooling.DamerauLevenshtein. 1 1 1 1))

(defn- damerau-levenshtein-score
  "Returns the Damerau-Levenshtein edit distance (an int) needed to turn the
  string `source` into the string `target`, using the shared unit-cost scorer."
  [source target]
  (.execute damerau-levenshtein-scorer source target))

(defn damerau-levenshtein-match?
  "Fuzzy prefix test based on edit distance.

  Compares `prefix` against the leading (.length prefix) characters of `name`
  and returns true when their Damerau-Levenshtein distance is at most
  ceil(prefix-length / 4), false when it is larger, and nil when `name` is
  shorter than `prefix`.

  Both arguments must be strings; the type hints avoid reflective `.length`
  calls, since this predicate runs once per completion candidate."
  [^String name ^String prefix]
  (let [prefix-length (.length prefix)]
    (when (<= prefix-length (.length name))
      (let [head (subs name 0 prefix-length)
            ;; allow roughly one edit per four typed characters
            threshold (java.lang.Math/ceil (/ prefix-length 4))]
        (>= threshold (damerau-levenshtein-score head prefix))))))

(defn liquidmetal-match?
  "Returns true when the LiquidMetal score of `prefix` treated as an
  abbreviation of `name` is at least 0.5, false otherwise."
  [name prefix]
  (let [score (cljs_tooling.LiquidMetal/score name prefix)]
    (>= score 0.5)))

(defn prefix-match?
  "Returns true when the string `name` starts with the string `prefix`
  (exact, case-sensitive), false otherwise.

  `name` is hinted as a String so the `.startsWith` call is resolved at
  compile time rather than through reflection for every candidate."
  [^String name prefix]
  (.startsWith name prefix))

(defn match?
  "Default completion predicate. A candidate `name` matches `prefix` when any
  of the following accepts it, tried in this order and stopping at the first
  success: exact prefix match, Damerau-Levenshtein fuzzy prefix match,
  LiquidMetal abbreviation match."
  [name prefix]
  (cond
    (prefix-match? name prefix) true
    (damerau-levenshtein-match? name prefix) true
    :else (liquidmetal-match? name prefix)))

;; NOTE(review): this listing appears to interleave the removed and the added
;; side of a diff - the first `[env prefix context-ns]` body (hard-coded
;; `.startsWith` filter) looks like the old version and the bodies after the
;; blank line like its replacement. Confirm which lines are live.
(defn completions
"Return a sequence of matching completions given current namespace and a prefix string"
([env prefix] (completions env prefix nil))
([env prefix context-ns]
(->> (potential-completions env (u/as-sym prefix) (u/as-sym context-ns))
distinct
(map str)
(filter #(.startsWith % prefix))
sort)))

;; 3-arity: delegates to the 4-arity using `match?` as the predicate.
([env prefix context-ns] (completions env prefix context-ns match?))
;; 4-arity: `match-fn` is called as (match-fn candidate-string prefix) and
;; decides which candidates are kept.
([env prefix context-ns match-fn]
(->> (potential-completions env (u/as-sym prefix) (u/as-sym context-ns))
distinct
(map str)
(filter #(match-fn % prefix))
sort
;; force the sequence so it is fully realized before being returned
doall)))

(defn completions-test
  "Debugging aid: prints every potential completion that passes
  `damerau-levenshtein-match?` for `prefix`, followed by its edit-distance
  score against `prefix`. Returns nil."
  [env prefix context-ns]
  (doseq [candidate (potential-completions env (u/as-sym prefix) (u/as-sym context-ns))
          :let [candidate-name (str candidate)]
          :when (damerau-levenshtein-match? candidate-name prefix)]
    (println candidate (damerau-levenshtein-score candidate-name prefix))))
File renamed without changes.
File renamed without changes.
File renamed without changes.
160 changes: 160 additions & 0 deletions src/java/cljs_tooling/DamerauLevenshtein.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Adapted from https://github.com/KevinStern/software-and-algorithms/blob/master/src/main/java/blogspot/software_and_algorithms/stern_library/string/DamerauLevenshteinAlgorithm.java

package cljs_tooling;

import java.util.HashMap;
import java.util.Map;

/* Copyright (c) 2012 Kevin L. Stern
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

/**
* The Damerau-Levenshtein Algorithm is an extension to the Levenshtein
* Algorithm which solves the edit distance problem between a source string and
* a target string with the following operations:
*
* <ul>
* <li>Character Insertion</li>
* <li>Character Deletion</li>
* <li>Character Replacement</li>
* <li>Adjacent Character Swap</li>
* </ul>
*
* Note that the adjacent character swap operation is an edit that may be
* applied when two adjacent characters in the source string match two adjacent
* characters in the target string, but in reverse order, rather than a general
* allowance for adjacent character swaps.
* <p>
*
* This implementation allows the client to specify the costs of the various
* edit operations with the restriction that the cost of two swap operations
* must not be less than the cost of a delete operation followed by an insert
* operation. This restriction is required to preclude two swaps involving the
* same character being required for optimality which, in turn, enables a fast
* dynamic programming solution.
* <p>
*
* The running time of the Damerau-Levenshtein algorithm is O(n*m) where n is
* the length of the source string and m is the length of the target string.
* This implementation consumes O(n*m) space.
*
* @author Kevin L. Stern
*/
public class DamerauLevenshtein {
  private final int deleteCost, insertCost, replaceCost, swapCost;

  /**
   * Creates a distance calculator with the given per-operation costs.
   *
   * @param deleteCost
   *          the cost of deleting a character.
   * @param insertCost
   *          the cost of inserting a character.
   * @param replaceCost
   *          the cost of replacing a character.
   * @param swapCost
   *          the cost of swapping two adjacent characters.
   * @throws IllegalArgumentException
   *           if {@code 2 * swapCost < insertCost + deleteCost}.
   */
  public DamerauLevenshtein(int deleteCost, int insertCost,
      int replaceCost, int swapCost) {
    /*
     * Required to facilitate the premise to the algorithm that two swaps of the
     * same character are never required for optimality.
     */
    if (2 * swapCost < insertCost + deleteCost) {
      throw new IllegalArgumentException("Unsupported cost assignment");
    }
    this.deleteCost = deleteCost;
    this.insertCost = insertCost;
    this.replaceCost = replaceCost;
    this.swapCost = swapCost;
  }

  /**
   * Compute the Damerau-Levenshtein distance between the specified source
   * string and the specified target string.
   *
   * @param source
   *          the string to transform; must not be null.
   * @param target
   *          the string to transform {@code source} into; must not be null.
   * @return the minimum total cost of the edit operations that turn
   *         {@code source} into {@code target}.
   */
  public int execute(String source, String target) {
    if (source.length() == 0) {
      return target.length() * insertCost;
    }
    if (target.length() == 0) {
      return source.length() * deleteCost;
    }
    // table[i][j] is the distance between source[0..i] and target[0..j]
    // (both bounds inclusive).
    int[][] table = new int[source.length()][target.length()];
    // Last row index at which each source character has been seen so far.
    Map<Character, Integer> sourceIndexByCharacter = new HashMap<Character, Integer>();
    if (source.charAt(0) != target.charAt(0)) {
      table[0][0] = Math.min(replaceCost, deleteCost + insertCost);
    }
    sourceIndexByCharacter.put(source.charAt(0), 0);
    // First column: source prefixes against the first target character.
    for (int i = 1; i < source.length(); i++) {
      int deleteDistance = table[i - 1][0] + deleteCost;
      int insertDistance = (i + 1) * deleteCost + insertCost;
      int matchDistance = i * deleteCost
          + (source.charAt(i) == target.charAt(0) ? 0 : replaceCost);
      table[i][0] = Math.min(Math.min(deleteDistance, insertDistance),
          matchDistance);
    }
    // First row: the first source character against target prefixes.
    for (int j = 1; j < target.length(); j++) {
      int deleteDistance = (j + 1) * insertCost + deleteCost;
      int insertDistance = table[0][j - 1] + insertCost;
      int matchDistance = j * insertCost
          + (source.charAt(0) == target.charAt(j) ? 0 : replaceCost);
      table[0][j] = Math.min(Math.min(deleteDistance, insertDistance),
          matchDistance);
    }
    for (int i = 1; i < source.length(); i++) {
      // Largest target index seen so far in this row whose character equals
      // source[i], or -1 if there is none yet.
      int maxSourceLetterMatchIndex = source.charAt(i) == target.charAt(0) ? 0
          : -1;
      for (int j = 1; j < target.length(); j++) {
        Integer candidateSwapIndex = sourceIndexByCharacter.get(target
            .charAt(j));
        int jSwap = maxSourceLetterMatchIndex;
        int deleteDistance = table[i - 1][j] + deleteCost;
        int insertDistance = table[i][j - 1] + insertCost;
        int matchDistance = table[i - 1][j - 1];
        if (source.charAt(i) != target.charAt(j)) {
          matchDistance += replaceCost;
        } else {
          maxSourceLetterMatchIndex = j;
        }
        int swapDistance;
        if (candidateSwapIndex != null && jSwap != -1) {
          int iSwap = candidateSwapIndex;
          // Cost of turning source[0..iSwap-1] into target[0..jSwap-1], i.e.
          // everything that precedes the swapped pair. When one of those
          // prefixes is empty the table has no entry for it, so the cost is
          // computed directly: an empty source prefix means every preceding
          // target character is inserted, an empty target prefix means every
          // preceding source character is deleted. (Reading a clamped table
          // cell here would underestimate the distance for non-uniform costs.)
          int preSwapCost;
          if (iSwap == 0) {
            preSwapCost = jSwap * insertCost;
          } else if (jSwap == 0) {
            preSwapCost = iSwap * deleteCost;
          } else {
            preSwapCost = table[iSwap - 1][jSwap - 1];
          }
          swapDistance = preSwapCost + (i - iSwap - 1) * deleteCost
              + (j - jSwap - 1) * insertCost + swapCost;
        } else {
          swapDistance = Integer.MAX_VALUE;
        }
        table[i][j] = Math.min(Math.min(Math
            .min(deleteDistance, insertDistance), matchDistance), swapDistance);
      }
      sourceIndexByCharacter.put(source.charAt(i), i);
    }
    return table[source.length() - 1][target.length() - 1];
  }
}
88 changes: 88 additions & 0 deletions src/java/cljs_tooling/LiquidMetal.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Adapted from https://gist.github.com/rmsy/5137611

/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package cljs_tooling;

import java.util.Collection;

/**
 * Abbreviation scorer: rates how well a short abbreviation matches a longer
 * string, as a value between {@link #SCORE_NO_MATCH} and {@link #SCORE_MATCH}.
 * Every character of the string receives an individual score and the result is
 * the average of those scores.
 */
public class LiquidMetal {
    public static final double SCORE_NO_MATCH = 0.0;
    public static final double SCORE_MATCH = 1.0;
    public static final double SCORE_TRAILING = 0.8;
    public static final double SCORE_TRAILING_BUT_STARTED = 0.9;
    public static final double SCORE_BUFFER = 0.85;
    public static final String WORD_SEPARATORS = " \t_-./";

    /**
     * Scores {@code abbreviation} against {@code string}, ignoring case.
     *
     * @param string the candidate string
     * @param abbreviation the characters typed by the user
     * @return {@link #SCORE_TRAILING} for an empty abbreviation,
     *         {@link #SCORE_NO_MATCH} when the abbreviation is longer than the
     *         string or its characters do not occur in order in the string,
     *         otherwise the mean of the per-character scores
     */
    public static final double score(String string, String abbreviation) {
        if (abbreviation.length() == 0) return SCORE_TRAILING;
        if (abbreviation.length() > string.length()) return SCORE_NO_MATCH;

        double[] scores = buildScoreArray(string, abbreviation);

        // complete miss:
        if (scores == null) return SCORE_NO_MATCH;

        double sum = 0.0;
        for (double score : scores) {
            sum += score;
        }

        return (sum / scores.length);
    }

    /**
     * Builds the per-character score array, or returns {@code null} when some
     * abbreviation character cannot be found after the previous match.
     */
    private static final double[] buildScoreArray(String string, String abbreviation) {
        double[] scores = new double[string.length()];
        // Locale.ROOT keeps matching independent of the JVM default locale
        // (e.g. under a Turkish locale "I" would otherwise lowercase to a
        // dotless i and never match "i").
        String lower = string.toLowerCase(java.util.Locale.ROOT);
        String chars = abbreviation.toLowerCase(java.util.Locale.ROOT);

        int lastIndex = -1;
        boolean started = false;
        for (int i = 0; i < chars.length(); i++) {
            char c = chars.charAt(i);
            int index = lower.indexOf(c, lastIndex + 1);

            if (index == -1) return null; // signal no match
            if (index == 0) started = true;

            if (isNewWord(string, index)) {
                // The match starts a word: the separator just before it gets
                // full credit and the skipped characters get the buffer score.
                scores[index - 1] = 1.0;
                fillArray(scores, SCORE_BUFFER, lastIndex + 1, index - 1);
            } else if (isUpperCase(string, index)) {
                // camelCase hump: skipped characters get the buffer score.
                fillArray(scores, SCORE_BUFFER, lastIndex + 1, index);
            } else {
                fillArray(scores, SCORE_NO_MATCH, lastIndex + 1, index);
            }

            scores[index] = SCORE_MATCH;
            lastIndex = index;
        }

        double trailingScore = started ? SCORE_TRAILING_BUT_STARTED : SCORE_TRAILING;
        fillArray(scores, trailingScore, lastIndex + 1, scores.length);
        return scores;
    }

    /**
     * Returns whether the character at {@code index} starts a new word, i.e.
     * whether the character immediately before it is one of
     * {@link #WORD_SEPARATORS}. Index 0 is never reported as a new word.
     */
    private static final boolean isNewWord(String string, int index) {
        if (index == 0) return false;
        char previous = string.charAt(index - 1);
        return WORD_SEPARATORS.indexOf(previous) != -1;
    }

    /** Sets {@code array[from..to)} to {@code value}; a no-op when {@code from >= to}. */
    private static final void fillArray(double[] array, double value, int from, int to) {
        for (int i = from; i < to; i++) {
            array[i] = value;
        }
    }

    /** Returns whether the character at {@code index} is an ASCII upper-case letter. */
    private static final boolean isUpperCase(String string, int index) {
        char c = string.charAt(index);
        return ('A' <= c && c <= 'Z');
    }
}
5 changes: 3 additions & 2 deletions test/cljs_tooling/test_complete.clj
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
(use-fixtures :once test-env/wrap-test-env)

;; Test helper wrapping cc/completions with the fixture environment
;; test-env/*env*.
;; NOTE(review): this listing appears to interleave the removed `[& args]`
;; version with the added multi-arity version of a diff - confirm which lines
;; are live. In the multi-arity version the 1- and 2-argument forms pass
;; cc/prefix-match? explicitly, and only the 3-argument form lets the caller
;; choose the match function.
(defn completions
[& args]
(apply cc/completions test-env/*env* args))
([prefix] (cc/completions test-env/*env* prefix nil cc/prefix-match?))
([prefix context-ns] (cc/completions test-env/*env* prefix context-ns cc/prefix-match?))
([prefix context-ns match-fn] (cc/completions test-env/*env* prefix context-ns match-fn)))

(deftest namespace-completions
(testing "Namespace"
Expand Down

0 comments on commit 352f42c

Please sign in to comment.