From 379d707969cdccc3f61a2f9c80f09deda744b5ed Mon Sep 17 00:00:00 2001 From: bvaughn Date: Sun, 2 Aug 2015 10:45:32 -0700 Subject: [PATCH 1/2] Resolves #169. Convert stop words filter from SortedSet/Array to Object. All stop words tests pass after this change. --- lib/stop_word_filter.js | 249 ++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 126 deletions(-) diff --git a/lib/stop_word_filter.js b/lib/stop_word_filter.js index 285f94c6..ef97e8f5 100644 --- a/lib/stop_word_filter.js +++ b/lib/stop_word_filter.js @@ -16,132 +16,129 @@ * @see lunr.Pipeline */ lunr.stopWordFilter = function (token) { - if (lunr.stopWordFilter.stopWords.indexOf(token) === -1) return token + if (token && !lunr.stopWordFilter.stopWords[token]) return token; } -lunr.stopWordFilter.stopWords = new lunr.SortedSet -lunr.stopWordFilter.stopWords.length = 119 -lunr.stopWordFilter.stopWords.elements = [ - "", - "a", - "able", - "about", - "across", - "after", - "all", - "almost", - "also", - "am", - "among", - "an", - "and", - "any", - "are", - "as", - "at", - "be", - "because", - "been", - "but", - "by", - "can", - "cannot", - "could", - "dear", - "did", - "do", - "does", - "either", - "else", - "ever", - "every", - "for", - "from", - "get", - "got", - "had", - "has", - "have", - "he", - "her", - "hers", - "him", - "his", - "how", - "however", - "i", - "if", - "in", - "into", - "is", - "it", - "its", - "just", - "least", - "let", - "like", - "likely", - "may", - "me", - "might", - "most", - "must", - "my", - "neither", - "no", - "nor", - "not", - "of", - "off", - "often", - "on", - "only", - "or", - "other", - "our", - "own", - "rather", - "said", - "say", - "says", - "she", - "should", - "since", - "so", - "some", - "than", - "that", - "the", - "their", - "them", - "then", - "there", - "these", - "they", - "this", - "tis", - "to", - "too", - "twas", - "us", - "wants", - "was", - "we", - "were", - "what", - "when", - "where", - "which", - "while", - "who", - "whom", - "why", - "will", - "with", - "would", - "yet", - "you", - "your" -] +lunr.stopWordFilter.stopWords = { + a: true, + able: true, + about: true, + across: true, + after: true, + all: true, + almost: true, + also: true, + am: true, + among: true, + an: true, + and: true, + any: true, + are: true, + as: true, + at: true, + be: true, + because: true, + been: true, + but: true, + by: true, + can: true, + cannot: true, + could: true, + dear: true, + did: true, + do: true, + does: true, + either: true, + else: true, + ever: true, + every: true, + for: true, + from: true, + get: true, + got: true, + had: true, + has: true, + have: true, + he: true, + her: true, + hers: true, + him: true, + his: true, + how: true, + however: true, + i: true, + if: true, + in: true, + into: true, + is: true, + it: true, + its: true, + just: true, + least: true, + let: true, + like: true, + likely: true, + may: true, + me: true, + might: true, + most: true, + must: true, + my: true, + neither: true, + no: true, + nor: true, + not: true, + of: true, + off: true, + often: true, + on: true, + only: true, + or: true, + other: true, + our: true, + own: true, + rather: true, + said: true, + say: true, + says: true, + she: true, + should: true, + since: true, + so: true, + some: true, + than: true, + that: true, + the: true, + their: true, + them: true, + then: true, + there: true, + these: true, + they: true, + this: true, + tis: true, + to: true, + too: true, + twas: true, + us: true, + wants: true, + was: true, + we: true, + were: true, + what: true, + when: true, + where: true, + which: true, + while: true, + who: true, + whom: true, + why: true, + will: true, + with: true, + would: true, + yet: true, + you: true, + your: true +} -lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') +lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') \ No newline at end of file From 9dbfeca60cb4513a9f1f6f9c2d4e8b9232670eb9 Mon Sep 17 00:00:00 2001 From: bvaughn Date: Mon, 3 Aug 2015 18:39:07 -0700 Subject: [PATCH 2/2] Updated stop words filter to avoid collisions with Object.prototype properties. Added additional unit test. --- lib/stop_word_filter.js | 240 +++++++++++++++++----------------- test/stop_word_filter_test.js | 8 ++ 2 files changed, 128 insertions(+), 120 deletions(-) diff --git a/lib/stop_word_filter.js b/lib/stop_word_filter.js index ef97e8f5..5aabd467 100644 --- a/lib/stop_word_filter.js +++ b/lib/stop_word_filter.js @@ -16,129 +16,129 @@ * @see lunr.Pipeline */ lunr.stopWordFilter = function (token) { - if (token && !lunr.stopWordFilter.stopWords[token]) return token; + if (token && lunr.stopWordFilter.stopWords[token] !== token) return token; } lunr.stopWordFilter.stopWords = { - a: true, - able: true, - about: true, - across: true, - after: true, - all: true, - almost: true, - also: true, - am: true, - among: true, - an: true, - and: true, - any: true, - are: true, - as: true, - at: true, - be: true, - because: true, - been: true, - but: true, - by: true, - can: true, - cannot: true, - could: true, - dear: true, - did: true, - do: true, - does: true, - either: true, - else: true, - ever: true, - every: true, - for: true, - from: true, - get: true, - got: true, - had: true, - has: true, - have: true, - he: true, - her: true, - hers: true, - him: true, - his: true, - how: true, - however: true, - i: true, - if: true, - in: true, - into: true, - is: true, - it: true, - its: true, - just: true, - least: true, - let: true, - like: true, - likely: true, - may: true, - me: true, - might: true, - most: true, - must: true, - my: true, - neither: true, - no: true, - nor: true, - not: true, - of: true, - off: true, - often: true, - on: true, - only: true, - or: true, - other: true, - our: true, - own: true, - rather: true, - said: true, - say: true, - says: true, - she: true, - should: true, - since: true, - so: true, - some: true, - than: true, - that: true, - the: true, - their: true, - them: true, - then: true, - there: true, - these: true, - they: true, - this: true, - tis: true, - to: true, - too: true, - twas: true, - us: true, - wants: true, - was: true, - we: true, - were: true, - what: true, - when: true, - where: true, - which: true, - while: true, - who: true, - whom: true, - why: true, - will: true, - with: true, - would: true, - yet: true, - you: true, - your: true + a: 'a', + able: 'able', + about: 'about', + across: 'across', + after: 'after', + all: 'all', + almost: 'almost', + also: 'also', + am: 'am', + among: 'among', + an: 'an', + and: 'and', + any: 'any', + are: 'are', + as: 'as', + at: 'at', + be: 'be', + because: 'because', + been: 'been', + but: 'but', + by: 'by', + can: 'can', + cannot: 'cannot', + could: 'could', + dear: 'dear', + did: 'did', + do: 'do', + does: 'does', + either: 'either', + else: 'else', + ever: 'ever', + every: 'every', + for: 'for', + from: 'from', + get: 'get', + got: 'got', + had: 'had', + has: 'has', + have: 'have', + he: 'he', + her: 'her', + hers: 'hers', + him: 'him', + his: 'his', + how: 'how', + however: 'however', + i: 'i', + if: 'if', + in: 'in', + into: 'into', + is: 'is', + it: 'it', + its: 'its', + just: 'just', + least: 'least', + let: 'let', + like: 'like', + likely: 'likely', + may: 'may', + me: 'me', + might: 'might', + most: 'most', + must: 'must', + my: 'my', + neither: 'neither', + no: 'no', + nor: 'nor', + not: 'not', + of: 'of', + off: 'off', + often: 'often', + on: 'on', + only: 'only', + or: 'or', + other: 'other', + our: 'our', + own: 'own', + rather: 'rather', + said: 'said', + say: 'say', + says: 'says', + she: 'she', + should: 'should', + since: 'since', + so: 'so', + some: 'some', + than: 'than', + that: 'that', + the: 'the', + their: 'their', + them: 'them', + then: 'then', + there: 'there', + these: 'these', + they: 'they', + this: 'this', + tis: 'tis', + to: 'to', + too: 'too', + twas: 'twas', + us: 'us', + wants: 'wants', + was: 'was', + we: 'we', + were: 'were', + what: 'what', + when: 'when', + where: 'where', + which: 'which', + while: 'while', + who: 'who', + whom: 'whom', + why: 'why', + will: 'will', + with: 'with', + would: 'would', + yet: 'yet', + you: 'you', + your: 'your' } lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') \ No newline at end of file diff --git a/test/stop_word_filter_test.js b/test/stop_word_filter_test.js index 8713cb96..00b6d11e 100644 --- a/test/stop_word_filter_test.js +++ b/test/stop_word_filter_test.js @@ -16,6 +16,14 @@ test('non stop words pass through', function () { }) }) +test('should not filter Object.prototype terms', function () { + var nonStopWords = ['constructor', 'hasOwnProperty', 'toString', 'valueOf'] + + nonStopWords.forEach(function (word) { + equal(lunr.stopWordFilter(word), word) + }) +}) + test('should be registered with lunr.Pipeline', function () { equal(lunr.stopWordFilter.label, 'stopWordFilter') deepEqual(lunr.Pipeline.registeredFunctions['stopWordFilter'], lunr.stopWordFilter)