From 27fcdfcd80122dc3fb1ff5f1bc9b68e7b826db63 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 26 Apr 2016 20:00:33 +0200 Subject: [PATCH] Add more conformant unicode handling This update ensures emoji are correctly stripped (GH-4), and non-Latin characters are not incorrectly lower-cased (GH-6), and adds a bunch of (verified) test cases to ensure the future holds less broken builds. Closes GH-4. Closes GH-5. Closes GH-8. --- index.js | 15 +++++-- package.json | 7 ++++ test/index.js | 108 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 118 insertions(+), 12 deletions(-) diff --git a/index.js b/index.js index 8ad871c..66c3f64 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,5 @@ +var emoji = require('emoji-regex') + module.exports = BananaSlug function BananaSlug () { @@ -42,14 +44,19 @@ BananaSlug.prototype.reset = function () { var whitespace = /\s/g +function lower (string) { + return string.toLowerCase() +} + function slugger (string) { var re = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,.\/:;<=>?@\[\]^`{|}~]/g var maintainCase = false var replacement = '-' - var result if (typeof string !== 'string') return '' - if (!maintainCase) string = string.toLowerCase() - result = string.trim().replace(re, '').replace(whitespace, replacement) - return result + if (!maintainCase) string = string.replace(/[A-Z]+/g, lower) + return string.trim() + .replace(re, '') + .replace(emoji(), '') + .replace(whitespace, replacement) } diff --git a/package.json b/package.json index cfc03c1..b79f07a 100644 --- a/package.json +++ b/package.json @@ -3,9 +3,16 @@ "description": "Generate a slug just like GitHub does for markdown headings.", "version": "1.1.0", "author": "Dan Flettre ", + "contributors": [ + "Dan Flettre ", + "Titus Wormer (http://wooorm.com)" + ], "bugs": { "url": "https://github.com/Flet/github-slugger/issues" }, + "dependencies": { + "emoji-regex": "^6.0.0" + }, "devDependencies": { "standard": "*", "tap-spec": "^4.0.2", diff --git a/test/index.js b/test/index.js index 81d90b5..65e2777 100644 --- a/test/index.js +++ b/test/index.js @@ -18,7 +18,7 @@ test('github test cases', function (t) { var slugger = new GithubSlugger() testCases.forEach(function (test) { - t.equals(test.slug, slugger.slug(test.text), test.mesg) + t.equals(slugger.slug(test.text), test.slug, test.mesg) }) t.end() }) @@ -71,22 +71,114 @@ var testCases = [ }, { mesg: 'deals with duplicates correctly', - text: 'duplicate', - slug: 'duplicate' + text: 'duplicates', + slug: 'duplicates' }, { mesg: 'deals with duplicates correctly-1', - text: 'duplicate', - slug: 'duplicate-1' + text: 'duplicates', + slug: 'duplicates-1' }, { mesg: 'deals with duplicates correctly-2', - text: 'duplicate', - slug: 'duplicate-2' + text: 'duplicates', + slug: 'duplicates-2' }, { mesg: 'deals with non-latin chars', text: 'Привет', - slug: 'привет' + slug: 'Привет' + }, + // https://github.com/wooorm/gh-and-npm-slug-generation + { + mesg: 'gh-and-npm-slug-generation-1', + text: 'I ♥ unicode', + slug: 'i--unicode' + }, + { + mesg: 'gh-and-npm-slug-generation-2', + text: 'Dash-dash', + slug: 'dash-dash' + }, + { + mesg: 'gh-and-npm-slug-generation-3', + text: 'en–dash!', + slug: 'endash' + }, + { + mesg: 'gh-and-npm-slug-generation-4', + text: 'em–dash', + slug: 'emdash' + }, + { + mesg: 'gh-and-npm-slug-generation-5', + text: '😄 unicode emoji', + slug: '-unicode-emoji' + }, + { + mesg: 'gh-and-npm-slug-generation-6', + text: '😄-😄 unicode emoji', + slug: '--unicode-emoji' + }, + { + mesg: 'gh-and-npm-slug-generation-7', + text: '😄_😄 unicode emoji', + slug: '_-unicode-emoji' + }, + { + mesg: 'gh-and-npm-slug-generation-8', + text: '😄 - an emoji', + slug: '---an-emoji' + }, + { + mesg: 'gh-and-npm-slug-generation-9', + text: ':smile: - a gemoji', + slug: 'smile---a-gemoji' + }, + { + mesg: 'gh-and-npm-slug-generation-10', + text: ' Initial spaces', + slug: 'initial-spaces' + }, + { + mesg: 'gh-and-npm-slug-generation-11', + text: 'Final spaces ', + slug: 'final-spaces' + }, + { + mesg: 'gh-and-npm-slug-generation-12', + text: 'duplicate', + slug: 'duplicate' + }, + { + mesg: 'gh-and-npm-slug-generation-13', + text: 'duplicate', + slug: 'duplicate-1' + }, + { + mesg: 'gh-and-npm-slug-generation-14', + text: 'Привет non-latin 你好', + slug: 'Привет-non-latin-你好' + }, + // https://github.com/chrisdickinson/emoji-slug-example + { + mesg: 'emoji-slug-example-1', + text: ':ok: No underscore', + slug: 'ok-no-underscore' + }, + { + mesg: 'emoji-slug-example-2', + text: ':ok_hand: Single', + slug: 'ok_hand-single' + }, + { + mesg: 'emoji-slug-example-3', + text: ':ok_hand::hatched_chick: Two in a row with no spaces', + slug: 'ok_handhatched_chick-two-in-a-row-with-no-spaces' + }, + { + mesg: 'emoji-slug-example-4', + text: ':ok_hand: :hatched_chick: Two in a row', + slug: 'ok_hand-hatched_chick-two-in-a-row' } ]