Skip to content

Commit

Permalink
Add missing Vietnamese diacritics (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
huy-vuong authored Jul 31, 2023
1 parent 365d297 commit 7bace5c
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 4 deletions.
58 changes: 57 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ var characterMap = {
"Ầ": "A",
"Ằ": "A",
"Ȃ": "A",
"Ả": "A",
"Ạ": "A",
"Ẩ": "A",
"Ẫ": "A",
"Ậ": "A",
"Ç": "C",
"Ḉ": "C",
"È": "E",
Expand All @@ -26,12 +31,20 @@ var characterMap = {
"Ḕ": "E",
"Ḝ": "E",
"Ȇ": "E",
"Ẻ": "E",
"Ẽ": "E",
"Ẹ": "E",
"Ể": "E",
"Ễ": "E",
"Ệ": "E",
"Ì": "I",
"Í": "I",
"Î": "I",
"Ï": "I",
"Ḯ": "I",
"Ȋ": "I",
"Ỉ": "I",
"Ị": "I",
"Ð": "D",
"Ñ": "N",
"Ò": "O",
Expand All @@ -44,10 +57,25 @@ var characterMap = {
"Ṍ": "O",
"Ṓ": "O",
"Ȏ": "O",
"Ỏ": "O",
"Ọ": "O",
"Ổ": "O",
"Ỗ": "O",
"Ộ": "O",
"Ờ": "O",
"Ở": "O",
"Ỡ": "O",
"Ớ": "O",
"Ợ": "O",
"Ù": "U",
"Ú": "U",
"Û": "U",
"Ü": "U",
"Ủ": "U",
"Ụ": "U",
"Ử": "U",
"Ữ": "U",
"Ự": "U",
"Ý": "Y",
"à": "a",
"á": "a",
Expand All @@ -64,6 +92,11 @@ var characterMap = {
"ầ": "a",
"ằ": "a",
"ȃ": "a",
"ả": "a",
"ạ": "a",
"ẩ": "a",
"ẫ": "a",
"ậ": "a",
"ç": "c",
"ḉ": "c",
"è": "e",
Expand All @@ -76,12 +109,20 @@ var characterMap = {
"ḕ": "e",
"ḝ": "e",
"ȇ": "e",
"ẻ": "e",
"ẽ": "e",
"ẹ": "e",
"ể": "e",
"ễ": "e",
"ệ": "e",
"ì": "i",
"í": "i",
"î": "i",
"ï": "i",
"ḯ": "i",
"ȋ": "i",
"ỉ": "i",
"ị": "i",
"ð": "d",
"ñ": "n",
"ò": "o",
Expand All @@ -94,10 +135,25 @@ var characterMap = {
"ṍ": "o",
"ṓ": "o",
"ȏ": "o",
"ỏ": "o",
"ọ": "o",
"ổ": "o",
"ỗ": "o",
"ộ": "o",
"ờ": "o",
"ở": "o",
"ỡ": "o",
"ớ": "o",
"ợ": "o",
"ù": "u",
"ú": "u",
"û": "u",
"ü": "u",
"ủ": "u",
"ụ": "u",
"ử": "u",
"ữ": "u",
"ự": "u",
"ý": "y",
"ÿ": "y",
"Ā": "A",
Expand Down Expand Up @@ -412,7 +468,7 @@ function matcher(match) {
return characterMap[match];
}

var removeAccents = function(string) {
var removeAccents = function(string) {
return string.replace(allAccents, matcher);
};

Expand Down
5 changes: 2 additions & 3 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ var tape = require('tape');
var removeAccents = require('./');

tape('remove accents from string', function(t) {
var input = 'ÀÁÂÃÄÅẤẮÆẦẰÇḈÈÉÊËẾḖỀḔÌÍÎÏḮÐÑÒÓÔÕÖØỐṌṒÙÚÛÜÝàáâãäåấắæầằçḉèéêëếḗềḕìíîïḯñòóôõöøốṍṓùúûüýÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģǴǵĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķḰḱĹĺĻļĽľĿŀŁłḾḿŃńŅņŇňʼnŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵẂẃŶŷŸŹźŻżŽžſƒƠơƯưǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜỨứṸṹǺǻǼǽǾǿðÞþṔṕṤṥX́x́ЃѓЌќA̋a̋E̋e̋I̋i̋ǸǹỒồṐṑỪừẀẁỲỳȀȁȄȅȈȉȌȍȐȑȔȕẲẴẶḜẳẵặḝC̆c̆ḪḫK̆k̆M̆m̆N̆n̆P̆p̆R̆r̆T̆t̆V̆v̆X̆x̆Y̆y̆ȂȆȊȎȃȇȋȏȒȓȖȗșțȘȚB̌b̌F̌f̌ǦǧȞȟJ̌ǰǨǩM̌m̌P̌p̌Q̌q̌ṦṧV̌v̌W̌w̌X̌x̌Y̌y̌A̧a̧B̧b̧ḐḑȨȩƐ̧ɛ̧ḨḩI̧i̧Ɨ̧ɨ̧M̧m̧O̧o̧Q̧q̧U̧u̧X̧x̧Z̧z̧';
var input = 'ÀÁÂÃÄÅẤẮÆẦẰẢẠẨẪẬÇḈÈÉÊËẾḖỀḔẺẼẸỂỄỆÌÍÎÏḮỈỊÐÑÒÓÔÕÖØỐṌṒỎỌỔỖỘỜỞỠỚỢÙÚÛÜỦỤỬỮỰÝàáâãäåấắæầằảạẩẫậçḉèéêëếḗềḕẻẽẹểễệìíîïḯỉịñòóôõöøốṍṓỏọổỗộờởỡớợùúûüủụửữựýÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģǴǵĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķḰḱĹĺĻļĽľĿŀŁłḾḿŃńŅņŇňʼnŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵẂẃŶŷŸŹźŻżŽžſƒƠơƯưǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜỨứṸṹǺǻǼǽǾǿðÞþṔṕṤṥX́x́ЃѓЌќA̋a̋E̋e̋I̋i̋ǸǹỒồṐṑỪừẀẁỲỳȀȁȄȅȈȉȌȍȐȑȔȕẲẴẶḜẳẵặḝC̆c̆ḪḫK̆k̆M̆m̆N̆n̆P̆p̆R̆r̆T̆t̆V̆v̆X̆x̆Y̆y̆ȂȆȊȎȃȇȋȏȒȓȖȗșțȘȚB̌b̌F̌f̌ǦǧȞȟJ̌ǰǨǩM̌m̌P̌p̌Q̌q̌ṦṧV̌v̌W̌w̌X̌x̌Y̌y̌A̧a̧B̧b̧ḐḑȨȩƐ̧ɛ̧ḨḩI̧i̧Ɨ̧ɨ̧M̧m̧O̧o̧Q̧q̧U̧u̧X̧x̧Z̧z';
var output = removeAccents(input);
var expected = 'AAAAAAAAAEAACCEEEEEEEEIIIIIDNOOOOOOOOOUUUUYaaaaaaaaaeaacceeeeeeeeiiiiinooooooooouuuuyyAaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgGgHhHhIiIiIiIiIiIJijJjKkKkLlLlLlLlllMmNnNnNnnOoOoOoOEoeRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwWwYyYZzZzZzsfOoUuAaIiOoUuUuUuUuUuUuUuAaAEaeOodTHthPpSsXxГгКкAaEeIiNnOoOoUuWwYyAaEeIiOoRrUuAAAEaaaeCcHhKkMmNnPpRrTtVvXxYyAEIOaeioRrUustSTBbFfGgHhJjKkMmPpQqSsVvWwXxYyAaBbDdEeEeHhIiIiMmOoQqUuXxZz';
var expected = 'AAAAAAAAAEAAAAAAACCEEEEEEEEEEEEEEIIIIIIIDNOOOOOOOOOOOOOOOOOOOUUUUUUUUUYaaaaaaaaaeaaaaaaacceeeeeeeeeeeeeeiiiiiiinooooooooooooooooooouuuuuuuuuyyAaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgGgHhHhIiIiIiIiIiIJijJjKkKkLlLlLlLlllMmNnNnNnnOoOoOoOEoeRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwWwYyYZzZzZzsfOoUuAaIiOoUuUuUuUuUuUuUuAaAEaeOodTHthPpSsXxГгКкAaEeIiNnOoOoUuWwYyAaEeIiOoRrUuAAAEaaaeCcHhKkMmNnPpRrTtVvXxYyAEIOaeioRrUustSTBbFfGgHhJjKkMmPpQqSsVvWwXxYyAaBbDdEeEeHhIiIiMmOoQqUuXxZz';

t.same( output, expected );

Expand Down Expand Up @@ -54,4 +54,3 @@ tape('ß is not accented', function(t) {
t.same(removeAccents.remove('Straße'), 'Straße');
t.end();
});

0 comments on commit 7bace5c

Please sign in to comment.