diff --git a/src/Support/EdgeNgramTokenizer.php b/src/Support/EdgeNgramTokenizer.php index 2cd6858..c29ee7f 100644 --- a/src/Support/EdgeNgramTokenizer.php +++ b/src/Support/EdgeNgramTokenizer.php @@ -13,7 +13,7 @@ public function tokenize($text, $stopwords = []) $splits = preg_split($this->getPattern(), $text, -1, PREG_SPLIT_NO_EMPTY); foreach ($splits as $split) { - for ($i = 2; $i <= strlen($split); $i++) { + for ($i = 2; $i <= mb_strlen($split); $i++) { $ngrams[] = mb_substr($split, 0, $i); } }