Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

숫자가 종종 잘못된 문자로 매칭되는 버그 수정 #203

Merged
merged 3 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions src/KTrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,6 @@ size_t kiwi::splitByTrie(
if (curNode->fail())
{
curNode = curNode->fail();
for (auto submatcher = curNode; submatcher; submatcher = submatcher->fail())
{
const Form* cand = submatcher->val(trie);
if (!cand) break;
else if (!trie.hasSubmatch(cand))
{
if (!insertCandidates(candidates, cand, formBase, typoPtrs, str, nonSpaces)) break;
}
}
nextNode = curNode->template nextOpt<arch>(trie, str[n + i]);
}
else
Expand Down
48 changes: 48 additions & 0 deletions test/test_cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,12 @@ TEST(KiwiCpp, ZCoda)
TEST(KiwiCpp, ZSiot)
{
Kiwi& kiwi = reuseKiwiInstance();

auto resSplit = kiwi.analyze(u"찰랑찰랑한 머릿결과 볼륨감", Match::allWithNormalizing | Match::splitSaisiot);
EXPECT_EQ(resSplit.first.size(), 8);
EXPECT_EQ(resSplit.first[3].str, u"머리");
EXPECT_EQ(resSplit.first[4].tag, POSTag::z_siot);
EXPECT_EQ(resSplit.first[5].str, u"결");

for (auto s : {u"하굣길", u"만둣국", u"나뭇잎", u"세숫물", u"고춧가루", u"시곗바늘", u"사글셋방"})
{
Expand All @@ -1014,6 +1020,35 @@ TEST(KiwiCpp, ZSiot)
}
}

// Checks saisiot (z_siot) splitting/merging on a Kiwi instance built with the
// DefaultTypoSet::basicTypoSetWithContinual typo set.
//
// Part 1: each listed compound noun must
//   - contain no z_siot token when neither saisiot option is given,
//   - analyze to exactly [nng, z_siot, nng] with Match::splitSaisiot,
//   - analyze to a single nng token with Match::mergeSaisiot.
// Part 2: for the listed loanword inputs the saisiot options must not change
//   the `.second` member of the result (presumably the analysis score -- TODO
//   confirm), and splitting must not produce any z_siot token.
TEST(KiwiCpp, ZSiotWithTypo)
{
	Kiwi kiwi = KiwiBuilder{ MODEL_PATH, 0, BuildOption::default_, }.build(getDefaultTypoSet(DefaultTypoSet::basicTypoSetWithContinual));

	// Shared predicate: true for a token tagged as saisiot.
	const auto isZSiot = [](const TokenInfo& token) { return token.tag == POSTag::z_siot; };

	for (auto s : { u"하굣길", u"만둣국", u"나뭇잎", u"세숫물", u"고춧가루", u"시곗바늘", u"사글셋방" })
	{
		auto resNone = kiwi.analyze(s, Match::allWithNormalizing);
		auto resSplit = kiwi.analyze(s, Match::allWithNormalizing | Match::splitSaisiot);
		auto resMerge = kiwi.analyze(s, Match::allWithNormalizing | Match::mergeSaisiot);
		EXPECT_FALSE(std::any_of(resNone.first.begin(), resNone.first.end(), isZSiot));

		// Fatal assertion: the indexed checks below would read out of bounds
		// if the token count were wrong, so stop the test here instead.
		ASSERT_EQ(resSplit.first.size(), 3);
		EXPECT_EQ(resSplit.first[0].tag, POSTag::nng);
		EXPECT_EQ(resSplit.first[1].tag, POSTag::z_siot);
		EXPECT_EQ(resSplit.first[2].tag, POSTag::nng);

		// Fatal for the same reason: element 0 is accessed right after.
		ASSERT_EQ(resMerge.first.size(), 1);
		EXPECT_EQ(resMerge.first[0].tag, POSTag::nng);
	}

	for (auto s : { u"발렛 파킹", u"미닛" })
	{
		auto resNone = kiwi.analyze(s, Match::allWithNormalizing);
		auto resSplit = kiwi.analyze(s, Match::allWithNormalizing | Match::splitSaisiot);
		auto resMerge = kiwi.analyze(s, Match::allWithNormalizing | Match::mergeSaisiot);
		EXPECT_EQ(resNone.second, resSplit.second);
		EXPECT_EQ(resNone.second, resMerge.second);
		EXPECT_FALSE(std::any_of(resSplit.first.begin(), resSplit.first.end(), isZSiot));
	}
}

TEST(KiwiCpp, AnalyzeWithWordPosition)
{
std::u16string testSentence = u"나 정말 배불렄ㅋㅋ";
Expand Down Expand Up @@ -1609,3 +1644,16 @@ TEST(KiwiCpp, IssueP172_LengthError)
auto res = kiwi.analyze(text, Match::allWithNormalizing).first;
EXPECT_GT(res.size(), 0);
}

// Regression test: analyzing "담아 1팩 무료" must yield exactly five tokens,
// with the digit kept as the token "1" between the verb ending and "팩".
// The second token is expected as "어" although the input spells "아";
// presumably this is the normalized form of the ending -- TODO confirm.
TEST(KiwiCpp, IssueP189)
{
	Kiwi& kiwi = reuseKiwiInstance();
	auto res = kiwi.analyze(u"담아 1팩 무료", Match::allWithNormalizing).first;

	// Fatal assertion: res[0]..res[4] are indexed below, which would be an
	// out-of-bounds access if fewer than five tokens were returned.
	ASSERT_EQ(res.size(), 5);
	EXPECT_EQ(res[0].str, u"담");
	EXPECT_EQ(res[1].str, u"어");
	EXPECT_EQ(res[2].str, u"1");
	EXPECT_EQ(res[3].str, u"팩");
	EXPECT_EQ(res[4].str, u"무료");
}
Loading