-
-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #172 from bab2min/dev_18
Prepare v0.18.0
- Loading branch information
Showing
22 changed files
with
265 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ | |
* @file Form.h | ||
* @author bab2min ([email protected]) | ||
* @brief 형태 및 형태소에 관한 정보를 담는 구조체들이 선언된 헤더 | ||
* @version 0.17.0 | ||
* @date 2022-09-01 | ||
* @version 0.18.0 | ||
* @date 2024-07-01 | ||
* | ||
* | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ | |
* @file Kiwi.h | ||
* @author bab2min ([email protected]) | ||
* @brief Kiwi C++ API를 담고 있는 헤더 파일 | ||
* @version 0.17.0 | ||
* @date 2022-09-01 | ||
* @version 0.18.0 | ||
* @date 2024-07-01 | ||
* | ||
* | ||
*/ | ||
|
@@ -650,7 +650,7 @@ namespace kiwi | |
* @param numThreads 모델 및 형태소 분석에 사용할 스레드 개수 | ||
* @param options 생성 옵션. `kiwi::BuildOption`을 참조 | ||
*/ | ||
KiwiBuilder(const std::string& modelPath, size_t numThreads = 0, BuildOption options = BuildOption::integrateAllomorph | BuildOption::loadDefaultDict, bool useSBG = false); | ||
KiwiBuilder(const std::string& modelPath, size_t numThreads = 0, BuildOption options = BuildOption::default_, bool useSBG = false); | ||
|
||
/** | ||
* @brief 현재 KiwiBuilder 객체가 유효한 분석 모델을 로딩한 상태인지 알려준다. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
#include <string> | ||
|
||
namespace kiwi | ||
{ | ||
/**
 * @brief Declares a routine that takes a UTF-16 buffer delimited by `first` and `last`
 *        and returns a list of (string, number) pairs.
 *
 * NOTE(review): only the declaration is visible here. Judging by the names, the result
 * presumably holds substrings that occur at least `minCnt` times, each paired with its
 * occurrence count -- confirm against the implementation.
 *
 * @param first Pointer to the start of the UTF-16 text.
 * @param last Pointer marking the end of the UTF-16 text
 *        (presumably one past the final code unit -- TODO confirm).
 * @param minCnt Count threshold; has no default and must be supplied by the caller
 *        (presumably the minimum number of occurrences -- verify).
 * @param minLength Defaults to 2 (presumably the shortest substring length reported -- verify).
 * @param maxLength Defaults to 32 (presumably the longest substring length reported -- verify).
 * @param longestOnly Defaults to true (presumably suppresses substrings that are covered by a
 *        longer reported substring -- verify).
 * @param stopChr Defaults to 0 (presumably a delimiter that a substring must not span -- verify).
 * @return A vector of `std::pair<std::u16string, size_t>`.
 */
std::vector<std::pair<std::u16string, size_t>> extractSubstrings( | ||
const char16_t* first, | ||
const char16_t* last, | ||
size_t minCnt, | ||
size_t minLength = 2, | ||
size_t maxLength = 32, | ||
bool longestOnly = true, | ||
char16_t stopChr = 0); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ | |
* @file SwTokenizer.h | ||
* @author bab2min ([email protected]) | ||
* @brief Subword Tokenizer | ||
* @version 0.16.1 | ||
* @date 2022-07-28 | ||
* @version 0.18.0 | ||
* @date 2024-07-01 | ||
* | ||
* | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ | |
* @file Types.h | ||
* @author bab2min ([email protected]) | ||
* @brief Kiwi C++ API에 쓰이는 주요 타입들을 모아놓은 헤더 파일 | ||
* @version 0.17.0 | ||
* @date 2022-09-01 | ||
* @version 0.18.0 | ||
* @date 2024-07-01 | ||
* | ||
* | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ | |
* @file capi.h | ||
* @author bab2min ([email protected]) | ||
* @brief Kiwi C API를 담고 있는 헤더 파일 | ||
* @version 0.17.0 | ||
* @date 2022-09-01 | ||
* @version 0.18.0 | ||
* @date 2024-07-01 | ||
* | ||
* | ||
*/ | ||
|
@@ -45,7 +45,11 @@ typedef struct { | |
uint32_t line_number; /**< 줄 번호*/ | ||
uint16_t length; /**< 길이(UTF16 문자 기준) */ | ||
uint8_t tag; /**< 품사 태그 */ | ||
uint8_t sense_id; /**< 의미 번호 */ | ||
union | ||
{ | ||
uint8_t sense_id; /**< 의미 번호 */ | ||
uint8_t script; /**< 유니코드 영역에 기반한 문자 타입 */ | ||
}; | ||
float score; /**< 해당 형태소의 언어모델 점수 */ | ||
float typo_cost; /**< 오타가 교정된 경우 오타 비용. 그렇지 않은 경우 0 */ | ||
uint32_t typo_form_id; /**< 교정 전 오타의 형태에 대한 정보 (typoCost가 0인 경우 의미 없음) */ | ||
|
@@ -1008,6 +1012,16 @@ DECL_DLL int kiwi_pt_add_token_to_span_w(kiwi_pretokenized_h handle, int span_id | |
*/ | ||
DECL_DLL int kiwi_pt_close(kiwi_pretokenized_h handle); | ||
|
||
/** | ||
* @brief Returns the Unicode name of the character block referred to by the `script` field of `kiwi_token_info_t`. | ||
* | ||
* @param script The value of the `script` field of a `kiwi_token_info_t` | ||
* @return The name of the Unicode block. Returns "Unknown" if the block cannot be identified. | ||
* | ||
* @note The value returned by this function is a string literal, so it does not need to be freed separately. | ||
*/ | ||
DECL_DLL const char* kiwi_get_script_name(uint8_t script); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#include <kiwi/HSDataset.h> | ||
#include <kiwi/Dataset.h> | ||
#include "RaggedVector.hpp" | ||
|
||
using namespace kiwi; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.