Skip to content

Commit

Permalink
fixed corpus error
Browse files Browse the repository at this point in the history
  • Loading branch information
bab2min committed Jun 6, 2020
1 parent 286abda commit c05cdf0
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 48 deletions.
1 change: 1 addition & 0 deletions README.kr.rst
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
* `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq`, `tomotopy.LDAModel.used_vocab_df` 등의 새로운 프로퍼티가 모델에 추가되었습니다.
* 새로운 토픽 모델인 g-DMR(`tomotopy.GDMRModel`)가 추가되었습니다.
* macOS에서 `tomotopy.label.FoRelevance`를 생성할 때 발생하던 문제가 해결되었습니다.
* `tomotopy.utils.Corpus.add_doc`로 `raw`가 없는 문헌을 생성한 뒤 토픽 모델에 입력할 시 발생하는 오류를 수정했습니다.

* 0.7.1 (2020-05-08)
* `tomotopy.HLDAModel`용으로 `tomotopy.Document.path`가 새로 추가되었습니다.
Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ History
* New properties including `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq` and `tomotopy.LDAModel.used_vocab_df` were added into topic models.
* A new g-DMR topic model(`tomotopy.GDMRModel`) was added.
* An error at initializing `tomotopy.label.FoRelevance` in macOS was fixed.
* An error that occurred when using a `tomotopy.utils.Corpus` created without the `raw` parameter was fixed.

* 0.7.1 (2020-05-08)
* `tomotopy.Document.path` was added for `tomotopy.HLDAModel`.
Expand Down
23 changes: 17 additions & 6 deletions src/python/py_DMR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,25 @@ static PyObject* DMR_addDoc_(TopicModelObject* self, PyObject* args, PyObject *k
auto* inst = static_cast<tomoto::IDMRModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}
auto ret = inst->addDoc(raw, words, startPos, length, { string{metadata} });
return py::buildPyValue(ret);
}
Expand Down
23 changes: 17 additions & 6 deletions src/python/py_DT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,25 @@ static PyObject* DT_addDoc_(TopicModelObject* self, PyObject* args, PyObject *kw
auto* inst = static_cast<tomoto::IDTModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}
auto ret = inst->addDoc(raw, words, startPos, length, timepoint);
return py::buildPyValue(ret);
}
Expand Down
23 changes: 17 additions & 6 deletions src/python/py_GDMR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,25 @@ static PyObject* GDMR_addDoc_(TopicModelObject* self, PyObject* args, PyObject *
auto* inst = static_cast<tomoto::IGDMRModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords), iterMetadata;
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}

if (!argMetadata || !(iterMetadata = PyObject_GetIter(argMetadata)))
{
Expand Down
23 changes: 17 additions & 6 deletions src/python/py_LDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,25 @@ static PyObject* LDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject *k
auto* inst = static_cast<tomoto::ILDAModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}
auto ret = inst->addDoc(raw, words, startPos, length);
return py::buildPyValue(ret);
}
Expand Down
24 changes: 18 additions & 6 deletions src/python/py_LLDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,26 @@ static PyObject* LLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject *
auto* inst = static_cast<tomoto::ILLDAModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}

vector<string> labels;
if (argLabels)
{
Expand Down
23 changes: 17 additions & 6 deletions src/python/py_MGLDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,25 @@ static PyObject* MGLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject
auto* inst = static_cast<tomoto::IMGLDAModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}

auto ret = inst->addDoc(raw, words, startPos, length, delimiter);
return py::buildPyValue(ret);
Expand Down
24 changes: 18 additions & 6 deletions src/python/py_PLDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,26 @@ static PyObject* PLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject *
auto* inst = static_cast<tomoto::IPLDAModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}

vector<string> labels;
if (argLabels)
{
Expand Down
24 changes: 18 additions & 6 deletions src/python/py_SLDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,26 @@ static PyObject* SLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject *
auto* inst = static_cast<tomoto::ISLDAModel*>(self->inst);
string raw;
if (argRaw) raw = argRaw;
if (argRaw && (!argStartPos || !argLength))
{
throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." };
}

vector<tomoto::Vid> words;
vector<uint32_t> startPos;
vector<uint16_t> length;

py::UniqueObj iter = PyObject_GetIter(argWords);
vector<tomoto::Vid> words = py::makeIterToVector<tomoto::Vid>(iter);
iter = PyObject_GetIter(argStartPos);
vector<uint32_t> startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
vector<uint16_t> length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
words = py::makeIterToVector<tomoto::Vid>(iter);
if (argStartPos)
{
iter = PyObject_GetIter(argStartPos);
startPos = py::makeIterToVector<uint32_t>(iter);
iter = PyObject_GetIter(argLength);
length = py::makeIterToVector<uint16_t>(iter);
char2Byte(raw, startPos, length);
}

vector<tomoto::Float> ys;
if (argY)
{
Expand Down
1 change: 1 addition & 0 deletions tomotopy/documentation.kr.rst
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
* `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq`, `tomotopy.LDAModel.used_vocab_df` 등의 새로운 프로퍼티가 모델에 추가되었습니다.
* 새로운 토픽 모델인 g-DMR(`tomotopy.GDMRModel`)가 추가되었습니다.
* macOS에서 `tomotopy.label.FoRelevance`를 생성할 때 발생하던 문제가 해결되었습니다.
* `tomotopy.utils.Corpus.add_doc`로 `raw`가 없는 문헌을 생성한 뒤 토픽 모델에 입력할 시 발생하는 오류를 수정했습니다.

* 0.7.1 (2020-05-08)
* `tomotopy.HLDAModel`용으로 `tomotopy.Document.path`가 새로 추가되었습니다.
Expand Down
1 change: 1 addition & 0 deletions tomotopy/documentation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ History
* New properties including `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq` and `tomotopy.LDAModel.used_vocab_df` were added into topic models.
* A new g-DMR topic model(`tomotopy.GDMRModel`) was added.
* An error at initializing `tomotopy.label.FoRelevance` in macOS was fixed.
* An error that occurred when using a `tomotopy.utils.Corpus` created without the `raw` parameter was fixed.

* 0.7.1 (2020-05-08)
* `tomotopy.Document.path` was added for `tomotopy.HLDAModel`.
Expand Down

0 comments on commit c05cdf0

Please sign in to comment.