diff --git a/README.kr.rst b/README.kr.rst index 6b69a10..aca7127 100644 --- a/README.kr.rst +++ b/README.kr.rst @@ -248,6 +248,7 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma * `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq`, `tomotopy.LDAModel.used_vocab_df` 등의 새로운 프로퍼티가 모델에 추가되었습니다. * 새로운 토픽 모델인 g-DMR(`tomotopy.GDMRModel`)가 추가되었습니다. * macOS에서 `tomotopy.label.FoRelevance`를 생성할 때 발생하던 문제가 해결되었습니다. + * `tomotopy.utils.Corpus.add_doc`로 `raw`가 없는 문헌을 생성한 뒤 토픽 모델에 입력할 시 발생하는 오류를 수정했습니다. * 0.7.1 (2020-05-08) * `tomotopy.HLDAModel`용으로 `tomotopy.Document.path`가 새로 추가되었습니다. diff --git a/README.rst b/README.rst index c0a555f..a9be20d 100644 --- a/README.rst +++ b/README.rst @@ -254,6 +254,7 @@ History * New properties including `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq` and `tomotopy.LDAModel.used_vocab_df` were added into topic models. * A new g-DMR topic model(`tomotopy.GDMRModel`) was added. * An error at initializing `tomotopy.label.FoRelevance` in macOS was fixed. + * An error that occurred when using `tomotopy.utils.Corpus` created without `raw` parameters was fixed. * 0.7.1 (2020-05-08) * `tomotopy.Document.path` was added for `tomotopy.HLDAModel`. diff --git a/src/python/py_DMR.cpp b/src/python/py_DMR.cpp index 4aa7401..d352d7f 100644 --- a/src/python/py_DMR.cpp +++ b/src/python/py_DMR.cpp @@ -90,14 +90,25 @@ static PyObject* DMR_addDoc_(TopicModelObject* self, PyObject* args, PyObject *k auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } auto ret = inst->addDoc(raw, words, startPos, length, { string{metadata} }); return py::buildPyValue(ret); } diff --git a/src/python/py_DT.cpp b/src/python/py_DT.cpp index 0d39dfe..8b487da 100644 --- a/src/python/py_DT.cpp +++ b/src/python/py_DT.cpp @@ -97,14 +97,25 @@ static PyObject* DT_addDoc_(TopicModelObject* self, PyObject* args, PyObject *kw auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } auto ret = inst->addDoc(raw, words, startPos, length, timepoint); return py::buildPyValue(ret); } diff --git a/src/python/py_GDMR.cpp b/src/python/py_GDMR.cpp index b44ba8b..43369b0 100644 --- a/src/python/py_GDMR.cpp +++ b/src/python/py_GDMR.cpp @@ -129,14 +129,25 @@ static PyObject* GDMR_addDoc_(TopicModelObject* self, PyObject* args, PyObject * auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords), iterMetadata; - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } if (!argMetadata || !(iterMetadata = PyObject_GetIter(argMetadata))) { diff --git a/src/python/py_LDA.cpp b/src/python/py_LDA.cpp index e70abd8..a10ffbd 100644 --- a/src/python/py_LDA.cpp +++ b/src/python/py_LDA.cpp @@ -92,14 +92,25 @@ static PyObject* LDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject *k auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } auto ret = inst->addDoc(raw, words, startPos, length); return py::buildPyValue(ret); } diff --git a/src/python/py_LLDA.cpp b/src/python/py_LLDA.cpp index 0b10d99..f0bb98e 100644 --- a/src/python/py_LLDA.cpp +++ b/src/python/py_LLDA.cpp @@ -99,14 +99,26 @@ static PyObject* LLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject * auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } + vector labels; if (argLabels) { diff --git a/src/python/py_MGLDA.cpp b/src/python/py_MGLDA.cpp index 06f5d84..80ea3f4 100644 --- a/src/python/py_MGLDA.cpp +++ b/src/python/py_MGLDA.cpp @@ -93,14 +93,25 @@ static PyObject* MGLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } auto ret = inst->addDoc(raw, words, startPos, length, delimiter); return py::buildPyValue(ret); diff --git a/src/python/py_PLDA.cpp b/src/python/py_PLDA.cpp index 0a94864..34a6cff 100644 --- a/src/python/py_PLDA.cpp +++ b/src/python/py_PLDA.cpp @@ -100,14 +100,26 @@ static PyObject* PLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject * auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } + vector labels; if (argLabels) { diff --git a/src/python/py_SLDA.cpp b/src/python/py_SLDA.cpp index ede17b3..c5363f9 100644 --- a/src/python/py_SLDA.cpp +++ b/src/python/py_SLDA.cpp @@ -169,14 +169,26 @@ static PyObject* SLDA_addDoc_(TopicModelObject* self, PyObject* args, PyObject * auto* inst = static_cast(self->inst); string raw; if (argRaw) raw = argRaw; + if (argRaw && (!argStartPos || !argLength)) + { + throw runtime_error{ "`start_pos` and `length` must be given when `raw` is given." 
}; + } + + vector words; + vector startPos; + vector length; py::UniqueObj iter = PyObject_GetIter(argWords); - vector words = py::makeIterToVector(iter); - iter = PyObject_GetIter(argStartPos); - vector startPos = py::makeIterToVector(iter); - iter = PyObject_GetIter(argLength); - vector length = py::makeIterToVector(iter); - char2Byte(raw, startPos, length); + words = py::makeIterToVector(iter); + if (argStartPos) + { + iter = PyObject_GetIter(argStartPos); + startPos = py::makeIterToVector(iter); + iter = PyObject_GetIter(argLength); + length = py::makeIterToVector(iter); + char2Byte(raw, startPos, length); + } + vector ys; if (argY) { diff --git a/tomotopy/documentation.kr.rst b/tomotopy/documentation.kr.rst index ba0f8d1..40c81f1 100644 --- a/tomotopy/documentation.kr.rst +++ b/tomotopy/documentation.kr.rst @@ -287,6 +287,7 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma * `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq`, `tomotopy.LDAModel.used_vocab_df` 등의 새로운 프로퍼티가 모델에 추가되었습니다. * 새로운 토픽 모델인 g-DMR(`tomotopy.GDMRModel`)가 추가되었습니다. * macOS에서 `tomotopy.label.FoRelevance`를 생성할 때 발생하던 문제가 해결되었습니다. + * `tomotopy.utils.Corpus.add_doc`로 `raw`가 없는 문헌을 생성한 뒤 토픽 모델에 입력할 시 발생하는 오류를 수정했습니다. * 0.7.1 (2020-05-08) * `tomotopy.HLDAModel`용으로 `tomotopy.Document.path`가 새로 추가되었습니다. diff --git a/tomotopy/documentation.rst b/tomotopy/documentation.rst index 2212b98..17801e9 100644 --- a/tomotopy/documentation.rst +++ b/tomotopy/documentation.rst @@ -290,6 +290,7 @@ History * New properties including `tomotopy.LDAModel.used_vocabs`, `tomotopy.LDAModel.used_vocab_freq` and `tomotopy.LDAModel.used_vocab_df` were added into topic models. * A new g-DMR topic model(`tomotopy.GDMRModel`) was added. * An error at initializing `tomotopy.label.FoRelevance` in macOS was fixed. + * An error that occurred when using `tomotopy.utils.Corpus` created without `raw` parameters was fixed. 
* 0.7.1 (2020-05-08) * `tomotopy.Document.path` was added for `tomotopy.HLDAModel`.