Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 committed Oct 17, 2024
1 parent dd08cae commit dd8bcb2
Show file tree
Hide file tree
Showing 4 changed files with 312 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ SET(test_files ./tests.cpp
./search/spans/TestSpanExplanationsOfNonMatches.cpp
./search/spans/TestSpanExplanationsOfNonMatches.h
./index/TestIndexCompaction.cpp
./index/TestIndexCompress.cpp
./index/TestIndexModifier.cpp
./index/TestIndexWriter.cpp
./index/TestIndexModifier.cpp
Expand Down
309 changes: 309 additions & 0 deletions src/test/index/TestIndexCompress.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
#include <CLucene.h> // IWYU pragma: keep
#include <CLucene/index/IndexReader.h>
#include <CLucene/search/query/TermPositionIterator.h>
#include <CLucene/util/stringUtil.h>

#include <ctime>
#include <exception>
#include <stdexcept>
#include <string>
#include <vector>

#include "CLucene/analysis/Analyzers.h"
#include "CLucene/index/IndexVersion.h"
#include "CLucene/index/Term.h"
#include "CLucene/store/FSDirectory.h"
#include "test.h"

CL_NS_USE(search)
CL_NS_USE(store)
CL_NS_USE(index)
CL_NS_USE(util)

// Number of random documents generated for the full-size tests in this file
// (TestIndexCompressV2 and TestIndexCompactionV2). Note that read_index() has
// a parameter of the same name that shadows this constant inside that function.
static constexpr int32_t doc_count = 10000;

// Emulates a `finally` clause around a preceding try/catch that stored any
// in-flight exception in `eptr` (a std::exception_ptr):
//   1. `finallyBlock` is always executed first (cleanup);
//   2. if `eptr` is non-empty, the captured exception is rethrown afterwards.
//
// The expansion is a plain `{ ... }` block, so call sites in this file invoke
// it without a trailing semicolon. Because macro arguments are split on
// top-level commas, `finallyBlock` must not contain a comma that is outside
// parentheses.
#define FINALLY(eptr, finallyBlock) \
{ \
finallyBlock; \
if (eptr) { \
std::rethrow_exception(eptr); \
} \
}

/**
 * Derives a seed that stays constant for a whole local calendar day.
 *
 * The current time is truncated to local midnight and the resulting
 * timestamp is divided by the number of seconds in a day, so every call made
 * on the same local day yields the same value.
 *
 * @return day index computed from the local-midnight timestamp
 */
int32_t getDaySeed() {
    const std::time_t current = std::time(nullptr);

    // std::localtime hands back a pointer to its own storage; the fields are
    // adjusted in place before converting back with std::mktime.
    std::tm* day_start = std::localtime(&current);
    day_start->tm_hour = 0;
    day_start->tm_min = 0;
    day_start->tm_sec = 0;

    constexpr std::time_t kSecondsPerDay = 24 * 60 * 60;
    return static_cast<int32_t>(std::mktime(day_start) / kSecondsPerDay);
}

/**
 * Builds a dotted-quad string ("a.b.c.d") from four consecutive rand() draws,
 * each reduced modulo 256.
 *
 * The sequence is governed by the global rand() state, so seeding with
 * srand() beforehand makes the output reproducible.
 *
 * @return the generated address text
 */
static std::string generateRandomIP() {
    constexpr int kOctetCount = 4;

    std::string address;
    for (int octet = 0; octet < kOctetCount; ++octet) {
        if (octet != 0) {
            address.append(".");
        }
        address += std::to_string(rand() % 256);
    }
    return address;
}

/**
 * Creates an index in `dir` holding one document per entry of `datas`.
 *
 * A single Document with a single tokenized, non-stored, norm-less field is
 * allocated once and reused: for each input string the field value is
 * replaced by a token stream over that string and the document is added to
 * the writer again.
 *
 * @param name           field name (widened character by character to wchar_t)
 * @param dir            target directory for the index
 * @param index_version  version tag applied to the field via setIndexVersion()
 * @param datas          raw text values, one per document
 */
static void write_index(const std::string& name, RAMDirectory* dir, IndexVersion index_version,
                        const std::vector<std::string>& datas) {
    auto* simple_analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
    simple_analyzer->set_stopwords(nullptr);

    // NOTE(review): the trailing `true` presumably asks the writer to create a
    // new index; confirm against the IndexWriter constructor.
    auto* writer = _CLNEW lucene::index::IndexWriter(dir, simple_analyzer, true);
    writer->setRAMBufferSizeMB(512);
    writer->setMaxBufferedDocs(-1);
    writer->setMaxFieldLength(0x7FFFFFFFL);
    writer->setMergeFactor(1000000000);
    writer->setUseCompoundFile(false);

    auto* text_reader = _CLNEW lucene::util::SStringReader<char>;

    auto* document = _CLNEW lucene::document::Document();

    // Field flags: not stored, no norms, tokenized.
    int32_t flags = lucene::document::Field::STORE_NO;
    flags |= lucene::document::Field::INDEX_NONORMS;
    flags |= lucene::document::Field::INDEX_TOKENIZED;

    const std::wstring wide_name(name.begin(), name.end());
    auto* text_field = _CLNEW lucene::document::Field(wide_name.c_str(), flags);
    text_field->setOmitTermFreqAndPositions(false);
    text_field->setIndexVersion(index_version);
    document->add(*text_field);

    // Re-point the shared reader at each value and index the same document.
    for (const std::string& value : datas) {
        text_reader->init(value.data(), value.size(), false);
        auto* tokens = simple_analyzer->reusableTokenStream(text_field->name(), text_reader);
        text_field->setValue(tokens);
        writer->addDocument(document);
    }

    writer->close();

    _CLLDELETE(writer);
    _CLLDELETE(document);
    _CLLDELETE(simple_analyzer);
    _CLLDELETE(text_reader);
}

/**
 * Opens the index stored in `dir` and sanity-checks its contents.
 *
 * First verifies that the reader reports exactly `doc_count` documents, then
 * walks every term and, for every document of that term, every position,
 * requiring each position to lie in [0, 3].
 * NOTE(review): the upper bound 3 presumably reflects four tokens per
 * generated IPv4 string; confirm against the analyzer used when writing.
 *
 * The reader, the term enumerator and each positions object are released on
 * both the normal and the exceptional path.
 *
 * @param dir        directory holding the index to verify
 * @param doc_count  expected document count (shadows the file-level constant)
 * @throws whatever _CLTHROWA(CL_ERR_IllegalArgument, ...) raises on a document
 *         count mismatch or an out-of-range position; exceptions raised by the
 *         reader itself are rethrown after cleanup.
 */
static void read_index(RAMDirectory* dir, int32_t doc_count) {
    auto* reader = IndexReader::open(dir);

    std::exception_ptr eptr;
    try {
        if (doc_count != reader->numDocs()) {
            std::string msg = "doc_count: " + std::to_string(doc_count) +
                              ", numDocs: " + std::to_string(reader->numDocs());
            _CLTHROWA(CL_ERR_IllegalArgument, msg.c_str());
        }

        Term* term = nullptr;
        TermEnum* enumerator = nullptr;
        try {
            enumerator = reader->terms();
            while (enumerator->next()) {
                term = enumerator->term();

                auto* term_pos = reader->termPositions(term);

                // Separate name from the outer `eptr` so the two capture
                // slots cannot be confused.
                std::exception_ptr pos_eptr;
                try {
                    TermPositionIterator iter(term_pos);
                    int32_t doc = 0;
                    // INT32_MAX is the end-of-postings sentinel this loop tests for.
                    while ((doc = iter.nextDoc()) != INT32_MAX) {
                        for (int32_t i = 0; i < iter.freq(); i++) {
                            int32_t pos = iter.nextPosition();
                            if (pos < 0 || pos > 3) {
                                std::string msg = "pos: " + std::to_string(pos);
                                _CLTHROWA(CL_ERR_IllegalArgument, msg.c_str());
                            }
                        }
                    }
                } catch (...) {
                    pos_eptr = std::current_exception();
                }
                FINALLY(pos_eptr, { _CLDELETE(term_pos); })

                _CLDECDELETE(term);
            }
        }
        _CLFINALLY({
            _CLDECDELETE(term);
            // reader->terms() may have thrown before `enumerator` was
            // assigned; guard so cleanup does not dereference a null pointer
            // and mask the original exception.
            if (enumerator != nullptr) {
                enumerator->close();
                _CLDELETE(enumerator);
            }
        })

    } catch (...) {
        eptr = std::current_exception();
    }
    FINALLY(eptr, {
        reader->close();
        _CLLDELETE(reader);
    })
}

/**
 * Runs IndexWriter::indexCompaction() from `srcDirs` into `destDirs`.
 *
 * Builds the translation table handed to indexCompaction(): entry [src][doc]
 * is a (destination index, destination doc id) pair. Ids are assigned in
 * doc-major / source-minor order, and the destination index advances every
 * time `count * destDirs.size()` ids have been handed out. Each destination
 * is declared to hold that same number of documents.
 *
 * @param tmp_dir   directory the temporary IndexWriter is opened on
 * @param srcDirs   source directories
 * @param destDirs  destination directories
 * @param count     number of documents assumed per source directory
 *
 * The writer and analyzer are released whether or not the compaction throws;
 * a captured exception is rethrown after cleanup.
 */
static void index_compaction(RAMDirectory* tmp_dir, std::vector<lucene::store::Directory*> srcDirs,
                             std::vector<lucene::store::Directory*> destDirs, int32_t count) {
    using DocTarget = std::pair<uint32_t, uint32_t>;

    auto* simple_analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
    auto* writer = _CLNEW lucene::index::IndexWriter(tmp_dir, simple_analyzer, true);

    // Number of documents routed to one destination before moving to the next.
    const auto docs_per_dest = count * destDirs.size();

    std::vector<std::vector<DocTarget>> translation(srcDirs.size(),
                                                    std::vector<DocTarget>(count));
    int32_t dest_index = 0;
    int32_t next_doc_id = 0;
    for (int32_t doc = 0; doc < count; doc++) {
        for (size_t src = 0; src < srcDirs.size(); src++) {
            if (next_doc_id == docs_per_dest) {
                dest_index++;
                next_doc_id = 0;
            }
            translation[src][doc] = std::make_pair(dest_index, next_doc_id++);
        }
    }

    std::vector<uint32_t> dest_doc_counts(destDirs.size(),
                                          static_cast<uint32_t>(docs_per_dest));

    std::exception_ptr failure;
    try {
        writer->indexCompaction(srcDirs, destDirs, translation, dest_doc_counts);
    } catch (...) {
        failure = std::current_exception();
    }
    FINALLY(failure, {
        writer->close();
        _CLDELETE(writer);
        _CLDELETE(simple_analyzer);
    })
}

/**
 * Writes `doc_count` random IPv4 strings into a kV2 index held in a
 * RAMDirectory and verifies it with read_index().
 *
 * The random sequence is seeded from getDaySeed(), so a run is reproducible
 * within the same local day. Any exception thrown by read_index() fails the
 * test.
 */
void TestIndexCompressV2(CuTest* tc) {
    std::srand(getDaySeed());

    std::string name = "v2_field_name";
    std::vector<std::string> datas;
    datas.reserve(doc_count);  // size is known up front; avoid regrowth
    for (int32_t i = 0; i < doc_count; i++) {
        datas.emplace_back(generateRandomIP());
    }

    RAMDirectory dir;
    write_index(name, &dir, IndexVersion::kV2, datas);

    try {
        read_index(&dir, doc_count);
    } catch (...) {
        assertTrue(false);
    }

    std::cout << "\nTestIndexCompressV2 success" << std::endl;
}

/**
 * Builds one kV2 index of `doc_count` random IPv4 strings and compacts nine
 * references to it into three destination directories.
 *
 * index_compaction() assigns `count * destDirs.size()` documents to each
 * destination, i.e. 3 * doc_count here, so the 9 * doc_count source documents
 * fill the three destinations exactly. Any exception thrown during compaction
 * fails the test.
 */
void TestIndexCompactionV2(CuTest* tc) {
    std::srand(getDaySeed());
    std::string name = "field_name";

    // Source index (V2).
    RAMDirectory in_dir;
    {
        std::vector<std::string> datas;
        datas.reserve(doc_count);  // size is known up front; avoid regrowth
        for (int32_t i = 0; i < doc_count; i++) {
            datas.emplace_back(generateRandomIP());
        }
        write_index(name, &in_dir, IndexVersion::kV2, datas);
    }

    // Compaction: the same source directory is listed nine times.
    RAMDirectory outdir1;
    RAMDirectory outdir2;
    RAMDirectory outdir3;
    {
        constexpr size_t kSourceCount = 9;
        std::vector<lucene::store::Directory*> srcDirs(kSourceCount, &in_dir);
        std::vector<lucene::store::Directory*> destDirs {&outdir1, &outdir2, &outdir3};

        try {
            RAMDirectory empty_dir;
            index_compaction(&empty_dir, srcDirs, destDirs, doc_count);
        } catch (...) {
            assertTrue(false);
        }
    }

    std::cout << "TestIndexCompactionV2 success" << std::endl;
}

/**
 * Checks that compacting a kV1 index together with a kV2 index fails.
 *
 * Two small indexes are written with different IndexVersion values and then
 * passed together as compaction sources; the test passes only if
 * index_compaction() throws.
 */
void TestIndexCompactionException(CuTest* tc) {
    std::srand(getDaySeed());
    std::string name = "field_name";

    // Documents per source index; also passed as the per-source count to
    // index_compaction(), so the two must stay in sync.
    constexpr int32_t kDocsPerIndex = 10;

    // Source index written as V1.
    RAMDirectory in_dir_v1;
    {
        std::vector<std::string> datas;
        datas.reserve(kDocsPerIndex);
        for (int32_t i = 0; i < kDocsPerIndex; i++) {
            datas.emplace_back(generateRandomIP());
        }
        write_index(name, &in_dir_v1, IndexVersion::kV1, datas);
    }

    // Source index written as V2.
    RAMDirectory in_dir_v2;
    {
        std::vector<std::string> datas;
        datas.reserve(kDocsPerIndex);
        for (int32_t i = 0; i < kDocsPerIndex; i++) {
            datas.emplace_back(generateRandomIP());
        }
        write_index(name, &in_dir_v2, IndexVersion::kV2, datas);
    }

    // Compacting the two sources together is expected to throw.
    RAMDirectory out_dir;
    {
        std::vector<lucene::store::Directory*> srcDirs;
        srcDirs.push_back(&in_dir_v1);
        srcDirs.push_back(&in_dir_v2);
        std::vector<lucene::store::Directory*> destDirs;
        destDirs.push_back(&out_dir);

        bool flag = false;
        try {
            RAMDirectory empty_dir;
            index_compaction(&empty_dir, srcDirs, destDirs, kDocsPerIndex);
        } catch (...) {
            flag = true;
        }
        assertTrue(flag);
    }

    std::cout << "TestIndexCompactionException success" << std::endl;
}

/**
 * Assembles the "CLucene Index Compress Test" suite.
 *
 * Registers, in this order, TestIndexCompressV2, TestIndexCompactionV2 and
 * TestIndexCompactionException.
 *
 * @return the newly created suite
 */
CuSuite* testIndexCompress() {
    CuSuite* compress_suite = CuSuiteNew(_T("CLucene Index Compress Test"));

    // Registration order is kept as-is.
    SUITE_ADD_TEST(compress_suite, TestIndexCompressV2);
    SUITE_ADD_TEST(compress_suite, TestIndexCompactionV2);
    SUITE_ADD_TEST(compress_suite, TestIndexCompactionException);

    return compress_suite;
}
1 change: 1 addition & 0 deletions src/test/test.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ CuSuite *testSearchRange(void);
CuSuite *testMultiPhraseQuery(void);
CuSuite *testIndexCompaction(void);
CuSuite *testStringReader(void);
CuSuite *testIndexCompress(void);

#ifdef TEST_CONTRIB_LIBS
//CuSuite *testGermanAnalyzer(void);
Expand Down
1 change: 1 addition & 0 deletions src/test/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ unittest tests[] = {
{"MultiPhraseQuery", testMultiPhraseQuery},
{"IndexCompaction", testIndexCompaction},
{"testStringReader", testStringReader},
{"IndexCompress", testIndexCompress},
#ifdef TEST_CONTRIB_LIBS
{"chinese", testchinese},
#endif
Expand Down

0 comments on commit dd8bcb2

Please sign in to comment.