Skip to content

Commit

Permalink
[Improvement](inverted index) Remove the check for inverted index fil…
Browse files Browse the repository at this point in the history
…e exists (#36945)

Remove the check for inverted index file exists to avoid latency of remote fs eg. s3
  • Loading branch information
csun5285 authored Jul 2, 2024
1 parent d33e1a0 commit a25ac14
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 52 deletions.
2 changes: 1 addition & 1 deletion be/src/clucene
20 changes: 14 additions & 6 deletions be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
#include "olap/tablet_schema.h"
#include "util/debug_points.h"

namespace doris::segment_v2 {

Expand All @@ -41,14 +42,17 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) {

std::unique_lock<std::shared_mutex> lock(_mutex); // Lock for writing
try {
bool exists = false;
RETURN_IF_ERROR(_fs->exists(index_file_full_path, &exists));
if (!exists) {
int64_t file_size = 0;
Status st = _fs->file_size(index_file_full_path, &file_size);
DBUG_EXECUTE_IF("inverted file read error: index file not found", {
st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found");
})
if (st.code() == ErrorCode::NOT_FOUND) {
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"inverted index file {} is not found", index_file_full_path);
} else if (!st.ok()) {
return st;
}
int64_t file_size = 0;
RETURN_IF_ERROR(_fs->file_size(index_file_full_path, &file_size));
if (file_size == 0) {
LOG(WARNING) << "inverted index file " << index_file_full_path << " is empty.";
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
Expand Down Expand Up @@ -157,6 +161,10 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::_open(
dir->close();
_CLDELETE(dir)
}
if (err.number() == CL_ERR_FileNotFound) {
return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"inverted index path: {} not exist.", index_file_path));
}
return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"CLuceneError occur when open idx file {}, error msg: {}", index_file_path,
err.what()));
Expand All @@ -174,7 +182,7 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::_open(
if (index_it == _indices_entries.end()) {
std::ostringstream errMsg;
errMsg << "No index with id " << index_id << " found";
return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"CLuceneError occur when open idx file {}, error msg: {}",
InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix),
errMsg.str()));
Expand Down
49 changes: 20 additions & 29 deletions be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,30 +112,28 @@ bool DorisFSDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, const ch
reader_options.cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE
: io::FileCachePolicy::NO_CACHE;
reader_options.is_doris_table = true;
if (!fs->open_file(path, &h->_reader, &reader_options).ok()) {
error.set(CL_ERR_IO, "open file error");
Status st = fs->open_file(path, &h->_reader, &reader_options);
DBUG_EXECUTE_IF("inverted file read error: index file not found",
{ st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found"); })
if (st.code() == ErrorCode::NOT_FOUND) {
error.set(CL_ERR_FileNotFound, "File does not exist");
} else if (st.code() == ErrorCode::IO_ERROR) {
error.set(CL_ERR_IO, "File open io error");
} else if (st.code() == ErrorCode::PERMISSION_DENIED) {
error.set(CL_ERR_IO, "File Access denied");
} else {
error.set(CL_ERR_IO, "Could not open file");
}

//Check if a valid handle was retrieved
if (h->_reader) {
if (st.ok() && h->_reader) {
//Store the file length
h->_length = h->_reader->size();
h->_fpos = 0;
ret = _CLNEW FSIndexInput(std::move(h), buffer_size);
return true;

} else {
int err = errno;
if (err == ENOENT) {
error.set(CL_ERR_IO, "File does not exist");
} else if (err == EACCES) {
error.set(CL_ERR_IO, "File Access denied");
} else if (err == EMFILE) {
error.set(CL_ERR_IO, "Too many open files");
} else {
error.set(CL_ERR_IO, "Could not open file");
}
}

//delete h->_shared_lock;
//_CLDECDELETE(h)
return false;
Expand Down Expand Up @@ -349,19 +347,6 @@ void DorisFSDirectory::init(const io::FileSystemSPtr& fs, const char* path,
}

lucene::store::Directory::setLockFactory(lock_factory);

// It's fail checking directory existence in S3.
if (_fs->type() == io::FileSystemType::S3) {
return;
}
bool exists = false;
LOG_AND_THROW_IF_ERROR(_fs->exists(directory, &exists),
"Doris compound directory init IO error");
if (!exists) {
auto e = "Doris compound directory init error: " + directory + " is not a directory";
LOG(WARNING) << e;
_CLTHROWA(CL_ERR_IO, e.c_str());
}
}

void DorisFSDirectory::priv_getFN(char* buffer, const char* name) const {
Expand Down Expand Up @@ -432,7 +417,13 @@ int64_t DorisFSDirectory::fileLength(const char* name) const {
char buffer[CL_MAX_DIR];
priv_getFN(buffer, name);
int64_t size = -1;
LOG_AND_THROW_IF_ERROR(_fs->file_size(buffer, &size), "Get file size IO error");
Status st = _fs->file_size(buffer, &size);
DBUG_EXECUTE_IF("inverted file read error: index file not found",
{ st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found"); })
if (st.code() == ErrorCode::NOT_FOUND) {
_CLTHROWA(CL_ERR_FileNotFound, "File does not exist");
}
LOG_AND_THROW_IF_ERROR(st, "Get file size IO error");
return size;
}

Expand Down
15 changes: 1 addition & 14 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,6 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
return Status::OK();
}

RETURN_IF_ERROR(check_file_exist(index_file_key));

if (!dir) {
// TODO: ugly code here, try to refact.
auto directory = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
Expand Down Expand Up @@ -252,7 +250,7 @@ Status InvertedIndexReader::handle_searcher_cache(
auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead");
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
IndexSearcherPtr searcher;
RETURN_IF_ERROR(check_file_exist(index_file_key));

auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
// try to reuse index_searcher's directory to read null_bitmap to cache
// to avoid open directory additionally for null_bitmap
Expand Down Expand Up @@ -286,17 +284,6 @@ Status InvertedIndexReader::create_index_searcher(lucene::store::Directory* dir,
return Status::OK();
};

Status InvertedIndexReader::check_file_exist(const std::string& index_file_key) {
bool exists = false;
RETURN_IF_ERROR(_inverted_index_file_reader->index_file_exist(&_index_meta, &exists));
if (!exists) {
LOG(WARNING) << "inverted index: " << index_file_key << " not exist.";
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"inverted index input file {} not found", index_file_key);
}
return Status::OK();
}

Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state,
std::unique_ptr<InvertedIndexIterator>* iterator) {
*iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this());
Expand Down
2 changes: 0 additions & 2 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea
MemTracker* mem_tracker,
InvertedIndexReaderType reader_type);

Status check_file_exist(const std::string& index_file_key);

protected:
friend class InvertedIndexIterator;
std::shared_ptr<InvertedIndexFileReader> _inverted_index_file_reader;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
3

-- !sql --
3

-- !sql --
3

-- !sql --
3

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.


suite("test_index_not_found_fault_injection", "nonConcurrent") {
def testTable = "test_index_not_found"

sql "DROP TABLE IF EXISTS ${testTable}"
sql """
CREATE TABLE ${testTable} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` string NULL COMMENT "",
`request` string NULL COMMENT "",
`status` string NULL COMMENT "",
`size` string NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """ INSERT INTO ${testTable} VALUES (893964617, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """
sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 304, 0); """

try {
GetDebugPoint().enableDebugPointForAllBEs("inverted file read error: index file not found")

qt_sql """ select count() from ${testTable} where (request match_phrase 'http'); """
qt_sql """ select count() from ${testTable} where (request match_phrase_prefix 'http'); """

qt_sql """ select count() from ${testTable} where (clientip match_phrase 'http' or request match_phrase 'http' or status match_phrase 'http' or size match_phrase 'http'); """
qt_sql """ select count() from ${testTable} where (clientip match_phrase_prefix 'http' or request match_phrase_prefix 'http' or status match_phrase_prefix 'http' or size match_phrase_prefix 'http'); """
} finally {
GetDebugPoint().disableDebugPointForAllBEs("inverted file read error: index file not found")
}
}

0 comments on commit a25ac14

Please sign in to comment.