Skip to content

Commit

Permalink
add some truncated fixed string case (vesoft-inc#1967)
Browse files Browse the repository at this point in the history
* add some truncated fixed string case

* add test case about utf8 truncated in index

Co-authored-by: hs.zhang <[email protected]>
Co-authored-by: Sophie <[email protected]>

Co-authored-by: Doodle <[email protected]>
Co-authored-by: hs.zhang <[email protected]>
Co-authored-by: Sophie <[email protected]>
  • Loading branch information
4 people authored Dec 22, 2022
1 parent 1541ad8 commit 8ac1a26
Show file tree
Hide file tree
Showing 4 changed files with 1,072 additions and 1 deletion.
30 changes: 30 additions & 0 deletions src/codec/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,35 @@ inline std::string toHexStr(folly::StringPiece str) {
return buf;
}

// Finds a safe truncation point for a string that does not fit into maxLen bytes.
//
// Walks v character by character from the front, using each leading byte to decide how many
// bytes the character occupies, and stops before the first character that would cross maxLen.
// The result is therefore always <= maxLen and never lands inside a multi-byte character.
//
// Precondition: v.size() > maxLen (enforced by DCHECK_GT below).
inline size_t utf8CutSize(folly::StringPiece v, size_t maxLen) {
  DCHECK_GT(v.size(), maxLen);

  // Number of bytes announced by a leading byte. Bytes below 0xC0 are counted as a single
  // byte; the 5- and 6-byte prefixes (0xF8.., 0xFC..) are recognised as well.
  const auto charWidth = [](unsigned char lead) -> size_t {
    if (lead < 0xC0) {
      return 1;
    }
    if (lead < 0xE0) {
      return 2;
    }
    if (lead < 0xF0) {
      return 3;
    }
    if (lead < 0xF8) {
      return 4;
    }
    if (lead < 0xFC) {
      return 5;
    }
    return 6;
  };

  size_t cut = 0;  // bytes accepted so far; always sits on a character boundary
  while (cut < maxLen) {
    const size_t next = cut + charWidth(static_cast<unsigned char>(v[cut]));
    if (next > maxLen) {
      // The next character would overflow the limit, so cut right before it.
      break;
    }
    cut = next;
  }
  return cut;
}

} // namespace nebula
#endif // CODEC_COMMON_H_
3 changes: 2 additions & 1 deletion src/codec/RowWriterV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <cmath>

#include "codec/Common.h"
#include "common/time/TimeUtils.h"
#include "common/time/WallClock.h"
#include "common/utils/DefaultValueContext.h"
Expand Down Expand Up @@ -679,7 +680,7 @@ WriteResult RowWriterV2::write(ssize_t index, folly::StringPiece v) noexcept {
case PropertyType::FIXED_STRING: {
// In-place string. If the pass-in string is longer than the pre-defined
// fixed length, the string will be truncated to the fixed length
size_t len = v.size() > field->size() ? field->size() : v.size();
size_t len = v.size() > field->size() ? utf8CutSize(v, field->size()) : v.size();
strncpy(&buf_[offset], v.data(), len);
if (len < field->size()) {
memset(&buf_[offset + len], 0, field->size() - len);
Expand Down
92 changes: 92 additions & 0 deletions tests/tck/features/fetch/FetchEmpty.feature
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,95 @@ Feature: Fetch prop on empty tag/edge
| e |
| [:zero_prop_edge "1"->"2" @0{}] |
And drop the used space

Scenario: Tag Fixed String Property
When executing query:
"""
CREATE TAG tag_with_fixed_string(col1 fixed_string(5));
"""
And wait 5 seconds
When executing query:
"""
INSERT VERTEX tag_with_fixed_string(col1)
VALUES
"1": ("😀😀"),
"2": ("😂😂"),
"3": ("羊羊羊"),
"4": ("🐏🐏🐏");
"""
Then the execution should be successful
When executing query:
"""
FETCH PROP on tag_with_fixed_string "1" yield tag_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "😀" |
When executing query:
"""
FETCH PROP on tag_with_fixed_string "2" yield tag_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "😂" |
When executing query:
"""
FETCH PROP on tag_with_fixed_string "3" yield tag_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "羊" |
When executing query:
"""
FETCH PROP on tag_with_fixed_string "4" yield tag_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "🐏" |
And drop the used space

Scenario: Edge Fixed String Property
When executing query:
"""
CREATE EDGE edge_with_fixed_string(col1 fixed_string(5));
"""
And wait 5 seconds
When executing query:
"""
INSERT EDGE edge_with_fixed_string(col1)
VALUES
"1" -> "1": ("😀😀"),
"2" -> "2": ("😂😂"),
"3" -> "3": ("羊羊羊"),
"4" -> "4": ("🐏🐏🐏");
"""
Then the execution should be successful
When executing query:
"""
FETCH PROP on edge_with_fixed_string "1" -> "1" yield edge_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "😀" |
When executing query:
"""
FETCH PROP on edge_with_fixed_string "2" -> "2" yield edge_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "😂" |
When executing query:
"""
FETCH PROP on edge_with_fixed_string "3" -> "3" yield edge_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "羊" |
When executing query:
"""
FETCH PROP on edge_with_fixed_string "4" -> "4" yield edge_with_fixed_string.col1 as col1
"""
Then the result should be, in any order:
| col1 |
| "🐏" |
And drop the used space
Loading

0 comments on commit 8ac1a26

Please sign in to comment.