Skip to content

Commit

Permalink
Support Utf8View for string function bit_length() (#13221)
Browse files Browse the repository at this point in the history
* Support `Utf8View` for string function `bit_length()`

Signed-off-by: Austin Liu <[email protected]>

* Add scalar test case

Signed-off-by: Austin Liu <[email protected]>

* Refine tests

Signed-off-by: Austin Liu <[email protected]>

* Fix wrong format

Signed-off-by: Austin Liu <[email protected]>

---------

Signed-off-by: Austin Liu <[email protected]>
  • Loading branch information
austin362667 authored Nov 10, 2024
1 parent ee56d6a commit 1557fce
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 0 deletions.
3 changes: 3 additions & 0 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ impl ScalarUDFImpl for BitLengthFunc {
ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as i64)),
)),
ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
ScalarValue::Int32(v.as_ref().map(|x| (x.len() * 8) as i32)),
)),
_ => unreachable!("bit length"),
},
}
Expand Down
41 changes: 41 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_literal.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1623,3 +1623,44 @@ a\_c \%abc false
\%abc a\_c false
\%abc %abc true
\%abc \%abc false

# Test bit_length for Utf8, LargeUtf8, Utf8View and Dictionary(Int32, Utf8) inputs.
# Each expected value is the string's UTF-8 byte length times 8, e.g.
# 'Andrew' is 6 bytes -> 48 bits and '💖' is 4 bytes -> 32 bits.
# First case: plain string literals with no explicit cast.
query IIII
SELECT
bit_length('Andrew'),
bit_length('datafusion数据融合'),
bit_length('💖'),
bit_length('josé')
;
----
48 176 32 40

# bit_length on the same strings cast to LargeUtf8; the bit counts must match
# the ones produced for the uncast literals.
query IIII
SELECT
bit_length(arrow_cast('Andrew', 'LargeUtf8')),
bit_length(arrow_cast('datafusion数据融合', 'LargeUtf8')),
bit_length(arrow_cast('💖', 'LargeUtf8')),
bit_length(arrow_cast('josé', 'LargeUtf8'))
;
----
48 176 32 40

# bit_length on scalar values cast to Utf8View. This covers the scalar code
# path only; multi-byte characters are counted by their UTF-8 byte length.
query IIII
SELECT
bit_length(arrow_cast('Andrew', 'Utf8View')),
bit_length(arrow_cast('datafusion数据融合', 'Utf8View')),
bit_length(arrow_cast('💖', 'Utf8View')),
bit_length(arrow_cast('josé', 'Utf8View'))
;
----
48 176 32 40

# bit_length on the same strings cast to Dictionary(Int32, Utf8); the results
# must match the plain-literal case.
query IIII
SELECT
bit_length(arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')),
bit_length(arrow_cast('datafusion数据融合', 'Dictionary(Int32, Utf8)')),
bit_length(arrow_cast('💖', 'Dictionary(Int32, Utf8)')),
bit_length(arrow_cast('josé', 'Dictionary(Int32, Utf8)'))
;
----
48 176 32 40
1 change: 1 addition & 0 deletions datafusion/sqllogictest/test_files/string/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ select octet_length(column1_utf8view) from test;
0
NULL

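# TODO: bit_length on a Utf8View column still fails in the Arrow compute kernel.
# After upgrading to an arrow-rs version that includes apache/arrow-rs#6671,
# replace this expected-error check with a query that asserts the actual bit lengths.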
query error DataFusion error: Arrow error: Compute error: bit_length not supported for Utf8View
select bit_length(column1_utf8view) from test;

Expand Down

0 comments on commit 1557fce

Please sign in to comment.