-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial support for `StringView`, merge changes from `string-view` development branch (#11402)
* Update `string-view` branch to arrow-rs main (#10966) * Pin to arrow main * Fix clippy with latest arrow * Uncomment test that needs new arrow-rs to work * Update datafusion-cli Cargo.lock * Update Cargo.lock * tapelo * feat: Implement equality = and inequality <> support for StringView (#10985) * feat: Implement equality = and inequality <> support for StringView * chore: Add tests for the StringView * chore * chore: Update tests for NULL * fix: Used build_array_string! * chore: Update string_coercion function to handle Utf8View type in binary.rs * chore: add tests * chore: ci * Add more StringView comparison test coverage (#10997) * Add more StringView comparison test coverage * add reference * Add another test showing casting on columns works correctly * feat: Implement equality = and inequality <> support for BinaryView (#11004) * feat: Implement equality = and inequality <> support for BinaryView Signed-off-by: Chojan Shang <[email protected]> * chore: make fmt happy Signed-off-by: Chojan Shang <[email protected]> --------- Signed-off-by: Chojan Shang <[email protected]> * Implement support for LargeString and LargeBinary for StringView and BinaryView (#11034) * implement large binary * add tests for large string * better comments for string coercion * Improve filter predicates with `Utf8View` literals (#11043) * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Improve type coercion logic in TypeCoercionRewriter * chore * chore: Update test * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Remove unused import and update code formatting in unwrap_cast_in_comparison.rs * Remove arrow-patch --------- Signed-off-by: Chojan Shang <[email protected]> Co-authored-by: Alex Huang <[email protected]> Co-authored-by: Chojan Shang <[email protected]> Co-authored-by: Xiangpeng Hao <[email protected]>
- Loading branch information
1 parent
f11bdf0
commit ccb4baf
Showing
5 changed files
with
566 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
######## | ||
## Test setup | ||
######## | ||
|
||
# Seed data: four two-column rows. Only the 'Xiangpeng' row has equal values | ||
# in both columns, and the last row has NULL as its first value so that | ||
# NULL handling is exercised by the comparisons below. | ||
statement ok | ||
create table test_source as values | ||
('Andrew', 'X'), | ||
('Xiangpeng', 'Xiangpeng'), | ||
('Raphael', 'R'), | ||
(NULL, 'R') | ||
; | ||
|
||
# Table with the different combination of column types | ||
# Each of the two source columns is re-exposed as Utf8, Binary, LargeBinary, | ||
# BinaryView and Dictionary(Int32, Binary). | ||
# The BinaryView columns are built with a two-step cast: first to Binary, | ||
# then to BinaryView. | ||
# NOTE(review): the *_dict columns are not referenced by any query in this | ||
# file section -- confirm whether they are still needed. | ||
statement ok | ||
CREATE TABLE test AS | ||
SELECT | ||
arrow_cast(column1, 'Utf8') as column1_utf8, | ||
arrow_cast(column2, 'Utf8') as column2_utf8, | ||
arrow_cast(column1, 'Binary') AS column1_binary, | ||
arrow_cast(column2, 'Binary') AS column2_binary, | ||
arrow_cast(column1, 'LargeBinary') AS column1_large_binary, | ||
arrow_cast(column2, 'LargeBinary') AS column2_large_binary, | ||
arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS column1_binaryview, | ||
arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS column2_binaryview, | ||
arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict, | ||
arrow_cast(column2, 'Dictionary(Int32, Binary)') AS column2_dict | ||
FROM test_source; | ||
|
||
# The seed table is no longer needed once `test` has been materialized from it. | ||
statement ok | ||
drop table test_source | ||
|
||
######## | ||
## BinaryView to BinaryView | ||
######## | ||
|
||
# BinaryView scalar to BinaryView scalar | ||
|
||
# Compares BinaryView scalars built with a Binary -> BinaryView cast chain. | ||
# Note that 'NULL' below is the quoted four-character string, not SQL NULL, | ||
# which is why comparison1 / comparison2 are expected to be false / true | ||
# rather than NULL. | ||
query BBBB | ||
SELECT | ||
arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison1, | ||
arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison2, | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison3, | ||
arrow_cast(arrow_cast('Xiangpeng', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison4; | ||
---- | ||
false true true true | ||
|
||
|
||
# BinaryView column to BinaryView column comparison as filters | ||
|
||
# Equality used as a WHERE predicate: only the row whose two BinaryView | ||
# values are equal is expected back. The Utf8 columns are selected for the | ||
# output instead of the BinaryView columns being compared. | ||
query TT | ||
select column1_utf8, column2_utf8 from test where column1_binaryview = column2_binaryview; | ||
---- | ||
Xiangpeng Xiangpeng | ||
|
||
# Inequality used as a WHERE predicate: the (NULL, 'R') row is absent from | ||
# the expected output, i.e. a NULL comparison result does not pass the filter. | ||
# NOTE(review): the query has no ORDER BY or rowsort; the expected rows are | ||
# listed in insertion order -- confirm that ordering is stable. | ||
query TT | ||
select column1_utf8, column2_utf8 from test where column1_binaryview <> column2_binaryview; | ||
---- | ||
Andrew X | ||
Raphael R | ||
|
||
# BinaryView column to BinaryView column | ||
# The comparisons are projected as boolean columns next to the Utf8 values, | ||
# so all four rows appear; the row with NULL column1 yields NULL for both | ||
# = and <>. | ||
query TTBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = column2_binaryview, | ||
column1_binaryview <> column2_binaryview | ||
from test; | ||
---- | ||
Andrew X false true | ||
Xiangpeng Xiangpeng true false | ||
Raphael R false true | ||
NULL R NULL NULL | ||
|
||
# BinaryView column to BinaryView scalar comparison | ||
# Each operator is checked with the column on the left and on the right of | ||
# the scalar; both operand orders are expected to give the same result. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binaryview, | ||
column1_binaryview <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binaryview | ||
from test; | ||
---- | ||
Andrew X true true false false | ||
Xiangpeng Xiangpeng false false true true | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
######## | ||
## BinaryView to Binary / LargeBinary | ||
######## | ||
|
||
# BinaryView column compared with a Binary column (mixed types), with = and | ||
# <> each checked in both operand orders. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = column2_binary, | ||
column2_binary = column1_binaryview, | ||
column1_binaryview <> column2_binary, | ||
column2_binary <> column1_binaryview | ||
from test; | ||
---- | ||
Andrew X false false true true | ||
Xiangpeng Xiangpeng true true false false | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
# BinaryView column compared with a LargeBinary column (mixed types), with = | ||
# and <> each checked in both operand orders. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = column2_large_binary, | ||
column2_large_binary = column1_binaryview, | ||
column1_binaryview <> column2_large_binary, | ||
column2_large_binary <> column1_binaryview | ||
from test; | ||
---- | ||
Andrew X false false true true | ||
Xiangpeng Xiangpeng true true false false | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
# BinaryView column to Binary scalar | ||
# The scalar is cast only to Binary (no BinaryView cast), so the two sides | ||
# of each comparison have different types. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = arrow_cast('Andrew', 'Binary'), | ||
arrow_cast('Andrew', 'Binary') = column1_binaryview, | ||
column1_binaryview <> arrow_cast('Andrew', 'Binary'), | ||
arrow_cast('Andrew', 'Binary') <> column1_binaryview | ||
from test; | ||
---- | ||
Andrew X true true false false | ||
Xiangpeng Xiangpeng false false true true | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
# BinaryView column to LargeBinary scalar | ||
# Same shape as the Binary scalar case, but the scalar is cast to LargeBinary. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binaryview = arrow_cast('Andrew', 'LargeBinary'), | ||
arrow_cast('Andrew', 'LargeBinary') = column1_binaryview, | ||
column1_binaryview <> arrow_cast('Andrew', 'LargeBinary'), | ||
arrow_cast('Andrew', 'LargeBinary') <> column1_binaryview | ||
from test; | ||
---- | ||
Andrew X true true false false | ||
Xiangpeng Xiangpeng false false true true | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
# Binary column to BinaryView scalar | ||
# Reverse of the earlier mixed-type cases: here the column is plain Binary | ||
# and the scalar is the BinaryView side. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binary, | ||
column1_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binary | ||
from test; | ||
---- | ||
Andrew X true true false false | ||
Xiangpeng Xiangpeng false false true true | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
|
||
# LargeBinary column to BinaryView scalar | ||
# The column is LargeBinary and the scalar is the BinaryView side; = and <> | ||
# are each checked in both operand orders. | ||
query TTBBBB | ||
select | ||
column1_utf8, column2_utf8, | ||
column1_large_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_large_binary, | ||
column1_large_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), | ||
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_large_binary | ||
from test; | ||
---- | ||
Andrew X true true false false | ||
Xiangpeng Xiangpeng false false true true | ||
Raphael R false false true true | ||
NULL R NULL NULL NULL NULL | ||
|
||
# Teardown: remove the table created in the test setup section. | ||
statement ok | ||
drop table test; |
Oops, something went wrong.