Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: literal value filter #767

Merged
merged 393 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
393 commits
Select commit Hold shift + click to select a range
ff7dcbb
removed nanosecond timestamps
daniel-sanche Mar 30, 2023
39da24d
ran black
daniel-sanche Mar 30, 2023
3a6fff1
removed from_dict
daniel-sanche Mar 30, 2023
d465737
Merge branch 'v3_row_response' into read_rows_state_machine
daniel-sanche Mar 30, 2023
00a3d3e
got acceptance tests passing
daniel-sanche Mar 31, 2023
393749f
removed type conversion
daniel-sanche Mar 31, 2023
2a42216
renamed acceptance test file
daniel-sanche Mar 31, 2023
536e587
ran blacken
daniel-sanche Mar 31, 2023
4e262d1
unwrap proto-plus object
daniel-sanche Mar 31, 2023
fac018e
added test skeleton
daniel-sanche Mar 31, 2023
9800a78
working on tests
daniel-sanche Mar 31, 2023
8a22d15
implement pool as custom grpc channel
daniel-sanche Mar 31, 2023
38e5662
did some restructuring
daniel-sanche Apr 1, 2023
5155800
got some tests working
daniel-sanche Apr 1, 2023
522f7fa
improved tests
daniel-sanche Apr 2, 2023
9429244
renamed RowResponse and CellResponse to Row and Cell
daniel-sanche Apr 2, 2023
1aa7424
fixed tests
daniel-sanche Apr 2, 2023
a603649
simplified row construction
daniel-sanche Apr 2, 2023
68a5a0f
added RowRange object
daniel-sanche Apr 3, 2023
cc2e7c8
added comments
daniel-sanche Apr 3, 2023
ba629c8
added api-core submodule
daniel-sanche Apr 3, 2023
75d2c10
copied in rough retryable logic
daniel-sanche Apr 3, 2023
d5eca2a
Merge branch 'v3_row_response' into read_rows_state_machine
daniel-sanche Apr 3, 2023
2a26797
updated Row and Cell class names
daniel-sanche Apr 3, 2023
bcd394f
fixed tests
daniel-sanche Apr 3, 2023
037af0d
added last scanned row class
daniel-sanche Apr 3, 2023
e17d9bc
ran blacken
daniel-sanche Apr 3, 2023
db80d22
Merge branch 'read_rows_state_machine' into read_rows_retries
daniel-sanche Apr 3, 2023
b3d977d
handle last scanned rows
daniel-sanche Apr 3, 2023
1f85462
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 3, 2023
1fba6ea
updated add_keys
daniel-sanche Apr 3, 2023
c4f82b0
removed chaining
daniel-sanche Apr 3, 2023
caca14c
improved to_dicts
daniel-sanche Apr 3, 2023
5f9ce85
improving row_ranges
daniel-sanche Apr 3, 2023
8e5f60a
fixed properties
daniel-sanche Apr 3, 2023
57184c1
added type checking to range
daniel-sanche Apr 3, 2023
3eda7f4
got tests passing
daniel-sanche Apr 3, 2023
65f5a2a
blacken, mypy
daniel-sanche Apr 3, 2023
3e724db
ran blacken
daniel-sanche Apr 3, 2023
45eadce
improved API usage
daniel-sanche Apr 3, 2023
c06213f
use invalid chunk
daniel-sanche Apr 3, 2023
6e75a2f
added per request timeouts
daniel-sanche Apr 3, 2023
a205e93
account for RequestStats
daniel-sanche Apr 3, 2023
ce3eb75
added output generator wrapper
daniel-sanche Apr 3, 2023
74029c9
updated template
daniel-sanche Apr 3, 2023
7f2be30
got tests passing
daniel-sanche Apr 3, 2023
2b044ce
removed metadata
daniel-sanche Apr 4, 2023
1743098
added sleep between swapping and closing channels
daniel-sanche Apr 4, 2023
e5fa4b6
ran blacken
daniel-sanche Apr 4, 2023
8955ec5
got tests working
daniel-sanche Apr 4, 2023
002bc5f
fixed lint issue
daniel-sanche Apr 4, 2023
65f0d2f
fixed tests
daniel-sanche Apr 4, 2023
664a6d2
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 4, 2023
d3db731
Merge branch 'add_new_transport' into read_rows_state_machine
daniel-sanche Apr 4, 2023
5f41c06
changed return type
daniel-sanche Apr 4, 2023
aa26911
Merge branch 'v3_read_rows_query' into read_rows_state_machine
daniel-sanche Apr 4, 2023
7b68207
fixed typing issues
daniel-sanche Apr 4, 2023
a776cb5
Merge branch 'read_rows_state_machine' into read_rows_retries
daniel-sanche Apr 4, 2023
c164a47
adjusted types
daniel-sanche Apr 4, 2023
96d58d1
added per-row-timeout to merge_row_stream_with_cache
daniel-sanche Apr 4, 2023
216610e
cancel stream on exception
daniel-sanche Apr 4, 2023
c505c39
moved retry logic into RetryableRowMerger
daniel-sanche Apr 4, 2023
179c8b8
fixed issues in merger
daniel-sanche Apr 4, 2023
3cc5380
moved streaming into cache into RetryableRowMerger
daniel-sanche Apr 4, 2023
4af0218
restructuring
daniel-sanche Apr 4, 2023
d6a323f
added idle timeout
daniel-sanche Apr 4, 2023
7b6d1db
keep track of last_raised
daniel-sanche Apr 4, 2023
733a393
fixed mypy issues
daniel-sanche Apr 4, 2023
12807e0
made idle timeout internal value
daniel-sanche Apr 4, 2023
0e3d32c
combined row merger functions
daniel-sanche Apr 4, 2023
5b055b4
made adjustments to RowMerger
daniel-sanche Apr 4, 2023
dbf19c9
holds a gapic client instead of inherits from it
daniel-sanche Apr 5, 2023
ab7931c
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 5, 2023
88f14f6
don't emit _LastScannedRows
daniel-sanche Apr 5, 2023
9f15a6a
fixed type issues
daniel-sanche Apr 5, 2023
b3c32b0
got tests passing
daniel-sanche Apr 5, 2023
770d9f5
added comments
daniel-sanche Apr 5, 2023
9f3e0c5
added comment
daniel-sanche Apr 5, 2023
a0620ea
added random noise to refresh intervals
daniel-sanche Apr 5, 2023
4f5ed46
improving comments; clean up
daniel-sanche Apr 5, 2023
c169ba8
fixed param order
daniel-sanche Apr 5, 2023
9ec3697
working on getting end-to-end read_rows working
daniel-sanche Apr 5, 2023
b6873e8
fixed issue in pulling from cache
daniel-sanche Apr 5, 2023
2facc79
added timeout to results generator
daniel-sanche Apr 5, 2023
ee826bb
added acceptance tests for read_rows
daniel-sanche Apr 5, 2023
25af0c0
adding tests
daniel-sanche Apr 5, 2023
2f7778d
got operation deadline error working properly
daniel-sanche Apr 5, 2023
d6b8e6b
made RowMerger back into an iterable
daniel-sanche Apr 5, 2023
3f085a9
added test for per-row timeout
daniel-sanche Apr 6, 2023
6abb9d4
don't attach retry errors if there are none
daniel-sanche Apr 6, 2023
128320c
added tests for per_request_timeout
daniel-sanche Apr 6, 2023
a048536
added idle timeout test
daniel-sanche Apr 6, 2023
371dd64
remove row merger after error
daniel-sanche Apr 6, 2023
ebbaa1e
reorganized retryable_merge_rows
daniel-sanche Apr 6, 2023
2a3e379
improved resource clean up on retries and expiration
daniel-sanche Apr 6, 2023
2e50c51
added tests for request stats
daniel-sanche Apr 6, 2023
0b63b2b
added tests for exceptions
daniel-sanche Apr 6, 2023
de102bb
clean up on_error
daniel-sanche Apr 6, 2023
bbdb8e6
await sleep
daniel-sanche Apr 6, 2023
83472dc
got tests working
daniel-sanche Apr 6, 2023
bef40bd
updated api-core
daniel-sanche Apr 6, 2023
29a98ed
Merge branch 'v3' into read_rows_retries
daniel-sanche Apr 6, 2023
534005a
ran blacken
daniel-sanche Apr 6, 2023
6f1c781
made invalid chunk a server error
daniel-sanche Apr 6, 2023
38f66e5
moved invalid chunk with other exceptions
daniel-sanche Apr 6, 2023
bf24c25
made row merger and classes private
daniel-sanche Apr 6, 2023
4dbacb5
added read_rows
daniel-sanche Apr 6, 2023
6e6978e
ran blacken
daniel-sanche Apr 6, 2023
21f7846
added comments
daniel-sanche Apr 6, 2023
52e9dbf
added test for revise rowset
daniel-sanche Apr 6, 2023
715be51
fixed lint issues
daniel-sanche Apr 6, 2023
2f50cb7
moved ReadRowsIterator into new file
daniel-sanche Apr 6, 2023
1486d5a
Merge branch 'v3' into add_new_transport
daniel-sanche Apr 6, 2023
28d5a7a
fixed lint issues
daniel-sanche Apr 6, 2023
3b11580
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 6, 2023
d47c941
changed comment
daniel-sanche Apr 6, 2023
d1bd128
added comments to iterator
daniel-sanche Apr 6, 2023
039d623
added var for idle timeout
daniel-sanche Apr 6, 2023
3d34dcd
sped up acceptance tests
daniel-sanche Apr 6, 2023
70fbff9
reduced size of template by making subclass
daniel-sanche Apr 7, 2023
383d8eb
reverted unintentional gapic generation changes
daniel-sanche Apr 7, 2023
018fe03
updated submodule
daniel-sanche Apr 7, 2023
3764a98
added default timeouts to table surface
daniel-sanche Apr 7, 2023
745ae38
end after row_limit rows
daniel-sanche Apr 13, 2023
3d11d55
changed retryable exceptions
daniel-sanche Apr 13, 2023
f0403e7
changed warning stack level
daniel-sanche Apr 13, 2023
84a775a
changed retryable errors
daniel-sanche Apr 13, 2023
15a9d23
improved comments
daniel-sanche Apr 13, 2023
8636654
improved idle timeouts
daniel-sanche Apr 13, 2023
1aca392
changed retry parameters
daniel-sanche Apr 13, 2023
45fef1e
added limit revision to each retry
daniel-sanche Apr 13, 2023
951a77b
removed unneeded check
daniel-sanche Apr 13, 2023
e3a0b66
fixed idle timeout test
daniel-sanche Apr 13, 2023
6089934
removed tracking of emitted rows
daniel-sanche Apr 13, 2023
fb4b0ca
removed revise_on_retry flag
daniel-sanche Apr 14, 2023
83b908c
changed initial sleep
daniel-sanche Apr 14, 2023
5688561
added extra timeout check
daniel-sanche Apr 14, 2023
ff3724d
removed outdated test
daniel-sanche Apr 17, 2023
78a309c
fixed type annotations
daniel-sanche Apr 17, 2023
c50ae18
added slots
daniel-sanche Apr 17, 2023
d73121b
renamed cache to buffer
daniel-sanche Apr 17, 2023
14d8527
renamed errors
daniel-sanche Apr 17, 2023
4b89c86
replaced type check with None check
daniel-sanche Apr 17, 2023
9f89577
added comment for last_scanned_row heartbeat
daniel-sanche Apr 17, 2023
4b229b9
added early return
daniel-sanche Apr 17, 2023
152bccf
moved validation
daniel-sanche Apr 17, 2023
67c2911
added close call to ReadRowsIterator
daniel-sanche Apr 18, 2023
ff11ad3
removed del
daniel-sanche Apr 18, 2023
78bd5d3
pull out buffer control logic
daniel-sanche Apr 18, 2023
ca4a16d
got buffering working
daniel-sanche Apr 18, 2023
0dba121
check for full table scan revision
daniel-sanche Apr 18, 2023
3537566
renamed and added underscores
daniel-sanche Apr 18, 2023
981f169
added extra check
daniel-sanche Apr 18, 2023
d3d4c76
removed unneeded validation
daniel-sanche Apr 18, 2023
1901094
renamed RowMerger to ReadRowsOperation
daniel-sanche Apr 18, 2023
947fe9b
changed _read_rows test file name
daniel-sanche Apr 18, 2023
773d4e5
added row builder tests
daniel-sanche Apr 18, 2023
cbb0513
added revise_row tests
daniel-sanche Apr 19, 2023
2bec693
ran blacken
daniel-sanche Apr 19, 2023
5cd8e00
added constructor tests
daniel-sanche Apr 19, 2023
d6f3ae1
upgraded submodule
daniel-sanche Apr 19, 2023
f2d7e71
added tests
daniel-sanche Apr 19, 2023
cb23d32
update docstring
daniel-sanche Apr 19, 2023
bc31ab8
update docstring
daniel-sanche Apr 19, 2023
f54dfde
fix typo
daniel-sanche Apr 19, 2023
46cfc49
docstring improvements
daniel-sanche Apr 19, 2023
573bbd1
made creating table outside loop into error
daniel-sanche Apr 19, 2023
4f2657d
make tables own active instances, and remove instances when tables close
daniel-sanche Apr 19, 2023
59955be
added pool_size and channels as public properties
daniel-sanche Apr 19, 2023
377a8c9
fixed typo
daniel-sanche Apr 19, 2023
8a29898
simplified pooled multicallable
daniel-sanche Apr 20, 2023
50aa5ba
ran blacken
daniel-sanche Apr 20, 2023
42a52a3
associate ids with instances, instead of Table objects
daniel-sanche Apr 20, 2023
abc7a2d
fixed tests
daniel-sanche Apr 20, 2023
836af0f
made sure that empty strings are valid family and qualifier inputs
daniel-sanche Apr 20, 2023
e73551d
added tests for state machine
daniel-sanche Apr 20, 2023
792aba1
added state machine tests
daniel-sanche Apr 20, 2023
e57c510
fixed broken mock
daniel-sanche Apr 20, 2023
88748a9
added additional tests
daniel-sanche Apr 20, 2023
0c38981
ran blacken
daniel-sanche Apr 20, 2023
50dc608
reverted pooled multicallable changes
daniel-sanche Apr 20, 2023
b116755
pass scopes to created channels
daniel-sanche Apr 21, 2023
ec5eb07
added basic ping system test
daniel-sanche Apr 21, 2023
55cdcc2
keep both the names and ids in table object
daniel-sanche Apr 21, 2023
0253692
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 21, 2023
3855333
added api-core to noxfile tests
daniel-sanche Apr 21, 2023
213519e
added basic read rows stream to system tests
daniel-sanche Apr 21, 2023
9e3b411
pull project details out of env vars
daniel-sanche Apr 21, 2023
d8cf158
added automatic row creation for system tests
daniel-sanche Apr 21, 2023
c9b8217
added read_rows non stream
daniel-sanche Apr 21, 2023
500eff0
added range query system test
daniel-sanche Apr 21, 2023
27130f0
added logic for temporary test tables and instances
daniel-sanche Apr 21, 2023
f4f4fac
made iterator active into a property
daniel-sanche Apr 21, 2023
06dee54
added more read_rows system tests
daniel-sanche Apr 21, 2023
9e11f88
fixed lint issues
daniel-sanche Apr 21, 2023
794c55a
added iterator tests
daniel-sanche Apr 21, 2023
ccd9545
added tests for timeouts
daniel-sanche Apr 21, 2023
ca84b96
ran black
daniel-sanche Apr 21, 2023
eb936cf
fixed lint issues
daniel-sanche Apr 21, 2023
ab43138
restructured test_client
daniel-sanche Apr 21, 2023
cb1884d
changed how random is mocked
daniel-sanche Apr 21, 2023
9a89d74
ran black
daniel-sanche Apr 21, 2023
7f783fc
restructured test_client
daniel-sanche Apr 21, 2023
6a6d219
Merge branch 'add_new_transport' into read_rows_retries
daniel-sanche Apr 21, 2023
72eca75
restructured test_client_read_rows
daniel-sanche Apr 21, 2023
ad42436
moved read rows tests in test_client
daniel-sanche Apr 21, 2023
fb30977
feat: replace exact value filter with literal value filter
daniel-sanche Apr 21, 2023
f6fa468
fixed lint issues
daniel-sanche Apr 21, 2023
7606e3a
update submodules in nox
daniel-sanche Apr 22, 2023
829e68f
ran black
daniel-sanche Apr 22, 2023
e8eff39
Merge branch 'v3' into read_rows_retries
daniel-sanche Apr 24, 2023
6a58e86
removed submodule update
daniel-sanche Apr 24, 2023
9be5b07
removed unneeded import
daniel-sanche Apr 24, 2023
83ffe31
added submodule update to trampoline.sh
daniel-sanche Apr 24, 2023
2de0c5e
reverted submodule update
daniel-sanche Apr 24, 2023
3d597dd
added api-core fork to external dependencies
daniel-sanche Apr 28, 2023
a7d6d25
changed pinned api-core version
daniel-sanche Apr 28, 2023
55ca37f
brought in shared deadline logic from mutate_rows
daniel-sanche Apr 28, 2023
fd27aa5
merged in read_rows performance optimizations
daniel-sanche May 12, 2023
12a8879
fixed lint issues
daniel-sanche May 12, 2023
0ff3324
use _convert_retry_deadline for iterator
daniel-sanche May 19, 2023
f9a614e
renamed variable
daniel-sanche May 19, 2023
9fee2e2
renamed last_emitted_row; made duplicates an error
daniel-sanche May 19, 2023
444c022
end operation when row_set is complete
daniel-sanche May 19, 2023
3360a04
changed client defaults
daniel-sanche May 19, 2023
11961ae
request_timeouts are limited to remaining operation_timeout
daniel-sanche May 22, 2023
3921093
fixed comparison typo
daniel-sanche May 22, 2023
6c76e27
fixed docstring
daniel-sanche May 23, 2023
a1d62e9
added comment on exception
daniel-sanche May 23, 2023
6c53fe5
improve revise_row_set function
daniel-sanche May 23, 2023
13058b7
timeout is capped at 0
daniel-sanche May 23, 2023
df296cb
removed buffer
daniel-sanche May 23, 2023
2ed3b83
fixed tests
daniel-sanche May 23, 2023
248e6be
removed request_stats
daniel-sanche May 23, 2023
3d69147
added todo
daniel-sanche May 23, 2023
d4221fc
updated api-core version for tests
daniel-sanche May 23, 2023
36aea0c
added space in constraints file
daniel-sanche May 23, 2023
1bef2a2
prioritized external dependencies
daniel-sanche May 23, 2023
1e95bf2
added retries to system tests
daniel-sanche May 23, 2023
61c01c6
fixed errors with 3.7 tests
daniel-sanche May 23, 2023
4535c17
fixed lint issue
daniel-sanche May 23, 2023
b548ec9
update test names
daniel-sanche May 23, 2023
85114b1
Merge branch 'read_rows_retries' into literal_value_filter
daniel-sanche May 24, 2023
1d68c35
allow utf-8
daniel-sanche May 24, 2023
aeef47f
added system test for literal value filter
daniel-sanche May 24, 2023
f476c73
Merge branch 'v3' into literal_value_filter
daniel-sanche May 24, 2023
970298a
added some negative test cases
daniel-sanche May 25, 2023
804cc14
fixed special handling for null bytes
daniel-sanche May 25, 2023
6f52c6a
improved tests
daniel-sanche May 25, 2023
1daeffc
Merge branch 'v3' into literal_value_filter
daniel-sanche Jun 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions google/cloud/bigtable/row_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,20 +481,56 @@ def to_dict(self) -> dict[str, bytes]:
return {"value_regex_filter": self.regex}


class ExactValueFilter(ValueRegexFilter):
class LiteralValueFilter(ValueRegexFilter):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to go through a deprecation process to let users know that ExactValueFilter will be removed in v3 and that they should use LiteralValueFilter instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will not be removed right away, since the v3 branch keeps a copy of ExactValueFilter and all other classes from the existing library in google.cloud.bigtable.deprecated.

The v3 deprecation process will be a bit tricky, but we will make sure that all existing classes are accessible for at least a decent transition period

"""Row filter for an exact value.


:type value: bytes or str or int
:param value:
a literal string encodable as ASCII, or the
equivalent bytes, or an integer (which will be packed into 8-bytes).
a literal string, integer, or the equivalent bytes.
Integer values will be packed into signed 8-bytes.
"""

def __init__(self, value: bytes | str | int):
if isinstance(value, int):
value = _PACK_I64(value)
super(ExactValueFilter, self).__init__(value)
elif isinstance(value, str):
value = value.encode("utf-8")
value = self._write_literal_regex(value)
super(LiteralValueFilter, self).__init__(value)

@staticmethod
def _write_literal_regex(input_bytes: bytes) -> bytes:
    """
    Escape re2 special characters from literal bytes.

    Extracted from: re2 QuoteMeta:
    https://github.com/google/re2/blob/70f66454c255080a54a8da806c52d1f618707f8a/re2/re2.cc#L456

    :type input_bytes: bytes
    :param input_bytes: the raw bytes that should be matched exactly.

    :rtype: bytes
    :returns: a pattern in which ASCII letters, digits, underscores and
              every byte with the high bit set are copied unchanged, each
              null byte is spelled out as the four characters
              backslash, "x", "0", "0", and every other byte is preceded
              by a single backslash.
    """
    # Bytes that never need quoting. Built once per call instead of
    # recomputing the ord() range bounds for every input byte.
    unescaped = frozenset(
        b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
    )
    result = bytearray()
    for byte in input_bytes:
        # A byte with the high bit set is part of a UTF8 or Latin1
        # character and must be copied without escaping. Experimentally
        # this is what works correctly with the regexp library.
        if byte & 0x80 or byte in unescaped:
            result.append(byte)
        elif byte == 0:
            # Special handling for null chars.
            # Note that this special handling is not strictly required for
            # RE2, but this quoting is required for other regexp libraries
            # such as PCRE.
            # Can't use "\\0" since the next character might be a digit.
            result += b"\\x00"
        else:
            result += b"\\"
            result.append(byte)
    return bytes(result)

def __repr__(self) -> str:
    # Render as "<ClassName>(value=<regex bytes>)".
    # NOTE(review): self.regex appears to hold the already-escaped pattern
    # passed to the parent constructor, not the caller's raw value. For
    # inputs containing re2 metacharacters, eval(repr(f)) would therefore
    # escape the pattern a second time and not compare equal — confirm
    # whether round-tripping is expected for this class.
    return f"{self.__class__.__name__}(value={self.regex!r})"
Expand Down
50 changes: 50 additions & 0 deletions tests/system/test_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ def __init__(self, table):
async def add_row(
self, row_key, family=TEST_FAMILY, qualifier=b"q", value=b"test-value"
):
if isinstance(value, str):
value = value.encode("utf-8")
elif isinstance(value, int):
value = value.to_bytes(8, byteorder="big", signed=True)
request = {
"table_name": self.table.table_name,
"row_key": row_key,
Expand Down Expand Up @@ -391,3 +395,49 @@ async def test_read_rows_stream_inactive_timer(table, temp_rows):
await generator.__anext__()
assert "inactivity" in str(e)
assert "idle_timeout=0.1" in str(e)


# NOTE(review): if `retry` is google.api_core.retry, Retry is the synchronous
# wrapper; around an `async def` it would presumably only guard creation of
# the coroutine, not the awaited body — confirm whether AsyncRetry is intended.
@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5)
@pytest.mark.parametrize(
    "cell_value,filter_input,expect_match",
    [
        (b"abc", b"abc", True),
        (b"abc", "abc", True),
        (b".", ".", True),
        (".*", ".*", True),
        (".*", b".*", True),
        ("a", ".*", False),
        (b".*", b".*", True),
        (r"\a", r"\a", True),
        (b"\xe2\x98\x83", "☃", True),
        ("☃", "☃", True),
        (r"\C☃", r"\C☃", True),
        (1, 1, True),
        (2, 1, False),
        (68, 68, True),
        ("D", 68, False),
        (68, "D", False),
        (-1, -1, True),
        (2852126720, 2852126720, True),
        (-1431655766, -1431655766, True),
        (-1431655766, -1, False),
    ],
)
@pytest.mark.asyncio
async def test_literal_value_filter(
    table, temp_rows, cell_value, filter_input, expect_match
):
    """
    Literal value filter does complex escaping on re2 strings.
    Make sure inputs are properly interpreted by the server.

    Writes a single cell holding ``cell_value``, reads the table back through
    a ``LiteralValueFilter(filter_input)``, and checks that exactly one row is
    returned when ``expect_match`` is true and none otherwise.
    """
    from google.cloud.bigtable.row_filters import LiteralValueFilter
    from google.cloud.bigtable import ReadRowsQuery

    f = LiteralValueFilter(filter_input)
    await temp_rows.add_row(b"row_key_1", value=cell_value)
    query = ReadRowsQuery(row_filter=f)
    row_list = await table.read_rows(query)
    # Compare against an explicit count, and report both directions of
    # failure: a missing row as well as an unexpected match.
    expected_rows = 1 if expect_match else 0
    assert len(row_list) == expected_rows, (
        f"expected {expected_rows} row(s) but found {len(row_list)}: "
        f"cell {type(cell_value)}({cell_value}) "
        f"with {type(filter_input)}({filter_input}) filter"
    )
116 changes: 73 additions & 43 deletions tests/unit/test_row_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,84 +822,90 @@ def test_value_regex_filter___repr__():
assert eval(repr(row_filter)) == row_filter


def test_exact_value_filter_to_pb_w_bytes():
from google.cloud.bigtable.row_filters import ExactValueFilter
def test_literal_value_filter_to_pb_w_bytes():
from google.cloud.bigtable.row_filters import LiteralValueFilter

value = regex = b"value-regex"
row_filter = ExactValueFilter(value)
value = regex = b"value_regex"
row_filter = LiteralValueFilter(value)
pb_val = row_filter._to_pb()
expected_pb = _RowFilterPB(value_regex_filter=regex)
assert pb_val == expected_pb


def test_exact_value_filter_to_dict_w_bytes():
from google.cloud.bigtable.row_filters import ExactValueFilter
def test_literal_value_filter_to_dict_w_bytes():
from google.cloud.bigtable.row_filters import LiteralValueFilter
from google.cloud.bigtable_v2.types import data as data_v2_pb2

value = regex = b"value-regex"
row_filter = ExactValueFilter(value)
value = regex = b"value_regex"
row_filter = LiteralValueFilter(value)
expected_dict = {"value_regex_filter": regex}
assert row_filter.to_dict() == expected_dict
expected_pb_value = row_filter._to_pb()
assert data_v2_pb2.RowFilter(**expected_dict) == expected_pb_value


def test_exact_value_filter_to_pb_w_str():
from google.cloud.bigtable.row_filters import ExactValueFilter
def test_literal_value_filter_to_pb_w_str():
from google.cloud.bigtable.row_filters import LiteralValueFilter

value = "value-regex"
value = "value_regex"
regex = value.encode("ascii")
row_filter = ExactValueFilter(value)
row_filter = LiteralValueFilter(value)
pb_val = row_filter._to_pb()
expected_pb = _RowFilterPB(value_regex_filter=regex)
assert pb_val == expected_pb


def test_exact_value_filter_to_dict_w_str():
from google.cloud.bigtable.row_filters import ExactValueFilter
def test_literal_value_filter_to_dict_w_str():
from google.cloud.bigtable.row_filters import LiteralValueFilter
from google.cloud.bigtable_v2.types import data as data_v2_pb2

value = "value-regex"
value = "value_regex"
regex = value.encode("ascii")
row_filter = ExactValueFilter(value)
row_filter = LiteralValueFilter(value)
expected_dict = {"value_regex_filter": regex}
assert row_filter.to_dict() == expected_dict
expected_pb_value = row_filter._to_pb()
assert data_v2_pb2.RowFilter(**expected_dict) == expected_pb_value


def test_exact_value_filter_to_pb_w_int():
import struct
from google.cloud.bigtable.row_filters import ExactValueFilter
@pytest.mark.parametrize(
"value,expected_byte_string",
[
# null bytes are encoded as "\x00" in ascii characters
# others are just prefixed with "\"
(0, b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00"),
(1, b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\\x01"),
(
68,
b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00D",
), # bytes that encode to alphanum are not escaped
(570, b"\\x00\\x00\\x00\\x00\\x00\\x00\\\x02\\\x3a"),
(2852126720, b"\\x00\\x00\\x00\\x00\xaa\\x00\\x00\\x00"),
(-1, b"\xff\xff\xff\xff\xff\xff\xff\xff"),
(-1096642724096, b"\xff\xff\xff\\x00\xaa\xff\xff\\x00"),
],
)
def test_literal_value_filter_w_int(value, expected_byte_string):
from google.cloud.bigtable.row_filters import LiteralValueFilter
from google.cloud.bigtable_v2.types import data as data_v2_pb2

value = 1
regex = struct.Struct(">q").pack(value)
row_filter = ExactValueFilter(value)
row_filter = LiteralValueFilter(value)
# test pb
pb_val = row_filter._to_pb()
expected_pb = _RowFilterPB(value_regex_filter=regex)
expected_pb = _RowFilterPB(value_regex_filter=expected_byte_string)
assert pb_val == expected_pb


def test_exact_value_filter_to_dict_w_int():
import struct
from google.cloud.bigtable.row_filters import ExactValueFilter
from google.cloud.bigtable_v2.types import data as data_v2_pb2

value = 1
regex = struct.Struct(">q").pack(value)
row_filter = ExactValueFilter(value)
expected_dict = {"value_regex_filter": regex}
# test dict
expected_dict = {"value_regex_filter": expected_byte_string}
assert row_filter.to_dict() == expected_dict
expected_pb_value = row_filter._to_pb()
assert data_v2_pb2.RowFilter(**expected_dict) == expected_pb_value
assert data_v2_pb2.RowFilter(**expected_dict) == pb_val


def test_exact_value_filter___repr__():
from google.cloud.bigtable.row_filters import ExactValueFilter
def test_literal_value_filter___repr__():
from google.cloud.bigtable.row_filters import LiteralValueFilter

value = "value-regex"
row_filter = ExactValueFilter(value)
expected = "ExactValueFilter(value=b'value-regex')"
value = "value_regex"
row_filter = LiteralValueFilter(value)
expected = "LiteralValueFilter(value=b'value_regex')"
assert repr(row_filter) == expected
assert repr(row_filter) == str(row_filter)
assert eval(repr(row_filter)) == row_filter
Expand Down Expand Up @@ -1907,6 +1913,30 @@ def test_conditional_row_filter___str__():
assert str(row_filter4) == expected


@pytest.mark.parametrize(
    "input_arg, expected_bytes",
    [
        (b"abc", b"abc"),
        ("abc", b"abc"),
        (1, b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\\x01"),  # null bytes are ascii
        (b"*", b"\\*"),
        (".", b"\\."),
        (b"\\", b"\\\\"),
        (b"h.*i", b"h\\.\\*i"),
        (b'""', b'\\"\\"'),
        (b"[xyz]", b"\\[xyz\\]"),
        (b"\xe2\x98\xba\xef\xb8\x8f", b"\xe2\x98\xba\xef\xb8\x8f"),
        ("☃", b"\xe2\x98\x83"),
        (r"\C☃", b"\\\\C\xe2\x98\x83"),
    ],
)
def test_literal_value__write_literal_regex(input_arg, expected_bytes):
    """Each constructor input must be stored as the expected escaped regex."""
    from google.cloud.bigtable.row_filters import LiteralValueFilter

    literal_filter = LiteralValueFilter(input_arg)
    stored_regex = literal_filter.regex
    assert stored_regex == expected_bytes


def _ColumnRangePB(*args, **kw):
from google.cloud.bigtable_v2.types import data as data_v2_pb2

Expand Down Expand Up @@ -1955,15 +1985,15 @@ def _get_regex_filters():
FamilyNameRegexFilter,
ColumnQualifierRegexFilter,
ValueRegexFilter,
ExactValueFilter,
LiteralValueFilter,
)

return [
RowKeyRegexFilter,
FamilyNameRegexFilter,
ColumnQualifierRegexFilter,
ValueRegexFilter,
ExactValueFilter,
LiteralValueFilter,
]


Expand Down