diff --git a/py-polars/polars/expr/binary.py b/py-polars/polars/expr/binary.py index 00f346770925..b6b55f5a1eae 100644 --- a/py-polars/polars/expr/binary.py +++ b/py-polars/polars/expr/binary.py @@ -18,7 +18,7 @@ def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr def contains(self, literal: bytes) -> Expr: - """ + r""" Check if binaries in Series contain a binary substring. Parameters @@ -30,11 +30,41 @@ def contains(self, literal: bytes) -> Expr: ------- Boolean mask + See Also + -------- + starts_with : Check if the binary substring exists at the start + ends_with : Check if the binary substring exists at the end + + Examples + -------- + >>> colors = pl.DataFrame( + ... { + ... "name": ["black", "yellow", "blue"], + ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"], + ... } + ... ) + >>> colors.select( + ... "name", + ... pl.col("code").bin.encode("hex").alias("code_encoded_hex"), + ... pl.col("code").bin.contains(b"\xff").alias("contains_ff"), + ... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_ff"), + ... pl.col("code").bin.ends_with(b"\xff").alias("ends_with_ff"), + ... ) + shape: (3, 5) + ┌────────┬──────────────────┬─────────────┬────────────────┬──────────────┐ + │ name ┆ code_encoded_hex ┆ contains_ff ┆ starts_with_ff ┆ ends_with_ff │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ bool ┆ bool ┆ bool │ + ╞════════╪══════════════════╪═════════════╪════════════════╪══════════════╡ + │ black ┆ 000000 ┆ false ┆ false ┆ false │ + │ yellow ┆ ffff00 ┆ true ┆ true ┆ false │ + │ blue ┆ 0000ff ┆ true ┆ false ┆ true │ + └────────┴──────────────────┴─────────────┴────────────────┴──────────────┘ """ return wrap_expr(self._pyexpr.bin_contains(literal)) def ends_with(self, suffix: bytes) -> Expr: - """ + r""" Check if string values end with a binary substring. Parameters @@ -42,11 +72,45 @@ def ends_with(self, suffix: bytes) -> Expr: suffix Suffix substring. + Returns + ------- + Boolean mask + + See Also + -------- + starts_with : Check if the binary substring exists at the start + contains : Check if the binary substring exists anywhere + + Examples + -------- + >>> colors = pl.DataFrame( + ... { + ... "name": ["black", "yellow", "blue"], + ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"], + ... } + ... ) + >>> colors.select( + ... "name", + ... pl.col("code").bin.encode("hex").alias("code_encoded_hex"), + ... pl.col("code").bin.contains(b"\xff").alias("contains_ff"), + ... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_ff"), + ... pl.col("code").bin.ends_with(b"\xff").alias("ends_with_ff"), + ... ) + shape: (3, 5) + ┌────────┬──────────────────┬─────────────┬────────────────┬──────────────┐ + │ name ┆ code_encoded_hex ┆ contains_ff ┆ starts_with_ff ┆ ends_with_ff │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ bool ┆ bool ┆ bool │ + ╞════════╪══════════════════╪═════════════╪════════════════╪══════════════╡ + │ black ┆ 000000 ┆ false ┆ false ┆ false │ + │ yellow ┆ ffff00 ┆ true ┆ true ┆ false │ + │ blue ┆ 0000ff ┆ true ┆ false ┆ true │ + └────────┴──────────────────┴─────────────┴────────────────┴──────────────┘ """ return wrap_expr(self._pyexpr.bin_ends_with(suffix)) def starts_with(self, prefix: bytes) -> Expr: - """ + r""" Check if values start with a binary substring. Parameters @@ -54,6 +118,40 @@ def starts_with(self, prefix: bytes) -> Expr: prefix Prefix substring. + Returns + ------- + Boolean mask + + See Also + -------- + ends_with : Check if the binary substring exists at the end + contains : Check if the binary substring exists anywhere + + Examples + -------- + >>> colors = pl.DataFrame( + ... { + ... "name": ["black", "yellow", "blue"], + ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"], + ... } + ... ) + >>> colors.select( + ... "name", + ... pl.col("code").bin.encode("hex").alias("code_encoded_hex"), + ... pl.col("code").bin.contains(b"\xff").alias("contains_ff"), + ... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_ff"), + ... pl.col("code").bin.ends_with(b"\xff").alias("ends_with_ff"), + ... ) + shape: (3, 5) + ┌────────┬──────────────────┬─────────────┬────────────────┬──────────────┐ + │ name ┆ code_encoded_hex ┆ contains_ff ┆ starts_with_ff ┆ ends_with_ff │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ bool ┆ bool ┆ bool │ + ╞════════╪══════════════════╪═════════════╪════════════════╪══════════════╡ + │ black ┆ 000000 ┆ false ┆ false ┆ false │ + │ yellow ┆ ffff00 ┆ true ┆ true ┆ false │ + │ blue ┆ 0000ff ┆ true ┆ false ┆ true │ + └────────┴──────────────────┴─────────────┴────────────────┴──────────────┘ """ return wrap_expr(self._pyexpr.bin_starts_with(prefix)) @@ -80,7 +178,7 @@ def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr: ) def encode(self, encoding: TransferEncoding) -> Expr: - """ + r""" Encode a value using the provided encoding. Parameters @@ -92,6 +190,27 @@ def encode(self, encoding: TransferEncoding) -> Expr: ------- Binary array with values encoded using provided encoding + Examples + -------- + >>> colors = pl.DataFrame( + ... { + ... "name": ["black", "yellow", "blue"], + ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"], + ... } + ... ) + >>> colors.with_columns( + ... pl.col("code").bin.encode("hex").alias("code_encoded_hex"), + ... ) + shape: (3, 3) + ┌────────┬───────────────┬──────────────────┐ + │ name ┆ code ┆ code_encoded_hex │ + │ --- ┆ --- ┆ --- │ + │ str ┆ binary ┆ str │ + ╞════════╪═══════════════╪══════════════════╡ + │ black ┆ [binary data] ┆ 000000 │ + │ yellow ┆ [binary data] ┆ ffff00 │ + │ blue ┆ [binary data] ┆ 0000ff │ + └────────┴───────────────┴──────────────────┘ """ if encoding == "hex": return wrap_expr(self._pyexpr.bin_hex_encode())