Skip to content

Commit

Permalink
Merge pull request #13 from abstractqqq/better_api
Browse files Browse the repository at this point in the history
updated syntax
  • Loading branch information
ctdunc authored May 4, 2024
2 parents 52b41c0 + 2260172 commit 1e1e87c
Show file tree
Hide file tree
Showing 10 changed files with 502 additions and 173 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
path: dist

macos:
runs-on: macos-latest
runs-on: macos-13
strategy:
matrix:
target: [x86_64, aarch64]
Expand Down
72 changes: 36 additions & 36 deletions examples/basics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"outputs": [],
"source": [
"import polars as pl\n",
"import polars_istr # noqa: F401"
"import polars_istr as istr"
]
},
{
Expand All @@ -35,7 +35,7 @@
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>&quot;AA110011123Z56…</td></tr><tr><td>&quot;DE445001051754…</td></tr><tr><td>&quot;AD120001203020…</td></tr><tr><td>&quot;MR000002000101…</td></tr></tbody></table></div>"
"<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>&quot;AA110011123Z5678&quot;</td></tr><tr><td>&quot;DE44500105175407324931&quot;</td></tr><tr><td>&quot;AD1200012030200359100100&quot;</td></tr><tr><td>&quot;MR0000020001010000123456754&quot;</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (4, 1)\n",
Expand Down Expand Up @@ -81,7 +81,7 @@
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>&quot;Invalid countr…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;50010517540732…</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;00012030200359…</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>&quot;Invalid checks…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
"<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>&quot;Invalid country code&quot;</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;500105175407324931&quot;</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;00012030200359100100&quot;</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>&quot;Invalid checksum&quot;</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (4, 6)\n",
Expand All @@ -104,12 +104,12 @@
],
"source": [
"df.select(\n",
" pl.col(\"iban\").iban.country_code().alias(\"country_code\"),\n",
" pl.col(\"iban\").iban.check().alias(\"reason\"),\n",
" pl.col(\"iban\").iban.is_valid().alias(\"is_valid\"),\n",
" pl.col(\"iban\").iban.bban().alias(\"bban\"),\n",
" pl.col(\"iban\").iban.bank_id().alias(\"bank_id\"),\n",
" pl.col(\"iban\").iban.branch_id().alias(\"branch_id\"),\n",
" istr.iban_country_code(\"iban\").alias(\"country_code\"),\n",
" istr.iban_check(\"iban\").alias(\"reason\"),\n",
" istr.iban_is_valid(\"iban\").alias(\"is_valid\"),\n",
" istr.iban_bban(\"iban\").alias(\"bban\"),\n",
" istr.iban_bank_id(\"iban\").alias(\"bank_id\"),\n",
" istr.iban_branch_id(\"iban\").alias(\"branch_id\"),\n",
") "
]
},
Expand All @@ -129,7 +129,7 @@
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;44&quot;</td><td>&quot;50010517540732…</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;12&quot;</td><td>&quot;00012030200359…</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
"<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;44&quot;</td><td>&quot;500105175407324931&quot;</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;12&quot;</td><td>&quot;00012030200359100100&quot;</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (4, 5)\n",
Expand All @@ -152,7 +152,7 @@
],
"source": [
"df.select(\n",
" pl.col(\"iban\").iban.extract_all().alias(\"ib\")\n",
" istr.iban_extract_all(\"iban\").alias(\"ib\")\n",
").unnest(\"ib\")"
]
},
Expand Down Expand Up @@ -259,10 +259,10 @@
],
"source": [
"df.select(\n",
" pl.col(\"isin\").isin.country_code().alias(\"country_code\"),\n",
" pl.col(\"isin\").isin.check_digit().alias(\"check_digit\"),\n",
" pl.col(\"isin\").isin.security_id().alias(\"security_id\"),\n",
" pl.col(\"isin\").isin.is_valid().alias(\"is_valid\"),\n",
" istr.isin_country_code(\"isin\").alias(\"country_code\"),\n",
" istr.isin_check_digit(\"isin\").alias(\"check_digit\"),\n",
" istr.isin_security_id(\"isin\").alias(\"security_id\"),\n",
" istr.isin_is_valid(\"isin\").alias(\"is_valid\"),\n",
")"
]
},
Expand Down Expand Up @@ -314,7 +314,7 @@
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>&quot;row=4&quot;</td><td>&quot;/data.csv&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>&quot;127.0.0.1&quot;</td><td>null</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;test.com&quot;</td><td>&quot;test.com&quot;</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>&quot;/tmp/foo&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>null</td><td>&quot;/products&quot;</td><td>&quot;page=2&amp;sort=de…</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
"<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>&quot;row=4&quot;</td><td>&quot;/data.csv&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>&quot;127.0.0.1&quot;</td><td>null</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;test.com&quot;</td><td>&quot;test.com&quot;</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>&quot;/tmp/foo&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>null</td><td>&quot;/products&quot;</td><td>&quot;page=2&amp;sort=desc&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (9, 8)\n",
Expand Down Expand Up @@ -353,14 +353,14 @@
],
"source": [
"df.select(\n",
" pl.col(\"url\").url.host().alias(\"host\"),\n",
" pl.col(\"url\").url.domain().alias(\"domain\"),\n",
" pl.col(\"url\").url.fragment().alias(\"fragment\"),\n",
" pl.col(\"url\").url.path().alias(\"path\"),\n",
" pl.col(\"url\").url.query().alias(\"query\"),\n",
" pl.col(\"url\").url.check().alias(\"check\"),\n",
" pl.col(\"url\").url.is_valid().alias(\"is_valid\"),\n",
" pl.col(\"url\").url.is_special().alias(\"is_special\"),\n",
" istr.url_host(\"url\").alias(\"host\"),\n",
" istr.url_domain(\"url\").alias(\"domain\"),\n",
" istr.url_fragment(\"url\").alias(\"fragment\"),\n",
" istr.url_path(\"url\").alias(\"path\"),\n",
" istr.url_query(\"url\").alias(\"query\"),\n",
" istr.url_check(\"url\").alias(\"check\"),\n",
" istr.url_is_valid(\"url\").alias(\"is_valid\"),\n",
" istr.url_is_special(\"url\").alias(\"is_special\"),\n",
")"
]
},
Expand Down Expand Up @@ -429,17 +429,17 @@
],
"source": [
"df.select(\n",
" pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n",
" pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n",
" pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n",
" pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n",
" pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n",
" pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n",
" pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n",
" pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n",
" pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n",
" pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n",
" pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n",
" istr.cusip_issue_num(\"cusip\").alias(\"issue_num\"),\n",
" istr.cusip_issuer_num(\"cusip\").alias(\"issuer_num\"),\n",
" istr.cusip_check_digit(\"cusip\").alias(\"check_digit\"),\n",
" istr.cusip_country_code(\"cusip\").alias(\"country_code\"),\n",
" istr.cusip_payload(\"cusip\").alias(\"payload\"),\n",
" istr.cusip_is_private_issue(\"cusip\").alias(\"is_private_issue\"),\n",
" istr.cusip_has_private_issuer(\"cusip\").alias(\"has_private_issuer\"),\n",
" istr.cusip_is_private_use(\"cusip\").alias(\"is_private_use\"),\n",
" istr.cusip_is_cins(\"cusip\").alias(\"is_cins\"),\n",
" istr.cusip_is_cins_base(\"cusip\").alias(\"is_cins_base\"),\n",
" istr.cusip_is_cins_extended(\"cusip\").alias(\"is_cins_extended\"),\n",
" )"
]
},
Expand Down Expand Up @@ -476,7 +476,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
9 changes: 4 additions & 5 deletions python/polars_istr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .iban import IbanExt # noqa: E402
from .isin import IsinExt # noqa: E402
from .cusip import CusipExt # noqa: E402
from .url import UrlExt # noqa: E402
from .iban import * # noqa: E402, F403
from .isin import * # noqa: E402, F403
from .cusip import * # noqa: E402, F403
from .url import * # noqa: E402, F403

__version__ = "0.1.0"
__all__ = ["IbanExt", "IsinExt", "CusipExt", "UrlExt"]
52 changes: 52 additions & 0 deletions python/polars_istr/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import polars as pl
from typing import Any, Optional, List, Dict
from .type_alias import StrOrExpr


def str_to_expr(x: StrOrExpr) -> pl.Expr:
    """Normalize a column reference into a Polars expression.

    A ``pl.Expr`` is returned unchanged; a ``str`` is treated as a column
    name and wrapped with ``pl.col``.

    Raises:
        ValueError: if ``x`` is neither a ``str`` nor a ``pl.Expr``.
    """
    # Guard clauses: handle each accepted type, fall through to the error.
    if isinstance(x, pl.Expr):
        return x
    if isinstance(x, str):
        return pl.col(x)
    raise ValueError("Can only parse str (column name) or Polars expressions.")


def _parse_version(version: str) -> tuple:
    """Return the leading numeric (major, minor, patch) parts of ``version``.

    Only the leading ASCII digits of each of the first three dot-separated
    components are used, so suffixed versions such as ``"1.0.0-beta.1"`` or
    ``"0.20.16rc1"`` parse instead of raising. A component with no leading
    digits counts as 0.
    """
    numbers = []
    for component in version.split(".")[:3]:
        digits = ""
        for ch in component:
            if ch not in "0123456789":
                break
            digits += ch
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


def pl_plugin(
    *,
    lib: str,
    symbol: str,
    args: List[StrOrExpr],
    kwargs: Optional[Dict[str, Any]] = None,
    is_elementwise: bool = False,
    returns_scalar: bool = False,
    changes_length: bool = False,
    cast_to_supertype: bool = False,
) -> pl.Expr:
    """Build a Polars expression that calls the plugin function ``symbol`` in ``lib``.

    Dispatches to whichever plugin-registration API the installed Polars
    provides: ``Expr.register_plugin`` for versions before 0.20.16, and
    ``polars.plugins.register_plugin_function`` otherwise.

    Args:
        lib: Passed as ``lib`` (legacy API) or ``plugin_path`` (new API).
        symbol: Passed as ``symbol`` (legacy API) or ``function_name`` (new API).
        args: Inputs to the plugin; each item is a column name or an
            expression and is converted with ``str_to_expr``. On the legacy
            path the first item is the expression the plugin is registered
            on, so ``args`` must be non-empty there.
        kwargs: Optional keyword arguments forwarded to the plugin.
        is_elementwise: Forwarded unchanged to Polars.
        returns_scalar: Forwarded unchanged to Polars.
        changes_length: Forwarded unchanged to Polars.
        cast_to_supertype: Forwarded unchanged to Polars.

    Returns:
        The expression produced by the Polars registration call.

    Raises:
        ValueError: if an item of ``args`` is neither ``str`` nor ``pl.Expr``.
    """
    # pl.__version__ may carry pre-release suffixes (e.g. "1.0.0-beta.1"),
    # which a plain int() conversion of every component would reject.
    if _parse_version(pl.__version__) < (0, 20, 16):
        # Legacy API: the plugin is registered on the first input expression
        # and the remaining inputs are passed as extra args.
        first = str_to_expr(args[0])
        return first.register_plugin(
            lib=lib,
            symbol=symbol,
            args=[str_to_expr(x) for x in args[1:]],
            kwargs=kwargs,
            is_elementwise=is_elementwise,
            returns_scalar=returns_scalar,
            changes_length=changes_length,
            cast_to_supertype=cast_to_supertype,
        )

    # Imported lazily: this code is only reached on Polars >= 0.20.16, and
    # the import is kept out of the legacy path above.
    from polars.plugins import register_plugin_function

    return register_plugin_function(
        plugin_path=lib,
        args=[str_to_expr(x) for x in args],
        function_name=symbol,
        kwargs=kwargs,
        is_elementwise=is_elementwise,
        returns_scalar=returns_scalar,
        changes_length=changes_length,
        cast_to_supertype=cast_to_supertype,
    )
Loading

0 comments on commit 1e1e87c

Please sign in to comment.