diff --git a/README.md b/README.md
index d67e060..ce076b4 100644
--- a/README.md
+++ b/README.md
@@ -19,4 +19,5 @@ Processing IBAN, ISINs, URLs, etc., and other standard format data in Polars.
1. Iban is powered by [iban_validate](https://crates.io/crates/iban_validate)
2. Isin is powered by [isin_rs](https://docs.rs/isin/latest/isin/)
-3. URL is powered by [url](https://crates.io/crates/url)
\ No newline at end of file
+3. URL is powered by [url](https://crates.io/crates/url)
+4. CUSIP is powered by [cusip](https://crates.io/crates/cusip)
\ No newline at end of file
diff --git a/examples/basics.ipynb b/examples/basics.ipynb
index 8577fc2..45ca28c 100644
--- a/examples/basics.ipynb
+++ b/examples/basics.ipynb
@@ -8,7 +8,7 @@
"outputs": [],
"source": [
"import polars as pl\n",
- "import polars_istr"
+ "import polars_istr # noqa: F401"
]
},
{
@@ -363,6 +363,101 @@
" pl.col(\"url\").url.is_special().alias(\"is_special\"),\n",
")"
]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f716217c",
+ "metadata": {},
+ "source": [
+ "# CUSIP"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "f0aa4db1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pl.DataFrame({\n",
+ " \"cusip\": [\n",
+ " \"303075105\", # regular cusip (FactSet - Common Stock)\n",
+ " \"30307510\", # CUSIP without its check digit (8 characters; fails to parse)\n",
+ " \"G0052B105\", # regular CINS (Abingdon Capital PLC - Shares)\n",
+ " \"HELLOWORLD\", # Invalid\n",
+ " ]\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "711de472",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
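+ "<div>\n",
+ "<small>shape: (4, 11)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>issue_num</th><th>issuer_num</th><th>check_digit</th><th>country_code</th><th>payload</th><th>is_private_issue</th><th>has_private_issuer</th><th>is_private_use</th><th>is_cins</th><th>is_cins_base</th><th>is_cins_extended</th></tr><tr><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td><td>---</td></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;10&quot;</td><td>&quot;303075&quot;</td><td>&quot;5&quot;</td><td>null</td><td>&quot;30307510&quot;</td><td>false</td><td>false</td><td>false</td><td>false</td><td>null</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;10&quot;</td><td>&quot;0052B&quot;</td><td>&quot;5&quot;</td><td>&quot;G&quot;</td><td>&quot;G0052B10&quot;</td><td>false</td><td>false</td><td>false</td><td>true</td><td>true</td><td>false</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"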
+ ],
+ "text/plain": [
+ "shape: (4, 11)\n",
+ "┌───────────┬────────────┬───────────┬───────────┬───┬───────────┬─────────┬───────────┬───────────┐\n",
+ "│ issue_num ┆ issuer_num ┆ check_dig ┆ country_c ┆ … ┆ is_privat ┆ is_cins ┆ is_cins_b ┆ is_cins_e │\n",
+ "│ --- ┆ --- ┆ it ┆ ode ┆ ┆ e_use ┆ --- ┆ ase ┆ xtended │\n",
+ "│ str ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ bool ┆ --- ┆ --- │\n",
+ "│ ┆ ┆ str ┆ str ┆ ┆ bool ┆ ┆ bool ┆ bool │\n",
+ "╞═══════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═════════╪═══════════╪═══════════╡\n",
+ "│ 10 ┆ 303075 ┆ 5 ┆ null ┆ … ┆ false ┆ false ┆ null ┆ null │\n",
+ "│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n",
+ "│ 10 ┆ 0052B ┆ 5 ┆ G ┆ … ┆ false ┆ true ┆ true ┆ false │\n",
+ "│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n",
+ "└───────────┴────────────┴───────────┴───────────┴───┴───────────┴─────────┴───────────┴───────────┘"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.select(\n",
+ " pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n",
+ " pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n",
+ " pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n",
+ " pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n",
+ " pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n",
+ " pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n",
+ " pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n",
+ " pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n",
+ " pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n",
+ " pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n",
+ " pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd30b6da",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a7fa410",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -381,7 +476,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.11.8"
}
},
"nbformat": 4,
diff --git a/src/cusip_parsing/mod.rs b/src/cusip_parsing/mod.rs
index 1355bfa..2f42543 100644
--- a/src/cusip_parsing/mod.rs
+++ b/src/cusip_parsing/mod.rs
@@ -114,7 +114,7 @@ fn pl_cusip_country_code(inputs: &[Series]) -> PolarsResult {
ca.into_iter().for_each(|op_s| {
if let Some(s) = op_s {
if let Ok(cusip) = CUSIP::parse(s) {
- if let Some(cins) = cusip.as_cins(){
+ if let Some(cins) = cusip.as_cins() {
s_builder.append_value(cins.country_code().to_string());
} else {
s_builder.append_null();
diff --git a/tests/test_correctness.py b/tests/test_correctness.py
index 68c812b..26ccb21 100644
--- a/tests/test_correctness.py
+++ b/tests/test_correctness.py
@@ -7,13 +7,10 @@
import pytest
-import pytest
-import polars as pl
import polars_istr # noqa: F401
-from polars.testing import assert_frame_equal
-from typing import List, Optional
+from typing import Optional
# There are no valid test cases for Extended CINS or Private Issue(r) since I could not
@@ -116,6 +113,9 @@ def test_cusip(
}
)
+ assert_frame_equal(test1, ans)
+ assert_frame_equal(test2, ans)
+
@pytest.mark.parametrize(
"df, cc, cd, reason, is_valid, bban, bank_id, branch_id",