Skip to content

Commit

Permalink
fixed some tests, added examples
Browse files Browse the repository at this point in the history
  • Loading branch information
abstractqqq committed Mar 17, 2024
1 parent 90003a7 commit 8f4498f
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 8 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ Processing IBAN, ISINs, URLs, etc., and other standard format data in Polars.

1. IBAN is powered by [iban_validate](https://crates.io/crates/iban_validate)
2. ISIN is powered by [isin_rs](https://docs.rs/isin/latest/isin/)
3. URL is powered by [url](https://crates.io/crates/url)
3. URL is powered by [url](https://crates.io/crates/url)
4. CUSIP is powered by [cusip](https://crates.io/crates/cusip)
99 changes: 97 additions & 2 deletions examples/basics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"outputs": [],
"source": [
"import polars as pl\n",
"import polars_istr"
"import polars_istr # noqa: F401"
]
},
{
Expand Down Expand Up @@ -363,6 +363,101 @@
" pl.col(\"url\").url.is_special().alias(\"is_special\"),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f716217c",
"metadata": {},
"source": [
"# CUSIP"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f0aa4db1",
"metadata": {},
"outputs": [],
"source": [
"df = pl.DataFrame({\n",
" \"cusip\": [\n",
" \"303075105\", # regular cusip (FactSet - Common Stock)\n",
" \"30307510\", # regular cusip ex. check digit\n",
" \"G0052B105\", # regular CINS (Abingdon Capital PLC - Shares)\n",
" \"HELLOWORLD\", # Invalid\n",
" ]\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "711de472",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (4, 11)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>issue_num</th><th>issuer_num</th><th>check_digit</th><th>country_code</th><th>payload</th><th>is_private_issue</th><th>has_private_issuer</th><th>is_private_use</th><th>is_cins</th><th>is_cins_base</th><th>is_cins_extended</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;10&quot;</td><td>&quot;303075&quot;</td><td>&quot;5&quot;</td><td>null</td><td>&quot;30307510&quot;</td><td>false</td><td>false</td><td>false</td><td>false</td><td>null</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;10&quot;</td><td>&quot;0052B&quot;</td><td>&quot;5&quot;</td><td>&quot;G&quot;</td><td>&quot;G0052B10&quot;</td><td>false</td><td>false</td><td>false</td><td>true</td><td>true</td><td>false</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (4, 11)\n",
"┌───────────┬────────────┬───────────┬───────────┬───┬───────────┬─────────┬───────────┬───────────┐\n",
"│ issue_num ┆ issuer_num ┆ check_dig ┆ country_c ┆ … ┆ is_privat ┆ is_cins ┆ is_cins_b ┆ is_cins_e │\n",
"│ --- ┆ --- ┆ it ┆ ode ┆ ┆ e_use ┆ --- ┆ ase ┆ xtended │\n",
"│ str ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ bool ┆ --- ┆ --- │\n",
"│ ┆ ┆ str ┆ str ┆ ┆ bool ┆ ┆ bool ┆ bool │\n",
"╞═══════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═════════╪═══════════╪═══════════╡\n",
"│ 10 ┆ 303075 ┆ 5 ┆ null ┆ … ┆ false ┆ false ┆ null ┆ null │\n",
"│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n",
"│ 10 ┆ 0052B ┆ 5 ┆ G ┆ … ┆ false ┆ true ┆ true ┆ false │\n",
"│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n",
"└───────────┴────────────┴───────────┴───────────┴───┴───────────┴─────────┴───────────┴───────────┘"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select(\n",
" pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n",
" pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n",
" pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n",
" pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n",
" pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n",
" pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n",
" pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n",
" pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n",
" pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n",
" pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n",
" pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd30b6da",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a7fa410",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -381,7 +476,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.11.8"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion src/cusip_parsing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ fn pl_cusip_country_code(inputs: &[Series]) -> PolarsResult<Series> {
ca.into_iter().for_each(|op_s| {
if let Some(s) = op_s {
if let Ok(cusip) = CUSIP::parse(s) {
if let Some(cins) = cusip.as_cins(){
if let Some(cins) = cusip.as_cins() {
s_builder.append_value(cins.country_code().to_string());
} else {
s_builder.append_null();
Expand Down
8 changes: 4 additions & 4 deletions tests/test_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@

import pytest

import pytest
import polars as pl

import polars_istr # noqa: F401

from polars.testing import assert_frame_equal
from typing import List, Optional
from typing import Optional


# There are no valid test cases for Extended CINS or Private Issue(r) since I could not
Expand Down Expand Up @@ -116,6 +113,9 @@ def test_cusip(
}
)

assert_frame_equal(test1, ans)
assert_frame_equal(test2, ans)


@pytest.mark.parametrize(
"df, cc, cd, reason, is_valid, bban, bank_id, branch_id",
Expand Down

0 comments on commit 8f4498f

Please sign in to comment.