Skip to content

Commit

Permalink
Update name of Pooler class to CSVPooler
Browse files Browse the repository at this point in the history
  • Loading branch information
git-steven committed Feb 24, 2024
1 parent 294f92f commit b1d24d2
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 180 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ values:

### As dataframe row
```python
from csv_batcher.pooler import Pooler, CallbackWith
from csv_batcher.csv_pooler import CSVPooler, CallbackWith

# Callback function passed to pooler; accepts a dataframe row
# as a pandas Series (via apply)
def process_dataframe_row(row):
return row.iloc[0]

pooler = Pooler(
pooler = CSVPooler(
"5mSalesRecords.csv",
process_dataframe_row,
callback_with=CallbackWith.DATAFRAME_ROW,
Expand All @@ -31,7 +31,7 @@ values:

### As dataframe
```python
from csv_batcher.pooler import Pooler, CallbackWith
from csv_batcher.csv_pooler import CSVPooler, CallbackWith

# Used in DataFrame.apply:
def process_dataframe_row(row):
Expand All @@ -43,7 +43,7 @@ values:
# Or do something more complicated....
return len(df)

pooler = Pooler(
pooler = CSVPooler(
"5mSalesRecords.csv",
process_dataframe,
callback_with=CallbackWith.DATAFRAME,
Expand All @@ -53,7 +53,7 @@ values:

### As CSV filename
```python
from csv_batcher.pooler import Pooler, CallbackWith
from csv_batcher.csv_pooler import CSVPooler, CallbackWith

def process_csv_filename(csv_chunk_filename):
# print("processing ", csv_chunk_filename)
Expand All @@ -68,7 +68,7 @@ values:
def process_dataframe_row(row):
return row.iloc[0]

pooler = Pooler(
pooler = CSVPooler(
"5mSalesRecords.csv",
process_csv_filename,
callback_with=CallbackWith.CSV_FILENAME
Expand Down Expand Up @@ -96,3 +96,6 @@ values:
```bash
clear; pytest
```

### Publishing
`poetry publish --build -u __token__ -p $PYPI_TOKEN`
4 changes: 2 additions & 2 deletions csv_batcher/csv_pooler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class CallbackWith(StrEnum):
# function. This causes the callback function to be called with
# each row as a `pd.Series` object:
DATAFRAME_ROW = auto()
class Pooler:
class CSVPooler:
def __init__(
self,
csv_filename: str,
process_fn: Callable,
callback_with: CallbackWith = CallbackWith.CSV_FILENAME,
pool_size: int = 12,
pool_size: int = 5,
chunk_lines: int = 10000,
):
"""
Expand Down
8 changes: 4 additions & 4 deletions csv_batcher/test_csv_pooler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
from csv_batcher.utils.time import time_and_log
from csv_batcher.csv_pooler import Pooler, CallbackWith
from csv_batcher.csv_pooler import CSVPooler, CallbackWith
import pandas as pd

def __process_dataframe_row(row):
Expand All @@ -16,17 +16,17 @@ def __process_as_dataframe(df):

def test_big_file_as_csv():
with time_and_log("test_big_file_as_csv"):
pooler = Pooler("5mSalesRecords.csv", __process_csv_filename)
pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename)
pooler.process()

def test_big_file_as_dataframe():
with time_and_log("test_big_file_as_dataframe"):
pooler = Pooler("5mSalesRecords.csv", __process_as_dataframe, callback_with=CallbackWith.DATAFRAME)
pooler = CSVPooler("5mSalesRecords.csv", __process_as_dataframe, callback_with=CallbackWith.DATAFRAME)
pooler.process()

def test_big_file_as_dataframe_rows():
with time_and_log("test_big_file_as_dataframe_rows"):
pooler = Pooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW)
pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW)
pooler.process()

def test_no_pooler():
Expand Down
16 changes: 10 additions & 6 deletions docs/csv_batcher.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ <h2 id="usage">Usage</h2>
<h3 id="as-dataframe-row">As dataframe row</h3>

<div class="pdoc-code codehilite">
<pre><span></span><code> <span class="kn">from</span> <span class="nn">csv_batcher.pooler</span> <span class="kn">import</span> <span class="n">Pooler</span><span class="p">,</span> <span class="n">CallbackWith</span>
<pre><span></span><code> <span class="kn">from</span> <span class="nn"><a href="csv_batcher/csv_pooler.html">csv_batcher.csv_pooler</a></span> <span class="kn">import</span> <span class="n">CSVPooler</span><span class="p">,</span> <span class="n">CallbackWith</span>

<span class="c1"># Callback function passed to pooler; accepts a dataframe row</span>
<span class="c1"># as a pandas Series (via apply)</span>
<span class="k">def</span> <span class="nf">process_dataframe_row</span><span class="p">(</span><span class="n">row</span><span class="p">):</span>
<span class="k">return</span> <span class="n">row</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>

<span class="n">pooler</span> <span class="o">=</span> <span class="n">Pooler</span><span class="p">(</span>
<span class="n">pooler</span> <span class="o">=</span> <span class="n">CSVPooler</span><span class="p">(</span>
<span class="s2">&quot;5mSalesRecords.csv&quot;</span><span class="p">,</span>
<span class="n">process_dataframe_row</span><span class="p">,</span>
<span class="n">callback_with</span><span class="o">=</span><span class="n">CallbackWith</span><span class="o">.</span><span class="n">DATAFRAME_ROW</span><span class="p">,</span>
Expand All @@ -91,7 +91,7 @@ <h3 id="as-dataframe-row">As dataframe row</h3>

<span class="c1">### As dataframe</span>
<span class="err">```</span><span class="n">python</span>
<span class="kn">from</span> <span class="nn">csv_batcher.pooler</span> <span class="kn">import</span> <span class="n">Pooler</span><span class="p">,</span> <span class="n">CallbackWith</span>
<span class="kn">from</span> <span class="nn"><a href="csv_batcher/csv_pooler.html">csv_batcher.csv_pooler</a></span> <span class="kn">import</span> <span class="n">CSVPooler</span><span class="p">,</span> <span class="n">CallbackWith</span>

<span class="c1"># Used in DataFrame.apply:</span>
<span class="k">def</span> <span class="nf">process_dataframe_row</span><span class="p">(</span><span class="n">row</span><span class="p">):</span>
Expand All @@ -103,7 +103,7 @@ <h3 id="as-dataframe-row">As dataframe row</h3>
<span class="c1"># Or do something more complicated....</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>

<span class="n">pooler</span> <span class="o">=</span> <span class="n">Pooler</span><span class="p">(</span>
<span class="n">pooler</span> <span class="o">=</span> <span class="n">CSVPooler</span><span class="p">(</span>
<span class="s2">&quot;5mSalesRecords.csv&quot;</span><span class="p">,</span>
<span class="n">process_dataframe</span><span class="p">,</span>
<span class="n">callback_with</span><span class="o">=</span><span class="n">CallbackWith</span><span class="o">.</span><span class="n">DATAFRAME</span><span class="p">,</span>
Expand All @@ -113,7 +113,7 @@ <h3 id="as-dataframe-row">As dataframe row</h3>

<span class="c1">### As CSV filename</span>
<span class="err">```</span><span class="n">python</span>
<span class="kn">from</span> <span class="nn">csv_batcher.pooler</span> <span class="kn">import</span> <span class="n">Pooler</span><span class="p">,</span> <span class="n">CallbackWith</span>
<span class="kn">from</span> <span class="nn"><a href="csv_batcher/csv_pooler.html">csv_batcher.csv_pooler</a></span> <span class="kn">import</span> <span class="n">CSVPooler</span><span class="p">,</span> <span class="n">CallbackWith</span>

<span class="k">def</span> <span class="nf">process_csv_filename</span><span class="p">(</span><span class="n">csv_chunk_filename</span><span class="p">):</span>
<span class="c1"># print(&quot;processing &quot;, csv_chunk_filename)</span>
Expand All @@ -128,7 +128,7 @@ <h3 id="as-dataframe-row">As dataframe row</h3>
<span class="k">def</span> <span class="nf">process_dataframe_row</span><span class="p">(</span><span class="n">row</span><span class="p">):</span>
<span class="k">return</span> <span class="n">row</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>

<span class="n">pooler</span> <span class="o">=</span> <span class="n">Pooler</span><span class="p">(</span>
<span class="n">pooler</span> <span class="o">=</span> <span class="n">CSVPooler</span><span class="p">(</span>
<span class="s2">&quot;5mSalesRecords.csv&quot;</span><span class="p">,</span>
<span class="n">process_csv_filename</span><span class="p">,</span>
<span class="n">callback_with</span><span class="o">=</span><span class="n">CallbackWith</span><span class="o">.</span><span class="n">CSV_FILENAME</span>
Expand Down Expand Up @@ -163,6 +163,10 @@ <h3 id="testing">Testing</h3>
<pre><span></span><code><span class="w"> </span>clear<span class="p">;</span><span class="w"> </span>pytest
</code></pre>
</div>

<h3 id="publishing">Publishing</h3>

<p><code>poetry publish --build -u __token__ -p $PYPI_TOKEN</code></p>
</div>

<input id="mod-csv_batcher-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
Expand Down
Loading

0 comments on commit b1d24d2

Please sign in to comment.