diff --git a/.gitignore b/.gitignore index 8bcb63e..68bd129 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .DS_Store *.pem +.idea + dist/**/* .virtualenvs diff --git a/README.md b/README.md index 8a255c6..6839cab 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ https://github.com/tangledpath/csv-batcher ## Documentation https://tangledpath.github.io/csv-batcher/csv_batcher.html -## TODO -* Better integrate results from callbacks -* Maybe implement pooling with celery (for use in django apps, etc.), which can bring about [horizontal scaling]([url](https://en.wikipedia.org/wiki/Scalability#Horizontal_or_scale_out)). +## Further excercises +* Possibly implement pooling with celery (for use in django apps, etc.), which can bring about [horizontal scaling]([url](https://en.wikipedia.org/wiki/Scalability#Horizontal_or_scale_out)). + ## Usage Arguments sent to callback function can be controlled by creating pooler with `callback_with` and the CallbackWith enum @@ -22,88 +22,93 @@ values: ### As dataframe row ```python - from csv_batcher.csv_pooler import CSVPooler, CallbackWith +from csv_batcher.csv_pooler import CSVPooler, CallbackWith - # Callback function passed to pooler; accepts a dataframe row - # as a pandas Series (via apply) - def process_dataframe_row(row): +# Callback function passed to pooler; accepts a dataframe row +# as a pandas Series (via apply) +def process_dataframe_row(row): return row.iloc[0] - pooler = CSVPooler( +pooler = CSVPooler( "5mSalesRecords.csv", process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW, pool_size=16 - ) - pooler.process() +) +for processed_batch in pooler.process(): + print(processed_batch) +``` ### As dataframe ```python - from csv_batcher.csv_pooler import CSVPooler, CallbackWith +from csv_batcher.csv_pooler import CSVPooler, CallbackWith - # Used in DataFrame.apply: - def process_dataframe_row(row): +# Used from process_datafrom's apply: +def process_dataframe_row(row): return row.iloc[0] - # 
Callback function passed to pooler; accepts a dataframe: - def process_dataframe(df): +# Callback function passed to pooler; accepts a dataframe: +def process_dataframe(df): foo = df.apply(process_dataframe_row, axis=1) # Or do something more complicated.... return len(df) - pooler = CSVPooler( +pooler = CSVPooler( "5mSalesRecords.csv", process_dataframe, callback_with=CallbackWith.DATAFRAME, pool_size=16 - ) - pooler.process() +) +for processed_batch in pooler.process(): + print(processed_batch) +``` ### As CSV filename ```python - from csv_batcher.csv_pooler import CSVPooler, CallbackWith +import pandas as pd +from csv_batcher.csv_pooler import CSVPooler, CallbackWith - def process_csv_filename(csv_chunk_filename): - # print("processing ", csv_chunk_filename) - df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) - foo = df.apply(process_dataframe_row, axis=1) - return len(df) - - def process_as_dataframe(df): - foo = df.apply(process_dataframe_row, axis=1) - return len(df) +# Used from process_csv_filename's apply: +def process_dataframe_row(row): + return row.iloc[0] - def process_dataframe_row(row): - return row.iloc[0] +def process_csv_filename(csv_chunk_filename): + # print("processing ", csv_chunk_filename) + df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) + foo = df.apply(process_dataframe_row, axis=1) + return len(df) - pooler = CSVPooler( +pooler = CSVPooler( "5mSalesRecords.csv", - process_dataframe, - callback_with=CallbackWith.CSV_FILENAME + process_csv_filename, + callback_with=CallbackWith.CSV_FILENAME, chunk_lines=10000, pool_size=16 - ) - pooler.process() +) +for processed_batch in pooler.process(): + print(processed_batch) ``` ## Development ### Linting ```bash - ruff check . # Find linting errors - ruff check . --fix # Auto-fix linting errors (where possible) +ruff check . # Find linting errors +ruff check . 
--fix # Auto-fix linting errors (where possible) ``` ### Documentation ``` - # Shows in browser - poetry run pdoc csv_batcher - # Generates to ./docs - poetry run pdoc csv_batcher -o ./docs +# Shows in browser +poetry run pdoc csv_batcher +# Generates to ./docs +poetry run pdoc csv_batcher -o ./docs ``` ### Testing ```bash - clear; pytest +clear; pytest ``` ### Publishing -`poetry publish --build -u __token__ -p $PYPI_TOKEN` +```bash +poetry publish --build -u __token__ -p $PYPI_TOKEN` +``` diff --git a/csv_batcher/csv_pooler.py b/csv_batcher/csv_pooler.py index 1b7d295..0f3cfb8 100644 --- a/csv_batcher/csv_pooler.py +++ b/csv_batcher/csv_pooler.py @@ -66,8 +66,9 @@ def process(self): csv_file_cnt = len(csv_splitter.csv_files()) logging.info(f"Pooling against {csv_file_cnt} files") with Pool(self.pool_size) as p: - for result in p.imap(self._process_csv, csv_splitter.csv_files()): - processed_count += result + for result, count in p.imap(self._process_csv, csv_splitter.csv_files()): + yield(result) + processed_count += count finally: csv_splitter.cleanup() @@ -75,17 +76,17 @@ def process(self): def _process_csv(self, csv_chunk_filename): if self.callback_with == CallbackWith.CSV_FILENAME: - self.process_fn(csv_chunk_filename) + result = self.process_fn(csv_chunk_filename) with open(csv_chunk_filename) as f: # Get total lines and subtract for header: - result = sum(1 for line in f) - 1 + count = sum(1 for line in f) - 1 elif self.callback_with == CallbackWith.DATAFRAME: df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) - result = df.shape[0] - self.process_fn(df) + count = df.shape[0] + result = self.process_fn(df) elif self.callback_with == CallbackWith.DATAFRAME_ROW: df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) - result = df.shape[0] - df.apply(self.process_fn, axis=1) + count = df.shape[0] + result = df.apply(self.process_fn, axis=1) - return result + return result, count diff --git 
a/csv_batcher/test_csv_pooler.py b/csv_batcher/test_csv_pooler.py index c061554..e54ec38 100644 --- a/csv_batcher/test_csv_pooler.py +++ b/csv_batcher/test_csv_pooler.py @@ -1,4 +1,3 @@ -import unittest from csv_batcher.utils.time import time_and_log from csv_batcher.csv_pooler import CSVPooler, CallbackWith import pandas as pd @@ -17,22 +16,29 @@ def __process_as_dataframe(df): def test_big_file_as_csv(): with time_and_log("test_big_file_as_csv"): pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename) - pooler.process() + for processed_batch in pooler.process(): + assert isinstance(processed_batch, pd.Series) def test_big_file_as_dataframe(): with time_and_log("test_big_file_as_dataframe"): pooler = CSVPooler("5mSalesRecords.csv", __process_as_dataframe, callback_with=CallbackWith.DATAFRAME) - pooler.process() + for processed_batch in pooler.process(): + assert isinstance(processed_batch, pd.Series) def test_big_file_as_dataframe_rows(): with time_and_log("test_big_file_as_dataframe_rows"): pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW) - pooler.process() + for processed_batch in pooler.process(): + assert isinstance(processed_batch, pd.Series) def test_no_pooler(): with time_and_log("test_no_pooler"): __process_csv_filename("5mSalesRecords.csv") -if __name__ == "__main__": - unittest.main() +if __name__ == '__main__': + test_big_file_as_csv() + test_big_file_as_dataframe() + test_big_file_as_dataframe_rows() + # test_migrator_idempotency() + diff --git a/docs/csv_batcher.html b/docs/csv_batcher.html index d562e1c..bd132ed 100644 --- a/docs/csv_batcher.html +++ b/docs/csv_batcher.html @@ -25,9 +25,11 @@
A python-based, multiprocess CSV batcher suitable for +
A lightweight, python-based, multiprocess CSV batcher suitable for use with dataframes or other tools that deal with large CSV files (or those that require timely processing).
https://github.com/tangledpath/csv-batcher
+https://tangledpath.github.io/csv-batcher/csv_batcher.html
+Arguments sent to callback function can be controlled by @@ -79,68 +91,75 @@
from csv_batcher.csv_pooler import CSVPooler, CallbackWith
+from csv_batcher.csv_pooler import CSVPooler, CallbackWith
- # Callback function passed to pooler; accepts a dataframe row
- # as a pandas Series (via apply)
- def process_dataframe_row(row):
+# Callback function passed to pooler; accepts a dataframe row
+# as a pandas Series (via apply)
+def process_dataframe_row(row):
return row.iloc[0]
- pooler = CSVPooler(
+pooler = CSVPooler(
"5mSalesRecords.csv",
process_dataframe_row,
callback_with=CallbackWith.DATAFRAME_ROW,
pool_size=16
- )
- pooler.process()
+)
+for processed_batch in pooler.process():
+ print(processed_batch)
+
+
from csv_batcher.csv_pooler import CSVPooler, CallbackWith
+
+# Used from process_dataframe's apply:
+def process_dataframe_row(row):
return row.iloc[0]
- # Callback function passed to pooler; accepts a dataframe:
- def process_dataframe(df):
+# Callback function passed to pooler; accepts a dataframe:
+def process_dataframe(df):
foo = df.apply(process_dataframe_row, axis=1)
# Or do something more complicated....
return len(df)
- pooler = CSVPooler(
+pooler = CSVPooler(
"5mSalesRecords.csv",
process_dataframe,
callback_with=CallbackWith.DATAFRAME,
pool_size=16
- )
- pooler.process()
+)
+for processed_batch in pooler.process():
+ print(processed_batch)
+
+import pandas as pd
+from csv_batcher.csv_pooler import CSVPooler, CallbackWith
- def process_as_dataframe(df):
- foo = df.apply(process_dataframe_row, axis=1)
- return len(df)
+# Used from process_csv_filename's apply:
+def process_dataframe_row(row):
+ return row.iloc[0]
- def process_dataframe_row(row):
- return row.iloc[0]
+def process_csv_filename(csv_chunk_filename):
+ # print("processing ", csv_chunk_filename)
+ df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None)
+ foo = df.apply(process_dataframe_row, axis=1)
+ return len(df)
- pooler = CSVPooler(
+pooler = CSVPooler(
"5mSalesRecords.csv",
- process_dataframe,
- callback_with=CallbackWith.CSV_FILENAME
+ process_csv_filename,
+ callback_with=CallbackWith.CSV_FILENAME,
chunk_lines=10000,
pool_size=16
- )
- pooler.process()
+)
+for processed_batch in pooler.process():
+ print(processed_batch)
ruff check . # Find linting errors
- ruff check . --fix # Auto-fix linting errors (where possible)
+ruff check . # Find linting errors
+ruff check . --fix # Auto-fix linting errors (where possible)
# Shows in browser
- poetry run pdoc csv_batcher
- # Generates to ./docs
- poetry run pdoc csv_batcher -o ./docs
+# Shows in browser
+poetry run pdoc csv_batcher
+# Generates to ./docs
+poetry run pdoc csv_batcher -o ./docs
Testing
- clear; pytest
+clear; pytest
Publishing
-poetry publish --build -u __token__ -p $PYPI_TOKEN
+
+poetry publish --build -u __token__ -p $PYPI_TOKEN
+
+
1import unittest - 2from csv_batcher.utils.time import time_and_log - 3from csv_batcher.csv_pooler import CSVPooler, CallbackWith - 4import pandas as pd - 5 - 6def __process_dataframe_row(row): - 7 return row.iloc[0] - 8 - 9def __process_csv_filename(csv_chunk_filename): -10 # print("processing ", csv_chunk_filename) -11 df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) -12 return df.apply(__process_dataframe_row, axis=1) -13 -14def __process_as_dataframe(df): -15 return df.apply(__process_dataframe_row, axis=1) -16 -17def test_big_file_as_csv(): -18 with time_and_log("test_big_file_as_csv"): -19 pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename) -20 pooler.process() +@@ -116,10 +121,11 @@1from csv_batcher.utils.time import time_and_log + 2from csv_batcher.csv_pooler import CSVPooler, CallbackWith + 3import pandas as pd + 4 + 5def __process_dataframe_row(row): + 6 return row.iloc[0] + 7 + 8def __process_csv_filename(csv_chunk_filename): + 9 # print("processing ", csv_chunk_filename) +10 df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None) +11 return df.apply(__process_dataframe_row, axis=1) +12 +13def __process_as_dataframe(df): +14 return df.apply(__process_dataframe_row, axis=1) +15 +16def test_big_file_as_csv(): +17 with time_and_log("test_big_file_as_csv"): +18 pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename) +19 for processed_batch in pooler.process(): +20 assert isinstance(processed_batch, pd.Series) 21 22def test_big_file_as_dataframe(): 23 with time_and_log("test_big_file_as_dataframe"): 24 pooler = CSVPooler("5mSalesRecords.csv", __process_as_dataframe, callback_with=CallbackWith.DATAFRAME) -25 pooler.process() -26 -27def test_big_file_as_dataframe_rows(): -28 with time_and_log("test_big_file_as_dataframe_rows"): -29 pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW) -30 pooler.process() -31 -32def test_no_pooler(): -33 
with time_and_log("test_no_pooler"): -34 __process_csv_filename("5mSalesRecords.csv") -35 -36 -37if __name__ == "__main__": -38 unittest.main() +25 for processed_batch in pooler.process(): +26 assert isinstance(processed_batch, pd.Series) +27 +28def test_big_file_as_dataframe_rows(): +29 with time_and_log("test_big_file_as_dataframe_rows"): +30 pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW) +31 for processed_batch in pooler.process(): +32 assert isinstance(processed_batch, pd.Series) +33 +34def test_no_pooler(): +35 with time_and_log("test_no_pooler"): +36 __process_csv_filename("5mSalesRecords.csv") +37 +38 +39if __name__ == '__main__': +40 test_big_file_as_csv() +41 test_big_file_as_dataframe() +42 test_big_file_as_dataframe_rows() +43 # test_migrator_idempotency()
18def test_big_file_as_csv(): -19 with time_and_log("test_big_file_as_csv"): -20 pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename) -21 pooler.process() +@@ -140,7 +146,8 @@17def test_big_file_as_csv(): +18 with time_and_log("test_big_file_as_csv"): +19 pooler = CSVPooler("5mSalesRecords.csv", __process_csv_filename) +20 for processed_batch in pooler.process(): +21 assert isinstance(processed_batch, pd.Series)
@@ -158,10 +165,11 @@23def test_big_file_as_dataframe(): 24 with time_and_log("test_big_file_as_dataframe"): 25 pooler = CSVPooler("5mSalesRecords.csv", __process_as_dataframe, callback_with=CallbackWith.DATAFRAME) -26 pooler.process() +26 for processed_batch in pooler.process(): +27 assert isinstance(processed_batch, pd.Series)
28def test_big_file_as_dataframe_rows(): -29 with time_and_log("test_big_file_as_dataframe_rows"): -30 pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW) -31 pooler.process() +@@ -179,9 +187,9 @@29def test_big_file_as_dataframe_rows(): +30 with time_and_log("test_big_file_as_dataframe_rows"): +31 pooler = CSVPooler("5mSalesRecords.csv", __process_dataframe_row, callback_with=CallbackWith.DATAFRAME_ROW) +32 for processed_batch in pooler.process(): +33 assert isinstance(processed_batch, pd.Series)
33def test_no_pooler(): -34 with time_and_log("test_no_pooler"): -35 __process_csv_filename("5mSalesRecords.csv") +diff --git a/docs/search.js b/docs/search.js index 5491344..62ab7ab 100644 --- a/docs/search.js +++ b/docs/search.js @@ -1,6 +1,6 @@ window.pdocSearch = (function(){ /** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return 
e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var 
r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var 
u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u35def test_no_pooler(): +36 with time_and_log("test_no_pooler"): +37 __process_csv_filename("5mSalesRecords.csv")0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default 
configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e 1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o csv-batcher\n\n A python-based, multiprocess CSV batcher 
suitable for\nuse with dataframes or other tools that deal with large CSV files (or those that require timely processing).
\n\nInstallation
\n\npip install csv-batcher
\n\nGitHub
\n\nDocumentation
\n\nhttps://tangledpath.github.io/csv-batcher/csv_batcher.html
\n\nUsage
\n\nArguments sent to callback function can be controlled by\ncreating pooler with
\n\ncallback_with
and the CallbackWith enum\nvalues:As dataframe row
\n\n\n\n\n\nfrom csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n # Callback function passed to pooler; accepts a dataframe row\n # as a pandas Series (via apply)\n def process_dataframe_row(row):\n return row.iloc[0]\n\n pooler = CSVPooler(\n "5mSalesRecords.csv",\n process_dataframe_row,\n callback_with=CallbackWith.DATAFRAME_ROW,\n pool_size=16\n )\n pooler.process()\n\n### As dataframe\n```python\n from csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n # Used in DataFrame.apply:\n def process_dataframe_row(row):\n return row.iloc[0]\n\n # Callback function passed to pooler; accepts a dataframe:\n def process_dataframe(df):\n foo = df.apply(process_dataframe_row, axis=1)\n # Or do something more complicated....\n return len(df)\n\n pooler = CSVPooler(\n "5mSalesRecords.csv",\n process_dataframe,\n callback_with=CallbackWith.DATAFRAME,\n pool_size=16\n )\n pooler.process()\n\n### As CSV filename\n```python\n from csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n def process_csv_filename(csv_chunk_filename):\n # print("processing ", csv_chunk_filename)\n df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None)\n foo = df.apply(process_dataframe_row, axis=1)\n return len(df)\n\n def process_as_dataframe(df):\n foo = df.apply(process_dataframe_row, axis=1)\n return len(df)\n\n def process_dataframe_row(row):\n return row.iloc[0]\n\n pooler = CSVPooler(\n "5mSalesRecords.csv",\n process_dataframe,\n callback_with=CallbackWith.CSV_FILENAME\n chunk_lines=10000,\n pool_size=16\n )\n pooler.process()\n
Development
\n\nLinting
\n\n\n\n\n\nruff check . # Find linting errors\n ruff check . --fix # Auto-fix linting errors (where possible)\n
Documentation
\n\n\n\n# Shows in browser\n poetry run pdoc csv_batcher\n # Generates to ./docs\n poetry run pdoc csv_batcher -o ./docs\n
Testing
\n\n\n\n\n\nclear; pytest\n
Publishing
\n\n\n"}, "csv_batcher.csv_pooler": {"fullname": "csv_batcher.csv_pooler", "modulename": "csv_batcher.csv_pooler", "kind": "module", "doc": "\n"}, "csv_batcher.csv_pooler.CallbackWith": {"fullname": "csv_batcher.csv_pooler.CallbackWith", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith", "kind": "class", "doc": "
poetry publish --build -u __token__ -p $PYPI_TOKEN
CallbackWith Enum, used to control what is passed to callback function
\n", "bases": "enum.StrEnum"}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"fullname": "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.CSV_FILENAME", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.CSV_FILENAME: 'csv_filename'>"}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"fullname": "csv_batcher.csv_pooler.CallbackWith.DATAFRAME", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.DATAFRAME", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.DATAFRAME: 'dataframe'>"}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"fullname": "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.DATAFRAME_ROW", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.DATAFRAME_ROW: 'dataframe_row'>"}, "csv_batcher.csv_pooler.CSVPooler": {"fullname": "csv_batcher.csv_pooler.CSVPooler", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler", "kind": "class", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"fullname": "csv_batcher.csv_pooler.CSVPooler.__init__", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.__init__", "kind": "function", "doc": "Construct
\n\nPooler
with givencsv_filename
,process_fn
,as_dataframe
, `pool_size', 'chunk_size'Args:\n csv_filename (str): Name of CSV file\n process_fn (Callable): A function that accepts a single argument\n By default, this is the path to a chunked CSV file\n If
\n", "signature": "(\tcsv_filename: str,\tprocess_fn: Callable,\tcallback_with: csv_batcher.csv_pooler.CallbackWith = <CallbackWith.CSV_FILENAME: 'csv_filename'>,\tpool_size: int = 5,\tchunk_lines: int = 10000)"}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"fullname": "csv_batcher.csv_pooler.CSVPooler.csv_filename", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.csv_filename", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"fullname": "csv_batcher.csv_pooler.CSVPooler.process_fn", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.process_fn", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"fullname": "csv_batcher.csv_pooler.CSVPooler.callback_with", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.callback_with", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"fullname": "csv_batcher.csv_pooler.CSVPooler.pool_size", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.pool_size", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"fullname": "csv_batcher.csv_pooler.CSVPooler.chunk_lines", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.chunk_lines", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.process": {"fullname": "csv_batcher.csv_pooler.CSVPooler.process", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.process", "kind": "function", "doc": "as_dataframe
is True, then the argument sent is a dataframe of the chunked CSV\n callback_with (CallbackWith): Controls what is sent to callback function.\n @see CallbackWith enumeration for details\n Defaults to CallbackWith.CSV_FILENAME.\n as_dataframe_rows (bool): When True, a dataframe is created as with as_dataframe.\n that is sent toprocess_fn
instead. Defaults to False.\n pool_size (int, optional): Number of workers to uses. Defaults to 8.\n chunk_lines (int, optional): Target row count for each chunked CSV. Last chunk may\n have fewer rows. Defaults to 10000.Processes
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.csv_splitter": {"fullname": "csv_batcher.csv_splitter", "modulename": "csv_batcher.csv_splitter", "kind": "module", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter": {"fullname": "csv_batcher.csv_splitter.CSVSplitter", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter", "kind": "class", "doc": "self.csv_filename
by usingCSVSplitter
to split it\n into multiple temporary files defined byself.chunk_lines
.\nUsemultiprocessing.Pool
to use multiple process workers to process\nthe group of CSVs.Splits a CSV file into multiple files
\n"}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.__init__", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.__init__", "kind": "function", "doc": "Construct CSVSplitter
\n\nArgs:\n csv_filename (str): path to CSV file\n chunk_line_cnt (int, optional): Target lines for each chunk. Last chunk might\n be smaller than this. Defaults to 10000.
\n", "signature": "(csv_filename: str, chunk_line_cnt: int = 10000)"}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.csv_filename", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.csv_filename", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_line_cnt", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_dir", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_dir", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_files", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_files", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.csv_files", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.csv_files", "kind": "function", "doc": "Returns
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.cleanup", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.cleanup", "kind": "function", "doc": "self.chunk_files
Remove temporary directory for chunk files; this must be called\nand should be called in a
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.test_csv_pooler": {"fullname": "csv_batcher.test_csv_pooler", "modulename": "csv_batcher.test_csv_pooler", "kind": "module", "doc": "\n"}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_csv", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_csv", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_dataframe", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_dataframe", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_dataframe_rows", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_no_pooler": {"fullname": "csv_batcher.test_csv_pooler.test_no_pooler", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_no_pooler", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}}, "docInfo": {"csv_batcher": {"qualname": 0, "fullname": 2, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 951}, "csv_batcher.csv_pooler": {"qualname": 0, "fullname": 4, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 2, "doc": 13}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 11, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"qualname": 2, "fullname": 6, "annotation": 0, 
"default_value": 9, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 11, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 121, "bases": 0, "doc": 159}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.process": {"qualname": 2, "fullname": 6, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 44}, "csv_batcher.csv_splitter": {"qualname": 0, "fullname": 4, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 9}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 34, "bases": 0, "doc": 37}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"qualname": 3, "fullname": 7, "annotation": 0, 
"default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"qualname": 4, "fullname": 8, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 8}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"qualname": 2, "fullname": 6, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 22}, "csv_batcher.test_csv_pooler": {"qualname": 0, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"qualname": 5, "fullname": 10, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"qualname": 5, "fullname": 10, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"qualname": 6, "fullname": 11, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_no_pooler": {"qualname": 3, "fullname": 8, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}}, "length": 28, "save": true}, "index": {"qualname": {"root": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, 
"c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 4}}}}}}}}}}}, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}}, "df": 5, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 8}}}}}}, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, 
"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 8}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}}, "df": 4}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}}, "df": 3}}}}, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}}, "df": 1}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": 
{"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 4}}}}}}}}, "i": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1}}}}, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1, "s": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 4}}}}, "b": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}, "n": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}, "fullname": {"root": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 
1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 28, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 
1}}, "df": 8}}}}}}, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 8}}}}}}}}}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 4}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}}, "df": 4}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": 
{"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}, "csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 28}}}}}}, "i": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, 
"csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}}, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 18}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": 
{"tf": 1}}, "df": 3, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}}, "df": 3}}}}, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}}, "df": 1}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 4}}}}}}}}, "i": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1}}}, 
"p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 9}}}}}}}}, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 5}}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}, "n": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}, "annotation": {"root": {"docs": {}, "df": 0}}, "default_value": {"root": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}, 
"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 3, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}}}}}}}}}}, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}}, "df": 1}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}, "x": {"2": {"7": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 3}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "g": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, 
"df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 2}}}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 1}}}}}, "signature": {"root": {"1": {"0": {"0": {"0": {"0": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "3": {"9": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1}, "docs": {}, "df": 0}, "5": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 9.539392014169456}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 3.1622776601683795}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 5.0990195135927845}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 3.1622776601683795}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 3.1622776601683795}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 2.6457513110645907}}, "df": 9, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, 
"df": 0, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 3}}}}, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, 
"df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "g": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}, "bases": {"root": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1}}}}}}}}}, "doc": {"root": {"0": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, 
"df": 1}, "1": {"0": {"0": {"0": {"0": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 3}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "6": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}, "docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}, "5": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}}}}}}}}}}, "8": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "docs": {"csv_batcher": {"tf": 25}, "csv_batcher.csv_pooler": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 4.898979485566356}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 3.3166247903554}, "csv_batcher.csv_splitter": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 
1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 2.6457513110645907}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 2}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 2}, "csv_batcher.test_csv_pooler": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.7320508075688772}}, "df": 28, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 4.69041575982343}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}}, "df": 5, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1}}}}}}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}}}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, 
"b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 3, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 3}}}}}}}}, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1.4142135623730951}}, "df": 1}}}}, "n": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}, "m": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 6, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 1}}}}}, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 3}}, "df": 1, "/": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 
1.4142135623730951}}, "df": 3}, "y": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 3}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "a": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 4, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2}}, "s": {"docs": {"csv_batcher": {"tf": 2.23606797749979}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}}, "df": 2}, "c": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 
1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}}}}, "p": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 2.23606797749979}}, "df": 1}}}}, "x": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "p": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "y": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "p": {"docs": {}, "df": 0, "i": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 3.872983346207417}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 3, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 3, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 
3.4641016151377544}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}}, "s": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "e": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "o": {"docs": {}, "df": 0, "c": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}, "m": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}}}}}, "l": {"docs": {}, "df": 0, "e": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 2}}}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "a": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}}}, "s": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}, "r": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "l": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "o": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 
0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "k": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}}}}}, "f": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 4}, "o": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": 
{"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 3, "s": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 5}, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 2.449489742783178}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 4}}}}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "x": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 3}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 1}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "u": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "s": 
{"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 2, "d": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "a": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}}, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "a": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "k": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 4.242640687119285}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.449489742783178}}, "df": 2, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "f": {"docs": 
{"csv_batcher": {"tf": 2.449489742783178}}, "df": 1, "a": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "v": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "c": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}}}, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "f": {"docs": {"csv_batcher": {"tf": 3}}, "df": 1}, "i": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}}}, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "r": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 
0, "r": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}, "p": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}}, "t": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.8284271247461903}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}}, "df": 5, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "k": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "h": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 2}, "n": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}, "o": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 3, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": 
{"tf": 1}}, "df": 3}}}, "i": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "r": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 2}}}, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2}}}}}}}}, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}, "l": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 4}}, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, 
"g": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}}}, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "q": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "m": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "v": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher": {"tf": 4}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "d": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "x": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": 
{"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "o": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 2}}}, "o": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}, "s": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.449489742783178}}, "df": 2}, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}, "g": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "b": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "v": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "h": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, ":": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "t": 
{"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}}}}}}}}, "m": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "a": {"docs": {}, "df": 0, "v": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}}}}, "r": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "v": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "i": {"docs": {}, "df": 0, "a": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "q": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 2.8284271247461903}}, "df": 1}}}}, "n": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}}}, "pipeline": ["trimmer"], "_isPrebuiltIndex": true}; + /** pdoc search index */const docs = {"version": "0.9.5", "fields": ["qualname", "fullname", "annotation", "default_value", "signature", "bases", "doc"], "ref": "fullname", "documentStore": {"docs": {"csv_batcher": {"fullname": "csv_batcher", "modulename": "csv_batcher", "kind": "module", "doc": "finally
blockcsv-batcher
\n\nScaling vertically with CSVs.
\n\nA lightweight, python-based, multiprocess CSV batcher suitable for\nuse with dataframes or other tools that deal with large CSV files (or those that require timely processing).
\n\nInstallation
\n\npip install csv-batcher
\n\nGitHub
\n\nhttps://github.com/tangledpath/csv-batcher
\n\nDocumentation
\n\nhttps://tangledpath.github.io/csv-batcher/csv_batcher.html
\n\nFurther excercises
\n\n\n
\n\n- Possibly implement pooling with celery (for use in django apps, etc.), which can bring about horizontal scaling.
\nUsage
\n\nArguments sent to callback function can be controlled by\ncreating pooler with
\n\ncallback_with
and the CallbackWith enum\nvalues:As dataframe row
\n\n\n\n\n\nfrom csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n# Callback function passed to pooler; accepts a dataframe row\n# as a pandas Series (via apply)\ndef process_dataframe_row(row):\n return row.iloc[0]\n\npooler = CSVPooler(\n "5mSalesRecords.csv",\n process_dataframe_row,\n callback_with=CallbackWith.DATAFRAME_ROW,\n pool_size=16\n)\nfor processed_batch in pooler.process():\n print(processed_batch)\n
As dataframe
\n\n\n\n\n\nfrom csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n# Used from process_datafrom's apply:\ndef process_dataframe_row(row):\n return row.iloc[0]\n\n# Callback function passed to pooler; accepts a dataframe:\ndef process_dataframe(df):\n foo = df.apply(process_dataframe_row, axis=1)\n # Or do something more complicated....\n return len(df)\n\npooler = CSVPooler(\n "5mSalesRecords.csv",\n process_dataframe,\n callback_with=CallbackWith.DATAFRAME,\n pool_size=16\n)\nfor processed_batch in pooler.process():\n print(processed_batch)\n
As CSV filename
\n\n\n\n\n\nimport pandas as pd\nfrom csv_batcher.csv_pooler import CSVPooler, CallbackWith\n\n# Used from process_csv_filename's apply:\ndef process_dataframe_row(row):\n return row.iloc[0]\n\ndef process_csv_filename(csv_chunk_filename):\n # print("processing ", csv_chunk_filename)\n df = pd.read_csv(csv_chunk_filename, skipinitialspace=True, index_col=None)\n foo = df.apply(process_dataframe_row, axis=1)\n return len(df)\n\npooler = CSVPooler(\n "5mSalesRecords.csv",\n process_csv_filename,\n callback_with=CallbackWith.CSV_FILENAME,\n chunk_lines=10000,\n pool_size=16\n)\nfor processed_batch in pooler.process():\n print(processed_batch)\n
Development
\n\nLinting
\n\n\n\n\n\nruff check . # Find linting errors\nruff check . --fix # Auto-fix linting errors (where possible)\n
Documentation
\n\n\n\n# Shows in browser\npoetry run pdoc csv_batcher\n# Generates to ./docs\npoetry run pdoc csv_batcher -o ./docs\n
Testing
\n\n\n\n\n\nclear; pytest\n
Publishing
\n\n\n\n"}, "csv_batcher.csv_pooler": {"fullname": "csv_batcher.csv_pooler", "modulename": "csv_batcher.csv_pooler", "kind": "module", "doc": "\n"}, "csv_batcher.csv_pooler.CallbackWith": {"fullname": "csv_batcher.csv_pooler.CallbackWith", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith", "kind": "class", "doc": "\npoetry publish --build -u __token__ -p $PYPI_TOKEN`\n
CallbackWith Enum, used to control what is passed to callback function
\n", "bases": "enum.StrEnum"}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"fullname": "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.CSV_FILENAME", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.CSV_FILENAME: 'csv_filename'>"}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"fullname": "csv_batcher.csv_pooler.CallbackWith.DATAFRAME", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.DATAFRAME", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.DATAFRAME: 'dataframe'>"}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"fullname": "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW", "modulename": "csv_batcher.csv_pooler", "qualname": "CallbackWith.DATAFRAME_ROW", "kind": "variable", "doc": "\n", "default_value": "<CallbackWith.DATAFRAME_ROW: 'dataframe_row'>"}, "csv_batcher.csv_pooler.CSVPooler": {"fullname": "csv_batcher.csv_pooler.CSVPooler", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler", "kind": "class", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"fullname": "csv_batcher.csv_pooler.CSVPooler.__init__", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.__init__", "kind": "function", "doc": "Construct
\n\nPooler
with givencsv_filename
,process_fn
,as_dataframe
, `pool_size', 'chunk_size'Args:\n csv_filename (str): Name of CSV file\n process_fn (Callable): A function that accepts a single argument\n By default, this is the path to a chunked CSV file\n If
\n", "signature": "(\tcsv_filename: str,\tprocess_fn: Callable,\tcallback_with: csv_batcher.csv_pooler.CallbackWith = <CallbackWith.CSV_FILENAME: 'csv_filename'>,\tpool_size: int = 5,\tchunk_lines: int = 10000)"}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"fullname": "csv_batcher.csv_pooler.CSVPooler.csv_filename", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.csv_filename", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"fullname": "csv_batcher.csv_pooler.CSVPooler.process_fn", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.process_fn", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"fullname": "csv_batcher.csv_pooler.CSVPooler.callback_with", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.callback_with", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"fullname": "csv_batcher.csv_pooler.CSVPooler.pool_size", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.pool_size", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"fullname": "csv_batcher.csv_pooler.CSVPooler.chunk_lines", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.chunk_lines", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_pooler.CSVPooler.process": {"fullname": "csv_batcher.csv_pooler.CSVPooler.process", "modulename": "csv_batcher.csv_pooler", "qualname": "CSVPooler.process", "kind": "function", "doc": "as_dataframe
is True, then the argument sent is a dataframe of the chunked CSV\n callback_with (CallbackWith): Controls what is sent to callback function.\n @see CallbackWith enumeration for details\n Defaults to CallbackWith.CSV_FILENAME.\n as_dataframe_rows (bool): When True, a dataframe is created as with as_dataframe.\n that is sent toprocess_fn
instead. Defaults to False.\n pool_size (int, optional): Number of workers to uses. Defaults to 8.\n chunk_lines (int, optional): Target row count for each chunked CSV. Last chunk may\n have fewer rows. Defaults to 10000.Processes
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.csv_splitter": {"fullname": "csv_batcher.csv_splitter", "modulename": "csv_batcher.csv_splitter", "kind": "module", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter": {"fullname": "csv_batcher.csv_splitter.CSVSplitter", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter", "kind": "class", "doc": "self.csv_filename
by usingCSVSplitter
to split it\n into multiple temporary files defined byself.chunk_lines
.\nUsemultiprocessing.Pool
to use multiple process workers to process\nthe group of CSVs.Splits a CSV file into multiple files
\n"}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.__init__", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.__init__", "kind": "function", "doc": "Construct CSVSplitter
\n\nArgs:\n csv_filename (str): path to CSV file\n chunk_line_cnt (int, optional): Target lines for each chunk. Last chunk might\n be smaller than this. Defaults to 10000.
\n", "signature": "(csv_filename: str, chunk_line_cnt: int = 10000)"}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.csv_filename", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.csv_filename", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_line_cnt", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_dir", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_dir", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.chunk_files", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.chunk_files", "kind": "variable", "doc": "\n"}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.csv_files", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.csv_files", "kind": "function", "doc": "Returns
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"fullname": "csv_batcher.csv_splitter.CSVSplitter.cleanup", "modulename": "csv_batcher.csv_splitter", "qualname": "CSVSplitter.cleanup", "kind": "function", "doc": "self.chunk_files
Remove temporary directory for chunk files; this must be called\nand should be called in a
\n", "signature": "(self):", "funcdef": "def"}, "csv_batcher.test_csv_pooler": {"fullname": "csv_batcher.test_csv_pooler", "modulename": "csv_batcher.test_csv_pooler", "kind": "module", "doc": "\n"}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_csv", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_csv", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_dataframe", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_dataframe", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"fullname": "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_big_file_as_dataframe_rows", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}, "csv_batcher.test_csv_pooler.test_no_pooler": {"fullname": "csv_batcher.test_csv_pooler.test_no_pooler", "modulename": "csv_batcher.test_csv_pooler", "qualname": "test_no_pooler", "kind": "function", "doc": "\n", "signature": "():", "funcdef": "def"}}, "docInfo": {"csv_batcher": {"qualname": 0, "fullname": 2, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 1055}, "csv_batcher.csv_pooler": {"qualname": 0, "fullname": 4, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 2, "doc": 13}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 11, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"qualname": 2, "fullname": 6, "annotation": 0, 
"default_value": 9, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 11, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 121, "bases": 0, "doc": 159}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_pooler.CSVPooler.process": {"qualname": 2, "fullname": 6, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 44}, "csv_batcher.csv_splitter": {"qualname": 0, "fullname": 4, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter": {"qualname": 1, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 9}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 34, "bases": 0, "doc": 37}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"qualname": 3, "fullname": 7, "annotation": 0, 
"default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"qualname": 4, "fullname": 8, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"qualname": 3, "fullname": 7, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 8}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"qualname": 2, "fullname": 6, "annotation": 0, "default_value": 0, "signature": 11, "bases": 0, "doc": 22}, "csv_batcher.test_csv_pooler": {"qualname": 0, "fullname": 5, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"qualname": 5, "fullname": 10, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"qualname": 5, "fullname": 10, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"qualname": 6, "fullname": 11, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}, "csv_batcher.test_csv_pooler.test_no_pooler": {"qualname": 3, "fullname": 8, "annotation": 0, "default_value": 0, "signature": 7, "bases": 0, "doc": 3}}, "length": 28, "save": true}, "index": {"qualname": {"root": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, 
"c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 4}}}}}}}}}}}, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}}, "df": 5, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 8}}}}}}, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, 
"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 8}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}}, "df": 4}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}}, "df": 3}}}}, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}}, "df": 1}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": 
{"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 4}}}}}}}}, "i": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1}}}}, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1, "s": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 4}}}}, "b": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}, "n": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}, "fullname": {"root": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 
1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 28, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 
1}}, "df": 8}}}}}}, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 8}}}}}}}}}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 4}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}}, "df": 4}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": 
{"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}, "csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 28}}}}}}, "i": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, 
"csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}}, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 18}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": 
{"tf": 1}}, "df": 3, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}}, "df": 3}}}}, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1}}, "df": 1}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 4}}}}}}}}, "i": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1}}, "df": 1}}}, 
"p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 9}}}}}}}}, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.test_csv_pooler": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.4142135623730951}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.4142135623730951}}, "df": 5}}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1}}, "df": 3}}, "n": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1}}, "df": 1}}}}, "annotation": {"root": {"docs": {}, "df": 0}}, "default_value": {"root": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}, 
"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 3, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}}}}}}}}}}, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}}, "df": 1}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}, "x": {"2": {"7": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 3}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "g": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1}}, "df": 3}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, 
"df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 2}}}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.4142135623730951}}, "df": 1}}}}}, "signature": {"root": {"1": {"0": {"0": {"0": {"0": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "3": {"9": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1}, "docs": {}, "df": 0}, "5": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 9.539392014169456}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 3.1622776601683795}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 5.0990195135927845}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 3.1622776601683795}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 3.1622776601683795}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 2.6457513110645907}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 2.6457513110645907}}, "df": 9, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, 
"df": 0, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}, "f": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 3}}}}, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, 
"df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "g": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}, "bases": {"root": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1}}}}}}}}}, "doc": {"root": {"0": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, 
"df": 1}, "1": {"0": {"0": {"0": {"0": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 3}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "docs": {}, "df": 0}, "6": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}, "docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}, "3": {"9": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}, "docs": {}, "df": 0}, "5": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}}}}}}}}}}, "8": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "docs": {"csv_batcher": {"tf": 26.13426869074396}, "csv_batcher.csv_pooler": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith.CSV_FILENAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith.DATAFRAME_ROW": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 4.898979485566356}, "csv_batcher.csv_pooler.CSVPooler.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process_fn": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.callback_with": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.pool_size": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.chunk_lines": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 
3.3166247903554}, "csv_batcher.csv_splitter": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 2.6457513110645907}, "csv_batcher.csv_splitter.CSVSplitter.csv_filename": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_line_cnt": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_dir": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.chunk_files": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 2}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 2}, "csv_batcher.test_csv_pooler": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_csv": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_big_file_as_dataframe_rows": {"tf": 1.7320508075688772}, "csv_batcher.test_csv_pooler.test_no_pooler": {"tf": 1.7320508075688772}}, "df": 28, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 4.898979485566356}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}}, "df": 5, "s": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}}, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": 
{"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1}}}}}}}}, "o": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}}}}}}}, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 1, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}, "l": {"docs": {}, "df": 0, "l": {"docs": 
{}, "df": 0, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 3, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 2.6457513110645907}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 3}}}}}}}}, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1.4142135623730951}}, "df": 1}}}}}, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 6, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 1}}}}}, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "r": 
{"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}, "b": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 3.1622776601683795}}, "df": 1, "/": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "r": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "o": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1.4142135623730951}}, "df": 3}, "y": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 3}, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "o": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, 
"g": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}}}, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}, "r": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "l": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 2}}}, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "o": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "k": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}}}}}}, "h": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "w": 
{"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}}}}}, "v": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "i": {"docs": {}, "df": 0, "a": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "w": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 3}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 2}}}, "h": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "a": {"docs": {}, "df": 0, "t": {"docs": 
{"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "k": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}}}}}}}, "a": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 4, "p": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 2.23606797749979}}, "df": 1}}}}, "b": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 1, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2}}, "s": {"docs": {"csv_batcher": {"tf": 2.23606797749979}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.23606797749979}}, "df": 2}, "c": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}}}}, "x": {"docs": {}, "df": 0, 
"i": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1, "s": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 4}}, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}}, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "p": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "y": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "p": {"docs": {}, "df": 0, "i": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": 
{"tf": 3.872983346207417}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 3, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 2}}, "df": 1}}}}, "i": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "o": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 3, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 3.4641016151377544}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2}}}}, "e": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2}}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "t": {"docs": {}, "df": 0, 
"h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}, "d": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1, "o": {"docs": {}, "df": 0, "c": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}, "m": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}}}}}, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 2}}}}}}, "s": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "a": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}}}}, "f": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 2.23606797749979}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, 
"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 4}, "o": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 3, "s": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 5}, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 2.8284271247461903}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 4}}}}}}, "n": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "x": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}, "u": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "n": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 3}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "m": {"docs": 
{"csv_batcher": {"tf": 2.23606797749979}}, "df": 1}}}, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}}, "df": 1}, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "w": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "u": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "s": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.4142135623730951}}, "df": 2, "d": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2}, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "a": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "d": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 3.7416573867739413}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.449489742783178}}, "df": 2, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}, "o": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "f": {"docs": {"csv_batcher": {"tf": 2.23606797749979}}, "df": 1, "a": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1, "s": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "v": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "c": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}}}}}}}, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "j": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "f": {"docs": {"csv_batcher": {"tf": 2.449489742783178}}, "df": 1}, "i": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}}}}}, "o": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "r": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {"csv_batcher": {"tf": 1}}, 
"df": 1}}}}, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 2}, "p": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}}}, "t": {"docs": {}, "df": 0, "o": {"docs": {"csv_batcher": {"tf": 2}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.8284271247461903}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1.7320508075688772}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1.4142135623730951}}, "df": 5, "o": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "k": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "h": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 2}, "n": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 1}}, "o": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.7320508075688772}, "csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 3, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}, "i": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 3}}}, "i": {"docs": {}, 
"df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "r": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}}, "df": 2}}}, "e": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "y": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2}}}}}}}}, "a": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}}}, "r": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "q": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "t": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 2.23606797749979}}, "df": 1, "s": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.csv_files": {"tf": 1}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "m": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "v": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 1}}}}}, "o": {"docs": {}, "df": 0, "w": {"docs": {"csv_batcher": {"tf": 3.872983346207417}, "csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 2, "s": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": 
{"tf": 1.4142135623730951}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "f": {"docs": {}, "df": 0, "f": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}, "n": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}, "i": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 2.23606797749979}, "csv_batcher.csv_splitter.CSVSplitter.cleanup": {"tf": 1}}, "df": 2, "s": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "e": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "d": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}, "d": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "x": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1.4142135623730951}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2, "o": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter": {"tf": 1}}, "df": 2}}}, "o": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "v": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}, "m": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 2}}, "df": 1}}}}}, "l": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "c": {"docs": {"csv_batcher": {"tf": 1.7320508075688772}}, "df": 1}}}, "s": {"docs": {"csv_batcher.csv_pooler.CallbackWith": {"tf": 1}, 
"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 2.449489742783178}}, "df": 2}, "f": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}, "t": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}, "g": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "b": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}, "v": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "p": {"docs": {"csv_batcher.csv_pooler.CSVPooler.process": {"tf": 1}}, "df": 1}}}}}, "h": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, ":": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "/": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "b": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}, "t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "d": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}}}}}}}}}, "m": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "z": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, 
"t": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "l": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "a": {"docs": {}, "df": 0, "v": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}, "e": {"docs": {}, "df": 0, "x": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "s": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}}}}}}}, "t": {"docs": {}, "df": 0, "c": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}, "n": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "m": {"docs": {"csv_batcher": {"tf": 1}, "csv_batcher.csv_pooler.CallbackWith": {"tf": 1}}, "df": 2, "e": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}}}}, "r": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "r": {"docs": {}, "df": 0, "s": {"docs": {"csv_batcher": {"tf": 1.4142135623730951}}, "df": 1}}}}}, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "h": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}, "csv_batcher.csv_splitter.CSVSplitter.__init__": {"tf": 1}}, "df": 2}}}}, "q": {"docs": {}, "df": 0, "u": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "t": {"docs": {"csv_batcher": {"tf": 2.8284271247461903}}, "df": 1}}}}, "n": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher": {"tf": 1}}, "df": 1}}}, "a": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "e": {"docs": {"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}, "u": {"docs": {}, "df": 0, "m": {"docs": {}, "df": 0, "b": {"docs": {}, "df": 0, "e": {"docs": {}, "df": 0, "r": {"docs": 
{"csv_batcher.csv_pooler.CSVPooler.__init__": {"tf": 1}}, "df": 1}}}}}}}}}, "pipeline": ["trimmer"], "_isPrebuiltIndex": true}; // mirrored in build-search-index.js (part 1) // Also split on html tags. this is a cheap heuristic, but good enough. diff --git a/poetry.lock b/poetry.lock index e84f940..7fd03ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "attrs" @@ -212,13 +212,13 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] [[package]] @@ -359,23 +359,23 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "8.0.1" +version = "8.1.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, - {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = 
"sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.3.0,<2.0" +pluggy = ">=1.4,<2.0" [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-mock" @@ -396,13 +396,13 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -421,28 +421,28 @@ files = [ [[package]] name = "ruff" -version = "0.2.2" +version = "0.3.3" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"}, - {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"}, - {file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"}, - {file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"}, - {file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"}, - {file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"}, - {file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = 
"sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"}, - {file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"}, - {file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"}, - {file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"}, - {file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"}, + {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:973a0e388b7bc2e9148c7f9be8b8c6ae7471b9be37e1cc732f8f44a6f6d7720d"}, + {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfa60d23269d6e2031129b053fdb4e5a7b0637fc6c9c0586737b962b2f834493"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eca7ff7a47043cf6ce5c7f45f603b09121a7cc047447744b029d1b719278eb5"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7d3f6762217c1da954de24b4a1a70515630d29f71e268ec5000afe81377642d"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b24c19e8598916d9c6f5a5437671f55ee93c212a2c4c569605dc3842b6820386"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5a6cbf216b69c7090f0fe4669501a27326c34e119068c1494f35aaf4cc683778"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:352e95ead6964974b234e16ba8a66dad102ec7bf8ac064a23f95371d8b198aab"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d6ab88c81c4040a817aa432484e838aaddf8bfd7ca70e4e615482757acb64f8"}, + {file = "ruff-0.3.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:79bca3a03a759cc773fca69e0bdeac8abd1c13c31b798d5bb3c9da4a03144a9f"}, + {file = "ruff-0.3.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2700a804d5336bcffe063fd789ca2c7b02b552d2e323a336700abb8ae9e6a3f8"}, + {file = "ruff-0.3.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd66469f1a18fdb9d32e22b79f486223052ddf057dc56dea0caaf1a47bdfaf4e"}, + {file = "ruff-0.3.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45817af234605525cdf6317005923bf532514e1ea3d9270acf61ca2440691376"}, + {file = "ruff-0.3.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0da458989ce0159555ef224d5b7c24d3d2e4bf4c300b85467b08c3261c6bc6a8"}, + {file = "ruff-0.3.3-py3-none-win32.whl", hash = "sha256:f2831ec6a580a97f1ea82ea1eda0401c3cdf512cf2045fa3c85e8ef109e87de0"}, + {file = "ruff-0.3.3-py3-none-win_amd64.whl", hash = "sha256:be90bcae57c24d9f9d023b12d627e958eb55f595428bafcb7fec0791ad25ddfc"}, + {file = "ruff-0.3.3-py3-none-win_arm64.whl", hash = "sha256:0171aab5fecdc54383993389710a3d1227f2da124d76a2784a7098e818f92d61"}, + {file = "ruff-0.3.3.tar.gz", hash = "sha256:38671be06f57a2f8aba957d9f701ea889aa5736be806f18c0cd03d6ff0cbca8d"}, ] [[package]] @@ -479,13 +479,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.10.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, + {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 955e1e0..8891394 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,11 @@ [tool.poetry] name = "csv-batcher" -version = "0.1.3" +version = "0.1.4" description = "A python utility to split a large CSV into smaller ones, and uses multiprocessing to process the CSVs in parallel." authors = ["Steven Miersfinally
block"] readme = "README.md" license = "The Unlicense" + [tool.poetry.dependencies] python = "^3.12.2" pandas = "^2.2.0"