From df7cc803e989b7511cc47efa203dbc0ba7b12120 Mon Sep 17 00:00:00 2001 From: Dave Date: Sun, 21 Aug 2022 17:37:42 -0400 Subject: [PATCH] display ID registration prep, formatter registration fixes, index updates, dtype handlers, expanded data generators (#16) * add random dataframe generator for convenience * add logging for renderables * more callout options; set display ID * update black/flake8 configs * version bump; add pyarrow * register display ID; update black configs * caffeine fever dream that needs a lot of cleanup later once prototyping is done * add success! * logging/callout updates * callout update * flatten multiindex series better; pass metadata in ipydisplay * add settings.LOG_LEVEL; fix stringify columns * version bump; add helper for generating random dataframe * rename "default" display mode to "plain" * add More Info user help section after truncating dataframe * ensure temporary dx.display calls revert properly * hotfix * remove old code * remove cell_id tracking * add poetry.toml * add loggers * fix bug with .sample() resetting index * cleanup * don't mess with the original dataframe object during registration * remove structlog; update loggers * SQL instead of parquet/pyarrow * more attempts at fixing column dtype wrangling * move filtering out of main formatter * check for dataframe subset associations before display ID registration * push-down filter override for max display rows * fix set_option reference * add comms config * add geopandas as extra install * move utils out of formatters dir * handle circular imports; add subset filter tracking; extra logging * separate sampling from utils * remove column handling from sampling * add convenience function for adding renderable * fix reference for testing * handle missing key * fix ref * disable re-rendering for updates; pass applied filters to frontend * pull filters from comm msg * fix subset filter tracking; update logging * update to default random sampling * allow passing ipython shell 
for registering/getting display IDs * pass new display id into sampling if needed * update applied_filters assignment * add settings_context; switch renderables to Set instead of List * move sampling tests to their own file * add renderables test * remove truncating/sampling tests, fix custom index test * assign unique name for unassigned variable tracking * check setting before index resets * flake8 * adjust pandas options for display/schema changes * add structlog back in * pull display ID registration out of get_display_id(); update logging * update display mode with settings_context; add log level changes * pass index flag to dataframe_info metadata; update logging * updates for logging * use settings_context; don't display callout with unassigned dataframe; parse df_name for .query() * use update_display for user query * remove hyphens from df uuid to enable sqlite filtering * add top margin * update logging; handle display ID register after rendering; update user query callout * fix index and column stringification for multiindexing * more docstrings * clean up settings * add pandas option transfer on row/column validation * add flatten_index; handle index/column flattening behind settings * add docstring * don't reset multiindex level names * more explicit multiindex handling * add media type prefixes back to settings so env vars don't overwrite them all * check for custom index before normalizing * ugh multiindex. 
* remove import * fix log message * update .gitignore * break apart utils/helpers into more readable structure to handle new datatype generation, testing, and cleaning * remove helpers * remove function imports * store datetime string format * move geopandas check out of config * add faker as extra * remove comment * handle flattening/expanding lists/sets/tuples * handle extra dtype cleaning * fix for mixed dtypes * updates for dtype generation and testing * change log time format * check default index earlier; don't generate hash over and over * add ENABLE_DATALINK setting to toggle off all the tracking/hashing/etc * remove get_applied_filters * separate cleaning functions between build_table_schema/hash_pandas_object/store_in_sqlite * fix cleaning; remove extra hashing calls; update docstrings * add geopandas and faker dev dependencies for dtype testing; add isort dev dep * make sure we can toggle datalink setting on/off without errors * generate display ID if not passed (datalink enabled) * clean columns before rendering with datalink disabled * update random_dataframe columns and testing * verbose unit testing in github workflow * handle no args, test for default data * enable datalink setting by default * use settings context * trigger html.table_schema pandas changes on settings changes * remove config.py * refactor display formatter registration * remove configs * remove configs * fix settings tests * fix registering tests * refactor dx media type formatter registration; remove configs * updates for testing to remove redundant mediatype nesting * more debug logging * remove flatten_index and fix index/column flattening logic * comms config behind datalink setting * bump up log level, disable datalink, be done with this PR * ugh patch this when datalink is disabled * disable logging auto-config so it doesn't start showing other loggers * fix log message * don't update other loggers levels to INFO * turn logging back on * changelog * this needs more work with 
the new display formatter registration --- .github/workflows/unit-tests.yaml | 2 +- .gitignore | 2 + CHANGELOG.md | 27 + dx/__init__.py | 8 +- dx/comms.py | 37 ++ dx/config.py | 33 -- dx/dx.py | 8 +- dx/filtering.py | 88 +++ dx/formatters/callouts.py | 22 +- dx/formatters/dataresource.py | 208 +++++-- dx/formatters/dx.py | 199 +++++-- dx/formatters/main.py | 20 +- dx/helpers.py | 13 - dx/loggers.py | 93 ++++ dx/{formatters/utils.py => sampling.py} | 121 +--- dx/settings.py | 123 ++++- dx/tests/test_dataresource.py | 34 +- dx/tests/test_datatypes.py | 165 ++++++ dx/tests/test_dx.py | 30 +- dx/tests/test_registering.py | 28 +- dx/tests/test_sampling.py | 83 +++ dx/tests/test_settings.py | 37 +- dx/tests/test_utils.py | 100 +--- dx/utils/__init__.py | 5 + dx/utils/datatypes.py | 278 ++++++++++ dx/utils/date_time.py | 70 +++ dx/utils/formatting.py | 185 +++++++ dx/utils/geometry.py | 93 ++++ dx/utils/tracking.py | 156 ++++++ poetry.lock | 698 +++++++++++++++++++----- poetry.toml | 3 + pyproject.toml | 15 +- 32 files changed, 2439 insertions(+), 545 deletions(-) create mode 100644 dx/comms.py delete mode 100644 dx/config.py create mode 100644 dx/filtering.py delete mode 100644 dx/helpers.py create mode 100644 dx/loggers.py rename dx/{formatters/utils.py => sampling.py} (72%) create mode 100644 dx/tests/test_datatypes.py create mode 100644 dx/tests/test_sampling.py create mode 100644 dx/utils/__init__.py create mode 100644 dx/utils/datatypes.py create mode 100644 dx/utils/date_time.py create mode 100644 dx/utils/formatting.py create mode 100644 dx/utils/geometry.py create mode 100644 dx/utils/tracking.py create mode 100644 poetry.toml diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 0148aeef..3271a994 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -28,4 +28,4 @@ jobs: poetry install - name: Pytest - Unit tests run: | - poetry run pytest dx/tests -x + poetry run pytest dx/tests -xv diff 
--git a/.gitignore b/.gitignore index 2930ab24..4b484a2f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ __pycache__/ dist/ .pytest_cache +.python-versions +.venv .vscode \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 58eb1cfb..c9de21bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,33 @@ All notable changes will be documented here. --- +## Unreleased +_2022-08-21_ + +### Added +- `pd.Series` as a default renderable type (to go with the existing `pd.DataFrame` and `np.ndarray` types) +- Support for the following data types inside `pd.DataFrame` columns: + - `type` and `np.dtype` + - `shapely.geometry` objects + - `pd.Timedelta` and `datetime.timedelta` + - `pd.Period` + - `pd.Interval` + - `complex` numbers + - `ipaddress.IPv4Address` and `.IPv6Address` +- Extra dataset generation functions for development/testing under `dx.utils.datatypes` + - `dx.quick_random_dataframe(n_rows, n_columns)` to get a `pd.DataFrame` of 0.0-1.0 floats (convenience wrapper for `pd.DataFrame(np.random.rand(n_rows, n_columns))`) + - `dx.random_dataframe()` with different boolean values to enable based on available datatypes (`dx.DX_DATATYPES`) +- `settings_context` context manager to allow temporarily changing a setting (or multiple) +- Logging via `structlog` (default level: `logging.WARNING`) + +### Changed +- Default sampling method changed from `outer` to `random` + +### Fixed +- Displaying a dataframe with an out-of-order index (like with `.sample()`) no longer resets the index before sending data to the frontend. +- Index/column flattening and string-formatting is behind settings and is handled more explicitly +- `dx` should no longer interfere with other media type / mime bundles (e.g. 
matplotlib) formatted by the existing IPython display formatter + ## `1.1.3` _2022-08-05_ ### Added diff --git a/dx/__init__.py b/dx/__init__.py index 7c9b2a99..342b7ecd 100644 --- a/dx/__init__.py +++ b/dx/__init__.py @@ -1,9 +1,11 @@ -from .config import * +from .comms import * from .dx import * from .formatters import * -from .helpers import * +from .loggers import * from .settings import * +from .utils import * -__version__ = "1.1.3" +__version__ = "1.2.0" +configure_logging() set_display_mode("simple") diff --git a/dx/comms.py b/dx/comms.py new file mode 100644 index 00000000..02f3a508 --- /dev/null +++ b/dx/comms.py @@ -0,0 +1,37 @@ +import structlog +from IPython import get_ipython + +from dx.settings import get_settings + +settings = get_settings() +logger = structlog.get_logger(__name__) + + +# ref: https://jupyter-notebook.readthedocs.io/en/stable/comms.html#opening-a-comm-from-the-frontend +def target_func(comm, open_msg): + @comm.on_msg + def _recv(msg): + from dx.filtering import update_display_id + + data = msg["content"]["data"] + if "display_id" in data: + update_display_id( + display_id=data["display_id"], + pandas_filter=data.get("pandas_filter"), + sql_filter=data.get("sql_filter"), + filters=data.get("filters"), + output_variable_name=data.get("output_variable_name"), + limit=data["limit"], + ) + + comm.send({"connected": True}) + + +ipython_shell = get_ipython() +if ( + ipython_shell is not None + and getattr(ipython_shell, "kernel", None) + and settings.ENABLE_DATALINK +): + COMM_MANAGER = ipython_shell.kernel.comm_manager + COMM_MANAGER.register_target("datalink", target_func) diff --git a/dx/config.py b/dx/config.py deleted file mode 100644 index 9ee592dd..00000000 --- a/dx/config.py +++ /dev/null @@ -1,33 +0,0 @@ -import os - -from IPython import get_ipython -from IPython.core.formatters import DisplayFormatter - -IN_IPYTHON_ENV = get_ipython() is not None - -DEFAULT_IPYTHON_DISPLAY_FORMATTER = DisplayFormatter() -if IN_IPYTHON_ENV: - 
DEFAULT_IPYTHON_DISPLAY_FORMATTER = get_ipython().display_formatter - -# we don't want to require geopandas as a hard dependency -try: - import geopandas as gpd - - GEOPANDAS_INSTALLED = True -except ImportError: - GEOPANDAS_INSTALLED = False - - -def in_noteable_env() -> bool: - """ - Check if we are running in a Noteable environment. - - FUTURE: this will be used to determine whether IPython formatters - are automatically updated. - """ - return "NTBL_USER_ID" in os.environ - - -def in_nteract_env() -> bool: - # TODO: handle this? - return False diff --git a/dx/dx.py b/dx/dx.py index 0fc6e3bc..1349cdcf 100644 --- a/dx/dx.py +++ b/dx/dx.py @@ -5,7 +5,7 @@ from IPython.core.interactiveshell import InteractiveShell from IPython.display import display as ipydisplay -from dx.settings import set_display_mode, settings +from dx.settings import settings_context from dx.types import DXDisplayMode @@ -29,11 +29,9 @@ def display( raise ValueError(f"Unsupported file type: `{path.suffix}`") df = pd.DataFrame(data) + with settings_context(display_mode=mode, ipython_shell=ipython_shell): + ipydisplay(df) - orig_mode = settings.DISPLAY_MODE.value - set_display_mode(mode, ipython_shell=ipython_shell) - ipydisplay(df) - set_display_mode(orig_mode, ipython_shell=ipython_shell) return diff --git a/dx/filtering.py b/dx/filtering.py new file mode 100644 index 00000000..9ab6c1b1 --- /dev/null +++ b/dx/filtering.py @@ -0,0 +1,88 @@ +from typing import Optional + +import pandas as pd +import structlog +from IPython.display import update_display + +from dx.formatters.callouts import display_callout +from dx.settings import get_settings, settings_context +from dx.utils.formatting import expand_sequences +from dx.utils.tracking import ( + DATAFRAME_HASH_TO_VAR_NAME, + DISPLAY_ID_TO_DATAFRAME_HASH, + SUBSET_TO_DATAFRAME_HASH, + generate_df_hash, +) + +logger = structlog.get_logger(__name__) + +settings = get_settings() + + +SUBSET_FILTERS = {} + + +def update_display_id( + display_id: str, + 
sql_filter: str, + pandas_filter: Optional[str] = None, + filters: Optional[dict] = None, + output_variable_name: Optional[str] = None, + limit: Optional[int] = None, +) -> None: + """ + Filters the dataframe in the cell with the given display_id. + """ + from dx.utils.tracking import sql_engine + + global SUBSET_FILTERS + + row_limit = limit or settings.DISPLAY_MAX_ROWS + df_hash = DISPLAY_ID_TO_DATAFRAME_HASH[display_id] + df_name = DATAFRAME_HASH_TO_VAR_NAME[df_hash] + table_name = f"{df_name}__{df_hash}" + + query_string = sql_filter.format(table_name=table_name) + logger.debug(f"sql query string: {query_string}") + new_df = pd.read_sql(query_string, sql_engine) + + # in the event there were nested values stored, + # try to expand them back to their original datatypes + for col in new_df.columns: + new_df[col] = new_df[col].apply(expand_sequences) + + # this is associating the subset with the original dataframe, + # which will be checked when the DisplayFormatter.format() is called + # during update_display(), which will prevent re-registering the display ID to the subset + new_df_hash = generate_df_hash(new_df) + + # store filters to be passed through metadata to the frontend + logger.debug(f"applying {filters=}") + filters = filters or [] + SUBSET_FILTERS[new_df_hash] = filters + + logger.debug(f"assigning subset {new_df_hash} to parent {df_hash=}") + SUBSET_TO_DATAFRAME_HASH[new_df_hash] = df_hash + + # allow temporary override of the display limit + with settings_context(DISPLAY_MAX_ROWS=row_limit): + logger.debug(f"updating {display_id=} with {min(row_limit, len(new_df))}-row resample") + update_display(new_df, display_id=display_id) + + # we can't reference a variable type to suggest to users to perform a `df.query()` + # type operation since it was never declared in the first place + if not df_name.startswith("unk_dataframe_"): + # TODO: replace with custom callout media type + output_variable_name = output_variable_name or "new_df" + filter_code = 
f"""{output_variable_name} = {df_name}.query("{pandas_filter.format(df_name=df_name)}", engine="python")""" + filter_msg = f"""Copy the following snippet into a cell below to save this subset to a new dataframe: +
{filter_code}
+ """ + display_callout( + filter_msg, + header=False, + icon="info", + level="success", + display_id=display_id + "-primary", + update=True, + ) diff --git a/dx/formatters/callouts.py b/dx/formatters/callouts.py index 4293b72e..5f44777d 100644 --- a/dx/formatters/callouts.py +++ b/dx/formatters/callouts.py @@ -2,7 +2,7 @@ import uuid from typing import Optional -from IPython.display import HTML, display +from IPython.display import HTML, display, update_display from pydantic import BaseModel @@ -37,11 +37,18 @@ def html(self): callout_classes.append(f"bp3-icon-{self.icon.value}-sign") callout_class_str = " ".join(callout_classes) + msg = self.message if self.use_header: heading_html = f"
{self.level.value.title()}
" - return f"""
{heading_html}{self.message}
""" + msg = f"{heading_html}{self.message}" - return f"""
{self.message}
""" + style = ";".join( + [ + "margin-bottom: 0.5rem", + "margin-top: 0.5rem", + ] + ) + return f"""
{msg}
""" def display_callout( @@ -50,6 +57,7 @@ def display_callout( header: bool = True, icon: Optional[CalloutIcon] = None, display_id: str = None, + update: bool = False, ) -> None: callout = Callout( message=message, @@ -61,4 +69,10 @@ def display_callout( # TODO: coordinate with frontend to replace this with a standalone media type # instead of rendering HTML with custom classes/styles - display(HTML(callout.html), display_id=display_id) + if update: + update_display(HTML(callout.html), display_id=display_id) + else: + display( + HTML(callout.html), + display_id=display_id, + ) diff --git a/dx/formatters/dataresource.py b/dx/formatters/dataresource.py index b3e645ce..a0079269 100644 --- a/dx/formatters/dataresource.py +++ b/dx/formatters/dataresource.py @@ -1,19 +1,28 @@ import uuid from functools import lru_cache -from typing import List, Optional +from typing import Optional, Set import numpy as np import pandas as pd +import structlog from IPython import get_ipython -from IPython.core.formatters import DisplayFormatter +from IPython.core.formatters import BaseFormatter from IPython.core.interactiveshell import InteractiveShell -from IPython.display import display as ipydisplay from pandas.io.json import build_table_schema from pydantic import BaseSettings, Field -from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV -from dx.formatters.utils import normalize_index_and_columns, truncate_and_describe +from dx.filtering import SUBSET_FILTERS +from dx.sampling import sample_and_describe from dx.settings import settings +from dx.utils.datatypes import to_dataframe +from dx.utils.formatting import is_default_index, normalize_index_and_columns +from dx.utils.tracking import ( + SUBSET_TO_DATAFRAME_HASH, + generate_df_hash, + get_display_id, + register_display_id, + store_in_sqlite, +) class DataResourceSettings(BaseSettings): @@ -22,10 +31,16 @@ class DataResourceSettings(BaseSettings): DATARESOURCE_DISPLAY_MAX_COLUMNS: int = 50 
DATARESOURCE_HTML_TABLE_SCHEMA: bool = Field(True, allow_mutation=False) DATARESOURCE_MEDIA_TYPE: str = Field("application/vnd.dataresource+json", allow_mutation=False) - DATARESOURCE_RENDERABLE_OBJECTS: List[type] = [pd.DataFrame, np.ndarray] + DATARESOURCE_RENDERABLE_OBJECTS: Set[type] = {pd.Series, pd.DataFrame, np.ndarray} + + DATARESOURCE_FLATTEN_INDEX_VALUES: bool = False + DATARESOURCE_FLATTEN_COLUMN_VALUES: bool = True + DATARESOURCE_STRINGIFY_INDEX_VALUES: bool = True + DATARESOURCE_STRINGIFY_COLUMN_VALUES: bool = True class Config: validate_assignment = True # we need this to enforce `allow_mutation` + json_encoders = {type: lambda t: str(t)} @lru_cache @@ -35,63 +50,120 @@ def get_dataresource_settings(): dataresource_settings = get_dataresource_settings() - -class DXDataResourceDisplayFormatter(DisplayFormatter): - def format(self, obj, **kwargs): - - if isinstance(obj, tuple(settings.RENDERABLE_OBJECTS)): - display_id = str(uuid.uuid4()) - df_obj = pd.DataFrame(obj) - payload, metadata = format_dataresource(df_obj, display_id) - # TODO: determine if/how we can pass payload/metadata with - # display_id for the frontend to pick up properly - return ({}, {}) - - return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs) - - -def generate_dataresource_body(df: pd.DataFrame, display_id: Optional[str] = None) -> tuple: +logger = structlog.get_logger(__name__) + + +def handle_dataresource_format(obj): + logger.debug(f"*** handling dataresource format for {type(obj)=} ***") + if not isinstance(obj, pd.DataFrame): + obj = to_dataframe(obj) + + default_index_used = is_default_index(obj.index) + obj = normalize_index_and_columns(obj) + + if not settings.ENABLE_DATALINK: + payload, metadata = format_dataresource( + obj, + has_default_index=default_index_used, + ) + return payload, metadata + + obj_hash = generate_df_hash(obj) + update_existing_display = obj_hash in SUBSET_TO_DATAFRAME_HASH + applied_filters = SUBSET_FILTERS.get(obj_hash) + display_id = 
get_display_id(obj_hash) + sqlite_df_table = register_display_id( + obj, + display_id=display_id, + df_hash=obj_hash, + is_subset=update_existing_display, + ) + + payload, metadata = format_dataresource( + obj, + update=update_existing_display, + display_id=display_id, + filters=applied_filters, + has_default_index=default_index_used, + ) + + # this needs to happen after sending to the frontend + # so the user doesn't wait as long for writing larger datasets + store_in_sqlite(sqlite_df_table, obj) + return payload, metadata + + +class DXDataResourceDisplayFormatter(BaseFormatter): + print_method = "_repr_data_resource_" + _return_type = (dict,) + + +def generate_dataresource_body( + df: pd.DataFrame, + display_id: Optional[str] = None, +) -> tuple: """ Transforms the dataframe to a payload dictionary containing the table schema and column values as arrays. """ - payload_body = { + payload = { "schema": build_table_schema(df), "data": df.reset_index().to_dict("records"), - "datalink": {}, + "datalink": {"display_id": display_id}, } - payload = {dataresource_settings.DATARESOURCE_MEDIA_TYPE: payload_body} - metadata_body = { + metadata = { "datalink": { "dataframe_info": {}, "dx_settings": settings.json(exclude={"RENDERABLE_OBJECTS": True}), + "applied_filters": [], + "display_id": display_id, }, + "display_id": display_id, } - metadata = {dataresource_settings.DATARESOURCE_MEDIA_TYPE: metadata_body} - - display_id = display_id or str(uuid.uuid4()) - payload_body["datalink"]["display_id"] = display_id - metadata_body["datalink"]["display_id"] = display_id - return (payload, metadata) -def format_dataresource(df, display_id) -> tuple: - # enable 0-n row counts for frontend - df = normalize_index_and_columns(df) - df, dataframe_info = truncate_and_describe(df) - payload, metadata = generate_dataresource_body(df, display_id) - metadata[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["datalink"][ - "dataframe_info" - ] = dataframe_info - - # don't pass a dataframe in 
here, otherwise you'll get recursion errors - with pd.option_context( - "html.table_schema", dataresource_settings.DATARESOURCE_HTML_TABLE_SCHEMA - ): - ipydisplay(payload, raw=True, metadata=metadata, display_id=display_id) - +def format_dataresource( + df: pd.DataFrame, + update: bool = False, + display_id: Optional[str] = None, + filters: Optional[list] = None, + has_default_index: bool = True, +) -> tuple: + display_id = display_id or str(uuid.uuid4()) + df, dataframe_info = sample_and_describe(df, display_id=display_id) + dataframe_info["default_index_used"] = has_default_index + payload, metadata = generate_dataresource_body(df, display_id=display_id) + metadata["datalink"].update( + { + "dataframe_info": dataframe_info, + "applied_filters": filters, + } + ) + + # TODO: figure out a way to mimic this behavior since it was helpful + # having a display handle that we could update in place, + # but that went through as a display_data message, instead of execute_result + # and we can't do it with BaseFormatter, otherwise we'll double-render + # with pd.option_context( + # "html.table_schema", dataresource_settings.DATARESOURCE_HTML_TABLE_SCHEMA + # ): + # logger.debug(f"displaying dataresource payload in {display_id=}") + # ipydisplay( + # payload, + # raw=True, + # metadata=metadata, + # display_id=display_id, + # update=update, + # ) + + # # temporary placeholder for copy/paste user messaging + # ipydisplay( + # HTML("
"), + # display_id=display_id + "-primary", + # update=update, + # ) return (payload, metadata) @@ -101,19 +173,43 @@ def deregister(ipython_shell: Optional[InteractiveShell] = None) -> None: display formatter, used for simpleTable / "classic DEX" outputs and updates global dx & pandas settings with dataresource settings. """ - if not IN_IPYTHON_ENV and ipython_shell is None: + from dx.formatters.dx import get_dx_settings + + if get_ipython() is None and ipython_shell is None: return global settings settings.DISPLAY_MODE = "simple" - settings.DISPLAY_MAX_COLUMNS = dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS - settings.DISPLAY_MAX_ROWS = dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS - settings.MEDIA_TYPE = dataresource_settings.DATARESOURCE_MEDIA_TYPE - settings.RENDERABLE_OBJECTS = dataresource_settings.DATARESOURCE_RENDERABLE_OBJECTS - - pd.set_option("display.max_columns", dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS) - pd.set_option("display.max_rows", dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS) + settings_to_apply = { + "DISPLAY_MAX_COLUMNS", + "DISPLAY_MAX_ROWS", + "HTML_TABLE_SCHEMA", + "MEDIA_TYPE", + "RENDERABLE_OBJECTS", + "FLATTEN_INDEX_VALUES", + "FLATTEN_COLUMN_VALUES", + "STRINGIFY_INDEX_VALUES", + "STRINGIFY_COLUMN_VALUES", + } + for setting in settings_to_apply: + val = getattr(dataresource_settings, f"DATARESOURCE_{setting}", None) + setattr(settings, setting, val) ipython = ipython_shell or get_ipython() - ipython.display_formatter = DXDataResourceDisplayFormatter() + + # https://github.com/pandas-dev/pandas/blob/ad190575aa75962d2d0eade2de81a5fe5a2e285b/pandas/io/formats/printing.py#L244 + # https://github.com/pandas-dev/pandas/blob/926b9ceff10d9b7a957811f0a4de3167332196de/pandas/io/formats/printing.py?q=_repr_data_resource_#L268 + # https://ipython.readthedocs.io/en/stable/config/integrating.html#formatters-for-third-party-types + # 
https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#:~:text=plain.for_type(int%2C%20int_formatter) + formatters = ipython.display_formatter.formatters + mimetype = dataresource_settings.DATARESOURCE_MEDIA_TYPE + + formatters[mimetype] = DXDataResourceDisplayFormatter() + for obj in settings.RENDERABLE_OBJECTS: + formatters[mimetype].for_type(obj, handle_dataresource_format) + formatters[mimetype].enabled = True + + for other_media_type in [get_dx_settings().DX_MEDIA_TYPE]: + if other_media_type in formatters: + del formatters[other_media_type] diff --git a/dx/formatters/dx.py b/dx/formatters/dx.py index 91992392..933a9695 100644 --- a/dx/formatters/dx.py +++ b/dx/formatters/dx.py @@ -1,19 +1,28 @@ import uuid from functools import lru_cache -from typing import List, Optional +from typing import Optional, Set import numpy as np import pandas as pd +import structlog from IPython import get_ipython -from IPython.core.formatters import DisplayFormatter +from IPython.core.formatters import BaseFormatter from IPython.core.interactiveshell import InteractiveShell -from IPython.display import display as ipydisplay from pandas.io.json import build_table_schema from pydantic import BaseSettings, Field -from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV -from dx.formatters.utils import normalize_index_and_columns, truncate_and_describe +from dx.filtering import SUBSET_FILTERS +from dx.sampling import sample_and_describe from dx.settings import settings +from dx.utils.datatypes import to_dataframe +from dx.utils.formatting import is_default_index, normalize_index_and_columns +from dx.utils.tracking import ( + SUBSET_TO_DATAFRAME_HASH, + generate_df_hash, + get_display_id, + register_display_id, + store_in_sqlite, +) class DXSettings(BaseSettings): @@ -21,7 +30,12 @@ class DXSettings(BaseSettings): DX_DISPLAY_MAX_COLUMNS: int = 50 DX_HTML_TABLE_SCHEMA: bool = Field(True, allow_mutation=False) DX_MEDIA_TYPE: str = 
Field("application/vnd.dex.v1+json", allow_mutation=False) - DX_RENDERABLE_OBJECTS: List[type] = [pd.DataFrame, np.ndarray] + DX_RENDERABLE_OBJECTS: Set[type] = {pd.Series, pd.DataFrame, np.ndarray} + + DX_FLATTEN_INDEX_VALUES: bool = False + DX_FLATTEN_COLUMN_VALUES: bool = True + DX_STRINGIFY_INDEX_VALUES: bool = True + DX_STRINGIFY_COLUMN_VALUES: bool = True class Config: validate_assignment = True # we need this to enforce `allow_mutation` @@ -35,58 +49,118 @@ def get_dx_settings(): dx_settings = get_dx_settings() - -class DXDisplayFormatter(DisplayFormatter): - def format(self, obj, **kwargs): - - if isinstance(obj, tuple(settings.RENDERABLE_OBJECTS)): - display_id = str(uuid.uuid4()) - df_obj = pd.DataFrame(obj) - payload, metadata = format_dx(df_obj, display_id) - # TODO: determine if/how we can pass payload/metadata with - # display_id for the frontend to pick up properly - return ({}, {}) - - return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs) - - -def generate_dx_body(df: pd.DataFrame, display_id: Optional[str] = None) -> tuple: +logger = structlog.get_logger(__name__) + + +def handle_dx_format(obj): + if not isinstance(obj, pd.DataFrame): + obj = to_dataframe(obj) + + default_index_used = is_default_index(obj.index) + obj = normalize_index_and_columns(obj) + + if not settings.ENABLE_DATALINK: + payload, metadata = format_dx( + obj, + has_default_index=default_index_used, + ) + return payload, metadata + + obj_hash = generate_df_hash(obj) + update_existing_display = obj_hash in SUBSET_TO_DATAFRAME_HASH + applied_filters = SUBSET_FILTERS.get(obj_hash) + display_id = get_display_id(obj_hash) + sqlite_df_table = register_display_id( + obj, + display_id=display_id, + df_hash=obj_hash, + is_subset=update_existing_display, + ) + + payload, metadata = format_dx( + obj.copy(), + update=update_existing_display, + display_id=display_id, + filters=applied_filters, + has_default_index=default_index_used, + ) + + # this needs to happen after sending to the 
frontend + # so the user doesn't wait as long for writing larger datasets + store_in_sqlite(sqlite_df_table, obj) + return payload, metadata + + +class DXDisplayFormatter(BaseFormatter): + print_method = "_repr_data_resource_" + _return_type = (dict,) + + +def generate_dx_body( + df: pd.DataFrame, + display_id: Optional[str] = None, +) -> tuple: """ Transforms the dataframe to a payload dictionary containing the table schema and column values as arrays. """ # this will include the `df.index` by default (e.g. slicing/sampling) - payload_body = { + payload = { "schema": build_table_schema(df), "data": df.reset_index().transpose().values.tolist(), - "datalink": {}, + "datalink": {"display_id": display_id}, } - payload = {dx_settings.DX_MEDIA_TYPE: payload_body} - metadata_body = { + metadata = { "datalink": { "dataframe_info": {}, "dx_settings": settings.json(exclude={"RENDERABLE_OBJECTS": True}), + "applied_filters": [], + "display_id": display_id, }, + "display_id": display_id, } - metadata = {dx_settings.DX_MEDIA_TYPE: metadata_body} - - display_id = display_id or str(uuid.uuid4()) - payload_body["datalink"]["display_id"] = display_id - metadata_body["datalink"]["display_id"] = display_id - return (payload, metadata) -def format_dx(df, display_id) -> tuple: - df = normalize_index_and_columns(df) - df, dataframe_info = truncate_and_describe(df) - payload, metadata = generate_dx_body(df, display_id) - metadata[dx_settings.DX_MEDIA_TYPE]["datalink"]["dataframe_info"] = dataframe_info - - # don't pass a dataframe in here, otherwise you'll get recursion errors - with pd.option_context("html.table_schema", dx_settings.DX_HTML_TABLE_SCHEMA): - ipydisplay(payload, raw=True, metadata=metadata, display_id=display_id) +def format_dx( + df: pd.DataFrame, + update: bool = False, + display_id: Optional[str] = None, + filters: Optional[list] = None, + has_default_index: bool = True, +) -> tuple: + display_id = display_id or str(uuid.uuid4()) + df, dataframe_info = 
sample_and_describe(df, display_id=display_id) + dataframe_info["default_index_used"] = has_default_index + payload, metadata = generate_dx_body(df, display_id=display_id) + metadata["datalink"].update( + { + "dataframe_info": dataframe_info, + "applied_filters": filters, + } + ) + + # TODO: figure out a way to mimic this behavior since it was helpful + # having a display handle that we could update in place, + # but that went through as a display_data message, instead of execute_result + # and we can't do it with BaseFormatter, otherwise we'll double-render + # with pd.option_context("html.table_schema", dx_settings.DX_HTML_TABLE_SCHEMA): + # ipydisplay( + # payload, + # raw=True, + # metadata=metadata, + # display_id=display_id, + # update=update, + # ) + + # # temporary placeholder for copy/paste user messaging + # if settings.ENABLE_DATALINK: + # ipydisplay( + # HTML("
"), + # display_id=display_id + "-primary", + # update=update, + # ) return (payload, metadata) @@ -96,19 +170,44 @@ def register(ipython_shell: Optional[InteractiveShell] = None) -> None: Enables the DEX media type output display formatting and updates global dx & pandas settings with DX settings. """ - if not IN_IPYTHON_ENV and ipython_shell is None: + + from dx.formatters.dataresource import get_dataresource_settings + + if get_ipython() is None and ipython_shell is None: return global settings settings.DISPLAY_MODE = "enhanced" - settings.DISPLAY_MAX_COLUMNS = dx_settings.DX_DISPLAY_MAX_COLUMNS - settings.DISPLAY_MAX_ROWS = dx_settings.DX_DISPLAY_MAX_ROWS - settings.MEDIA_TYPE = dx_settings.DX_MEDIA_TYPE - settings.RENDERABLE_OBJECTS = dx_settings.DX_RENDERABLE_OBJECTS - - pd.set_option("display.max_columns", dx_settings.DX_DISPLAY_MAX_COLUMNS) - pd.set_option("display.max_rows", dx_settings.DX_DISPLAY_MAX_ROWS) + settings_to_apply = { + "DISPLAY_MAX_COLUMNS", + "DISPLAY_MAX_ROWS", + "HTML_TABLE_SCHEMA", + "MEDIA_TYPE", + "RENDERABLE_OBJECTS", + "FLATTEN_INDEX_VALUES", + "FLATTEN_COLUMN_VALUES", + "STRINGIFY_INDEX_VALUES", + "STRINGIFY_COLUMN_VALUES", + } + for setting in settings_to_apply: + val = getattr(dx_settings, f"DX_{setting}", None) + setattr(settings, setting, val) ipython = ipython_shell or get_ipython() - ipython.display_formatter = DXDisplayFormatter() + + # https://github.com/pandas-dev/pandas/blob/ad190575aa75962d2d0eade2de81a5fe5a2e285b/pandas/io/formats/printing.py#L244 + # https://github.com/pandas-dev/pandas/blob/926b9ceff10d9b7a957811f0a4de3167332196de/pandas/io/formats/printing.py?q=_repr_data_resource_#L268 + # https://ipython.readthedocs.io/en/stable/config/integrating.html#formatters-for-third-party-types + # https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#:~:text=plain.for_type(int%2C%20int_formatter) + formatters = ipython.display_formatter.formatters + media_type = dx_settings.DX_MEDIA_TYPE + + 
formatters[media_type] = DXDisplayFormatter() + for obj in settings.RENDERABLE_OBJECTS: + formatters[media_type].for_type(obj, handle_dx_format) + formatters[media_type].enabled = True + + for other_media_type in [get_dataresource_settings().DATARESOURCE_MEDIA_TYPE]: + if other_media_type in formatters: + del formatters[other_media_type] diff --git a/dx/formatters/main.py b/dx/formatters/main.py index 4215a46a..951adab1 100644 --- a/dx/formatters/main.py +++ b/dx/formatters/main.py @@ -3,18 +3,21 @@ from typing import Optional import pandas as pd +import structlog from IPython import get_ipython from IPython.core.interactiveshell import InteractiveShell from pydantic import BaseSettings, Field -from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV +from dx.formatters.dataresource import get_dataresource_settings +from dx.formatters.dx import get_dx_settings from dx.settings import get_settings +logger = structlog.get_logger(__name__) settings = get_settings() -warnings.filterwarnings("ignore") - +dataresource_settings = get_dataresource_settings() +dx_settings = get_dx_settings() -DISPLAY_ID_TO_DATAFRAME = {} +warnings.filterwarnings("ignore") class PandasSettings(BaseSettings): @@ -41,7 +44,7 @@ def reset(ipython_shell: Optional[InteractiveShell] = None) -> None: Resets all nteract/Noteable options, reverting to the default pandas display options and IPython display formatter. 
""" - if not IN_IPYTHON_ENV and ipython_shell is None: + if get_ipython() is None and ipython_shell is None: return global settings @@ -55,4 +58,9 @@ def reset(ipython_shell: Optional[InteractiveShell] = None) -> None: pd.set_option("display.max_rows", pandas_settings.PANDAS_DISPLAY_MAX_ROWS) ipython = ipython_shell or get_ipython() - ipython.display_formatter = DEFAULT_IPYTHON_DISPLAY_FORMATTER + + formatters = ipython.display_formatter.formatters + + for media_type in [dataresource_settings.DATARESOURCE_MEDIA_TYPE, dx_settings.DX_MEDIA_TYPE]: + if media_type in formatters: + del formatters[media_type] diff --git a/dx/helpers.py b/dx/helpers.py deleted file mode 100644 index 3aff66a4..00000000 --- a/dx/helpers.py +++ /dev/null @@ -1,13 +0,0 @@ -import numpy as np -import pandas as pd - - -def random_dataframe( - num_rows: int = 5, - num_cols: int = 2, - dtype: str = "float", - factor: float = 1.0, -) -> pd.DataFrame: - data = np.random.rand(num_rows, num_cols) * factor - df = pd.DataFrame(data) - return df.astype(dtype, errors="ignore") diff --git a/dx/loggers.py b/dx/loggers.py new file mode 100644 index 00000000..7f478276 --- /dev/null +++ b/dx/loggers.py @@ -0,0 +1,93 @@ +import logging +import logging.config +import sys +from typing import Optional + +import structlog + +from dx.settings import settings + +# Timestamp format applied to both vanilla and structlog messages +timestamper = structlog.processors.TimeStamper(fmt=settings.DATETIME_STRING_FORMAT) + +# Pre-processing for Vanilla Log messages +pre_chain = [ + # Add extra attributes of LogRecord objects to the event dictionary + # so that values passed in the extra parameter of log methods pass + # through to log output. 
+ structlog.stdlib.ExtraAdder(), +] + +# Pre-processing for Structlog messages +structlog.configure( + processors=[ + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.StackInfoRenderer(), + structlog.processors.format_exc_info, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, +) + + +# List of processors to be applied after pre-processing both vanilla +# and structlog messages, but before a final processor that formats +# the logs into JSON format or colored terminal output. +shared_processors = [ + # log level / logger name, effects coloring in ConsoleRenderer(colors=True) + structlog.stdlib.add_log_level, + structlog.stdlib.add_logger_name, + # timestamp format + timestamper, + # To see all CallsiteParameterAdder options: + # https://www.structlog.org/en/stable/api.html?highlight=CallsiteParameterAdder#structlog.processors.CallsiteParameterAdder + # more options include module, pathname, process, process_name, thread, thread_name + structlog.processors.CallsiteParameterAdder( + { + structlog.processors.CallsiteParameter.FILENAME, + structlog.processors.CallsiteParameter.FUNC_NAME, + structlog.processors.CallsiteParameter.LINENO, + } + ), + # Any structlog.contextvars.bind_contextvars included in middleware/functions + structlog.contextvars.merge_contextvars, + # strip _record and _from_structlog keys from event dictionary + structlog.stdlib.ProcessorFormatter.remove_processors_meta, +] + + +def configure_logging(app_level: Optional[int] = None): + logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "color": { + "()": structlog.stdlib.ProcessorFormatter, + "processors": shared_processors + + [ + structlog.dev.ConsoleRenderer(colors=True), + ], + "foreign_pre_chain": pre_chain, + }, + }, + "handlers": { + "default": { + "class": 
"logging.StreamHandler", + "formatter": "color", + "stream": sys.stdout, + }, + }, + "loggers": { + "": { + "handlers": ["default"], + "level": "WARNING", + "propagate": True, + }, + }, + } + ) + # Example of setting one specific logger at a level lower than loggers config + logging.getLogger("dx").setLevel(app_level or settings.LOG_LEVEL) diff --git a/dx/formatters/utils.py b/dx/sampling.py similarity index 72% rename from dx/formatters/utils.py rename to dx/sampling.py index 25a2d3ef..042b1d04 100644 --- a/dx/formatters/utils.py +++ b/dx/sampling.py @@ -1,31 +1,26 @@ import sys -from typing import Tuple +from typing import Optional, Tuple import numpy as np import pandas as pd +import structlog -from dx.config import GEOPANDAS_INSTALLED from dx.formatters.callouts import display_callout from dx.settings import settings from dx.types import DXSamplingMethod +from dx.utils.formatting import human_readable_size +from dx.utils.tracking import get_display_id_for_df +logger = structlog.get_logger(__name__) -def human_readable_size(size_bytes: int) -> str: - size_str = "" - for unit in ["B", "KiB", "MiB", "GiB", "TiB"]: - if abs(size_bytes) < 1024.0: - size_str = f"{size_bytes:3.1f} {unit}" - break - size_bytes /= 1024.0 - return size_str - -def truncate_if_too_big(df: pd.DataFrame) -> pd.DataFrame: +def sample_if_too_big(df: pd.DataFrame, display_id: Optional[str] = None) -> pd.DataFrame: """ Reduces the size of a dataframe if it is too big, to help reduce the amount of data being sent to the frontend for non-default media types. 
""" + warnings = [] # check number of columns first, then trim rows if needed @@ -33,7 +28,7 @@ def truncate_if_too_big(df: pd.DataFrame) -> pd.DataFrame: df_too_wide = len(df.columns) > max_columns if df_too_wide: num_orig_columns = len(df.columns) - df = sample_columns(df, max_columns) + df = sample_columns(df, num_cols=max_columns) col_warning = f"""Dataframe has {num_orig_columns:,} column(s), which is more than {settings.DISPLAY_MAX_COLUMNS=}""" warnings.append(col_warning) @@ -43,7 +38,7 @@ def truncate_if_too_big(df: pd.DataFrame) -> pd.DataFrame: df_too_long = len(df) > max_rows if df_too_long: num_orig_rows = len(df) - df = sample_rows(df, max_rows) + df = sample_rows(df, num_rows=max_rows, display_id=display_id) row_warning = f"""Dataframe has {num_orig_rows:,} row(s), which is more than {settings.DISPLAY_MAX_ROWS=}""" warnings.append(row_warning) @@ -61,6 +56,7 @@ def truncate_if_too_big(df: pd.DataFrame) -> pd.DataFrame: size_warning = f"""Dataframe is {size_str}, which is more than {settings_size_str}""" warnings.append(size_warning) + # TODO: replace with custom callout media type if warnings: warning_html = "
".join(warnings) new_size_html = f"""A truncated version with {len(df):,} row(s) and @@ -99,7 +95,7 @@ def reduce_df(df: pd.DataFrame, orig_num_rows: int = 0) -> pd.DataFrame: return df num_current_rows = len(df) - num_rows_to_remove = int(num_current_rows * settings.TRUNCATION_FACTOR) + num_rows_to_remove = int(num_current_rows * settings.SAMPLING_FACTOR) num_truncated_rows = num_current_rows - num_rows_to_remove truncated_rows = sample_rows(df, num_truncated_rows) @@ -139,7 +135,7 @@ def sample_columns(df: pd.DataFrame, num_cols: int) -> pd.DataFrame: raise ValueError(f"Unknown sampling method: {sampling}") -def sample_rows(df: pd.DataFrame, num_rows: int) -> pd.DataFrame: +def sample_rows(df: pd.DataFrame, num_rows: int, display_id: Optional[str] = None) -> pd.DataFrame: """ Samples a dataframe to a specified number of rows based on Settings.SAMPLING_METHOD, or @@ -150,7 +146,7 @@ def sample_rows(df: pd.DataFrame, num_rows: int) -> pd.DataFrame: sampling = row_sampling if sampling == DXSamplingMethod.random: - return sample_random(df, num_rows) + return sample_random(df, num_rows, display_id=display_id) if sampling == DXSamplingMethod.first: return sample_first(df, num_rows) if sampling == DXSamplingMethod.last: @@ -183,14 +179,22 @@ def sample_last(df: pd.DataFrame, num: int) -> pd.DataFrame: return df.tail(num) -def sample_random(df: pd.DataFrame, num: int) -> pd.DataFrame: +def sample_random(df: pd.DataFrame, num: int, display_id: Optional[str] = None) -> pd.DataFrame: """ Samples a random selection of N rows based on the RANDOM_STATE seed. Example: sampling random 8 of 20 rows: [XX...XX.X..X...X.XX.] """ - return df.sample(num, random_state=settings.RANDOM_STATE) + if settings.ENABLE_DATALINK: + # TODO: use hash for seed instead? 
+ display_id = display_id or get_display_id_for_df(df) + display_id_array = [ord(v) for v in str(display_id)] + random_state = np.random.RandomState(seed=display_id_array) + logger.debug(f"using random seed {random_state} from {display_id=}") + else: + random_state = settings.RANDOM_STATE + return df.sample(num, random_state=random_state) def sample_inner(df: pd.DataFrame, num: int) -> pd.DataFrame: @@ -220,29 +224,10 @@ def sample_outer(df: pd.DataFrame, num: int) -> pd.DataFrame: return pd.concat([start_rows, end_rows]) -def stringify_columns(df: pd.DataFrame) -> pd.DataFrame: - """ - Convert numeric columns to strings, or flatten - MultiIndex columns and convert to strings. - """ - cols = df.columns - - def stringify_multiindex(vals): - string_vals = [str(val) for val in vals if str(val)] - return ", ".join(string_vals) - - if isinstance(cols, pd.MultiIndex): - # .to_flat_index() would work if we didn't - # have to convert to strings here - cols = cols.map(stringify_multiindex) - else: - cols = cols.map(str) - - df.columns = cols - return df - - -def truncate_and_describe(df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: +def sample_and_describe( + df: pd.DataFrame, + display_id: Optional[str] = None, +) -> Tuple[pd.DataFrame, dict]: """ Reduces the size of the dataframe, if necessary, and generates a dictionary of shape/size information @@ -251,7 +236,7 @@ def truncate_and_describe(df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: num_orig_rows, num_orig_cols = df.shape orig_size_bytes = sys.getsizeof(df) - df = truncate_if_too_big(df) + df = sample_if_too_big(df, display_id=display_id) num_truncated_rows, num_truncated_cols = df.shape truncated_size_bytes = sys.getsizeof(df) @@ -265,53 +250,3 @@ def truncate_and_describe(df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: "truncated_num_cols": num_truncated_cols, } return df, dataframe_info - - -def is_default_index(index: pd.Index) -> bool: - """ - Returns True if the index values are 0-n, where n is the number of 
items in the series. - """ - index_vals = index.values.tolist() - default_index = pd.Index(list(range(len(index_vals)))) - index = pd.Index(index_vals) - return index.equals(default_index) - - -def normalize_index_and_columns(df: pd.DataFrame) -> pd.DataFrame: - """ - Any additional formatting that needs to happen to the index, - the columns, or the data itself should be done here. - """ - display_df = df.copy() - - # preserve 0-n row numbers for frontend - # if custom/MultiIndex is used - if not is_default_index(display_df.index): - display_df.reset_index(inplace=True) - - # temporary workaround for numeric column rendering errors with GRID - # https://noteables.slack.com/archives/C03CB8A4Z2L/p1658497348488939 - display_df = stringify_columns(display_df) - - # build_table_schema() doesn't like pd.NAs - display_df.fillna(np.nan, inplace=True) - - for column in display_df.columns: - display_df[column] = handle_geoseries(display_df[column]) - - return display_df - - -def handle_geoseries(col: pd.Series) -> pd.Series: - """ - Workaround to JSONify shapely geometries without - requiring shapely/geopandas dependency. 
- """ - if not GEOPANDAS_INSTALLED: - return col - - import geopandas as gpd - - if isinstance(col, gpd.GeoSeries): - col = col.to_json() - return col diff --git a/dx/settings.py b/dx/settings.py index 0480c4c1..fc9e000b 100644 --- a/dx/settings.py +++ b/dx/settings.py @@ -1,61 +1,97 @@ +import logging +from contextlib import contextmanager from functools import lru_cache -from typing import List, Optional +from typing import Optional, Set, Union -import numpy as np import pandas as pd +import structlog from IPython.core.interactiveshell import InteractiveShell +from pandas import set_option as pandas_set_option from pydantic import BaseSettings, validator from dx.types import DXDisplayMode, DXSamplingMethod MB = 1024 * 1024 +logger = structlog.get_logger(__name__) + class Settings(BaseSettings): - # "enhanced" (GRID) display mode + LOG_LEVEL = logging.WARNING + DISPLAY_MAX_ROWS: int = 60 DISPLAY_MAX_COLUMNS: int = 20 HTML_TABLE_SCHEMA: bool = False MEDIA_TYPE: str = "application/vnd.dataresource+json" MAX_RENDER_SIZE_BYTES: int = 100 * MB - RENDERABLE_OBJECTS: List[type] = [pd.DataFrame, np.ndarray] + RENDERABLE_OBJECTS: Set[type] = set() - # what percentage of the dataset to remove during each truncation + # what percentage of the dataset to remove during each sampling # in order to get large datasets under MAX_RENDER_SIZE_BYTES - TRUNCATION_FACTOR: float = 0.1 + SAMPLING_FACTOR: float = 0.1 DISPLAY_MODE: DXDisplayMode = DXDisplayMode.simple - SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.outer - COLUMN_SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.outer - ROW_SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.outer + SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.random + COLUMN_SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.random + ROW_SAMPLING_METHOD: DXSamplingMethod = DXSamplingMethod.random # TODO: support more than just int type here # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html RANDOM_STATE: int = 
12_648_430 + RESET_INDEX_VALUES: bool = False + + FLATTEN_INDEX_VALUES: bool = False + FLATTEN_COLUMN_VALUES: bool = False + STRINGIFY_INDEX_VALUES: bool = False + STRINGIFY_COLUMN_VALUES: bool = False + + DATETIME_STRING_FORMAT: str = "%Y-%m-%dT%H:%M:%S.%f" + + # controls dataframe variable tracking, hashing, and storing in sqlite + ENABLE_DATALINK: bool = False + @validator("RENDERABLE_OBJECTS", pre=True, always=True) def validate_renderables(cls, vals): """Allow passing comma-separated strings or actual types.""" if isinstance(vals, str): vals = vals.replace(",", "").split() - if not isinstance(vals, list): - vals = [vals] + if not isinstance(vals, set): + vals = {vals} - valid_vals = [] + valid_vals = set() for val in vals: if isinstance(val, type): - valid_vals.append(val) + valid_vals.add(val) continue try: val_type = eval(str(val)) - valid_vals.append(val_type) + valid_vals.add(val_type) except Exception as e: - # TODO: add some logging here - pass + raise ValueError(f"can't evaluate {val} type as renderable object: {e}") return valid_vals + @validator("DISPLAY_MAX_COLUMNS", pre=True, always=True) + def validate_display_max_columns(cls, val): + if val < 0: + raise ValueError("DISPLAY_MAX_COLUMNS must be >= 0") + pd.set_option("display.max_columns", val) + return val + + @validator("DISPLAY_MAX_ROWS", pre=True, always=True) + def validate_display_max_rows(cls, val): + if val < 0: + raise ValueError("DISPLAY_MAX_ROWS must be >= 0") + pd.set_option("display.max_rows", val) + return val + + @validator("HTML_TABLE_SCHEMA", pre=True, always=True) + def validate_html_table_schema(cls, val): + pd.set_option("html.table_schema", val) + return val + class Config: validate_assignment = True @@ -96,6 +132,10 @@ def set_display_mode( raise ValueError(f"`{mode}` is not a supported display mode") +def set_log_level(level: int): + logging.getLogger("dx").setLevel(level) + + def set_option( key, value, @@ -104,14 +144,61 @@ def set_option( key = str(key).upper() global 
settings - if getattr(settings, key, None): + if key in vars(settings): setattr(settings, key, value) + # make sure pandas settings are updated as well for display sizes + pd_options = { + "DISPLAY_MAX_ROWS": "display.max_rows", + "DISPLAY_MAX_COLUMNS": "display.max_columns", + "HTML_TABLE_SCHEMA": "html.table_schema", + } + if key in pd_options: + logger.debug(f"setting pandas option {pd_options[key]} to {value}") + pandas_set_option(pd_options[key], value) + # this may be the most straightforward way to handle # IPython display formatter changes being done through # settings updates for now, but I don't like it being here if key == "DISPLAY_MODE": set_display_mode(value, ipython_shell=ipython_shell) + if key == "LOG_LEVEL": + set_log_level(value) + return - raise ValueError(f"{key} is not a valid setting") + raise ValueError(f"`{key}` is not a valid setting") + + +@contextmanager +def settings_context(ipython_shell: Optional[InteractiveShell] = None, **option_kwargs): + global settings + orig_settings = settings.dict() + option_kwargs = {str(k).upper(): v for k, v in option_kwargs.items()} + + # handle DISPLAY_MODE updates first since it can overwrite other settings + if display_mode := option_kwargs.pop("DISPLAY_MODE", None): + set_display_mode(display_mode, ipython_shell=ipython_shell) + + try: + for setting, value in option_kwargs.items(): + set_option(setting, value, ipython_shell=ipython_shell) + yield settings + finally: + for setting, value in orig_settings.items(): + set_option(setting, value, ipython_shell=ipython_shell) + + +def add_renderable_type(renderable_type: Union[type, list]): + """ + Convenience function to add a type (or list of types) + to the types that can be processed by the display formatter. 
+ (settings.RENDERABLE_OBJECTS default: [pd.Series, pd.DataFrame, np.ndarray]) + """ + global settings + + if not isinstance(renderable_type, list): + renderable_type = [renderable_type] + + logger.debug(f"adding `{renderable_type}` to {settings.RENDERABLE_OBJECTS=}") + settings.RENDERABLE_OBJECTS.update(renderable_type) diff --git a/dx/tests/test_dataresource.py b/dx/tests/test_dataresource.py index a4de9307..2ae9279f 100644 --- a/dx/tests/test_dataresource.py +++ b/dx/tests/test_dataresource.py @@ -1,14 +1,16 @@ import uuid -from dx.formatters.dataresource import generate_dataresource_body, get_dataresource_settings - -dataresource_settings = get_dataresource_settings() +import pytest +from dx.formatters.dataresource import ( + format_dataresource, + generate_dataresource_body, + get_dataresource_settings, +) +from dx.settings import settings_context +from dx.utils.datatypes import quick_random_dataframe -def test_media_type(sample_dataframe): - display_id = str(uuid.uuid4()) - payload, _ = generate_dataresource_body(sample_dataframe, display_id) - assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in payload +dataresource_settings = get_dataresource_settings() def test_data_structure(sample_dataframe): @@ -19,7 +21,7 @@ def test_data_structure(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = generate_dataresource_body(sample_dataframe, display_id) - data = payload[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["data"] + data = payload["data"] assert isinstance(data, list) assert len(data) == 3 assert isinstance(data[0], dict) @@ -32,7 +34,7 @@ def test_data_list_order(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = generate_dataresource_body(sample_dataframe, display_id) - data = payload[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["data"] + data = payload["data"] assert data[0] == {"col_1": "a", "col_2": "b", "col_3": "c", "index": 0} assert data[1] == {"col_1": "a", "col_2": "b", "col_3": "c", "index": 1} assert data[2] 
== {"col_1": "a", "col_2": "b", "col_3": "c", "index": 2} @@ -45,6 +47,16 @@ def test_fields_match_data_width(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = generate_dataresource_body(sample_dataframe, display_id) - data = payload[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["data"] - fields = payload[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["schema"]["fields"] + data = payload["data"] + fields = payload["schema"]["fields"] assert len(data[0]) == len(fields) + + +@pytest.mark.parametrize("enabled", [True, False]) +def test_datalink_toggle(enabled: bool): + df = quick_random_dataframe() + with settings_context(enable_datalink=enabled): + try: + format_dataresource(df) + except Exception as e: + assert False, f"failed with {e}" diff --git a/dx/tests/test_datatypes.py b/dx/tests/test_datatypes.py new file mode 100644 index 00000000..fc8f1207 --- /dev/null +++ b/dx/tests/test_datatypes.py @@ -0,0 +1,165 @@ +""" +Tests to ensure various data types can be sent functions to +- build the table schema and payload/metadata body for each display formatter +- hash the dataframe for tracking +- write to sqlite for tracking/filtering +""" + +import pandas as pd +import pytest +from pandas.io.json import build_table_schema +from pandas.util import hash_pandas_object + +from dx.formatters import dataresource, dx +from dx.utils.datatypes import ( + DX_DATATYPES, + SORTED_DX_DATATYPES, + quick_random_dataframe, + random_dataframe, +) +from dx.utils.tracking import generate_df_hash, sql_engine, store_in_sqlite + + +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_df_generator(dtype: str, num_rows: int = 5): + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(num_rows=num_rows, **params) + assert len(df) == num_rows + assert isinstance(df[dtype], pd.Series) + assert df[dtype].notnull().all() + + +def test_random_dataframe_has_default_data(num_rows: int = 5): + df = 
random_dataframe(num_rows=num_rows) + assert len(df) == num_rows + default_enabled_columns = [column for column, enabled in DX_DATATYPES.items() if enabled] + assert len(df.columns) == len(default_enabled_columns) + for col in default_enabled_columns: + assert col in df.columns + assert df[col].notnull().all() + + +def test_quick_random_dataframe_has_default_data(): + df = quick_random_dataframe() + assert df.shape[0] >= 1 + assert df.shape[1] >= 1 + for col in df.columns: + assert df[col].notnull().all() + + +@pytest.mark.xfail(reason="only for dev") +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_data_types_with_build_table_schema(dtype: str): + """ + DEV: Test which data types pass/fail when passed directly through build_table_schema(). + """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + schema = build_table_schema(df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert isinstance(schema, dict) + + +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_generate_dataresource_body(dtype: str): + """ + Test that we've correctly handled data types before building the schema and metadata for + the DXDataResourceDisplayFormatter. + """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + payload, metadata = dataresource.generate_dataresource_body(df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert isinstance(payload, dict) + assert isinstance(metadata, dict) + + +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_generate_dx_body(dtype: str): + """ + Test that we've correctly handled data types before building the schema and metadata for + the DXDisplayFormatter. 
+ """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + payload, metadata = dx.generate_dx_body(df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert isinstance(payload, dict) + assert isinstance(metadata, dict) + + +@pytest.mark.xfail(reason="only for dev") +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_hash_pandas_object(dtype: str): + """ + DEV: Test which data types pass/fail when passed directly through hash_pandas_object(). + """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + hash_series = hash_pandas_object(df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert isinstance(hash_series, pd.Series) + + +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_generate_df_hash(dtype: str): + """ + Test that we've correctly handled data types before creating a hash of a dataframe. + """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + hash_str = generate_df_hash(df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert isinstance(hash_str, str) + + +@pytest.mark.xfail(reason="only for dev") +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_to_sql(dtype: str): + """ + DEV: Test which data types pass/fail when passed directly through .to_sql() + with the sqlalchemy engine. 
+ """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + with sql_engine.connect() as conn: + num_rows = df.to_sql("test", conn, if_exists="replace") + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert num_rows == df.shape[0] + + +@pytest.mark.parametrize("dtype", SORTED_DX_DATATYPES) +def test_store_in_sqlite(dtype: str): + """ + Test that we've correctly handled data types before storing in sqlite. + """ + params = {dt: False for dt in SORTED_DX_DATATYPES} + params[dtype] = True + df = random_dataframe(**params) + try: + num_rows = store_in_sqlite(f"{dtype}_test", df) + except Exception as e: + assert False, f"{dtype} failed with {e}" + assert num_rows == df.shape[0] + + +# TODO: test that we can convert back to original datatypes after read_sql? diff --git a/dx/tests/test_dx.py b/dx/tests/test_dx.py index ff5cb48d..a489acc1 100644 --- a/dx/tests/test_dx.py +++ b/dx/tests/test_dx.py @@ -1,14 +1,12 @@ import uuid -from dx.formatters.dx import generate_dx_body, get_dx_settings - -dx_settings = get_dx_settings() +import pytest +from dx.formatters.dx import format_dx, generate_dx_body, get_dx_settings +from dx.settings import settings_context +from dx.utils.datatypes import quick_random_dataframe -def test_media_type(sample_dataframe): - display_id = str(uuid.uuid4()) - payload, _ = generate_dx_body(sample_dataframe, display_id) - assert dx_settings.DX_MEDIA_TYPE in payload +dx_settings = get_dx_settings() def test_data_structure(sample_dataframe): @@ -19,7 +17,7 @@ def test_data_structure(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = generate_dx_body(sample_dataframe, display_id) - data = payload[dx_settings.DX_MEDIA_TYPE]["data"] + data = payload["data"] assert isinstance(data, list) assert len(data) == 4 assert isinstance(data[0], list) @@ -32,7 +30,7 @@ def test_data_list_order(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = 
generate_dx_body(sample_dataframe, display_id) - data = payload[dx_settings.DX_MEDIA_TYPE]["data"] + data = payload["data"] assert data[0] == [0, 1, 2] # index values assert data[1] == list("aaa") # "col_1" values assert data[2] == list("bbb") # "col_2" values @@ -46,6 +44,16 @@ def test_fields_match_data_length(sample_dataframe): """ display_id = str(uuid.uuid4()) payload, _ = generate_dx_body(sample_dataframe, display_id) - data = payload[dx_settings.DX_MEDIA_TYPE]["data"] - fields = payload[dx_settings.DX_MEDIA_TYPE]["schema"]["fields"] + data = payload["data"] + fields = payload["schema"]["fields"] assert len(data) == len(fields) + + +@pytest.mark.parametrize("enabled", [True, False]) +def test_datalink_toggle(enabled: bool): + df = quick_random_dataframe() + with settings_context(enable_datalink=enabled): + try: + format_dx(df) + except Exception as e: + assert False, f"failed with {e}" diff --git a/dx/tests/test_registering.py b/dx/tests/test_registering.py index b16c268a..e395e7e3 100644 --- a/dx/tests/test_registering.py +++ b/dx/tests/test_registering.py @@ -1,13 +1,8 @@ import pandas as pd -from IPython.core.formatters import DisplayFormatter from IPython.terminal.interactiveshell import TerminalInteractiveShell -from dx.formatters.dataresource import ( - DXDataResourceDisplayFormatter, - deregister, - get_dataresource_settings, -) -from dx.formatters.dx import DXDisplayFormatter, get_dx_settings, register +from dx.formatters.dataresource import deregister, get_dataresource_settings +from dx.formatters.dx import get_dx_settings, register from dx.formatters.main import get_pandas_settings, reset from dx.settings import get_settings @@ -25,8 +20,11 @@ def test_register_ipython_display_formatter( successfully registered as a DXDisplayFormatter and that global settings have been properly updated. 
""" + formatters = get_ipython.display_formatter.formatters + register(ipython_shell=get_ipython) - assert isinstance(get_ipython.display_formatter, DXDisplayFormatter) + assert dataresource_settings.DATARESOURCE_MEDIA_TYPE not in formatters + assert dx_settings.DX_MEDIA_TYPE in formatters assert settings.DISPLAY_MAX_COLUMNS == dx_settings.DX_DISPLAY_MAX_COLUMNS assert settings.DISPLAY_MAX_ROWS == dx_settings.DX_DISPLAY_MAX_ROWS @@ -43,11 +41,15 @@ def test_deregister_ipython_display_formatter( successfully registered as a DXDataResourceDisplayFormatter and that global settings have been properly updated. """ + formatters = get_ipython.display_formatter.formatters + register(ipython_shell=get_ipython) - assert isinstance(get_ipython.display_formatter, DXDisplayFormatter) + assert dataresource_settings.DATARESOURCE_MEDIA_TYPE not in formatters + assert dx_settings.DX_MEDIA_TYPE in formatters deregister(ipython_shell=get_ipython) - assert isinstance(get_ipython.display_formatter, DXDataResourceDisplayFormatter) + assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in formatters + assert dx_settings.DX_MEDIA_TYPE not in formatters assert settings.DISPLAY_MAX_COLUMNS == dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS assert settings.DISPLAY_MAX_ROWS == dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS @@ -64,11 +66,13 @@ def test_reset_ipython_display_formatter( `IPython.core.formatters.DisplayFormatter` after resetting and that global settings have been properly updated. 
""" + formatters = get_ipython.display_formatter.formatters deregister(ipython_shell=get_ipython) - assert isinstance(get_ipython.display_formatter, DXDataResourceDisplayFormatter) + assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in formatters reset(ipython_shell=get_ipython) - assert isinstance(get_ipython.display_formatter, DisplayFormatter) + assert dataresource_settings.DATARESOURCE_MEDIA_TYPE not in formatters + assert dx_settings.DX_MEDIA_TYPE not in formatters assert settings.DISPLAY_MAX_COLUMNS == pandas_settings.PANDAS_DISPLAY_MAX_COLUMNS assert settings.DISPLAY_MAX_ROWS == pandas_settings.PANDAS_DISPLAY_MAX_ROWS diff --git a/dx/tests/test_sampling.py b/dx/tests/test_sampling.py new file mode 100644 index 00000000..aa552b86 --- /dev/null +++ b/dx/tests/test_sampling.py @@ -0,0 +1,83 @@ +import sys + +import pandas as pd + +from dx.sampling import sample_if_too_big +from dx.settings import get_settings, settings_context + +settings = get_settings() + + +def test_small_dataframe_is_not_sampled(sample_dataframe: pd.DataFrame): + """ + Test that a small dataframe is not sampled. + """ + original_size_bytes = sys.getsizeof(sample_dataframe) + sampled_df = sample_if_too_big(sample_dataframe) + sampled_size_bytes = sys.getsizeof(sampled_df) + assert sampled_size_bytes <= settings.MAX_RENDER_SIZE_BYTES + assert sampled_size_bytes == original_size_bytes + + +def test_large_dataframe_is_sampled(sample_large_dataframe: pd.DataFrame): + """ + Test that a large dataframe is sampled to below the size of + MAX_RENDER_SIZE_BYTES. 
+ """ + with settings_context(MAX_RENDER_SIZE_BYTES=1024 * 1024): + original_size_bytes = sys.getsizeof(sample_large_dataframe) + sampled_df = sample_if_too_big(sample_large_dataframe) + sampled_size_bytes = sys.getsizeof(sampled_df) + assert sampled_size_bytes <= settings.MAX_RENDER_SIZE_BYTES + assert sampled_size_bytes < original_size_bytes + + +def test_sampled_dataframe_keeps_dtypes(sample_large_dataframe: pd.DataFrame): + """ + Test that a sampled dataframe doesn't alter column datatypes. + """ + orig_dtypes = sample_large_dataframe.dtypes + sampled_df = sample_if_too_big(sample_large_dataframe) + assert (sampled_df.dtypes == orig_dtypes).all() + + +def test_wide_dataframe_is_narrowed(sample_wide_dataframe: pd.DataFrame): + """ + Test that a wide dataframe is narrowed to below the size of + the display mode's MAX_COLUMNS setting. + """ + orig_width = len(sample_wide_dataframe.columns) + narrow_df = sample_if_too_big(sample_wide_dataframe) + narrow_width = len(narrow_df.columns) + assert narrow_width < orig_width, f"{narrow_width=}" + assert narrow_width <= settings.DISPLAY_MAX_COLUMNS + + +def test_long_dataframe_is_shortened(sample_long_dataframe: pd.DataFrame): + """ + Test that a long dataframe is shortened to below the size of + the display mode's MAX_ROWS setting. + """ + orig_length = len(sample_long_dataframe) + short_df = sample_if_too_big(sample_long_dataframe) + short_length = len(short_df) + assert short_length < orig_length, f"{short_length=}" + assert short_length <= settings.DISPLAY_MAX_ROWS + + +def test_long_wide_dataframe_is_reduced_from_both_dimensions( + sample_long_wide_dataframe: pd.DataFrame, +): + """ + Test that a long wide dataframe is reduced from both dimensions + to below the size of the display mode's MAX_COLUMNS and MAX_ROWS settings. 
+ """ + orig_width = len(sample_long_wide_dataframe.columns) + orig_length = len(sample_long_wide_dataframe) + reduced_df = sample_if_too_big(sample_long_wide_dataframe) + reduced_width = len(reduced_df.columns) + reduced_length = len(reduced_df) + assert reduced_width < orig_width + assert reduced_width <= settings.DISPLAY_MAX_COLUMNS + assert reduced_length < orig_length + assert reduced_length <= settings.DISPLAY_MAX_ROWS diff --git a/dx/tests/test_settings.py b/dx/tests/test_settings.py index afff3ed8..d1c5cb93 100644 --- a/dx/tests/test_settings.py +++ b/dx/tests/test_settings.py @@ -1,11 +1,6 @@ -import pandas as pd -from IPython.core.formatters import DisplayFormatter from IPython.terminal.interactiveshell import TerminalInteractiveShell -import dx -from dx.formatters.dataresource import DXDataResourceDisplayFormatter -from dx.formatters.dx import DXDisplayFormatter -from dx.settings import get_settings, set_display_mode +from dx.settings import add_renderable_type, get_settings, set_display_mode, settings_context from dx.types import DXDisplayMode settings = get_settings() @@ -18,32 +13,34 @@ def test_set_display_mode(get_ipython: TerminalInteractiveShell): """ set_display_mode("plain", ipython_shell=get_ipython) assert settings.DISPLAY_MODE == DXDisplayMode.plain - assert isinstance(get_ipython.display_formatter, DisplayFormatter) set_display_mode("simple", ipython_shell=get_ipython) assert settings.DISPLAY_MODE == DXDisplayMode.simple - assert isinstance(get_ipython.display_formatter, DXDataResourceDisplayFormatter) set_display_mode("enhanced", ipython_shell=get_ipython) assert settings.DISPLAY_MODE == DXDisplayMode.enhanced - assert isinstance(get_ipython.display_formatter, DXDisplayFormatter) -def test_display_preserves_global_display_mode( +def test_settings_context_preserves_global_setting( get_ipython: TerminalInteractiveShell, - sample_dataframe: pd.DataFrame, ): """ - Test that calling dx.display(df, mode=X) does not permanently change - the global 
display mode to X. + Test that using the settings_context() context manager + does not permanently change a global setting. """ set_display_mode("simple", ipython_shell=get_ipython) - assert settings.DISPLAY_MODE == DXDisplayMode.simple - assert isinstance(get_ipython.display_formatter, DXDataResourceDisplayFormatter) - dx.display( - sample_dataframe, - mode=DXDisplayMode.enhanced, - ipython_shell=get_ipython, - ) + with settings_context(display_mode="enhanced", ipython_shell=get_ipython): + assert settings.DISPLAY_MODE == DXDisplayMode.enhanced, f"{settings=}" + assert settings.DISPLAY_MODE == DXDisplayMode.simple, f"{settings=}" + + +def test_add_renderables(): + renderables = set(settings.RENDERABLE_OBJECTS) + + class FakeRenderable: + pass + + add_renderable_type(FakeRenderable) + assert settings.RENDERABLE_OBJECTS == renderables | {FakeRenderable} diff --git a/dx/tests/test_utils.py b/dx/tests/test_utils.py index 66e5e409..a5488cf3 100644 --- a/dx/tests/test_utils.py +++ b/dx/tests/test_utils.py @@ -1,92 +1,22 @@ -import sys - import pandas as pd -from dx.formatters.utils import is_default_index, normalize_index_and_columns, truncate_if_too_big -from dx.settings import get_settings +from dx.settings import get_settings, settings_context +from dx.utils.formatting import is_default_index, normalize_index_and_columns settings = get_settings() -def test_small_dataframe_is_not_truncated(sample_dataframe: pd.DataFrame): - """ - Test that a small dataframe is not truncated. - """ - original_size_bytes = sys.getsizeof(sample_dataframe) - truncated_df = truncate_if_too_big(sample_dataframe) - truncated_size_bytes = sys.getsizeof(truncated_df) - assert truncated_size_bytes <= settings.MAX_RENDER_SIZE_BYTES - assert truncated_size_bytes == original_size_bytes - - -def test_large_dataframe_is_truncated(sample_large_dataframe: pd.DataFrame): - """ - Test that a large dataframe is truncated to below the size of - MAX_RENDER_SIZE_BYTES. 
- """ - original_size_bytes = sys.getsizeof(sample_large_dataframe) - truncated_df = truncate_if_too_big(sample_large_dataframe) - truncated_size_bytes = sys.getsizeof(truncated_df) - assert truncated_size_bytes <= settings.MAX_RENDER_SIZE_BYTES - assert truncated_size_bytes < original_size_bytes - - -def test_truncated_dataframe_keeps_dtypes(sample_large_dataframe: pd.DataFrame): - """ - Test that a truncated dataframe doesn't alter column datatypes. - """ - orig_dtypes = sample_large_dataframe.dtypes - truncated_df = truncate_if_too_big(sample_large_dataframe) - assert (truncated_df.dtypes == orig_dtypes).all() - - -def test_wide_dataframe_is_narrowed(sample_wide_dataframe: pd.DataFrame): - """ - Test that a wide dataframe is narrowed to below the size of - the display mode's MAX_COLUMNS setting. - """ - orig_width = len(sample_wide_dataframe.columns) - narrow_df = truncate_if_too_big(sample_wide_dataframe) - narrow_width = len(narrow_df.columns) - assert narrow_width < orig_width, f"{narrow_width=}" - assert narrow_width <= settings.DISPLAY_MAX_COLUMNS - - -def test_long_dataframe_is_shortened(sample_long_dataframe: pd.DataFrame): - """ - Test that a long dataframe is shortened to below the size of - the display mode's MAX_ROWS setting. - """ - orig_length = len(sample_long_dataframe) - short_df = truncate_if_too_big(sample_long_dataframe) - short_length = len(short_df) - assert short_length < orig_length, f"{short_length=}" - assert short_length <= settings.DISPLAY_MAX_ROWS - - -def test_long_wide_dataframe_is_reduced_from_both_dimensions( - sample_long_wide_dataframe: pd.DataFrame, -): - """ - Test that a long wide dataframe is reduced from both dimensions - to below the size of the display mode's MAX_COLUMNS and MAX_ROWS settings. 
- """ - orig_width = len(sample_long_wide_dataframe.columns) - orig_length = len(sample_long_wide_dataframe) - reduced_df = truncate_if_too_big(sample_long_wide_dataframe) - reduced_width = len(reduced_df.columns) - reduced_length = len(reduced_df) - assert reduced_width < orig_width - assert reduced_width <= settings.DISPLAY_MAX_COLUMNS - assert reduced_length < orig_length - assert reduced_length <= settings.DISPLAY_MAX_ROWS - - def test_default_index_returns_true(sample_dataframe: pd.DataFrame): index = sample_dataframe.index assert is_default_index(index) +def test_unsorted_default_index_returns_true(sample_dataframe: pd.DataFrame): + shuffled_sample_dataframe = sample_dataframe.sample(len(sample_dataframe)) + index = shuffled_sample_dataframe.index + assert is_default_index(index) + + def test_custom_index_returns_false(sample_dataframe: pd.DataFrame): sample_dataframe.set_index("col_1", inplace=True) index = sample_dataframe.index @@ -103,8 +33,9 @@ def test_default_index_persists(sample_dataframe: pd.DataFrame): """ Default indexes should not be reset. """ - df = normalize_index_and_columns(sample_dataframe.copy()) - assert df.index.equals(sample_dataframe.index) + with settings_context(STRINGIFY_INDEX_VALUES=False): + df = normalize_index_and_columns(sample_dataframe.copy()) + assert list(df.index) == list(sample_dataframe.index) def test_custom_index_resets(sample_dataframe: pd.DataFrame): @@ -112,6 +43,9 @@ def test_custom_index_resets(sample_dataframe: pd.DataFrame): Custom indexes should reset to ensure the `index` is passed with row value numbers to the frontend, from 0 to the length of the dataframe. 
""" - sample_dataframe.set_index(["col_1", "col_2"], inplace=True) - df = normalize_index_and_columns(sample_dataframe.copy()) - assert not df.index.equals(sample_dataframe.index) + with settings_context(RESET_INDEX_VALUES=True): + sample_dataframe.set_index(["col_1", "col_2"], inplace=True) + assert isinstance(sample_dataframe.index, pd.MultiIndex) + df = normalize_index_and_columns(sample_dataframe.copy()) + assert not df.index.equals(sample_dataframe.index) + assert not isinstance(df.index, pd.MultiIndex) diff --git a/dx/utils/__init__.py b/dx/utils/__init__.py new file mode 100644 index 00000000..8123c46a --- /dev/null +++ b/dx/utils/__init__.py @@ -0,0 +1,5 @@ +from .datatypes import * +from .date_time import * +from .formatting import * +from .geometry import * +from .tracking import * diff --git a/dx/utils/datatypes.py b/dx/utils/datatypes.py new file mode 100644 index 00000000..d5b8ae69 --- /dev/null +++ b/dx/utils/datatypes.py @@ -0,0 +1,278 @@ +import ipaddress +import json +import random +import string + +import numpy as np +import pandas as pd +import structlog + +from dx.utils import date_time, geometry + +try: + from faker import Faker + + fake = Faker() + FAKER_INSTALLED = True +except ImportError: + FAKER_INSTALLED = False + + +logger = structlog.get_logger(__name__) + + +DX_DATATYPES = { + "dtype_column": True, + "integer_column": True, + "float_column": True, + "datetime_column": True, + "time_delta_column": False, + "time_period_column": False, + "time_interval_column": False, + "text_column": False, + "keyword_column": True, + "dict_column": False, + "list_column": False, + "nested_tabular_column": False, + "latlon_point_column": False, + "filled_geojson_column": False, + "exterior_geojson_column": False, + "bytes_column": True, + "ipv4_address_column": False, + "ipv6_address_column": False, + "complex_number_column": False, +} +SORTED_DX_DATATYPES = sorted(list(DX_DATATYPES.keys())) + + +def generate_integer_series(num_rows: int) -> pd.Series: 
def generate_keyword_series(num_rows: int, num_letters: int = 2) -> pd.Series:
    """
    Build a series of `num_rows` random uppercase keywords.

    Each keyword is `num_letters` characters long. Letters are unique within a
    keyword as long as the alphabet is big enough (`num_letters` <= 26); longer
    keywords are drawn with replacement instead of letting `random.sample()`
    raise a ValueError for a sample larger than its population.
    """
    alphabet = string.ascii_uppercase

    def random_keyword() -> str:
        if num_letters <= len(alphabet):
            # no repeated letters inside a keyword
            letters = random.sample(alphabet, num_letters)
        else:
            # more letters requested than exist: repeats are unavoidable
            letters = random.choices(alphabet, k=num_letters)
        return "".join(letters)

    return pd.Series([random_keyword() for _ in range(num_rows)])
def handle_complex_number_series(s: pd.Series) -> pd.Series:
    """
    Convert complex numbers in a series to "<real>+<imag>j" strings.

    Values that are not complex numbers are kept as they are, and a series
    without any complex value is returned unchanged.
    """
    # `np.complex` was only an alias of the builtin `complex` and no longer
    # exists in current NumPy releases; `np.complexfloating` is the base class
    # of NumPy's own complex scalars (complex64, complex128, ...).
    types = (complex, np.complexfloating)

    def as_string(x):
        if not isinstance(x, types):
            return x
        # emit the sign explicitly so a negative imaginary part reads
        # "1.0-2.0j" instead of "1.0+-2.0j"
        sign = "-" if x.imag < 0 else "+"
        return f"{x.real}{sign}{abs(x.imag)}j"

    if any(isinstance(v, types) for v in s.values):
        logger.debug(f"series `{s.name}` has complex numbers; converting to real/imag string")
        s = s.apply(as_string)
    return s
+ """ + types = (type, np.dtype) + if any(isinstance(v, types) for v in s.values): + logger.debug(f"series `{s.name}` has types; converting to strings") + s = s.astype(str) + return s + + +def handle_interval_series(s: pd.Series) -> pd.Series: + types = pd.Interval + if any(isinstance(v, types) for v in s.values): + logger.debug(f"series `{s.name}` has intervals; converting to left/right") + s = s.apply(lambda x: [x.left, x.right] if isinstance(x, types) else x) + return s + + +def handle_ip_address_series(s: pd.Series) -> pd.Series: + types = (ipaddress.IPv4Address, ipaddress.IPv6Address) + if any(isinstance(v, types) for v in s.values): + logger.debug(f"series `{s.name}` has ip addresses; converting to strings") + s = s.astype(str) + return s + + +def handle_sequence_series(s: pd.Series) -> pd.Series: + types = (list, tuple, set, np.ndarray) + if any(isinstance(v, types) for v in s.values): + logger.debug(f"series `{s.name}` has sequences; converting to comma-separated string") + s = s.apply(lambda x: ", ".join([str(val) for val in x] if isinstance(x, types) else x)) + return s + + +def quick_random_dataframe( + num_rows: int = 5, + num_cols: int = 2, + dtype: str = "float", + factor: float = 1.0, +) -> pd.DataFrame: + """ + Convenience function wrapping `pd.DataFrame(np.random.rand( num_rows, num_columns ))` + to create a dataframe of random 0.0-1.0 values. 
+ """ + data = np.random.rand(num_rows, num_cols) * factor + df = pd.DataFrame(data) + return df.astype(dtype, errors="ignore") + + +def random_dataframe(num_rows: int = 5, **kwargs): # noqa: C901 + + kwargs = kwargs or DX_DATATYPES + df = pd.DataFrame(index=list(range(num_rows))) + + if kwargs.get("dtype_column"): + df["dtype_column"] = generate_dtype_series(num_rows) + + # numeric columns + if kwargs.get("integer_column"): + df["integer_column"] = generate_integer_series(num_rows) + + if kwargs.get("float_column"): + df["float_column"] = generate_float_series(num_rows) + + if kwargs.get("complex_number_column"): + df["complex_number_column"] = generate_complex_number_series(num_rows) + + # date/time columns + if kwargs.get("datetime_column"): + df["datetime_column"] = date_time.generate_datetime_series(num_rows) + + if kwargs.get("time_delta_column"): + df["time_delta_column"] = date_time.generate_time_delta_series(num_rows) + + if kwargs.get("time_period_column"): + df["time_period_column"] = date_time.generate_time_period_series(num_rows) + + if kwargs.get("time_interval_column"): + df["time_interval_column"] = date_time.generate_time_interval_series(num_rows) + + # string columns + if kwargs.get("text_column"): + df["text_column"] = generate_text_series(num_rows) + + if kwargs.get("keyword_column"): + df["keyword_column"] = generate_keyword_series(num_rows) + + # container columns + if kwargs.get("dict_column"): + df["dict_column"] = generate_dict_series(num_rows) + + if kwargs.get("list_column"): + df["list_column"] = generate_list_series(num_rows) + + if kwargs.get("nested_tabular_column"): + df["nested_tabular_column"] = generate_nested_tabular_series( + num_rows, + float_column=True, + keyword_column=True, + ) + + # geopandas/shapely columns + if kwargs.get("latlon_point_column"): + df["latlon_point_column"] = geometry.generate_latlon_series(num_rows) + + if kwargs.get("filled_geojson_column"): + df["filled_geojson_column"] = 
def generate_time_interval_series(num_rows: int) -> pd.Series:
    """
    Build a series of `num_rows` random pd.Interval values around the current time.

    Each interval starts 1-1000 hours before "now" and ends 0-999 hours after it.
    """

    def random_interval() -> pd.Interval:
        # the start offset is drawn before the end offset
        start = pd.Timestamp("now") + pd.Timedelta(f"{np.random.randint(-1000, 0)} hours")
        end = pd.Timestamp("now") + pd.Timedelta(f"{np.random.randint(0, 1000)} hours")
        return pd.Interval(start, end)

    intervals = []
    for _ in range(num_rows):
        intervals.append(random_interval())
    return pd.Series(intervals)
def human_readable_size(size_bytes: int) -> str:
    """
    Converts bytes to a more human-readable string.

    The size is scaled by 1024 through B, KiB, MiB, GiB and TiB. Anything of
    1024 TiB or more is still reported in TiB rather than as an empty string.

    >>> human_readable_size(1689445298)
    '1.6 GiB'
    """
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    size = size_bytes
    for unit in units[:-1]:
        if abs(size) < 1024.0:
            return f"{size:3.1f} {unit}"
        size /= 1024.0
    # largest supported unit: report whatever is left, even if it is >= 1024
    return f"{size:3.1f} {units[-1]}"
+ """ + display_df = df.copy() + + display_df = normalize_index(display_df) + display_df = normalize_columns(display_df) + + # build_table_schema() doesn't like pd.NAs + display_df.fillna(np.nan, inplace=True) + + return display_df + + +def normalize_index(df: pd.DataFrame) -> pd.DataFrame: + """ """ + if settings.RESET_INDEX_VALUES and not is_default_index(df.index): + # preserve 0-n row numbers for frontend + # if custom/MultiIndex is used + df.reset_index(inplace=True) + + is_multiindex = isinstance(df.index, pd.MultiIndex) + + # if index or column values are numeric, we need to convert to strings + # (whether pd.Index or pd.MultiIndex) to avoid build_table_schema() errors + logger.debug(f"before: {df.index[:5]=}") + + index_name = getattr(df.index, "names", None) + # may be `FrozenList([None, None ...])` + if not any(index_name): + index_name = getattr(df.index, "name") + index_name = index_name or "index" + logger.debug(f"{index_name=}") + # build_table_schema() doesn't like non-string index names + if not isinstance(index_name, str): + if is_multiindex: + index_name = list(map(str, index_name)) + else: + index_name = str(index_name) + logger.debug(f"{index_name=}") + + if settings.FLATTEN_INDEX_VALUES and is_multiindex: + df.index = df.index.to_flat_index() + df.index = [", ".join([str(val) for val in index_vals]) for index_vals in df.index] + + if settings.STRINGIFY_INDEX_VALUES: + if is_multiindex: + df.index = pd.MultiIndex.from_tuples(stringify_index(df.index), names=index_name) + else: + df.index = pd.Index(stringify_index(df.index), name=index_name) + logger.debug(f"after: {df.index[:5]=}") + return df + + +def normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """ + Any additional formatting that needs to happen to the columns, + or the data itself should be done here. 
+ """ + logger.debug(f"before: {df.columns[:5]=}") + + if settings.FLATTEN_COLUMN_VALUES and isinstance(df.columns, pd.MultiIndex): + df.columns = df.columns.to_flat_index() + df.columns = [", ".join([str(val) for val in column_vals]) for column_vals in df.columns] + + if settings.STRINGIFY_COLUMN_VALUES: + df.columns = pd.Index(stringify_index(df.columns)) + + for column in df.columns: + df[column] = clean_column_values_for_display(df[column]) + + logger.debug(f"after: {df.columns[:5]=}") + return df + + +def stringify_index(index: pd.Index): + """ + Convenience method to cast index/column values as strings. + (Handles pd.Index as well as pd.MultiIndex objects) + """ + if isinstance(index[0], (list, tuple)): + # pd.MultiIndex + return list(map(stringify_index, index)) + return tuple(map(str, index)) + + +def clean_column_values_for_display(s: pd.Series) -> pd.Series: + """ + Cleaning/conversion for values in a series to prevent + build_table_schema() or frontend rendering errors. + """ + s = date_time.handle_time_period_series(s) + s = date_time.handle_time_delta_series(s) + + s = datatypes.handle_dtype_series(s) + s = datatypes.handle_interval_series(s) + s = datatypes.handle_ip_address_series(s) + s = datatypes.handle_complex_number_series(s) + + s = geometry.handle_geometry_series(s) + return s + + +def clean_column_values_for_hash(s: pd.Series) -> pd.Series: + """ + Cleaning/conversion for values in a series to prevent + hash_pandas_object() errors. + """ + s = geometry.handle_geometry_series(s) + + s = datatypes.handle_dict_series(s) + s = datatypes.handle_sequence_series(s) + return s + + +def clean_column_values_for_sqlite(s: pd.Series) -> pd.Series: + """ + Cleaning/conversion for values in a series to prevent + errors writing to sqlite. 
+ """ + s = datatypes.handle_dtype_series(s) + s = datatypes.handle_interval_series(s) + s = datatypes.handle_complex_number_series(s) + s = datatypes.handle_ip_address_series(s) + + s = date_time.handle_time_period_series(s) + + s = geometry.handle_geometry_series(s) + + s = datatypes.handle_dict_series(s) + s = datatypes.handle_sequence_series(s) + return s + + +# TODO: clean this up +def expand_sequences(val, separator: str = ", "): + if separator not in str(val): + return val + + vals = [] + for val in val.split(separator): + try: + val = eval(val) + except Exception as e: + logger.debug(f"can't eval({val}): {e}") + vals.append(val) + return vals diff --git a/dx/utils/geometry.py b/dx/utils/geometry.py new file mode 100644 index 00000000..2d37c89c --- /dev/null +++ b/dx/utils/geometry.py @@ -0,0 +1,93 @@ +import random +from typing import Optional + +import numpy as np +import pandas as pd +import structlog + +try: + import geopandas as gpd + import shapely.geometry.base + from shapely.geometry import mapping + + GEOPANDAS_INSTALLED = True +except ImportError: + GEOPANDAS_INSTALLED = False + +logger = structlog.get_logger(__name__) + + +def generate_latlon_series(num_rows: int): + """ + Creates a series of shapely.geometry.Point values for latitude and longitude. + """ + if not GEOPANDAS_INSTALLED: + logger.warning("geopandas is not installed, skipping generate_latlon_series") + return np.nan + + lats = [random.randint(-90, 89) + np.random.rand() for _ in range(num_rows)] + lons = [random.randint(-180, 179) + np.random.rand() for _ in range(num_rows)] + return gpd.points_from_xy(lons, lats) + + +def generate_filled_geojson_series( + num_rows: int, + existing_latlon_series: Optional[pd.Series] = None, +): + """ + Creates a series of shapely.geometry.Polygon values by + generating shapely.geometry.Point values and calling .buffer() + on them, resulting in circular filled Polygon objects. 
+ """ + if not GEOPANDAS_INSTALLED: + logger.warning("geopandas is not installed, skipping filled_geojson_column") + return np.nan + + if existing_latlon_series is None: + latlon_series = generate_latlon_series(num_rows) + else: + latlon_series = existing_latlon_series + buffer_series = gpd.GeoSeries(latlon_series).apply(lambda x: x.buffer(np.random.rand())) + return gpd.GeoSeries(buffer_series) + + +def generate_exterior_bounds_geojson_series( + num_rows: int, + existing_latlon_series: Optional[pd.Series] = None, +): + """ + Creates a series of shapely.geometry.Polygon values by + generating shapely.geometry.Point values, calling .buffer() + on them, and getting the exterior of the resulting object's .envelope, + resulting in rectangular LineString objects. + """ + if not GEOPANDAS_INSTALLED: + logger.warning("geopandas is not installed, skipping exterior_geojson_column") + return np.nan + + if existing_latlon_series is None: + latlon_series = generate_latlon_series(num_rows) + else: + latlon_series = existing_latlon_series + + envelope_series = gpd.GeoSeries(latlon_series).apply( + lambda x: x.buffer(np.random.rand()).envelope.exterior + ) + return gpd.GeoSeries(envelope_series) + + +def handle_geometry_series(s: pd.Series) -> pd.Series: + """ + Converts shapely.geometry values to JSON. 
+ """ + if not GEOPANDAS_INSTALLED: + return s + + types = ( + shapely.geometry.base.BaseGeometry, + shapely.geometry.base.BaseMultipartGeometry, + ) + if any(isinstance(v, types) for v in s.values): + logger.debug(f"series `{s.name}` has geometries; converting to JSON") + s = s.apply(lambda x: mapping(x) if isinstance(x, types) else x) + return s diff --git a/dx/utils/tracking.py b/dx/utils/tracking.py new file mode 100644 index 00000000..374e003e --- /dev/null +++ b/dx/utils/tracking.py @@ -0,0 +1,156 @@ +import hashlib +import uuid +from typing import Optional + +import pandas as pd +import structlog +from IPython import get_ipython +from IPython.core.interactiveshell import InteractiveShell +from pandas.util import hash_pandas_object +from sqlalchemy import create_engine + +from dx.utils.formatting import clean_column_values_for_hash, clean_column_values_for_sqlite + +logger = structlog.get_logger(__name__) +sql_engine = create_engine("sqlite://", echo=False) + + +DATAFRAME_HASH_TO_DISPLAY_ID = {} +DATAFRAME_HASH_TO_VAR_NAME = {} +DISPLAY_ID_TO_COLUMNS = {} +DISPLAY_ID_TO_DATAFRAME_HASH = {} +SUBSET_TO_DATAFRAME_HASH = {} + + +def get_display_id_for_df(df: pd.DataFrame) -> str: + df_hash = generate_df_hash(df) + return DATAFRAME_HASH_TO_DISPLAY_ID.get(df_hash) + + +def generate_df_hash(df: pd.DataFrame) -> str: + """ + Generates a single hash string for the dataframe object. 
def get_df_variable_name(
    df: pd.DataFrame,
    ipython_shell: Optional[InteractiveShell] = None,
) -> str:
    """
    Returns the variable name of the DataFrame object.

    Dataframes in the IPython user namespace are compared to `df` by content
    hash. A name without a leading underscore is preferred; otherwise the last
    matching underscore name (e.g. `_`) is used. When nothing matches, a new
    `unk_dataframe_<uuid>` name is generated for internal referencing.
    """
    ipython = ipython_shell or get_ipython()
    df_vars = {k: v for k, v in ipython.user_ns.items() if isinstance(v, pd.DataFrame)}
    logger.debug(f"dataframe variables present: {list(df_vars.keys())}")

    matching_df_vars = []
    if df_vars:
        # hash the target once instead of once per candidate variable
        df_hash = generate_df_hash(df)
        matching_df_vars = [k for k, v in df_vars.items() if generate_df_hash(v) == df_hash]

    # we might get a mix of references here like ['_', '__', 'df']
    named_df_vars_with_same_hash = [name for name in matching_df_vars if not name.startswith("_")]
    if named_df_vars_with_same_hash:
        logger.debug(f"{named_df_vars_with_same_hash=}")
        return named_df_vars_with_same_hash[0]

    if matching_df_vars:
        # dataframe rendered without variable assignment
        logger.debug(f"only underscore-prefixed variables match: {matching_df_vars=}")
        return matching_df_vars[-1]

    # no dataframe variables found, assign a new one for internal referencing
    logger.debug("no variables found matching this dataframe")
    df_uuid = f"unk_dataframe_{uuid.uuid4()}".replace("-", "")
    return df_uuid
+ """ + + if is_subset: + logger.debug("rendered subset of original dataset; not re-registering") + return + + global DATAFRAME_HASH_TO_DISPLAY_ID + global DATAFRAME_HASH_TO_VAR_NAME + global DISPLAY_ID_TO_DATAFRAME_HASH + global DISPLAY_ID_TO_COLUMNS + + DISPLAY_ID_TO_DATAFRAME_HASH[display_id] = df_hash + DATAFRAME_HASH_TO_DISPLAY_ID[df_hash] = display_id + + df_name = get_df_variable_name(df, ipython_shell=ipython_shell) + DATAFRAME_HASH_TO_VAR_NAME[df_hash] = df_name + logger.debug(f"registering display_id {display_id=} for `{df_name}`") + return f"{df_name}__{df_hash}" + + +def get_display_id(df_hash: str) -> str: + """ + Checks whether `df` is a subset of any others currently being tracked, + and either returns the known display ID or creates a new one. + """ + if df_hash in SUBSET_TO_DATAFRAME_HASH: + parent_df_hash = SUBSET_TO_DATAFRAME_HASH[df_hash] + display_id = DATAFRAME_HASH_TO_DISPLAY_ID[parent_df_hash] + else: + display_id = str(uuid.uuid4()) + return display_id + + +def store_in_sqlite(table_name: str, df: pd.DataFrame): + tracking_df = df.copy() + + for col in tracking_df.columns: + tracking_df[col] = clean_column_values_for_sqlite(tracking_df[col]) + + logger.debug(f"writing to `{table_name}` table in sqlite") + with sql_engine.begin() as conn: + num_written_rows = tracking_df.to_sql(table_name, con=conn, if_exists="replace") + logger.debug(f"wrote {num_written_rows} row(s) to `{table_name}` table") + return num_written_rows diff --git a/poetry.lock b/poetry.lock index 4a07d001..fe0e35bc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8,7 +8,7 @@ python-versions = "*" [[package]] name = "asttokens" -version = "2.0.5" +version = "2.0.8" description = "Annotate AST trees with source code positions" category = "main" optional = false @@ -18,11 +18,11 @@ python-versions = "*" six = "*" [package.extras] -test = ["astroid", "pytest"] +test = ["pytest", "astroid (<=2.5.3)"] [[package]] name = "atomicwrites" -version = "1.4.0" +version = "1.4.1" 
description = "Atomic file writes." category = "dev" optional = false @@ -30,17 +30,17 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "attrs" -version = "21.4.0" +version = "22.1.0" description = "Classes Without Boilerplate" -category = "dev" +category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.5" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] [[package]] name = "backcall" @@ -72,20 +72,56 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "certifi" +version = "2022.6.15" +description = "Python package for providing Mozilla's CA Bundle." 
+category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-plugins" +version = "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "wheel", "pytest-cov", "pytest (>=3.6)"] + +[[package]] +name = "cligj" +version = "0.7.2" +description = "Click params for commmand line interfaces to GeoJSON" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" + +[package.dependencies] +click = ">=4.0" + +[package.extras] +test = ["pytest-cov"] + [[package]] name = "colorama" -version = "0.4.4" +version = "0.4.5" description = "Cross-platform colored terminal text." category = "main" optional = false @@ -101,12 +137,46 @@ python-versions = ">=3.5" [[package]] name = "executing" -version = "0.8.3" +version = "0.10.0" description = "Get the currently executing AST node of a frame, and other information" category = "main" optional = false python-versions = "*" +[[package]] +name = "faker" +version = "14.1.0" +description = "Faker is a Python package that generates fake data for you." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +python-dateutil = ">=2.4" + +[[package]] +name = "fiona" +version = "1.8.21" +description = "Fiona reads and writes spatial data files" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +attrs = ">=17" +certifi = "*" +click = ">=4.0" +click-plugins = ">=1.0" +cligj = ">=0.5" +munch = "*" +six = ">=1.7" + +[package.extras] +all = ["boto3 (>=1.2.4)", "pytest-cov", "shapely", "pytest (>=3)", "mock"] +calc = ["shapely"] +s3 = ["boto3 (>=1.2.4)"] +test = ["pytest (>=3)", "pytest-cov", "boto3 (>=1.2.4)", "mock"] + [[package]] name = "flake8" version = "4.0.1" @@ -120,6 +190,32 @@ mccabe = ">=0.6.0,<0.7.0" pycodestyle = ">=2.8.0,<2.9.0" pyflakes = ">=2.4.0,<2.5.0" +[[package]] +name = "geopandas" +version = "0.11.1" +description = "Geographic pandas extensions" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +fiona = ">=1.8" +packaging = "*" +pandas = ">=1.0.0" +pyproj = ">=2.6.1.post1" +shapely = ">=1.7,<2" + +[[package]] +name = "greenlet" +version = "1.1.2" +description = "Lightweight in-process concurrent programming" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" + +[package.extras] +docs = ["sphinx"] + [[package]] name = "iniconfig" version = "1.1.1" @@ -130,7 +226,7 @@ python-versions = "*" [[package]] name = "ipython" -version = "8.2.0" +version = "8.4.0" description = "IPython: Productive Interactive Computing" category = "main" optional = false @@ -163,6 +259,20 @@ qtconsole = ["qtconsole"] test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] test_extra = ["pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.19)", "pandas", "trio"] +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." 
+category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + [[package]] name = "jedi" version = "0.18.1" @@ -197,6 +307,21 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "munch" +version = "2.5.0" +description = "A dot-accessible dictionary (a la JavaScript objects)" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[package.extras] +testing = ["pytest", "coverage", "astroid (>=1.5.3,<1.6.0)", "pylint (>=1.7.2,<1.8.0)", "astroid (>=2.0)", "pylint (>=2.3.1,<2.4.0)"] +yaml = ["PyYAML (>=5.1.0)"] + [[package]] name = "mypy-extensions" version = "0.4.3" @@ -207,7 +332,7 @@ python-versions = "*" [[package]] name = "numpy" -version = "1.22.3" +version = "1.23.2" description = "NumPy is the fundamental package for array computing with Python." 
category = "main" optional = false @@ -217,7 +342,7 @@ python-versions = ">=3.8" name = "packaging" version = "21.3" description = "Core utilities for Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -226,7 +351,7 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" [[package]] name = "pandas" -version = "1.4.2" +version = "1.4.3" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false @@ -310,7 +435,7 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "prompt-toolkit" -version = "3.0.29" +version = "3.0.30" description = "Library for building powerful interactive command lines in Python" category = "main" optional = false @@ -346,6 +471,17 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "pyarrow" +version = "8.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +numpy = ">=1.16.6" + [[package]] name = "pycodestyle" version = "2.8.0" @@ -356,7 +492,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [[package]] name = "pydantic" -version = "1.9.1" +version = "1.9.2" description = "Data validation and settings management using python type hints" category = "main" optional = false @@ -387,15 +523,26 @@ python-versions = ">=3.6" [[package]] name = "pyparsing" -version = "3.0.8" +version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.8" [package.extras] diagrams = ["railroad-diagrams", "jinja2"] +[[package]] +name = "pyproj" +version = "3.3.1" +description = "Python interface to PROJ (cartographic projections and coordinate transformations library)" +category = "main" +optional = false +python-versions = ">=3.8" + 
+[package.dependencies] +certifi = "*" + [[package]] name = "pytest" version = "7.1.2" @@ -430,12 +577,25 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2022.1" +version = "2022.2.1" description = "World timezone definitions, modern and historical" category = "main" optional = false python-versions = "*" +[[package]] +name = "shapely" +version = "1.8.2" +description = "Geometric objects, predicates, and operations" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +all = ["pytest", "pytest-cov", "numpy"] +test = ["pytest", "pytest-cov"] +vectorized = ["numpy"] + [[package]] name = "six" version = "1.16.0" @@ -444,9 +604,41 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "sqlalchemy" +version = "1.4.40" +description = "Database Abstraction Library" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} + +[package.extras] +aiomysql = ["greenlet (!=0.4.17)", "aiomysql"] +aiosqlite = ["typing_extensions (!=3.10.0.1)", "greenlet (!=0.4.17)", "aiosqlite"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["greenlet (!=0.4.17)", "asyncmy (>=0.2.3,!=0.2.4)"] +mariadb_connector = ["mariadb (>=1.0.1,!=1.1.2)"] +mssql = ["pyodbc"] +mssql_pymssql = ["pymssql"] +mssql_pyodbc = ["pyodbc"] +mypy = ["sqlalchemy2-stubs", "mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0,<2)", "mysqlclient (>=1.4.0)"] +mysql_connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=7,<8)", "cx_oracle (>=7)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql_asyncpg = ["greenlet (!=0.4.17)", "asyncpg"] 
+postgresql_pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql_psycopg2binary = ["psycopg2-binary"] +postgresql_psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql (<1)", "pymysql"] +sqlcipher = ["sqlcipher3-binary"] + [[package]] name = "stack-data" -version = "0.2.0" +version = "0.4.0" description = "Extract data from python stack frames and tracebacks for informative displays" category = "main" optional = false @@ -458,7 +650,20 @@ executing = "*" pure-eval = "*" [package.extras] -tests = ["pytest", "typeguard", "pygments", "littleutils", "cython"] +tests = ["cython", "littleutils", "pygments", "typeguard", "pytest"] + +[[package]] +name = "structlog" +version = "22.1.0" +description = "Structured Logging for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +dev = ["pre-commit", "rich", "cogapp", "tomli", "coverage", "freezegun (>=0.2.8)", "pretend", "pytest-asyncio (>=0.17)", "pytest (>=6.0)", "simplejson", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-mermaid", "twisted"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-mermaid", "twisted"] +tests = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest-asyncio (>=0.17)", "pytest (>=6.0)", "simplejson"] [[package]] name = "tomli" @@ -470,14 +675,14 @@ python-versions = ">=3.7" [[package]] name = "traitlets" -version = "5.1.1" -description = "Traitlets Python configuration system" +version = "5.3.0" +description = "" category = "main" optional = false python-versions = ">=3.7" [package.extras] -test = ["pytest"] +test = ["pre-commit", "pytest"] [[package]] name = "typing-extensions" @@ -495,10 +700,14 @@ category = "main" optional = false python-versions = "*" +[extras] +faker = ["Faker"] +geopandas = ["geopandas"] + [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "8b60acafcaea9ba29848da56dcdd5a35a9e5f8b4af53a4e2cd904fd5123e7fef" +content-hash = 
"fdd769ce73260ffde270e61fae0e6c79d73ba83f1aa51af6b79e21f5dabebf59" [metadata.files] appnope = [ @@ -506,16 +715,15 @@ appnope = [ {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] asttokens = [ - {file = "asttokens-2.0.5-py2.py3-none-any.whl", hash = "sha256:0844691e88552595a6f4a4281a9f7f79b8dd45ca4ccea82e5e05b4bbdb76705c"}, - {file = "asttokens-2.0.5.tar.gz", hash = "sha256:9a54c114f02c7a9480d56550932546a3f1fe71d8a02f1bc7ccd0ee3ee35cf4d5"}, + {file = "asttokens-2.0.8-py2.py3-none-any.whl", hash = "sha256:e3305297c744ae53ffa032c45dc347286165e4ffce6875dc662b205db0623d86"}, + {file = "asttokens-2.0.8.tar.gz", hash = "sha256:c61e16246ecfb2cde2958406b4c8ebc043c9e6d73aaa83c941673b35e5d3a76b"}, ] atomicwrites = [ - {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, - {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, ] attrs = [ - {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, - {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] backcall = [ {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, @@ -546,33 +754,124 @@ black = [ {file = "black-22.6.0-py3-none-any.whl", hash = "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, {file = 
"black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, ] +certifi = [ + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, +] click = [ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, ] +click-plugins = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] +cligj = [ + {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, + {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, +] colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, ] decorator = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] executing = [ - {file = "executing-0.8.3-py2.py3-none-any.whl", 
hash = "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9"}, - {file = "executing-0.8.3.tar.gz", hash = "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501"}, + {file = "executing-0.10.0-py2.py3-none-any.whl", hash = "sha256:9c745f80cda11eb22b62cbecf21156491a794eb56ab06f9d286a44e62822b24e"}, + {file = "executing-0.10.0.tar.gz", hash = "sha256:d1cd87c2e371e9966261410c5b3769d6df2f9e4a79a83eebd2662dd3388f9833"}, +] +faker = [] +fiona = [ + {file = "Fiona-1.8.21-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:39c656421e25b4d0d73d0b6acdcbf9848e71f3d9b74f44c27d2d516d463409ae"}, + {file = "Fiona-1.8.21-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b1d2e45506e56cf3a9f59ba5d6f7981f3f75f4725d1e6cb9a33ba856371ebd"}, + {file = "Fiona-1.8.21-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:315e186cb880a8128e110312eb92f5956bbc54d7152af999d3483b463758d6f9"}, + {file = "Fiona-1.8.21-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb2407623c4f44732a33b3f056f8c58c54152b51f0324bf8f10945e711eb549"}, + {file = "Fiona-1.8.21-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:b69054ed810eb7339d7effa88589afca48003206d7627d0b0b149715fc3fde41"}, + {file = "Fiona-1.8.21-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:11532ccfda1073d3f5f558e4bb78d45b268e8680fd6e14993a394c564ddbd069"}, + {file = "Fiona-1.8.21-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:3789523c811809a6e2e170cf9c437631f959f4c7a868f024081612d30afab468"}, + {file = "Fiona-1.8.21-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:085f18d943097ac3396f3f9664ac1ae04ad0ff272f54829f03442187f01b6116"}, + {file = "Fiona-1.8.21-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:388acc9fa07ba7858d508dfe826d4b04d813818bced16c4049de19cc7ca322ef"}, + {file = "Fiona-1.8.21-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:40b4eaf5b88407421d6c9e707520abd2ff16d7cd43efb59cd398aa41d2de332c"}, + {file = "Fiona-1.8.21.tar.gz", hash = "sha256:3a0edca2a7a070db405d71187214a43d2333a57b4097544a3fcc282066a58bfc"}, ] flake8 = [ {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, ] +geopandas = [ + {file = "geopandas-0.11.1-py3-none-any.whl", hash = "sha256:f3344937f3866e52996c7e505d56dae78be117dc840cd1c23507da0b33c0af71"}, + {file = "geopandas-0.11.1.tar.gz", hash = "sha256:f0f0c8d0423d30cf81de2056d853145c4362739350a7f8f2d72cc7409ef1eca1"}, +] +greenlet = [ + {file = "greenlet-1.1.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6"}, + {file = "greenlet-1.1.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a"}, + {file = "greenlet-1.1.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d"}, + {file = "greenlet-1.1.2-cp27-cp27m-win32.whl", hash = "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713"}, + {file = "greenlet-1.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40"}, + {file = "greenlet-1.1.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d"}, + {file = "greenlet-1.1.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8"}, + {file = "greenlet-1.1.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58"}, + {file = "greenlet-1.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965"}, + {file = "greenlet-1.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708"}, + {file = "greenlet-1.1.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23"}, + {file = "greenlet-1.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee"}, + {file = "greenlet-1.1.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c"}, + {file = "greenlet-1.1.2-cp35-cp35m-win32.whl", hash = "sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963"}, + {file = "greenlet-1.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e"}, + {file = "greenlet-1.1.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168"}, + {file = "greenlet-1.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f"}, + {file = "greenlet-1.1.2-cp36-cp36m-win32.whl", hash = "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa"}, + {file = "greenlet-1.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d"}, + {file = "greenlet-1.1.2-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5"}, + {file = "greenlet-1.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe"}, + {file = "greenlet-1.1.2-cp37-cp37m-win32.whl", hash = 
"sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc"}, + {file = "greenlet-1.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06"}, + {file = "greenlet-1.1.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b"}, + {file = "greenlet-1.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2"}, + {file = "greenlet-1.1.2-cp38-cp38-win32.whl", hash = "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd"}, + {file = "greenlet-1.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3"}, + {file = "greenlet-1.1.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5"}, 
+ {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3"}, + {file = "greenlet-1.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3"}, + {file = "greenlet-1.1.2-cp39-cp39-win32.whl", hash = "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf"}, + {file = "greenlet-1.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd"}, + {file = "greenlet-1.1.2.tar.gz", hash = "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a"}, +] iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] ipython = [ - {file = "ipython-8.2.0-py3-none-any.whl", hash = "sha256:1b672bfd7a48d87ab203d9af8727a3b0174a4566b4091e9447c22fb63ea32857"}, - {file = "ipython-8.2.0.tar.gz", hash = "sha256:70e5eb132cac594a34b5f799bd252589009905f05104728aea6a403ec2519dc1"}, + {file = "ipython-8.4.0-py3-none-any.whl", hash = "sha256:7ca74052a38fa25fe9bedf52da0be7d3fdd2fb027c3b778ea78dfe8c212937d1"}, + {file = "ipython-8.4.0.tar.gz", hash = "sha256:f2db3a10254241d9b447232cec8b424847f338d9d36f9a577a6192c332a46abd"}, +] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, + {file = "isort-5.10.1.tar.gz", hash = 
"sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, ] jedi = [ {file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"}, @@ -586,58 +885,70 @@ mccabe = [ {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, ] +munch = [ + {file = "munch-2.5.0-py2.py3-none-any.whl", hash = "sha256:6f44af89a2ce4ed04ff8de41f70b226b984db10a91dcc7b9ac2efc1c77022fdd"}, + {file = "munch-2.5.0.tar.gz", hash = "sha256:2d735f6f24d4dba3417fa448cae40c6e896ec1fdab6cdb5e6510999758a4dbd2"}, +] mypy-extensions = [ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, ] numpy = [ - {file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"}, - {file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"}, - {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"}, - {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"}, - {file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"}, - {file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"}, - {file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = 
"sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"}, - {file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"}, - {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"}, - {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"}, - {file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"}, - {file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"}, - {file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"}, - {file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"}, - {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"}, - {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"}, - {file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"}, - {file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"}, - {file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"}, - {file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"}, + {file = 
"numpy-1.23.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e603ca1fb47b913942f3e660a15e55a9ebca906857edfea476ae5f0fe9b457d5"}, + {file = "numpy-1.23.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:633679a472934b1c20a12ed0c9a6c9eb167fbb4cb89031939bfd03dd9dbc62b8"}, + {file = "numpy-1.23.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17e5226674f6ea79e14e3b91bfbc153fdf3ac13f5cc54ee7bc8fdbe820a32da0"}, + {file = "numpy-1.23.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdc02c0235b261925102b1bd586579b7158e9d0d07ecb61148a1799214a4afd5"}, + {file = "numpy-1.23.2-cp310-cp310-win32.whl", hash = "sha256:df28dda02c9328e122661f399f7655cdcbcf22ea42daa3650a26bce08a187450"}, + {file = "numpy-1.23.2-cp310-cp310-win_amd64.whl", hash = "sha256:8ebf7e194b89bc66b78475bd3624d92980fca4e5bb86dda08d677d786fefc414"}, + {file = "numpy-1.23.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dc76bca1ca98f4b122114435f83f1fcf3c0fe48e4e6f660e07996abf2f53903c"}, + {file = "numpy-1.23.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ecfdd68d334a6b97472ed032b5b37a30d8217c097acfff15e8452c710e775524"}, + {file = "numpy-1.23.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5593f67e66dea4e237f5af998d31a43e447786b2154ba1ad833676c788f37cde"}, + {file = "numpy-1.23.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac987b35df8c2a2eab495ee206658117e9ce867acf3ccb376a19e83070e69418"}, + {file = "numpy-1.23.2-cp311-cp311-win32.whl", hash = "sha256:d98addfd3c8728ee8b2c49126f3c44c703e2b005d4a95998e2167af176a9e722"}, + {file = "numpy-1.23.2-cp311-cp311-win_amd64.whl", hash = "sha256:8ecb818231afe5f0f568c81f12ce50f2b828ff2b27487520d85eb44c71313b9e"}, + {file = "numpy-1.23.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:909c56c4d4341ec8315291a105169d8aae732cfb4c250fbc375a1efb7a844f8f"}, + {file = "numpy-1.23.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:8247f01c4721479e482cc2f9f7d973f3f47810cbc8c65e38fd1bbd3141cc9842"}, + {file = "numpy-1.23.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8b97a8a87cadcd3f94659b4ef6ec056261fa1e1c3317f4193ac231d4df70215"}, + {file = "numpy-1.23.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd5b7ccae24e3d8501ee5563e82febc1771e73bd268eef82a1e8d2b4d556ae66"}, + {file = "numpy-1.23.2-cp38-cp38-win32.whl", hash = "sha256:9b83d48e464f393d46e8dd8171687394d39bc5abfe2978896b77dc2604e8635d"}, + {file = "numpy-1.23.2-cp38-cp38-win_amd64.whl", hash = "sha256:dec198619b7dbd6db58603cd256e092bcadef22a796f778bf87f8592b468441d"}, + {file = "numpy-1.23.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4f41f5bf20d9a521f8cab3a34557cd77b6f205ab2116651f12959714494268b0"}, + {file = "numpy-1.23.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:806cc25d5c43e240db709875e947076b2826f47c2c340a5a2f36da5bb10c58d6"}, + {file = "numpy-1.23.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9d84a24889ebb4c641a9b99e54adb8cab50972f0166a3abc14c3b93163f074"}, + {file = "numpy-1.23.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c403c81bb8ffb1c993d0165a11493fd4bf1353d258f6997b3ee288b0a48fce77"}, + {file = "numpy-1.23.2-cp39-cp39-win32.whl", hash = "sha256:cf8c6aed12a935abf2e290860af8e77b26a042eb7f2582ff83dc7ed5f963340c"}, + {file = "numpy-1.23.2-cp39-cp39-win_amd64.whl", hash = "sha256:5e28cd64624dc2354a349152599e55308eb6ca95a13ce6a7d5679ebff2962913"}, + {file = "numpy-1.23.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:806970e69106556d1dd200e26647e9bee5e2b3f1814f9da104a943e8d548ca38"}, + {file = "numpy-1.23.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bd879d3ca4b6f39b7770829f73278b7c5e248c91d538aab1e506c628353e47f"}, + {file = "numpy-1.23.2-pp38-pypy38_pp73-win_amd64.whl", hash = 
"sha256:be6b350dfbc7f708d9d853663772a9310783ea58f6035eec649fb9c4371b5389"}, + {file = "numpy-1.23.2.tar.gz", hash = "sha256:b78d00e48261fbbd04aa0d7427cf78d18401ee0abd89c7559bbf422e5b1c7d01"}, ] packaging = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, ] pandas = [ - {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07"}, - {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417"}, - {file = "pandas-1.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0010771bd9223f7afe5f051eb47c4a49534345dfa144f2f5470b27189a4dd3b5"}, - {file = "pandas-1.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3228198333dd13c90b6434ddf61aa6d57deaca98cf7b654f4ad68a2db84f8cfe"}, - {file = "pandas-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b79af3a69e5175c6fa7b4e046b21a646c8b74e92c6581a9d825687d92071b51"}, - {file = "pandas-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:5586cc95692564b441f4747c47c8a9746792e87b40a4680a2feb7794defb1ce3"}, - {file = "pandas-1.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:061609334a8182ab500a90fe66d46f6f387de62d3a9cb9aa7e62e3146c712167"}, - {file = "pandas-1.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b8134651258bce418cb79c71adeff0a44090c98d955f6953168ba16cc285d9f7"}, - {file = "pandas-1.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:df82739e00bb6daf4bba4479a40f38c718b598a84654cbd8bb498fd6b0aa8c16"}, - {file = "pandas-1.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:385c52e85aaa8ea6a4c600a9b2821181a51f8be0aee3af6f2dcb41dafc4fc1d0"}, - {file = 
"pandas-1.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:295872bf1a09758aba199992c3ecde455f01caf32266d50abc1a073e828a7b9d"}, - {file = "pandas-1.4.2-cp38-cp38-win32.whl", hash = "sha256:95c1e422ced0199cf4a34385ff124b69412c4bc912011ce895582bee620dfcaa"}, - {file = "pandas-1.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:5c54ea4ef3823108cd4ec7fb27ccba4c3a775e0f83e39c5e17f5094cb17748bc"}, - {file = "pandas-1.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c072c7f06b9242c855ed8021ff970c0e8f8b10b35e2640c657d2a541c5950f59"}, - {file = "pandas-1.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f549097993744ff8c41b5e8f2f0d3cbfaabe89b4ae32c8c08ead6cc535b80139"}, - {file = "pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff08a14ef21d94cdf18eef7c569d66f2e24e0bc89350bcd7d243dd804e3b5eb2"}, - {file = "pandas-1.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c5bf555b6b0075294b73965adaafb39cf71c312e38c5935c93d78f41c19828a"}, - {file = "pandas-1.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51649ef604a945f781105a6d2ecf88db7da0f4868ac5d45c51cb66081c4d9c73"}, - {file = "pandas-1.4.2-cp39-cp39-win32.whl", hash = "sha256:d0d4f13e4be7ce89d7057a786023c461dd9370040bdb5efa0a7fe76b556867a0"}, - {file = "pandas-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:09d8be7dd9e1c4c98224c4dfe8abd60d145d934e9fc1f5f411266308ae683e6a"}, - {file = "pandas-1.4.2.tar.gz", hash = "sha256:92bc1fc585f1463ca827b45535957815b7deb218c549b7c18402c322c7549a12"}, + {file = "pandas-1.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d51674ed8e2551ef7773820ef5dab9322be0828629f2cbf8d1fc31a0c4fed640"}, + {file = "pandas-1.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:16ad23db55efcc93fa878f7837267973b61ea85d244fc5ff0ccbcfa5638706c5"}, + {file = "pandas-1.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:958a0588149190c22cdebbc0797e01972950c927a11a900fe6c2296f207b1d6f"}, + 
{file = "pandas-1.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e48fbb64165cda451c06a0f9e4c7a16b534fcabd32546d531b3c240ce2844112"}, + {file = "pandas-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f803320c9da732cc79210d7e8cc5c8019aad512589c910c66529eb1b1818230"}, + {file = "pandas-1.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:2893e923472a5e090c2d5e8db83e8f907364ec048572084c7d10ef93546be6d1"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:24ea75f47bbd5574675dae21d51779a4948715416413b30614c1e8b480909f81"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ebc990bd34f4ac3c73a2724c2dcc9ee7bf1ce6cf08e87bb25c6ad33507e318"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d6c0106415ff1a10c326c49bc5dd9ea8b9897a6ca0c8688eb9c30ddec49535ef"}, + {file = "pandas-1.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78b00429161ccb0da252229bcda8010b445c4bf924e721265bec5a6e96a92e92"}, + {file = "pandas-1.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dfbf16b1ea4f4d0ee11084d9c026340514d1d30270eaa82a9f1297b6c8ecbf0"}, + {file = "pandas-1.4.3-cp38-cp38-win32.whl", hash = "sha256:48350592665ea3cbcd07efc8c12ff12d89be09cd47231c7925e3b8afada9d50d"}, + {file = "pandas-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:605d572126eb4ab2eadf5c59d5d69f0608df2bf7bcad5c5880a47a20a0699e3e"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a3924692160e3d847e18702bb048dc38e0e13411d2b503fecb1adf0fcf950ba4"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:755679c49460bd0d2f837ab99f0a26948e68fa0718b7e42afbabd074d945bf84"}, + {file = 
"pandas-1.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41fc406e374590a3d492325b889a2686b31e7a7780bec83db2512988550dadbf"}, + {file = "pandas-1.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d9382f72a4f0e93909feece6fef5500e838ce1c355a581b3d8f259839f2ea76"}, + {file = "pandas-1.4.3-cp39-cp39-win32.whl", hash = "sha256:0daf876dba6c622154b2e6741f29e87161f844e64f84801554f879d27ba63c0d"}, + {file = "pandas-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:721a3dd2f06ef942f83a819c0f3f6a648b2830b191a72bbe9451bcd49c3bd42e"}, + {file = "pandas-1.4.3.tar.gz", hash = "sha256:2ff7788468e75917574f080cd4681b27e1a7bf36461fe968b49a87b5a54d007c"}, ] parso = [ {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, @@ -664,8 +975,8 @@ pluggy = [ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] prompt-toolkit = [ - {file = "prompt_toolkit-3.0.29-py3-none-any.whl", hash = "sha256:62291dad495e665fca0bda814e342c69952086afb0f4094d0893d357e5c78752"}, - {file = "prompt_toolkit-3.0.29.tar.gz", hash = "sha256:bd640f60e8cecd74f0dc249713d433ace2ddc62b65ee07f96d358e0b152b6ea7"}, + {file = "prompt_toolkit-3.0.30-py3-none-any.whl", hash = "sha256:d8916d3f62a7b67ab353a952ce4ced6a1d2587dfe9ef8ebc30dd7c386751f289"}, + {file = "prompt_toolkit-3.0.30.tar.gz", hash = "sha256:859b283c50bde45f5f97829f77a4674d1c1fcd88539364f1b28a37805cfd89c0"}, ] ptyprocess = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -679,46 +990,78 @@ py = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] +pyarrow = [ + {file = 
"pyarrow-8.0.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071"}, + {file = "pyarrow-8.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8"}, + {file = "pyarrow-8.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7"}, + {file = "pyarrow-8.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0"}, + {file = "pyarrow-8.0.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82"}, + {file = "pyarrow-8.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867"}, + {file = "pyarrow-8.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654"}, + {file = "pyarrow-8.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00"}, + {file = "pyarrow-8.0.0-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d"}, + {file = "pyarrow-8.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5"}, + {file = "pyarrow-8.0.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b"}, + {file = "pyarrow-8.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62"}, + {file = "pyarrow-8.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb"}, + {file = "pyarrow-8.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab"}, + {file = "pyarrow-8.0.0-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19"}, + {file = "pyarrow-8.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479"}, + {file = "pyarrow-8.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9"}, + {file = "pyarrow-8.0.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647"}, + {file = "pyarrow-8.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8"}, + {file = "pyarrow-8.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849"}, + {file = "pyarrow-8.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871"}, + {file = "pyarrow-8.0.0-cp39-cp39-macosx_10_13_universal2.whl", hash = "sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055"}, + {file = "pyarrow-8.0.0-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f"}, + {file = "pyarrow-8.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16"}, + {file = "pyarrow-8.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456"}, + {file = "pyarrow-8.0.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c"}, + {file = "pyarrow-8.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1"}, + {file = "pyarrow-8.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc"}, + {file = "pyarrow-8.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f"}, + {file = "pyarrow-8.0.0.tar.gz", hash = "sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e"}, +] pycodestyle = [ {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, ] pydantic = [ - {file = "pydantic-1.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8098a724c2784bf03e8070993f6d46aa2eeca031f8d8a048dff277703e6e193"}, - {file = "pydantic-1.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c320c64dd876e45254bdd350f0179da737463eea41c43bacbee9d8c9d1021f11"}, - {file = "pydantic-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18f3e912f9ad1bdec27fb06b8198a2ccc32f201e24174cec1b3424dda605a310"}, - {file = "pydantic-1.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11951b404e08b01b151222a1cb1a9f0a860a8153ce8334149ab9199cd198131"}, - {file = "pydantic-1.9.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8bc541a405423ce0e51c19f637050acdbdf8feca34150e0d17f675e72d119580"}, - {file = "pydantic-1.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e565a785233c2d03724c4dc55464559639b1ba9ecf091288dd47ad9c629433bd"}, - {file = "pydantic-1.9.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:a4a88dcd6ff8fd47c18b3a3709a89adb39a6373f4482e04c1b765045c7e282fd"}, - {file = "pydantic-1.9.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:447d5521575f18e18240906beadc58551e97ec98142266e521c34968c76c8761"}, - {file = "pydantic-1.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:985ceb5d0a86fcaa61e45781e567a59baa0da292d5ed2e490d612d0de5796918"}, - {file = "pydantic-1.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059b6c1795170809103a1538255883e1983e5b831faea6558ef873d4955b4a74"}, - {file = "pydantic-1.9.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d12f96b5b64bec3f43c8e82b4aab7599d0157f11c798c9f9c528a72b9e0b339a"}, - {file = "pydantic-1.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ae72f8098acb368d877b210ebe02ba12585e77bd0db78ac04a1ee9b9f5dd2166"}, - {file = "pydantic-1.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:79b485767c13788ee314669008d01f9ef3bc05db9ea3298f6a50d3ef596a154b"}, - {file = "pydantic-1.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:494f7c8537f0c02b740c229af4cb47c0d39840b829ecdcfc93d91dcbb0779892"}, - {file = "pydantic-1.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0f047e11febe5c3198ed346b507e1d010330d56ad615a7e0a89fae604065a0e"}, - {file = "pydantic-1.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:969dd06110cb780da01336b281f53e2e7eb3a482831df441fb65dd30403f4608"}, - {file = "pydantic-1.9.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:177071dfc0df6248fd22b43036f936cfe2508077a72af0933d0c1fa269b18537"}, - {file = "pydantic-1.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9bcf8b6e011be08fb729d110f3e22e654a50f8a826b0575c7196616780683380"}, - {file = "pydantic-1.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a955260d47f03df08acf45689bd163ed9df82c0e0124beb4251b1290fa7ae728"}, - {file = 
"pydantic-1.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9ce157d979f742a915b75f792dbd6aa63b8eccaf46a1005ba03aa8a986bde34a"}, - {file = "pydantic-1.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0bf07cab5b279859c253d26a9194a8906e6f4a210063b84b433cf90a569de0c1"}, - {file = "pydantic-1.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d93d4e95eacd313d2c765ebe40d49ca9dd2ed90e5b37d0d421c597af830c195"}, - {file = "pydantic-1.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1542636a39c4892c4f4fa6270696902acb186a9aaeac6f6cf92ce6ae2e88564b"}, - {file = "pydantic-1.9.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a9af62e9b5b9bc67b2a195ebc2c2662fdf498a822d62f902bf27cccb52dbbf49"}, - {file = "pydantic-1.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fe4670cb32ea98ffbf5a1262f14c3e102cccd92b1869df3bb09538158ba90fe6"}, - {file = "pydantic-1.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:9f659a5ee95c8baa2436d392267988fd0f43eb774e5eb8739252e5a7e9cf07e0"}, - {file = "pydantic-1.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b83ba3825bc91dfa989d4eed76865e71aea3a6ca1388b59fc801ee04c4d8d0d6"}, - {file = "pydantic-1.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1dd8fecbad028cd89d04a46688d2fcc14423e8a196d5b0a5c65105664901f810"}, - {file = "pydantic-1.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02eefd7087268b711a3ff4db528e9916ac9aa18616da7bca69c1871d0b7a091f"}, - {file = "pydantic-1.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7eb57ba90929bac0b6cc2af2373893d80ac559adda6933e562dcfb375029acee"}, - {file = "pydantic-1.9.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4ce9ae9e91f46c344bec3b03d6ee9612802682c1551aaf627ad24045ce090761"}, - {file = "pydantic-1.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:72ccb318bf0c9ab97fc04c10c37683d9eea952ed526707fabf9ac5ae59b701fd"}, - {file = "pydantic-1.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:61b6760b08b7c395975d893e0b814a11cf011ebb24f7d869e7118f5a339a82e1"}, - {file = "pydantic-1.9.1-py3-none-any.whl", hash = "sha256:4988c0f13c42bfa9ddd2fe2f569c9d54646ce84adc5de84228cfe83396f3bd58"}, - {file = "pydantic-1.9.1.tar.gz", hash = "sha256:1ed987c3ff29fff7fd8c3ea3a3ea877ad310aae2ef9889a119e22d3f2db0691a"}, + {file = "pydantic-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9c9e04a6cdb7a363d7cb3ccf0efea51e0abb48e180c0d31dca8d247967d85c6e"}, + {file = "pydantic-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fafe841be1103f340a24977f61dee76172e4ae5f647ab9e7fd1e1fca51524f08"}, + {file = "pydantic-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afacf6d2a41ed91fc631bade88b1d319c51ab5418870802cedb590b709c5ae3c"}, + {file = "pydantic-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ee0d69b2a5b341fc7927e92cae7ddcfd95e624dfc4870b32a85568bd65e6131"}, + {file = "pydantic-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ff68fc85355532ea77559ede81f35fff79a6a5543477e168ab3a381887caea76"}, + {file = "pydantic-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c0f5e142ef8217019e3eef6ae1b6b55f09a7a15972958d44fbd228214cede567"}, + {file = "pydantic-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:615661bfc37e82ac677543704437ff737418e4ea04bef9cf11c6d27346606044"}, + {file = "pydantic-1.9.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:328558c9f2eed77bd8fffad3cef39dbbe3edc7044517f4625a769d45d4cf7555"}, + {file = "pydantic-1.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bd446bdb7755c3a94e56d7bdfd3ee92396070efa8ef3a34fab9579fe6aa1d84"}, + {file = "pydantic-1.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e0b214e57623a535936005797567231a12d0da0c29711eb3514bc2b3cd008d0f"}, + {file = "pydantic-1.9.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d8ce3fb0841763a89322ea0432f1f59a2d3feae07a63ea2c958b2315e1ae8adb"}, + {file = "pydantic-1.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b34ba24f3e2d0b39b43f0ca62008f7ba962cff51efa56e64ee25c4af6eed987b"}, + {file = "pydantic-1.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:84d76ecc908d917f4684b354a39fd885d69dd0491be175f3465fe4b59811c001"}, + {file = "pydantic-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4de71c718c9756d679420c69f216776c2e977459f77e8f679a4a961dc7304a56"}, + {file = "pydantic-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5803ad846cdd1ed0d97eb00292b870c29c1f03732a010e66908ff48a762f20e4"}, + {file = "pydantic-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a8c5360a0297a713b4123608a7909e6869e1b56d0e96eb0d792c27585d40757f"}, + {file = "pydantic-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:cdb4272678db803ddf94caa4f94f8672e9a46bae4a44f167095e4d06fec12979"}, + {file = "pydantic-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:19b5686387ea0d1ea52ecc4cffb71abb21702c5e5b2ac626fd4dbaa0834aa49d"}, + {file = "pydantic-1.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:32e0b4fb13ad4db4058a7c3c80e2569adbd810c25e6ca3bbd8b2a9cc2cc871d7"}, + {file = "pydantic-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91089b2e281713f3893cd01d8e576771cd5bfdfbff5d0ed95969f47ef6d676c3"}, + {file = "pydantic-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e631c70c9280e3129f071635b81207cad85e6c08e253539467e4ead0e5b219aa"}, + {file = "pydantic-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b3946f87e5cef3ba2e7bd3a4eb5a20385fe36521d6cc1ebf3c08a6697c6cfb3"}, + {file = 
"pydantic-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5565a49effe38d51882cb7bac18bda013cdb34d80ac336428e8908f0b72499b0"}, + {file = "pydantic-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:bd67cb2c2d9602ad159389c29e4ca964b86fa2f35c2faef54c3eb28b4efd36c8"}, + {file = "pydantic-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4aafd4e55e8ad5bd1b19572ea2df546ccace7945853832bb99422a79c70ce9b8"}, + {file = "pydantic-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:d70916235d478404a3fa8c997b003b5f33aeac4686ac1baa767234a0f8ac2326"}, + {file = "pydantic-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0ca86b525264daa5f6b192f216a0d1e860b7383e3da1c65a1908f9c02f42801"}, + {file = "pydantic-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1061c6ee6204f4f5a27133126854948e3b3d51fcc16ead2e5d04378c199b2f44"}, + {file = "pydantic-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e78578f0c7481c850d1c969aca9a65405887003484d24f6110458fb02cca7747"}, + {file = "pydantic-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5da164119602212a3fe7e3bc08911a89db4710ae51444b4224c2382fd09ad453"}, + {file = "pydantic-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ead3cd020d526f75b4188e0a8d71c0dbbe1b4b6b5dc0ea775a93aca16256aeb"}, + {file = "pydantic-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7d0f183b305629765910eaad707800d2f47c6ac5bcfb8c6397abdc30b69eeb15"}, + {file = "pydantic-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1a68f4f65a9ee64b6ccccb5bf7e17db07caebd2730109cb8a95863cfa9c4e55"}, + {file = "pydantic-1.9.2-py3-none-any.whl", hash = "sha256:78a4d6bdfd116a559aeec9a4cfe77dda62acc6233f8b56a716edad2651023e5e"}, + {file = "pydantic-1.9.2.tar.gz", hash = "sha256:8cb0bc509bfb71305d7a59d00163d5f9fc4530f0881ea32c74ff4f74c85f3d3d"}, ] pyflakes = [ {file = 
"pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, @@ -729,8 +1072,35 @@ pygments = [ {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"}, ] pyparsing = [ - {file = "pyparsing-3.0.8-py3-none-any.whl", hash = "sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06"}, - {file = "pyparsing-3.0.8.tar.gz", hash = "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954"}, + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pyproj = [ + {file = "pyproj-3.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:473961faef7a9fd723c5d432f65220ea6ab3854e606bf84b4d409a75a4261c78"}, + {file = "pyproj-3.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07c9d8d7ec009bbac09e233cfc725601586fe06880e5538a3a44eaf560ba3a62"}, + {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fef9c1e339f25c57f6ae0558b5ab1bbdf7994529a30d8d7504fc6302ea51c03"}, + {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:140fa649fedd04f680a39f8ad339799a55cb1c49f6a84e1b32b97e49646647aa"}, + {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b59c08aea13ee428cf8a919212d55c036cc94784805ed77c8f31a4d1f541058c"}, + {file = "pyproj-3.3.1-cp310-cp310-win32.whl", hash = "sha256:1adc9ccd1bf04998493b6a2e87e60656c75ab790653b36cfe351e9ef214828ed"}, + {file = "pyproj-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:42eea10afc750fccd1c5c4ba56de29ab791ab4d83c1f7db72705566282ac5396"}, + {file = "pyproj-3.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:531ea36519fa7b581466d4b6ab32f66ae4dadd9499d726352f71ee5e19c3d1c5"}, + 
{file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67025e37598a6bbed2c9c6c9e4c911f6dd39315d3e1148ead935a5c4d64309d5"}, + {file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aed1a3c0cd4182425f91b48d5db39f459bc2fe0d88017ead6425a1bc85faee33"}, + {file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cc4771403db54494e1e55bca8e6d33cde322f8cf0ed39f1557ff109c66d2cd1"}, + {file = "pyproj-3.3.1-cp38-cp38-win32.whl", hash = "sha256:c99f7b5757a28040a2dd4a28c9805fdf13eef79a796f4a566ab5cb362d10630d"}, + {file = "pyproj-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:5dac03d4338a4c8bd0f69144c527474f517b4cbd7d2d8c532cd8937799723248"}, + {file = "pyproj-3.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:56b0f9ee2c5b2520b18db30a393a7b86130cf527ddbb8c96e7f3c837474a9d79"}, + {file = "pyproj-3.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f1032e5dfb50eae06382bcc7b9011b994f7104d932fe91bd83a722275e30e8ce"}, + {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f92d8f6514516124abb714dce912b20867831162cfff9fae2678ef07b6fcf0f"}, + {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ef1bfbe2dcc558c7a98e2f1836abdcd630390f3160724a6f4f5c818b2be0ad5"}, + {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ca5f32b56210429b367ca4f9a57ffe67975c487af82e179a24370879a3daf68"}, + {file = "pyproj-3.3.1-cp39-cp39-win32.whl", hash = "sha256:aba199704c824fb84ab64927e7bc9ef71e603e483130ec0f7e09e97259b8f61f"}, + {file = "pyproj-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:120d45ed73144c65e9677dc73ba8a531c495d179dd9f9f0471ac5acc02d7ac4b"}, + {file = "pyproj-3.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:52efb681647dfac185cc655a709bc0caaf910031a0390f816f5fc8ce150cbedc"}, + {file = 
"pyproj-3.3.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ab0d6e38fda7c13726afacaf62e9f9dd858089d67910471758afd9cb24e0ecd"}, + {file = "pyproj-3.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45487942c19c5a8b09c91964ea3201f4e094518e34743cae373889a36e3d9260"}, + {file = "pyproj-3.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:797ad5655d484feac14b0fbb4a4efeaac0cf780a223046e2465494c767fd1c3b"}, + {file = "pyproj-3.3.1.tar.gz", hash = "sha256:b3d8e14d91cc95fb3dbc03a9d0588ac58326803eefa5bbb0978d109de3304fbe"}, ] pytest = [ {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, @@ -741,24 +1111,102 @@ python-dateutil = [ {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] pytz = [ - {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"}, - {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"}, + {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, + {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, +] +shapely = [ + {file = "Shapely-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c9e3400b716c51ba43eea1678c28272580114e009b6c78cdd00c44df3e325fa"}, + {file = "Shapely-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ce0b5c5f7acbccf98b3460eecaa40e9b18272b2a734f74fcddf1d7696e047e95"}, + {file = "Shapely-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3a40bf497b57a6625b83996aed10ce2233bca0e5471b8af771b186d681433ac5"}, + {file = "Shapely-1.8.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:6bdc7728f1e5df430d8c588661f79f1eed4a2728c8b689e12707cfec217f68f8"}, + {file = "Shapely-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a60861b5ca2c488ebcdc706eca94d325c26d1567921c74acc83df5e6913590c7"}, + {file = "Shapely-1.8.2-cp310-cp310-win32.whl", hash = "sha256:840be3f27a1152851c54b968f2e12d718c9f13b7acd51c482e58a70f60f29e31"}, + {file = "Shapely-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:c60f3758212ec480675b820b13035dda8af8f7cc560d2cc67999b2717fb8faef"}, + {file = "Shapely-1.8.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:56413f7d32c70b63f239eb0865b24c0c61029e38757de456cc4ab3c416559a0b"}, + {file = "Shapely-1.8.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:256bdf8080bb7bb504d47b2c76919ecebab9708cc1b26266b3ec32b42448f642"}, + {file = "Shapely-1.8.2-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0a0d7752b145343838bd36ed09382d85f5befe426832d7384c5b051c147acbd"}, + {file = "Shapely-1.8.2-cp36-cp36m-win32.whl", hash = "sha256:62056e64b12b6d483d79f8e34bf058d2fe734d51c9227c1713705399434eff3b"}, + {file = "Shapely-1.8.2-cp36-cp36m-win_amd64.whl", hash = "sha256:8e3ed52a081da58eb4a885c157c594876633dbd4eb283f13ba5bf39c82322d76"}, + {file = "Shapely-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7c8eda45085ccdd7f9805ea4a93fdd5eb0b6039a61d5f0cefb960487e6dc17a1"}, + {file = "Shapely-1.8.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:beee3949ddf381735049cfa6532fb234d5d20a5be910c4f2fb7c7295fd7960e3"}, + {file = "Shapely-1.8.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e07b0bd2a0e61a8afd4d1c1bd23f3550b711f01274ffb53de99358fd781eefd8"}, + {file = "Shapely-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:78966332a89813b237de357a03f612fd451a871fe6e26c12b6b71645fe8eee39"}, + {file = "Shapely-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:8fe641f1f61b3d43dd61b5a85d2ef023e6e19bf8f204a5160a1cb1ec645cbc09"}, 
+ {file = "Shapely-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:cec89a5617c0137f4678282e983c3d63bf838fb00cdf318cc555b4d8409f7130"}, + {file = "Shapely-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:68c8e18dc9dc8a198c3addc8c9596f64137101f566f04b96ecfca0b214cb8b12"}, + {file = "Shapely-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f12695662c3ad1e6031b3de98f191963d0f09de6d1a4988acd907405644032ba"}, + {file = "Shapely-1.8.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:15a856fbb588ad5d042784e00918c662902776452008c771ecba2ff615cd197a"}, + {file = "Shapely-1.8.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d74de394684d66e25e780b0359fda85be7766af85940fa2dfad728b1a815c71f"}, + {file = "Shapely-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f3fac625690f01f35af665649e993f15f924e740b5c0ac0376900655815521"}, + {file = "Shapely-1.8.2-cp38-cp38-win32.whl", hash = "sha256:1d95842cc6bbbeab673061b63e70b07be9a375c15a60f4098f8fbd29f43af1b4"}, + {file = "Shapely-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:a58e1f362f2091743e5e13212f5d5d16251a4bb63dd0ed587c652d3be9620d3a"}, + {file = "Shapely-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5254240eefc44139ab0d128faf671635d8bdd9c23955ee063d4d6b8f20073ae0"}, + {file = "Shapely-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:75042e8039c79dd01f102bb288beace9dc2f49fc44a2dea875f9b697aa8cd30d"}, + {file = "Shapely-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0c0fd457ce477b1dced507a72f1e2084c9191bfcb8a1e09886990ebd02acf024"}, + {file = "Shapely-1.8.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6fcb28836ae93809de1dde73c03c9c24bab0ba2b2bf419ddb2aeb72c96d110e9"}, + {file = "Shapely-1.8.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:44d2832c1b706bf43101fda92831a083467cc4b4923a7ed17319ab599c1025d8"}, + {file = 
"Shapely-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:137f1369630408024a62ff79a437a5657e6c5b76b9cd352dde704b425acdb298"}, + {file = "Shapely-1.8.2-cp39-cp39-win32.whl", hash = "sha256:2e02da2e988e74d61f15c720f9f613fab51942aae2dfeacdcb78eadece00e1f3"}, + {file = "Shapely-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:3423299254deec075e79fb7dc7909d702104e4167149de7f45510c3a6342eeea"}, + {file = "Shapely-1.8.2.tar.gz", hash = "sha256:572af9d5006fd5e3213e37ee548912b0341fb26724d6dc8a4e3950c10197ebb6"}, ] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +sqlalchemy = [ + {file = "SQLAlchemy-1.4.40-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:b07fc38e6392a65935dc8b486229679142b2ea33c94059366b4d8b56f1e35a97"}, + {file = "SQLAlchemy-1.4.40-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fb4edb6c354eac0fcc07cb91797e142f702532dbb16c1d62839d6eec35f814cf"}, + {file = "SQLAlchemy-1.4.40-cp27-cp27m-win32.whl", hash = "sha256:2026632051a93997cf8f6fda14360f99230be1725b7ab2ef15be205a4b8a5430"}, + {file = "SQLAlchemy-1.4.40-cp27-cp27m-win_amd64.whl", hash = "sha256:f2aa85aebc0ef6b342d5d3542f969caa8c6a63c8d36cf5098769158a9fa2123c"}, + {file = "SQLAlchemy-1.4.40-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0b9e3d81f86ba04007f0349e373a5b8c81ec2047aadb8d669caf8c54a092461"}, + {file = "SQLAlchemy-1.4.40-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1ab08141d93de83559f6a7d9a962830f918623a885b3759ec2b9d1a531ff28fe"}, + {file = "SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00dd998b43b282c71de46b061627b5edb9332510eb1edfc5017b9e4356ed44ea"}, + {file = 
"SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:bb342c0e25cc8f78a0e7c692da3b984f072666b316fbbec2a0e371cb4dfef5f0"}, + {file = "SQLAlchemy-1.4.40-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23b693876ac7963b6bc7b1a5f3a2642f38d2624af834faad5933913928089d1b"}, + {file = "SQLAlchemy-1.4.40-cp310-cp310-win32.whl", hash = "sha256:2cf50611ef4221ad587fb7a1708e61ff72966f84330c6317642e08d6db4138fd"}, + {file = "SQLAlchemy-1.4.40-cp310-cp310-win_amd64.whl", hash = "sha256:26ee4dbac5dd7abf18bf3cd8f04e51f72c339caf702f68172d308888cd26c6c9"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:b41b87b929118838bafc4bb18cf3c5cd1b3be4b61cd9042e75174df79e8ac7a2"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:885e11638946472b4a0a7db8e6df604b2cf64d23dc40eedc3806d869fcb18fae"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b7ff0a8bf0aec1908b92b8dfa1246128bf4f94adbdd3da6730e9c542e112542d"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfa8ab4ba0c97ab6bcae1f0948497d14c11b6c6ecd1b32b8a79546a0823d8211"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-win32.whl", hash = "sha256:d259fa08e4b3ed952c01711268bcf6cd2442b0c54866d64aece122f83da77c6d"}, + {file = "SQLAlchemy-1.4.40-cp36-cp36m-win_amd64.whl", hash = "sha256:c8d974c991eef0cd29418a5957ae544559dc326685a6f26b3a914c87759bf2f4"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:28b1791a30d62fc104070965f1a2866699c45bbf5adc0be0cf5f22935edcac58"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b7ccdca6cd167611f4a62a8c2c0c4285c2535640d77108f782ce3f3cccb70f3a"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:69deec3a94de10062080d91e1ba69595efeafeafe68b996426dec9720031fb25"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ad778f4e80913fb171247e4fa82123d0068615ae1d51a9791fc4284cb81748"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-win32.whl", hash = "sha256:9ced2450c9fd016f9232d976661623e54c450679eeefc7aa48a3d29924a63189"}, + {file = "SQLAlchemy-1.4.40-cp37-cp37m-win_amd64.whl", hash = "sha256:cdee4d475e35684d210dc6b430ff8ca2ed0636378ac19b457e2f6f350d1f5acc"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:08b47c971327e733ffd6bae2d4f50a7b761793efe69d41067fcba86282819eea"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf03d37819dc17a388d313919daf32058d19ba1e592efdf14ce8cbd997e6023"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a62c0ecbb9976550f26f7bf75569f425e661e7249349487f1483115e5fc893a6"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ec440990ab00650d0c7ea2c75bc225087afdd7ddcb248e3d934def4dff62762"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-win32.whl", hash = "sha256:2b64955850a14b9d481c17becf0d3f62fb1bb31ac2c45c2caf5ad06d9e811187"}, + {file = "SQLAlchemy-1.4.40-cp38-cp38-win_amd64.whl", hash = "sha256:959bf4390766a8696aa01285016c766b4eb676f712878aac5fce956dd49695d9"}, + {file = "SQLAlchemy-1.4.40-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:0992f3cc640ec0f88f721e426da884c34ff0a60eb73d3d64172e23dfadfc8a0b"}, + {file = 
"SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa9e0d7832b7511b3b3fd0e67fac85ff11fd752834c143ca2364c9b778c0485a"}, + {file = "SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c9d0f1a9538cc5e75f2ea0cb6c3d70155a1b7f18092c052e0d84105622a41b63"}, + {file = "SQLAlchemy-1.4.40-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c956a5d1adb49a35d78ef0fae26717afc48a36262359bb5b0cbd7a3a247c26f"}, + {file = "SQLAlchemy-1.4.40-cp39-cp39-win32.whl", hash = "sha256:6b70d02bbe1adbbf715d2249cacf9ac17c6f8d22dfcb3f1a4fbc5bf64364da8a"}, + {file = "SQLAlchemy-1.4.40-cp39-cp39-win_amd64.whl", hash = "sha256:bf073c619b5a7f7cd731507d0fdc7329bee14b247a63b0419929e4acd24afea8"}, + {file = "SQLAlchemy-1.4.40.tar.gz", hash = "sha256:44a660506080cc975e1dfa5776fe5f6315ddc626a77b50bf0eee18b0389ea265"}, +] stack-data = [ - {file = "stack_data-0.2.0-py3-none-any.whl", hash = "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e"}, - {file = "stack_data-0.2.0.tar.gz", hash = "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12"}, + {file = "stack_data-0.4.0-py3-none-any.whl", hash = "sha256:b94fed36d725cfabc6d09ed5886913e35eed9009766a1af1d5941b9da3a94aaa"}, + {file = "stack_data-0.4.0.tar.gz", hash = "sha256:a90ae7e260f7d15aefeceb46f0a028d4ccb9eb8856475c53e341945342d41ea7"}, +] +structlog = [ + {file = "structlog-22.1.0-py3-none-any.whl", hash = "sha256:760d37b8839bd4fe1747bed7b80f7f4de160078405f4b6a1db9270ccbfce6c30"}, + {file = "structlog-22.1.0.tar.gz", hash = "sha256:94b29b1d62b2659db154f67a9379ec1770183933d6115d21f21aa25cfc9a7393"}, ] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = 
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] traitlets = [ - {file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"}, - {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"}, + {file = "traitlets-5.3.0-py3-none-any.whl", hash = "sha256:65fa18961659635933100db8ca120ef6220555286949774b9cfc106f941d1c7a"}, + {file = "traitlets-5.3.0.tar.gz", hash = "sha256:0bb9f1f9f017aa8ec187d8b1b2a7a6626a2a1d877116baba52a129bfa124f8e2"}, ] typing-extensions = [ {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, diff --git a/poetry.toml b/poetry.toml new file mode 100644 index 00000000..53b35d37 --- /dev/null +++ b/poetry.toml @@ -0,0 +1,3 @@ +[virtualenvs] +create = true +in-project = true diff --git a/pyproject.toml b/pyproject.toml index 11cb9827..57a8cc4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dx" -version = "1.1.3" +version = "1.2.0" description = "Python wrapper for Data Explorer" authors = ["Dave Shoup ", "Kyle Kelley "] readme = "README.md" @@ -14,11 +14,24 @@ python = "^3.8" pandas = "^1.3.5" ipython = ">=7.31.1" pydantic = "^1.9.1" +pyarrow = "^8.0.0" +SQLAlchemy = "^1.4.40" +geopandas = { version = "^0.11.1", optional = true} +structlog = "^22.1.0" +Faker = {version = "^14.1.0", optional = true} [tool.poetry.dev-dependencies] pytest = "^7.1.2" black = {version = "^22.6.0", allow-prereleases = true} flake8 = "^4.0.1" +isort = "^5.10.1" +# for datatype testing +geopandas = "^0.11.1" +Faker = "^14.1.0" + +[tool.poetry.extras] +geopandas = ["geopandas"] +faker = ["faker"] [build-system] requires = ["poetry_core>=1.0.0"]