Skip to content

Commit

Permalink
Merge pull request #1 from fatbuddy/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
fatbuddy authored Mar 16, 2024
2 parents c3782e5 + c5ecbfb commit d398af0
Show file tree
Hide file tree
Showing 239 changed files with 565 additions and 236 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:
push:
branches:
- develop
- dev
- release
pull_request:
branches:
Expand All @@ -26,7 +27,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -56,6 +57,8 @@ jobs:
- name: Install Graphviz for other platforms
if: runner.os != 'Windows'
uses: ts-graphviz/setup-graphviz@v2
with:
macos-skip-brew-update: 'true'

- name: Cache venv
uses: actions/cache@v2
Expand Down
1 change: 1 addition & 0 deletions dataprep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Dataprep lets you prepare your data using a single library with a few lines of code.
"""

import logging

DEFAULT_PARTITIONS = 1
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/address_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Constants used by the clean_address() and validate_address() functions
"""

# pylint: disable=C0301, C0302, E1101

from builtins import zip
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ad_nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
Andorra NRT (Número de Registre Tributari, Andorra tax number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_al_nipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
NIPT (Numri i Identifikimit për Personin e Tatueshëm, Albanian VAT number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ar_cbu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
CBU (Clave Bancaria Uniforme, Argentine bank account number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ar_cuit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
CUIT (Código Único de Identificación Tributaria, Argentinian tax number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ar_dni.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
DNI (Documento Nacional de Identidad, Argentinian national identity nr.).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_at_uid.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
UID (Umsatzsteuer-Identifikationsnummer, Austrian VAT number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_at_vnr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
VNR, SVNR, VSNR (Versicherungsnummer, Austrian social security number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_au_abn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Australian Business Numbers (ABNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_au_acn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Australian Company Numbers (ACNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_au_tfn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Australian Tax File Numbers (TFNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_be_iban.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Belgian IBANs.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_be_vat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Belgian VAT numbers (VATs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_bg_egn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Bulgarian national identification numbers (EGNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_bg_pnf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Bulgarian personal number of a foreigner.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_bg_vat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Bulgarian VAT numbers (VATs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_bic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing ISO 9362 Business identifier codes.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_bitcoin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Bitcoin Addresses.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_br_cnpj.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing CNPJ numbers, Brazilian company identifier.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_br_cpf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing CPF numbers, Brazilian national identifier.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_by_unp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Belarusian UNP numbers (UNPs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ca_bn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Canadian Business Numbers (BNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ca_sin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Canadian Social Insurance Numbers (SINs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_casrn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing CAS Registry Numbers.
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ch_esr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Swiss EinzahlungsSchein mit Referenznummer (ESRs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ch_ssn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Swiss social security numbers (SSNs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ch_uid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Swiss business identifiers (UIDs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_ch_vat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Swiss VAT numbers (VATs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cl_rut.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Chile RUT/RUN numbers (RUTs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cn_ric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Chinese Resident Identity Card Number (RICs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cn_uscc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing Chinese Unified Social Credit Code
(China tax number) (USCCs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_co_nit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Colombian identity codes (NITs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
5 changes: 2 additions & 3 deletions dataprep/clean/clean_country.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing country names.
"""

from functools import lru_cache
from operator import itemgetter
from os import path
Expand Down Expand Up @@ -371,9 +372,7 @@ def _get_format_if_allowed(input_format: str, allowed_formats: Tuple[str, ...])
return (
"name"
if "name" in allowed_formats
else "official"
if "official" in allowed_formats
else None
else "official" if "official" in allowed_formats else None
)

return input_format if input_format in allowed_formats else None
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cr_cpf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Costa Rica physical person ID number (CPFs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cr_cpj.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Costa Rica tax number (CPJs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cr_cr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Costa Rica foreigners ID number (CRs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cu_ni.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Cuban identity card numbers (NIs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 0 additions & 1 deletion dataprep/clean/clean_currency.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def clean_currency(
report: bool = True,
progress: bool = False,
) -> Union[pd.DataFrame, dd.DataFrame]:

"""
Clean, standardize and convert currencies.
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cusip.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clean and validate a DataFrame column containing
CUSIP numbers (financial security identification number).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cy_vat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Cypriot VAT number (VATs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cz_dic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Czech VAT number (DICs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_cz_rc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing Czech birth numbers (RCs).
"""

# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
from operator import itemgetter
Expand Down
1 change: 1 addition & 0 deletions dataprep/clean/clean_date.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean and validate a DataFrame column containing dates and times.
"""

# pylint: disable=too-many-lines
import datetime
from copy import deepcopy
Expand Down
Loading

0 comments on commit d398af0

Please sign in to comment.