diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..0d031818
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,49 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v3.2.0
+  hooks:
+  - id: trailing-whitespace
+    language_version: python3
+  - id: end-of-file-fixer
+    language_version: python3
+  - id: check-yaml
+    language_version: python3
+  - id: check-added-large-files
+    language_version: python3
+  - id: check-byte-order-marker
+    language_version: python3
+  - id: check-case-conflict
+    language_version: python3
+  - id: check-json
+    language_version: python3
+  - id: mixed-line-ending
+    language_version: python3
+- repo: https://github.com/psf/black
+  rev: 21.12b0
+  hooks:
+  - id: black
+    language_version: python3
+- repo: https://github.com/asottile/blacken-docs
+  rev: v1.8.0
+  hooks:
+  - id: blacken-docs
+    additional_dependencies: [black]
+    language_version: python3
+- repo: https://github.com/timothycrosley/isort
+  rev: 5.7.0
+  hooks:
+  - id: isort
+    language_version: python3
+- repo: https://gitlab.com/pycqa/flake8
+  rev: 3.8.4
+  hooks:
+  - id: flake8
+    language_version: python3
+- repo: https://github.com/asottile/pyupgrade
+  rev: v2.7.2
+  hooks:
+  - id: pyupgrade
+    language_version: python3
+    args: [--py37-plus]
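The hooks configured above normally run only against files staged for commit, so when first adopting this config it helps to run everything once over the whole tree. A minimal sketch, assuming the tools are installed through the project's pipenv environment as set up elsewhere in this PR:

```bash
# Run every configured hook against the entire repository once
pipenv run pre-commit run --all-files

# Re-run a single hook by its id, e.g. only the import sorter
pipenv run pre-commit run isort --all-files
```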
"sha256:6b4b5031f69c48bf93a646b90de9b381c6b5f560df4cbe0ed3cf7650ae741e4d", + "sha256:aa68609c7454dbcaae60a01ff6b8df1de9b39fe6e50b1f6107ec81dcda624aa6" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==2.4.4" + }, "idna": { "hashes": [ "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", @@ -899,6 +930,13 @@ "index": "pypi", "version": "==0.16.1" }, + "nodeenv": { + "hashes": [ + "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b", + "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7" + ], + "version": "==1.6.0" + }, "packaging": { "hashes": [ "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", @@ -937,6 +975,14 @@ "markers": "python_version >= '3.6'", "version": "==1.0.0" }, + "pre-commit": { + "hashes": [ + "sha256:725fa7459782d7bec5ead072810e47351de01709be838c2ce1726b9591dad616", + "sha256:c1a8040ff15ad3d648c70cc3e55b93e4d2d5b687320955505587fd79bbaed06a" + ], + "index": "pypi", + "version": "==2.17.0" + }, "py": { "hashes": [ "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", @@ -1315,6 +1361,14 @@ "markers": "python_version >= '3.5'", "version": "==4.1.1" }, + "virtualenv": { + "hashes": [ + "sha256:339f16c4a86b44240ba7223d0f93a7887c3ca04b5f9c8129da7958447d079b09", + "sha256:d8458cf8d59d0ea495ad9b34c2599487f8a7772d796f9910858376d1600dd2dd" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==20.13.0" + }, "webencodings": { "hashes": [ "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", diff --git a/docs/contributing.rst b/docs/contributing.rst index 7e19c488..fe70871e 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -41,6 +41,12 @@ The `pipenv` package manager can install all of the Python tools necessary to ru pipenv install --dev +Now install `pre-commit` to run a battery of automatic quick fixes against your work. + +.. code-block:: bash + + pipenv run pre-commit install + Create an issue ############### @@ -221,8 +227,3 @@ Send a pull request ################### The final step is to submit a `pull request `_ to the main respository, asking the maintainers to consider integrating your patch. GitHub has `a short guide `_ that can walk you through the process. You should tag your issue number in the request so that they linked in GitHub’s system. - - - - - diff --git a/docs/scrapers/al.md b/docs/scrapers/al.md index f0ea093a..5e269fb3 100644 --- a/docs/scrapers/al.md +++ b/docs/scrapers/al.md @@ -8,7 +8,7 @@ -Media Inquiries: 334.242.0400 -Email: contact@madeinalabama.com - WARN Data Contact found by Cody: Jessica D. (phone # unknown) - + ### July 21, 2021 12PM I had an intuition that I needed to call earlier in order to get a response, so I did! I called back and I was transferred to Jessica D. (sounded like Jessica Dent or Dint?). Unfortunately I had to leave a voicemail again, but at least I got somewhere. @@ -21,7 +21,7 @@ Email follow-up ### July 1, 2021 Called the Media Inquiries number, left a voicemail with my callback number. Called the Toll Free number, left a voicemail with my callback number. Sent an email to the contact address with my questions. -### Jun 30th, 2021 +### Jun 30th, 2021 The following are some specific issues with this data that needs further processing and phone calls: - Q: I noticed in the last 8 lines of the data there are alternating rows of future dates and jan 1 1970, with some strange characters in the other fields. 
diff --git a/docs/contributing.rst b/docs/contributing.rst
index 7e19c488..fe70871e 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -41,6 +41,12 @@ The `pipenv` package manager can install all of the Python tools necessary to ru
 
     pipenv install --dev
 
+Now install `pre-commit` to run a battery of automatic quick fixes against your work.
+
+.. code-block:: bash
+
+    pipenv run pre-commit install
+
 Create an issue
 ###############
 
@@ -221,8 +227,3 @@ Send a pull request
 ###################
 
 The final step is to submit a `pull request `_ to the main repository, asking the maintainers to consider integrating your patch. GitHub has `a short guide `_ that can walk you through the process. You should tag your issue number in the request so that they are linked in GitHub’s system.
-
-
-
-
-
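For contributors unfamiliar with the tool: `pre-commit install` registers a git hook, so every subsequent `git commit` runs the checks configured in `.pre-commit-config.yaml`. A sketch of the resulting workflow (commit messages are illustrative):

```bash
# Hooks fire automatically; the commit is aborted if any hook fails
git commit -m "Add scraper fix"

# Hooks that rewrite files (black, isort, trailing-whitespace) leave
# their fixes unstaged; review them, stage, and commit again
git add -u && git commit -m "Add scraper fix"

# Bypass the hooks in an emergency (not recommended)
git commit --no-verify -m "WIP"
```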
diff --git a/docs/scrapers/al.md b/docs/scrapers/al.md
index f0ea093a..5e269fb3 100644
--- a/docs/scrapers/al.md
+++ b/docs/scrapers/al.md
@@ -8,7 +8,7 @@
 -Media Inquiries: 334.242.0400
 -Email: contact@madeinalabama.com
 - WARN Data Contact found by Cody: Jessica D. (phone # unknown)
- 
+
 ### July 21, 2021 12PM
 I had an intuition that I needed to call earlier in order to get a response, so I did! I called back and I was transferred to Jessica D. (sounded like Jessica Dent or Dint?). Unfortunately I had to leave a voicemail again, but at least I got somewhere.
@@ -21,7 +21,7 @@ Email follow-up
 ### July 1, 2021
 Called the Media Inquiries number, left a voicemail with my callback number. Called the Toll Free number, left a voicemail with my callback number. Sent an email to the contact address with my questions.
 
-### Jun 30th, 2021 
+### Jun 30th, 2021
 The following are some specific issues with this data that need further processing and phone calls:
 - Q: I noticed in the last 8 lines of the data there are alternating rows of future dates and Jan 1, 1970, with some strange characters in the other fields. Do these lines mean anything, or can we discard them?
diff --git a/docs/scrapers/ca.md b/docs/scrapers/ca.md
index e6e9b7e1..e4632e56 100644
--- a/docs/scrapers/ca.md
+++ b/docs/scrapers/ca.md
@@ -4,4 +4,3 @@
 California provides historical data as PDFs and data for the current fiscal year in an Excel file.
 
 - [Home page](https://edd.ca.gov/Jobs_and_Training/Layoff_Services_WARN.htm)
-
diff --git a/docs/scrapers/co.md b/docs/scrapers/co.md
index 8cdeba29..723fa9d1 100644
--- a/docs/scrapers/co.md
+++ b/docs/scrapers/co.md
@@ -7,10 +7,10 @@ WARN site: https://cdle.colorado.gov/employers/layoff-separations/layoff-warn-li
 Website feedback google form: https://docs.google.com/forms/d/e/1FAIpQLSdlZ6HyYpV4wzE-BMBS3S9YDclg5hfyaJlDblwXkETdMWFs1A/viewform (in case the other email isn't useful)
 
 ### Email sent to RRTeam@state.co.us Aug 6, 2021
->Hi, I'm emailing on behalf of Stanford's Big Local News project. We're trying to increase journalists' access to WARN data, and we had a question about Colorado's WARN data. 
+>Hi, I'm emailing on behalf of Stanford's Big Local News project. We're trying to increase journalists' access to WARN data, and we had a question about Colorado's WARN data.
 >In comparing your 2021 data with previous years, I'm wondering if 'layoff total' in the archived data (2015-2019) represents a sum of the 'permanent layoffs', 'temporary layoffs', and 'furloughs' columns in the 2021 data. Would that be a fair assessment?
 >Thanks for your help!
 >(P.S: if you happen to have any data for Colorado before 2015, that would be nice to have!)
 
 Response Received:
-> Thank you for your question. You are correct. The Total layoffs from 2015-2019 includes all permanent, temporary and furloughs reported under WARN for those years. We evolved the data as time progressed and we better understood the data needs of the public. 
+> Thank you for your question. You are correct. The Total layoffs from 2015-2019 includes all permanent, temporary and furloughs reported under WARN for those years. We evolved the data as time progressed and we better understood the data needs of the public.
diff --git a/docs/scrapers/dc.md b/docs/scrapers/dc.md
index 30abf387..c0bce1cb 100644
--- a/docs/scrapers/dc.md
+++ b/docs/scrapers/dc.md
@@ -4,4 +4,4 @@
 ### Jul 1st, 2021
 
-On the most recent page for WARN notices, the link for 2014 data directs to the page for 2018 data. To account for this problem, url for 2014 page is hard-coded in the code at the moment. Reported this problem to the [website maintenance](https://dc.gov/page/dcgovernmentwebsite). 
+On the most recent page for WARN notices, the link for 2014 data directs to the page for 2018 data. To account for this problem, the url for the 2014 page is hard-coded in the code at the moment. Reported this problem to the [website maintenance](https://dc.gov/page/dcgovernmentwebsite).
diff --git a/docs/scrapers/in.md b/docs/scrapers/in.md
index 00a0e332..f8fad604 100644
--- a/docs/scrapers/in.md
+++ b/docs/scrapers/in.md
@@ -13,7 +13,7 @@ Indianapolis, IN 46204
 warn-notice@dwd.in.gov
 
 ### July 21, 2021
-Response: 
+Response:
 >Cody:
 
 >This error should be fixed soon.
diff --git a/docs/scrapers/ks.md b/docs/scrapers/ks.md
index 020e5511..a12ce252 100644
--- a/docs/scrapers/ks.md
+++ b/docs/scrapers/ks.md
@@ -13,4 +13,3 @@
 The Kansas Job Center site does not display Non-WARN layoff notices by default but our scraper captures both WARN and Non-WARN notices. Therefore, our record counts are significantly higher than what you'll get from a count of records displayed online.
 
 [All data]: https://www.kansasworks.com/search/warn_lookups?commit=Search&page=1&q%5Bemployer_name_cont%5D=&q%5Bmain_contact_contact_info_addresses_full_location_city_matches%5D=&q%5Bnotice_eq%5D=true&q%5Bnotice_on_gteq%5D=&q%5Bnotice_on_lteq%5D=&q%5Bs%5D=notice_on+desc&q%5Bservice_delivery_area_id_eq%5D=&q%5Bzipcode_code_start%5D=&utf8=%E2%9C%93
-
diff --git a/docs/scrapers/mo.md b/docs/scrapers/mo.md
index 87328f6f..2b8ac88c 100644
--- a/docs/scrapers/mo.md
+++ b/docs/scrapers/mo.md
@@ -22,7 +22,7 @@ Response from MO Department of Higher Education and Workforce Development about t
 ![image](https://user-images.githubusercontent.com/56002814/124181019-fc64cc00-da82-11eb-87ed-d3c6cd2e3021.png)
 
-### Jun 29th, 2021 
+### Jun 29th, 2021
 The following are some specific issues with this data that need further processing and phone calls:
 - year 2015-2019 data is in fiscal year, but year 2020 and 2021 data is in calendar year. This produced some number of duplicate data entries between the 2019 and 2020 data pages. Some of these duplicate entries have minor differences in the number affected. This issue requires us to inquire about the difference between the 2020 and 2019 data pages as well as how the updated date works for different companies (some companies have multiple entries with the same date but different numbers). Currently, I am waiting for a response from the state agency.
diff --git a/docs/scrapers/ny.md b/docs/scrapers/ny.md
index fb660c37..43917af0 100644
--- a/docs/scrapers/ny.md
+++ b/docs/scrapers/ny.md
@@ -3,18 +3,18 @@
 - [Homepage](https://dol.ny.gov/warn-notices)
 - To request full documentation of any notices prior to 2021, email WebHelp@labor.ny.gov
 - [Contacts](https://dol.ny.gov/worker-adjustment-and-retraining-notification-warn)
-  - NYS WARN Coordinator: Janet Faraone (phone: 518-457-1518, email: [Janet.Faraone@labor.ny.gov](Janet.Faraone@labor.ny.gov)) 
+  - NYS WARN Coordinator: Janet Faraone (phone: 518-457-1518, email: [Janet.Faraone@labor.ny.gov](Janet.Faraone@labor.ny.gov))
 
 ### Jan 5, 2022
 We've implemented historical document scraping for NY (issue #136) and removed the old website scraping system while we wait for their website to be upgraded. As mentioned in July, we should implement a system of periodically requesting a historical document from NY. We are hosting this historical document at https://storage.googleapis.com/bln-data-public/warn-layoffs/ny_historical.xlsx .
 
 ### Jul 12th, 2021
-reaching out to the NYS WARN coordinator was fruitful. the state agency prepared and sent over an excel containing records from 2016 to 2021 within a day upon email request. Here is a snippet of the data: 
+Reaching out to the NYS WARN coordinator was fruitful. The state agency prepared and sent over an Excel file containing records from 2016 to 2021 within a day of the email request. Here is a snippet of the data:
 ![image](https://user-images.githubusercontent.com/56002814/125342121-736f4f80-e322-11eb-8c22-b1f22346ded7.png)
 
 The full dataset is currently attached to issue #136; it should be hosted on some platform in the future.
 
-When requesting the data, the state agency asked for a timeframe from which data is needed (i responded with 2016-2021), but it might be possible to request data prior to 2016 as well. 
+When requesting the data, the state agency asked for a timeframe from which data is needed (I responded with 2016-2021), but it might be possible to request data prior to 2016 as well.
 Regarding updating data in the future, the NYS WARN website is undergoing updates, and it should allow scraping in the future. In the meantime, we should periodically request updated data from NYS.
 
 ### Jul 6th, 2021
-- 2021 data doesn't directly contain # employee affected in the html table on the website, needs to parse the corresponding pdf to obtain the # affected. 
-- for data prior to 2021, each year is contained in a pdf and doesn't contain # affected, needs to reach out and ask if there is a better form of data/access company specific data. 
+- 2021 data doesn't directly contain # employees affected in the html table on the website; we need to parse the corresponding pdf to obtain the # affected.
+- for data prior to 2021, each year is contained in a pdf and doesn't contain # affected; we need to reach out and ask if there is a better form of data / access to company-specific data.
diff --git a/docs/scrapers/sc.md b/docs/scrapers/sc.md
index f9273540..f93ae2e8 100644
--- a/docs/scrapers/sc.md
+++ b/docs/scrapers/sc.md
@@ -1,6 +1,6 @@
 # South Carolina
 
-## Site: 
+## Site:
 https://scworks.org/employer/employer-programs/at-risk-of-closing/layoff-notification-reports
 
 ### 10/8/2021
diff --git a/docs/scrapers/tx.md b/docs/scrapers/tx.md
index e1b97ee7..9ccf6442 100644
--- a/docs/scrapers/tx.md
+++ b/docs/scrapers/tx.md
@@ -4,7 +4,7 @@ URL: https://www.twc.texas.gov/businesses/worker-adjustment-and-retraining-notif
 "To access older WARN notices or if you have any questions regarding WARN notices, contact TWC at warn.list@twc.texas.gov."
 
 ### 1/5/22
-Just want to document here that TX is an edge-case scraper, meaning that it has a hybrid strategy of both scraping the website and downloading a historical document, merging the two data. We've hosted the document at https://storage.googleapis.com/bln-data-public/warn-layoffs/tx_historical.xlsx 
+Just want to document here that TX is an edge-case scraper, meaning that it has a hybrid strategy of both scraping the website and downloading a historical document, then merging the two datasets. We've hosted the document at https://storage.googleapis.com/bln-data-public/warn-layoffs/tx_historical.xlsx
 
 ### 8/19/21
 **Cody:** "Hi Francisco,
 
 So it seems like our guess was correct, that the extra data included in the histo
@@ -23,14 +23,14 @@
 ### 8/18/21
 
-Noticed historical data seemed more comprehensive than yearly data for 2019 (see WARN/#227). Sent an email bringing to their attention & asking which is the best source of the WARN data. 
+Noticed historical data seemed more comprehensive than yearly data for 2019 (see WARN/#227). Sent an email bringing it to their attention & asking which is the best source of the WARN data.
 
 "Thanks so much for this data. I noticed something interesting that I thought you might like to be aware of. After looking at the differences between the WARN historical file you provided and the 2019 data available on the public-facing website, I noticed some interesting discrepancies: https://www.diffchecker.com/LBUWndL9 I was surprised to find that the historical data includes about 12 additional rows. I think this trend might extend to other years as well. I am wondering if you all are aware of this, and if so, whether the historical data or the data on the website would be a more accurate source for Texas' WARN information."
 
 ### 8/16/2021
-Received a reply from TX from cisco.gamez@twc.texas.gov, with the following .xlsx file. 
+Received a reply from TX from cisco.gamez@twc.texas.gov, with the following .xlsx file.
 
 [Warns 01-01-89-09-30-19.xlsx](https://github.com/biglocalnews/WARN/files/6994307/Warns.01-01-89-09-30-19.xlsx)
@@ -39,5 +39,5 @@
 This file seems to overlap quite a bit with our website scraper, maybe the website data is more up-to-date?
 
 ### 8/13/21
 
-Sent an email to the provided email address requesting WARN from prior years. Received an email back from cisco.gamez@twc.state.tx.us with the historical WARN data. He also CC'd "Hession,Margaret" and 
+Sent an email to the provided email address requesting WARN from prior years. Received an email back from cisco.gamez@twc.state.tx.us with the historical WARN data. He also CC'd "Hession,Margaret" and
 "Bernsen,James"
diff --git a/docs/scrapers/vt.md b/docs/scrapers/vt.md
index bd0dd5db..919264e9 100644
--- a/docs/scrapers/vt.md
+++ b/docs/scrapers/vt.md
@@ -13,4 +13,4 @@
 The Vermont Job Center site does not display Non-WARN layoff notices by default but our scraper captures both WARN and Non-WARN notices. Therefore, our record counts are significantly higher than what you'll get from a count of records displayed online.
 
-[All data]: https://www.vermontjoblink.com/search/warn_lookups?utf8=%E2%9C%93&q%5Bemployer_name_cont%5D=&q%5Bmain_contact_contact_info_addresses_full_location_city_matches%5D=&q%5Bzipcode_code_start%5D=&q%5Bservice_delivery_area_id_eq%5D=&q%5Bnotice_on_gteq%5D=&q%5Bnotice_on_lteq%5D=&q%5Bnotice_eq%5D=true&commit=Search 
+[All data]: https://www.vermontjoblink.com/search/warn_lookups?utf8=%E2%9C%93&q%5Bemployer_name_cont%5D=&q%5Bmain_contact_contact_info_addresses_full_location_city_matches%5D=&q%5Bzipcode_code_start%5D=&q%5Bservice_delivery_area_id_eq%5D=&q%5Bnotice_on_gteq%5D=&q%5Bnotice_on_lteq%5D=&q%5Bnotice_eq%5D=true&commit=Search
diff --git a/docs/scrapers/wi.md b/docs/scrapers/wi.md
index 4ea1e6b6..d61f2077 100644
--- a/docs/scrapers/wi.md
+++ b/docs/scrapers/wi.md
@@ -15,7 +15,7 @@ Madison, WI 53707
 
 ### July 23, 2021
 Sent an email asking about the 'Y' column. Hopefully we get a response! Seems like it's valid data, just unmarked.
-RESPONSE: 
+RESPONSE:
 Hi Cody – The unlabeled column you cite in your inquiry denotes whether an update to the notice was received at some point in time after receiving the initial notice. Thanks!
""" import time + from setuptools_scm.version import guess_next_version if version.exact: diff --git a/tests/conftest.py b/tests/conftest.py index a1e65938..cfbb5b59 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ import pytest - # NOTE: To check if vcrpy/pytest-vcr # is using cassettes as opposed to making # live web requests, uncomment below @@ -67,7 +66,7 @@ def read_fixture(file_name): def file_contents(pth): """Read provided file path.""" - with open(pth, "r", newline="") as f: + with open(pth, newline="") as f: return f.read() diff --git a/tests/test_cache.py b/tests/test_cache.py index c7ed0e60..797313fd 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,11 +1,12 @@ -from unittest.mock import patch from pathlib import Path +from unittest.mock import patch import pytest -from .conftest import file_contents from warn.cache import Cache +from .conftest import file_contents + def test_default_cache_dir(): """Override the output of the expanduser method.""" diff --git a/tests/test_job_center_cache.py b/tests/test_job_center_cache.py index f26da2df..ee922eb1 100644 --- a/tests/test_job_center_cache.py +++ b/tests/test_job_center_cache.py @@ -1,4 +1,5 @@ from pathlib import Path + import pytest from warn.platforms.job_center.cache import Cache diff --git a/warn/__init__.py b/warn/__init__.py index c23969ca..528c47fe 100644 --- a/warn/__init__.py +++ b/warn/__init__.py @@ -1,5 +1,4 @@ from warn import utils from warn.runner import Runner - __all__ = ("Runner", "utils") diff --git a/warn/cache.py b/warn/cache.py index 57290b92..09e94935 100644 --- a/warn/cache.py +++ b/warn/cache.py @@ -1,6 +1,6 @@ import os +from os.path import expanduser, join from pathlib import Path -from os.path import join, expanduser class Cache: @@ -47,7 +47,7 @@ def read(self, name): """ path = Path(self.path, name) - with open(path, "r", newline="") as infile: + with open(path, newline="") as infile: return infile.read() def write(self, name, content): diff --git a/warn/cli.py b/warn/cli.py index 5f87806c..6c4bd5e9 100644 --- a/warn/cli.py +++ b/warn/cli.py @@ -4,8 +4,7 @@ import click -from . import Runner -from . import utils +from . 
diff --git a/setup.py b/setup.py
index 465bde05..b60279a4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,14 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """Configure the package for distribution."""
+import distutils.cmd
 import os
+from pathlib import Path
+
+import jinja2
 import us
+from setuptools import find_packages, setup
+
 import warn
-import jinja2
-import distutils.cmd
-from pathlib import Path
-from setuptools import setup, find_packages
 
 
 def read(file_name):
@@ -27,6 +28,7 @@ def version_scheme(version):
     If that issue is resolved, this method can be removed.
     """
     import time
+
     from setuptools_scm.version import guess_next_version
 
     if version.exact:
diff --git a/tests/conftest.py b/tests/conftest.py
index a1e65938..cfbb5b59 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,6 @@
 
 import pytest
 
-
 # NOTE: To check if vcrpy/pytest-vcr
 # is using cassettes as opposed to making
 # live web requests, uncomment below
@@ -67,7 +66,7 @@ def read_fixture(file_name):
 
 def file_contents(pth):
     """Read provided file path."""
-    with open(pth, "r", newline="") as f:
+    with open(pth, newline="") as f:
         return f.read()
diff --git a/tests/test_cache.py b/tests/test_cache.py
index c7ed0e60..797313fd 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -1,11 +1,12 @@
-from unittest.mock import patch
 from pathlib import Path
+from unittest.mock import patch
 
 import pytest
 
-from .conftest import file_contents
 from warn.cache import Cache
 
+from .conftest import file_contents
+
 
 def test_default_cache_dir():
     """Override the output of the expanduser method."""
diff --git a/tests/test_job_center_cache.py b/tests/test_job_center_cache.py
index f26da2df..ee922eb1 100644
--- a/tests/test_job_center_cache.py
+++ b/tests/test_job_center_cache.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 import pytest
 
 from warn.platforms.job_center.cache import Cache
diff --git a/warn/__init__.py b/warn/__init__.py
index c23969ca..528c47fe 100644
--- a/warn/__init__.py
+++ b/warn/__init__.py
@@ -1,5 +1,4 @@
 from warn import utils
 from warn.runner import Runner
 
-
 __all__ = ("Runner", "utils")
diff --git a/warn/cache.py b/warn/cache.py
index 57290b92..09e94935 100644
--- a/warn/cache.py
+++ b/warn/cache.py
@@ -1,6 +1,6 @@
 import os
+from os.path import expanduser, join
 from pathlib import Path
-from os.path import join, expanduser
 
 
 class Cache:
@@ -47,7 +47,7 @@ def read(self, name):
         """
         path = Path(self.path, name)
-        with open(path, "r", newline="") as infile:
+        with open(path, newline="") as infile:
             return infile.read()
 
     def write(self, name, content):
diff --git a/warn/cli.py b/warn/cli.py
index 5f87806c..6c4bd5e9 100644
--- a/warn/cli.py
+++ b/warn/cli.py
@@ -4,8 +4,7 @@
 
 import click
 
-from . import Runner
-from . import utils
+from . import Runner, utils
 
 
 @click.command()
diff --git a/warn/platforms/__init__.py b/warn/platforms/__init__.py
index d19d9192..d35ce6ab 100644
--- a/warn/platforms/__init__.py
+++ b/warn/platforms/__init__.py
@@ -1,4 +1,3 @@
 from .job_center.site import Site as JobCenterSite
 
-
 __all__ = ("JobCenterSite",)
diff --git a/warn/platforms/job_center/cache.py b/warn/platforms/job_center/cache.py
index 81a45910..aeea9224 100644
--- a/warn/platforms/job_center/cache.py
+++ b/warn/platforms/job_center/cache.py
@@ -2,8 +2,8 @@
 import re
 
 from warn.cache import Cache as BaseCache
-from .urls import urls
 
+from .urls import urls
 
 logger = logging.getLogger(__name__)
diff --git a/warn/platforms/job_center/site.py b/warn/platforms/job_center/site.py
index 871c7055..bab264df 100644
--- a/warn/platforms/job_center/site.py
+++ b/warn/platforms/job_center/site.py
@@ -1,15 +1,14 @@
 import html as html_mod
 import logging
-import requests
 import urllib.parse
 from datetime import date
 
+import requests
 from bs4 import BeautifulSoup
 
 from .cache import Cache
 from .urls import urls
 
-
 logger = logging.getLogger(__name__)
diff --git a/warn/platforms/job_center/utils.py b/warn/platforms/job_center/utils.py
index 787eeb12..94194967 100644
--- a/warn/platforms/job_center/utils.py
+++ b/warn/platforms/job_center/utils.py
@@ -3,9 +3,10 @@
 from collections import OrderedDict
 from datetime import datetime as dt
 
-from .site import Site as JobCenterSite
 from warn.utils import write_dict_rows_to_csv, write_rows_to_csv
 
+from .site import Site as JobCenterSite
+
 logger = logging.getLogger(__name__)
@@ -116,7 +117,7 @@ def _dedupe(raw_csv, output_csv):
     """Create an ordered dict to discard dupes while preserving row order."""
     data = OrderedDict()
     raw_count = 0
-    with open(raw_csv, "r", newline="") as src:
+    with open(raw_csv, newline="") as src:
         for row in src:
             raw_count += 1
             data[row] = row
diff --git a/warn/runner.py b/warn/runner.py
index 20a7e800..214cfc88 100644
--- a/warn/runner.py
+++ b/warn/runner.py
@@ -1,5 +1,4 @@
 import logging
-
 from importlib import import_module
 from pathlib import Path
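Nearly every hunk above and below is the same mechanical change: isort rewrites each module header into alphabetized groups separated by blank lines. A hedged illustration of the convention, using names that already appear in this diff rather than a new module:

```python
# 1. Standard-library imports, alphabetized
import csv
import logging
from pathlib import Path

# 2. Third-party packages
import requests
from bs4 import BeautifulSoup

# 3. Local application imports
from .. import utils
```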
diff --git a/warn/scrapers/al.py b/warn/scrapers/al.py
index 1e85da46..e2257122 100644
--- a/warn/scrapers/al.py
+++ b/warn/scrapers/al.py
@@ -1,6 +1,6 @@
+import logging
 import re
 import typing
-import logging
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/ca.py b/warn/scrapers/ca.py
index 691182db..aca371a0 100644
--- a/warn/scrapers/ca.py
+++ b/warn/scrapers/ca.py
@@ -1,18 +1,17 @@
+import logging
 import re
 import shutil
 import typing
-import logging
 from pathlib import Path
 
-import requests
 import pdfplumber
+import requests
 from bs4 import BeautifulSoup
 from openpyxl import load_workbook
 
 from .. import utils
 from ..cache import Cache
 
-
 logger = logging.getLogger(__name__)
@@ -40,7 +39,7 @@ def scrape(
     # Initially write to a temp file in cache_dir before
     # over-writing prior output_csv, so we can use append
     # mode while avoiding data corruption if script errors out
-    temp_csv = "{}/ca_temp.csv".format(cache_state)
+    temp_csv = f"{cache_state}/ca_temp.csv"
     # Create Cache instance for downstream operations
     cache = Cache(cache_dir)
     # Update pdfs and Excel files
diff --git a/warn/scrapers/co.py b/warn/scrapers/co.py
index 71f4a40b..91524a9d 100644
--- a/warn/scrapers/co.py
+++ b/warn/scrapers/co.py
@@ -1,11 +1,10 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from .. import utils
 
-
 logger = logging.getLogger(__name__)
 
 # one set of fields per year, in order as listed on the year's document
@@ -107,7 +106,7 @@ def scrape(
         intermediate_csv_path = f"{cache_state}/{num}.csv"
         # TODO try to read from cache first
         utils.download_file(url, intermediate_csv_path)
-        with open(intermediate_csv_path, "r", newline="") as csvfile:
+        with open(intermediate_csv_path, newline="") as csvfile:
             reader = csv.reader(csvfile)
             rows_to_add = []
             for row_idx, row in enumerate(reader):
diff --git a/warn/scrapers/ct.py b/warn/scrapers/ct.py
index 0cae0600..3e00d6af 100644
--- a/warn/scrapers/ct.py
+++ b/warn/scrapers/ct.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/dc.py b/warn/scrapers/dc.py
index d6da67e2..bae5682e 100755
--- a/warn/scrapers/dc.py
+++ b/warn/scrapers/dc.py
@@ -1,10 +1,10 @@
 import csv
-import typing
 import logging
+import typing
+from datetime import datetime
 from pathlib import Path
 
 from bs4 import BeautifulSoup
-from datetime import datetime
 
 from .. import utils
diff --git a/warn/scrapers/fl.py b/warn/scrapers/fl.py
index 5fe9068d..352ae632 100755
--- a/warn/scrapers/fl.py
+++ b/warn/scrapers/fl.py
@@ -1,14 +1,14 @@
+import datetime
+import logging
 import re
 import typing
-import logging
-import urllib3
-import datetime
-from pathlib import Path
 from os.path import exists
+from pathlib import Path
 
+import pdfplumber
 import requests
 import tenacity
-import pdfplumber
+import urllib3
 from bs4 import BeautifulSoup
 
 from .. import utils
diff --git a/warn/scrapers/ia.py b/warn/scrapers/ia.py
index 527d9b00..5c58dbd8 100644
--- a/warn/scrapers/ia.py
+++ b/warn/scrapers/ia.py
@@ -1,5 +1,5 @@
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
diff --git a/warn/scrapers/in.py b/warn/scrapers/in.py
index 20f6a681..970b6b06 100644
--- a/warn/scrapers/in.py
+++ b/warn/scrapers/in.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/md.py b/warn/scrapers/md.py
index 38ce5c44..5e2d16f2 100644
--- a/warn/scrapers/md.py
+++ b/warn/scrapers/md.py
@@ -1,7 +1,7 @@
-import re
 import csv
-import typing
 import logging
+import re
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/mo.py b/warn/scrapers/mo.py
index 23b88b15..f3492640 100644
--- a/warn/scrapers/mo.py
+++ b/warn/scrapers/mo.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
diff --git a/warn/scrapers/ne.py b/warn/scrapers/ne.py
index ac202035..fab3e6e1 100644
--- a/warn/scrapers/ne.py
+++ b/warn/scrapers/ne.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
diff --git a/warn/scrapers/nj.py b/warn/scrapers/nj.py
index 1a863806..d46af4b5 100644
--- a/warn/scrapers/nj.py
+++ b/warn/scrapers/nj.py
@@ -1,6 +1,6 @@
-import typing
-import logging
 import itertools
+import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
diff --git a/warn/scrapers/ny.py b/warn/scrapers/ny.py
index e8448785..94c0b21b 100644
--- a/warn/scrapers/ny.py
+++ b/warn/scrapers/ny.py
@@ -1,5 +1,5 @@
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
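The repeated `open(path, "r", newline="")` to `open(path, newline="")` edits in these hunks are most likely pyupgrade's doing: `"r"` is already Python's default file mode, so dropping it changes nothing. A quick sketch of the equivalence (the file name is illustrative):

```python
# These two calls are identical; "r" (read, text mode) is the default.
with open("data.csv", "r", newline="") as f:
    before = f.read()

with open("data.csv", newline="") as f:
    after = f.read()

assert before == after
```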
b/warn/scrapers/oh.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/or.py b/warn/scrapers/or.py
index 198c2888..3fc8091d 100644
--- a/warn/scrapers/or.py
+++ b/warn/scrapers/or.py
@@ -1,5 +1,5 @@
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import pandas as pd
diff --git a/warn/scrapers/ri.py b/warn/scrapers/ri.py
index b69e0922..4ce53327 100644
--- a/warn/scrapers/ri.py
+++ b/warn/scrapers/ri.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/sd.py b/warn/scrapers/sd.py
index fbbc912c..d3bd767b 100644
--- a/warn/scrapers/sd.py
+++ b/warn/scrapers/sd.py
@@ -1,7 +1,7 @@
-import re
 import csv
-import typing
 import logging
+import re
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/tx.py b/warn/scrapers/tx.py
index 7e737201..caec7391 100644
--- a/warn/scrapers/tx.py
+++ b/warn/scrapers/tx.py
@@ -1,12 +1,12 @@
+import logging
 import re
 import typing
-import logging
 from pathlib import Path
+from zipfile import BadZipFile
 
 import pandas as pd
-from xlrd import XLRDError
 from bs4 import BeautifulSoup
-from zipfile import BadZipFile
+from xlrd import XLRDError
 
 from .. import utils
diff --git a/warn/scrapers/ut.py b/warn/scrapers/ut.py
index a2c1f402..49ae09ed 100644
--- a/warn/scrapers/ut.py
+++ b/warn/scrapers/ut.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/va.py b/warn/scrapers/va.py
index a7be9396..21d19fcd 100644
--- a/warn/scrapers/va.py
+++ b/warn/scrapers/va.py
@@ -1,5 +1,5 @@
-import typing
 import logging
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/scrapers/wa.py b/warn/scrapers/wa.py
index b3d98892..394dbed0 100644
--- a/warn/scrapers/wa.py
+++ b/warn/scrapers/wa.py
@@ -1,6 +1,6 @@
 import csv
-import typing
 import logging
+import typing
 from pathlib import Path
 
 import requests
@@ -64,7 +64,7 @@ def scrape(
         try:
             formdata = {
                 "__EVENTTARGET": "ucPSW$gvMain",
-                "__EVENTARGUMENT": "Page${}".format(page),
+                "__EVENTARGUMENT": f"Page${page}",
                 "__VIEWSTATE": soup_content.find(
                     "input", attrs={"name": "__VIEWSTATE"}
                 )["value"],
diff --git a/warn/scrapers/wi.py b/warn/scrapers/wi.py
index eb774bc3..114a1baf 100644
--- a/warn/scrapers/wi.py
+++ b/warn/scrapers/wi.py
@@ -1,7 +1,7 @@
-import re
 import csv
-import typing
 import logging
+import re
+import typing
 from pathlib import Path
 
 from bs4 import BeautifulSoup
diff --git a/warn/utils.py b/warn/utils.py
index c43ecc40..b77259c1 100644
--- a/warn/utils.py
+++ b/warn/utils.py
@@ -1,6 +1,6 @@
-import os
 import csv
 import logging
+import os
 from pathlib import Path
 
 import requests
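The two `.format()` to f-string conversions above (in ca.py and wa.py) are also typical pyupgrade rewrites under the `--py37-plus` flag configured earlier. A small sketch of the equivalence, with variables shaped like the ones in those hunks (the values here are illustrative, not taken from the scrapers):

```python
cache_state = "cache/ca"
page = 3

# Before: str.format
temp_csv_old = "{}/ca_temp.csv".format(cache_state)
event_old = "Page${}".format(page)

# After: f-strings; note a literal $ needs no escaping
temp_csv_new = f"{cache_state}/ca_temp.csv"
event_new = f"Page${page}"

assert temp_csv_old == temp_csv_new
assert event_old == event_new
```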