Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand our new directory creation system #332

Merged
merged 1 commit into from
Jan 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Our download dir
.warn-scraper

# Data folders
logs/
old_data/
Expand Down
4 changes: 2 additions & 2 deletions warn/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
@click.option(
"--data-dir",
default=utils.WARN_DATA_DIR,
type=click.Path(exists=True),
type=click.Path(),
help="The Path were the results will be saved",
)
@click.option(
"--cache-dir",
default=utils.WARN_CACHE_DIR,
type=click.Path(exists=True),
type=click.Path(),
help="The Path where results can be cached",
)
@click.option(
Expand Down
5 changes: 3 additions & 2 deletions warn/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from importlib import import_module
from pathlib import Path

from . import utils

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -31,8 +33,7 @@ def __init__(self, data_dir: Path, cache_dir: Path):

def setup(self):
"""Create the necessary directories."""
for d in [self.cache_dir, self.data_dir]:
Path(d).mkdir(parents=True, exist_ok=True)
utils.create_directory(self.data_dir, self.cache_dir)

def scrape(self, state):
"""Run the scraper for the provided state."""
Expand Down
2 changes: 1 addition & 1 deletion warn/scrapers/ca.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def scrape(
# Initially write to a temp file in cache_dir before
# over-writing prior output_csv, so we can use append
# mode while avoiding data corruption if script errors out
temp_csv = f"{cache_state}/ca_temp.csv"
temp_csv = cache_state / "ca_temp.csv"
# Create Cache instance for downstream operations
cache = Cache(cache_dir)
# Update pdfs and Excel files
Expand Down
2 changes: 1 addition & 1 deletion warn/scrapers/mo.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def scrape(
Returns: the Path where the file is written
"""
output_csv = data_dir / "mo.csv"
raw_csv = f"{cache_dir}/mo_raw.csv"
raw_csv = cache_dir / "mo_raw.csv"
years = range(2021, 2014, -1)
url = "https://jobs.mo.gov/warn2021"

Expand Down
27 changes: 23 additions & 4 deletions warn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,29 @@
WARN_DATA_DIR = WARN_OUTPUT_DIR / "exports"
WARN_LOG_DIR = WARN_OUTPUT_DIR / "logs"

# Ensure needed directories exist
for localdir in [WARN_CACHE_DIR, WARN_DATA_DIR, WARN_LOG_DIR]:
os.makedirs(localdir, exist_ok=True)

def create_directory(*args: Path):
    """
    Create the filesystem directories for the provided paths.

    Each argument is handled independently, so a directory that already
    exists never prevents the remaining ones from being created.

    Arguments:
    args -- one or more paths (Path objects or strings). A path that is an
            existing file, or that does not exist yet but ends in a file
            extension (e.g. "exports/mo.csv"), is treated as a file and its
            parent directory is created. Anything else is treated as a
            directory and created itself.
    """
    for path in args:
        # Accept plain strings too (e.g. values coming straight from the CLI)
        path = Path(path)

        # Get the directory path
        if path.is_file() or (not path.exists() and path.suffix):
            # If it's a file (existing, or a not-yet-written output file
            # recognised by its extension), take the parent. Checking only
            # is_file() would miss files that have not been written yet and
            # create a directory at the file's own path.
            directory = path.parent
        else:
            # Otherwise, assume it's a directory and we're good
            directory = path

        # If the path already exists, move on to the next one
        if directory.exists():
            continue

        # If not, let's make it
        logger.debug(f"Creating directory at {directory}")
        # exist_ok guards against another process creating it in between
        directory.mkdir(parents=True, exist_ok=True)


def write_rows_to_csv(rows: list, output_path: Path, mode="w"):
"""
Write the provided list to the provided path as comma-separated values.

Expand All @@ -39,6 +56,7 @@ def write_rows_to_csv(rows, output_path, mode="w"):
Keyword arguments:
mode -- the mode to be used when opening the file (default 'w')
"""
create_directory(output_path)
logger.debug(f"Writing {len(rows)} rows to {output_path}")
with open(output_path, mode, newline="") as f:
writer = csv.writer(f)
Expand All @@ -58,6 +76,7 @@ def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="r
mode -- the mode to be used when opening the file (default 'w')
extrasaction -- what to do if a field isn't in the headers (default 'raise')
"""
create_directory(output_path)
logger.debug(f"Writing {len(rows)} rows to {output_path}")
with open(output_path, mode, newline="") as f:
# Create the writer object
Expand Down