Skip to content

Commit

Permalink
Extend runner functionality, and clean up logging messages (#153)
Browse files Browse the repository at this point in the history
* remove print statements for easier query debugging

* feat: add support for markdown files outside of whale.

* fix: tests and finish refactor -> macros directory.

* chore: advance python version.
  • Loading branch information
rsyi authored Mar 5, 2021
1 parent ce95bba commit d9ad108
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 36 deletions.
2 changes: 1 addition & 1 deletion pipelines/run_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
result = execute_markdown_sql_blocks(args.filename)
else: # Catchall for all sql-based extensions
result = execute_sql_file(args.filename, warehouse_name=args.warehouse_name)
print(result)
print(result)
2 changes: 1 addition & 1 deletion pipelines/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setuptools.setup(
name="whale-pipelines",
version="1.5.1",
version="2.0.0b0",
author="Robert Yi",
author_email="[email protected]",
description="A pared-down metadata scraper + SQL runner.",
Expand Down
6 changes: 3 additions & 3 deletions pipelines/tests/unit/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def mock_whale_dir(monkeypatch, tmp_path, request):
"MANIFEST_PATH": paths.MANIFEST_PATH,
"METRICS_PATH": paths.METRICS_PATH,
"METADATA_PATH": paths.METADATA_PATH,
"TEMPLATE_DIR": paths.TEMPLATE_DIR,
"MACROS_DIR": paths.MACROS_DIR,
}.items():
d = get_mocked_path(tmp_path, path)
monkeypatch.setattr(paths, attr_name, d)
Expand All @@ -31,12 +31,12 @@ def mock_whale_dir(monkeypatch, tmp_path, request):
d.parent.mkdir(parents=True, exist_ok=True)
elif d.is_file():
d.touch(exist_ok=True)
if attr_name in ["TEMPLATE_DIR"]:
if attr_name in ["MACROS_DIR"]:
monkeypatch.setattr(sql, attr_name, d)
return tmp_path


@pytest.mark.parametrize(mock_whale_dir, [True], indirect=True)
@pytest.fixture()
def mock_template_dir(mock_whale_dir):
return mock_whale_dir / paths.TEMPLATE_DIR.parts[-1]
return mock_whale_dir / paths.MACROS_DIR.parts[-1]
5 changes: 0 additions & 5 deletions pipelines/tests/unit/utils/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,10 @@
from mock import call, patch
import pytest

from whale.utils.paths import TEMPLATE_DIR
from whale.utils.sql import (
template_query,
validate_templates,
_validate_and_print_result,
)
from whale.utils import paths

from whale.utils import sql

VALID_TEMPLATE = "{% set main = 5 %}"
VALID_FILE_NAME = "valid.sql"
Expand Down
13 changes: 5 additions & 8 deletions pipelines/whale/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
find_blocks_and_process,
sections_from_markdown,
markdown_from_sections,
HEADER_SECTION,
UGC_SECTION,
)
from whale.utils.config import get_connection, read_connections
Expand Down Expand Up @@ -58,8 +59,8 @@ def execute_markdown_sql_blocks(filepath: str) -> str:
"""
database, _, _, _ = get_table_info_from_path(filepath)
sections = sections_from_markdown(filepath)
ugc_blob = sections[UGC_SECTION]
with open(filepath, "r") as f:
ugc_blob = "".join(f.readlines())

def run_and_append_results(sql, warehouse_name=None) -> str:
if EXECUTION_FLAG in sql:
Expand All @@ -69,24 +70,20 @@ def run_and_append_results(sql, warehouse_name=None) -> str:
else:
return sql

new_markdown_blob = markdown_from_sections(sections)
if EXECUTION_FLAG in ugc_blob:
ugc_blob = find_blocks_and_process(
ugc_blob,
run_and_append_results,
function_kwargs={"warehouse_name": database},
)

sections[UGC_SECTION] = ugc_blob
new_markdown_blob = markdown_from_sections(sections)

with open(filepath, "w") as f:
f.write(new_markdown_blob)
f.write(ugc_blob)

elif EXECUTION_FLAG.strip() in ugc_blob:
LOGGER.warning(f"{EXECUTION_FLAG.strip()} must be on its own line.")

return new_markdown_blob
return ugc_blob


def execute_sql_file(filepath: str, warehouse_name: str = None):
Expand Down
46 changes: 34 additions & 12 deletions pipelines/whale/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,46 @@ def create_base_table_stub(file_path, database, cluster, schema, table):
safe_write(file_path, text_to_write)


def path_is_parent(parent_path, child_path):
    """Return True if ``child_path`` is ``parent_path`` itself or lies beneath it.

    Both arguments are made absolute and normalised with ``os.path.abspath``
    before they are compared, so relative paths, ``..`` segments and trailing
    separators do not affect the answer. Symbolic links are NOT resolved; a
    caller that cares about them should pass paths through
    ``os.path.realpath`` first.

    Args:
        parent_path: Candidate ancestor directory (``str`` or path-like).
        child_path: Path to test for containment (``str`` or path-like).

    Returns:
        bool: True when the normalised child path equals the normalised
        parent path or is located somewhere inside it, False otherwise.
    """
    parent_path = os.path.abspath(parent_path)
    child_path = os.path.abspath(child_path)

    try:
        shared_path = os.path.commonpath([parent_path, child_path])
    except ValueError:
        # commonpath refuses to compare paths that cannot share a prefix
        # (for example, different drives on Windows). Such a child can never
        # be inside the parent, so answer False instead of crashing.
        return False

    # Running commonpath over the parent alone regularises it in exactly the
    # same way as the two-path call above, so the comparison is like-for-like.
    return os.path.commonpath([parent_path]) == shared_path


def get_table_info_from_path(
file_path,
):
database = os.path.dirname(file_path)
table_string = str(file_path).split(database + "/")[-1]

database = str(database).split("/")[-1]
table_components = table_string.split(".")
table = table_components[-2]
if len(table_components) == 4:
cluster = table_components[-4]
schema = table_components[-3]
elif len(table_components) == 3:
cluster = None
schema = table_components[-3]
if path_is_parent(paths.BASE_DIR, file_path):
database = os.path.dirname(file_path)
table_string = str(file_path).split(database + "/")[-1]

database = str(database).split("/")[-1]
table_components = table_string.split(".")
table = table_components[-2]
if len(table_components) == 4:
cluster = table_components[-4]
schema = table_components[-3]
elif len(table_components) == 3:
cluster = None
schema = table_components[-3]
else:
cluster = None
schema = None

else:
database = None
cluster = None
schema = None
table = None

return database, cluster, schema, table


Expand Down
1 change: 0 additions & 1 deletion pipelines/whale/utils/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
TMP_MANIFEST_PATH = MANIFEST_DIR / "tmp_manifest.txt"
ETL_LOG_PATH = LOGS_DIR / "cron.log"
TABLE_COUNT_PATH = LOGS_DIR / "table_count.csv"
TEMPLATE_DIR = BASE_DIR / "templates/"


def get_subdir_without_whale(path):
Expand Down
10 changes: 5 additions & 5 deletions pipelines/whale/utils/sql.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from jinja2 import Environment, BaseLoader
from pathlib import Path
from termcolor import colored
from whale.utils.paths import TEMPLATE_DIR
from whale.utils.paths import MACROS_DIR

DEFAULT_TEMPLATE_NAME = "default.sql"
FAILING_COLOR = "red"
Expand All @@ -15,7 +15,7 @@ def template_query(query, connection_name=""):
templated query.
"""
# First determine the connection type, and look for a "connection_name.sql" file in the macros directory.
template_file_path = TEMPLATE_DIR / (connection_name + ".sql")
template_file_path = MACROS_DIR / ((connection_name or "") + ".sql")
is_template_file_path_found = template_file_path.is_file()

if is_template_file_path_found:
Expand All @@ -31,8 +31,8 @@ def template_query(query, connection_name=""):


def validate_templates():
if TEMPLATE_DIR.is_dir():
for template_file in TEMPLATE_DIR.glob("**/*"):
if MACROS_DIR.is_dir():
for template_file in MACROS_DIR.glob("**/*"):
_validate_and_print_result(template_file)
else:
warning_text = textwrap.dedent(
Expand All @@ -48,7 +48,7 @@ def validate_templates():


def _validate_and_print_result(template_file: Path):
relative_file_path = template_file.relative_to(TEMPLATE_DIR)
relative_file_path = template_file.relative_to(MACROS_DIR)
with open(template_file, "r") as f:
template = f.read()
try:
Expand Down

0 comments on commit d9ad108

Please sign in to comment.