Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

generate-dag-docs recipe move DAGs.md to documentation folder #3061

Merged
merged 10 commits into from
Sep 26, 2023
8 changes: 4 additions & 4 deletions catalog/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ generate-dag-docs fail_on_diff="false":
just ../run \
{{ SERVICE }} \
"bash -c 'python catalog/utilities/dag_doc_gen/dag_doc_generation.py && chmod 666 /opt/airflow/catalog/utilities/dag_doc_gen/DAGs.md'"
# Move the file to the top level, since that level is not mounted into the container
mv utilities/dag_doc_gen/DAGs.md DAGs.md
# Move the file into the documentation folder, since that folder is not mounted into the container
mv utilities/dag_doc_gen/DAGs.md ../documentation/catalog/reference/DAGs.md
echo -n "Running linting..."
# Linting step afterwards is necessary since the generated output differs greatly from what prettier expects
just ../lint prettier catalog/DAGs.md &>/dev/null || true
just ../lint prettier documentation/catalog/reference/DAGs.md &>/dev/null || true
echo "Done!"
if {{ fail_on_diff }}; then
set +e
git diff --exit-code DAGs.md
git diff --exit-code -- documentation/catalog/reference/DAGs.md
if [ $? -ne 0 ]; then
printf "\n\n\e[31m!! Changes found in DAG documentation, please run 'just generate-dag-docs' locally and commit difference !!\n\n"
exit 1
Expand Down
12 changes: 6 additions & 6 deletions catalog/tests/utilities/dag_doc_gen/test_dag_doc_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class DagMock(NamedTuple):
[
(None, None),
("Sample simple doc", "Sample simple doc"),
("# Big header", "### Big header"),
("# Big header", "#### Big header"),
],
)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -107,7 +107,7 @@ def test_get_dags_info(
),
False,
"""
## Special Name
### Special Name

| DAG ID | Schedule Interval |
| --- | --- |
Expand All @@ -126,7 +126,7 @@ def test_get_dags_info(
),
False,
"""
## Special Name
### Special Name

| DAG ID | Schedule Interval |
| --- | --- |
Expand All @@ -145,7 +145,7 @@ def test_get_dags_info(
),
True,
"""
## Special Name
### Special Name

| DAG ID | Schedule Interval | Dated | Media Type(s) |
| --- | --- | --- | --- |
Expand All @@ -167,7 +167,7 @@ def test_generate_dag_doc():
+ """\
1. [T1](#t1)

## T1
### T1

| DAG ID | Schedule Interval |
| --- | --- |
Expand All @@ -180,7 +180,7 @@ def test_generate_dag_doc():
1. [`b`](#b)


## `b`
### `b`

this one has a doc
"""
Expand Down
14 changes: 7 additions & 7 deletions catalog/utilities/dag_doc_gen/dag_doc_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
- [DAGs by Type](#dags-by-type)
- [Individual DAG documentation](#dag-documentation)

# DAGs by Type
## DAGs by Type

The following are DAGs grouped by their primary tag:

"""
MIDAMBLE = """
# DAG documentation
## DAG documentation

The following is documentation associated with each DAG (where available):

Expand Down Expand Up @@ -94,7 +94,7 @@ def get_provider_workflows() -> dict[str, ProviderWorkflow]:

def fix_headings(doc: str) -> str:
"""
Increase all heading levels by 2.
Increase all heading levels by 3.

This is necessary to accommodate the embedded setting of the DAG docs
in the final Markdown output.
Expand All @@ -105,7 +105,7 @@ def fix_headings(doc: str) -> str:
for match in reversed(list(HEADING_PROG.finditer(doc))):
start, end = match.span()
original_heading = match.string[start:end]
new_heading = f"##{original_heading}"
new_heading = f"###{original_heading}"
doc = f"{doc[:start]}{new_heading}{doc[end:]}"

return doc
Expand Down Expand Up @@ -149,7 +149,7 @@ def generate_type_subsection(
) -> str:
"""Generate the documentation for a "DAGs by type" subsection."""
log.info(f"Building subsection for '{name}'")
text = f"## {name}\n\n"
text = f"### {name}\n\n"
# Columns for all DAGs
header = "| DAG ID | Schedule Interval |"
# Conditionally add the other columns for the provider-specific DAGs
Expand Down Expand Up @@ -183,7 +183,7 @@ def generate_type_subsection(
def generate_single_documentation(dag: DagInfo) -> str:
"""Generate the documentation for a single DAG."""
return f"""
## `{dag.dag_id}`
### `{dag.dag_id}`

{dag.doc}

Expand Down Expand Up @@ -217,7 +217,7 @@ def generate_dag_doc(dag_folder: Path = DAG_FOLDER) -> str:
# For each type we generate a sub-list of DAGs. We add a link to each generated
# sub-list as part of a table of contents, but defer adding the sub-lists until
# all are generated.
text += f" 1. [{name}](#{type_})\n"
text += f" 1. [{name}](#{type_.replace('_', '-')})\n"
dag_types.append(generate_type_subsection(name, dags, is_provider))

text += "\n" + "\n\n".join(dag_types)
Expand Down
Loading