-
Notifications
You must be signed in to change notification settings - Fork 180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for loading dbt project from cloud store using Airflow Object Store #1148
Changes from 20 commits
e92f632
0522e38
151b4bb
1f9878a
7064047
40a253a
8cde3a8
c303d9b
979e13d
d9cf682
5b7a7ca
85ce534
ed6d9aa
4044dba
ad118b8
b1634c3
013d5c9
8c4ec34
511fb52
eb3ec85
897090e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,12 +36,8 @@ def test_init_with_manifest_path_and_project_path_succeeds(): | |
project_name in this case should be based on dbt_project_path | ||
""" | ||
project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json") | ||
if AIRFLOW_IO_AVAILABLE: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is actually wrong since AIRFLOW_IO_AVAILABLE just means we are using airflow >=2.8.0, and not that we are using object storage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @corsettigyg wasn't object storage introduced in Airflow 2.8? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tatiana indeed, although using airflow 2.8 does not exactly mean we are using object storage, which was the assumption of this test, so I just fixed it. |
||
from airflow.io.path import ObjectStoragePath | ||
|
||
assert project_config.manifest_path == ObjectStoragePath("target/manifest.json") | ||
else: | ||
assert project_config.manifest_path == Path("target/manifest.json") | ||
assert project_config.manifest_path == Path("target/manifest.json") | ||
assert project_config.dbt_project_path == Path("/tmp/some-path") | ||
assert project_config.project_name == "some-path" | ||
|
||
|
||
|
@@ -324,10 +320,49 @@ def test_remote_manifest_path(manifest_path, given_manifest_conn_id, used_manife | |
from airflow.version import version as airflow_version | ||
|
||
error_msg = ( | ||
f"The manifest path {manifest_path} uses a remote file scheme, but the required Object Storage feature is " | ||
f"The path {manifest_path} uses a remote file scheme, but the required Object Storage feature is " | ||
f"unavailable in Airflow version {airflow_version}. Please upgrade to Airflow 2.8 or later." | ||
) | ||
with pytest.raises(CosmosValueError, match=error_msg): | ||
_ = ProjectConfig( | ||
dbt_project_path="/tmp/some-path", manifest_path=manifest_path, manifest_conn_id=given_manifest_conn_id | ||
) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"dbt_project_path, given_dbt_project_conn_id, used_dbt_project_conn_id, project_name", | ||
[ | ||
("s3://cosmos-dbt-project-test/test-project", None, "aws_default", "custom-project-name"), | ||
("s3://cosmos-dbt-project-test/test-project", "aws_s3_conn", "aws_s3_conn", None), | ||
("gs://cosmos-dbt-project-test/test-project", None, "google_cloud_default", "custom-project-name"), | ||
("gs://cosmos-dbt-project-test/test-project", "gcp_gs_conn", "gcp_gs_conn", None), | ||
("abfs://cosmos-dbt-project-test/test-project", None, "wasb_default", "custom-project-name"), | ||
("abfs://cosmos-dbt-project-test/test-project", "azure_abfs_conn", "azure_abfs_conn", None), | ||
], | ||
) | ||
def test_remote_dbt_project_path(dbt_project_path, given_dbt_project_conn_id, used_dbt_project_conn_id, project_name): | ||
if AIRFLOW_IO_AVAILABLE: | ||
project_config = ProjectConfig( | ||
dbt_project_path=dbt_project_path, | ||
dbt_project_conn_id=given_dbt_project_conn_id, | ||
manifest_path="/some/manifest.json", | ||
project_name=project_name, | ||
) | ||
|
||
from airflow.io.path import ObjectStoragePath | ||
|
||
assert project_config.dbt_project_path == ObjectStoragePath(dbt_project_path, conn_id=used_dbt_project_conn_id) | ||
assert project_config.project_name == project_name if project_name else "test-project" | ||
else: | ||
from airflow.version import version as airflow_version | ||
|
||
error_msg = ( | ||
f"The path {dbt_project_path} uses a remote file scheme, but the required Object Storage feature is " | ||
f"unavailable in Airflow version {airflow_version}. Please upgrade to Airflow 2.8 or later." | ||
) | ||
with pytest.raises(CosmosValueError, match=error_msg): | ||
_ = ProjectConfig( | ||
dbt_project_path=dbt_project_path, | ||
dbt_project_conn_id=given_dbt_project_conn_id, | ||
manifest_path="/some/manifest.json", | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
honestly speaking I could not understand this even after reading the comment. both the self.dbt_project_path and self.models_path are already Path as specified in the init. I just reverted to its previous state