
Make fixture files full-fledged members of the manifest and enable partial parsing #9225

Merged 17 commits on Dec 7, 2023
Changes from 10 commits
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231205-200447.yaml
@@ -0,0 +1,6 @@
kind: Features
body: Make fixture files full-fledged parts of the manifest and enable partial parsing
time: 2023-12-05T20:04:47.117029-05:00
custom:
Author: gshank
Issue: "9067"
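As context for the diffs below, here is a rough standalone sketch (not dbt's actual code) of the naming convention this change introduces: every fixture CSV found under the project's fixture paths becomes its own manifest entry keyed as fixture.<project_name>.<file name without extension>. The helper name and example path are hypothetical; they simply mirror generate_unique_id in the new FixtureParser further down.

from pathlib import PurePosixPath

def fixture_unique_id(project_name: str, relative_path: str) -> str:
    # The fixture's resource name is the CSV file name without its extension.
    name = PurePosixPath(relative_path).stem
    return f"fixture.{project_name}.{name}"

print(fixture_unique_id("my_project", "tests/fixtures/stg_orders_input.csv"))
# -> fixture.my_project.stg_orders_input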
17 changes: 15 additions & 2 deletions core/dbt/contracts/files.py
@@ -22,6 +22,7 @@
Documentation = "docs"
Schema = "schema"
Hook = "hook" # not a real filetype, from dbt_project.yml
Fixture = "fixture"


parse_file_type_to_parser = {
@@ -35,6 +36,7 @@
ParseFileType.Documentation: "DocumentationParser",
ParseFileType.Schema: "SchemaParser",
ParseFileType.Hook: "HookParser",
ParseFileType.Fixture: "FixtureParser",
}


@@ -152,7 +154,6 @@
parse_file_type: Optional[ParseFileType] = None
# we don't want to serialize this
contents: Optional[str] = None
# the unique IDs contained in this file

@property
def file_id(self):
@@ -168,6 +169,8 @@
def _deserialize(cls, dct: Dict[str, int]):
if dct["parse_file_type"] == "schema":
sf = SchemaSourceFile.from_dict(dct)
elif dct["parse_file_type"] == "fixture":
sf = FixtureSourceFile.from_dict(dct)
else:
sf = SourceFile.from_dict(dct)
return sf
@@ -328,4 +331,14 @@
del self.env_vars[yaml_key]


AnySourceFile = Union[SchemaSourceFile, SourceFile]
@dataclass
class FixtureSourceFile(BaseSourceFile):
fixture: Optional[str] = None
unit_tests: List[str] = field(default_factory=list)

def add_unit_test(self, value):
if value not in self.unit_tests:
self.unit_tests.append(value)



AnySourceFile = Union[SchemaSourceFile, SourceFile, FixtureSourceFile]
20 changes: 19 additions & 1 deletion core/dbt/contracts/graph/manifest.py
@@ -43,10 +43,17 @@
SourceDefinition,
UnpatchedSourceDefinition,
UnitTestDefinition,
UnitTestFixture,
)
from dbt.contracts.graph.unparsed import SourcePatch, NodeVersion, UnparsedVersion
from dbt.contracts.graph.manifest_upgrade import upgrade_manifest_json
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.files import (
SourceFile,
SchemaSourceFile,
FileHash,
AnySourceFile,
FixtureSourceFile,
)
from dbt.contracts.util import (
BaseArtifactMetadata,
SourceKey,
@@ -802,6 +809,7 @@
semantic_models: MutableMapping[str, SemanticModel] = field(default_factory=dict)
unit_tests: MutableMapping[str, UnitTestDefinition] = field(default_factory=dict)
saved_queries: MutableMapping[str, SavedQuery] = field(default_factory=dict)
fixtures: MutableMapping[str, UnitTestFixture] = field(default_factory=dict)

_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
@@ -1444,6 +1452,8 @@
source_file.exposures.append(node.unique_id)
if isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass

else:
source_file.nodes.append(node.unique_id)

@@ -1486,6 +1496,8 @@
source_file.semantic_models.append(node.unique_id)
if isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass

else:
source_file.nodes.append(node.unique_id)

@@ -1505,6 +1517,12 @@
self.unit_tests[unit_test.unique_id] = unit_test
source_file.unit_tests.append(unit_test.unique_id)

def add_fixture(self, source_file: FixtureSourceFile, fixture: UnitTestFixture):
if fixture.unique_id in self.fixtures:
raise DuplicateResourceNameError(fixture, self.fixtures[fixture.unique_id])
self.fixtures[fixture.unique_id] = fixture
source_file.fixture = fixture.unique_id

def add_saved_query(self, source_file: SchemaSourceFile, saved_query: SavedQuery) -> None:
_check_duplicates(saved_query, self.saved_queries)
self.saved_queries[saved_query.unique_id] = saved_query
5 changes: 5 additions & 0 deletions core/dbt/contracts/graph/nodes.py
@@ -1134,6 +1134,11 @@ def same_contents(self, other: Optional["UnitTestDefinition"]) -> bool:
return self.checksum == other.checksum


@dataclass
class UnitTestFixture(BaseNode):
resource_type: Literal[NodeType.Fixture]
rows: Optional[List[Dict[str, Any]]] = None


# ====================================
# Snapshot node
# ====================================
30 changes: 15 additions & 15 deletions core/dbt/graph/selector_methods.py
@@ -727,21 +727,21 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu

manifest: WritableManifest = self.previous_state.manifest

for node, real_node in self.all_nodes(included_nodes):
for unique_id, node in self.all_nodes(included_nodes):
Reviewer comment (Contributor): thank you for renaming this here -- this has been bugging me forever!

previous_node: Optional[SelectorTarget] = None

if node in manifest.nodes:
previous_node = manifest.nodes[node]
elif node in manifest.sources:
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
elif node in manifest.semantic_models:
previous_node = manifest.semantic_models[node]
elif node in manifest.unit_tests:
previous_node = manifest.unit_tests[node]
if unique_id in manifest.nodes:
previous_node = manifest.nodes[unique_id]
elif unique_id in manifest.sources:
previous_node = manifest.sources[unique_id]
elif unique_id in manifest.exposures:
previous_node = manifest.exposures[unique_id]
elif unique_id in manifest.metrics:
previous_node = manifest.metrics[unique_id]
elif unique_id in manifest.semantic_models:
previous_node = manifest.semantic_models[unique_id]
elif unique_id in manifest.unit_tests:
previous_node = manifest.unit_tests[unique_id]

keyword_args = {}
if checker.__name__ in [
@@ -751,8 +751,8 @@
]:
keyword_args["adapter_type"] = adapter_type # type: ignore

if checker(previous_node, real_node, **keyword_args): # type: ignore
yield node
if checker(previous_node, node, **keyword_args): # type: ignore
yield unique_id


class ResultSelectorMethod(SelectorMethod):
1 change: 1 addition & 0 deletions core/dbt/node_types.py
@@ -36,6 +36,7 @@ class NodeType(StrEnum):
SavedQuery = "saved_query"
SemanticModel = "semantic_model"
Unit = "unit_test"
Fixture = "fixture"

@classmethod
def executable(cls) -> List["NodeType"]:
35 changes: 35 additions & 0 deletions core/dbt/parser/fixtures.py
@@ -0,0 +1,35 @@
from typing import Optional

from dbt.contracts.files import FixtureSourceFile
from dbt.contracts.graph.nodes import UnitTestFixture
from dbt.node_types import NodeType
from dbt.parser.base import Parser
from dbt.parser.search import FileBlock


class FixtureParser(Parser[UnitTestFixture]):
@property
def resource_type(self) -> NodeType:
return NodeType.Fixture

@classmethod
def get_compiled_path(cls, block: FileBlock):
# Is this necessary?
return block.path.relative_path


def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str:
return f"fixture.{self.project.project_name}.{resource_name}"

def parse_file(self, file_block: FileBlock):
assert isinstance(file_block.file, FixtureSourceFile)
unique_id = self.generate_unique_id(file_block.name)

fixture = UnitTestFixture(
name=file_block.name,
path=file_block.file.path.relative_path,
original_file_path=file_block.path.original_file_path,
package_name=self.project.project_name,
unique_id=unique_id,
resource_type=NodeType.Fixture,
)
self.manifest.add_fixture(file_block.file, fixture)
2 changes: 2 additions & 0 deletions core/dbt/parser/manifest.py
@@ -114,6 +114,7 @@
from dbt.parser.generic_test import GenericTestParser
from dbt.parser.singular_test import SingularTestParser
from dbt.parser.docs import DocumentationParser
from dbt.parser.fixtures import FixtureParser
from dbt.parser.hooks import HookParser
from dbt.parser.macros import MacroParser
from dbt.parser.models import ModelParser
@@ -471,6 +472,7 @@ def load(self) -> Manifest:
SeedParser,
DocumentationParser,
HookParser,
FixtureParser,
]
for project in self.all_projects.values():
if project.project_name not in project_parser_files:
29 changes: 29 additions & 0 deletions core/dbt/parser/partial.py
@@ -280,6 +280,10 @@
if saved_source_file.parse_file_type == ParseFileType.Documentation:
self.delete_doc_node(saved_source_file)

# fixtures
if saved_source_file.parse_file_type == ParseFileType.Fixture:
self.delete_fixture_node(saved_source_file)

fire_event(PartialParsingFile(operation="deleted", file_id=file_id))

# Updates for non-schema files
@@ -293,6 +297,8 @@
self.update_macro_in_saved(new_source_file, old_source_file)
elif new_source_file.parse_file_type == ParseFileType.Documentation:
self.update_doc_in_saved(new_source_file, old_source_file)
elif new_source_file.parse_file_type == ParseFileType.Fixture:
self.update_fixture_in_saved(new_source_file, old_source_file)
else:
raise Exception(f"Invalid parse_file_type in source_file {file_id}")
fire_event(PartialParsingFile(operation="updated", file_id=file_id))
@@ -377,6 +383,13 @@
self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
self.add_to_pp_files(new_source_file)

def update_fixture_in_saved(self, new_source_file, old_source_file):
if self.already_scheduled_for_parsing(old_source_file):
return

self.delete_fixture_node(old_source_file)
self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
self.add_to_pp_files(new_source_file)

def remove_mssat_file(self, source_file):
# nodes [unique_ids] -- SQL files
# There should always be a node for a SQL file
@@ -579,6 +592,20 @@
# Remove the file object
self.saved_manifest.files.pop(source_file.file_id)

def delete_fixture_node(self, source_file):
# remove fixtures from the "fixtures" dictionary
fixture_unique_id = source_file.fixture
self.saved_manifest.fixtures.pop(fixture_unique_id)
unit_tests = source_file.unit_tests.copy()
for unique_id in unit_tests:
unit_test = self.saved_manifest.unit_tests.pop(unique_id)
# schedule unit_test for parsing
self._schedule_for_parsing(
"unit_tests", unit_test, unit_test.name, self.delete_schema_unit_test
)
source_file.unit_tests.remove(unique_id)
self.saved_manifest.files.pop(source_file.file_id)

# Schema files -----------------------
# Changed schema files
def change_schema_file(self, file_id):
@@ -1021,6 +1048,8 @@
# Create a list of file_ids for source_files that need to be reparsed, and
# a dictionary of file_ids to yaml_keys to names.
for source_file in self.saved_files.values():
if source_file.parse_file_type == ParseFileType.Fixture:
continue
file_id = source_file.file_id
if not source_file.env_vars:
continue
14 changes: 13 additions & 1 deletion core/dbt/parser/read_files.py
@@ -10,6 +10,7 @@
FileHash,
AnySourceFile,
SchemaSourceFile,
FixtureSourceFile,
)
from dbt.config import Project
from dbt.dataclass_schema import dbtClassMixin
@@ -46,7 +47,13 @@ def load_source_file(
saved_files,
) -> Optional[AnySourceFile]:

sf_cls = SchemaSourceFile if parse_file_type == ParseFileType.Schema else SourceFile
if parse_file_type == ParseFileType.Schema:
sf_cls = SchemaSourceFile
elif parse_file_type == ParseFileType.Fixture:
sf_cls = FixtureSourceFile # type:ignore[assignment]
else:
sf_cls = SourceFile # type:ignore[assignment]

source_file = sf_cls(
path=path,
checksum=FileHash.empty(),
Expand Down Expand Up @@ -422,5 +429,10 @@ def get_file_types_for_project(project):
"extensions": [".yml", ".yaml"],
"parser": "SchemaParser",
},
ParseFileType.Fixture: {
"paths": project.fixture_paths,
"extensions": [".csv"],
"parser": "FixtureParser",
},
}
return file_types
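To illustrate what this new ParseFileType.Fixture entry drives, here is a minimal sketch of the file discovery, assuming fixture_paths resolves to directories such as tests/fixtures (the helper name and layout are assumptions, not dbt's code):

from pathlib import Path
from typing import List

def find_fixture_csvs(project_root: str, fixture_paths: List[str]) -> List[Path]:
    # Collect every .csv under the configured fixture directories, matching the
    # single extension registered for ParseFileType.Fixture above.
    found: List[Path] = []
    for fixture_dir in fixture_paths:
        found.extend(sorted(Path(project_root, fixture_dir).rglob("*.csv")))
    return found

# e.g. find_fixture_csvs("/path/to/my_project", ["tests/fixtures"])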
32 changes: 31 additions & 1 deletion core/dbt/parser/unit_tests.py
@@ -47,7 +47,7 @@ def __init__(self, manifest, root_project, selected) -> None:
def load(self) -> Manifest:
for unique_id in self.selected:
if unique_id in self.manifest.unit_tests:
unit_test_case = self.manifest.unit_tests[unique_id]
unit_test_case: UnitTestDefinition = self.manifest.unit_tests[unique_id]
self.parse_unit_test_case(unit_test_case)
return self.unit_test_manifest

@@ -264,6 +264,9 @@ def parse(self) -> ParseResult:
config=unit_test_config,
schema=tested_model_node.schema,
)
# For partial parsing, we add the unique_id of the unit test definition to the
# fixture file records
self._add_unit_test_to_fixture_files(unit_test_definition)
# for calculating state:modified
unit_test_definition.build_unit_test_checksum(
self.schema_parser.project.project_root, self.schema_parser.project.fixture_paths
@@ -272,6 +275,33 @@

return ParseResult()

def _add_unit_test_to_fixture_files(self, unit_test_definition):
for given in unit_test_definition.given:
if given.fixture:
# find fixture file object and store unit_test_definition unique_id
fixture_source_file = self.get_fixture_source_file(
given.fixture, self.project.project_name
)
fixture_source_file.unit_tests.append(unit_test_definition.unique_id)
if unit_test_definition.expect.fixture:
# find fixture file object and store unit_test_definition unique_id
fixture_source_file = self.get_fixture_source_file(
unit_test_definition.expect.fixture,
self.project.project_name,
)
fixture_source_file.unit_tests.append(unit_test_definition.unique_id)

def get_fixture_source_file(self, fixture_name: str, project_name: str):
fixture_unique_id = f"fixture.{project_name}.{fixture_name}"
if fixture_unique_id in self.manifest.fixtures:
fixture = self.manifest.fixtures[fixture_unique_id]
fixture_source_file = self.manifest.files[fixture.file_id]
return fixture_source_file
else:
raise ParsingError(
f"File not found for fixture '{fixture_name}' in unit tests in {self.yaml.path.original_file_path}"
)
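For orientation, a small self-contained sketch of the lookup these two methods rely on: each given/expect entry may name a fixture, and the name resolves to fixture.<project>.<name> in manifest.fixtures, with a ParsingError raised when the CSV was never parsed. The classes and values below are made up for illustration and are not dbt's unparsed schema.

class FixtureRef:  # hypothetical stand-in for a given/expect entry
    def __init__(self, fixture=None):
        self.fixture = fixture

given = [FixtureRef(fixture="stg_orders_input"), FixtureRef()]  # second input has no CSV fixture
expect = FixtureRef(fixture="orders_rollup_expected")
project_name = "my_project"

referenced = [f"fixture.{project_name}.{ref.fixture}" for ref in [*given, expect] if ref.fixture]
print(referenced)
# ['fixture.my_project.stg_orders_input', 'fixture.my_project.orders_rollup_expected']
# Each id must already be present in manifest.fixtures for get_fixture_source_file to succeed.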

def _get_unit_test(self, data: Dict[str, Any]) -> UnparsedUnitTest:
try:
UnparsedUnitTest.validate(data)
Empty file removed core/dbt/task/parse.py
Empty file.