-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
base.py
396 lines (336 loc) · 14.4 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
import abc
import itertools
import os
from typing import List, Dict, Any, Generic, Optional, TypeVar
from dbt.dataclass_schema import ValidationError
from dbt import utils
from dbt.clients.jinja import MacroGenerator
from dbt.context.providers import (
generate_parser_model_context,
generate_generate_name_macro_context,
)
from dbt.adapters.factory import get_adapter # noqa: F401
from dbt.clients.jinja import get_rendered
from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.parsed import HasUniqueID, ManifestNodes
from dbt.contracts.graph.unparsed import UnparsedNode
from dbt.exceptions import ParsingException, validator_error_message, InternalException
from dbt import hooks
from dbt.node_types import NodeType
from dbt.parser.search import FileBlock
# internally, the parser may store a less-restrictive type that will be
# transformed into the final type. But it will have to be derived from
# ParsedNode to be operable.
FinalValue = TypeVar("FinalValue", bound=HasUniqueID)
IntermediateValue = TypeVar("IntermediateValue", bound=HasUniqueID)
IntermediateNode = TypeVar("IntermediateNode", bound=Any)
FinalNode = TypeVar("FinalNode", bound=ManifestNodes)
ConfiguredBlockType = TypeVar("ConfiguredBlockType", bound=FileBlock)
class BaseParser(Generic[FinalValue]):
def __init__(self, project: Project, manifest: Manifest) -> None:
self.project = project
self.manifest = manifest
@abc.abstractmethod
def parse_file(self, block: FileBlock) -> None:
pass
@abc.abstractproperty
def resource_type(self) -> NodeType:
pass
def generate_unique_id(self, resource_name: str, hash: Optional[str] = None) -> str:
"""Returns a unique identifier for a resource
An optional hash may be passed in to ensure uniqueness for edge cases"""
return ".".join(
filter(None, [self.resource_type, self.project.project_name, resource_name, hash])
)
class Parser(BaseParser[FinalValue], Generic[FinalValue]):
def __init__(
self,
project: Project,
manifest: Manifest,
root_project: RuntimeConfig,
) -> None:
super().__init__(project, manifest)
self.root_project = root_project
class RelationUpdate:
def __init__(self, config: RuntimeConfig, manifest: Manifest, component: str) -> None:
macro = manifest.find_generate_macro_by_name(
component=component,
root_project_name=config.project_name,
)
if macro is None:
raise InternalException(f"No macro with name generate_{component}_name found")
root_context = generate_generate_name_macro_context(macro, config, manifest)
self.updater = MacroGenerator(macro, root_context)
self.component = component
def __call__(self, parsed_node: Any, config_dict: Dict[str, Any]) -> None:
override = config_dict.get(self.component)
new_value = self.updater(override, parsed_node)
if isinstance(new_value, str):
new_value = new_value.strip()
setattr(parsed_node, self.component, new_value)
class ConfiguredParser(
Parser[FinalNode],
Generic[ConfiguredBlockType, IntermediateNode, FinalNode],
):
def __init__(
self,
project: Project,
manifest: Manifest,
root_project: RuntimeConfig,
) -> None:
super().__init__(project, manifest, root_project)
self._update_node_database = RelationUpdate(
manifest=manifest, config=root_project, component="database"
)
self._update_node_schema = RelationUpdate(
manifest=manifest, config=root_project, component="schema"
)
self._update_node_alias = RelationUpdate(
manifest=manifest, config=root_project, component="alias"
)
@abc.abstractclassmethod
def get_compiled_path(cls, block: ConfiguredBlockType) -> str:
pass
@abc.abstractmethod
def parse_from_dict(self, dict, validate=True) -> IntermediateNode:
pass
@abc.abstractproperty
def resource_type(self) -> NodeType:
pass
@property
def default_schema(self):
return self.root_project.credentials.schema
@property
def default_database(self):
return self.root_project.credentials.database
def get_fqn_prefix(self, path: str) -> List[str]:
no_ext = os.path.splitext(path)[0]
fqn = [self.project.project_name]
fqn.extend(utils.split_path(no_ext)[:-1])
return fqn
def get_fqn(self, path: str, name: str) -> List[str]:
"""Get the FQN for the node. This impacts node selection and config
application.
"""
fqn = self.get_fqn_prefix(path)
fqn.append(name)
return fqn
def _mangle_hooks(self, config):
"""Given a config dict that may have `pre-hook`/`post-hook` keys,
convert it from the yucky maybe-a-string, maybe-a-dict to a dict.
"""
# Like most of parsing, this is a horrible hack :(
for key in hooks.ModelHookType:
if key in config:
config[key] = [hooks.get_hook_dict(h) for h in config[key]]
def _create_error_node(
self,
name: str,
path: str,
original_file_path: str,
raw_sql: str,
) -> UnparsedNode:
"""If we hit an error before we've actually parsed a node, provide some
level of useful information by attaching this to the exception.
"""
# this is a bit silly, but build an UnparsedNode just for error
# message reasons
return UnparsedNode(
name=name,
resource_type=self.resource_type,
path=path,
original_file_path=original_file_path,
root_path=self.project.project_root,
package_name=self.project.project_name,
raw_sql=raw_sql,
)
def _create_parsetime_node(
self,
block: ConfiguredBlockType,
path: str,
config: ContextConfig,
fqn: List[str],
name=None,
**kwargs,
) -> IntermediateNode:
"""Create the node that will be passed in to the parser context for
"rendering". Some information may be partial, as it'll be updated by
config() and any ref()/source() calls discovered during rendering.
"""
if name is None:
name = block.name
dct = {
"alias": name,
"schema": self.default_schema,
"database": self.default_database,
"fqn": fqn,
"name": name,
"root_path": self.project.project_root,
"resource_type": self.resource_type,
"path": path,
"original_file_path": block.path.original_file_path,
"package_name": self.project.project_name,
"raw_sql": block.contents,
"unique_id": self.generate_unique_id(name),
"config": self.config_dict(config),
"checksum": block.file.checksum.to_dict(omit_none=True),
}
dct.update(kwargs)
try:
return self.parse_from_dict(dct, validate=True)
except ValidationError as exc:
msg = validator_error_message(exc)
# this is a bit silly, but build an UnparsedNode just for error
# message reasons
node = self._create_error_node(
name=block.name,
path=path,
original_file_path=block.path.original_file_path,
raw_sql=block.contents,
)
raise ParsingException(msg, node=node)
def _context_for(self, parsed_node: IntermediateNode, config: ContextConfig) -> Dict[str, Any]:
return generate_parser_model_context(parsed_node, self.root_project, self.manifest, config)
def render_with_context(self, parsed_node: IntermediateNode, config: ContextConfig):
# Given the parsed node and a ContextConfig to use during parsing,
# render the node's sql wtih macro capture enabled.
# Note: this mutates the config object when config calls are rendered.
context = self._context_for(parsed_node, config)
# this goes through the process of rendering, but just throws away
# the rendered result. The "macro capture" is the point?
get_rendered(parsed_node.raw_sql, context, parsed_node, capture_macros=True)
return context
# This is taking the original config for the node, converting it to a dict,
# updating the config with new config passed in, then re-creating the
# config from the dict in the node.
def update_parsed_node_config_dict(
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
) -> None:
# Overwrite node config
final_config_dict = parsed_node.config.to_dict(omit_none=True)
final_config_dict.update({k.strip(): v for (k, v) in config_dict.items()})
# re-mangle hooks, in case we got new ones
self._mangle_hooks(final_config_dict)
parsed_node.config = parsed_node.config.from_dict(final_config_dict)
def update_parsed_node_name(
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
) -> None:
self._update_node_database(parsed_node, config_dict)
self._update_node_schema(parsed_node, config_dict)
self._update_node_alias(parsed_node, config_dict)
def update_parsed_node_config(
self,
parsed_node: IntermediateNode,
config: ContextConfig,
context=None,
patch_config_dict=None,
) -> None:
"""Given the ContextConfig used for parsing and the parsed node,
generate and set the true values to use, overriding the temporary parse
values set in _build_intermediate_parsed_node.
"""
# build_config_dict takes the config_call_dict in the ContextConfig object
# and calls calculate_node_config to combine dbt_project configs and
# config calls from SQL files
config_dict = config.build_config_dict(patch_config_dict=patch_config_dict)
# Set tags on node provided in config blocks. Tags are additive, so even if
# config has been built before, we don't have to reset tags in the parsed_node.
model_tags = config_dict.get("tags", [])
for tag in model_tags:
if tag not in parsed_node.tags:
parsed_node.tags.append(tag)
# If we have meta in the config, copy to node level, for backwards
# compatibility with earlier node-only config.
if "meta" in config_dict and config_dict["meta"]:
parsed_node.meta = config_dict["meta"]
# unrendered_config is used to compare the original database/schema/alias
# values and to handle 'same_config' and 'same_contents' calls
parsed_node.unrendered_config = config.build_config_dict(rendered=False)
parsed_node.config_call_dict = config._config_call_dict
# do this once before we parse the node database/schema/alias, so
# parsed_node.config is what it would be if they did nothing
self.update_parsed_node_config_dict(parsed_node, config_dict)
# This updates the node database/schema/alias
self.update_parsed_node_name(parsed_node, config_dict)
# tests don't have hooks
if parsed_node.resource_type == NodeType.Test:
return
# at this point, we've collected our hooks. Use the node context to
# render each hook and collect refs/sources
hooks = list(itertools.chain(parsed_node.config.pre_hook, parsed_node.config.post_hook))
# skip context rebuilding if there aren't any hooks
if not hooks:
return
if not context:
context = self._context_for(parsed_node, config)
for hook in hooks:
get_rendered(hook.sql, context, parsed_node, capture_macros=True)
def initial_config(self, fqn: List[str]) -> ContextConfig:
config_version = min([self.project.config_version, self.root_project.config_version])
if config_version == 2:
return ContextConfig(
self.root_project,
fqn,
self.resource_type,
self.project.project_name,
)
else:
raise InternalException(
f"Got an unexpected project version={config_version}, " f"expected 2"
)
def config_dict(
self,
config: ContextConfig,
) -> Dict[str, Any]:
config_dict = config.build_config_dict(base=True)
self._mangle_hooks(config_dict)
return config_dict
def render_update(self, node: IntermediateNode, config: ContextConfig) -> None:
try:
context = self.render_with_context(node, config)
self.update_parsed_node_config(node, config, context=context)
except ValidationError as exc:
# we got a ValidationError - probably bad types in config()
msg = validator_error_message(exc)
raise ParsingException(msg, node=node) from exc
def add_result_node(self, block: FileBlock, node: ManifestNodes):
if node.config.enabled:
self.manifest.add_node(block.file, node)
else:
self.manifest.add_disabled(block.file, node)
def parse_node(self, block: ConfiguredBlockType) -> FinalNode:
compiled_path: str = self.get_compiled_path(block)
fqn = self.get_fqn(compiled_path, block.name)
config: ContextConfig = self.initial_config(fqn)
node = self._create_parsetime_node(
block=block,
path=compiled_path,
config=config,
fqn=fqn,
)
self.render_update(node, config)
result = self.transform(node)
self.add_result_node(block, result)
return result
@abc.abstractmethod
def parse_file(self, file_block: FileBlock) -> None:
pass
@abc.abstractmethod
def transform(self, node: IntermediateNode) -> FinalNode:
pass
class SimpleParser(
ConfiguredParser[ConfiguredBlockType, FinalNode, FinalNode],
Generic[ConfiguredBlockType, FinalNode],
):
def transform(self, node):
return node
class SQLParser(
ConfiguredParser[FileBlock, IntermediateNode, FinalNode], Generic[IntermediateNode, FinalNode]
):
def parse_file(self, file_block: FileBlock) -> None:
self.parse_node(file_block)
class SimpleSQLParser(SQLParser[FinalNode, FinalNode]):
def transform(self, node):
return node