From 32dfd04a52134d9260be3939f488547cc42cce37 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Thu, 25 Jan 2024 15:41:40 -0500
Subject: [PATCH 1/7] fix seed selection: apply selected_ids to catalog
 construction

---
 core/dbt/task/docs/generate.py         | 25 +++++++++++++++++--------
 tests/functional/docs/test_generate.py | 21 ++++++++++++++++-----
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/core/dbt/task/docs/generate.py b/core/dbt/task/docs/generate.py
index acd68cb37ee..2d296b225e5 100644
--- a/core/dbt/task/docs/generate.py
+++ b/core/dbt/task/docs/generate.py
@@ -13,6 +13,8 @@
 from dbt.task.compile import CompileTask
 
 from dbt.adapters.factory import get_adapter
+
+from dbt.graph.graph import UniqueId
 from dbt.contracts.graph.nodes import ResultNode
 from dbt.contracts.graph.manifest import Manifest
 from dbt.artifacts.schemas.results import NodeStatus
@@ -112,7 +114,7 @@ def add_column(self, data: PrimitiveDict):
         table.columns[column.name] = column
 
     def make_unique_id_map(
-        self, manifest: Manifest
+        self, manifest: Manifest, selected_node_ids: Set[UniqueId]
     ) -> Tuple[Dict[str, CatalogTable], Dict[str, CatalogTable]]:
         nodes: Dict[str, CatalogTable] = {}
         sources: Dict[str, CatalogTable] = {}
@@ -123,7 +125,8 @@ def make_unique_id_map(
             key = table.key()
             if key in node_map:
                 unique_id = node_map[key]
-                nodes[unique_id] = table.replace(unique_id=unique_id)
+                if unique_id in selected_node_ids:
+                    nodes[unique_id] = table.replace(unique_id=unique_id)
 
             unique_ids = source_map.get(table.key(), set())
             for unique_id in unique_ids:
@@ -134,7 +137,8 @@ def make_unique_id_map(
                         table.to_dict(omit_none=True),
                     )
                 else:
-                    sources[unique_id] = table.replace(unique_id=unique_id)
+                    if unique_id in selected_node_ids:
+                        sources[unique_id] = table.replace(unique_id=unique_id)
         return nodes, sources
 
 
@@ -238,6 +242,7 @@ def run(self) -> CatalogArtifact:
         if self.manifest is None:
             raise DbtInternalError("self.manifest was None in run!")
 
+        selected_node_ids: Set[UniqueId] = set()
         if self.args.empty_catalog:
             catalog_table: agate.Table = agate.Table([])
             exceptions: List[Exception] = []
@@ -251,14 +256,18 @@ def run(self) -> CatalogArtifact:
                     selected_node_ids = self.job_queue.get_selected_nodes()
                     selected_nodes = self._get_nodes_from_ids(self.manifest, selected_node_ids)
 
-                    source_ids = self._get_nodes_from_ids(
+                    source_nodes = self._get_nodes_from_ids(
                         self.manifest, self.manifest.sources.keys()
                     )
-                    selected_nodes.extend(source_ids)
+
+                    selected_node_ids.update(
+                        {UniqueId(unique_id) for unique_id in self.manifest.sources.keys()}
+                    )
+                    selected_nodes.extend(source_nodes)
 
                     relations = {
-                        adapter.Relation.create_from(adapter.config, node_id)
-                        for node_id in selected_nodes
+                        adapter.Relation.create_from(adapter.config, node)
+                        for node in selected_nodes
                     }
 
                 # This generates the catalog as an agate.Table
@@ -285,7 +294,7 @@ def run(self) -> CatalogArtifact:
         if exceptions:
             errors = [str(e) for e in exceptions]
 
-        nodes, sources = catalog.make_unique_id_map(self.manifest)
+        nodes, sources = catalog.make_unique_id_map(self.manifest, selected_node_ids)
         results = self.get_catalog_results(
             nodes=nodes,
             sources=sources,
diff --git a/tests/functional/docs/test_generate.py b/tests/functional/docs/test_generate.py
index 1047600a820..e1880689e72 100644
--- a/tests/functional/docs/test_generate.py
+++ b/tests/functional/docs/test_generate.py
@@ -74,6 +74,7 @@ def test_select_limits_no_match(self, project):
         run_dbt(["run"])
         catalog = run_dbt(["docs", "generate", "--select", "my_missing_model"])
         assert len(catalog.nodes) == 0
+        assert len(catalog.sources) == 0
 
 
 class TestGenerateCatalogWithSources(TestBaseGenerate):
@@ -90,11 +91,21 @@ def test_catalog_with_sources(self, project):
 class TestGenerateSelectSource(TestBaseGenerate):
     def test_select_source(self, project):
         run_dbt(["build"])
-        catalog = run_dbt(["docs", "generate", "--select", "source:test.my_seed.sample_seed"])
 
-        # 2 seeds
-        # TODO: Filtering doesn't work for seeds
-        assert len(catalog.nodes) == 2
         # 2 sources
-        # TODO: Filtering doesn't work for sources
+        catalog = run_dbt(["docs", "generate", "--select", "source:test.my_seed.sample_seed"])
+        # TODO: Filtering doesn't work for sources (should be 1)
+        assert len(catalog.sources) == 2
+
+
+class TestGenerateSelectSeed(TestBaseGenerate):
+    def test_select_seed(self, project):
+        run_dbt(["build"])
+
+        # 2 seeds, 1 selected
+        catalog = run_dbt(["docs", "generate", "--select", "sample_seed"])
+        assert len(catalog.nodes) == 1
+        assert "seed.test.sample_seed" in catalog.nodes
+
+        # TODO: Filtering doesn't work for sources (should be 0)
         assert len(catalog.sources) == 2

From 5149cc4cbe8125ac654de432fd1725cc00e93f54 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Thu, 25 Jan 2024 15:54:49 -0500
Subject: [PATCH 2/7] fix source selection in docs generate: apply source
 selection

---
 core/dbt/task/docs/generate.py         | 56 ++++++++++++++++----------
 tests/functional/docs/test_generate.py | 13 +++---
 2 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/core/dbt/task/docs/generate.py b/core/dbt/task/docs/generate.py
index 2d296b225e5..efc6b0bde78 100644
--- a/core/dbt/task/docs/generate.py
+++ b/core/dbt/task/docs/generate.py
@@ -32,7 +32,7 @@
 from dbt_common.exceptions import DbtInternalError
 from dbt.exceptions import AmbiguousCatalogMatchError
 from dbt.graph import ResourceTypeSelector
-from dbt.node_types import EXECUTABLE_NODE_TYPES
+from dbt.node_types import EXECUTABLE_NODE_TYPES, NodeType
 from dbt_common.events.functions import fire_event
 from dbt.adapters.events.types import (
     WriteCatalogFailure,
@@ -256,14 +256,15 @@ def run(self) -> CatalogArtifact:
                     selected_node_ids = self.job_queue.get_selected_nodes()
                     selected_nodes = self._get_nodes_from_ids(self.manifest, selected_node_ids)
 
-                    source_nodes = self._get_nodes_from_ids(
-                        self.manifest, self.manifest.sources.keys()
+                    # Source selection is handled separately from main job_queue selection because
+                    # SourceDefinition nodes cannot be safely compiled / run by the CompileRunner / CompileTask,
+                    # but can still be included in the catalog
+                    selected_source_ids = self._get_selected_source_ids()
+                    selected_source_nodes = self._get_nodes_from_ids(
+                        self.manifest, selected_source_ids
                     )
-
-                    selected_node_ids.update(
-                        {UniqueId(unique_id) for unique_id in self.manifest.sources.keys()}
-                    )
-                    selected_nodes.extend(source_nodes)
+                    selected_node_ids.update(selected_source_ids)
+                    selected_nodes.extend(selected_source_nodes)
 
                     relations = {
                         adapter.Relation.create_from(adapter.config, node)
@@ -331,19 +332,6 @@ def run(self) -> CatalogArtifact:
         fire_event(CatalogWritten(path=os.path.abspath(catalog_path)))
         return results
 
-    @staticmethod
-    def _get_nodes_from_ids(manifest: Manifest, node_ids: Iterable[str]) -> List[ResultNode]:
-        selected: List[ResultNode] = []
-        for unique_id in node_ids:
-            if unique_id in manifest.nodes:
-                node = manifest.nodes[unique_id]
-                if node.is_relational and not node.is_ephemeral_model:
-                    selected.append(node)
-            elif unique_id in manifest.sources:
-                source = manifest.sources[unique_id]
-                selected.append(source)
-        return selected
-
     def get_node_selector(self) -> ResourceTypeSelector:
         if self.manifest is None or self.graph is None:
             raise DbtInternalError("manifest and graph must be set to perform node selection")
@@ -381,3 +369,29 @@ def interpret_results(self, results: Optional[CatalogResults]) -> bool:
             return True
 
         return super().interpret_results(compile_results)
+
+    @staticmethod
+    def _get_nodes_from_ids(manifest: Manifest, node_ids: Iterable[str]) -> List[ResultNode]:
+        selected: List[ResultNode] = []
+        for unique_id in node_ids:
+            if unique_id in manifest.nodes:
+                node = manifest.nodes[unique_id]
+                if node.is_relational and not node.is_ephemeral_model:
+                    selected.append(node)
+            elif unique_id in manifest.sources:
+                source = manifest.sources[unique_id]
+                selected.append(source)
+        return selected
+
+    def _get_selected_source_ids(self) -> Set[UniqueId]:
+        if self.manifest is None or self.graph is None:
+            raise DbtInternalError("manifest and graph must be set to perform node selection")
+
+        source_selector = ResourceTypeSelector(
+            graph=self.graph,
+            manifest=self.manifest,
+            previous_state=self.previous_state,
+            resource_types=[NodeType.Source],
+        )
+
+        return source_selector.get_graph_queue(self.get_selection_spec()).get_selected_nodes()
diff --git a/tests/functional/docs/test_generate.py b/tests/functional/docs/test_generate.py
index e1880689e72..a62fab73f82 100644
--- a/tests/functional/docs/test_generate.py
+++ b/tests/functional/docs/test_generate.py
@@ -92,10 +92,12 @@ class TestGenerateSelectSource(TestBaseGenerate):
     def test_select_source(self, project):
         run_dbt(["build"])
 
-        # 2 sources
+        # 2 sources, 1 selected
         catalog = run_dbt(["docs", "generate", "--select", "source:test.my_seed.sample_seed"])
-        # TODO: Filtering doesn't work for sources (should be 1)
-        assert len(catalog.sources) == 2
+        assert len(catalog.sources) == 1
+        assert "test.my_seed.sample_seed" in catalog.sources
+        # no nodes selected
+        assert len(catalog.nodes) == 0
 
 
 class TestGenerateSelectSeed(TestBaseGenerate):
@@ -106,6 +108,5 @@ def test_select_seed(self, project):
         catalog = run_dbt(["docs", "generate", "--select", "sample_seed"])
         assert len(catalog.nodes) == 1
         assert "seed.test.sample_seed" in catalog.nodes
-
-        # TODO: Filtering doesn't work for sources (should be 0)
-        assert len(catalog.sources) == 2
+        # no sources selected
+        assert len(catalog.sources) == 0

From 518e3bcb21febc534a7fa7715ff64d16ed2e8af6 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Thu, 25 Jan 2024 15:56:48 -0500
Subject: [PATCH 3/7] changelog entry

---
 .changes/unreleased/Fixes-20240125-155641.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 .changes/unreleased/Fixes-20240125-155641.yaml

diff --git a/.changes/unreleased/Fixes-20240125-155641.yaml b/.changes/unreleased/Fixes-20240125-155641.yaml
new file mode 100644
index 00000000000..c5e3b45d06f
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240125-155641.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix seed and source selection in `dbt docs generate`
+time: 2024-01-25T15:56:41.557934-05:00
+custom:
+  Author: michelleark
+  Issue: "9161"

From cd2bafc82fdc65e015cc1873b96e1d9e4f647b36 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Thu, 25 Jan 2024 16:07:28 -0500
Subject: [PATCH 4/7] more precise assertion in
 TestGenerateSelectSource.test_select_source

---
 tests/functional/docs/test_generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/docs/test_generate.py b/tests/functional/docs/test_generate.py
index a62fab73f82..3ba7c8929e0 100644
--- a/tests/functional/docs/test_generate.py
+++ b/tests/functional/docs/test_generate.py
@@ -95,7 +95,7 @@ def test_select_source(self, project):
         # 2 sources, 1 selected
         catalog = run_dbt(["docs", "generate", "--select", "source:test.my_seed.sample_seed"])
         assert len(catalog.sources) == 1
-        assert "test.my_seed.sample_seed" in catalog.sources
+        assert "source.test.my_seed.sample_seed" in catalog.sources
         # no nodes selected
         assert len(catalog.nodes) == 0
 

From 5634453018853d12530b6167609448e0c699eba5 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Thu, 25 Jan 2024 16:10:54 -0500
Subject: [PATCH 5/7] restore default behaviour for make_unique_id_map

---
 core/dbt/task/docs/generate.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/core/dbt/task/docs/generate.py b/core/dbt/task/docs/generate.py
index efc6b0bde78..f56868093cb 100644
--- a/core/dbt/task/docs/generate.py
+++ b/core/dbt/task/docs/generate.py
@@ -114,8 +114,13 @@ def add_column(self, data: PrimitiveDict):
         table.columns[column.name] = column
 
     def make_unique_id_map(
-        self, manifest: Manifest, selected_node_ids: Set[UniqueId]
+        self, manifest: Manifest, selected_node_ids: Optional[Set[UniqueId]] = None
     ) -> Tuple[Dict[str, CatalogTable], Dict[str, CatalogTable]]:
+        """
+        Create mappings between CatalogKeys and CatalogTables for nodes and sources, filteded by selected_node_ids.
+
+        By default, selected_node_ids is None and all nodes and sources defined in the manifest are included in the mappings.
+        """
         nodes: Dict[str, CatalogTable] = {}
         sources: Dict[str, CatalogTable] = {}
 
@@ -125,7 +130,7 @@ def make_unique_id_map(
             key = table.key()
             if key in node_map:
                 unique_id = node_map[key]
-                if unique_id in selected_node_ids:
+                if selected_node_ids is None or unique_id in selected_node_ids:
                     nodes[unique_id] = table.replace(unique_id=unique_id)
 
             unique_ids = source_map.get(table.key(), set())
@@ -137,7 +142,7 @@ def make_unique_id_map(
                         table.to_dict(omit_none=True),
                     )
                 else:
-                    if unique_id in selected_node_ids:
+                    if selected_node_ids is None or unique_id in selected_node_ids:
                         sources[unique_id] = table.replace(unique_id=unique_id)
         return nodes, sources
 
@@ -242,10 +247,11 @@ def run(self) -> CatalogArtifact:
         if self.manifest is None:
             raise DbtInternalError("self.manifest was None in run!")
 
-        selected_node_ids: Set[UniqueId] = set()
+        selected_node_ids: Optional[Set[UniqueId]] = None
         if self.args.empty_catalog:
             catalog_table: agate.Table = agate.Table([])
             exceptions: List[Exception] = []
+            selected_node_ids = set()
         else:
             adapter = get_adapter(self.config)
             with adapter.connection_named("generate_catalog"):

From e6ffbfd68be34fa57241c678986139f856a11fc9 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Mon, 29 Jan 2024 17:59:31 -0500
Subject: [PATCH 6/7] typos

---
 core/dbt/task/docs/generate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/dbt/task/docs/generate.py b/core/dbt/task/docs/generate.py
index f56868093cb..1712c17a9d0 100644
--- a/core/dbt/task/docs/generate.py
+++ b/core/dbt/task/docs/generate.py
@@ -117,7 +117,7 @@ def make_unique_id_map(
         self, manifest: Manifest, selected_node_ids: Optional[Set[UniqueId]] = None
     ) -> Tuple[Dict[str, CatalogTable], Dict[str, CatalogTable]]:
         """
-        Create mappings between CatalogKeys and CatalogTables for nodes and sources, filteded by selected_node_ids.
+        Create mappings between CatalogKeys and CatalogTables for nodes and sources, filtered by selected_node_ids.
 
         By default, selected_node_ids is None and all nodes and sources defined in the manifest are included in the mappings.
         """
@@ -264,7 +264,7 @@ def run(self) -> CatalogArtifact:
 
                     # Source selection is handled separately from main job_queue selection because
                     # SourceDefinition nodes cannot be safely compiled / run by the CompileRunner / CompileTask,
-                    # but can still be included in the catalog
+                    # but should still be included in the catalog based on the selection spec
                     selected_source_ids = self._get_selected_source_ids()
                     selected_source_nodes = self._get_nodes_from_ids(
                         self.manifest, selected_source_ids

From 2b4b810db680f8807fdab86ef1886ada91c76576 Mon Sep 17 00:00:00 2001
From: Michelle Ark <michelle.ark@dbtlabs.com>
Date: Mon, 29 Jan 2024 18:16:26 -0500
Subject: [PATCH 7/7] improve test setup to decouple seeds & sources + explicit
 testing for node and source w same relation

---
 core/dbt/task/docs/generate.py         |  5 +--
 tests/functional/docs/test_generate.py | 62 ++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/core/dbt/task/docs/generate.py b/core/dbt/task/docs/generate.py
index 5043a1d54cb..36fa394d041 100644
--- a/core/dbt/task/docs/generate.py
+++ b/core/dbt/task/docs/generate.py
@@ -141,9 +141,8 @@ def make_unique_id_map(
                         sources[unique_id].to_dict(omit_none=True),
                         table.to_dict(omit_none=True),
                     )
-                else:
-                    if selected_node_ids is None or unique_id in selected_node_ids:
-                        sources[unique_id] = table.replace(unique_id=unique_id)
+                elif selected_node_ids is None or unique_id in selected_node_ids:
+                    sources[unique_id] = table.replace(unique_id=unique_id)
         return nodes, sources
 
 
diff --git a/tests/functional/docs/test_generate.py b/tests/functional/docs/test_generate.py
index de9163ca743..4129fc0abb5 100644
--- a/tests/functional/docs/test_generate.py
+++ b/tests/functional/docs/test_generate.py
@@ -18,12 +18,13 @@
 
 sample_config = """
 sources:
-  - name: my_seed
+  - name: my_source_schema
     schema: "{{ target.schema }}"
     tables:
-      - name: sample_seed
-      - name: second_seed
-      - name: fake_seed
+      - name: sample_source
+      - name: second_source
+      - name: non_existent_source
+      - name: source_from_seed
 """
 
 
@@ -81,7 +82,13 @@ def test_select_limits_no_match(self, project):
 
 class TestGenerateCatalogWithSources(TestBaseGenerate):
     def test_catalog_with_sources(self, project):
+        # populate sources other than non_existent_source
+        project.run_sql("create table {}.sample_source (id int)".format(project.test_schema))
+        project.run_sql("create table {}.second_source (id int)".format(project.test_schema))
+
+        # build nodes
         run_dbt(["build"])
+
         catalog = run_dbt(["docs", "generate"])
 
         # 2 seeds + 2 models
@@ -92,7 +99,7 @@ def test_catalog_with_sources(self, project):
 
 class TestGenerateCatalogWithExternalNodes(TestBaseGenerate):
     @mock.patch("dbt.plugins.get_plugin_manager")
-    def test_catalog_with_sources(self, get_plugin_manager, project):
+    def test_catalog_with_external_node(self, get_plugin_manager, project):
         project.run_sql("create table {}.external_model (id int)".format(project.test_schema))
 
         run_dbt(["build"])
@@ -113,24 +120,61 @@ def test_catalog_with_sources(self, get_plugin_manager, project):
 
 
 class TestGenerateSelectSource(TestBaseGenerate):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "sample_seed.csv": sample_seed,
+            "second_seed.csv": sample_seed,
+            "source_from_seed.csv": sample_seed,
+        }
+
     def test_select_source(self, project):
         run_dbt(["build"])
 
-        # 2 sources, 1 selected
-        catalog = run_dbt(["docs", "generate", "--select", "source:test.my_seed.sample_seed"])
+        project.run_sql("create table {}.sample_source (id int)".format(project.test_schema))
+        project.run_sql("create table {}.second_source (id int)".format(project.test_schema))
+
+        # 2 existing sources, 1 selected
+        catalog = run_dbt(
+            ["docs", "generate", "--select", "source:test.my_source_schema.sample_source"]
+        )
         assert len(catalog.sources) == 1
-        assert "source.test.my_seed.sample_seed" in catalog.sources
+        assert "source.test.my_source_schema.sample_source" in catalog.sources
         # no nodes selected
         assert len(catalog.nodes) == 0
 
+        # 2 existing sources sources, 1 selected that has relation as a seed
+        catalog = run_dbt(
+            ["docs", "generate", "--select", "source:test.my_source_schema.source_from_seed"]
+        )
+        assert len(catalog.sources) == 1
+        assert "source.test.my_source_schema.source_from_seed" in catalog.sources
+        # seed with same relation that was not selected not in catalog
+        assert len(catalog.nodes) == 0
+
 
 class TestGenerateSelectSeed(TestBaseGenerate):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "sample_seed.csv": sample_seed,
+            "second_seed.csv": sample_seed,
+            "source_from_seed.csv": sample_seed,
+        }
+
     def test_select_seed(self, project):
         run_dbt(["build"])
 
-        # 2 seeds, 1 selected
+        # 3 seeds, 1 selected
         catalog = run_dbt(["docs", "generate", "--select", "sample_seed"])
         assert len(catalog.nodes) == 1
         assert "seed.test.sample_seed" in catalog.nodes
         # no sources selected
         assert len(catalog.sources) == 0
+
+        # 3 seeds, 1 selected that has same relation as a source
+        catalog = run_dbt(["docs", "generate", "--select", "source_from_seed"])
+        assert len(catalog.nodes) == 1
+        assert "seed.test.source_from_seed" in catalog.nodes
+        # source with same relation that was not selected not in catalog
+        assert len(catalog.sources) == 0