bridgecrewio · nimrodkor · Oct 30, 2022 · Oct 4, 2022 · Oct 5, 2022 · Oct 5, 2022
diff --git a/checkov/common/bridgecrew/integration_features/features/attribute_resource_types_integration.py b/checkov/common/bridgecrew/integration_features/features/attribute_resource_types_integration.py
@@ -0,0 +1,157 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, List, Set
+
+from checkov.common.bridgecrew.integration_features.base_integration_feature import BaseIntegrationFeature
+from checkov.common.bridgecrew.platform_integration import bc_integration
+from checkov.common.output.report import Report
+from checkov.common.typing import _ResourceDefinitions, _ResourceTypes
+
+if TYPE_CHECKING:
+    from checkov.common.bridgecrew.platform_integration import BcPlatformIntegration
+
+
+ALL_TYPES = '__all__'
+
+
+class AttributeResourceTypesIntegration(BaseIntegrationFeature):
+    """Maps policy attributes to the resource types that have them, using the `resourceDefinitions` of the run config."""
+
+    def __init__(self, bc_integration: BcPlatformIntegration) -> None:
+        # NOTE(review): the custom policies integration (order=2) parses checks in its pre_scan and the parser queries
+        # this feature's attribute map - confirm the map is already populated when that happens
+        super().__init__(bc_integration=bc_integration, order=3)  # must be after policy metadata
+        self.attribute_resources: Dict[str, Dict[str, List[str]]] = {}
+        self.provider_resources: Dict[str, List[str]] = {}
+
+    def is_valid(self) -> bool:
+        """Return True when the integration is configured, downloads are not skipped and no failure was flagged."""
+        return (
+            self.bc_integration.is_integration_configured()
+            and not self.bc_integration.skip_download
+            and not self.integration_feature_failures
+        )
+
+    def pre_runner(self) -> None:
+        """Not used by this feature."""
+
+    def post_runner(self, scan_reports: Report) -> None:
+        """Not used by this feature."""
+
+    def pre_scan(self) -> None:
+        """Build the resource maps from the run config; flag a failure if it is missing or cannot be processed."""
+        try:
+            run_config = self.bc_integration.customer_run_config_response
+            if not run_config:
+                logging.debug('In the pre-scan for attribute resource types, but nothing was fetched from the platform')
+                self.integration_feature_failures = True
+                return
+
+            if 'resourceDefinitions' not in run_config:
+                # TODO remove - this makes it easier to make sure that platform scans will also work
+                logging.debug('resourceDefinitions is not in the run config response - might not be deployed to the platform yet')
+                return
+
+            self._build_attribute_resource_map(run_config['resourceDefinitions'])
+        except Exception:
+            # best effort: an unusable payload only disables the substitution of 'all' resource types
+            self.integration_feature_failures = True
+            logging.debug("Scanning without handling 'all' resource type policies.", exc_info=True)
+
+    def get_attribute_resource_types(self, solver: Dict[str, Any], provider: Optional[str] = None) -> Optional[List[str]]:
+        """Return the resource types having the solver's root attribute for `provider` (all providers if falsy), or None."""
+        attr = solver.get('attribute')
+        if not attr:
+            return None
+
+        resource_types = self.attribute_resources.get(attr.split('.', 1)[0])  # only the part before the first '.' is mapped
+        if not resource_types:
+            return None
+
+        return resource_types.get(provider or ALL_TYPES)
+
+    def _build_attribute_resource_map(self, resource_definitions: _ResourceDefinitions) -> None:
+        """
+        Builds two internal maps to be referenced during policy evaluation.
+
+        1. self.attribute_resources - a mapping of attributes to providers to resource types in that provider
+        that have the attribute.
+
+        Example:
+        {
+          tags: {
+            aws: [
+              aws_s3_bucket,
+              aws_instance,
+              ...
+            ],
+            azure: [
+              azurerm_storage_account,
+              ...
+            ],
+            __all__: [
+              aws_s3_bucket,
+              aws_instance,
+              ...
+              azurerm_storage_account,
+              ...
+            ]
+          },
+          labels: {
+            gcp: [
+              google_sql_database_instance,
+              ...
+            ],
+            __all__: [...]
+          },
+          freeform_tags: {
+            oci:
+            ...etc
+        }
+
+        Later, whenever we see a policy condition with "all" resource types and one of these attributes, we can
+        replace the resource list with the list from the given provider, or __all__ if we do not know the provider
+
+        2. self.provider_resources - A mapping of providers to all resource types for that provider (irrespective of attributes)
+
+        Both maps are rebuilt from scratch on every call; the lists in self.attribute_resources are sorted.
+
+        :param resource_definitions: returned from the platform, contains a map of resource types to their metadata
+        (provider and attributes), and a map of attribute names to their providers that we should substitute whenever
+        we see "all" resource types in a yaml policy
+        """
+
+        filter_attributes: Dict[str, List[str]] = resource_definitions['filterAttributes']
+        resource_types: Dict[str, _ResourceTypes] = resource_definitions['resourceTypes']
+
+        # collect into sets first, so a resource type is listed once even if several of its arguments share a root
+        attribute_resources: Dict[str, Dict[str, Set[str]]] = {
+            attribute: {p: set() for p in (*providers, ALL_TYPES)} for attribute, providers in filter_attributes.items()
+        }
+        provider_resources: Dict[str, List[str]] = {}
+
+        for resource, properties in resource_types.items():
+            provider = properties['provider'].lower()
+            if provider == 'ali':
+                # 'alibabacloud' is the actual provider value in the custom policy, but the resource provider is just 'ali'
+                provider = 'alibabacloud'
+
+            provider_resources.setdefault(provider, []).append(resource)
+
+            for attribute in properties['arguments']:
+                root = attribute.split('.', 1)[0]
+                if provider not in filter_attributes.get(root, ()):
+                    continue
+                attribute_resources[root][provider].add(resource)
+                attribute_resources[root][ALL_TYPES].add(resource)
+
+        # publish only complete maps; sorted lists keep the result independent of set iteration order
+        self.provider_resources = provider_resources
+        self.attribute_resources = {
+            attribute: {provider: sorted(resources) for provider, resources in provider_map.items()}
+            for attribute, provider_map in attribute_resources.items()
+        }
+
+
+integration = AttributeResourceTypesIntegration(bc_integration)
diff --git a/checkov/common/bridgecrew/integration_features/features/custom_policies_integration.py b/checkov/common/bridgecrew/integration_features/features/custom_policies_integration.py
@@ -11,7 +11,7 @@
 from checkov.common.bridgecrew.platform_integration import bc_integration
 from checkov.common.bridgecrew.severities import Severities
 from checkov.common.checks_infra.checks_parser import NXGraphCheckParser
-from checkov.common.checks_infra.registry import Registry, get_graph_checks_registry
+from checkov.common.checks_infra.registry import get_graph_checks_registry
 
 if TYPE_CHECKING:
     from checkov.common.bridgecrew.platform_integration import BcPlatformIntegration
@@ -24,9 +24,8 @@
 
 class CustomPoliciesIntegration(BaseIntegrationFeature):
     def __init__(self, bc_integration: BcPlatformIntegration) -> None:
-        super().__init__(bc_integration=bc_integration, order=1)  # must be after policy metadata and before suppression integration
+        super().__init__(bc_integration=bc_integration, order=2)  # must be after policy metadata and before suppression integration
         self.platform_policy_parser = NXGraphCheckParser()
-        self.policies_url = f"{self.bc_integration.api_url}/api/v1/policies/table/data"
         self.bc_cloned_checks: dict[str, list[dict[str, Any]]] = defaultdict(list)
 
     def is_valid(self) -> bool:
@@ -53,8 +52,7 @@ def pre_scan(self) -> None:
                         policy['severity'] = Severities[policy['severity']]
                         self.bc_cloned_checks[source_incident_id].append(policy)
                         continue
-                    resource_types = Registry._get_resource_types(converted_check['metadata'])
-                    check = self.platform_policy_parser.parse_raw_check(converted_check, resources_types=resource_types)
+                    check = self.platform_policy_parser.parse_raw_check(converted_check)
                     check.severity = Severities[policy['severity']]
                     check.bc_id = check.id
                     if check.frameworks:
@@ -84,6 +82,13 @@ def _convert_raw_check(policy: dict[str, Any]) -> dict[str, Any]:
             'category': policy['category'],
             'frameworks': policy.get('frameworks', [])
         }
+
+        provider = policy.get('provider')
+        if provider:
+            metadata['scope'] = {
+                'provider': provider.lower()
+            }
+
         check = {
             'metadata': metadata,
             'definition': json.loads(policy['code'])

diff --git a/checkov/common/bridgecrew/integration_features/features/repo_config_integration.py b/checkov/common/bridgecrew/integration_features/features/repo_config_integration.py
@@ -16,7 +16,7 @@
 
 class RepoConfigIntegration(BaseIntegrationFeature):
     def __init__(self, bc_integration: BcPlatformIntegration) -> None:
-        super().__init__(bc_integration=bc_integration, order=0)
+        super().__init__(bc_integration=bc_integration, order=1)
         self.skip_paths: set[str] = set()
         self.enforcement_rule: dict[str, Any] = {}
         self.code_category_configs: dict[str, CodeCategoryConfiguration] = {}

diff --git a/checkov/common/bridgecrew/integration_features/features/suppressions_integration.py b/checkov/common/bridgecrew/integration_features/features/suppressions_integration.py
@@ -22,7 +22,7 @@
 
 class SuppressionsIntegration(BaseIntegrationFeature):
     def __init__(self, bc_integration: BcPlatformIntegration) -> None:
-        super().__init__(bc_integration=bc_integration, order=2)  # must be after the custom policies integration
+        super().__init__(bc_integration=bc_integration, order=4)  # must be after the custom policies integration
         self.suppressions: dict[str, list[dict[str, Any]]] = {}
         self.suppressions_url = f"{self.bc_integration.api_url}/api/v1/suppressions"
 

diff --git a/checkov/common/bridgecrew/platform_integration.py b/checkov/common/bridgecrew/platform_integration.py
@@ -524,7 +524,7 @@ def get_platform_run_config(self) -> None:
             self.get_public_run_config()
 
     def get_run_config_url(self) -> str:
-        return f'{self.platform_run_config_url}?module={"bc" if self.is_bc_token(self.bc_api_key) else "pc"}'
+        return f'{self.platform_run_config_url}?module={"bc" if self.is_bc_token(self.bc_api_key) else "pc"}&includeResources=true'
 
     def get_customer_run_config(self) -> None:
         if self.skip_download is True:

diff --git a/checkov/common/checks_infra/checks_parser.py b/checkov/common/checks_infra/checks_parser.py
@@ -46,6 +46,7 @@
     EqualsIgnoreCaseAttributeSolver,
     NotEqualsIgnoreCaseAttributeSolver
 )
+from checkov.common.bridgecrew.integration_features.features.attribute_resource_types_integration import integration as attribute_resource_type_integration
 from checkov.common.checks_infra.solvers.connections_solvers.connection_one_exists_solver import \
     ConnectionOneExistsSolver
 from checkov.common.graph.checks_infra.base_check import BaseGraphCheck
@@ -133,18 +134,33 @@
 class NXGraphCheckParser(BaseGraphCheckParser):
     def parse_raw_check(self, raw_check: Dict[str, Dict[str, Any]], **kwargs: Any) -> BaseGraphCheck:
+        """Parse a raw policy (definition, metadata and optional scope) into a graph check with its solver set."""
         policy_definition = raw_check.get("definition", {})
-        check = self._parse_raw_check(policy_definition, kwargs.get("resources_types"))
-        check.id = raw_check.get("metadata", {}).get("id", "")
-        check.name = raw_check.get("metadata", {}).get("name", "")
-        check.category = raw_check.get("metadata", {}).get("category", "")
-        check.frameworks = raw_check.get("metadata", {}).get("frameworks", [])
-        check.guideline = raw_check.get("metadata", {}).get("guideline")
+        metadata = raw_check.get("metadata", {})
+
+        # the first approach comes from the custom policy integration
+        provider = metadata.get("scope", {}).get("provider")
+
+        # but the platform injects check metadata in a different way
+        if not provider and "scope" in raw_check:
+            raw_provider = raw_check["scope"].get("provider")  # None, an empty list, a list with the provider, or a plain string
+            if raw_provider:
+                provider = (raw_provider if isinstance(raw_provider, str) else raw_provider[0]).lower()
+
+        check = self._parse_raw_check(policy_definition, provider)
+
+        check.id = metadata.get("id", "")
+        check.name = metadata.get("name", "")
+        check.category = metadata.get("category", "")
+        check.frameworks = metadata.get("frameworks", [])
+        check.guideline = metadata.get("guideline")
+        check.provider = provider
+
         solver = self.get_check_solver(check)
         check.set_solver(solver)
 
         return check
 
-    def _parse_raw_check(self, raw_check: Dict[str, Any], resources_types: Optional[List[str]]) -> BaseGraphCheck:
+    def _parse_raw_check(self, raw_check: Dict[str, Any], provider: Optional[str]) -> BaseGraphCheck:
         check = BaseGraphCheck()
         complex_operator = get_complex_operator(raw_check)
         if complex_operator:
@@ -158,7 +174,11 @@ def _parse_raw_check(self, raw_check: Dict[str, Any], resources_types: Optional[
                 sub_solvers = [sub_solvers]
 
             for sub_solver in sub_solvers:
-                check.sub_checks.append(self._parse_raw_check(sub_solver, resources_types))
+                check.sub_checks.append(self._parse_raw_check(sub_solver, provider))
+
+            # conditions with enumerated resource types will have them as a list. conditions where `all` is replaced with the
+            # actual list of resources for the attribute (e.g. tags) get them from the attribute resource types integration,
+            # which collects them in sets but converts them to lists, so they also end up as a list in the policy resource types
             resources_types_of_sub_solvers = [
                 force_list(q.resource_types) for q in check.sub_checks if q is not None and q.resource_types is not None
             ]
@@ -173,13 +193,17 @@ def _parse_raw_check(self, raw_check: Dict[str, Any], resources_types: Optional[
                     or (isinstance(resource_type, str) and resource_type.lower() == "all")
                     or (isinstance(resource_type, list) and resource_type[0].lower() == "all")
             ):
-                check.resource_types = resources_types or []
+                resource_types_for_attribute = attribute_resource_type_integration.get_attribute_resource_types(raw_check, provider)
+                check.resource_types = resource_types_for_attribute or []
             else:
                 check.resource_types = resource_type
 
             connected_resources_type = raw_check.get("connected_resource_types", [])
+
+            # TODO this code has a capital 'All', so I am pretty sure this rarely gets used. need to validate the use case
+            # and make it work with the resource types from the platform if needed
             if connected_resources_type == ["All"] or connected_resources_type == "all":
-                check.connected_resources_types = resources_types or []
+                check.connected_resources_types = []
             else:
                 check.connected_resources_types = connected_resources_type
 

diff --git a/checkov/common/checks_infra/registry.py b/checkov/common/checks_infra/registry.py
@@ -4,15 +4,14 @@
 import logging
 import os
 from pathlib import Path
-from typing import Any, TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 import yaml
 
 from checkov.common.checks_infra.checks_parser import NXGraphCheckParser
 from checkov.common.graph.checks_infra.base_parser import BaseGraphCheckParser
 from checkov.common.graph.checks_infra.registry import BaseRegistry
 from checkov.runner_filter import RunnerFilter
-from checkov.common.checks_infra.resources_types import resources_types
 
 if TYPE_CHECKING:
     from checkov.common.graph.checks_infra.base_check import BaseGraphCheck
@@ -48,9 +47,7 @@ def _load_checks_from_dir(self, directory: str, external_check: bool) -> None:
                         if not isinstance(check_json, dict):
                             self.logger.error(f"Loaded data from JSON is not Dict. Skipping. Data: {check_json}.")
                             continue
-                        check = self.parser.parse_raw_check(
-                            check_json, resources_types=self._get_resource_types(check_json)
-                        )
+                        check = self.parser.parse_raw_check(check_json)
                         if not any(c for c in self.checks if check.id == c.id):
                             if external_check:
                                 # Note the external check; used in the should_run_check logic
@@ -60,11 +57,6 @@ def _load_checks_from_dir(self, directory: str, external_check: bool) -> None:
     def load_external_checks(self, dir: str) -> None:
         self._load_checks_from_dir(dir, True)
 
-    @staticmethod
-    def _get_resource_types(check_json: dict[str, dict[str, Any]]) -> list[str] | None:
-        provider = check_json.get("scope", {}).get("provider", "").lower()
-        return resources_types.get(provider)
-
 
 _registry_instances: dict[str, Registry] = {}