Merge branch 'master' into master
chandanchowdhury authored Jul 22, 2024
2 parents 18628aa + a79b3c8 commit bcda1f1
Showing 10 changed files with 430 additions and 25 deletions.
23 changes: 23 additions & 0 deletions .github/pull_request_template.md
@@ -0,0 +1,23 @@
### Summary
> Describe your changes.


### Related issues or links
> Include links to relevant issues or other pages.
- https://github.com/lyft/cartography/issues/...


### Checklist

Provide proof that this works (this makes reviews move faster). Please perform one or more of the following:
- [ ] Update/add unit or integration tests.
- [ ] Include a screenshot showing what the graph looked like before and after your changes.
- [ ] Include a console log trace showing what happened before and after your changes.

If you are changing a node or relationship:
- [ ] Update the [schema](https://github.com/lyft/cartography/tree/master/docs/root/modules) and [readme](https://github.com/lyft/cartography/blob/master/docs/schema/README.md).

If you are implementing a new intel module:
- [ ] Use the NodeSchema [data model](https://lyft.github.io/cartography/dev/writing-intel-modules.html#defining-a-node).
64 changes: 64 additions & 0 deletions .github/workflows/ossf-scorecard.yml
@@ -0,0 +1,64 @@
---
# https://github.com/marketplace/actions/ossf-scorecard-action
name: Scorecards supply-chain security
on:
# Only the default branch is supported.
branch_protection_rule:
schedule:
# Weekly on Saturdays.
- cron: '30 1 * * 6'
push:
branches: [ main, master ]

# Declare default permissions as read only.
permissions: read-all

jobs:
analysis:
name: Scorecards analysis
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Used to receive a badge. (Upcoming feature)
id-token: write
actions: read
contents: read

steps:
- name: "Checkout code"
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
with:
persist-credentials: false

- name: "Run analysis"
uses: ossf/scorecard-action@3e15ea8318eee9b333819ec77a36aca8d39df13e # tag=v1.1.1
with:
results_file: results.sarif
results_format: sarif
# (Optional) Read-only PAT token. Uncomment the `repo_token` line below if:
# - you want to enable the Branch-Protection check on a *public* repository, or
# - you are installing Scorecards on a *private* repository
# To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat.
# repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}

# Publish the results for public repositories to enable scorecard badges. For more details, see
# https://github.com/ossf/scorecard-action#publishing-results.
# For private repositories, `publish_results` will automatically be set to `false`, regardless
# of the value entered here.
publish_results: true

# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535 # tag=v3.0.0
with:
name: SARIF file
path: results.sarif
retention-days: 5

# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # tag=v1.0.26
with:
sarif_file: results.sarif
37 changes: 26 additions & 11 deletions cartography/intel/aws/ec2/launch_templates.py
@@ -3,6 +3,7 @@

import boto3
import neo4j
from botocore.exceptions import ClientError

from .util import get_botocore_config
from cartography.client.core.tx import load
@@ -17,13 +18,30 @@

@timeit
@aws_handle_regions
def get_launch_templates(boto3_session: boto3.session.Session, region: str) -> list[dict[str, Any]]:
def get_launch_templates(
boto3_session: boto3.session.Session,
region: str,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
client = boto3_session.client('ec2', region_name=region, config=get_botocore_config())
paginator = client.get_paginator('describe_launch_templates')
templates: list[dict[str, Any]] = []
template_versions: list[dict[str, Any]] = []
for page in paginator.paginate():
templates.extend(page['LaunchTemplates'])
return templates
paginated_templates = page['LaunchTemplates']
for template in paginated_templates:
template_id = template['LaunchTemplateId']
try:
versions = get_launch_template_versions_by_template(boto3_session, template_id, region)
except ClientError as e:
logger.warning(
f"Failed to get launch template versions for {template_id}: {e}",
exc_info=True,
)
versions = []
# Using a key not defined in latest boto3 documentation
template_versions.extend(versions)
templates.extend(paginated_templates)
return templates, template_versions


def transform_launch_templates(templates: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -55,17 +73,16 @@ def load_launch_templates(

@timeit
@aws_handle_regions
def get_launch_template_versions(
def get_launch_template_versions_by_template(
boto3_session: boto3.session.Session,
templates: list[dict[str, Any]],
template: str,
region: str,
) -> list[dict[str, Any]]:
client = boto3_session.client('ec2', region_name=region, config=get_botocore_config())
v_paginator = client.get_paginator('describe_launch_template_versions')
template_versions = []
for template in templates:
for versions in v_paginator.paginate(LaunchTemplateId=template['LaunchTemplateId']):
template_versions.extend(versions['LaunchTemplateVersions'])
for versions in v_paginator.paginate(LaunchTemplateId=template):
template_versions.extend(versions['LaunchTemplateVersions'])
return template_versions


@@ -136,11 +153,9 @@ def sync_ec2_launch_templates(
) -> None:
for region in regions:
logger.info(f"Syncing launch templates for region '{region}' in account '{current_aws_account_id}'.")
templates = get_launch_templates(boto3_session, region)
templates, versions = get_launch_templates(boto3_session, region)
templates = transform_launch_templates(templates)
load_launch_templates(neo4j_session, templates, region, current_aws_account_id, update_tag)

versions = get_launch_template_versions(boto3_session, templates, region)
versions = transform_launch_template_versions(versions)
load_launch_template_versions(neo4j_session, versions, region, current_aws_account_id, update_tag)

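For context, a minimal sketch of how the reworked getter is now consumed (the region value is a placeholder, not taken from the diff):

import boto3

from cartography.intel.aws.ec2.launch_templates import get_launch_templates

# One call now returns both the templates and their versions; a per-template
# ClientError is logged as a warning and that template contributes no versions.
session = boto3.Session(region_name='us-east-1')
templates, versions = get_launch_templates(session, 'us-east-1')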
5 changes: 3 additions & 2 deletions cartography/intel/cve/feed.py
@@ -23,7 +23,8 @@
logger = logging.getLogger(__name__)

MAX_RETRIES = 3
REQUEST_TIMEOUT = 10
# Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
CONNECT_AND_READ_TIMEOUT = (60, 60)
CVE_FEED_ID = "NIST_NVD"
BATCH_SIZE_DAYS = 120
RESULTS_PER_PAGE = 2000
@@ -87,7 +88,7 @@ def _call_cves_api(url: str, api_key: str, params: Dict[str, Any]) -> Dict[Any,
while params["resultsPerPage"] > 0 or params["startIndex"] < totalResults:
try:
res = requests.get(
url, params=params, headers=headers, timeout=REQUEST_TIMEOUT,
url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT,
)
res.raise_for_status()
except requests.exceptions.HTTPError:
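The timeout change above switches from a single number to a (connect, read) tuple, which requests supports natively; a minimal illustration (the URL is a placeholder):

import requests

# A single number applies the same limit to connecting and to reading.
requests.get('https://example.com', timeout=10)

# A tuple sets the connect timeout and the read timeout separately,
# matching CONNECT_AND_READ_TIMEOUT = (60, 60) above.
requests.get('https://example.com', timeout=(60, 60))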
51 changes: 41 additions & 10 deletions cartography/intel/github/teams.py
@@ -1,4 +1,6 @@
import logging
from collections import namedtuple
from time import sleep
from typing import Any
from typing import Dict
from typing import List
@@ -15,6 +17,8 @@

logger = logging.getLogger(__name__)

RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission'])


@timeit
def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData, Dict[str, Any]]:
@@ -45,26 +49,53 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,

@timeit
def _get_team_repos_for_multiple_teams(
team_raw_data: List[Dict[str, Any]],
team_raw_data: list[dict[str, Any]],
org: str,
api_url: str,
token: str,
) -> Dict[str, Any]:
result = {}
) -> dict[str, list[RepoPermission]]:
result: dict[str, list[RepoPermission]] = {}
for team in team_raw_data:
team_name = team['slug']
repo_count = team['repositories']['totalCount']

team_repos = _get_team_repos(org, api_url, token, team_name) if repo_count > 0 else None
if repo_count == 0:
# This team has access to no repos so let's move on
result[team_name] = []
continue

repo_urls = []
repo_permissions = []
if team_repos:
repo_urls = [t['url'] for t in team_repos.nodes] if team_repos.nodes else []
repo_permissions = [t['permission'] for t in team_repos.edges] if team_repos.edges else []

max_tries = 5

for current_try in range(1, max_tries + 1):
team_repos = _get_team_repos(org, api_url, token, team_name)

try:
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
for repo in team_repos.nodes or []:
repo_urls.append(repo['url'])

# The `or []` is because `.edges` can be None.
for edge in team_repos.edges or []:
repo_permissions.append(edge['permission'])
# We're done! Break out of the retry loop.
break

except TypeError:
# Handles issue #1334
logger.warning(
f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
exc_info=True,
)
if current_try == max_tries:
raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
sleep(current_try ** 2)

# Shape = [(repo_url, 'WRITE'), ...]]
result[team_name] = list(zip(repo_urls, repo_permissions))
result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
return result


@@ -114,8 +145,8 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
def transform_teams(
team_paginated_data: PaginatedGraphqlData,
org_data: Dict[str, Any],
team_repo_data: Dict[str, Any],
) -> List[Dict[str, Any]]:
team_repo_data: dict[str, list[RepoPermission]],
) -> list[dict[str, Any]]:
result = []
for team in team_paginated_data.nodes:
team_name = team['slug']
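The retry loop above waits current_try ** 2 seconds between attempts; a stripped-down sketch of that pattern in isolation (fetch_fn and retry_with_quadratic_backoff are illustrative names, not part of the diff):

from time import sleep
from typing import Any, Callable

def retry_with_quadratic_backoff(fetch_fn: Callable[[], Any], max_tries: int = 5) -> Any:
    for current_try in range(1, max_tries + 1):
        try:
            return fetch_fn()
        except TypeError:
            # Same shape as the teams.py handler: give up after max_tries,
            # otherwise back off 1s, 4s, 9s, 16s, ... between attempts.
            if current_try == max_tries:
                raise
            sleep(current_try ** 2)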
2 changes: 1 addition & 1 deletion cartography/util.py
@@ -225,7 +225,7 @@ def aws_paginate(
return items


AWSGetFunc = TypeVar('AWSGetFunc', bound=Callable[..., List])
AWSGetFunc = TypeVar('AWSGetFunc', bound=Callable[..., Iterable])

# fix for AWS TooManyRequestsException
# https://github.com/lyft/cartography/issues/297
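Relaxing the bound from List to Iterable lets decorators typed with AWSGetFunc accept getters that return tuples, like the reworked get_launch_templates above; a small sketch of the idea (passthrough and get_things are illustrative names):

from typing import Any, Callable, Iterable, TypeVar

AWSGetFunc = TypeVar('AWSGetFunc', bound=Callable[..., Iterable])

def passthrough(func: AWSGetFunc) -> AWSGetFunc:
    # Stand-in for a decorator typed with AWSGetFunc: any getter whose
    # return type is Iterable is accepted, and tuples now qualify.
    return func

@passthrough
def get_things() -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    return [], []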
2 changes: 1 addition & 1 deletion setup.py
@@ -1,7 +1,7 @@
from setuptools import find_packages
from setuptools import setup

__version__ = '0.92.0'
__version__ = '0.94.0rc1'


setup(
1 change: 1 addition & 0 deletions test-requirements.txt
@@ -1,4 +1,5 @@
backoff>=2.1.2
moto
pre-commit
pytest>=6.2.4
pytest-mock
@@ -1,3 +1,11 @@
from unittest.mock import patch

import boto3
from botocore.exceptions import ClientError
from moto import mock_aws

import cartography.intel.aws.ec2.launch_templates
from cartography.intel.aws.ec2.launch_templates import get_launch_templates
from cartography.intel.aws.ec2.launch_templates import load_launch_template_versions
from cartography.intel.aws.ec2.launch_templates import load_launch_templates
from cartography.intel.aws.ec2.launch_templates import transform_launch_template_versions
@@ -11,6 +19,48 @@
TEST_UPDATE_TAG = 123456789


@mock_aws(config={'core': {'reset_boto3_session': True, 'mock_credentials': True}})
@patch.object(
cartography.intel.aws.ec2.launch_templates,
'get_launch_template_versions_by_template',
)
def test_get_launch_template_throws_exception(mock_get_template_versions, *args):
# Arrange
template_data = {
"ImageId": "ami-abc123",
"TagSpecifications": [
{
"ResourceType": "instance", "Tags": [
{"Key": "eks:cluster-name", "Value": "eks-cluster-example"},
{"Key": "eks:nodegroup-name", "Value": "private-node-group-example"},
],
},
],
"SecurityGroupIds": ["sg-1234"],
}
client = boto3.client('ec2', region_name=TEST_REGION)
mock_template = client.create_launch_template(
LaunchTemplateName='eks-00000000-0000-0000-0000-000000000000',
LaunchTemplateData=template_data,
)
template_id = mock_template['LaunchTemplate']['LaunchTemplateId']
error_response = {
"Error": {
"Code": "InvalidLaunchTemplateId.NotFound",
"Message": f"The specified launch template, with template ID {template_id}, does not exist.",
},
}
mock_get_template_versions.side_effect = ClientError(error_response, "DescribeLaunchTemplateVersions")
session = boto3.Session(region_name=TEST_REGION)
# Act: get the launch template versions

templates, versions = get_launch_templates(session, TEST_REGION)

# Assert: the launch template versions are as expected
assert len(templates) == 1
assert len(versions) == 0


def test_load_launch_templates(neo4j_session, *args):
# Arrange: an AWSAccount must exist
neo4j_session.run(