From 8316a4e4be0bb49fc584517fd7cd10b9c5a11447 Mon Sep 17 00:00:00 2001
From: lions1988 <110122560+lions1988@users.noreply.github.com>
Date: Sun, 10 Nov 2024 18:14:51 +0200
Subject: [PATCH 1/2] add ThreadPoolExecutor, remove query_scope

---
 .../generate_violations_csv.py               | 144 ++++++++----------
 1 file changed, 65 insertions(+), 79 deletions(-)

diff --git a/util-scripts/generate_violations_csv/generate_violations_csv.py b/util-scripts/generate_violations_csv/generate_violations_csv.py
index 3d4c6c7..ae025ee 100644
--- a/util-scripts/generate_violations_csv/generate_violations_csv.py
+++ b/util-scripts/generate_violations_csv/generate_violations_csv.py
@@ -1,137 +1,123 @@
-import os, shutil
+import os
+import shutil
 import requests
 import pandas as pd
-import json
 from pandas import json_normalize
 from datetime import datetime
-import argparse
+from concurrent.futures import ThreadPoolExecutor, as_completed
-
-def convert_argsparse(value):
-    return value.replace('+', '%2B')
-
-parser = argparse.ArgumentParser(description='Provide filter to ACS query')
-parser.add_argument('--query_scope', type=str, help='limit query to cluster/namespace, e.g: Cluster:cluster_name, Cluster:cluster_name+Namespace:namespace_name"', required=True)
-args = parser.parse_args()
-query_scope = convert_argsparse(args.query_scope)
-
-
-current_date = datetime.now().strftime("%Y-%m-%d")
-tmp_workdir = f"reports/tmp"
-
-violations_csv = f"{tmp_workdir}/violations_raw_{query_scope}_{current_date}.csv"
-violations_images_list_tmp = f"{tmp_workdir}/violations_images_list_{query_scope}_{current_date}.txt"
-violations_csv_tmp = f"{tmp_workdir}/violations_{query_scope}_{current_date}.csv"
-
-violations_images_csv = f"reports/violations_images_{query_scope}_{current_date}.csv"
-
-acs_api_key = os.getenv('acs_api_key')
-acs_central_api = os.getenv('acs_central_api')
-
-def verify_acs_api_key(acs_api_key):
+def verify_api_key(acs_api_key,acs_central_api):
     if acs_api_key is None:
-        raise Exception("ACS API key not found.")
+        raise Exception("acs_api_key not found.")
     if acs_central_api is None:
-        raise Exception("ACS Central API endpoint not found.")
+        raise Exception("acs_central_api not found.")
 
-def pull_violations(acs_api_key,query_scope):
+def pull_violations(acs_api_key,acs_central_api):
     offset = 0
-    limit = 50
+    limit = 500
     all_violations = []
 
     while True:
-        if query_scope == 'all':
-            endpoint = f"{acs_central_api}/alerts?&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
-            headers = {'Authorization': f'Bearer {acs_api_key}'}
-        else:
-            endpoint = f"{acs_central_api}/alerts?query={query_scope}&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
-            headers = {'Authorization': f'Bearer {acs_api_key}'}
-
+        endpoint = f"https://{acs_central_api}/v1/alerts?&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
+        headers = {'Authorization': f'Bearer {acs_api_key}'}
+
         response = requests.get(endpoint, headers=headers)
         response_body = response.json()
 
         if isinstance(response_body, dict) and 'alerts' in response_body:
-            results = response_body['alerts']
-            print(f"INFO : pulled {len(results)} violations")
+            results = response_body['alerts']
+            print(f"INFO : pulled {len(results)} violations")
         else:
-            print("ERROR: no violations found")
-            results = []
+            print("ERROR: no violations found")
+            results = []
 
         all_violations.extend(results)
 
         if len(results) < limit:
-            break
+            break
         else:
-            offset += limit
+            offset += limit
 
     return all_violations
 
-
-def pull_violations_images(acs_api_key):
-    violations_images = []
+def pull_violations_images(acs_api_key, violations_data):
     image_names_list = []
-    alert_ids = [item['id'] for item in violations_data]
-
-    for alert_id in alert_ids:
+    def fetch_image_names(alert_id):
         headers = {'Authorization': f'Bearer {acs_api_key}'}
-        response = requests.get(f"{acs_central_api}/alerts/{alert_id}",headers=headers)
+        response = requests.get(f"https://{acs_central_api}/v1/alerts/{alert_id}", headers=headers)
 
         if response.status_code == 200:
             result = response.json()
-            violations_images.append(result)
             image_names = []
 
             try:
-                containers = result['deployment']['containers']
-                for container in containers:
-                    full_name = container['image']['name']['fullName']
-                    image_names.append(full_name)
+                containers = result['deployment']['containers']
+                for container in containers:
+                    full_name = container['image']['name']['fullName']
+                    image_names.append(full_name)
             except KeyError:
                 print(f"ERROR: failed to find image names for alert_id: {alert_id}")
 
-            if image_names:
-                output = ','.join(image_names) if len(image_names) > 1 else image_names[0]
-                image_names_list.append(output)
+            if image_names:
+                return ','.join(image_names) if len(image_names) > 1 else image_names[0]
+            else:
+                return 'N/A'
+        else:
+            print(f"ERROR: failed to retrieve data for alert_id: {alert_id}")
+            return 'N/A'
 
+    with ThreadPoolExecutor(max_workers=10) as executor:
+        future_to_alert = {executor.submit(fetch_image_names, item['id']): item['id'] for item in violations_data}
+        for future in as_completed(future_to_alert):
+            image_names_list.append(future.result())
 
     with open(violations_images_list_tmp, 'w') as output_file:
         for image_names in image_names_list:
             output_file.write(f"{image_names}\n")
 
-
-def construct_violations_csv(violations_csv):
+def construct_violations_csv():
     df = pd.read_csv(violations_csv)
-    columns_to_delete = [0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,21,22,23]
-    df.drop(df.columns[columns_to_delete], axis=1, inplace=True)
-    df.to_csv(violations_csv_tmp, index=False)
+    columns_to_keep = [
+        'policy.name',
+        'policy.severity',
+        'deployment.name',
+        'deployment.clusterName',
+        'deployment.namespace'
+
+    ]
+    df = df[columns_to_keep]
 
-    df = pd.read_csv(violations_csv_tmp)
     with open(violations_images_list_tmp, 'r') as f:
         images_list = f.read().splitlines()
 
     df['images'] = images_list
-    df.to_csv(violations_images_csv , index=False)
-
+    df.to_csv(violations_images_csv, index=False)
 
 ##### <<< main >>> #######
 
-verify_acs_api_key(acs_api_key)
+current_date = datetime.now().strftime("%Y-%m-%d")
 
-os.makedirs(tmp_workdir, exist_ok=True)
+tmp_workdir = "reports/tmp"
+violations_csv = f"{tmp_workdir}/violations_raw_{current_date}.csv"
+violations_images_list_tmp = f"{tmp_workdir}/violations_images_list_{current_date}.txt"
+violations_csv_tmp = f"{tmp_workdir}/violations_{current_date}.csv"
+violations_images_csv = f"reports/violations_images_{current_date}.csv"
+acs_api_key = os.getenv('acs_api_key')
+acs_central_api = os.getenv('acs_central_api')
 
-violations_data = pull_violations(acs_api_key,query_scope)
-print(f"INFO : pulled total of {len(violations_data)} violations")
+verify_api_key(acs_api_key,acs_central_api)
+os.makedirs(tmp_workdir, exist_ok=True)
 
+violations_data = pull_violations(acs_api_key,acs_central_api)
+print(f"INFO : pulled total of {len(violations_data)} violations")
 
 if violations_data:
     df = json_normalize(violations_data)
+    df.to_csv(violations_csv, index=False)
+    print("INFO : matching image names to violations")
+    pull_violations_images(acs_api_key, violations_data)
+    print("INFO : exporting to csv")
+    construct_violations_csv()
+    print(f"INFO : CSV file saved at {violations_images_csv}")
 else:
-    df = pd.DataFrame()
-df.to_csv(violations_csv , index=False)
-
-print("INFO : matching image names to violations")
-violation_images = pull_violations_images(acs_api_key)
-
-print("INFO : exporting csv")
-construct_violations_csv(violations_csv)
-
+    print("ERROR: No violations data available, skipping CSV")
 
 shutil.rmtree(tmp_workdir)

From 1ad5daf3ead847f3c02cd8314006a44457261d8b Mon Sep 17 00:00:00 2001
From: lions1988 <110122560+lions1988@users.noreply.github.com>
Date: Sun, 10 Nov 2024 18:19:06 +0200
Subject: [PATCH 2/2] Update README.md

---
 .../generate_violations_csv/README.md | 32 ++++---------------
 1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/util-scripts/generate_violations_csv/README.md b/util-scripts/generate_violations_csv/README.md
index 8f64deb..cbce60f 100644
--- a/util-scripts/generate_violations_csv/README.md
+++ b/util-scripts/generate_violations_csv/README.md
@@ -1,36 +1,16 @@
-### Requirements
+### Install required Python libraries
 ```
 pip install -r requirements.txt
 ```
 
-### Set ACS API keys and Central API endpoint
+### Export ACS API key and Central API endpoint
 ```
-export acs_api_key=<>
-export acs_central_api=
+export acs_api_key=
+export acs_central_api=
 ```
 
-### Expected query arguments
-
-##### Limit cluster
-
-`Cluster:cluster_name1,cluster_name2`
-
-```
-python generate_violations_csv.py --query_scope Cluster:dev_cluster,stating_cluster
-```
-
-##### Limit cluster and namespace
-
-`Cluster:cluster_name+namespace_name`
-
-```
-python generate_violations_csv.py --query_scope Cluster:dev_cluster+Namespace:sandbox_namespace
-```
-
-#### Pull all violations
-
-`all`
+### Pull all violations
 ```
-python generate_violations_csv.py --query_scope all
+python generate_violations_csv.py
 ```
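A note on the `as_completed()` collection added in PATCH 1/2: `as_completed()` yields futures in completion order, while `construct_violations_csv()` pairs the collected image names with violation rows by position, so images can end up attached to the wrong rows. Below is a minimal, hypothetical sketch (not part of the patch) of an order-preserving variant; it assumes the patch's nested `fetch_image_names` helper is passed in as a callable, and the wrapper name `pull_violations_images_ordered` is invented for illustration.

```python
from concurrent.futures import ThreadPoolExecutor

def pull_violations_images_ordered(violations_data, fetch_image_names, max_workers=10):
    """Fetch image names concurrently while keeping results aligned with violations_data."""
    image_names_list = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit one task per alert id; the futures list preserves submission order.
        futures = [executor.submit(fetch_image_names, item['id']) for item in violations_data]
        # Iterating the futures themselves (instead of as_completed) returns results in
        # submission order, so the list lines up row-for-row with the dataframe built
        # from violations_data.
        for future in futures:
            image_names_list.append(future.result())
    return image_names_list
```

Calling `future.result()` in submission order only blocks until that particular task finishes, so the requests still run concurrently and overall wall-clock time stays bounded by the slowest outstanding request.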