Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve query speed #113

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 6 additions & 26 deletions util-scripts/generate_violations_csv/README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,16 @@
### Requirements
### Install required python libs
```
pip install -r requirements.txt
```

### Set ACS API keys and Central API endpoint
### Export ACS API key and Central
```
export acs_api_key=<<api_key>>
export acs_central_api=https://<acs_central_url>/v1
export acs_api_key=<api_key>
export acs_central_api=<acs_central_url:443>
```

### Expected query arguments

##### Limit cluster

`Cluster:cluster_name1,cluster_name2`

```
python generate_violations_csv.py --query_scope Cluster:dev_cluster,staging_cluster
```

##### Limit cluster and namespace

`Cluster:cluster_name+namespace_name`

```
python generate_violations_csv.py --query_scope Cluster:dev_cluster+Namespace:sandbox_namespace
```

#### Pull all violations

`all`
### Pull all violations

```
python generate_violations_csv.py --query_scope all
python generate_violations_csv.py
```
144 changes: 65 additions & 79 deletions util-scripts/generate_violations_csv/generate_violations_csv.py
Original file line number Diff line number Diff line change
@@ -1,137 +1,123 @@
import os, shutil
import os
import shutil
import requests
import pandas as pd
import json
from pandas import json_normalize
from datetime import datetime
import argparse
from concurrent.futures import ThreadPoolExecutor, as_completed


def convert_argsparse(value):
return value.replace('+', '%2B')

parser = argparse.ArgumentParser(description='Provide filter to ACS query')
parser.add_argument('--query_scope', type=str, help='limit query to cluster/namespace, e.g: Cluster:cluster_name, Cluster:cluster_name+Namespace:namespace_name"', required=True)
args = parser.parse_args()
query_scope = convert_argsparse(args.query_scope)


current_date = datetime.now().strftime("%Y-%m-%d")
tmp_workdir = f"reports/tmp"

violations_csv = f"{tmp_workdir}/violations_raw_{query_scope}_{current_date}.csv"
violations_images_list_tmp = f"{tmp_workdir}/violations_images_list_{query_scope}_{current_date}.txt"
violations_csv_tmp = f"{tmp_workdir}/violations_{query_scope}_{current_date}.csv"

violations_images_csv = f"reports/violations_images_{query_scope}_{current_date}.csv"

acs_api_key = os.getenv('acs_api_key')
acs_central_api = os.getenv('acs_central_api')

def verify_acs_api_key(acs_api_key):
def verify_api_key(acs_api_key,acs_central_api):
if acs_api_key is None:
raise Exception("ACS API key not found.")
raise Exception("acs_api_key not found.")
if acs_central_api is None:
raise Exception("ACS Central API endpoint not found.")
raise Exception("acs_central_api not found.")

def pull_violations(acs_api_key,query_scope):
def pull_violations(acs_api_key,acs_central_api):
offset = 0
limit = 50
limit = 500
all_violations = []

while True:
if query_scope == 'all':
endpoint = f"{acs_central_api}/alerts?&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
headers = {'Authorization': f'Bearer {acs_api_key}'}
else:
endpoint = f"{acs_central_api}/alerts?query={query_scope}&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
headers = {'Authorization': f'Bearer {acs_api_key}'}

endpoint = f"https://{acs_central_api}/v1/alerts?&pagination.offset={offset}&pagination.limit={limit}&pagination.sortOption.field=Violation Time&pagination.sortOption.reversed=true"
headers = {'Authorization': f'Bearer {acs_api_key}'}

response = requests.get(endpoint, headers=headers)
response_body = response.json()

if isinstance(response_body, dict) and 'alerts' in response_body:
results = response_body['alerts']
print(f"INFO : pulled {len(results)} violations")
results = response_body['alerts']
print(f"INFO : pulled {len(results)} violations")
else:
print("ERROR: no violations found")
results = []
print("ERROR: no violations found")
results = []
all_violations.extend(results)

if len(results) < limit:
break
break
else:
offset += limit
offset += limit
return all_violations


def pull_violations_images(acs_api_key):
violations_images = []
def pull_violations_images(acs_api_key, violations_data):
image_names_list = []

alert_ids = [item['id'] for item in violations_data]

for alert_id in alert_ids:
def fetch_image_names(alert_id):
headers = {'Authorization': f'Bearer {acs_api_key}'}
response = requests.get(f"{acs_central_api}/alerts/{alert_id}",headers=headers)
response = requests.get(f"https://{acs_central_api}/v1/alerts/{alert_id}", headers=headers)
if response.status_code == 200:
result = response.json()
violations_images.append(result)

image_names = []
try:
containers = result['deployment']['containers']
for container in containers:
full_name = container['image']['name']['fullName']
image_names.append(full_name)
containers = result['deployment']['containers']
for container in containers:
full_name = container['image']['name']['fullName']
image_names.append(full_name)
except KeyError:
print(f"ERROR: failed to find image names for alert_id: {alert_id}")

if image_names:
output = ','.join(image_names) if len(image_names) > 1 else image_names[0]
image_names_list.append(output)
if image_names:
return ','.join(image_names) if len(image_names) > 1 else image_names[0]
else:
return 'N/A'
else:
print(f"ERROR: failed to retrieve data for alert_id: {alert_id}")
return 'N/A'

with ThreadPoolExecutor(max_workers=10) as executor:
future_to_alert = {executor.submit(fetch_image_names, item['id']): item['id'] for item in violations_data}
for future in as_completed(future_to_alert):
image_names_list.append(future.result())

with open(violations_images_list_tmp, 'w') as output_file:
for image_names in image_names_list:
output_file.write(f"{image_names}\n")


def construct_violations_csv(violations_csv):
def construct_violations_csv():
df = pd.read_csv(violations_csv)
columns_to_delete = [0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,21,22,23]
df.drop(df.columns[columns_to_delete], axis=1, inplace=True)
df.to_csv(violations_csv_tmp, index=False)
columns_to_keep = [
'policy.name',
'policy.severity',
'deployment.name',
'deployment.clusterName',
'deployment.namespace'

]
df = df[columns_to_keep]

df = pd.read_csv(violations_csv_tmp)
with open(violations_images_list_tmp, 'r') as f:
images_list = f.read().splitlines()
df['images'] = images_list
df.to_csv(violations_images_csv , index=False)

df.to_csv(violations_images_csv, index=False)

##### <<< main >>> #######

verify_acs_api_key(acs_api_key)
current_date = datetime.now().strftime("%Y-%m-%d")

os.makedirs(tmp_workdir, exist_ok=True)
tmp_workdir = "reports/tmp"
violations_csv = f"{tmp_workdir}/violations_raw_{current_date}.csv"
violations_images_list_tmp = f"{tmp_workdir}/violations_images_list_{current_date}.txt"
violations_csv_tmp = f"{tmp_workdir}/violations_{current_date}.csv"
violations_images_csv = f"reports/violations_images_{current_date}.csv"

acs_api_key = os.getenv('acs_api_key')
acs_central_api = os.getenv('acs_central_api')

violations_data = pull_violations(acs_api_key,query_scope)
print(f"INFO : pulled total of {len(violations_data)} violations")
verify_api_key(acs_api_key,acs_central_api)
os.makedirs(tmp_workdir, exist_ok=True)

violations_data = pull_violations(acs_api_key,acs_central_api)
print(f"INFO : pulled total of {len(violations_data)} violations")

if violations_data:
df = json_normalize(violations_data)
df.to_csv(violations_csv, index=False)
print("INFO : matching image names to violations")
pull_violations_images(acs_api_key, violations_data)
print("INFO : exporting to csv")
construct_violations_csv()
print(f"INFO : CSV file saved at {violations_images_csv}")
else:
df = pd.DataFrame()
df.to_csv(violations_csv , index=False)

print("INFO : matching image names to violations")
violation_images = pull_violations_images(acs_api_key)

print("INFO : exporting csv")
construct_violations_csv(violations_csv)

print("ERROR: No violations data available, skipping CSV")

shutil.rmtree(tmp_workdir)
Loading