Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

External asset database boefje #1175

Merged
merged 13 commits into from
Jul 3, 2023
Merged
Empty file.
22 changes: 22 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/boefje.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"id": "external_db",
"name": "External Database",
"description": "Fetch hostnames and IP addresses/netblocks from an external database with API. See `description.md` for more information.",
"consumes": [
"Network"
],
"produces": [
"Hostname",
"IPAddressV4",
"IPV4NetBlock",
"IPAddressV6",
"IPV6NetBlock"
],
"environment_keys": [
"DB_URL",
"DB_ACCESS_TOKEN",
"DB_ORGANIZATION_IDENTIFIER",
"DB_ENDPOINT_FORMAT"
],
"scan_level": 0
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
32 changes: 32 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/description.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
This is an external assets database boefje that adds the IPs, netblocks and hostnames from an external API to KAT. As there is no real input for this boefje, it runs on the network object (usually "internet").

To make the API call work, there are four environment variables:

- `DB_URL`; the URL where the API for the database lives (without path, with port), for example `http://host.docker.internal:9000`.
- `DB_ACCESS_TOKEN`; an API access token as `GET` parameter.
- `DB_ORGANIZATION_IDENTIFIER`; the identifier of the organisation in the database. Defaults to the organisation code in KAT. If you rely on the default, make sure that the organisation code in KAT matches the ID of the organisation in the database; otherwise set this variable explicitly.
- `DB_ENDPOINT_FORMAT`; a Python format string with all variables above (optionally empty) and any path specifics of the API. E.g. `{DB_URL}/api/v1/participants/assets/{DB_ORGANIZATION_IDENTIFIER}?access_token={DB_ACCESS_TOKEN}` (this is also the default).

The response expected is JSON of the form

```json
{
"ip_key1":
...
"ip_keyN": [{"ip_item_key1": "ip_item_keyN": IPv4/6}],
"domain_key1":
...
"domain_keyN": [{"domain_item_key1": "domain_item_keyN": hostname}]
}
```

For example:

```json
{
"ip_addresses": [{"ip_address": "198.51.100.2"}, {"ip_address": "2001:db8:ffff:ffff:ffff:ffff:ffff:ffff"}, {"ip_address": "192.0.2.0/24"}],
"domains": [{"domain": "example.com"}]
}
```

The expected ip and domain (item) key lists can be configured in `normalize.py`. Ranges are expected as strings in CIDR notation. Clearance level for fetched items is set to `L0`. Reference implementation of the API server is in the works.
27 changes: 27 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Boefje script for getting domains and IP addresses from dadb."""
from os import getenv
from typing import List, Tuple, Union

import requests

from boefjes.job_models import BoefjeMeta


def run(boefje_meta: BoefjeMeta) -> List[Tuple[set, Union[bytes, str]]]:
    """Request the asset list from the external database and return the raw body.

    The URL is produced by filling the ``DB_ENDPOINT_FORMAT`` template with the
    ``DB_URL``, ``DB_ORGANIZATION_IDENTIFIER`` and ``DB_ACCESS_TOKEN``
    environment variables. The organization identifier falls back to
    ``boefje_meta.organization`` and the access token to an empty string.

    Raises:
        ValueError: when the response status is not OK; carries the response body.
    """
    default_endpoint = (
        "{DB_URL}/api/v1/participants/assets/{DB_ORGANIZATION_IDENTIFIER}?access_token={DB_ACCESS_TOKEN}"
    )
    endpoint_template = getenv("DB_ENDPOINT_FORMAT", default_endpoint)

    placeholders = {
        "DB_URL": getenv("DB_URL"),
        "DB_ORGANIZATION_IDENTIFIER": getenv("DB_ORGANIZATION_IDENTIFIER", boefje_meta.organization),
        "DB_ACCESS_TOKEN": getenv("DB_ACCESS_TOKEN", ""),
    }
    url = endpoint_template.format(**placeholders)

    # Timeout (in seconds) so an unresponsive server cannot block the job forever.
    reply = requests.get(url, timeout=100)
    if reply.ok:
        return [(set(), reply.content)]
    raise ValueError(reply.content)
73 changes: 73 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
import logging
from ipaddress import IPv4Interface, ip_interface
from typing import Iterator, Union

from boefjes.job_models import NormalizerMeta
from octopoes.models import OOI
from octopoes.models.ooi.dns.zone import Hostname
from octopoes.models.ooi.network import IPAddressV4, IPAddressV6, IPV4NetBlock, IPV6NetBlock, Network

# Expects raw to be JSON containing a list of IP addresses/netblocks
# (as dictionaries) and a list of domains (as dictionaries).
# The key paths through the dictionaries (to the lists, and inside each list item)
# are defined below.
# TODO: add these variables as normalizer settings in the UI.
# Keys followed from the top level of the response to reach the list of IP address items.
IP_ADDRESS_LIST_PATH = ["ip_addresses"]
# Keys followed inside each item of that list to reach the address/netblock value.
IP_ADDRESS_ITEM_PATH = ["ip_address"]
underdarknl marked this conversation as resolved.
Show resolved Hide resolved
# Keys followed from the top level of the response to reach the list of domain items.
DOMAIN_LIST_PATH = ["domains"]
# Keys followed inside each domain item to reach the hostname value.
DOMAIN_ITEM_PATH = ["domain"]


def follow_path_in_dict(path, path_dict):
    """Walk ``path_dict`` along the keys in ``path`` and return what is found there.

    An empty (falsy) ``path`` returns ``path_dict`` unchanged.

    Raises:
        KeyError: when a key of the path is missing at its level; the message
            lists the keys that are available at that level.
    """
    remaining = path
    current = path_dict
    # Consume the path one key at a time, descending one level per step.
    while remaining:
        key = remaining[0]
        if key not in current:
            raise KeyError(f"Key {key} not in {list(current.keys())}")
        current = current[key]
        remaining = remaining[1:]
    return current


def run(normalizer_meta: NormalizerMeta, raw: Union[bytes, str]) -> Iterator[OOI]:
    """Convert the external database response into hostname, IP address and netblock OOIs.

    ``raw`` is parsed as JSON. Every entry of the IP list yields an IPv4 or
    IPv6 address; when its prefix length is shorter than the maximum for its
    address family, a netblock starting at that address is yielded as well.
    Every entry of the domain list yields a hostname. All objects are attached
    to the network named in the boefje input, and a summary is logged once
    everything has been yielded.
    """
    payload = json.loads(raw)
    network = Network(name=normalizer_meta.raw_data.boefje_meta.arguments["input"]["name"])
    n_addresses = 0
    n_blocks = 0
    n_hostnames = 0

    for entry in follow_path_in_dict(path=IP_ADDRESS_LIST_PATH, path_dict=payload):
        raw_address = follow_path_in_dict(path=IP_ADDRESS_ITEM_PATH, path_dict=entry)
        interface = ip_interface(raw_address)
        # with_prefixlen always has the form "<address>/<prefix length>".
        host_part, _, prefix_part = interface.with_prefixlen.partition("/")
        prefix_length = int(prefix_part)

        # Pick the OOI classes that match the address family.
        if isinstance(interface, IPv4Interface):
            address_cls, netblock_cls = IPAddressV4, IPV4NetBlock
        else:
            address_cls, netblock_cls = IPAddressV6, IPV6NetBlock

        ip_address = address_cls(address=host_part, network=network.reference)
        yield ip_address
        n_addresses += 1

        # A full-length prefix denotes a single address, so there is no netblock to add.
        if prefix_length >= interface.ip.max_prefixlen:
            continue
        yield netblock_cls(
            start_ip=ip_address.reference,
            mask=prefix_length,
            network=network.reference,
        )
        n_blocks += 1

    for entry in follow_path_in_dict(path=DOMAIN_LIST_PATH, path_dict=payload):
        name = follow_path_in_dict(path=DOMAIN_ITEM_PATH, path_dict=entry)
        yield Hostname(name=name, network=network.reference)
        n_hostnames += 1

    logging.info(
        "Yielded %d IP addresses, %d netblocks and %d hostnames on %s.",
        n_addresses,
        n_blocks,
        n_hostnames,
        network,
    )
13 changes: 13 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/normalizer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"id": "kat_external_db_normalize",
"consumes": [
"external_db"
],
"produces": [
"Hostname",
"IPAddressV4",
"IPV4NetBlock",
"IPAddressV6",
"IPV6NetBlock"
]
}
1 change: 1 addition & 0 deletions boefjes/boefjes/plugins/kat_external_db/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ipaddress==1.0.23
33 changes: 33 additions & 0 deletions boefjes/boefjes/plugins/kat_external_db/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"title": "Arguments",
"type": "object",
"properties": {
"DB_URL": {
"title": "DB_URL",
"type": "string",
"maxLength": 2048,
"description": "URL for external DB, including port, e.g. 'http://host.docker.internal:9000' (without quotes)."
},
"DB_ACCESS_TOKEN": {
"title": "DB_ACCESS_TOKEN",
"maxLength": 2048,
"type": "string",
"description": "Access token (API KEY) for external DB. Defaults to empty string."
},
"DB_ORGANIZATION_IDENTIFIER": {
"title": "DB_ORGANIZATION_IDENTIFIER",
"maxLength": 2048,
"type": "string",
"description": "Identifier for an organisation. Defaults to KAT organization code."
},
"DB_ENDPOINT_FORMAT": {
"title": "DB_ENDPOINT_FORMAT",
"maxLength": 2048,
"type": "string",
"description": "Python format string with all variables above (optionally empty). E.g. '{DB_URL}/api/v1/participants/assets/{DB_ORGANIZATION_IDENTIFIER}?access_token={DB_ACCESS_TOKEN}' (without quotes)"
}
},
"required": [
"DB_URL"
]
}