From 7fb3f997660abc6ddfb14f803de38c5b8b943f4c Mon Sep 17 00:00:00 2001 From: mkolasinski-splunk Date: Mon, 19 Sep 2022 23:13:22 +0200 Subject: [PATCH 1/5] chore: working version using xmltodict lib --- .../standard_lib/utilities/host_assigner.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 pytest_splunk_addon/standard_lib/utilities/host_assigner.py diff --git a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py new file mode 100644 index 000000000..2cec65687 --- /dev/null +++ b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py @@ -0,0 +1,83 @@ +# +# Copyright 2021 Splunk Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +import xmltodict +import os +import argparse +from collections import defaultdict +import xml.etree.ElementTree as ET + + +def assign_host(sample_path, pattern, output_dir=None): + """ + Assigns host value to transport stanza in xml format file based on provided pattern. + + Args: + sample_path: path to file with samples + output_dir: path to directory where separated files will be stored. Default the same dir as input file + pattern: regex pattern with capture group for host value in samples + """ + if output_dir is None: + output_dir = os.path.dirname(sample_path) + with open(sample_path, "r", encoding="utf-8") as sample_file: + sample_raw = sample_file.read() + samples = xmltodict.parse(sample_raw) + events = samples["device"]["event"] + events = events if type(events) == list else [events] + processed_events = list() + filename, ext = os.path.splitext(sample_path) + output_filename = f"{filename}_assigned_host{ext}" + for each_event in events: + try: + extracted_hosts = re.search(pattern, each_event["raw"]).groups() + if len(extracted_hosts) == 0: + print(f"Using {pattern}. No host value matched.") + elif len(extracted_hosts) == 1: + host = extracted_hosts[0] + print(f"Found matching unique host value. Assigning host value to: {host}") + each_event["transport"]["@host"] = host + processed_events.append(each_event) + elif len(extracted_hosts) > 1: + found_hosts = [h for h in extracted_hosts] + print(f"Found multiple values matching pattern: {pattern}. Matching values: {found_hosts}") + else: + print("Undefined behavior") + except Exception as e: + print(f"Event: {each_event['raw']} caused an exception") + print(e) + + samples["device"].update(event=processed_events) + with open(os.path.join(output_dir, output_filename), "w", encoding="utf-8") as output_file: + xmltodict.unparse(samples, output=output_file, pretty=True, indent=" ") + + +def main(): + parser = argparse.ArgumentParser( + description="Assign host value to transport stanza based on provided regular expression" + ) + parser.add_argument("file", help="xml file with samples that need host assignment") + parser.add_argument("-r", "--regex", help="Regular expression needed for host assignment") + parser.add_argument("-o", "--output_dir", default=None, help="Output dir for xml file with assigned host") + args = parser.parse_args() + sample_path = args.file + pattern = args.regex + output_dir = args.output_dir + assign_host(sample_path, pattern, output_dir) + + +if __name__ == '__main__': + main() + From 812394e89b59c8bd579848acd9c40e83dee84029 Mon Sep 17 00:00:00 2001 From: mkolasinski-splunk Date: Tue, 20 Sep 2022 00:14:22 +0200 Subject: [PATCH 2/5] chore: working version with ET --- .../standard_lib/utilities/host_assigner.py | 51 ++++++++----------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py index 2cec65687..a49c349a4 100644 --- a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py +++ b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py @@ -14,12 +14,9 @@ # limitations under the License. # import re -import xmltodict import os import argparse -from collections import defaultdict -import xml.etree.ElementTree as ET - +from lxml import etree def assign_host(sample_path, pattern, output_dir=None): """ @@ -30,38 +27,32 @@ def assign_host(sample_path, pattern, output_dir=None): output_dir: path to directory where separated files will be stored. Default the same dir as input file pattern: regex pattern with capture group for host value in samples """ + + parser = etree.XMLParser(strip_cdata=False) if output_dir is None: output_dir = os.path.dirname(sample_path) - with open(sample_path, "r", encoding="utf-8") as sample_file: - sample_raw = sample_file.read() - samples = xmltodict.parse(sample_raw) - events = samples["device"]["event"] - events = events if type(events) == list else [events] - processed_events = list() + tree = etree.parse(sample_path, parser) + root = tree.getroot() + events = root.findall("event") filename, ext = os.path.splitext(sample_path) output_filename = f"{filename}_assigned_host{ext}" for each_event in events: - try: - extracted_hosts = re.search(pattern, each_event["raw"]).groups() - if len(extracted_hosts) == 0: - print(f"Using {pattern}. No host value matched.") - elif len(extracted_hosts) == 1: - host = extracted_hosts[0] - print(f"Found matching unique host value. Assigning host value to: {host}") - each_event["transport"]["@host"] = host - processed_events.append(each_event) - elif len(extracted_hosts) > 1: - found_hosts = [h for h in extracted_hosts] - print(f"Found multiple values matching pattern: {pattern}. Matching values: {found_hosts}") + raw = each_event.find("raw") + transport = each_event.find("transport") + if raw is not None and transport is not None: + try: + extracted_hosts = re.search(pattern, raw.text).groups() + except Exception as e: + print(f"Exception occured for event: {raw.text}") + print(e) else: - print("Undefined behavior") - except Exception as e: - print(f"Event: {each_event['raw']} caused an exception") - print(e) - - samples["device"].update(event=processed_events) - with open(os.path.join(output_dir, output_filename), "w", encoding="utf-8") as output_file: - xmltodict.unparse(samples, output=output_file, pretty=True, indent=" ") + if len(extracted_hosts) == 1: + host = extracted_hosts[0] + print(f"Found matching unique host value. Assigning host value to: {host}") + transport.set("host", host) + else: + print(f"Unexpected behavior for pattern: {pattern} and sample {raw.text}") + tree.write(os.path.join(output_dir, output_filename), encoding="utf-8", xml_declaration=True) def main(): From ba16fe50db196381c0a879bddf0d9c106060a61d Mon Sep 17 00:00:00 2001 From: mkolasinski-splunk Date: Tue, 20 Sep 2022 22:06:53 +0200 Subject: [PATCH 3/5] chore: black formatting --- .../standard_lib/utilities/host_assigner.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py index a49c349a4..36123be45 100644 --- a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py +++ b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py @@ -18,6 +18,7 @@ import argparse from lxml import etree + def assign_host(sample_path, pattern, output_dir=None): """ Assigns host value to transport stanza in xml format file based on provided pattern. @@ -48,11 +49,19 @@ def assign_host(sample_path, pattern, output_dir=None): else: if len(extracted_hosts) == 1: host = extracted_hosts[0] - print(f"Found matching unique host value. Assigning host value to: {host}") + print( + f"Found matching unique host value. Assigning host value to: {host}" + ) transport.set("host", host) else: - print(f"Unexpected behavior for pattern: {pattern} and sample {raw.text}") - tree.write(os.path.join(output_dir, output_filename), encoding="utf-8", xml_declaration=True) + print( + f"Unexpected behavior for pattern: {pattern} and sample {raw.text}" + ) + tree.write( + os.path.join(output_dir, output_filename), + encoding="utf-8", + xml_declaration=True, + ) def main(): @@ -60,8 +69,15 @@ def main(): description="Assign host value to transport stanza based on provided regular expression" ) parser.add_argument("file", help="xml file with samples that need host assignment") - parser.add_argument("-r", "--regex", help="Regular expression needed for host assignment") - parser.add_argument("-o", "--output_dir", default=None, help="Output dir for xml file with assigned host") + parser.add_argument( + "-r", "--regex", help="Regular expression needed for host assignment" + ) + parser.add_argument( + "-o", + "--output_dir", + default=None, + help="Output dir for xml file with assigned host", + ) args = parser.parse_args() sample_path = args.file pattern = args.regex @@ -69,6 +85,5 @@ def main(): assign_host(sample_path, pattern, output_dir) -if __name__ == '__main__': +if __name__ == "__main__": main() - From 1bddd994c64b475e51519889e3d3bd0dffeec049 Mon Sep 17 00:00:00 2001 From: mkolasinski-splunk Date: Mon, 26 Sep 2022 13:46:57 +0200 Subject: [PATCH 4/5] chore: add handling matching for other than first group --- pytest_splunk_addon/standard_lib/utilities/host_assigner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py index 36123be45..a0cf121e0 100644 --- a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py +++ b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py @@ -43,6 +43,7 @@ def assign_host(sample_path, pattern, output_dir=None): if raw is not None and transport is not None: try: extracted_hosts = re.search(pattern, raw.text).groups() + extracted_hosts = [h for h in extracted_hosts if h] except Exception as e: print(f"Exception occured for event: {raw.text}") print(e) From bd6b307b0c191a1b7c6fc1dcca0ff110df564056 Mon Sep 17 00:00:00 2001 From: mkolasinski-splunk <105011638+mkolasinski-splunk@users.noreply.github.com> Date: Wed, 28 Sep 2022 09:34:41 +0200 Subject: [PATCH 5/5] Update host_assigner.py --- pytest_splunk_addon/standard_lib/utilities/host_assigner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py index a0cf121e0..766e52844 100644 --- a/pytest_splunk_addon/standard_lib/utilities/host_assigner.py +++ b/pytest_splunk_addon/standard_lib/utilities/host_assigner.py @@ -45,7 +45,7 @@ def assign_host(sample_path, pattern, output_dir=None): extracted_hosts = re.search(pattern, raw.text).groups() extracted_hosts = [h for h in extracted_hosts if h] except Exception as e: - print(f"Exception occured for event: {raw.text}") + print(f"Exception occurred for event: {raw.text}") print(e) else: if len(extracted_hosts) == 1: