diff --git a/modules/xml/CMakeLists.txt b/modules/xml/CMakeLists.txt index ea4f29b68e..e3a06c2118 100644 --- a/modules/xml/CMakeLists.txt +++ b/modules/xml/CMakeLists.txt @@ -10,12 +10,14 @@ set(xml_SOURCES "xml-private.h" "windows-eventlog-xml-parser.h" "filterx-parse-xml.h" + "filterx-parse-windows-eventlog-xml.h" "xml-plugin.c" "xml-parser.c" "xml.c" "windows-eventlog-xml-parser.c" "filterx-parse-xml.c" + "filterx-parse-windows-eventlog-xml.c" ) diff --git a/modules/xml/Makefile.am b/modules/xml/Makefile.am index c4af2ef716..38e7aa8a72 100644 --- a/modules/xml/Makefile.am +++ b/modules/xml/Makefile.am @@ -10,7 +10,10 @@ modules_xml_libxml_la_SOURCES = \ modules/xml/windows-eventlog-xml-parser.h \ modules/xml/windows-eventlog-xml-parser.c \ modules/xml/filterx-parse-xml.h \ - modules/xml/filterx-parse-xml.c + modules/xml/filterx-parse-xml.c \ + modules/xml/filterx-parse-windows-eventlog-xml.h \ + modules/xml/filterx-parse-windows-eventlog-xml.c + BUILT_SOURCES += \ diff --git a/modules/xml/filterx-parse-windows-eventlog-xml.c b/modules/xml/filterx-parse-windows-eventlog-xml.c new file mode 100644 index 0000000000..9009485849 --- /dev/null +++ b/modules/xml/filterx-parse-windows-eventlog-xml.c @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2024 Axoflow + * Copyright (c) 2024 Attila Szakacs + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#include "filterx-parse-windows-eventlog-xml.h"
+#include "filterx/object-string.h"
+#include "filterx/object-dict-interface.h"
+#include "scratch-buffers.h"
+
+/*
+ * Store a newly allocated GError, built from a printf-style format, in *error.
+ * Does nothing when error is NULL.
+ *
+ * NOTE(review): with an argument index of 0 only the format string itself is
+ * checked; G_GNUC_PRINTF(2, 3) would also check the variadic arguments.
+ * Confirm every call site is clean before switching.
+ */
+static void _set_error(GError **error, const gchar *format, ...) G_GNUC_PRINTF(2, 0);
+static void
+_set_error(GError **error, const gchar *format, ...)
+{
+  if (!error)
+    return;
+
+  va_list va;
+  va_start(va, format);
+  *error = g_error_new_valist(g_quark_from_static_string("filterx-parse-windows-eventlog-xml"), 0, format, va);
+  va_end(va);
+}
+
+
+/* Where the parser currently is inside the <Event>/<EventData>/<Data> nesting. */
+typedef enum FilterXParseWEVTPos_
+{
+  WEVT_POS_NONE,
+  WEVT_POS_EVENT,
+  WEVT_POS_EVENT_DATA,
+  WEVT_POS_DATA,
+} FilterXParseWEVTPos;
+
+/*
+ * Per-parse state: the generic parse_xml state extended with the current
+ * position, a flag telling that a Data "Name" attribute is waiting for its
+ * text, and the buffer holding that name.
+ */
+typedef struct FilterXParseWEVTState_
+{
+  FilterXParseXmlState super;
+  FilterXParseWEVTPos position;
+  gboolean has_named_data;
+  GString *last_data_name;
+} FilterXParseWEVTState;
+
+/*
+ * Allocate and initialize a state instance, returned through its base type.
+ * last_data_name comes from scratch_buffers_alloc() and is not released by the
+ * state's free_fn -- presumably the scratch-buffer machinery reclaims it.
+ */
+static FilterXParseXmlState *
+_state_new(void)
+{
+  FilterXParseWEVTState *self = g_new0(FilterXParseWEVTState, 1);
+  filterx_parse_xml_state_init_instance(&self->super);
+  self->last_data_name = scratch_buffers_alloc();
+  self->position = WEVT_POS_NONE;
+  return &self->super;
+}
+
+/*
+ * Replace the object stored in the parent dict under the enclosing element's
+ * name with a freshly created dict and make it elem_context's current object.
+ * Returns TRUE on success; on failure *error is set and FALSE is returned.
+ */
+static gboolean
+_convert_to_dict(GMarkupParseContext *context, XmlElemContext *elem_context, GError **error)
+{
+  /* The head of the element stack is the element being opened, the next item is its parent. */
+  const gchar *parent_elem_name = (const gchar *) g_markup_parse_context_get_element_stack(context)->next->data;
+  FilterXObject *key = filterx_string_new(parent_elem_name, -1);
+
+  FilterXObject *dict_obj = filterx_object_create_dict(elem_context->parent_obj);
+  if (!dict_obj)
+    {
+      /* Without an error the exit path would report success and install a
+       * NULL current object, so the failure has to be made explicit. */
+      _set_error(error, "failed to create dict for: \"%s\"", parent_elem_name);
+      goto exit;
+    }
+
+  if (!filterx_object_is_type(elem_context->parent_obj,
&FILTERX_TYPE_NAME(dict)))
+    {
+      _set_error(error, "failed to convert EventData string to dict, parent must be a dict");
+      goto exit;
+    }
+
+  if (!filterx_object_set_subscript(elem_context->parent_obj, key, &dict_obj))
+    {
+      _set_error(error, "failed to replace leaf node object with: \"%s\"={}", parent_elem_name);
+      goto exit;
+    }
+
+exit:
+  /* Only a fully successful conversion may replace the current object. */
+  if (!(*error))
+    xml_elem_context_set_current_obj(elem_context, dict_obj);
+
+  filterx_object_unref(key);
+  filterx_object_unref(dict_obj);
+  return !(*error);
+}
+
+/*
+ * Set up new_elem_context for a named Data element.
+ *
+ * The parent object is last_elem_context's current object.  The current
+ * object becomes the dict stored in the parent under new_elem_name: a new
+ * empty dict is created and stored when the key is not set yet, otherwise the
+ * existing value is reused, which must be a dict.
+ *
+ * Returns TRUE on success.  On failure *error is set, new_elem_context is
+ * destroyed and FALSE is returned.
+ */
+static gboolean
+_prepare_elem(const gchar *new_elem_name, XmlElemContext *last_elem_context, XmlElemContext *new_elem_context,
+              GError **error)
+{
+  xml_elem_context_init(new_elem_context, last_elem_context->current_obj, NULL);
+
+  FilterXObject *new_elem_key = filterx_string_new(new_elem_name, -1);
+  FilterXObject *existing_obj = NULL;
+
+  if (!filterx_object_is_key_set(new_elem_context->parent_obj, new_elem_key))
+    {
+      FilterXObject *empty_dict = filterx_object_create_dict(new_elem_context->parent_obj);
+      xml_elem_context_set_current_obj(new_elem_context, empty_dict);
+      filterx_object_unref(empty_dict);
+
+      /* The format has no conversion, so no extra argument may be passed. */
+      if (!filterx_object_set_subscript(new_elem_context->parent_obj, new_elem_key, &new_elem_context->current_obj))
+        _set_error(error, "failed to prepare dict for named param");
+      goto exit;
+    }
+
+  existing_obj = filterx_object_get_subscript(new_elem_context->parent_obj, new_elem_key);
+  if (!filterx_object_is_type(existing_obj, &FILTERX_TYPE_NAME(dict)))
+    {
+      _set_error(error, "failed to prepare dict for named param, parent must be dict, got \"%s\"",
+                 existing_obj->type->name);
+      goto exit;
+    }
+
+  xml_elem_context_set_current_obj(new_elem_context, existing_obj);
+
+exit:
+  filterx_object_unref(new_elem_key);
+  filterx_object_unref(existing_obj);
+
+  if (*error)
+    {
+      xml_elem_context_destroy(new_elem_context);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+static void
+_collect_attrs(const gchar **attribute_names, const gchar **attribute_values,
+               FilterXParseWEVTState *state, GError **error)
+{
+  /* The caller has already verified that the only attribute is "Name", so
+   * its value is remembered until the text of the element arrives. */
+  g_string_assign(state->last_data_name, attribute_values[0]);
+  state->has_named_data = TRUE;
+}
+
+/*
+ * Check that the root element carries exactly one attribute, "xmlns", holding
+ * the Windows EventLog schema URL.
+ *
+ * Returns TRUE when it does.  Every FALSE return sets *error: the caller
+ * drops the element on failure, and doing that silently would leave the
+ * element context stack unbalanced against the matching end tag.
+ */
+static gboolean
+_has_valid_schema_url(const gchar **attribute_names, const gchar **attribute_values, GError **error)
+{
+  if (!attribute_names[0])
+    {
+      _set_error(error, "missing attribute in Event, expected: xmlns");
+      return FALSE;
+    }
+
+  if (g_strcmp0(attribute_names[0], "xmlns") != 0)
+    {
+      _set_error(error, "unexpected attribute in Event, expected: xmlns, got: %s", attribute_names[0]);
+      return FALSE;
+    }
+
+  if (g_strcmp0(attribute_values[0], "http://schemas.microsoft.com/win/2004/08/events/event") != 0)
+    {
+      _set_error(error, "unexpected schema URL: %s", attribute_values[0]);
+      return FALSE;
+    }
+
+  if (attribute_names[1])
+    {
+      _set_error(error, "unexpected attribute in Event, number of attributes must be 1, got: %s", attribute_names[1]);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Validate the root element: it must be named "Event" and pass the schema URL check. */
+static gboolean
+_is_root_elem_valid(const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values,
+                    GError **error)
+{
+  if (g_strcmp0(element_name, "Event") != 0)
+    {
+      _set_error(error, "unexpected Windows EventLog XML root element: %s, expected \"Event\"", element_name);
+      return FALSE;
+    }
+
+  if (!_has_valid_schema_url(attribute_names, attribute_values, error))
+    return FALSE;
+
+  return TRUE;
+}
+
+/*
+ * Advance the position when an element is opened:
+ * NONE -> EVENT on a valid root, EVENT -> EVENT_DATA on "EventData",
+ * EVENT_DATA -> DATA on "Data".  Any other element leaves the position as is.
+ * Returns FALSE only when the root element is invalid.
+ */
+static gboolean
+_push_position(FilterXParseWEVTState *state, const gchar *element_name,
+               const gchar **attribute_names, const gchar **attribute_values, GError **error)
+{
+  switch (state->position)
+    {
+    case WEVT_POS_NONE:
+      if (!_is_root_elem_valid(element_name, attribute_names, attribute_values, error))
+        return FALSE;
+      state->position = WEVT_POS_EVENT;
+      return TRUE;
+    case WEVT_POS_EVENT:
+      if (g_strcmp0(element_name, "EventData") == 0)
+        state->position = WEVT_POS_EVENT_DATA;
+      return TRUE;
+    case WEVT_POS_EVENT_DATA:
+      if (g_strcmp0(element_name, "Data") == 0)
+        state->position = WEVT_POS_DATA;
+      return TRUE;
+    case WEVT_POS_DATA:
+      return TRUE;
+    default:
+      g_assert_not_reached();
+    }
+}
+
+static void
+_pop_position(FilterXParseWEVTState *state, const gchar
*element_name) /* tail of the _pop_position() signature */
+{
+  switch (state->position) /* step one level up only when the matching element closes */
+    {
+    case WEVT_POS_NONE:
+      break;
+    case WEVT_POS_EVENT:
+      if (g_strcmp0(element_name, "Event") == 0)
+        state->position = WEVT_POS_NONE;
+      break;
+    case WEVT_POS_EVENT_DATA:
+      if (g_strcmp0(element_name, "EventData") == 0)
+        state->position = WEVT_POS_EVENT;
+      break;
+    case WEVT_POS_DATA:
+      if (g_strcmp0(element_name, "Data") == 0)
+        state->position = WEVT_POS_EVENT_DATA;
+      break;
+    default:
+      g_assert_not_reached();
+    }
+}
+/*
+ * Tell whether the element being opened needs the named-Data handling: the
+ * position is WEVT_POS_DATA and the only attribute is "Name".
+ *
+ * Returns FALSE without an error when the position is different or there are
+ * no attributes; returns FALSE with *error set when the first attribute is not
+ * "Name" or when there is more than one attribute.
+ */
+static gboolean
+_has_wevt_event_data_attr(const gchar **attribute_names, FilterXParseWEVTState *state, GError **error)
+{
+  if (state->position != WEVT_POS_DATA)
+    return FALSE;
+/* no attributes at all: not an error, the caller uses the generic handling */
+  if (!attribute_names[0])
+    return FALSE;
+
+  if (g_strcmp0(attribute_names[0], "Name") != 0)
+    {
+      _set_error(error, "unexpected attribute in Data, expected: Name, got: %s", attribute_names[0]);
+      return FALSE;
+    }
+
+  if (attribute_names[1])
+    {
+      _set_error(error, "unexpected attribute in Data, number of attributes must be 1, got: %s", attribute_names[1]);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+/*
+ * start_elem hook: updates the position, hands every element that is not a
+ * named Data element to filterx_parse_xml_start_elem_method(), and for named
+ * Data elements makes sure the enclosing object is a dict before a new element
+ * context is prepared for it.
+ */
+static void
+_start_elem(FilterXGeneratorFunctionParseXml *s,
+            GMarkupParseContext *context, const gchar *element_name,
+            const gchar **attribute_names, const gchar **attribute_values,
+            FilterXParseXmlState *st, GError **error)
+{
+  FilterXParseWEVTState *state = (FilterXParseWEVTState *) st;
+  XmlElemContext *last_elem_context = xml_elem_context_stack_peek_last(state->super.xml_elem_context_stack);
+
+  if (!_push_position(state, element_name, attribute_names, attribute_values, error))
+    return;
+
+  if (!_has_wevt_event_data_attr(attribute_names, state, error))
+    {
+      if (*error) /* FALSE because of invalid attributes: stop here */
+        return;
+
+      filterx_parse_xml_start_elem_method(s, context, element_name, attribute_names, attribute_values, st, error);
+      return;
+    }
+
+  if (!filterx_object_is_type(last_elem_context->current_obj, &FILTERX_TYPE_NAME(dict)))
+    {
+      if (!_convert_to_dict(context, last_elem_context, error)) /* enclosing object must hold key/value pairs */
+        return;
+    }
+
+  XmlElemContext new_elem_context = { 0
};
+  if (!_prepare_elem(element_name, last_elem_context, &new_elem_context, error))
+    return;
+
+  xml_elem_context_stack_push(state->super.xml_elem_context_stack, &new_elem_context);
+
+  _collect_attrs(attribute_names, attribute_values, state, error);
+}
+
+/*
+ * end_elem hook: steps the position back, drops a Data name that never
+ * received any text and lets the generic handler pop the element context.
+ */
+static void
+_end_elem(FilterXGeneratorFunctionParseXml *s,
+          GMarkupParseContext *context, const gchar *element_name,
+          FilterXParseXmlState *st, GError **error)
+{
+  FilterXParseWEVTState *state = (FilterXParseWEVTState *) st;
+
+  /* A named Data element without text (e.g. <Data Name="x"/>) never reaches
+   * _text(); without this reset the next text chunk, typically whitespace
+   * between elements, would be stored under the stale name in the enclosing
+   * dict. */
+  state->has_named_data = FALSE;
+
+  _pop_position(state, element_name);
+  filterx_parse_xml_end_elem_method(s, context, element_name, st, error);
+}
+
+/*
+ * text hook: when a Data "Name" is pending and the current object is a dict,
+ * stores the text in that dict under the pending name; every other case is
+ * handled by filterx_parse_xml_text_method().
+ */
+static void
+_text(FilterXGeneratorFunctionParseXml *s,
+      GMarkupParseContext *context, const gchar *text, gsize text_len,
+      FilterXParseXmlState *st, GError **error)
+{
+  FilterXParseWEVTState *state = (FilterXParseWEVTState *) st;
+  XmlElemContext *elem_context = xml_elem_context_stack_peek_last(state->super.xml_elem_context_stack);
+
+  if (!filterx_object_is_type(elem_context->current_obj, &FILTERX_TYPE_NAME(dict)) ||
+      !state->has_named_data)
+    {
+      filterx_parse_xml_text_method(s, context, text, text_len, st, error);
+      return;
+    }
+
+  FilterXObject *key = filterx_string_new(state->last_data_name->str, state->last_data_name->len);
+  FilterXObject *text_obj = filterx_string_new(text, text_len);
+
+  if (!filterx_object_set_subscript(elem_context->current_obj, key, &text_obj))
+    {
+      /* GMarkup does not NUL-terminate text, so its length bounds the output. */
+      _set_error(error, "failed to add text to dict: \"%s\"=\"%.*s\"",
+                 state->last_data_name->str, (gint) text_len, text);
+      goto fail;
+    }
+
+  /* The dict becomes the parent and the stored text the current object. */
+  xml_elem_context_set_parent_obj(elem_context, elem_context->current_obj);
+  xml_elem_context_set_current_obj(elem_context, text_obj);
+
+  state->has_named_data = FALSE;
+
+fail:
+  filterx_object_unref(key);
+  filterx_object_unref(text_obj);
+}
+
+FilterXExpr *
+filterx_generator_function_parse_windows_eventlog_xml_new(FilterXFunctionArgs *args, GError **error)
+{
+  FilterXExpr *s = filterx_generator_function_parse_xml_new(args, error);
+  FilterXGeneratorFunctionParseXml
*self = (FilterXGeneratorFunctionParseXml *) s; /* same object as s, viewed through the parse_xml type */
+
+  if (!self) /* the parse_xml constructor returned NULL; NOTE(review): presumably *error is already set -- confirm */
+    return NULL;
+/* replace the generic parse_xml hooks with the Windows EventLog specific ones */
+  self->create_state = _state_new;
+  self->start_elem = _start_elem;
+  self->end_elem = _end_elem;
+  self->text = _text;
+
+  return s;
+}
+/* NOTE(review): presumably defines what FILTERX_GENERATOR_FUNCTION_DECLARE announces in the header -- confirm */
+FILTERX_GENERATOR_FUNCTION(parse_windows_eventlog_xml, filterx_generator_function_parse_windows_eventlog_xml_new);
diff --git a/modules/xml/filterx-parse-windows-eventlog-xml.h b/modules/xml/filterx-parse-windows-eventlog-xml.h
new file mode 100644
index 0000000000..09faffecfc
--- /dev/null
+++ b/modules/xml/filterx-parse-windows-eventlog-xml.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024 Axoflow
+ * Copyright (c) 2024 Attila Szakacs
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ * + */ + +#ifndef FILTERX_PARSE_WINDOWS_EVENTLOG_XML_H_INCLUDED +#define FILTERX_PARSE_WINDOWS_EVENTLOG_XML_H_INCLUDED + +#include "filterx-parse-xml.h" + +FILTERX_GENERATOR_FUNCTION_DECLARE(parse_windows_eventlog_xml); + +FilterXExpr *filterx_generator_function_parse_windows_eventlog_xml_new(FilterXFunctionArgs *args, GError **error); + +#endif diff --git a/modules/xml/filterx-parse-xml.c b/modules/xml/filterx-parse-xml.c index 6abbd9e359..35cdc3e41b 100644 --- a/modules/xml/filterx-parse-xml.c +++ b/modules/xml/filterx-parse-xml.c @@ -82,7 +82,7 @@ */ #define FILTERX_FUNC_PARSE_XML_USAGE "Usage: parse_xml(raw_xml)" - +#define XML_ELEM_CTX_STACK_INIT_SIZE (8) static void _set_error(GError **error, const gchar *format, ...) G_GNUC_PRINTF(2, 0); static void @@ -97,50 +97,58 @@ _set_error(GError **error, const gchar *format, ...) va_end(va); } - -typedef struct XmlElemContext_ -{ - FilterXObject *current_obj; - FilterXObject *parent_obj; -} XmlElemContext; - -static void -_elem_context_set_current_obj(XmlElemContext *self, FilterXObject *current_obj) +void +xml_elem_context_set_current_obj(XmlElemContext *self, FilterXObject *current_obj) { filterx_object_unref(self->current_obj); self->current_obj = filterx_object_ref(current_obj); } -static void -_elem_context_set_parent_obj(XmlElemContext *self, FilterXObject *parent_obj) +void +xml_elem_context_set_parent_obj(XmlElemContext *self, FilterXObject *parent_obj) { filterx_object_unref(self->parent_obj); self->parent_obj = filterx_object_ref(parent_obj); } -static void -_elem_context_free(XmlElemContext *self) +void +xml_elem_context_destroy(XmlElemContext *self) { - _elem_context_set_current_obj(self, NULL); - _elem_context_set_parent_obj(self, NULL); - g_free(self); + xml_elem_context_set_current_obj(self, NULL); + xml_elem_context_set_parent_obj(self, NULL); } -static XmlElemContext * -_elem_context_new(FilterXObject *parent_obj, FilterXObject *current_obj) +void +xml_elem_context_init(XmlElemContext *self, 
FilterXObject *parent_obj, FilterXObject *current_obj) { - XmlElemContext *self = g_new0(XmlElemContext, 1); - _elem_context_set_parent_obj(self, parent_obj); - _elem_context_set_current_obj(self, current_obj); - return self; + xml_elem_context_set_parent_obj(self, parent_obj); + xml_elem_context_set_current_obj(self, current_obj); } -typedef struct FilterXGeneratorFunctionParseXml_ +void +filterx_parse_xml_state_init_instance(FilterXParseXmlState *self) { - FilterXGeneratorFunction super; - FilterXExpr *xml_expr; -} FilterXGeneratorFunctionParseXml; + self->xml_elem_context_stack = g_array_sized_new(FALSE, FALSE, sizeof(XmlElemContext), XML_ELEM_CTX_STACK_INIT_SIZE); + self->free_fn = filterx_parse_xml_state_free_method; +} + +void +filterx_parse_xml_state_free_method(FilterXParseXmlState *self) +{ + for (guint i = 0; i < self->xml_elem_context_stack->len; i++) + xml_elem_context_destroy(&g_array_index(self->xml_elem_context_stack, XmlElemContext, i)); + g_array_free(self->xml_elem_context_stack, TRUE); +} + +static FilterXParseXmlState * +_state_new(void) +{ + FilterXParseXmlState *self = g_new0(FilterXParseXmlState, 1); + filterx_parse_xml_state_init_instance(self); + return self; +} + static FilterXObject * _create_object_for_new_elem(FilterXObject *parent_obj, gboolean has_attrs, const gchar **new_elem_repr) @@ -198,7 +206,7 @@ _store_second_elem(XmlElemContext *new_elem_context, FilterXObject **existing_ob if (!filterx_object_set_subscript(new_elem_context->parent_obj, new_elem_key, &list_obj)) goto fail; - _elem_context_set_parent_obj(new_elem_context, list_obj); + xml_elem_context_set_parent_obj(new_elem_context, list_obj); filterx_object_unref(list_obj); return; @@ -223,17 +231,18 @@ _store_nth_elem(XmlElemContext *new_elem_context, FilterXObject *existing_obj, F return; } - _elem_context_set_parent_obj(new_elem_context, existing_obj); + xml_elem_context_set_parent_obj(new_elem_context, existing_obj); } -static XmlElemContext * -_prepare_elem(const gchar 
*new_elem_name, XmlElemContext *last_elem_context, gboolean has_attrs, GError **error) +static gboolean +_prepare_elem(const gchar *new_elem_name, XmlElemContext *last_elem_context, gboolean has_attrs, + XmlElemContext *new_elem_context, GError **error) { g_assert(filterx_object_is_type(last_elem_context->current_obj, &FILTERX_TYPE_NAME(dict))); const gchar *new_elem_repr; FilterXObject *new_elem_obj = _create_object_for_new_elem(last_elem_context->current_obj, has_attrs, &new_elem_repr); - XmlElemContext *new_elem_context = _elem_context_new(last_elem_context->current_obj, new_elem_obj); + xml_elem_context_init(new_elem_context, last_elem_context->current_obj, new_elem_obj); FilterXObject *new_elem_key = filterx_string_new(new_elem_name, -1); FilterXObject *existing_obj = NULL; @@ -264,8 +273,7 @@ _prepare_elem(const gchar *new_elem_name, XmlElemContext *last_elem_context, gbo _set_error(error, "failed to unset existing unexpected node"); goto exit; } - _elem_context_free(new_elem_context); - new_elem_context = _prepare_elem(new_elem_name, last_elem_context, has_attrs, error); + _prepare_elem(new_elem_name, last_elem_context, has_attrs, new_elem_context, error); exit: filterx_object_unref(new_elem_key); @@ -274,11 +282,11 @@ _prepare_elem(const gchar *new_elem_name, XmlElemContext *last_elem_context, gbo if (*error) { - _elem_context_free(new_elem_context); - new_elem_context = NULL; + xml_elem_context_destroy(new_elem_context); + return FALSE; } - return new_elem_context; + return TRUE; } static void @@ -365,20 +373,20 @@ _convert_to_dict(GMarkupParseContext *context, XmlElemContext *elem_context, GEr exit: if (!(*error)) - _elem_context_set_current_obj(elem_context, dict_obj); + xml_elem_context_set_current_obj(elem_context, dict_obj); filterx_object_unref(key); filterx_object_unref(dict_obj); return !(*error); } -static void -_start_elem_cb(GMarkupParseContext *context, const gchar *element_name, - const gchar **attribute_names, const gchar **attribute_values, 
- gpointer user_data, GError **error) +void +filterx_parse_xml_start_elem_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + const gchar **attribute_names, const gchar **attribute_values, + FilterXParseXmlState *state, GError **error) { - GQueue *obj_stack = (GQueue *) user_data; - XmlElemContext *last_elem_context = g_queue_peek_head(obj_stack); + XmlElemContext *last_elem_context = xml_elem_context_stack_peek_last(state->xml_elem_context_stack); if (!filterx_object_is_type(last_elem_context->current_obj, &FILTERX_TYPE_NAME(dict))) { @@ -392,22 +400,23 @@ _start_elem_cb(GMarkupParseContext *context, const gchar *element_name, } gboolean has_attrs = !!attribute_names[0]; - XmlElemContext *new_elem_context = _prepare_elem(element_name, last_elem_context, has_attrs, error); - if (!new_elem_context) + + XmlElemContext new_elem_context = { 0 }; + if (!_prepare_elem(element_name, last_elem_context, has_attrs, &new_elem_context, error)) return; - g_queue_push_head(obj_stack, new_elem_context); + xml_elem_context_stack_push(state->xml_elem_context_stack, &new_elem_context); if (has_attrs) - _collect_attrs(element_name, new_elem_context, attribute_names, attribute_values, error); + _collect_attrs(element_name, &new_elem_context, attribute_names, attribute_values, error); } void -_end_elem_cb(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error) +filterx_parse_xml_end_elem_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + FilterXParseXmlState *state, GError **error) { - GQueue *obj_stack = (GQueue *) user_data; - XmlElemContext *elem_context = g_queue_pop_head(obj_stack); - _elem_context_free(elem_context); + xml_elem_context_stack_remove_last(state->xml_elem_context_stack); } static gchar * @@ -459,7 +468,7 @@ _replace_string_text(XmlElemContext *elem_context, const gchar *element_name, co g_assert_not_reached(); success: 
- _elem_context_set_current_obj(elem_context, text_obj); + xml_elem_context_set_current_obj(elem_context, text_obj); fail: filterx_object_unref(text_obj); } @@ -510,19 +519,20 @@ _add_text_to_dict(XmlElemContext *elem_context, const gchar *text, gsize text_le goto fail; } - _elem_context_set_parent_obj(elem_context, elem_context->current_obj); - _elem_context_set_current_obj(elem_context, text_obj); + xml_elem_context_set_parent_obj(elem_context, elem_context->current_obj); + xml_elem_context_set_current_obj(elem_context, text_obj); fail: filterx_object_unref(key); filterx_object_unref(text_obj); } -static void -_text_cb(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error) +void +filterx_parse_xml_text_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *text, gsize text_len, + FilterXParseXmlState *state, GError **error) { - GQueue *obj_stack = (GQueue *) user_data; - XmlElemContext *elem_context = g_queue_peek_head(obj_stack); + XmlElemContext *elem_context = xml_elem_context_stack_peek_last(state->xml_elem_context_stack); const gchar *element_name = g_markup_parse_context_get_element(context); gsize stripped_text_len; @@ -581,6 +591,37 @@ _extract_raw_xml(FilterXGeneratorFunctionParseXml *self, FilterXObject *xml_obj, return raw_xml; } +static void +_start_elem_cb(GMarkupParseContext *context, const gchar *element_name, + const gchar **attribute_names, const gchar **attribute_values, + gpointer cb_user_data, GError **error) +{ + FilterXGeneratorFunctionParseXml *self = ((gpointer *) cb_user_data)[0]; + FilterXParseXmlState *user_data = ((gpointer *) cb_user_data)[1]; + + self->start_elem(self, context, element_name, attribute_names, attribute_values, user_data, error); +} + +static void +_end_elem_cb(GMarkupParseContext *context, const gchar *element_name, + gpointer cb_user_data, GError **error) +{ + FilterXGeneratorFunctionParseXml *self = ((gpointer *) cb_user_data)[0]; + 
FilterXParseXmlState *user_data = ((gpointer *) cb_user_data)[1]; + + self->end_elem(self, context, element_name, user_data, error); +} + +static void +_text_cb(GMarkupParseContext *context, const gchar *text, gsize text_len, + gpointer cb_user_data, GError **error) +{ + FilterXGeneratorFunctionParseXml *self = ((gpointer *) cb_user_data)[0]; + FilterXParseXmlState *user_data = ((gpointer *) cb_user_data)[1]; + + self->text(self, context, text, text_len, user_data, error); +} + static gboolean _parse(FilterXGeneratorFunctionParseXml *self, const gchar *raw_xml, gsize raw_xml_len, FilterXObject *fillable) { @@ -591,10 +632,14 @@ _parse(FilterXGeneratorFunctionParseXml *self, const gchar *raw_xml, gsize raw_x .text = _text_cb, }; - GQueue *obj_stack = g_queue_new(); - XmlElemContext *root_elem_context = _elem_context_new(NULL, fillable); - g_queue_push_head(obj_stack, root_elem_context); - GMarkupParseContext *context = g_markup_parse_context_new(&scanner_callbacks, 0, obj_stack, NULL); + FilterXParseXmlState *state = self->create_state();; + gpointer user_data[] = { self, state }; + + XmlElemContext root_elem_context = { 0 }; + xml_elem_context_init(&root_elem_context, NULL, fillable); + xml_elem_context_stack_push(state->xml_elem_context_stack, &root_elem_context); + + GMarkupParseContext *context = g_markup_parse_context_new(&scanner_callbacks, 0, user_data, NULL); GError *error = NULL; gboolean success = g_markup_parse_context_parse(context, raw_xml, raw_xml_len, &error) && @@ -609,7 +654,7 @@ _parse(FilterXGeneratorFunctionParseXml *self, const gchar *raw_xml, gsize raw_x } exit: - g_queue_free_full(obj_stack, (GDestroyNotify) _elem_context_free); + filterx_parse_xml_state_free(state); g_markup_parse_context_free(context); return success; } @@ -673,6 +718,11 @@ filterx_generator_function_parse_xml_new(FilterXFunctionArgs *args, GError **err self->super.super.create_container = filterx_generator_create_dict_container; self->super.super.super.free_fn = _free; + 
self->create_state = _state_new; + self->start_elem = filterx_parse_xml_start_elem_method; + self->end_elem = filterx_parse_xml_end_elem_method; + self->text = filterx_parse_xml_text_method; + if (!_extract_args(self, args, error) || !filterx_function_args_check(args, error)) goto fail; diff --git a/modules/xml/filterx-parse-xml.h b/modules/xml/filterx-parse-xml.h index b640aeb69d..2e5d5ba69b 100644 --- a/modules/xml/filterx-parse-xml.h +++ b/modules/xml/filterx-parse-xml.h @@ -1,5 +1,6 @@ /* - * Copyright (c) 2024 Attila Szakacs + * Copyright (c) 2024 Axoflow + * Copyright (c) 2024 Attila Szakacs * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -29,4 +30,87 @@ FILTERX_GENERATOR_FUNCTION_DECLARE(parse_xml); FilterXExpr *filterx_generator_function_parse_xml_new(FilterXFunctionArgs *args, GError **error); + +typedef struct FilterXParseXmlState_ FilterXParseXmlState; +struct FilterXParseXmlState_ +{ + GArray *xml_elem_context_stack; + + void (*free_fn)(FilterXParseXmlState *self); +}; + +void filterx_parse_xml_state_init_instance(FilterXParseXmlState *self); +void filterx_parse_xml_state_free_method(FilterXParseXmlState *self); + +static inline void +filterx_parse_xml_state_free(FilterXParseXmlState *self) +{ + self->free_fn(self); + g_free(self); +} + + +typedef struct XmlElemContext_ +{ + FilterXObject *current_obj; + FilterXObject *parent_obj; +} XmlElemContext; + +void xml_elem_context_init(XmlElemContext *self, FilterXObject *parent_obj, FilterXObject *current_obj); +void xml_elem_context_destroy(XmlElemContext *self); +void xml_elem_context_set_current_obj(XmlElemContext *self, FilterXObject *current_obj); +void xml_elem_context_set_parent_obj(XmlElemContext *self, FilterXObject *parent_obj); + + +static inline void +xml_elem_context_stack_push(GArray *xml_elem_context_stack, XmlElemContext *elem_context) +{ + g_array_append_val(xml_elem_context_stack, 
*elem_context); +} + +static inline XmlElemContext * +xml_elem_context_stack_peek_last(GArray *xml_elem_context_stack) +{ + return &g_array_index(xml_elem_context_stack, XmlElemContext, xml_elem_context_stack->len - 1); +} + +static inline void +xml_elem_context_stack_remove_last(GArray *xml_elem_context_stack) +{ + xml_elem_context_destroy(xml_elem_context_stack_peek_last(xml_elem_context_stack)); + g_array_remove_index(xml_elem_context_stack, xml_elem_context_stack->len - 1); +} + + +typedef struct FilterXGeneratorFunctionParseXml_ FilterXGeneratorFunctionParseXml; +struct FilterXGeneratorFunctionParseXml_ +{ + FilterXGeneratorFunction super; + FilterXExpr *xml_expr; + + FilterXParseXmlState *(*create_state)(void); + + void (*start_elem)(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + const gchar **attribute_names, const gchar **attribute_values, + FilterXParseXmlState *state, GError **error); + void (*end_elem)(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + FilterXParseXmlState *state, GError **error); + void (*text)(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *text, gsize text_len, + FilterXParseXmlState *state, GError **error); +}; + +void filterx_parse_xml_start_elem_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + const gchar **attribute_names, const gchar **attribute_values, + FilterXParseXmlState *state, GError **error); +void filterx_parse_xml_end_elem_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *element_name, + FilterXParseXmlState *state, GError **error); +void filterx_parse_xml_text_method(FilterXGeneratorFunctionParseXml *self, + GMarkupParseContext *context, const gchar *text, gsize text_len, + FilterXParseXmlState *state, GError **error); + #endif diff --git a/modules/xml/tests/CMakeLists.txt 
b/modules/xml/tests/CMakeLists.txt index d2fc633bb6..f3741ab6fa 100644 --- a/modules/xml/tests/CMakeLists.txt +++ b/modules/xml/tests/CMakeLists.txt @@ -1,3 +1,4 @@ add_unit_test(CRITERION TARGET test_xml_parser DEPENDS xml syslog-ng) add_unit_test(CRITERION TARGET test_windows_eventlog_xml_parser DEPENDS xml syslog-ng) add_unit_test(LIBTEST CRITERION TARGET test_filterx_parse_xml DEPENDS xml syslog-ng) +add_unit_test(LIBTEST CRITERION TARGET test_filterx_parse_windows_eventlog_xml DEPENDS xml syslog-ng) diff --git a/modules/xml/tests/Makefile.am b/modules/xml/tests/Makefile.am index 5e48270fa8..4b54ec6fbe 100644 --- a/modules/xml/tests/Makefile.am +++ b/modules/xml/tests/Makefile.am @@ -1,7 +1,8 @@ modules_xml_tests_TESTS = \ modules/xml/tests/test_xml_parser \ modules/xml/tests/test_windows_eventlog_xml_parser \ - modules/xml/tests/test_filterx_parse_xml + modules/xml/tests/test_filterx_parse_xml \ + modules/xml/tests/test_filterx_parse_windows_eventlog_xml check_PROGRAMS += ${modules_xml_tests_TESTS} @@ -26,4 +27,11 @@ modules_xml_tests_test_filterx_parse_xml_LDFLAGS = \ -dlpreopen $(top_builddir)/modules/xml/libxml.la EXTRA_modules_xml_tests_test_filterx_parse_xml_DEPENDENCIES = $(top_builddir)/modules/xml/libxml.la +modules_xml_tests_test_filterx_parse_windows_eventlog_xml_CFLAGS = $(TEST_CFLAGS) -I$(top_srcdir)/modules/xml +modules_xml_tests_test_filterx_parse_windows_eventlog_xml_LDADD = $(TEST_LDADD) +modules_xml_tests_test_filterx_parse_windows_eventlog_xml_LDFLAGS = \ + $(PREOPEN_SYSLOGFORMAT) \ + -dlpreopen $(top_builddir)/modules/xml/libxml.la +EXTRA_modules_xml_tests_test_filterx_parse_windows_eventlog_xml_DEPENDENCIES = $(top_builddir)/modules/xml/libxml.la + EXTRA_DIST += modules/xml/tests/CMakeLists.txt diff --git a/modules/xml/tests/test_filterx_parse_windows_eventlog_xml.c b/modules/xml/tests/test_filterx_parse_windows_eventlog_xml.c new file mode 100644 index 0000000000..53a37dbe58 --- /dev/null +++ 
b/modules/xml/tests/test_filterx_parse_windows_eventlog_xml.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2024 Axoflow
+ * Copyright (c) 2024 Attila Szakacs
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ */
+
+
+/* NOTE(review): the header name is missing from this #include and from the
+ * one following "libtest/filterx-lib.h"; confirm both against the original. */
+#include
+
+#include "filterx-parse-windows-eventlog-xml.h"
+#include "filterx/object-string.h"
+#include "filterx/object-json.h"
+#include "filterx/filterx-eval.h"
+#include "scratch-buffers.h"
+#include "apphook.h"
+#include "cfg.h"
+
+#include "libtest/filterx-lib.h"
+#include
+
+/*
+ * Builds a parse_windows_eventlog_xml() generator expression whose only
+ * argument is a non-literal string holding raw_xml and whose fillable is the
+ * given object. Creation errors abort the test through g_assert().
+ */
+static FilterXExpr *
+_create_expr(const gchar *raw_xml, FilterXObject *fillable)
+{
+  FilterXFunctionArg *input = filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(raw_xml, -1)));
+  GList *args_list = g_list_append(NULL, input);
+  GError *error = NULL;
+  FilterXFunctionArgs *args = filterx_function_args_new(args_list, &error);
+  g_assert(!error);
+
+  FilterXExpr *func = filterx_generator_function_parse_windows_eventlog_xml_new(args, &error);
+  g_assert(!error);
+
+  FilterXExpr *fillable_expr = filterx_non_literal_new(fillable);
+  filterx_generator_set_fillable(func, fillable_expr);
+
+  /* error is NULL whenever the asserts above hold; g_error_free(NULL) would log
+   * a GLib critical, while g_clear_error() accepts an unset error. */
+  g_clear_error(&error);
+  return func;
+}
+
+/*
+ * Wraps event_data_xml into the fixed event template below and returns the
+ * resulting text. The returned pointer belongs to a buffer obtained from
+ * scratch_buffers_alloc(), not to the caller.
+ *
+ * NOTE(review): the template and the test inputs further down show no XML
+ * markup here; verify the element tags against the original patch.
+ */
+static const gchar *
+_create_input_from_event_data(const gchar *event_data_xml)
+{
+  GString *xml = scratch_buffers_alloc();
+  g_string_printf(xml,
+                  "\n"
+                  " \n"
+                  " \n"
+                  " 999\n"
+                  " 0\n"
+                  " 2\n"
+                  " 0\n"
+                  " 0\n"
+                  " 0x80000000000000\n"
+                  " \n"
+                  " 934\n"
+                  " \n"
+                  " \n"
+                  " Application\n"
+                  " DESKTOP-2MBFIV7\n"
+                  " \n"
+                  " \n"
+                  " \n"
+                  " foobar\n"
+                  " Error\n"
+                  " \n"
+                  " Info\n"
+                  " \n"
+                  " \n"
+                  " \n"
+                  " Classic\n"
+                  " \n"
+                  " \n"
+                  " \n"
+                  " %s\n"
+                  " \n"
+                  "",
+                  event_data_xml);
+  return xml->str;
+}
+
+/*
+ * Parses an event built around event_data_xml and checks that the repr of the
+ * result is the fixed prefix, then expected_eventdata_json, then the closing
+ * "}}" suffix.
+ */
+static void
+_assert_parse_event_data(const gchar *event_data_xml, const gchar *expected_eventdata_json)
+{
+  FilterXExpr *func = _create_expr(_create_input_from_event_data(event_data_xml), filterx_json_object_new_empty());
+
+  FilterXObject *result = filterx_expr_eval(func);
+  cr_assert(result);
+  cr_assert(!filterx_eval_get_last_error());
+
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(json_object)));
+
+  GString *formatted_result = g_string_new(NULL);
+  filterx_object_repr(result, formatted_result);
+
+  const gchar *prefix = "{\"Event\":{\"@xmlns\":\"http:\\/\\/schemas.microsoft.com\\/win\\/2004\\/08\\/events\\/event\""
+                        ",\"System\":{\"Provider\":{\"@Name\":\"EventCreate\"},\"EventID\":{\"@Qualifiers\":\"0\",\"#te"
+                        "xt\":\"999\"},\"Version\":\"0\",\"Level\":\"2\",\"Task\":\"0\",\"Opcode\":\"0\",\"Keywords\":"
+                        "\"0x80000000000000\",\"TimeCreated\":{\"@SystemTime\":\"2024-01-12T09:30:12.1566754Z\"},\"Even"
+                        "tRecordID\":\"934\",\"Correlation\":\"\",\"Execution\":{\"@ProcessID\":\"0\",\"@ThreadID\":\"0"
+                        "\"},\"Channel\":\"Application\",\"Computer\":\"DESKTOP-2MBFIV7\",\"Security\":{\"@UserID\":\"S"
+                        "-1-5-21-3714454296-2738353472-899133108-1001\"}},\"RenderingInfo\":{\"@Culture\":\"en-US\",\"M"
+                        "essage\":\"foobar\",\"Level\":\"Error\",\"Task\":\"\",\"Opcode\":\"Info\",\"Channel\":\"\",\"P"
+                        "rovider\":\"\",\"Keywords\":{\"Keyword\":\"Classic\"}},\"EventData\":";
+  const gchar *suffix = "}}";
+  const gsize prefix_len = strlen(prefix);
+  const gsize suffix_len = strlen(suffix);
+
+  /* Without this guard a too short repr makes the memcmp() calls read past the
+   * end of the buffer and the truncation length below wrap around. */
+  cr_assert(formatted_result->len >= prefix_len + suffix_len);
+  cr_assert_eq(memcmp(formatted_result->str, prefix, prefix_len), 0);
+
+  /* Needed for sensible assertion error reporting. */
+  GString *formatted_eventdata = g_string_new(formatted_result->str + prefix_len);
+  g_string_truncate(formatted_eventdata, formatted_eventdata->len - suffix_len);
+  cr_assert_str_eq(formatted_eventdata->str, expected_eventdata_json);
+
+  cr_assert_eq(memcmp(formatted_result->str + prefix_len + strlen(expected_eventdata_json),
+                      suffix, suffix_len), 0);
+
+  g_string_free(formatted_eventdata, TRUE);
+  g_string_free(formatted_result, TRUE);
+  filterx_object_unref(result);
+  filterx_expr_unref(func);
+}
+
+/* Checks that evaluating the parser on xml yields no result and leaves an eval error, then clears it. */
+static void
+_assert_parse_fail(const gchar *xml)
+{
+  FilterXExpr *func = _create_expr(xml, filterx_json_object_new_empty());
+
+  FilterXObject *result = filterx_expr_eval(func);
+  cr_assert(!result);
+  cr_assert(filterx_eval_get_last_error());
+
+  filterx_eval_clear_errors();
+  filterx_expr_unref(func);
+}
+
+/* Same as _assert_parse_fail(), with event_data_xml wrapped into the event template first. */
+static void
+_assert_parse_event_data_fail(const gchar *event_data_xml)
+{
+  _assert_parse_fail(_create_input_from_event_data(event_data_xml));
+}
+
+Test(filterx_parse_windows_eventlog_xml, valid_inputs)
+{
+  _assert_parse_event_data("foo\n",
+                           "{\"Data\":{\"param1\":\"foo\"}}");
+
+  _assert_parse_event_data("foo\n"
+                           "bar\n",
+                           "{\"Data\":{\"param1\":\"foo\",\"param2\":\"bar\"}}");
+
+  _assert_parse_event_data("foo\n",
+                           "{\"Data\":\"foo\"}");
+
+  _assert_parse_event_data("foo\n"
+                           "bar\n",
+                           "{\"Data\":[\"foo\",\"bar\"]}");
+}
+
+Test(filterx_parse_windows_eventlog_xml, invalid_inputs)
+{
+  _assert_parse_event_data_fail("foo\n");
+  _assert_parse_event_data_fail("foo\n");
+  _assert_parse_fail("");
+  _assert_parse_fail("");
+}
+
+/* Registered below as the suite's .init hook. */
+static void
+setup(void)
+{
+  configuration = cfg_new_snippet();
+  app_startup();
+  init_libtest_filterx();
+}
+
+/* Registered below as the suite's .fini hook; undoes setup() in reverse order after a scratch-buffer GC. */
+static void
+teardown(void)
+{
+  scratch_buffers_explicit_gc();
+  deinit_libtest_filterx();
+  app_shutdown();
+  cfg_free(configuration);
+}
+
+TestSuite(filterx_parse_windows_eventlog_xml, .init = setup, .fini = teardown);
diff --git a/modules/xml/xml-plugin.c b/modules/xml/xml-plugin.c
index
f60ab1cdfd..e0a7d3a1ef 100644 --- a/modules/xml/xml-plugin.c +++ b/modules/xml/xml-plugin.c @@ -21,7 +21,7 @@ */ #include "filterx-parse-xml.h" -#include "filterx/expr-function.h" +#include "filterx-parse-windows-eventlog-xml.h" #include "cfg-parser.h" #include "plugin.h" @@ -42,6 +42,7 @@ static Plugin xml_plugins[] = .parser = &xml_parser, }, FILTERX_GENERATOR_FUNCTION_PLUGIN(parse_xml), + FILTERX_GENERATOR_FUNCTION_PLUGIN(parse_windows_eventlog_xml), }; gboolean diff --git a/tests/copyright/policy b/tests/copyright/policy index 3547677766..0b3a44acd5 100644 --- a/tests/copyright/policy +++ b/tests/copyright/policy @@ -274,8 +274,10 @@ modules/correlation/id-counter\.[ch]$ modules/correlation/group-lines.h modules/xml/windows-eventlog-xml-parser\.h modules/xml/filterx-parse-xml\.[ch]$ +modules/xml/filterx-parse-windows-eventlog-xml\.[ch]$ modules/xml/tests/test_windows_eventlog_xml_parser\.c modules/xml/tests/test_filterx_parse_xml\.c +modules/xml/tests/test_filterx_parse_windows_eventlog_xml\.c modules/examples/filterx/example-filterx-func/example-filterx-func-plugin\.[ch] modules/grpc/otel/filterx modules/kvformat/filterx-func-parse-kv\.[ch] diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py index 55481234a8..b7c9747b1a 100644 --- a/tests/light/functional_tests/filterx/test_filterx.py +++ b/tests/light/functional_tests/filterx/test_filterx.py @@ -2117,3 +2117,87 @@ def test_parse_xml(config, syslog_ng): assert file_true.get_stats()["processed"] == 1 assert "processed" not in file_false.get_stats() assert file_true.read_log() == "{\"a\":{\"b\":[{\"@attr\":\"attr_val\",\"#text\":\"c\"},\"e\"]}}\n" + + +def test_parse_windows_eventlog_xml(config, syslog_ng): + (file_true, file_false) = create_config( + config, r""" + xml = " + + + + 999 + 0 + 2 + 0 + 0 + 0x80000000000000 + + 934 + + + Application + DESKTOP-2MBFIV7 + + + + foobar + Error + + Info + + + + Classic + + + + foo + bar + + + "; + $MSG 
= json(parse_windows_eventlog_xml(xml)); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert json.loads(file_true.read_log()) == { + "Event": { + "@xmlns": "http://schemas.microsoft.com/win/2004/08/events/event", + "System": { + "Provider": {"@Name": "EventCreate"}, + "EventID": {"@Qualifiers": "0", "#text": "999"}, + "Version": "0", + "Level": "2", + "Task": "0", + "Opcode": "0", + "Keywords": "0x80000000000000", + "TimeCreated": {"@SystemTime": "2024-01-12T09:30:12.1566754Z"}, + "EventRecordID": "934", + "Correlation": "", + "Execution": {"@ProcessID": "0", "@ThreadID": "0"}, + "Channel": "Application", + "Computer": "DESKTOP-2MBFIV7", + "Security": {"@UserID": "S-1-5-21-3714454296-2738353472-899133108-1001"}, + }, + "RenderingInfo": { + "@Culture": "en-US", + "Message": "foobar", + "Level": "Error", + "Task": "", + "Opcode": "Info", + "Channel": "", + "Provider": "", + "Keywords": {"Keyword": "Classic"}, + }, + "EventData": { + "Data": { + "param1": "foo", + "param2": "bar", + }, + }, + }, + }