From 6e044b0b68e6a512fa80086c65a1e2a8fb4a27b0 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 28 Oct 2024 16:48:19 +0100 Subject: [PATCH] feat(capi+go+py): implement APIs for exposing rule tags. --- capi/include/yara_x.h | 24 ++++ capi/src/rule.rs | 42 +++++- capi/src/tests.rs | 33 +++-- go/compiler_test.go | 5 +- go/main.go | 46 ++++++- go/scanner_test.go | 2 +- py/src/lib.rs | 12 ++ py/tests/test_api.py | 13 ++ py/yara_x.pyi | 298 ++++++++++++++++++++++-------------------- 9 files changed, 317 insertions(+), 158 deletions(-) diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index b93bd575b..e02e7fad0 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -181,6 +181,19 @@ typedef void (*YRX_METADATA_CALLBACK)(const struct YRX_METADATA *metadata, typedef void (*YRX_PATTERN_CALLBACK)(const struct YRX_PATTERN *pattern, void *user_data); +// Callback function passed to [`yrx_rule_iter_tags`]. +// +// The callback is called for each tag defined in the rule, and it receives +// a pointer to a string with the tag name. This pointer is guaranteed to be +// valid while the callback function is being executed, but it will be freed +// after the callback function returns, so you must not use this pointer +// outside the callback; copy the string if it is needed afterwards. +// +// The callback also receives a `user_data` pointer that can point to arbitrary +// data owned by the user. +typedef void (*YRX_TAG_CALLBACK)(const char *tag, + void *user_data); + // Callback function passed to [`yrx_scanner_on_matching_rule`] or // [`yrx_rules_iter`]. // @@ -458,6 +471,17 @@ enum YRX_RESULT yrx_rule_iter_patterns(const struct YRX_RULE *rule, YRX_PATTERN_CALLBACK callback, void *user_data); +// Iterates over the tags in a rule, calling the callback with a pointer +// to each tag. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_TAG_CALLBACK`] for more details. 
+enum YRX_RESULT yrx_rule_iter_tags(const struct YRX_RULE *rule, + YRX_TAG_CALLBACK callback, + void *user_data); + // Iterates over the compiled rules, calling the callback function for each // rule. // diff --git a/capi/src/rule.rs b/capi/src/rule.rs index 089cd4943..1d9f276e1 100644 --- a/capi/src/rule.rs +++ b/capi/src/rule.rs @@ -1,4 +1,4 @@ -use std::ffi::{c_void, CString}; +use std::ffi::{c_char, c_void, CString}; use yara_x::MetaValue; use crate::{ @@ -179,3 +179,43 @@ pub unsafe extern "C" fn yrx_rule_iter_patterns( YRX_RESULT::SUCCESS } + +/// Callback function passed to [`yrx_rule_iter_tags`]. +/// +/// The callback is called for each tag defined in the rule, and it receives +/// a pointer to a string with the tag name. This pointer is guaranteed to be +/// valid while the callback function is being executed, but it will be freed +/// after the callback function returns, so you must not use this pointer +/// outside the callback; copy the string if it is needed afterwards. +/// +/// The callback also receives a `user_data` pointer that can point to arbitrary +/// data owned by the user. +pub type YRX_TAG_CALLBACK = + extern "C" fn(tag: *const c_char, user_data: *mut c_void) -> (); + +/// Iterates over the tags in a rule, calling the callback with a pointer +/// to each tag. +/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_TAG_CALLBACK`] for more details. 
+#[no_mangle] +pub unsafe extern "C" fn yrx_rule_iter_tags( + rule: *const YRX_RULE, + callback: YRX_TAG_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + let tags_iter = if let Some(rule) = rule.as_ref() { + rule.0.tags() + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + for tag in tags_iter { + let tag_name = CString::new(tag.identifier()).unwrap(); + callback(tag_name.as_ptr(), user_data) + } + + YRX_RESULT::SUCCESS +} diff --git a/capi/src/tests.rs b/capi/src/tests.rs index 68b8fb3e2..5eb0470a3 100644 --- a/capi/src/tests.rs +++ b/capi/src/tests.rs @@ -7,14 +7,14 @@ use crate::compiler::{ }; use crate::{ yrx_buffer_destroy, yrx_last_error, yrx_rule_identifier, - yrx_rule_iter_metadata, yrx_rule_iter_patterns, yrx_rule_namespace, - yrx_rules_deserialize, yrx_rules_destroy, yrx_rules_iter, - yrx_rules_iter_imports, yrx_rules_serialize, yrx_scanner_create, - yrx_scanner_destroy, yrx_scanner_on_matching_rule, yrx_scanner_scan, - yrx_scanner_set_global_bool, yrx_scanner_set_global_float, - yrx_scanner_set_global_int, yrx_scanner_set_global_str, - yrx_scanner_set_timeout, YRX_BUFFER, YRX_METADATA, YRX_PATTERN, - YRX_RESULT, YRX_RULE, + yrx_rule_iter_metadata, yrx_rule_iter_patterns, yrx_rule_iter_tags, + yrx_rule_namespace, yrx_rules_deserialize, yrx_rules_destroy, + yrx_rules_iter, yrx_rules_iter_imports, yrx_rules_serialize, + yrx_scanner_create, yrx_scanner_destroy, yrx_scanner_on_matching_rule, + yrx_scanner_scan, yrx_scanner_set_global_bool, + yrx_scanner_set_global_float, yrx_scanner_set_global_int, + yrx_scanner_set_global_str, yrx_scanner_set_timeout, YRX_BUFFER, + YRX_METADATA, YRX_PATTERN, YRX_RESULT, YRX_RULE, }; use std::ffi::{c_char, c_void, CStr, CString}; @@ -49,6 +49,12 @@ extern "C" fn on_pattern_iter( *count += 1; } +extern "C" fn on_tag_iter(_tag: *const c_char, user_data: *mut c_void) { + let ptr = user_data as *mut i32; + let count = unsafe { ptr.as_mut().unwrap() }; + *count += 1; +} + extern "C" fn on_rule_match(rule: *const 
YRX_RULE, user_data: *mut c_void) { let mut ptr = std::ptr::null(); let mut len = 0; @@ -74,6 +80,15 @@ extern "C" fn on_rule_match(rule: *const YRX_RULE, user_data: *mut c_void) { ); // The rule has one pattern. assert_eq!(count, 1); + + let mut count = 0; + yrx_rule_iter_tags( + rule, + on_tag_iter, + &mut count as *mut i32 as *mut c_void, + ); + // The rule has two tags. + assert_eq!(count, 2); } let ptr = user_data as *mut i32; @@ -93,7 +108,7 @@ fn capi() { let src = CString::new( br#" import "pe" - rule test { + rule test : tag1 tag2 { meta: some_int = 1 some_string = "foo" diff --git a/go/compiler_test.go b/go/compiler_test.go index 9cf069e4e..75dcbc80d 100644 --- a/go/compiler_test.go +++ b/go/compiler_test.go @@ -215,7 +215,7 @@ func TestRules(t *testing.T) { c, err := NewCompiler() assert.NoError(t, err) - c.AddSource(`rule test_1 { + c.AddSource(`rule test_1 : tag1 tag2 { condition: true }`) @@ -243,6 +243,9 @@ func TestRules(t *testing.T) { assert.Equal(t, "default", slice[0].Namespace()) assert.Equal(t, "default", slice[1].Namespace()) + assert.Equal(t, []string{"tag1", "tag2"}, slice[0].Tags()) + assert.Equal(t, []string{}, slice[1].Tags()) + assert.Len(t, slice[0].Metadata(), 0) assert.Len(t, slice[1].Metadata(), 4) diff --git a/go/main.go b/go/main.go index 718a28fb5..4573adb9c 100644 --- a/go/main.go +++ b/go/main.go @@ -41,6 +41,14 @@ package yara_x // return yrx_rules_iter_imports(rules, callback, (void*) imports_handle); // } // +// enum YRX_RESULT static inline _yrx_rule_iter_tags( +// const struct YRX_RULE *rule, +// YRX_TAG_CALLBACK callback, +// uintptr_t tags_handle) +// { +// return yrx_rule_iter_tags(rule, callback, (void*) tags_handle); +// } +// // enum YRX_RESULT static inline _yrx_rule_iter_metadata( // const struct YRX_RULE *rule, // YRX_METADATA_CALLBACK callback, @@ -70,6 +78,7 @@ package yara_x // extern void metadataCallback(YRX_METADATA*, uintptr_t); // extern void patternCallback(YRX_PATTERN*, uintptr_t); // extern void 
matchCallback(YRX_MATCH*, uintptr_t); +// extern void tagCallback(char*, uintptr_t); // import "C" @@ -201,6 +210,7 @@ func (r *Rules) Destroy() { // This is the callback called by yrx_rules_iterate, when Rules.GetRules is // called. +// //export onRule func onRule(rule *C.YRX_RULE, handle C.uintptr_t) { h := cgo.Handle(handle) @@ -255,6 +265,7 @@ func (r *Rules) Imports() []string { type Rule struct { namespace string identifier string + tags []string patterns []Pattern metadata []Metadata } @@ -295,6 +306,17 @@ func newRule(cRule *C.YRX_RULE) *Rule { identifier := C.GoStringN((*C.char)(unsafe.Pointer(str)), C.int(len)) + tags := make([]string, 0) + tagsHandle := cgo.NewHandle(&tags) + defer tagsHandle.Delete() + + if C._yrx_rule_iter_tags( + cRule, + C.YRX_TAG_CALLBACK(C.tagCallback), + C.uintptr_t(tagsHandle)) != C.SUCCESS { + panic("yrx_rule_iter_tags failed") + } + metadata := make([]Metadata, 0) metadataHandle := cgo.NewHandle(&metadata) defer metadataHandle.Delete() @@ -320,6 +342,7 @@ func newRule(cRule *C.YRX_RULE) *Rule { rule := &Rule{ namespace, identifier, + tags, patterns, metadata, } @@ -337,6 +360,11 @@ func (r *Rule) Namespace() string { return r.namespace } +// Tags returns the rule's tags. +func (r *Rule) Tags() []string { + return r.tags +} + // Identifier associated to the metadata. 
func (m *Metadata) Identifier() string { return m.identifier @@ -403,7 +431,19 @@ func importCallback(moduleName *C.char, handle C.uintptr_t) { *imports = append(*imports, C.GoString(moduleName)) } -// This is the callback called by yrx_rules_iter_patterns +// This is the callback called by yrx_rule_iter_tags +// +//export tagCallback +func tagCallback(tag *C.char, handle C.uintptr_t) { + h := cgo.Handle(handle) + tags, ok := h.Value().(*[]string) + if !ok { + panic("tagsCallback didn't receive a *[]string") + } + *tags = append(*tags, C.GoString(tag)) +} + +// This is the callback called by yrx_rule_iter_patterns // //export patternCallback func patternCallback(pattern *C.YRX_PATTERN, handle C.uintptr_t) { @@ -437,7 +477,7 @@ func patternCallback(pattern *C.YRX_PATTERN, handle C.uintptr_t) { }) } -// This is the callback called by yrx_rules_iter_patterns +// This is the callback called by yrx_rule_iter_metadata // //export metadataCallback func metadataCallback(metadata *C.YRX_METADATA, handle C.uintptr_t) { @@ -472,7 +512,7 @@ func metadataCallback(metadata *C.YRX_METADATA, handle C.uintptr_t) { }) } -// This is the callback called by yrx_rules_iter_patterns +// This is the callback called by yrx_pattern_iter_matches // //export matchCallback func matchCallback(match *C.YRX_MATCH, handle C.uintptr_t) { diff --git a/go/scanner_test.go b/go/scanner_test.go index 8f2ddc328..bd59e2883 100644 --- a/go/scanner_test.go +++ b/go/scanner_test.go @@ -138,4 +138,4 @@ func BenchmarkScan(b *testing.B) { _ = rule.Identifier() } } -} \ No newline at end of file +} diff --git a/py/src/lib.rs b/py/src/lib.rs index afeaf9f78..d49966bfb 100644 --- a/py/src/lib.rs +++ b/py/src/lib.rs @@ -397,6 +397,7 @@ impl ScanResults { struct Rule { identifier: String, namespace: String, + tags: Py<PyTuple>, metadata: Py<PyTuple>, patterns: Py<PyTuple>, } @@ -415,6 +416,12 @@ impl Rule { self.namespace.as_str() } + /// Returns the rule's tags. 
+ #[getter] + fn tags(&self) -> Py<PyTuple> { + Python::with_gil(|py| self.tags.clone_ref(py)) + } + /// A tuple of pairs `(identifier, value)` with the metadata associated to /// the rule. #[getter] @@ -585,6 +592,11 @@ fn rule_to_py(py: Python, rule: yrx::Rule) -> PyResult<Py<Rule>> { Rule { identifier: rule.identifier().to_string(), namespace: rule.namespace().to_string(), + tags: PyTuple::new_bound( + py, + rule.tags().map(|tag| tag.identifier()), + ) + .unbind(), metadata: PyTuple::new_bound( py, rule.metadata() diff --git a/py/tests/test_api.py b/py/tests/test_api.py index 0fb6e24df..8248d2633 100644 --- a/py/tests/test_api.py +++ b/py/tests/test_api.py @@ -141,6 +141,19 @@ def test_metadata(): ) +def test_tags(): + rules = yara_x.compile(''' + rule test : tag1 tag2 { + condition: + true + } + ''') + + matching_rules = rules.scan(b'').matching_rules + + assert matching_rules[0].tags == ("tag1", "tag2") + + def test_compile_and_scan(): rules = yara_x.compile('rule foo {strings: $a = "foo" condition: $a}') matching_rules = rules.scan(b'foobar').matching_rules diff --git a/py/yara_x.pyi b/py/yara_x.pyi index de3158439..420b49931 100644 --- a/py/yara_x.pyi +++ b/py/yara_x.pyi @@ -1,176 +1,188 @@ import typing + class Compiler: + r""" + Compiles YARA source code producing a set of compiled [`Rules`]. + """ + + def __new__(cls, *, relaxed_re_syntax=..., error_on_slow_pattern=...): ... + + def add_source(self, src: str, origin: typing.Optional[str]) -> None: r""" - Compiles YARA source code producing a set of compiled [`Rules`]. + Adds a YARA source code to be compiled. + + This function may be invoked multiple times to add several sets of YARA + rules before calling [`Compiler::build`]. If the rules provided in + `src` contain errors that prevent compilation, the function will raise + an exception with the first error encountered. 
Additionally, the + compiler will store this error, along with any others discovered during + compilation, which can be accessed using [`Compiler::errors`]. + + Even if a previous invocation resulted in a compilation error, you can + continue calling this function. In such cases, any rules that failed to + compile will not be included in the final compiled set. + + The optional parameter `origin` allows to specify the origin of the + source code. This usually receives the path of the file from where the + code was read, but it can be any arbitrary string that conveys information + about the source code's origin. """ - def __new__(cls, *, relaxed_re_syntax=..., error_on_slow_pattern=...): ... - def add_source(self, src: str, origin: typing.Optional[str]) -> None: - r""" - Adds a YARA source code to be compiled. - - This function may be invoked multiple times to add several sets of YARA - rules before calling [`Compiler::build`]. If the rules provided in - `src` contain errors that prevent compilation, the function will raise - an exception with the first error encountered. Additionally, the - compiler will store this error, along with any others discovered during - compilation, which can be accessed using [`Compiler::errors`]. - - Even if a previous invocation resulted in a compilation error, you can - continue calling this function. In such cases, any rules that failed to - compile will not be included in the final compiled set. - - The optional parameter `origin` allows to specify the origin of the - source code. This usually receives the path of the file from where the - code was read, but it can be any arbitrary string that conveys information - about the source code's origin. - """ - ... - - def define_global(self, ident: str, value: typing.Any) -> None: - r""" - Defines a global variable and sets its initial value. - - Global variables must be defined before calling [`Compiler::add_source`] - with some YARA rule that uses the variable. 
The variable will retain its - initial value when the [`Rules`] are used for scanning data, however - each scanner can change the variable's value by calling - [`crate::Scanner::set_global`]. - - The type of `value` must be: bool, str, bytes, int or float. - - # Raises - - [TypeError](https://docs.python.org/3/library/exceptions.html#TypeError) - if the type of `value` is not one of the supported ones. - """ - ... - - def new_namespace(self, namespace: str) -> None: - r""" - Creates a new namespace. - - Further calls to [`Compiler::add_source`] will put the rules under the - newly created namespace. - """ - ... - - def ignore_module(self, module: str) -> None: - r""" - Tell the compiler that a YARA module is not supported. - - Import statements for unsupported modules will be ignored without - errors, but a warning will be issued. Any rule that make use of an - ignored module will be ignored, while the rest of rules that - don't rely on that module will be correctly compiled. - """ - ... - - def build(self) -> Rules: - r""" - Builds the source code previously added to the compiler. - - This function returns an instance of [`Rules`] containing all the rules - previously added with [`Compiler::add_source`] and sets the compiler - to its initial empty state. - """ - ... - - def errors(self) -> typing.Any: - r""" - Retrieves all errors generated by the compiler. - - This method returns every error encountered during the compilation, - across all invocations of [`Compiler::add_source`]. - """ - ... - - def warnings(self) -> typing.Any: - r""" - Retrieves all warnings generated by the compiler. - - This method returns every warning encountered during the compilation, - across all invocations of [`Compiler::add_source`]. - """ - ... + ... -class Match: + def define_global(self, ident: str, value: typing.Any) -> None: r""" - Represents a match found for a pattern. + Defines a global variable and sets its initial value. 
+ + Global variables must be defined before calling [`Compiler::add_source`] + with some YARA rule that uses the variable. The variable will retain its + initial value when the [`Rules`] are used for scanning data, however + each scanner can change the variable's value by calling + [`crate::Scanner::set_global`]. + + The type of `value` must be: bool, str, bytes, int or float. + + # Raises + + [TypeError](https://docs.python.org/3/library/exceptions.html#TypeError) + if the type of `value` is not one of the supported ones. """ + ... - offset: int - length: int - xor_key: typing.Optional[int] + def new_namespace(self, namespace: str) -> None: + r""" + Creates a new namespace. -class Pattern: + Further calls to [`Compiler::add_source`] will put the rules under the + newly created namespace. + """ + ... + + def ignore_module(self, module: str) -> None: r""" - Represents a pattern in a YARA rule. + Tell the compiler that a YARA module is not supported. + + Import statements for unsupported modules will be ignored without + errors, but a warning will be issued. Any rule that make use of an + ignored module will be ignored, while the rest of rules that + don't rely on that module will be correctly compiled. """ + ... - identifier: str - matches: tuple + def build(self) -> Rules: + r""" + Builds the source code previously added to the compiler. -class Rule: + This function returns an instance of [`Rules`] containing all the rules + previously added with [`Compiler::add_source`] and sets the compiler + to its initial empty state. + """ + ... + + def errors(self) -> typing.Any: r""" - Represents a rule that matched while scanning some data. + Retrieves all errors generated by the compiler. + + This method returns every error encountered during the compilation, + across all invocations of [`Compiler::add_source`]. """ + ... + + def warnings(self) -> typing.Any: + r""" + Retrieves all warnings generated by the compiler. 
+ + This method returns every warning encountered during the compilation, + across all invocations of [`Compiler::add_source`]. + """ + ... + + +class Match: + r""" + Represents a match found for a pattern. + """ + + offset: int + length: int + xor_key: typing.Optional[int] + + +class Pattern: + r""" + Represents a pattern in a YARA rule. + """ + + identifier: str + matches: tuple + + +class Rule: + r""" + Represents a rule that matched while scanning some data. + """ + + identifier: str + namespace: str + tags: tuple + metadata: tuple + patterns: tuple - identifier: str - namespace: str - metadata: tuple - patterns: tuple class Rules: + r""" + A set of YARA rules in compiled form. + + This is the result of [`Compiler::build`]. + """ + + def scan(self, data: bytes) -> ScanResults: r""" - A set of YARA rules in compiled form. + Scans in-memory data with these rules. + """ + ... - This is the result of [`Compiler::build`]. + def serialize_into(self, file: typing.Any) -> None: + r""" + Serializes the rules into a file-like object. """ - def scan(self, data: bytes) -> ScanResults: - r""" - Scans in-memory data with these rules. - """ - ... - - def serialize_into(self, file: typing.Any) -> None: - r""" - Serializes the rules into a file-like object. - """ - ... - - @staticmethod - def deserialize_from(file: typing.Any) -> Rules: - r""" - Deserializes rules from a file-like object. - """ - ... + ... -class ScanResults: + @staticmethod + def deserialize_from(file: typing.Any) -> Rules: r""" - Results produced by a scan operation. + Deserializes rules from a file-like object. """ + ... + + +class ScanResults: + r""" + Results produced by a scan operation. + """ + + matching_rules: tuple + module_outputs: dict - matching_rules: tuple - module_outputs: dict class Scanner: - r""" - Scans data with already compiled YARA rules. + r""" + Scans data with already compiled YARA rules. - The scanner receives a set of compiled Rules and scans data with those - rules. 
The same scanner can be used for scanning multiple files or - in-memory data sequentially, but you need multiple scanners for scanning - in parallel. - """ + The scanner receives a set of compiled Rules and scans data with those + rules. The same scanner can be used for scanning multiple files or + in-memory data sequentially, but you need multiple scanners for scanning + in parallel. + """ + + ... - ... def compile(src: str) -> Rules: - r""" - Compiles a YARA source code producing a set of compiled [`Rules`]. + r""" + Compiles a YARA source code producing a set of compiled [`Rules`]. - This function allows compiling simple rules that don't depend on external - variables. For more complex use cases you will need to use a [`Compiler`]. - """ - ... + This function allows compiling simple rules that don't depend on external + variables. For more complex use cases you will need to use a [`Compiler`]. + """ + ...