Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Rewriting RzNum parser and calculator #4326

Draft
wants to merge 4 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion librz/include/rz_util/rz_big.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef RZ_BIG_H
#define RZ_BIG_H

#include "../rz_types.h"
#include "<rz_types.h>"

#ifdef __cplusplus
extern "C" {
Expand Down
19 changes: 19 additions & 0 deletions librz/include/rz_util/rz_num.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,32 @@
#define RZ_NUM_H

#include <rz_list.h>
#include <rz_big.h>
#include <rz_bitvector.h>

#define RZ_NUMCALC_STRSZ 1024

#ifdef __cplusplus
extern "C" {
#endif

/**
 * \brief Kinds of numeric values; stored in the `kind` field of RzNumValue.
 */
typedef enum {
RZ_NUM_KIND_SIMPLE, ///< simple numbers, fit into ut64
RZ_NUM_KIND_BIG, ///< big numbers, using RzNumBig
RZ_NUM_KIND_BITVECTOR, ///< bit vectors, using rz_bitvector
RZ_NUM_KIND_FLOAT, ///< real numbers, using RzFloat with bitvector underneath
} RzNumKind;

/**
 * \brief A numeric value together with the kind of number it holds.
 *
 * The `val` union has one member per RzNumKind and `kind` records the kind.
 * NOTE(review): presumably `kind` selects the active `val` member
 * (SIMPLE -> n, BIG -> b, BITVECTOR -> bv, FLOAT -> f) - confirm once users exist.
 * NOTE(review): RzFloat is used below, but the visible part of this header only
 * includes rz_list.h, rz_big.h and rz_bitvector.h - confirm its declaration is reachable.
 */
typedef struct {
union {
ut64 n; ///< value that fits into 64 bits
RzNumBig b; ///< big number
RzBitvector bv; ///< bit vector
RzFloat f; ///< floating point number
} val;
RzNumKind kind; ///< kind of the stored number
} RzNumValue;

typedef struct {
double d;
ut64 n;
Expand Down
2 changes: 0 additions & 2 deletions librz/util/calc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// SPDX-License-Identifier: LGPL-3.0-only

/* ported to C by pancake for r2 in 2012-2017 */
// TODO: integrate floating point support
// TODO: do not use global variables
/*
Reference Chapter 6:
"The C++ Programming Language", Special Edition.
Expand Down
93 changes: 93 additions & 0 deletions librz/util/num.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: 2023 Anton Kochkov <[email protected]>
// SPDX-License-Identifier: LGPL-3.0-only

#include <stdio.h>
#include <rz_types.h>
#include <rz_list.h>
#include <rz_util/rz_str.h>
#include <rz_util/rz_file.h>
#include <rz_util/rz_assert.h>
#include <tree_sitter/api.h>

/**
 * Stores the start and end byte offsets of the tree-sitter \p node into the
 * lvalues \p start and \p end.
 * \p node is expanded twice, so pass an expression without side effects.
 */
#define TS_START_END(node, start, end) \
do { \
start = ts_node_start_byte(node); \
end = ts_node_end_byte(node); \
} while (0)

/**
 * \brief Copies the source text covered by \p node into a new string.
 *
 * \param node Tree-sitter node whose byte range is extracted
 * \param cstr Source buffer the byte offsets of \p node refer to
 * \return String produced by rz_str_newf(); the caller releases it with free()
 */
static char *ts_node_sub_string(TSNode node, const char *cstr) {
	ut32 start, end;
	TS_START_END(node, start, end);
	// The precision consumed by "%.*s" must be an int, so the ut32 length is cast explicitly.
	return rz_str_newf("%.*s", (int)(end - start), cstr + start);
}

// Entry point of the RzNum grammar; its definition is not part of this file.
// Declared with (void) so that this is a real prototype: in C an empty
// parameter list leaves the parameters unspecified.
TSLanguage *tree_sitter_rznum(void);

/**
 * \brief Parses \p code with the tree-sitter RzNum grammar and processes every
 * named top-level node with parse_type_nodes_save().
 *
 * \param state Parser state; supplies the verbose flag and the errors, warnings
 *        and debug string buffers
 * \param code NUL-terminated source text to parse
 * \param error_msg If not NULL and at least one node failed, receives the
 *        collected error text (may be NULL on allocation failure); the caller frees it
 * \return 0 when nothing failed (including an empty tree), the summed results of
 *         parse_type_nodes_save() otherwise, or -1 when the arguments are invalid
 *         or tree-sitter could not produce a syntax tree
 */
static int parse_string(NumParserState *state, const char *code, char **error_msg) {
	rz_return_val_if_fail(state && code, -1);

	// Create a parser and bind it to the RzNum grammar.
	TSParser *parser = ts_parser_new();
	if (!parser) {
		return -1;
	}
	// Binding fails when the grammar was generated for an incompatible tree-sitter version.
	if (!ts_parser_set_language(parser, tree_sitter_rznum())) {
		RZ_LOG_ERROR("Cannot set the RzNum grammar for the tree-sitter parser\n");
		ts_parser_delete(parser);
		return -1;
	}

	// Parsing yields NULL when no tree could be built; the root node must not be requested then.
	TSTree *tree = ts_parser_parse_string(parser, NULL, code, (ut32)strlen(code));
	if (!tree) {
		RZ_LOG_ERROR("Cannot parse the RzNum expression\n");
		ts_parser_delete(parser);
		return -1;
	}

	// Get the root node of the syntax tree.
	TSNode root_node = ts_tree_root_node(tree);
	int root_node_child_count = ts_node_named_child_count(root_node);
	if (!root_node_child_count) {
		parser_warning(state, "Root node is empty!\n");
		ts_tree_delete(tree);
		ts_parser_delete(parser);
		return 0;
	}

	// Some debugging
	if (state->verbose) {
		parser_debug(state, "root_node (%d children): %s\n", root_node_child_count, ts_node_type(root_node));
		// Print the syntax tree as an S-expression.
		char *string = ts_node_string(root_node);
		parser_debug(state, "Syntax tree: %s\n", string);
		free(string);
	}

	// Walk the named top-level nodes and accumulate the per-node results.
	int i = 0, result = 0;
	for (i = 0; i < root_node_child_count; i++) {
		TSNode child = ts_node_named_child(root_node, i);
		// We skip ";" or "," - empty expressions
		char *node_code = ts_node_sub_string(child, code);
		// A failed allocation must not reach strcmp(); such a node is still processed.
		const bool is_separator = node_code && (!strcmp(node_code, ";") || !strcmp(node_code, ","));
		free(node_code);
		if (is_separator) {
			continue;
		}
		parser_debug(state, "Processing %d child...\n", i);
		result += parse_type_nodes_save(state, child, code);
	}

	// If there were errors during the parser then the result is different from 0
	if (result) {
		char *error_msgs = rz_strbuf_drain_nofree(state->errors);
		char *warning_msgs = rz_strbuf_drain_nofree(state->warnings);
		RZ_LOG_DEBUG("Errors:\n");
		RZ_LOG_DEBUG("%s", error_msgs ? error_msgs : "");
		RZ_LOG_DEBUG("Warnings:\n");
		RZ_LOG_DEBUG("%s", warning_msgs ? warning_msgs : "");
		if (error_msg) {
			// Hand the drained buffer to the caller instead of duplicating and freeing it.
			*error_msg = error_msgs;
		} else {
			free(error_msgs);
		}
		free(warning_msgs);
	}
	if (state->verbose) {
		char *debug_msgs = rz_strbuf_drain_nofree(state->debug);
		RZ_LOG_DEBUG("%s", debug_msgs ? debug_msgs : "");
		free(debug_msgs);
	}

	// The tree and the parser only live for this call; `state` is left to the caller.
	ts_tree_delete(tree);
	ts_parser_delete(parser);
	return result;
}
34 changes: 18 additions & 16 deletions librz/util/unum.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ static void error(RzNum *num, const char *err_str) {
}
}

// FIXME: Rename into `rz_num_value_get()`
// TODO: try to avoid the use of sscanf
/* old get_offset */
RZ_API ut64 rz_num_get(RZ_NULLABLE RzNum *num, RZ_NULLABLE const char *str) {
Expand Down Expand Up @@ -474,7 +475,7 @@ RZ_API ut64 rz_num_get(RZ_NULLABLE RzNum *num, RZ_NULLABLE const char *str) {
* \param str Numerical expression.
* \return Evaluated expression's value.
**/
RZ_API ut64 rz_num_math(RzNum *num, const char *str) {
RZ_API ut64 rz_num_math_ut64(RzNum *num, const char *str) {
ut64 ret;
const char *err = NULL;
if (!str || !*str) {
Expand All @@ -494,6 +495,7 @@ RZ_API ut64 rz_num_math(RzNum *num, const char *str) {
return ret;
}

// FIXME: Rename into rz_num_value_get_float()
RZ_API double rz_num_get_float(RzNum *num, const char *str) {
double d = 0.0f;
(void)sscanf(str, "%lf", &d);
Expand Down Expand Up @@ -567,14 +569,14 @@ RZ_API int rz_num_conditional(RzNum *num, const char *str) {
lgt = strchr(p, '<');
if (lgt) {
*lgt = 0;
a = rz_num_math(num, p);
a = rz_num_math_ut64(num, p);
if (lgt[1] == '=') {
b = rz_num_math(num, lgt + 2);
b = rz_num_math_ut64(num, lgt + 2);
if (a > b) {
goto fail;
}
} else {
b = rz_num_math(num, lgt + 1);
b = rz_num_math_ut64(num, lgt + 1);
if (a >= b) {
goto fail;
}
Expand All @@ -583,14 +585,14 @@ RZ_API int rz_num_conditional(RzNum *num, const char *str) {
lgt = strchr(p, '>');
if (lgt) {
*lgt = 0;
a = rz_num_math(num, p);
a = rz_num_math_ut64(num, p);
if (lgt[1] == '=') {
b = rz_num_math(num, lgt + 2);
b = rz_num_math_ut64(num, lgt + 2);
if (a < b) {
goto fail;
}
} else {
b = rz_num_math(num, lgt + 1);
b = rz_num_math_ut64(num, lgt + 1);
if (a <= b) {
goto fail;
}
Expand All @@ -602,7 +604,7 @@ RZ_API int rz_num_conditional(RzNum *num, const char *str) {
if (*lgt == '!') {
rz_str_replace_char(p, '!', ' ');
rz_str_replace_char(p, '=', '-');
n = rz_num_math(num, p);
n = rz_num_math_ut64(num, p);
if (!n) {
goto fail;
}
Expand All @@ -613,7 +615,7 @@ RZ_API int rz_num_conditional(RzNum *num, const char *str) {
*lgt = ' ';
}
rz_str_replace_char(p, '=', '-');
n = rz_num_math(num, p);
n = rz_num_math_ut64(num, p);
if (n) {
goto fail;
}
Expand All @@ -628,12 +630,12 @@ RZ_API int rz_num_conditional(RzNum *num, const char *str) {
}

/**
 * \brief Evaluates \p input_value as a numeric expression and reports whether the
 * result can be told apart from a failed evaluation.
 *
 * A NULL \p input_value is treated as the value 0.
 *
 * \param num RzNum context passed to the evaluator
 * \param input_value Expression text, may be NULL
 * \return 0 only when the evaluated value is 0 and the non-NULL \p input_value
 *         starts with neither '0' nor '@'; 1 otherwise
 */
RZ_API int rz_num_is_valid_input(RzNum *num, const char *input_value) {
ut64 value = input_value ? rz_num_math(num, input_value) : 0;
ut64 value = input_value ? rz_num_math_ut64(num, input_value) : 0;
return !(value == 0 && input_value && *input_value != '0') || !(value == 0 && input_value && *input_value != '@');
}

/**
 * \brief Evaluates \p input_value as a numeric expression.
 *
 * \param num RzNum context passed to the evaluator
 * \param input_value Expression text, may be NULL
 * \return The evaluated value, or 0 when \p input_value is NULL
 */
RZ_API ut64 rz_num_get_input_value(RzNum *num, const char *input_value) {
ut64 value = input_value ? rz_num_math(num, input_value) : 0;
ut64 value = input_value ? rz_num_math_ut64(num, input_value) : 0;
return value;
}

Expand Down Expand Up @@ -690,12 +692,12 @@ RZ_API bool rz_is_valid_input_num_value(RzNum *num, const char *input_value) {
if (!input_value) {
return false;
}
ut64 value = rz_num_math(num, input_value);
ut64 value = rz_num_math_ut64(num, input_value);
return !(value == 0 && *input_value != '0');
}

/**
 * \brief Evaluates \p str as a numeric expression.
 *
 * \param num RzNum context passed to the evaluator
 * \param str Expression text, may be NULL or empty
 * \return The evaluated value, or 0 when \p str is NULL or an empty string
 */
RZ_API ut64 rz_get_input_num_value(RzNum *num, const char *str) {
return (str && *str) ? rz_num_math(num, str) : 0;
return (str && *str) ? rz_num_math_ut64(num, str) : 0;
}

static inline ut64 __nth_nibble(ut64 n, ut32 i) {
Expand Down Expand Up @@ -741,7 +743,7 @@ RZ_API ut64 rz_num_tail(RzNum *num, ut64 addr, const char *hex) {
strcpy(p, "0x");
strcpy(p + 2, hex);
if (isxdigit((ut8)hex[0])) {
n = rz_num_math(num, p);
n = rz_num_math_ut64(num, p);
} else {
eprintf("Invalid argument\n");
free(p);
Expand Down Expand Up @@ -792,7 +794,7 @@ RZ_API int rz_num_between(RzNum *num, const char *input_value) {
len = 3;
}
for (i = 0; i < len; i++) {
ns[i] = rz_num_math(num, rz_list_pop_head(nums));
ns[i] = rz_num_math_ut64(num, rz_list_pop_head(nums));
}
free(str);
rz_list_free(nums);
Expand Down Expand Up @@ -915,7 +917,7 @@ RZ_API size_t rz_num_base_of_string(RzNum *num, RZ_NONNULL const char *str) {
break;
default:
// syscall
base = rz_num_math(num, str);
base = rz_num_math_ut64(num, str);
}
}
return base;
Expand Down
26 changes: 26 additions & 0 deletions subprojects/rizin-math-parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[package]
name = "tree-sitter-rznum"
description = "RzNum grammar for the tree-sitter parsing library"
version = "0.0.1"
keywords = ["incremental", "parsing", "math"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/rizinorg/tree-sitter-rznum"
edition = "2021"
license = "MIT"

build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]

[lib]
path = "bindings/rust/lib.rs"

[dependencies]
tree-sitter = "~0.20"

[build-dependencies]
cc = "1.0"
3 changes: 3 additions & 0 deletions subprojects/rizin-math-parser/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# tree-sitter-rznum

A tree-sitter grammar for `RzNum`, Rizin's mathematical expression language.
19 changes: 19 additions & 0 deletions subprojects/rizin-math-parser/binding.gyp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"targets": [
{
"target_name": "tree_sitter_math_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# If your language uses an external scanner, add it here.
],
"cflags_c": [
"-std=c99",
]
}
]
}
28 changes: 28 additions & 0 deletions subprojects/rizin-math-parser/bindings/node/binding.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"

using namespace v8;

extern "C" TSLanguage * tree_sitter_math();

namespace {

NAN_METHOD(New) {}

// Module initializer: creates a single "Language" object whose internal field 0
// holds the pointer returned by tree_sitter_math(), sets its "name" property to
// "math", and installs that object as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_tpl = Nan::New<FunctionTemplate>(New);
  language_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  // Reserve one internal slot; it stores the raw grammar pointer below.
  language_tpl->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> language_ctor = Nan::GetFunction(language_tpl).ToLocalChecked();
  Local<Object> language = language_ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
  Nan::SetInternalFieldPointer(language, 0, tree_sitter_math());

  Local<String> name_key = Nan::New("name").ToLocalChecked();
  Local<String> name_value = Nan::New("math").ToLocalChecked();
  Nan::Set(language, name_key, name_value);

  Local<String> exports_key = Nan::New("exports").ToLocalChecked();
  Nan::Set(module, exports_key, language);
}

NODE_MODULE(tree_sitter_math_binding, Init)

} // namespace
19 changes: 19 additions & 0 deletions subprojects/rizin-math-parser/bindings/node/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Load the compiled native binding, preferring the Release build and falling
// back to the Debug build. Only a "module not found" failure triggers the
// fallback; any other load error is rethrown as-is. When neither build exists,
// the error from the Release attempt is the one reported.
function isModuleMissing(err) {
  return err.code === 'MODULE_NOT_FOUND';
}

function loadBinding() {
  try {
    return require("../../build/Release/tree_sitter_math_binding");
  } catch (releaseError) {
    if (!isModuleMissing(releaseError)) {
      throw releaseError;
    }
    try {
      return require("../../build/Debug/tree_sitter_math_binding");
    } catch (debugError) {
      if (!isModuleMissing(debugError)) {
        throw debugError;
      }
      throw releaseError;
    }
  }
}

module.exports = loadBinding();

// Attaching the node type metadata is best-effort: a missing or unreadable
// node-types.json is silently ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}
Loading
Loading