Skip to content

Commit

Permalink
Updated regex derivative engine (#5567)
Browse files Browse the repository at this point in the history
* updated derivative engine

* some edit

* further improvements in derivative code

* more deriv code edits and re::to_str update

* optimized mk_deriv_accept

* fixed PR comments

* small syntax fix

* updated some simplifications

* bugfix:forgot to_re before reverse

* fixed PR comments

* more PR comment fixes

* more PR comment fixes

* forgot to delete

* deleting unused definition

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
veanes and NikolajBjorner authored Oct 8, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent c0c3e68 commit 146f462
Showing 7 changed files with 892 additions and 279 deletions.
2 changes: 1 addition & 1 deletion src/ast/rewriter/seq_axioms.cpp
Original file line number Diff line number Diff line change
@@ -850,7 +850,7 @@ namespace seq {
add_clause(~eq, ge10k);

for (unsigned i = 0; i < k; ++i) {
expr* ch = seq.str.mk_nth_i(ubvs, i);
expr* ch = seq.str.mk_nth_c(ubvs, i);
is_digit = seq.mk_char_is_digit(ch);
add_clause(~ge_len, is_digit);
}
529 changes: 457 additions & 72 deletions src/ast/rewriter/seq_rewriter.cpp

Large diffs are not rendered by default.

46 changes: 37 additions & 9 deletions src/ast/rewriter/seq_rewriter.h
Original file line number Diff line number Diff line change
@@ -117,20 +117,20 @@ class seq_rewriter {
class op_cache {
struct op_entry {
decl_kind k;
expr* a, *b, *r;
op_entry(decl_kind k, expr* a, expr* b, expr* r): k(k), a(a), b(b), r(r) {}
op_entry():k(0), a(nullptr), b(nullptr), r(nullptr) {}
expr* a, *b, *c, *r;
op_entry(decl_kind k, expr* a, expr* b, expr* c, expr* r): k(k), a(a), b(b), c(c), r(r) {}
op_entry():k(0), a(nullptr), b(nullptr), c(nullptr), r(nullptr) {}
};

struct hash_entry {
unsigned operator()(op_entry const& e) const {
return mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0);
return combine_hash(mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0), e.c ? e.c->get_id() : 0);
}
};

struct eq_entry {
bool operator()(op_entry const& a, op_entry const& b) const {
return a.k == b.k && a.a == b.a && a.b == b.b;
bool operator()(op_entry const& a, op_entry const& b) const {
return a.k == b.k && a.a == b.a && a.b == b.b && a.c == b.c;
}
};

@@ -143,8 +143,8 @@ class seq_rewriter {

public:
op_cache(ast_manager& m);
expr* find(decl_kind op, expr* a, expr* b);
void insert(decl_kind op, expr* a, expr* b, expr* r);
expr* find(decl_kind op, expr* a, expr* b, expr* c);
void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
};

seq_util m_util;
@@ -208,8 +208,24 @@ class seq_rewriter {
bool check_deriv_normal_form(expr* r, int level = 3);
#endif

void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result);

expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path);
expr_ref mk_in_antimirov_rec(expr* s, expr* d);
expr_ref mk_in_antimirov(expr* s, expr* d);

expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path);
expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
expr_ref mk_antimirov_deriv_negate(expr* d);
expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
expr_ref mk_regex_reverse(expr* r);
expr_ref mk_regex_concat(expr* r1, expr* r2);

expr_ref simplify_path(expr* path);

bool lt_char(expr* ch1, expr* ch2);
bool eq_char(expr* ch1, expr* ch2);
bool neq_char(expr* ch1, expr* ch2);
bool le_char(expr* ch1, expr* ch2);
bool pred_implies(expr* a, expr* b);
bool are_complements(expr* r1, expr* r2) const;
@@ -286,6 +302,8 @@ class seq_rewriter {
expr_ref zero() { return expr_ref(m_autil.mk_int(0), m()); }
expr_ref one() { return expr_ref(m_autil.mk_int(1), m()); }
expr_ref minus_one() { return expr_ref(m_autil.mk_int(-1), m()); }
expr_ref mk_sub(expr* a, rational const& n);
expr_ref mk_sub(expr* a, unsigned n) { return mk_sub(a, rational(n)); }

bool is_suffix(expr* s, expr* offset, expr* len);
bool is_prefix(expr* s, expr* offset, expr* len);
@@ -379,9 +397,19 @@ class seq_rewriter {

void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs);

// Expose derivative and nullability check
/*
create the nullability check for r
*/
expr_ref is_nullable(expr* r);
/*
make the derivative of r wrt the given element ele
*/
expr_ref mk_derivative(expr* ele, expr* r);
/*
make the derivative of r wrt the canonical variable v0 = (:var 0),
for example mk_derivative(a+) = (if (v0 = 'a') then a* else [])
*/
expr_ref mk_derivative(expr* r);

// heuristic elimination of element from condition that comes from a derivative.
// special case optimization for conjunctions of equalities, disequalities and ranges.
333 changes: 255 additions & 78 deletions src/ast/seq_decl_plugin.cpp

Large diffs are not rendered by default.

48 changes: 41 additions & 7 deletions src/ast/seq_decl_plugin.h
Original file line number Diff line number Diff line change
@@ -286,7 +286,7 @@ class seq_util {
app* mk_at(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_AT, 2, es); }
app* mk_nth(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH, 2, es); }
app* mk_nth_i(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_I, 2, es); }
app* mk_nth_i(expr* s, unsigned i) const;
app* mk_nth_c(expr* s, unsigned i) const;

app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
@@ -350,6 +350,13 @@ class seq_util {
bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); }
bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); }

bool is_len_sub(expr const* n, expr*& l, expr*& u, rational& k) const;

/*
tests if s is a single character string(c) or a unit (c)
*/
bool is_unit_string(expr const* s, expr_ref& c) const;

// True when the sort of n is a string sort according to u.is_string.
bool is_string_term(expr const * n) const {
    auto* term_sort = n->get_sort();
    return u.is_string(term_sort);
}
@@ -530,7 +537,20 @@ class seq_util {
bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); }
bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); }
bool is_full_char(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); }
bool is_full_seq(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET); }
// True when n is an application of OP_RE_FULL_SEQ_SET, or when is_star
// matches n and the sub-expression it binds satisfies is_full_char.
bool is_full_seq(expr const* n) const {
    if (is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET))
        return true;
    expr* body = nullptr;
    return is_star(n, body) && is_full_char(body);
}
// True when n has one of these shapes:
//   - is_plus matches n and the bound sub-expression satisfies is_full_char;
//   - is_concat matches n and one side satisfies is_full_char while the
//     other satisfies is_full_seq (either order).
bool is_dot_plus(expr const* n) const {
    expr* lhs = nullptr;
    expr* rhs = nullptr;
    if (is_plus(n, lhs) && is_full_char(lhs))
        return true;
    if (!is_concat(n, lhs, rhs))
        return false;
    // full_char followed by full_seq ...
    if (is_full_char(lhs) && is_full_seq(rhs))
        return true;
    // ... or full_seq followed by full_char.
    return is_full_char(rhs) && is_full_seq(lhs);
}
bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
@@ -559,18 +579,32 @@ class seq_util {
app* mk_epsilon(sort* seq_sort);
info get_info(expr* r) const;
std::string to_str(expr* r) const;
std::string to_strh(expr* r) const;

// Builds an if-then-else over (c, t, e) with trivial cases folded away:
//   - c is true, or both branches are the same pointer -> t
//   - c is false                                      -> e
//   - otherwise                                       -> m.mk_ite(c, t, e)
expr_ref mk_ite_simplify(expr* c, expr* t, expr* e)
{
    if (m.is_true(c) || t == e)
        return expr_ref(t, m);
    if (m.is_false(c))
        return expr_ref(e, m);
    return expr_ref(m.mk_ite(c, t, e), m);
}

class pp {
seq_util::rex& re;
expr* e;
expr* ex;
bool html_encode;
bool can_skip_parenth(expr* r) const;
std::ostream& seq_unit(std::ostream& out, expr* s) const;
std::ostream& compact_helper_seq(std::ostream& out, expr* s) const;
std::ostream& compact_helper_range(std::ostream& out, expr* s1, expr* s2) const;
std::ostream& print_unit(std::ostream& out, expr* s) const;
std::ostream& print_seq(std::ostream& out, expr* s) const;
std::ostream& print_range(std::ostream& out, expr* s1, expr* s2) const;
std::ostream& print(std::ostream& out, expr* e) const;

public:
pp(seq_util::rex& r, expr* e, bool html = false) : re(r), e(e), html_encode(html) {}
pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {}
std::ostream& display(std::ostream&) const;
};
};
209 changes: 99 additions & 110 deletions src/smt/seq_regex.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/smt/seq_regex.h
Original file line number Diff line number Diff line change
@@ -158,12 +158,12 @@ namespace smt {
expr_ref symmetric_diff(expr* r1, expr* r2);

expr_ref is_nullable_wrapper(expr* r);
expr_ref derivative_wrapper(expr* hd, expr* r);
expr_ref mk_derivative_wrapper(expr* hd, expr* r);

// Various support for unfolding derivative expressions that are
// returned by mk_derivative_wrapper
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
void get_all_derivatives(expr* r, expr_ref_vector& results);
void get_derivative_targets(expr* r, expr_ref_vector& targets);
void get_cofactors(expr* r, expr_ref_pair_vector& result);
void get_cofactors_rec(expr* r, expr_ref_vector& conds,
expr_ref_pair_vector& result);

0 comments on commit 146f462

Please sign in to comment.