Skip to content

Commit

Permalink
Seq rewriter integration (#4599)
Browse files Browse the repository at this point in the history
* std::cout debugging statements

* comment out std::cout debugging as this is now a shared fork

* convert std::cout to TRACE statements for seq_rewriter and seq_regex

* add cases to min_length and max_length for regexes

* bug fix

* update min_length and max_length functions for REs

* initial pass on simplifying derivative normal forms by eliminating redundant predicates locally

* add seq_regex_brief trace statements

* working on debugging ref count issue

* fix ref count bug and convert trace statements to seq_regex_brief

* add compact tracing for cache hits/misses

* seq_regex fix cache hit/miss tracing and wrapper around is_nullable

* minor

* label and disable more experimental changes for testing

* minor documentation / tracing

* a few more @exp annotations

* dead state elimination skeleton code

* progress on dead state elimination

* more progress on dead state elimination

* refactor dead state class to separate self-contained state_graph class

* finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic

* implement get_all_derivatives, add debug tracing

* trace statements for debugging is_nullable loop bug

* fix is_nullable loop bug

* comment out local nullable change and mark experimental

* pretty printing for state_graph

* rewrite state graph to remove the fragile assumption that all edges from a state are added at a time

* start of general cycle detection check + fix some comments

* implement full cycle detection procedure

* normalize derivative conditions to form 'ele <= a'

* order derivative conditions by character code

* fix confusing names m_to and m_from

* assign increasing state IDs from 1 instead of using get_id on AST node

* remove elim_condition call in get_dall_derivatives

* use u_map instead of uint_map to avoid memory leak

* remove unnecessary call to is_ground

* debugging

* small improvements to seq_regex_brief tracing

* fix bug on evil2 example

* save work

* new propagate code

* work in progress on using same seq sort for deriv calls

* avoid re-computing derivatives: use same head var for every derivative call

* use min_length on regexes to prune search

* simple implementation of can_be_in_cycle using rank function idea

* add a disabled experimental change

* minor cleanup comments, etc.

* seq_rewriter cleanup for PR

* remove cache hit/miss counts tracing

* remove changes not in the rewriter

* remove cache hit/miss count tracing

Co-authored-by: calebstanford-msr <[email protected]>
  • Loading branch information
cdstanford and calebstanford-msr authored Jul 28, 2020
1 parent afdfc5e commit 5664b57
Show file tree
Hide file tree
Showing 2 changed files with 226 additions and 32 deletions.
246 changes: 218 additions & 28 deletions src/ast/rewriter/seq_rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2200,11 +2200,15 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
}

expr_ref seq_rewriter::is_nullable(expr* r) {
STRACE("seq_verbose", tout << "is_nullable: "
<< mk_pp(r, m()) << std::endl;);
expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
if (!result) {
result = is_nullable_rec(r);
m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);
}
STRACE("seq_verbose", tout << "is_nullable result: "
<< mk_pp(result, m()) << std::endl;);
return result;
}

Expand Down Expand Up @@ -2383,11 +2387,15 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
Duplicate nested conditions are eliminated.
*/
expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m())
<< "," << mk_pp(r, m()) << std::endl;);
expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
if (!result) {
result = mk_derivative_rec(ele, r);
m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
}
STRACE("seq_verbose", tout << "derivative result: "
<< mk_pp(result, m()) << std::endl;);
return result;
}

Expand All @@ -2403,40 +2411,129 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
return mk_der_op(OP_RE_CONCAT, r1, r2);
}

/*
Utility functions to decide char <, ==, and <=.
Return true if deduced, false if unknown.
*/
bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
unsigned u1, u2;
return u().is_const_char(ch1, u1) &&
u().is_const_char(ch2, u2) && (u1 < u2);
}
bool seq_rewriter::eq_char(expr* ch1, expr* ch2) {
return ch1 == ch2;
}
bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
return eq_char(ch1, ch2) || lt_char(ch1, ch2);
}

/*
Utility function to decide if a simple predicate (ones that appear
as the conditions in if-then-else expressions in derivatives)
implies another.
Return true if we deduce that a implies b, false if unknown.
Current cases handled:
- a and b are char <= constraints, or negations of char <= constraints
*/
bool seq_rewriter::pred_implies(expr* a, expr* b) {
STRACE("seq_verbose", tout << "pred_implies: "
<< "," << mk_pp(a, m())
<< "," << mk_pp(b, m()) << std::endl;);
expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr,
*chb1 = nullptr, *chb2 = nullptr, *notb = nullptr;
if (m().is_not(a, nota) &&
m().is_not(b, notb)) {
return pred_implies(notb, nota);
}
else if (u().is_char_le(a, cha1, cha2) &&
u().is_char_le(b, chb1, chb2)) {
return le_char(chb1, cha1) && le_char(cha2, chb2);
}
else if (u().is_char_le(a, cha1, cha2) &&
m().is_not(b, notb) &&
u().is_char_le(notb, chb1, chb2)) {
return (le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
(lt_char(chb2, cha1) && le_char(cha2, chb1));
}
else if (u().is_char_le(b, chb1, chb2) &&
m().is_not(a, nota) &&
u().is_char_le(nota, cha1, cha2)) {
return le_char(chb1, cha2) && le_char(cha1, chb2);
}
return false;
}

/*
Apply a binary operation, preserving BDD normal form on derivative expressions.
Preconditions:
- k is a binary op code on REs (concat, intersection, or union)
- k is a binary op code on REs: one of concat, intersection, or union
(not difference)
- a and b are in BDD form
Postcondition:
- result is in BDD form
*/
expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
STRACE("seq_verbose", tout << "mk_der_op_rec: " << k
<< "," << mk_pp(a, m())
<< "," << mk_pp(b, m()) << std::endl;);
expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
expr_ref result(m());
// Simplify if-then-elses whenever possible
auto mk_ite = [&](expr* c, expr* a, expr* b) {
return (a == b) ? a : m().mk_ite(c, a, b);
};
// Use character code to order conditions
auto get_id = [&](expr* e) {
expr *ch1 = nullptr, *ch2 = nullptr;
unsigned ch;
if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch))
return ch;
// Fallback: use expression ID (but use same ID for complement)
re().is_complement(e, e);
return e->get_id();
};
if (m().is_ite(a, ca, a1, a2)) {
expr_ref r1(m()), r2(m());
expr_ref notca(m().mk_not(ca), m());
if (m().is_ite(b, cb, b1, b2)) {
// --- Core logic for combining two BDDs
expr_ref notcb(m().mk_not(cb), m());
if (ca == cb) {
expr_ref r1 = mk_der_op(k, a1, b1);
expr_ref r2 = mk_der_op(k, a2, b2);
r1 = mk_der_op(k, a1, b1);
r2 = mk_der_op(k, a2, b2);
result = mk_ite(ca, r1, r2);
return result;
}
else if (ca->get_id() < cb->get_id()) {
expr_ref r1 = mk_der_op(k, a, b1);
expr_ref r2 = mk_der_op(k, a, b2);
result = mk_ite(cb, r1, r2);
return result;
// Order with higher IDs on the outside
if (get_id(ca) < get_id(cb)) {
std::swap(a, b);
std::swap(ca, cb);
std::swap(notca, notcb);
std::swap(a1, b1);
std::swap(a2, b2);
}
// Simplify if there is a relationship between ca and cb
if (pred_implies(ca, cb)) {
r1 = mk_der_op(k, a1, b1);
}
else if (pred_implies(ca, notcb)) {
r1 = mk_der_op(k, a1, b2);
}
if (pred_implies(notca, cb)) {
r2 = mk_der_op(k, a2, b1);
}
else if (pred_implies(notca, notcb)) {
r2 = mk_der_op(k, a2, b2);
}
// --- End core logic
}
expr_ref r1 = mk_der_op(k, a1, b);
expr_ref r2 = mk_der_op(k, a2, b);
if (!r1) r1 = mk_der_op(k, a1, b);
if (!r2) r2 = mk_der_op(k, a2, b);
result = mk_ite(ca, r1, r2);
return result;
}
Expand Down Expand Up @@ -2501,6 +2598,8 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
}

expr_ref seq_rewriter::mk_der_compl(expr* r) {
STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m())
<< std::endl;);
expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
if (!result) {
expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
Expand All @@ -2514,6 +2613,71 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
return result;
}

/*
Make an re_predicate with an arbitrary condition cond, enforcing
derivative normal form on how conditions are written.
Tries to rewrites everything to (ele <= x) constraints:
(ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none))
(a = ele) => "
(a <= ele) => ite(ele <= a-1, none, epsilon)
(not p) => mk_der_compl(...)
(p and q) => mk_der_inter(...)
(p or q) => mk_der_union(...)
Postcondition: result is in BDD form
*/
expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
STRACE("seq_verbose", tout << "mk_der_cond: "
<< mk_pp(cond, m()) << ", " << mk_pp(ele, m()) << std::endl;);
sort *ele_sort = nullptr;
VERIFY(u().is_seq(seq_sort, ele_sort));
SASSERT(ele_sort == m().get_sort(ele));
expr *c1 = nullptr, *c2 = nullptr, *ch1 = nullptr, *ch2 = nullptr;
unsigned ch = 0;
expr_ref result(m()), r1(m()), r2(m());
if (m().is_eq(cond, ch1, ch2)) {
r1 = u().mk_le(ch1, ch2);
r1 = mk_der_cond(r1, ele, seq_sort);
r2 = u().mk_le(ch2, ch1);
r2 = mk_der_cond(r2, ele, seq_sort);
result = mk_der_inter(r1, r2);
}
else if (u().is_char_le(cond, ch1, ch2) &&
u().is_const_char(ch1, ch) && (ch2 == ele)) {
if (ch > 0) {
result = u().mk_char(ch - 1);
result = u().mk_le(ele, result);
result = re_predicate(result, seq_sort);
result = mk_der_compl(result);
}
else {
result = m().mk_true();
result = re_predicate(result, seq_sort);
}
}
else if (m().is_not(cond, c1)) {
result = mk_der_cond(c1, ele, seq_sort);
result = mk_der_compl(result);
}
else if (m().is_and(cond, c1, c2)) {
r1 = mk_der_cond(c1, ele, seq_sort);
r2 = mk_der_cond(c2, ele, seq_sort);
result = mk_der_inter(r1, r2);
}
else if (m().is_or(cond, c1, c2)) {
r1 = mk_der_cond(c1, ele, seq_sort);
r2 = mk_der_cond(c2, ele, seq_sort);
result = mk_der_union(r1, r2);
}
else {
result = re_predicate(cond, seq_sort);
}
STRACE("seq_verbose", tout << "mk_der_cond result: "
<< mk_pp(result, m()) << std::endl;);
return result;
}

expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
expr_ref result(m());
sort* seq_sort = nullptr, *ele_sort = nullptr;
Expand Down Expand Up @@ -2588,14 +2752,19 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
expr_ref hd(m()), tl(m());
if (get_head_tail(r1, hd, tl)) {
// head must be equal; if so, derivative is tail
result = re().mk_to_re(tl);
return re_and(m_br.mk_eq_rw(ele, hd), result);
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv to_re" << std::endl;);
result = m().mk_eq(ele, hd);
result = mk_der_cond(result, ele, seq_sort);
expr_ref r1(re().mk_to_re(tl), m());
result = mk_der_concat(result, r1);
return result;
}
else if (str().is_empty(r1)) {
return mk_empty();
}
else {
#if 0
else {
hd = str().mk_nth_i(r1, m_autil.mk_int(0));
tl = str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1)));
result = re().mk_to_re(tl);
Expand All @@ -2604,25 +2773,25 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
mk_empty(),
re_and(m_br.mk_eq_rw(ele, hd), result));
return result;
#else
return expr_ref(re().mk_derivative(ele, r), m());
#endif
}
#endif
}
else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) {
// Reverses are rewritten so that the only derivative case is
// derivative of a reverse of a string. (All other cases stuck)
// This is analagous to the previous is_to_re case.
expr_ref hd(m()), tl(m());
if (get_head_tail_reversed(r2, hd, tl)) {
return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
result = m().mk_eq(ele, tl);
result = mk_der_cond(result, ele, seq_sort);
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
return result;
}
else if (str().is_empty(r2)) {
return mk_empty();
}
else {
return expr_ref(re().mk_derivative(ele, r), m());
}
}
else if (re().is_range(r, r1, r2)) {
// r1, r2 are sequences.
Expand All @@ -2631,17 +2800,29 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
if (s1.length() == 1 && s2.length() == 1) {
expr_ref ch1(m_util.mk_char(s1[0]), m());
expr_ref ch2(m_util.mk_char(s2[0]), m());
return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort),
re_predicate(m_util.mk_le(ele, ch2), seq_sort));
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv range zstring" << std::endl;);
expr_ref p1(u().mk_le(ch1, ele), m());
p1 = mk_der_cond(p1, ele, seq_sort);
expr_ref p2(u().mk_le(ele, ch2), m());
p2 = mk_der_cond(p2, ele, seq_sort);
result = mk_der_inter(p1, p2);
return result;
}
else {
return mk_empty();
}
}
expr* e1 = nullptr, *e2 = nullptr;
if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) {
return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort),
re_predicate(m_util.mk_le(ele, e2), seq_sort));
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv range str" << std::endl;);
expr_ref p1(u().mk_le(e1, ele), m());
p1 = mk_der_cond(p1, ele, seq_sort);
expr_ref p2(u().mk_le(ele, e2), m());
p2 = mk_der_cond(p2, ele, seq_sort);
result = mk_der_inter(p1, p2);
return result;
}
}
else if (re().is_full_char(r)) {
Expand All @@ -2651,10 +2832,14 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
array_util array(m());
expr* args[2] = { p, ele };
result = array.mk_select(2, args);
return re_predicate(result, seq_sort);
}
// stuck cases: re().is_derivative, variable, ...
// and re().is_reverse if the reverse is not applied to a string
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
return mk_der_cond(result, ele, seq_sort);
}
// stuck cases: re.derivative, variable,
// str.to_re if the head of the string can't be obtained,
// and re.reverse if not applied to a string or if the tail char
// of the string can't be obtained
return expr_ref(re().mk_derivative(ele, r), m());
}

Expand Down Expand Up @@ -2821,6 +3006,9 @@ Disabled rewrite:
*/
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {

STRACE("seq_verbose", tout << "mk_str_in_regexp: " << mk_pp(a, m())
<< ", " << mk_pp(b, m()) << std::endl;);

if (re().is_empty(b)) {
result = m().mk_false();
return BR_DONE;
Expand Down Expand Up @@ -4107,6 +4295,7 @@ seq_rewriter::op_cache::op_cache(ast_manager& m):
expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
op_entry e(op, a, b, nullptr);
m_table.find(e, e);

return e.r;
}

Expand All @@ -4122,5 +4311,6 @@ void seq_rewriter::op_cache::cleanup() {
if (m_table.size() >= m_max_cache_size) {
m_trail.reset();
m_table.reset();
STRACE("seq_verbose", tout << "Derivative op cache reset" << std::endl;);
}
}
Loading

0 comments on commit 5664b57

Please sign in to comment.