From 2e676ff2bd37cae8806ba07f11aa2480d5cea99c Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Tue, 9 Jun 2020 16:39:43 -0400 Subject: [PATCH 01/51] std::cout debugging statements --- src/ast/rewriter/seq_rewriter.cpp | 8 ++++++++ src/smt/seq_regex.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4dde8e4c283..d2726b91132 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2181,6 +2181,7 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref seq_rewriter::is_nullable_rec(expr* r) { + std::cout << "n"; expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable(r); @@ -2364,6 +2365,7 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. */ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { + std::cout << "d"; expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); @@ -2449,6 +2451,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { + std::cout << "."; // Recursive call expr_ref _a(a, m()), _b(b, m()); expr_ref result(m()); switch (k) { @@ -2476,6 +2479,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { + std::cout << "."; // Recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2490,6 +2494,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { + std::cout << "."; // Recursive call expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4069,6 +4074,9 @@ 
seq_rewriter::op_cache::op_cache(ast_manager& m): expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); m_table.find(e, e); + if (!(e.r)) { + std::cout << "!"; // Cache miss + } return e.r; } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index e667d0f5b96..be2aa91067b 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -101,6 +101,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()); VERIFY(str().is_in_re(e, s, r)); + std::cout << "PI "; TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); // convert negative negative membership literals to positive @@ -142,6 +143,7 @@ namespace smt { } void seq_regex::propagate_accept(literal lit) { + std::cout << "PA "; if (!propagate(lit)) m_to_propagate.push_back(lit); } @@ -167,6 +169,7 @@ namespace smt { unsigned idx = 0; VERIFY(sk().is_accept(e, s, i, idx, r)); + std::cout << "P "; TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); if (re().is_empty(r)) { @@ -356,12 +359,14 @@ namespace smt { with optimizations for if-then-else expressions involving the head. 
*/ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { + std::cout << "D "; expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); return result; } void seq_regex::propagate_eq(expr* r1, expr* r2) { + std::cout << "PEQ "; expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_empty = sk().mk_is_empty(r, emp); @@ -369,6 +374,7 @@ namespace smt { } void seq_regex::propagate_ne(expr* r1, expr* r2) { + std::cout << "PNEQ "; expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); @@ -393,6 +399,7 @@ namespace smt { * */ void seq_regex::propagate_is_non_empty(literal lit) { + std::cout << "PN "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_non_empty(e, r, u)); expr_ref is_nullable = seq_rw().is_nullable(r); @@ -448,6 +455,7 @@ namespace smt { is_empty(r, u) is true if r is a member of u */ void seq_regex::propagate_is_empty(literal lit) { + std::cout << "PE "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); expr_ref is_nullable = seq_rw().is_nullable(r); From 03b05f2781d3d5b0afd41dabf506fff311b7cab0 Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Tue, 9 Jun 2020 18:23:02 -0400 Subject: [PATCH 02/51] comment out std::cout debugging as this is now a shared fork --- src/ast/rewriter/seq_rewriter.cpp | 18 ++++++++++-------- src/smt/seq_regex.cpp | 16 ++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index d2726b91132..81521507950 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2181,12 +2181,13 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref seq_rewriter::is_nullable_rec(expr* r) { - std::cout << "n"; + // std::cout << "n"; expr_ref 
result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable(r); m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); } + // std::cout << " "; return result; } @@ -2365,12 +2366,13 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. */ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - std::cout << "d"; + // std::cout << "d"; expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result); } + // std::cout << " "; return result; } @@ -2451,7 +2453,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { - std::cout << "."; // Recursive call + // std::cout << "."; // Recursive call expr_ref _a(a, m()), _b(b, m()); expr_ref result(m()); switch (k) { @@ -2479,7 +2481,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { - std::cout << "."; // Recursive call + // std::cout << "."; // Recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2494,7 +2496,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - std::cout << "."; // Recursive call + // std::cout << "."; // Recursive call expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4074,9 +4076,9 @@ seq_rewriter::op_cache::op_cache(ast_manager& m): expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); m_table.find(e, e); - if (!(e.r)) { - std::cout << "!"; // Cache miss - } + // if (!(e.r)) { + // std::cout << "!"; // Cache miss + // } return e.r; } diff --git 
a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index be2aa91067b..1ca53fcdf26 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -101,7 +101,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()); VERIFY(str().is_in_re(e, s, r)); - std::cout << "PI "; + // std::cout << "PI "; TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); // convert negative negative membership literals to positive @@ -143,7 +143,7 @@ namespace smt { } void seq_regex::propagate_accept(literal lit) { - std::cout << "PA "; + // std::cout << "PA "; if (!propagate(lit)) m_to_propagate.push_back(lit); } @@ -169,7 +169,7 @@ namespace smt { unsigned idx = 0; VERIFY(sk().is_accept(e, s, i, idx, r)); - std::cout << "P "; + // std::cout << "P "; TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); if (re().is_empty(r)) { @@ -359,14 +359,14 @@ namespace smt { with optimizations for if-then-else expressions involving the head. */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { - std::cout << "D "; + // std::cout << "D "; expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); return result; } void seq_regex::propagate_eq(expr* r1, expr* r2) { - std::cout << "PEQ "; + // std::cout << "PEQ "; expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_empty = sk().mk_is_empty(r, emp); @@ -374,7 +374,7 @@ namespace smt { } void seq_regex::propagate_ne(expr* r1, expr* r2) { - std::cout << "PNEQ "; + // std::cout << "PNEQ "; expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); @@ -399,7 +399,7 @@ namespace smt { * */ void seq_regex::propagate_is_non_empty(literal lit) { - std::cout << "PN "; + // std::cout << "PN "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_non_empty(e, r, u)); expr_ref is_nullable = seq_rw().is_nullable(r); @@ -455,7 +455,7 @@ namespace smt { is_empty(r, u) is true 
if r is a member of u */ void seq_regex::propagate_is_empty(literal lit) { - std::cout << "PE "; + // std::cout << "PE "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); expr_ref is_nullable = seq_rw().is_nullable(r); From 231f0d65936691c959c4746b42c054097564643c Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Fri, 12 Jun 2020 12:54:27 -0400 Subject: [PATCH 03/51] convert std::cout to TRACE statements for seq_rewriter and seq_regex --- src/ast/rewriter/seq_rewriter.cpp | 21 +++++++++--------- src/smt/seq_regex.cpp | 36 +++++++++++++++++++------------ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 5fd17730d55..24085632ff0 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2182,13 +2182,13 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref seq_rewriter::is_nullable_rec(expr* r) { - // std::cout << "n"; + STRACE("seq_regex_verbose", tout << "nullable";); expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable(r); m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); } - // std::cout << " "; + STRACE("seq_regex_verbose", tout << std::endl;); return result; } @@ -2367,13 +2367,13 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. 
*/ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - // std::cout << "d"; + STRACE("seq_regex_verbose", tout << "derivative";); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result); } - // std::cout << " "; + STRACE("seq_regex_verbose", tout << std::endl;); return result; } @@ -2454,7 +2454,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { - // std::cout << "."; // Recursive call expr_ref _a(a, m()), _b(b, m()); expr_ref result(m()); switch (k) { @@ -2482,7 +2481,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { - // std::cout << "."; // Recursive call + STRACE("seq_regex_verbose", tout << " (rec)";); expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2497,7 +2496,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - // std::cout << "."; // Recursive call + STRACE("seq_regex_verbose", tout << " (rec)";); expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4077,9 +4076,11 @@ seq_rewriter::op_cache::op_cache(ast_manager& m): expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); m_table.find(e, e); - // if (!(e.r)) { - // std::cout << "!"; // Cache miss - // } + + if (!(e.r)) { + STRACE("seq_regex_verbose", tout << " (cache miss)";); + } + return e.r; } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 8155af79046..f61a1f02cb3 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -101,8 +101,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()); VERIFY(str().is_in_re(e, s, r)); - // std::cout << "PI "; 
- TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); + TRACE("seq_regex", tout << "propagate in RE: " << mk_pp(e, m) << std::endl;); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -143,7 +142,7 @@ namespace smt { } void seq_regex::propagate_accept(literal lit) { - // std::cout << "PA "; + TRACE("seq_regex", tout << "propagate accept" << std::endl;); if (!propagate(lit)) m_to_propagate.push_back(lit); } @@ -169,8 +168,7 @@ namespace smt { unsigned idx = 0; VERIFY(sk().is_accept(e, s, i, idx, r)); - // std::cout << "P "; - TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); + TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;); if (re().is_empty(r)) { th.add_axiom(~lit); @@ -195,6 +193,9 @@ namespace smt { void seq_regex::propagate_nullable(literal lit, expr* e, expr* s, unsigned idx, expr* r) { expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); + + TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); + literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { th.propagate_lit(nullptr, 1,&lit, len_s_ge_i); @@ -225,11 +226,13 @@ namespace smt { expr_ref d(m); expr_ref head = th.mk_nth(s, i); + TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;); + d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); // timer tm; - // std::cout << d->get_id() << " " << tm.get_seconds() << "\n"; + // std::cout << d->get_id() << " " << tm.get_seconds() << std::endl; //if (tm.get_seconds() > 0.3) - // std::cout << d << "\n"; + // std::cout << d << std::endl; // std::cout.flush(); literal_vector conds; conds.push_back(~lit); @@ -285,7 +288,7 @@ namespace smt { if (!re().is_empty(d)) conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d))); th.add_axiom(conds); - TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";); + TRACE("seq_regex", tout << "unfold " << head << 
std::endl << mk_pp(r, m) << std::endl;); return true; } @@ -327,7 +330,7 @@ namespace smt { th.m_trail_stack.push(vector_value_trail(m_s_in_re, i)); m_s_in_re[i].m_active = false; IF_VERBOSE(11, verbose_stream() << "Intersect " << regex << " " << - mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";); + mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << std::endl;); regex = re().mk_inter(entry.m_re, regex); rewrite(regex); lits.push_back(~entry.m_lit); @@ -362,14 +365,16 @@ namespace smt { with optimizations for if-then-else expressions involving the head. */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { - // std::cout << "D "; + STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); + STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); + // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;); return result; } void seq_regex::propagate_eq(expr* r1, expr* r2) { - // std::cout << "PEQ "; + TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_empty = sk().mk_is_empty(r, emp); @@ -377,7 +382,7 @@ namespace smt { } void seq_regex::propagate_ne(expr* r1, expr* r2) { - // std::cout << "PNEQ "; + TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); @@ -402,9 +407,11 @@ namespace smt { * */ void seq_regex::propagate_is_non_empty(literal lit) { - // std::cout << "PN "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_non_empty(e, r, u)); + + 
TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); + expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); if (m.is_true(is_nullable)) @@ -458,9 +465,10 @@ namespace smt { is_empty(r, u) is true if r is a member of u */ void seq_regex::propagate_is_empty(literal lit) { - // std::cout << "PE "; expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); + TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); + expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); if (m.is_true(is_nullable)) { From fb7ffe96fd691786c5859a90972d31f939c46891 Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Fri, 12 Jun 2020 20:34:02 -0400 Subject: [PATCH 04/51] add cases to min_length and max_length for regexes --- src/ast/seq_decl_plugin.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index a33b5a254ae..fccc8a09266 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1327,6 +1327,12 @@ unsigned seq_util::re::min_length(expr* r) const { return u.max_mul(lo, min_length(r1)); if (is_to_re(r, s)) return u.str.min_length(s); + if (is_reverse(r, s) || is_plus(r, s)) + return min_length(s); + if (is_range(r) || is_of_pred(r) || is_full_char(r)) + return 1; + if (is_empty(r)) + return UINT_MAX; return 0; } @@ -1350,6 +1356,12 @@ unsigned seq_util::re::max_length(expr* r) const { return u.max_mul(hi, max_length(r1)); if (is_to_re(r, s)) return u.str.max_length(s); + if (is_reverse(r, s) || is_plus(r, s)) + return max_length(s); + if (is_range(r) || is_of_pred(r) || is_full_char(r)) + return 1; + if (is_empty(r)) + return 0; return UINT_MAX; } From 20962e2332f99d1260b0dd661088e63e682eec0b Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Fri, 12 Jun 2020 20:34:37 -0400 Subject: [PATCH 05/51] bug fix --- src/smt/seq_regex.cpp | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index f61a1f02cb3..120c1ae8b12 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -424,6 +424,7 @@ namespace smt { lits.push_back(~lit); if (null_lit != false_literal) lits.push_back(null_lit); + expr_ref_pair_vector cofactors(m); get_cofactors(d, cofactors); for (auto const& p : cofactors) { @@ -453,8 +454,8 @@ namespace smt { conds.pop_back(); } else { - cond = mk_and(conds); - result.push_back(cond, r); + expr_ref conj = mk_and(conds); + result.push_back(conj, r); } } From 8b129ce4d0470246d9a47e012eac1d574f22e7d6 Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Tue, 16 Jun 2020 11:56:34 -0400 Subject: [PATCH 06/51] update min_length and max_length functions for REs --- src/ast/seq_decl_plugin.cpp | 40 ++++++++++++++----------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index fccc8a09266..e6a970fd4c8 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1313,26 +1313,21 @@ unsigned seq_util::re::min_length(expr* r) const { unsigned lo = 0, hi = 0; if (is_empty(r)) return UINT_MAX; - if (is_concat(r, r1, r2)) + if (is_concat(r, r1, r2)) return u.max_plus(min_length(r1), min_length(r2)); - if (m.is_ite(r, s, r1, r2)) + if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) return std::min(min_length(r1), min_length(r2)); - if (is_diff(r, r1, r2)) - return min_length(r1); - if (is_union(r, r1, r2)) - return std::min(min_length(r1), min_length(r2)); - if (is_intersection(r, r1, r2)) + if (is_intersection(r, r1, r2)) return std::max(min_length(r1), min_length(r2)); - if (is_loop(r, r1, lo, hi)) + if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_plus(r, r1)) + return min_length(r1); + if (is_loop(r, r1, lo) || is_loop(r, r1, lo, hi)) return u.max_mul(lo, min_length(r1)); - if (is_to_re(r, s)) + if (is_to_re(r, s)) return u.str.min_length(s); - if 
(is_reverse(r, s) || is_plus(r, s)) - return min_length(s); if (is_range(r) || is_of_pred(r) || is_full_char(r)) return 1; - if (is_empty(r)) - return UINT_MAX; + // Else: star, option, complement, full_seq, derivative return 0; } @@ -1342,26 +1337,21 @@ unsigned seq_util::re::max_length(expr* r) const { unsigned lo = 0, hi = 0; if (is_empty(r)) return 0; - if (is_concat(r, r1, r2)) + if (is_concat(r, r1, r2)) return u.max_plus(max_length(r1), max_length(r2)); - if (m.is_ite(r, s, r1, r2)) + if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) return std::max(max_length(r1), max_length(r2)); - if (is_diff(r, r1, r2)) - return max_length(r1); - if (is_union(r, r1, r2)) - return std::max(max_length(r1), max_length(r2)); - if (is_intersection(r, r1, r2)) + if (is_intersection(r, r1, r2)) return std::min(max_length(r1), max_length(r2)); + if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1)) + return max_length(r1); if (is_loop(r, r1, lo, hi)) return u.max_mul(hi, max_length(r1)); - if (is_to_re(r, s)) + if (is_to_re(r, s)) return u.str.max_length(s); - if (is_reverse(r, s) || is_plus(r, s)) - return max_length(s); if (is_range(r) || is_of_pred(r) || is_full_char(r)) return 1; - if (is_empty(r)) - return 0; + // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative return UINT_MAX; } From a98ca80b00bfca1c781f0602ba56fb3fe041a887 Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Tue, 16 Jun 2020 20:46:18 -0400 Subject: [PATCH 07/51] initial pass on simplifying derivative normal forms by eliminating redundant predicates locally --- src/ast/rewriter/seq_rewriter.cpp | 126 +++++++++++++++++++++++++----- src/ast/rewriter/seq_rewriter.h | 4 + 2 files changed, 110 insertions(+), 20 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 3166b1f5ab3..1cf421bacbb 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2389,11 +2389,70 @@ expr_ref 
seq_rewriter::mk_der_concat(expr* r1, expr* r2) { return mk_der_op(OP_RE_CONCAT, r1, r2); } +/* + Utility functions to decide char <, ==, and <=. + Return true if deduced, false if unknown. +*/ +bool seq_rewriter::lt_char(expr* ch1, expr* ch2) { + unsigned u1, u2; + return (m_util.is_const_char(ch1, u1) && + m_util.is_const_char(ch2, u2) && + (u1 < u2)); +} +bool seq_rewriter::eq_char(expr* ch1, expr* ch2) { + unsigned u1, u2; + return ((ch1 == ch2) || ( + m_util.is_const_char(ch1, u1) && + m_util.is_const_char(ch2, u2) && + (u1 == u2) + )); +} +bool seq_rewriter::le_char(expr* ch1, expr* ch2) { + return (eq_char(ch1, ch2) || lt_char(ch1, ch2)); +} + +/* + Utility function to decide if a simple predicate (ones that appear + as the conditions in if-then-else expressions in derivatives) + implies another. + + Return true if we deduce that a implies b, false if unknown. + + Current cases handled: + - a and b are char <= constraints, or negations of char <= constraints +*/ +bool seq_rewriter::pred_implies(expr* a, expr* b) { + expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr, + *chb1 = nullptr, *chb2 = nullptr, *notb = nullptr; + if (m().is_not(a, nota) && + m().is_not(b, notb)) { + return pred_implies(notb, nota); + } + else if (m_util.is_char_le(a, cha1, cha2) && + m_util.is_char_le(b, chb1, chb2)) { + return (le_char(chb1, cha1) && le_char(cha2, chb2)); + } + else if (m_util.is_char_le(a, cha1, cha2) && + m().is_not(b, notb) && + m_util.is_char_le(notb, chb1, chb2)) { + return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) || + (lt_char(chb2, cha1) && le_char(cha2, chb1))); + } + else if (m_util.is_char_le(b, chb1, chb2) && + m().is_not(a, nota) && + m_util.is_char_le(nota, cha1, cha2)) { + return (le_char(chb1, cha2) && le_char(cha1, chb2)); + } + else { + return false; + } +} + /* Apply a binary operation, preserving BDD normal form on derivative expressions. 
Preconditions: - - k is a binary op code on REs (concat, intersection, or union) + - k is a binary op codes on REs: one of concat, intersection, or union - a and b are in BDD form Postcondition: @@ -2406,23 +2465,43 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { auto mk_ite = [&](expr* c, expr* a, expr* b) { return (a == b) ? a : m().mk_ite(c, a, b); }; + // TODO + // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); }; if (m().is_ite(a, ca, a1, a2)) { + expr_ref r1(m()), r2(m()); if (m().is_ite(b, cb, b1, b2)) { + // --- Core logic for combining two BDDs if (ca == cb) { expr_ref r1 = mk_der_op(k, a1, b1); expr_ref r2 = mk_der_op(k, a2, b2); result = mk_ite(ca, r1, r2); return result; } - else if (ca->get_id() < cb->get_id()) { - expr_ref r1 = mk_der_op(k, a, b1); - expr_ref r2 = mk_der_op(k, a, b2); - result = mk_ite(cb, r1, r2); - return result; + // Order with higher IDs on the outside + if (ca->get_id() < cb->get_id()) { + std::swap(a, b); + std::swap(ca, cb); + std::swap(a1, b1); + std::swap(a2, b2); + } + // Simplify if there is a relationship between ca and cb + if (pred_implies(ca, cb)) { + r1 = mk_der_op(k, a1, b1); + } + else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) { + r1 = mk_der_op(k, a1, b2); + } + if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) { + r2 = mk_der_op(k, a2, b1); + } + else if (pred_implies(expr_ref(m().mk_not(ca), m()), + expr_ref(m().mk_not(cb), m()))) { + r2 = mk_der_op(k, a2, b2); } + // --- End core logic } - expr_ref r1 = mk_der_op(k, a1, b); - expr_ref r2 = mk_der_op(k, a2, b); + if (!r1) r1 = mk_der_op(k, a1, b); + if (!r2) r2 = mk_der_op(k, a2, b); result = mk_ite(ca, r1, r2); return result; } @@ -2539,7 +2618,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2))); } else if (m().is_ite(r, p, r1, r2)) { - // there is no BDD normalization here + // Note: there is no BDD 
normalization here result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2)); return result; } @@ -2574,13 +2653,18 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail - return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); + // Write 'head is equal' as a range constraint: + // (ele <= hd) and (hd <= ele) + return mk_der_inter( + re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)), + re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl)) + ); } else if (str().is_empty(r1)) { return mk_empty(); } - else { #if 0 + else { hd = str().mk_nth_i(r1, m_autil.mk_int(0)); tl = str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1))); result = @@ -2588,10 +2672,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { mk_empty(), re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl))); return result; -#else - return expr_ref(re().mk_derivative(ele, r), m()); -#endif } +#endif } else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) { // Reverses are rewritten so that the only derivative case is @@ -2599,14 +2681,16 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { - return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); + // Write 'tail is equal' as a range constraint: + // (ele <= tl) and (tl <= ele) + return mk_der_inter( + re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))), + re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd))) + ); } else if (str().is_empty(r2)) { return mk_empty(); } - else { - return expr_ref(re().mk_derivative(ele, r), m()); - } } else if (re().is_range(r, r1, r2)) { // r1, r2 are sequences. 
@@ -2637,8 +2721,10 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { result = array.mk_select(2, args); return re_predicate(result, seq_sort); } - // stuck cases: re().is_derivative, variable, ... - // and re().is_reverse if the reverse is not applied to a string + // stuck cases: is_derivative, variable, + // str.to_re if it can't be simplified into a head character and tail + // and re().is_reverse if the reverse is not applied to a string that + // can be coerced into a tail character and a head return expr_ref(re().mk_derivative(ele, r), m()); } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 1cba724442e..6b9d88a3a38 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -194,6 +194,10 @@ class seq_rewriter { expr_ref mk_der_compl(expr* a); expr_ref mk_der_reverse(expr* a); + bool lt_char(expr* ch1, expr* ch2); + bool eq_char(expr* ch1, expr* ch2); + bool le_char(expr* ch1, expr* ch2); + bool pred_implies(expr* a, expr* b); bool are_complements(expr* r1, expr* r2) const; bool is_subset(expr* r1, expr* r2) const; From 42cb8b6874f646c728c83c7996bba15776a4db4a Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Wed, 17 Jun 2020 13:07:27 -0400 Subject: [PATCH 08/51] add seq_regex_brief trace statements --- src/smt/seq_regex.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 6a00fb0ab64..8a64872f5fa 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -102,6 +102,7 @@ namespace smt { VERIFY(str().is_in_re(e, s, r)); TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;); + STRACE("seq_regex_brief", tout << " PIR";); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -149,6 +150,7 @@ namespace smt { void seq_regex::propagate_accept(literal lit) { TRACE("seq_regex", tout << "propagate accept" << 
std::endl;); + STRACE("seq_regex_brief", tout << " PA";); if (!propagate(lit)) m_to_propagate.push_back(lit); } @@ -175,6 +177,11 @@ namespace smt { VERIFY(sk().is_accept(e, s, i, idx, r)); TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;); + STRACE("seq_regex_brief", + tout << std::endl << "P(" << mk_pp(s, m) + << "," << idx + << "," << r // pointer + << ")";); if (re().is_empty(r)) { th.add_axiom(~lit); @@ -213,6 +220,7 @@ namespace smt { rewrite(is_nullable); TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); + STRACE("seq_regex_brief", tout << " PN";); literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { @@ -247,6 +255,7 @@ namespace smt { expr_ref head = th.mk_nth(s, i); TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;); + STRACE("seq_regex_brief", tout << " PD";); d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); // timer tm; @@ -386,6 +395,7 @@ namespace smt { */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); + STRACE("seq_regex_brief", tout << " D";); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); @@ -395,7 +405,8 @@ namespace smt { void seq_regex::propagate_eq(expr* r1, expr* r2) { TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); - expr_ref r = symmetric_diff(r1, r2); + STRACE("seq_regex_brief", tout << " PEQ";); + expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_empty = sk().mk_is_empty(r, emp); th.add_axiom(~th.mk_eq(r1, r2, false), th.mk_literal(is_empty)); @@ -403,6 +414,7 @@ namespace smt { void seq_regex::propagate_ne(expr* r1, expr* r2) { TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << 
std::endl;); + STRACE("seq_regex_brief", tout << " PNEQ";); expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); @@ -431,6 +443,7 @@ namespace smt { VERIFY(sk().is_is_non_empty(e, r, u)); TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); + STRACE("seq_regex_brief", tout << " PNE";); expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); @@ -489,6 +502,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); + STRACE("seq_regex_brief", tout << " PE";); expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); From 177b04affce617f41049e0d785c883e2b9cfdaef Mon Sep 17 00:00:00 2001 From: calebstanford-msr Date: Wed, 17 Jun 2020 19:51:20 -0400 Subject: [PATCH 09/51] working on debugging ref count issue --- src/ast/rewriter/seq_rewriter.cpp | 22 ++++++++++++++-------- src/smt/seq_regex.cpp | 2 ++ src/smt/theory_seq.cpp | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 1cf421bacbb..c4fed412c0f 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2472,31 +2472,37 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { if (m().is_ite(b, cb, b1, b2)) { // --- Core logic for combining two BDDs if (ca == cb) { - expr_ref r1 = mk_der_op(k, a1, b1); - expr_ref r2 = mk_der_op(k, a2, b2); + r1 = mk_der_op(k, a1, b1); + r2 = mk_der_op(k, a2, b2); result = mk_ite(ca, r1, r2); return result; } // Order with higher IDs on the outside - if (ca->get_id() < cb->get_id()) { - std::swap(a, b); - std::swap(ca, cb); - std::swap(a1, b1); - std::swap(a2, b2); - } + // if (ca->get_id() < cb->get_id()) { + // std::swap(a, b); + // std::swap(ca, cb); + // 
std::swap(a1, b1); + // std::swap(a2, b2); + // } // Simplify if there is a relationship between ca and cb if (pred_implies(ca, cb)) { r1 = mk_der_op(k, a1, b1); + // prevent memory ref count error + expr_ref _b2(b2, m()); } else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) { r1 = mk_der_op(k, a1, b2); + expr_ref _b2(b1, m()); } if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) { r2 = mk_der_op(k, a2, b1); + // prevent memory ref count error + expr_ref _b2(b2, m()); } else if (pred_implies(expr_ref(m().mk_not(ca), m()), expr_ref(m().mk_not(cb), m()))) { r2 = mk_der_op(k, a2, b2); + expr_ref _b2(b1, m()); } // --- End core logic } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 8a64872f5fa..8703f617f98 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -228,6 +228,7 @@ namespace smt { } else if (m.is_false(is_nullable)) { th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1)); + // th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r))); } else { literal is_nullable_lit = th.mk_literal(is_nullable); @@ -395,6 +396,7 @@ namespace smt { */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); + // STRACE("seq_regex_brief", tout << "derivative: " << mk_pp(r, m) << std::endl;); STRACE("seq_regex_brief", tout << " D";); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index d3de47c61ff..295256ed7e0 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -3328,6 +3328,7 @@ void theory_seq::relevant_eh(app* n) { expr* arg = nullptr; if (m_sk.is_tail(n, arg)) { + // TODO: HERE add_length_limit(arg, m_max_unfolding_depth, true); } From 06bc1cd955b008fc2bc2bb58cea4ae3f2437c477 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 19 Jun 2020 10:07:42 -0400 Subject: [PATCH 10/51] fix ref count bug and 
convert trace statements to seq_regex_brief --- src/ast/rewriter/seq_rewriter.cpp | 29 ++++++++++++----------------- src/smt/seq_regex.cpp | 31 ++++++++++++++++++------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index c4fed412c0f..1fe1851efc3 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2182,17 +2182,17 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref seq_rewriter::is_nullable(expr* r) { - STRACE("seq_regex_verbose", tout << "nullable";); + STRACE("seq_regex_brief", tout << "n";); expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable_rec(r); m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); } - STRACE("seq_regex_verbose", tout << std::endl;); return result; } expr_ref seq_rewriter::is_nullable_rec(expr* r) { + STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; sort* seq_sort = nullptr; @@ -2367,13 +2367,12 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. 
*/ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - STRACE("seq_regex_verbose", tout << "derivative";); + STRACE("seq_regex_brief", tout << "d";); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result); } - STRACE("seq_regex_verbose", tout << std::endl;); return result; } @@ -2459,6 +2458,7 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) { - result is in BDD form */ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { + STRACE("seq_regex_brief", tout << ".";); // recursive call expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; expr_ref result(m()); @@ -2469,8 +2469,10 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); }; if (m().is_ite(a, ca, a1, a2)) { expr_ref r1(m()), r2(m()); + expr_ref notca(m().mk_not(ca), m()); if (m().is_ite(b, cb, b1, b2)) { // --- Core logic for combining two BDDs + expr_ref notcb(m().mk_not(cb), m()); if (ca == cb) { r1 = mk_der_op(k, a1, b1); r2 = mk_der_op(k, a2, b2); @@ -2487,22 +2489,15 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { // Simplify if there is a relationship between ca and cb if (pred_implies(ca, cb)) { r1 = mk_der_op(k, a1, b1); - // prevent memory ref count error - expr_ref _b2(b2, m()); } - else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) { + else if (pred_implies(ca, notcb)) { r1 = mk_der_op(k, a1, b2); - expr_ref _b2(b1, m()); } - if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) { + if (pred_implies(notca, cb)) { r2 = mk_der_op(k, a2, b1); - // prevent memory ref count error - expr_ref _b2(b2, m()); } - else if (pred_implies(expr_ref(m().mk_not(ca), m()), - expr_ref(m().mk_not(cb), m()))) { + else if (pred_implies(notca, notcb)) { r2 = mk_der_op(k, a2, b2); - expr_ref _b2(b1, m()); } // 
--- End core logic } @@ -2572,7 +2567,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { - STRACE("seq_regex_verbose", tout << " (rec)";); + STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2587,7 +2582,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - STRACE("seq_regex_verbose", tout << " (rec)";); + STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4186,7 +4181,7 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { m_table.find(e, e); if (!(e.r)) { - STRACE("seq_regex_verbose", tout << " (cache miss)";); + STRACE("seq_regex_brief", tout << "!";); // cache miss } return e.r; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 8703f617f98..fe6067a8d6b 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -102,7 +102,7 @@ namespace smt { VERIFY(str().is_in_re(e, s, r)); TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PIR";); + STRACE("seq_regex_brief", tout << "PIR ";); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -150,7 +150,7 @@ namespace smt { void seq_regex::propagate_accept(literal lit) { TRACE("seq_regex", tout << "propagate accept" << std::endl;); - STRACE("seq_regex_brief", tout << " PA";); + STRACE("seq_regex_brief", tout << "PA ";); if (!propagate(lit)) m_to_propagate.push_back(lit); } @@ -180,8 +180,8 @@ namespace smt { STRACE("seq_regex_brief", tout << std::endl << "P(" << mk_pp(s, m) << "," << idx - << "," << r // pointer - << ")";); + << "," << r->get_id() + << ") ";); if 
(re().is_empty(r)) { th.add_axiom(~lit); @@ -216,11 +216,13 @@ namespace smt { */ void seq_regex::propagate_nullable(literal lit, expr* s, unsigned idx, expr* r) { + TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); + STRACE("seq_regex_brief", tout << "PN ";); + expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); - TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PN";); + STRACE("seq_regex_brief", tout << " ";); literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { @@ -256,7 +258,7 @@ namespace smt { expr_ref head = th.mk_nth(s, i); TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PD";); + STRACE("seq_regex_brief", tout << "PD ";); d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); // timer tm; @@ -311,6 +313,9 @@ namespace smt { #endif } } + + STRACE("seq_regex_brief", tout << "cont ";); + if (!is_ground(d)) { d = subst(d, sub); } @@ -396,18 +401,18 @@ namespace smt { */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); - // STRACE("seq_regex_brief", tout << "derivative: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << " D";); + STRACE("seq_regex_brief", tout << "D ";); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); + STRACE("seq_regex_brief", tout << " ";); // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;); return result; } void seq_regex::propagate_eq(expr* r1, expr* r2) { TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PEQ";); + 
STRACE("seq_regex_brief", tout << "PEQ ";); expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_empty = sk().mk_is_empty(r, emp); @@ -416,7 +421,7 @@ namespace smt { void seq_regex::propagate_ne(expr* r1, expr* r2) { TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PNEQ";); + STRACE("seq_regex_brief", tout << "PNEQ ";); expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); @@ -445,7 +450,7 @@ namespace smt { VERIFY(sk().is_is_non_empty(e, r, u)); TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PNE";); + STRACE("seq_regex_brief", tout << "PNE ";); expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); @@ -504,7 +509,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << " PE";); + STRACE("seq_regex_brief", tout << "PE ";); expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); From 94d9db3507802c7b5b42e8203c6d542ba86eb6ed Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 19 Jun 2020 12:19:58 -0400 Subject: [PATCH 11/51] add compact tracing for cache hits/misses --- src/ast/rewriter/seq_rewriter.cpp | 37 +++++++++++++++++++++++-------- src/ast/rewriter/seq_rewriter.h | 17 +++++++++++--- src/smt/seq_regex.cpp | 9 +++++--- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index f2807fc6ab4..1573782cbb2 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2182,7 +2182,7 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref 
seq_rewriter::is_nullable(expr* r) { - STRACE("seq_regex_brief", tout << "n";); + // STRACE("seq_regex_brief", tout << "n";); expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable_rec(r); @@ -2192,7 +2192,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) { } expr_ref seq_rewriter::is_nullable_rec(expr* r) { - STRACE("seq_regex_brief", tout << ".";); // recursive call + // STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; sort* seq_sort = nullptr; @@ -2367,7 +2367,7 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. */ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - STRACE("seq_regex_brief", tout << "d";); + // STRACE("seq_regex_brief", tout << "d";); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); @@ -2458,7 +2458,7 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) { - result is in BDD form */ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { - STRACE("seq_regex_brief", tout << ".";); // recursive call + // STRACE("seq_regex_brief", tout << ".";); // recursive call expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; expr_ref result(m()); @@ -2568,7 +2568,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { - STRACE("seq_regex_brief", tout << ".";); // recursive call + // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2583,7 +2583,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - 
STRACE("seq_regex_brief", tout << ".";); // recursive call + // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4175,19 +4175,38 @@ bool seq_rewriter::reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, seq_rewriter::op_cache::op_cache(ast_manager& m): m(m), m_trail(m) + #ifdef _TRACE + , cache_hits(0), cache_misses(0) + #endif {} expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); m_table.find(e, e); - if (!(e.r)) { - STRACE("seq_regex_brief", tout << "!";); // cache miss - } + #ifdef _TRACE + (e.r) ? (cache_hits++) : (cache_misses++) ; + #endif return e.r; } +#ifdef _TRACE +void seq_rewriter::trace_and_reset_cache() { + unsigned hits = m_op_cache.cache_hits; + unsigned misses = m_op_cache.cache_misses; + // Suppress tracing of "0/0 hits" or "1/1 hits" + if (hits >= 2 || misses >= 1) { + STRACE("seq_regex_brief", + tout << "(" << hits << "/" << (hits + misses) + << " hits) "; + ); + } + m_op_cache.cache_hits = 0; + m_op_cache.cache_misses = 0; +} +#endif + void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) { cleanup(); if (a) m_trail.push_back(a); diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 6b9d88a3a38..4531778f026 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -142,12 +142,18 @@ class seq_rewriter { unsigned m_max_cache_size { 10000 }; expr_ref_vector m_trail; op_table m_table; + void cleanup(); public: op_cache(ast_manager& m); expr* find(decl_kind op, expr* a, expr* b); void insert(decl_kind op, expr* a, expr* b, expr* r); + + #ifdef _TRACE + unsigned cache_hits; + unsigned cache_misses; + #endif }; seq_util m_util; @@ -184,7 +190,7 @@ class seq_rewriter { expr_ref mk_seq_concat(expr* a, expr* b); // Calculate derivative, memoized and enforcing a normal form - expr_ref 
mk_derivative(expr* ele, expr* r); + expr_ref is_nullable_rec(expr* r); expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); @@ -289,7 +295,6 @@ class seq_rewriter { class seq_util::str& str() { return u().str; } class seq_util::str const& str() const { return u().str; } - expr_ref is_nullable_rec(expr* r); void intersect(unsigned lo, unsigned hi, svector>& ranges); public: @@ -336,13 +341,19 @@ class seq_rewriter { void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs); - // Check for acceptance of the empty string + // Expose derivative and nullability check expr_ref is_nullable(expr* r); + expr_ref mk_derivative(expr* ele, expr* r); // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. void elim_condition(expr* elem, expr_ref& cond); + #ifdef _TRACE + void trace_and_reset_cache(); + #else + static inline void trace_and_reset_cache() {} + #endif }; #endif diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 9e0acc3ffcb..de7c3a4698d 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -241,7 +241,7 @@ namespace smt { expr_ref is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); - STRACE("seq_regex_brief", tout << " ";); + seq_rw().trace_and_reset_cache(); literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { @@ -423,10 +423,13 @@ namespace smt { expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); STRACE("seq_regex_brief", tout << "D ";); - expr_ref result = expr_ref(re().mk_derivative(hd, r), m); + + expr_ref result = seq_rw().mk_derivative(hd, r); rewrite(result); + STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - 
STRACE("seq_regex_brief", tout << " ";); + seq_rw().trace_and_reset_cache(); + // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;); return result; } From 7f53dcaea12bb0794974fd7c11736411f9a3f9aa Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sat, 20 Jun 2020 14:15:40 -0400 Subject: [PATCH 12/51] seq_regex fix cache hit/miss tracing and wrapper around is_nullable --- src/ast/rewriter/seq_rewriter.cpp | 45 ++++++++++++++------------ src/ast/rewriter/seq_rewriter.h | 4 +-- src/smt/seq_regex.cpp | 54 ++++++++++++++++++++++++------- src/smt/seq_regex.h | 1 + 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 1573782cbb2..79195ab7fa9 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4173,12 +4173,7 @@ bool seq_rewriter::reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, } seq_rewriter::op_cache::op_cache(ast_manager& m): - m(m), - m_trail(m) - #ifdef _TRACE - , cache_hits(0), cache_misses(0) - #endif -{} + m(m), m_trail(m) {} expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); @@ -4191,12 +4186,35 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { return e.r; } +void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) { + cleanup(); + if (a) m_trail.push_back(a); + if (b) m_trail.push_back(b); + if (r) m_trail.push_back(r); + m_table.insert(op_entry(op, a, b, r)); +} + +void seq_rewriter::op_cache::cleanup() { + if (m_table.size() >= m_max_cache_size) { + m_trail.reset(); + m_table.reset(); + } +} + #ifdef _TRACE +unsigned seq_rewriter::op_cache::cache_hits = 0; +unsigned seq_rewriter::op_cache::cache_misses = 0; + void seq_rewriter::trace_and_reset_cache() { unsigned hits = m_op_cache.cache_hits; unsigned misses = m_op_cache.cache_misses; // Suppress tracing 
of "0/0 hits" or "1/1 hits" if (hits >= 2 || misses >= 1) { + STRACE("seq_regex", + tout << "Op cache hits: " << hits + << " (out of " << (hits + misses) + << ")" << std::endl; + ); STRACE("seq_regex_brief", tout << "(" << hits << "/" << (hits + misses) << " hits) "; @@ -4206,18 +4224,3 @@ void seq_rewriter::trace_and_reset_cache() { m_op_cache.cache_misses = 0; } #endif - -void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) { - cleanup(); - if (a) m_trail.push_back(a); - if (b) m_trail.push_back(b); - if (r) m_trail.push_back(r); - m_table.insert(op_entry(op, a, b, r)); -} - -void seq_rewriter::op_cache::cleanup() { - if (m_table.size() >= m_max_cache_size) { - m_trail.reset(); - m_table.reset(); - } -} diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 4531778f026..19c4590b48b 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -151,8 +151,8 @@ class seq_rewriter { void insert(decl_kind op, expr* a, expr* b, expr* r); #ifdef _TRACE - unsigned cache_hits; - unsigned cache_misses; + static unsigned cache_hits; + static unsigned cache_misses; #endif }; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index de7c3a4698d..d600881f219 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -238,10 +238,7 @@ namespace smt { TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); STRACE("seq_regex_brief", tout << "PN ";); - expr_ref is_nullable = seq_rw().is_nullable(r); - rewrite(is_nullable); - - seq_rw().trace_and_reset_cache(); + expr_ref is_nullable = is_nullable_wrapper(r); literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { @@ -416,21 +413,56 @@ namespace smt { } /* - Wrapper around the regex symbolic derivative from the rewriter. + Wrapper around calls to is_nullable from the seq rewriter. 
+ */ + expr_ref seq_regex::is_nullable_wrapper(expr* r) { + STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;); + STRACE("seq_regex_brief", tout << "n ";); + + expr_ref result = seq_rw().is_nullable(r); + rewrite(result); + + STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); + seq_rw().trace_and_reset_cache(); + + return result; + } + + /* + Wrapper around the regex symbolic derivative from the seq rewriter. Ensures that the derivative is written in a normalized BDD form with optimizations for if-then-else expressions involving the head. */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "D ";); + STRACE("seq_regex_brief", tout << "d ";); - expr_ref result = seq_rw().mk_derivative(hd, r); + expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); seq_rw().trace_and_reset_cache(); - // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;); + /* If the following lines are enabled instead, we use the + same rewriter for the nullable and derivative calls. + However, it currently seems to cause a performance + bug as a side effect. + + The two seq rewriters used are at: + m_seq_rewrite + (returned by seq_rw()) + th.m_rewrite.m_imp->m_cfg.m_seq_rw + (private, can't be accessed directly) + + TODO: experiment with making them the same and see + if it results in significant speedup (due to fewer + cache misses). 
+ */ + // expr_ref result = seq_rw().mk_derivative(hd, r); + // rewrite(result) + // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); + // seq_rw().trace_and_reset_cache(); + return result; } @@ -476,8 +508,7 @@ namespace smt { TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); STRACE("seq_regex_brief", tout << "PNE ";); - expr_ref is_nullable = seq_rw().is_nullable(r); - rewrite(is_nullable); + expr_ref is_nullable = is_nullable_wrapper(r); if (m.is_true(is_nullable)) return; literal null_lit = th.mk_literal(is_nullable); @@ -535,8 +566,7 @@ namespace smt { TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); STRACE("seq_regex_brief", tout << "PE ";); - expr_ref is_nullable = seq_rw().is_nullable(r); - rewrite(is_nullable); + expr_ref is_nullable = is_nullable_wrapper(r); if (m.is_true(is_nullable)) { th.add_axiom(~lit); return; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index f339d36c929..38720c5db17 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -79,6 +79,7 @@ namespace smt { expr_ref symmetric_diff(expr* r1, expr* r2); + expr_ref is_nullable_wrapper(expr* r); expr_ref derivative_wrapper(expr* hd, expr* r); void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); From ed804dc224da81d2a4d397e681371f4e81a34c9c Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 24 Jun 2020 19:21:39 -0400 Subject: [PATCH 13/51] minor --- src/ast/rewriter/seq_rewriter.cpp | 7 ++++- src/ast/rewriter/seq_rewriter.h | 4 +-- src/smt/seq_regex.cpp | 45 +++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 79195ab7fa9..37e96b1d31b 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2487,6 +2487,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { std::swap(a1, b1); 
std::swap(a2, b2); } + // @EXP (experimental change) // Simplify if there is a relationship between ca and cb if (pred_implies(ca, cb)) { r1 = mk_der_op(k, a1, b1); @@ -2655,6 +2656,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail + // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); + // @EXP (experimental change) // Write 'head is equal' as a range constraint: // (ele <= hd) and (hd <= ele) return mk_der_inter( @@ -2683,6 +2686,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { + // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); + // @EXP (experimental change) // Write 'tail is equal' as a range constraint: // (ele <= tl) and (tl <= ele) return mk_der_inter( @@ -4205,7 +4210,7 @@ void seq_rewriter::op_cache::cleanup() { unsigned seq_rewriter::op_cache::cache_hits = 0; unsigned seq_rewriter::op_cache::cache_misses = 0; -void seq_rewriter::trace_and_reset_cache() { +void seq_rewriter::trace_and_reset_cache_counts() { unsigned hits = m_op_cache.cache_hits; unsigned misses = m_op_cache.cache_misses; // Suppress tracing of "0/0 hits" or "1/1 hits" diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 19c4590b48b..1ac8d0157cd 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -350,9 +350,9 @@ class seq_rewriter { void elim_condition(expr* elem, expr_ref& cond); #ifdef _TRACE - void trace_and_reset_cache(); + void trace_and_reset_cache_counts(); #else - static inline void trace_and_reset_cache() {} + static inline void trace_and_reset_cache_counts() {} #endif }; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 50b74c20db3..c882314edc7 100644 --- a/src/smt/seq_regex.cpp +++ 
b/src/smt/seq_regex.cpp @@ -424,7 +424,7 @@ namespace smt { rewrite(result); STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); - seq_rw().trace_and_reset_cache(); + seq_rw().trace_and_reset_cache_counts(); return result; } @@ -435,14 +435,21 @@ namespace smt { with optimizations for if-then-else expressions involving the head. */ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { - STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "d ";); + STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - seq_rw().trace_and_reset_cache(); + STRACE("seq_regex_brief", + tout << "d(" + << mk_pp(hd, m) + << "," + << r->get_id() + << "->" + << result->get_id() + << ") ";); + seq_rw().trace_and_reset_cache_counts(); /* If the following lines are enabled instead, we use the same rewriter for the nullable and derivative calls. 
@@ -462,7 +469,7 @@ namespace smt { // expr_ref result = seq_rw().mk_derivative(hd, r); // rewrite(result) // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - // seq_rw().trace_and_reset_cache(); + // seq_rw().trace_and_reset_cache_counts(); return result; } @@ -507,7 +514,11 @@ namespace smt { VERIFY(sk().is_is_non_empty(e, r, u)); TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PNE ";); + STRACE("seq_regex_brief", + tout << std::endl << "PNE(" << e->get_id() + << "," << r->get_id() + << "," << u->get_id() + << ") ";); expr_ref is_nullable = is_nullable_wrapper(r); if (m.is_true(is_nullable)) @@ -515,7 +526,16 @@ namespace smt { literal null_lit = th.mk_literal(is_nullable); expr_ref hd = mk_first(r); expr_ref d(m); - d = derivative_wrapper(hd, r); + d = derivative_wrapper(m.mk_var(0, m.get_sort(hd)), r); + + var_subst subst(m); + expr_ref_vector sub(m); + sub.push_back(hd); + d = subst(d, sub); + + STRACE("seq_regex_brief", tout << "(d subbed: " << d->get_id() << ") ";); + TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;); + literal_vector lits; lits.push_back(~lit); if (null_lit != false_literal) @@ -536,6 +556,11 @@ namespace smt { next_non_empty = m.mk_and(cond, next_non_empty); lits.push_back(th.mk_literal(next_non_empty)); } + + TRACE("seq_regex", tout << "solved lits: " << mk_pp(lits) << std::endl;); + // STRACE("seq_regex_brief", tout << "(d solved: " << d->get_id() << ") ";); + // mk_pp asdfasdfasdfasdfasdfasdfadsfasdfasdf literal_vector + th.add_axiom(lits); } @@ -565,7 +590,11 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr; VERIFY(sk().is_is_empty(e, r, u)); TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PE ";); + STRACE("seq_regex_brief", + tout << std::endl << "PE(" << e->get_id() + << "," << r->get_id() + << "," << 
u->get_id() + << ") ";); expr_ref is_nullable = is_nullable_wrapper(r); if (m.is_true(is_nullable)) { From a7df4e572fc8cf27a427042d6607db639893ca63 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 25 Jun 2020 13:23:08 -0400 Subject: [PATCH 14/51] label and disable more experimental changes for testing --- src/ast/rewriter/seq_rewriter.cpp | 49 +++++++++++++++++-------------- src/smt/seq_regex.cpp | 2 +- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 561767a5b91..c7f4c4d494a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2489,18 +2489,18 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { } // @EXP (experimental change) // Simplify if there is a relationship between ca and cb - if (pred_implies(ca, cb)) { - r1 = mk_der_op(k, a1, b1); - } - else if (pred_implies(ca, notcb)) { - r1 = mk_der_op(k, a1, b2); - } - if (pred_implies(notca, cb)) { - r2 = mk_der_op(k, a2, b1); - } - else if (pred_implies(notca, notcb)) { - r2 = mk_der_op(k, a2, b2); - } + // if (pred_implies(ca, cb)) { + // r1 = mk_der_op(k, a1, b1); + // } + // else if (pred_implies(ca, notcb)) { + // r1 = mk_der_op(k, a1, b2); + // } + // if (pred_implies(notca, cb)) { + // r2 = mk_der_op(k, a2, b1); + // } + // else if (pred_implies(notca, notcb)) { + // r2 = mk_der_op(k, a2, b2); + // } // --- End core logic } if (!r1) r1 = mk_der_op(k, a1, b); @@ -2656,14 +2656,14 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail - // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); + return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); // @EXP (experimental change) // Write 'head is equal' as a range constraint: // (ele <= hd) and (hd <= ele) - return mk_der_inter( - re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)), - 
re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl)) - ); + // return mk_der_inter( + // re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)), + // re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl)) + // ); } else if (str().is_empty(r1)) { return mk_empty(); @@ -2686,14 +2686,14 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { - // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); + return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); // @EXP (experimental change) // Write 'tail is equal' as a range constraint: // (ele <= tl) and (tl <= ele) - return mk_der_inter( - re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))), - re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd))) - ); + // return mk_der_inter( + // re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))), + // re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd))) + // ); } else if (str().is_empty(r2)) { return mk_empty(); @@ -2706,6 +2706,11 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { if (s1.length() == 1 && s2.length() == 1) { expr_ref ch1(m_util.mk_char(s1[0]), m()); expr_ref ch2(m_util.mk_char(s2[0]), m()); + // @EXP (experimental change) + // expr_ref p1(m_util.mk_le(ch1, ele), m()); + // expr_ref p2(m_util.mk_le(ele, ch2), m()); + // expr_ref conj(m().mk_and(p1, p2), m()); + // return re_predicate(conj, seq_sort); return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort), re_predicate(m_util.mk_le(ele, ch2), seq_sort)); } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index cc5217b277a..9e1b2ebd3c3 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -364,6 +364,7 @@ namespace smt { * within the same Regex. 
*/ bool seq_regex::coallesce_in_re(literal lit) { + // @EXP (experimental change) return false; expr* s = nullptr, *r = nullptr; expr* e = ctx.bool_var2expr(lit.var()); @@ -633,7 +634,6 @@ namespace smt { sort* elem_sort = nullptr, *seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); VERIFY(u().is_seq(seq_sort, elem_sort)); - sort* domain[2] = { m.get_sort(n), a().mk_int() }; return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort); } } From e074fb3544cb99e804010442065938b40d65b933 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 25 Jun 2020 13:59:44 -0400 Subject: [PATCH 15/51] minor documentation / tracing --- src/smt/seq_regex.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 9e1b2ebd3c3..e8fa6e38fd2 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -103,14 +103,14 @@ namespace smt { } /** - * Propagate the atom (str.in.re s r) + * Propagate the atom (str.in_re s r) * * Propagation implements the following inference rules * - * (not (str.in.re s r)) => (str.in.re s (complement r)) - * (str.in.re s r) => r != {} + * (not (str.in_re s r)) => (str.in_re s (complement r)) + * (str.in_re s r) => r != {} * - * (str.in.re s r) => (accept s 0 r) + * (str.in_re s r) => (accept s 0 r) */ void seq_regex::propagate_in_re(literal lit) { @@ -119,7 +119,12 @@ namespace smt { VERIFY(str().is_in_re(e, s, r)); TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PIR ";); + STRACE("seq_regex_brief", + tout << "PIR(" + << s->get_id() + << "," + << r->get_id() + << ") ";); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -419,12 +424,17 @@ namespace smt { */ expr_ref seq_regex::is_nullable_wrapper(expr* r) { STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "n ";); expr_ref 
result = seq_rw().is_nullable(r); rewrite(result); STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); + STRACE("seq_regex_brief", + tout << "n(" + << r->get_id() + << "->" + << result->get_id() + << ") ";); seq_rw().trace_and_reset_cache_counts(); return result; From 4e2ba58f6064077a1a6e38c3928b0c50640e5abc Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 25 Jun 2020 15:57:09 -0400 Subject: [PATCH 16/51] a few more @EXP annotations --- src/ast/rewriter/seq_rewriter.cpp | 3 ++- src/smt/seq_regex.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index c7f4c4d494a..253fb0ca050 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2465,7 +2465,8 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { auto mk_ite = [&](expr* c, expr* a, expr* b) { return (a == b) ? a : m().mk_ite(c, a, b); }; - // TODO + // @EXP (experimental change) + // Use same ID for related predicates to improve simplifications // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); }; if (m().is_ite(a, ca, a1, a2)) { expr_ref r1(m()), r2(m()); diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index e8fa6e38fd2..3cf4adab64a 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -251,6 +251,7 @@ namespace smt { } else if (m.is_false(is_nullable)) { th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1)); + // @EXP (experimental change) //unsigned len = std::max(1u, re().min_length(r)); //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r))); } From f610d3a0b9c8567d8c0e5c6e5d8b0bccdce1009e Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 25 Jun 2020 18:41:29 -0400 Subject: [PATCH 17/51] dead state elimination skeleton code --- src/smt/seq_regex.h | 75 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 
insertions(+) diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 826727347a0..1b3eada1fc7 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -17,6 +17,8 @@ Module Name: #pragma once #include "util/scoped_vector.h" +#include "util/obj_ref_hashtable.h" +#include "util/union_find.h" #include "ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" #include "smt/smt_context.h" @@ -27,6 +29,79 @@ namespace smt { class theory_seq; class seq_regex { + /* + Info saved about the set of states (regexes) seen so far + */ + class seen_states { + typedef expr state; + typedef obj_ref_map state_set; + typedef obj_ref_map edge_rel; + typedef basic_union_find state_union_find; + + private: + /* + All seen states are exactly one of: + - alive: known to be nonempty + - dead: known to be empty + - unknown: all outgoing transitions have been + seen, but the state is not known + to be alive or dead + - unvisited: not all outgoing transitions have + been seen + */ + state_set m_seen; + state_set m_alive; + state_set m_dead; + state_set m_unknown; + state_set m_unvisited; + + void mark_unknown(state s); // unvisited -> unknown + void mark_alive(state s); // unknown -> alive + void mark_dead(state s); // unknown -> dead + + /* + A graph of strongly connected + components is kept on unknown states + */ + state_union_find m_cnctd_cmpnts; + edge_rel m_from; + edge_rel m_to; + + void merge_states(state_set s); + + /* + Caching details + */ + unsigned m_max_cache_size { 10000 }; + expr_ref_vector m_trail; + + /* + Core cycle-detection routine + */ + // Heuristic + bool can_be_in_cycle(state s1, state s2); + // Full check + void find_cycle(state s1, state s2); + + public: + /* + Exposed methods: + - adding a state + - adding a transition from a state + - marking a state as visited (no more transitions) + - checking if a state is known to be alive or dead + */ + void add_state(state s); + void add_transition(state s1, state s2); + + bool is_alive(state s); + bool 
is_dead(state s); + }; + + /* + Struct representing data about a constraint of + the form (str.in_re s R) + */ struct s_in_re { literal m_lit; expr* m_s; From 4e5873eaf612b3fdc59e22872b965306f4a44bbf Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 26 Jun 2020 22:25:00 -0400 Subject: [PATCH 18/51] progress on dead state elimination --- src/smt/seq_regex.cpp | 158 ++++++++++++++++++++++++++++++++++++++++++ src/smt/seq_regex.h | 106 ++++++++++++++++++---------- 2 files changed, 229 insertions(+), 35 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 3cf4adab64a..ce9a7b5d90d 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -647,4 +647,162 @@ namespace smt { VERIFY(u().is_seq(seq_sort, elem_sort)); return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort); } + + /**************************************************** + *** Dead state elimination and seen_states class *** + ****************************************************/ + + seq_regex::seen_states::state seq_regex::seen_states::get_state(expr* e) { + return m_state_ufind.find(e->get_id()); + } + + void seq_regex::seen_states::mark_unknown(state s) { + SASSERT(m_unvisited.contains(s)); + m_unvisited.remove(s); + m_unknown.insert(s); + } + void seq_regex::seen_states::mark_alive(state s) { + SASSERT(m_unknown.contains(s)); + m_unknown.remove(s); + m_alive.insert(s); + } + void seq_regex::seen_states::mark_dead(state s) { + SASSERT(m_unknown.contains(s)); + m_unknown.remove(s); + m_dead.insert(s); + } + + bool seq_regex::seen_states::is_resolved(state s) { + return (m_alive.contains(s) || m_dead.contains(s)); + } + bool seq_regex::seen_states::is_unresolved(state s) { + return (m_unknown.contains(s) || m_unvisited.contains(s)); + } + + /* + Merge two states or more generally a set of states into one, + returning the new state. + + Preconditions: the set should be nonempty, and every state + in the set should be unresolved. 
Also, each state should + be current (not a previous SCC that was later merged into another). + + Removes the old state from m_unknown or m_univisited, + but leaves it in m_seen. + */ + seq_regex::seen_states::state + seq_regex::seen_states::merge_states(state s1, state s2) { + SASSERT(is_unresolved(s1)); + SASSERT(is_unresolved(s2)); + SASSERT(m_state_ufind.is_root(s1)); + SASSERT(m_state_ufind.is_root(s2)); + m_state_ufind.merge(s1, s2); + if (m_state_ufind.is_root(s1)) std::swap(s1, s2); + // Remove old state s2 + if (m_unknown.contains(s2)) { + m_unknown.remove(s2); + } else { + m_unvisited.remove(s2); + } + return s1; + } + seq_regex::seen_states::state + seq_regex::seen_states::merge_states(state_set& s_set) { + SASSERT(s_set.num_elems() > 0); + state prev_s; + bool first_iter = true; + for (auto const& s: s_set) { + if (first_iter) { + prev_s = s; + first_iter = false; + } else { + prev_s = merge_states(prev_s, s); + } + } + return prev_s; + } + + bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) { + // Simple placeholder. TODO: Implement full check + return true; + } + void seq_regex::seen_states::find_and_merge_cycles(state s1, state s2) { + // Search backwards from s1 to see if (s1, s2) creates a cycle. + if (s1 == s2) return; + // TODO: Implement full check + // Simple placeholder for now: check if this is a loop or if there + // is an edge both ways + if (m_to.find(s2)->contains(s1)) { + merge_states(s1, s2); + } + } + + void seq_regex::seen_states::add_state(expr* e) { + unsigned id = e->get_id(); + if (m_seen.contains(id)) return; + if (m_seen.num_elems() >= m_max_size) { + STRACE("seq_regex", tout << "Warning: max size of seen states reached!" 
<< std::endl;); + STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); + return; + } + // Save e as expr_ref so it's not deleted + m_trail.push_back(e); + // Ensure corresponding var in connected components + while (id >= m_state_ufind.get_num_vars()) { + m_state_ufind.mk_var(); + } + // Initialize as unvisited + m_seen.insert(id); + m_unvisited.insert(id); + m_to.insert(id, new state_set()); + m_from_cycle.insert(id, new state_set()); + m_from_nocycle.insert(id, new state_set()); + } + void seq_regex::seen_states::add_transition(expr* e1, expr* e2) { + // Precondition: e1 and e2 already correspond to existing states + SASSERT(m_seen.contains(e1->get_id())); + SASSERT(m_seen.contains(e2->get_id())); + state s1 = get_state(e1); + state s2 = get_state(e2); + if (s1 == s2) { + return; + } + // TODO: + // If e1 is dead, assert e1 is marked dead + // If e1 is live, add edge and return + // If e2 is live, mark e1 live, propagate backwards + else if (!can_be_in_cycle(e1, e2)) { + // Don't need to check for cycles here + if (m_from_nocycle.find(s2)->contains(s1)) { + return; + } + else if (m_from_cycle.find(s2)->contains(s2)) { + // update edge label + m_from_cycle.find(s2)->remove(s2); + m_from_nocycle.find(s2)->insert(s1); + } + else { + // add edge + m_to.find(s1)->insert(s2); + m_from_nocycle.find(s2)->insert(s1); + } + } + else if (m_to.find(s1)->contains(s2)) { + return; + } + else { + // Need to check for cycles here + m_to.find(s1)->insert(s2); + m_from_cycle.find(s2)->insert(s1); + find_and_merge_cycles(s1, s2); + } + } + + bool seq_regex::seen_states::is_alive(expr* e) { + return m_alive.contains(get_state(e)); + } + bool seq_regex::seen_states::is_dead(expr* e) { + return m_dead.contains(get_state(e)); + } + } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 1b3eada1fc7..05e4ee5ca9e 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -17,7 +17,8 @@ Module Name: #pragma once #include "util/scoped_vector.h" -#include 
"util/obj_ref_hashtable.h" +#include "util/uint_set.h" +#include "util/uint_map.h" #include "util/union_find.h" #include "ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" @@ -30,17 +31,29 @@ namespace smt { class seq_regex { /* - Info saved about the set of states (regexes) seen so far + seen_states + + Info saved about the set of states (regexes) seen so far. + + "States" here are strongly connected components -- states that + are mutually reachable from each other. States + are represented as unsigned integers. + + Used for the core incremental dead state elimination algorithm. + + Class invariants: + - TODO */ class seen_states { - typedef expr state; - typedef obj_ref_map state_set; - typedef obj_ref_map edge_rel; - typedef basic_union_find state_union_find; + typedef unsigned state; + typedef uint_set state_set; + typedef uint_map edge_rel; + typedef basic_union_find state_ufind; + // typedef uint_map exprs_of_state; private: /* - All seen states are exactly one of: + All states are exactly one of: - alive: known to be nonempty - dead: known to be empty - unknown: all outgoing transitions have been @@ -48,59 +61,77 @@ namespace smt { to be alive or dead - unvisited: not all outgoing transitions have been seen + + The set m_seen keeps all of these and in addition, + seen states that have been merged and no longer reprsent + a current SCC. */ - state_set m_seen; - state_set m_alive; - state_set m_dead; - state_set m_unknown; - state_set m_unvisited; + state_set m_seen; + state_set m_alive; + state_set m_dead; + state_set m_unknown; + state_set m_unvisited; void mark_unknown(state s); // unvisited -> unknown void mark_alive(state s); // unknown -> alive void mark_dead(state s); // unknown -> dead + bool is_resolved(state s); // alive or dead + bool is_unresolved(state s); // unknown or unvisited + /* - A graph of strongly connected - components is kept on unknown states + Initially a state is represented by an expression ID. 
+ A union find data structure collapses an ID to a state. + + Edges are saved in both from and to maps. + Additionally edges from are divided into those possibly + in a cycle, and those not in a cycle. */ - state_union_find m_cnctd_cmpnts; - edge_rel m_from; - edge_rel m_to; + state_ufind m_state_ufind; + + state get_state(expr* e); + state merge_states(state s1, state s2); + state merge_states(state_set& s_set); - void merge_states(state_set s); + edge_rel m_from_cycle; + edge_rel m_from_nocycle; + edge_rel m_to; /* Caching details */ - unsigned m_max_cache_size { 10000 }; + unsigned m_max_size { 10000 }; expr_ref_vector m_trail; /* Core cycle-detection routine */ - // Heuristic - bool can_be_in_cycle(state s1, state s2); - // Full check - void find_cycle(state s1, state s2); + // Heuristic on syntactic expressions + bool can_be_in_cycle(expr* e1, expr* e2); + // Full check: if new edge (s1, s2) will create at least one cycle, + // merge all states in the new SCC + void find_and_merge_cycles(state s1, state s2); public: /* - Exposed methods: - - adding a state - - adding a transition from a state - - marking a state as visited (no more transitions) - - checking if a state is known to be alive or dead + Main exposed methods: + - adding a state + - adding a transition from a state + - checking if a state is known to be alive or dead */ - void add_state(state s); - void add_transition(state s1, state s2); - - bool is_alive(state s); - bool is_dead(state s); + void add_state(expr* e); + void add_transition(expr* e1, expr* e2); + bool is_alive(expr* e); + bool is_dead(expr* e); + + seen_states(ast_manager& m): + m_seen(), m_alive(), m_dead(), m_unknown(), m_unvisited(), + m_state_ufind(), m_from_cycle(), m_from_nocycle(), m_to(), + m_trail(m) {} }; /* - Struct representing data about a constraint of - the form (str.in_re s R) + Data about a constraint of the form (str.in_re s R) */ struct s_in_re { literal m_lit; @@ -111,6 +142,11 @@ namespace smt { m_lit(l), m_s(s), 
m_re(r), m_active(true) {} }; + /* + Data about a literal for the solver to propagate + The trigger guards whether the literal is ready + to be addressed yet -- see seq_regex::can_propagate + */ struct propagation_lit { literal m_lit; literal m_trigger; From 1f1f127bdf4a59711e252fba63593b9c9cc7f8c7 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sat, 27 Jun 2020 18:57:40 -0400 Subject: [PATCH 19/51] more progress on dead state elimination --- src/smt/seq_regex.cpp | 302 ++++++++++++++++++++++++++++++------------ src/smt/seq_regex.h | 108 ++++++++------- 2 files changed, 278 insertions(+), 132 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index ce9a7b5d90d..43b510f08be 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -24,7 +24,8 @@ namespace smt { seq_regex::seq_regex(theory_seq& th): th(th), ctx(th.get_context()), - m(th.get_manager()) + m(th.get_manager()), + m_seen_states(m, *this) {} seq_util& seq_regex::u() { return th.m_util; } @@ -591,6 +592,32 @@ namespace smt { } } + void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) { + // Use get_cofactors method and check which conds are + // satisfiable + // TODO + return; + // + // get_cofactors(d, cofactors); + // for (auto const& p : cofactors) { + // if (is_member(p.second, u)) + // continue; + // expr_ref cond(p.first, m); + // seq_rw().elim_condition(hd, cond); + // rewrite(cond); + // if (m.is_false(cond)) + // continue; + // lits.reset(); + // lits.push_back(~lit); + // if (!m.is_true(cond)) { + // expr_ref ncond(mk_not(m, cond), m); + // lits.push_back(th.mk_literal(mk_forall(m, hd, ncond))); + // } + // expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, r), n); + // lits.push_back(th.mk_literal(is_empty1)); + // th.add_axiom(lits); + } + /* is_empty(r, u) => ~is_nullable(r) is_empty(r, u) => (forall x . 
~cond(x)) or is_empty(r1, u union r) for (cond, r) in min-terms(D(x,r)) @@ -652,66 +679,145 @@ namespace smt { *** Dead state elimination and seen_states class *** ****************************************************/ - seq_regex::seen_states::state seq_regex::seen_states::get_state(expr* e) { - return m_state_ufind.find(e->get_id()); + void seq_regex::seen_states::add_state(state s) { + SASSERT(!m_seen.contains(s)); + // Ensure corresponding var in connected components + while (s >= m_state_ufind.get_num_vars()) { + m_state_ufind.mk_var(); + } + // Initialize as unvisited + m_seen.insert(s); + m_unvisited.insert(s); + m_to.insert(s, new state_set()); + m_from.insert(s, new state_set()); + m_from_maybecycle.insert(s, new state_set()); + } + void seq_regex::seen_states::remove_state(state s) { + // This is a partial deletion -- the state is still seen and can't be + // added again later + SASSERT(m_seen.contains(s)); + SASSERT(!m_state_ufind.is_root(s)); + m_to.erase(s); + m_from.erase(s); + m_from_maybecycle.erase(s); + if (m_unvisited.contains(s)) { + UNREACHABLE(); // for testing TODO: remove + m_unvisited.remove(s); + } + else if (m_unknown.contains(s)) { + m_unknown.remove(s); + } + else if (m_dead.contains(s)) { + UNREACHABLE(); // for testing TODO: remove + m_unknown.remove(s); + } + else if (m_live.contains(s)) { + UNREACHABLE(); // for testing TODO: remove + m_live.remove(s); + } + else { + UNREACHABLE(); + } } void seq_regex::seen_states::mark_unknown(state s) { + SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unvisited.contains(s)); m_unvisited.remove(s); m_unknown.insert(s); } - void seq_regex::seen_states::mark_alive(state s) { + void seq_regex::seen_states::mark_live(state s) { + SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); m_unknown.remove(s); - m_alive.insert(s); + m_live.insert(s); } void seq_regex::seen_states::mark_dead(state s) { + SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); m_unknown.remove(s); 
m_dead.insert(s); } - bool seq_regex::seen_states::is_resolved(state s) { - return (m_alive.contains(s) || m_dead.contains(s)); + // bool seq_regex::seen_states::is_resolved(state s) { + // SASSERT(m_state_ufind.is_root(s)); + // return (m_live.contains(s) || m_dead.contains(s)); + // } + // bool seq_regex::seen_states::is_unresolved(state s) { + // SASSERT(m_state_ufind.is_root(s)); + // return (m_unknown.contains(s) || m_unvisited.contains(s)); + // } + + /* + Add edge to the graph + May already exist, in which case a nocycle edge overrides + a cycle edge. + */ + void seq_regex::seen_states::add_edge(state s1, state s2, + bool maybecycle) { + SASSERT(m_state_ufind.is_root(s1)); + SASSERT(m_state_ufind.is_root(s2)); + if (s1 == s2) return; + if (!m_to.find(s1)->contains(s2)) { + // add new edge + m_to.find(s1)->insert(s2); + m_from.find(s2)->insert(s1); + if (maybecycle) m_from_maybecycle.find(s2)->insert(s1); + } + else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) { + // update existing edge + m_from_maybecycle.find(s2)->remove(s1); + } + } + void seq_regex::seen_states::remove_edge(state s1, state s2) { + SASSERT(m_to.find(s1)->contains(s2)); + SASSERT(m_from.find(s2)->contains(s1)); + m_to.find(s1)->remove(s2); + m_from.find(s2)->remove(s1); + if (m_from_maybecycle.find(s2)->contains(s1)) { + m_from_maybecycle.find(s2)->remove(s1); + } } - bool seq_regex::seen_states::is_unresolved(state s) { - return (m_unknown.contains(s) || m_unvisited.contains(s)); + void seq_regex::seen_states::rename_edge(state old1, state old2, + state new1, state new2) { + SASSERT(m_to.find(old1)->contains(old2)); + SASSERT(m_from.find(old2)->contains(old1)); + bool maybecycle = m_from_maybecycle.find(old2)->contains(old1); + remove_edge(old1, old2); + add_edge(new1, new2, maybecycle); } /* Merge two states or more generally a set of states into one, - returning the new state. + returning the new state. Also merges associated edges. 
Preconditions: the set should be nonempty, and every state - in the set should be unresolved. Also, each state should + in the set should be unknown (in particular, *not* unvisited). + Also, each state should be current (not a previous SCC that was later merged into another). - - Removes the old state from m_unknown or m_univisited, - but leaves it in m_seen. */ - seq_regex::seen_states::state - seq_regex::seen_states::merge_states(state s1, state s2) { - SASSERT(is_unresolved(s1)); - SASSERT(is_unresolved(s2)); + auto seq_regex::seen_states::merge_states(state s1, state s2) -> state { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); + SASSERT(m_unknown.contains(s1)); + SASSERT(m_unknown.contains(s2)); m_state_ufind.merge(s1, s2); if (m_state_ufind.is_root(s1)) std::swap(s1, s2); - // Remove old state s2 - if (m_unknown.contains(s2)) { - m_unknown.remove(s2); - } else { - m_unvisited.remove(s2); + // merge edges + for (auto s_to: *m_to.find(s2)) { + rename_edge(s2, s_to, s1, s_to); + } + for (auto s_from: *m_from.find(s2)) { + rename_edge(s_from, s2, s_from, s1); } + remove_state(s2); return s1; } - seq_regex::seen_states::state - seq_regex::seen_states::merge_states(state_set& s_set) { + auto seq_regex::seen_states::merge_states(state_set& s_set) -> state { SASSERT(s_set.num_elems() > 0); state prev_s; bool first_iter = true; - for (auto const& s: s_set) { + for (auto s: s_set) { if (first_iter) { prev_s = s; first_iter = false; @@ -722,84 +828,106 @@ namespace smt { return prev_s; } - bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) { - // Simple placeholder. 
TODO: Implement full check - return true; + /* + if s is not live, mark it, and recurse on all states into s + Precondition: s is live or unknown + */ + void seq_regex::seen_states::mark_live_recursive(state s) { + SASSERT(m_live.contains(s) || m_unknown.contains(s)); + if (m_live.contains(s)) return; + mark_live(s); + for (auto s_from: *m_from.find(s)) { + mark_live_recursive(s_from); + } } - void seq_regex::seen_states::find_and_merge_cycles(state s1, state s2) { - // Search backwards from s1 to see if (s1, s2) creates a cycle. - if (s1 == s2) return; + + /* + check if s is now known to be dead. If so, mark and recurse + on all states into s. + Precondition: s is live, dead, or unknown + */ + void seq_regex::seen_states::mark_dead_recursive(state s) { + SASSERT(!m_unvisited.contains(s)); + if (!m_unknown.contains(s)) return; + for (auto s_to: *m_to.find(s)) { + // unknown pointing to live should have been marked as live + SASSERT(!m_live.contains(s_to)); + if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return; + } + // all states from s are dead + mark_dead(s); + for (auto s_from: *m_from.find(s)) { + mark_dead_recursive(s_from); + } + } + + /* + if new edges from s1 to s_to will create at least one cycle, + merge all states in the new SCC + */ + auto seq_regex::seen_states::merge_all_cycles(state s1, state_set& s_to) + -> state { + // Mark s_to, then search backwards from s to mark the SCC // TODO: Implement full check - // Simple placeholder for now: check if this is a loop or if there - // is an edge both ways - if (m_to.find(s2)->contains(s1)) { - merge_states(s1, s2); + // Simple placeholder for now: check if there is an edge both ways + for (auto s2: s_to) { + if (m_to.find(s2)->contains(s1)) { + s1 = merge_states(s1, s2); + } } + return s1; } - void seq_regex::seen_states::add_state(expr* e) { - unsigned id = e->get_id(); - if (m_seen.contains(id)) return; - if (m_seen.num_elems() >= m_max_size) { + auto seq_regex::seen_states::get_state(expr* 
e) -> state { + return m_state_ufind.find(e->get_id()); + } + bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) { + // Simple placeholder. TODO: Implement full check + return true; + } + + void seq_regex::seen_states::add_state(expr* e, bool live) { + unsigned s = e->get_id(); + if (m_seen.contains(s)) return; + if (s >= m_max_size) { STRACE("seq_regex", tout << "Warning: max size of seen states reached!" << std::endl;); STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); return; } - // Save e as expr_ref so it's not deleted + // Save e as expr_ref so it's not deallocated m_trail.push_back(e); - // Ensure corresponding var in connected components - while (id >= m_state_ufind.get_num_vars()) { - m_state_ufind.mk_var(); - } - // Initialize as unvisited - m_seen.insert(id); - m_unvisited.insert(id); - m_to.insert(id, new state_set()); - m_from_cycle.insert(id, new state_set()); - m_from_nocycle.insert(id, new state_set()); - } - void seq_regex::seen_states::add_transition(expr* e1, expr* e2) { - // Precondition: e1 and e2 already correspond to existing states + // Add state + add_state(s); + if (live) mark_live_recursive(s); + } + void seq_regex::seen_states::add_all_transitions(expr* e1) { + // Precondition: e already corresponds to an existing state SASSERT(m_seen.contains(e1->get_id())); - SASSERT(m_seen.contains(e2->get_id())); state s1 = get_state(e1); - state s2 = get_state(e2); - if (s1 == s2) { - return; + if (!m_unvisited.contains(s1)) return; + // Add edges to all derivatives + expr_ref_vector derivatives(m); + m_parent.get_all_derivatives(e1, derivatives); + mark_unknown(s1); + bool s1_live = false; + state_set s2_set = *(new state_set()); + for (auto const& e2: derivatives) { + state s2 = get_state(e2); + bool maybecycle = can_be_in_cycle(e1, e2); + add_edge(s1, s2, maybecycle); + if (m_live.contains(s2)) s1_live = true; } - // TODO: - // If e1 is dead, assert e1 is marked dead - // If e1 is live, add edge and return - // If e2 is 
live, mark e1 live, propagate backwards - else if (!can_be_in_cycle(e1, e2)) { - // Don't need to check for cycles here - if (m_from_nocycle.find(s2)->contains(s1)) { - return; - } - else if (m_from_cycle.find(s2)->contains(s2)) { - // update edge label - m_from_cycle.find(s2)->remove(s2); - m_from_nocycle.find(s2)->insert(s1); - } - else { - // add edge - m_to.find(s1)->insert(s2); - m_from_nocycle.find(s2)->insert(s1); - } - } - else if (m_to.find(s1)->contains(s2)) { + if (s1_live) { + mark_live_recursive(s1); return; } - else { - // Need to check for cycles here - m_to.find(s1)->insert(s2); - m_from_cycle.find(s2)->insert(s1); - find_and_merge_cycles(s1, s2); - } + s1 = merge_all_cycles(s1, s2_set); + // check if dead + mark_dead_recursive(s1); } - bool seq_regex::seen_states::is_alive(expr* e) { - return m_alive.contains(get_state(e)); + bool seq_regex::seen_states::is_live(expr* e) { + return m_live.contains(get_state(e)); } bool seq_regex::seen_states::is_dead(expr* e) { return m_dead.contains(get_state(e)); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 05e4ee5ca9e..4b1c4028095 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -52,51 +52,57 @@ namespace smt { // typedef uint_map exprs_of_state; private: + ast_manager& m; + seq_regex& m_parent; + /* All states are exactly one of: - - alive: known to be nonempty + - live: known to be nonempty - dead: known to be empty - unknown: all outgoing transitions have been - seen, but the state is not known - to be alive or dead - - unvisited: not all outgoing transitions have - been seen - - The set m_seen keeps all of these and in addition, - seen states that have been merged and no longer reprsent - a current SCC. + added, but the state is not known + to be live or dead + - unvisited: outgoing transitions have not been added + + As SCCs are merged, some states become aliases, and a + union find data structure collapses a now obsolete + state to its current representative. 
m_seen keeps track + of states we have seen, including obsolete states. */ - state_set m_seen; - state_set m_alive; + state_set m_live; state_set m_dead; state_set m_unknown; state_set m_unvisited; + state_set m_seen; + state_ufind m_state_ufind; + + void add_state(state s); // unvisited + seen + void remove_state(state s); // * -> m_seen only + void mark_unknown(state s); // unvisited -> unknown - void mark_alive(state s); // unknown -> alive + void mark_live(state s); // unknown -> live void mark_dead(state s); // unknown -> dead - bool is_resolved(state s); // alive or dead - bool is_unresolved(state s); // unknown or unvisited + // bool is_resolved(state s); // live or dead + // bool is_unresolved(state s); // unknown or unvisited /* - Initially a state is represented by an expression ID. - A union find data structure collapses an ID to a state. - Edges are saved in both from and to maps. - Additionally edges from are divided into those possibly - in a cycle, and those not in a cycle. + A subset of edges are also marked as possibly being + part of a cycle by being stored in m_from_maybecycle. 
*/ - state_ufind m_state_ufind; + edge_rel m_from; + edge_rel m_to; + edge_rel m_from_maybecycle; + + void add_edge(state s1, state s2, bool maybecycle); + void remove_edge(state s1, state s2); + void rename_edge(state old1, state old2, state new1, state new2); - state get_state(expr* e); state merge_states(state s1, state s2); state merge_states(state_set& s_set); - edge_rel m_from_cycle; - edge_rel m_from_nocycle; - edge_rel m_to; - /* Caching details */ @@ -104,29 +110,39 @@ namespace smt { expr_ref_vector m_trail; /* - Core cycle-detection routine + Core algorithmic search routines + - live state propagation + - dead state propagation + - cycle detection + */ + void mark_live_recursive(state s); + void mark_dead_recursive(state s); + state merge_all_cycles(state s1, state_set& s_to); + + /* + Methods on original expressions (before they are turned + into states) */ - // Heuristic on syntactic expressions + // Convert expression to state + state get_state(expr* e); + // Cycle-detection heuristic (sound but not complete) bool can_be_in_cycle(expr* e1, expr* e2); - // Full check: if new edge (s1, s2) will create at least one cycle, - // merge all states in the new SCC - void find_and_merge_cycles(state s1, state s2); public: /* - Main exposed methods: - - adding a state - - adding a transition from a state - - checking if a state is known to be alive or dead + Exposed methods: + - adding a state and all its transitions + - checking if a state is known to be live or dead */ - void add_state(expr* e); - void add_transition(expr* e1, expr* e2); - bool is_alive(expr* e); + void add_state(expr* e, bool live); + void add_all_transitions(expr* e1); + bool is_live(expr* e); bool is_dead(expr* e); - seen_states(ast_manager& m): - m_seen(), m_alive(), m_dead(), m_unknown(), m_unvisited(), - m_state_ufind(), m_from_cycle(), m_from_nocycle(), m_to(), + seen_states(ast_manager& m, seq_regex& parent): + m(m), m_parent(parent), + m_live(), m_dead(), m_unknown(), m_unvisited(), 
m_seen(), + m_state_ufind(), m_from(), m_to(), m_from_maybecycle(), m_trail(m) {} }; @@ -155,11 +171,12 @@ namespace smt { propagation_lit(): m_lit(null_literal), m_trigger(null_literal) {} }; - theory_seq& th; - context& ctx; - ast_manager& m; - vector m_s_in_re; - scoped_vector m_to_propagate; + theory_seq& th; + context& ctx; + ast_manager& m; + vector m_s_in_re; + scoped_vector m_to_propagate; + seen_states m_seen_states; seq_util& u(); class seq_util::re& re(); @@ -194,6 +211,7 @@ namespace smt { expr_ref derivative_wrapper(expr* hd, expr* r); void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); + void get_all_derivatives(expr* r, expr_ref_vector& results); void get_cofactors(expr* r, expr_ref_pair_vector& result) { expr_ref_vector conds(m); From d96a274a79efe56f7cbe78b8717d1f54cf85a3ee Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sat, 27 Jun 2020 20:16:21 -0400 Subject: [PATCH 20/51] refactor dead state class to separate self-contained state_graph class --- src/smt/seq_regex.cpp | 46 ++++----- src/smt/seq_regex.h | 232 ++++++++++++++++++++++-------------------- 2 files changed, 142 insertions(+), 136 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 43b510f08be..9e28acf4289 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -25,7 +25,7 @@ namespace smt { th(th), ctx(th.get_context()), m(th.get_manager()), - m_seen_states(m, *this) + m_state_graph(m, *this) {} seq_util& seq_regex::u() { return th.m_util; } @@ -676,10 +676,10 @@ namespace smt { } /**************************************************** - *** Dead state elimination and seen_states class *** + *** Dead state elimination and state_graph class *** ****************************************************/ - void seq_regex::seen_states::add_state(state s) { + void state_graph::add_state(state s) { SASSERT(!m_seen.contains(s)); // Ensure corresponding var in connected components while (s >= m_state_ufind.get_num_vars()) { @@ 
-692,7 +692,7 @@ namespace smt { m_from.insert(s, new state_set()); m_from_maybecycle.insert(s, new state_set()); } - void seq_regex::seen_states::remove_state(state s) { + void state_graph::remove_state(state s) { // This is a partial deletion -- the state is still seen and can't be // added again later SASSERT(m_seen.contains(s)); @@ -720,30 +720,30 @@ namespace smt { } } - void seq_regex::seen_states::mark_unknown(state s) { + void state_graph::mark_unknown(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unvisited.contains(s)); m_unvisited.remove(s); m_unknown.insert(s); } - void seq_regex::seen_states::mark_live(state s) { + void state_graph::mark_live(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); m_unknown.remove(s); m_live.insert(s); } - void seq_regex::seen_states::mark_dead(state s) { + void state_graph::mark_dead(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); m_unknown.remove(s); m_dead.insert(s); } - // bool seq_regex::seen_states::is_resolved(state s) { + // bool state_graph::is_resolved(state s) { // SASSERT(m_state_ufind.is_root(s)); // return (m_live.contains(s) || m_dead.contains(s)); // } - // bool seq_regex::seen_states::is_unresolved(state s) { + // bool state_graph::is_unresolved(state s) { // SASSERT(m_state_ufind.is_root(s)); // return (m_unknown.contains(s) || m_unvisited.contains(s)); // } @@ -753,7 +753,7 @@ namespace smt { May already exist, in which case a nocycle edge overrides a cycle edge. 
*/ - void seq_regex::seen_states::add_edge(state s1, state s2, + void state_graph::add_edge(state s1, state s2, bool maybecycle) { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); @@ -769,7 +769,7 @@ namespace smt { m_from_maybecycle.find(s2)->remove(s1); } } - void seq_regex::seen_states::remove_edge(state s1, state s2) { + void state_graph::remove_edge(state s1, state s2) { SASSERT(m_to.find(s1)->contains(s2)); SASSERT(m_from.find(s2)->contains(s1)); m_to.find(s1)->remove(s2); @@ -778,7 +778,7 @@ namespace smt { m_from_maybecycle.find(s2)->remove(s1); } } - void seq_regex::seen_states::rename_edge(state old1, state old2, + void state_graph::rename_edge(state old1, state old2, state new1, state new2) { SASSERT(m_to.find(old1)->contains(old2)); SASSERT(m_from.find(old2)->contains(old1)); @@ -796,7 +796,7 @@ namespace smt { Also, each state should be current (not a previous SCC that was later merged into another). */ - auto seq_regex::seen_states::merge_states(state s1, state s2) -> state { + auto state_graph::merge_states(state s1, state s2) -> state { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); SASSERT(m_unknown.contains(s1)); @@ -813,7 +813,7 @@ namespace smt { remove_state(s2); return s1; } - auto seq_regex::seen_states::merge_states(state_set& s_set) -> state { + auto state_graph::merge_states(state_set& s_set) -> state { SASSERT(s_set.num_elems() > 0); state prev_s; bool first_iter = true; @@ -832,7 +832,7 @@ namespace smt { if s is not live, mark it, and recurse on all states into s Precondition: s is live or unknown */ - void seq_regex::seen_states::mark_live_recursive(state s) { + void state_graph::mark_live_recursive(state s) { SASSERT(m_live.contains(s) || m_unknown.contains(s)); if (m_live.contains(s)) return; mark_live(s); @@ -846,7 +846,7 @@ namespace smt { on all states into s. 
Precondition: s is live, dead, or unknown */ - void seq_regex::seen_states::mark_dead_recursive(state s) { + void state_graph::mark_dead_recursive(state s) { SASSERT(!m_unvisited.contains(s)); if (!m_unknown.contains(s)) return; for (auto s_to: *m_to.find(s)) { @@ -865,7 +865,7 @@ namespace smt { if new edges from s1 to s_to will create at least one cycle, merge all states in the new SCC */ - auto seq_regex::seen_states::merge_all_cycles(state s1, state_set& s_to) + auto state_graph::merge_all_cycles(state s1, state_set& s_to) -> state { // Mark s_to, then search backwards from s to mark the SCC // TODO: Implement full check @@ -878,15 +878,15 @@ namespace smt { return s1; } - auto seq_regex::seen_states::get_state(expr* e) -> state { + auto state_graph::get_state(expr* e) -> state { return m_state_ufind.find(e->get_id()); } - bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) { + bool state_graph::can_be_in_cycle(expr *e1, expr *e2) { // Simple placeholder. TODO: Implement full check return true; } - void seq_regex::seen_states::add_state(expr* e, bool live) { + void state_graph::add_state(expr* e, bool live) { unsigned s = e->get_id(); if (m_seen.contains(s)) return; if (s >= m_max_size) { @@ -900,7 +900,7 @@ namespace smt { add_state(s); if (live) mark_live_recursive(s); } - void seq_regex::seen_states::add_all_transitions(expr* e1) { + void state_graph::add_all_transitions(expr* e1) { // Precondition: e already corresponds to an existing state SASSERT(m_seen.contains(e1->get_id())); state s1 = get_state(e1); @@ -926,10 +926,10 @@ namespace smt { mark_dead_recursive(s1); } - bool seq_regex::seen_states::is_live(expr* e) { + bool state_graph::is_live(expr* e) { return m_live.contains(get_state(e)); } - bool seq_regex::seen_states::is_dead(expr* e) { + bool state_graph::is_dead(expr* e) { return m_dead.contains(get_state(e)); } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 4b1c4028095..ee809d65bda 100644 --- a/src/smt/seq_regex.h +++ 
b/src/smt/seq_regex.h @@ -29,123 +29,129 @@ namespace smt { class theory_seq; - class seq_regex { + class seq_regex; + + /* + state_graph + + Data structure which calculates live states and dead states. + + ---- + + Info saved about the set of states (regexes) seen so far. + + "States" here are strongly connected components -- states that + are mutually reachable from each other. States + are represented as unsigned integers. + + Used for the core incremental dead state elimination algorithm. + + Class invariants: + - TODO + */ + class state_graph { + typedef unsigned state; + typedef uint_set state_set; + typedef uint_map edge_rel; + typedef basic_union_find state_ufind; + // typedef uint_map exprs_of_state; + + private: + ast_manager& m; + seq_regex& m_parent; + /* - seen_states + All states are exactly one of: + - live: known to be nonempty + - dead: known to be empty + - unknown: all outgoing transitions have been + added, but the state is not known + to be live or dead + - unvisited: outgoing transitions have not been added + + As SCCs are merged, some states become aliases, and a + union find data structure collapses a now obsolete + state to its current representative. m_seen keeps track + of states we have seen, including obsolete states. + */ + state_set m_live; + state_set m_dead; + state_set m_unknown; + state_set m_unvisited; + + state_set m_seen; + state_ufind m_state_ufind; - Info saved about the set of states (regexes) seen so far. + void add_state(state s); // unvisited + seen + void remove_state(state s); // * -> m_seen only - "States" here are strongly connected components -- states that - are mutually reachable from each other. States - are represented as unsigned integers. + void mark_unknown(state s); // unvisited -> unknown + void mark_live(state s); // unknown -> live + void mark_dead(state s); // unknown -> dead - Used for the core incremental dead state elimination algorithm. 
+ // bool is_resolved(state s); // live or dead + // bool is_unresolved(state s); // unknown or unvisited - Class invariants: - - TODO + /* + Edges are saved in both from and to maps. + A subset of edges are also marked as possibly being + part of a cycle by being stored in m_from_maybecycle. */ - class seen_states { - typedef unsigned state; - typedef uint_set state_set; - typedef uint_map edge_rel; - typedef basic_union_find state_ufind; - // typedef uint_map exprs_of_state; - - private: - ast_manager& m; - seq_regex& m_parent; - - /* - All states are exactly one of: - - live: known to be nonempty - - dead: known to be empty - - unknown: all outgoing transitions have been - added, but the state is not known - to be live or dead - - unvisited: outgoing transitions have not been added - - As SCCs are merged, some states become aliases, and a - union find data structure collapses a now obsolete - state to its current representative. m_seen keeps track - of states we have seen, including obsolete states. - */ - state_set m_live; - state_set m_dead; - state_set m_unknown; - state_set m_unvisited; - - state_set m_seen; - state_ufind m_state_ufind; - - void add_state(state s); // unvisited + seen - void remove_state(state s); // * -> m_seen only - - void mark_unknown(state s); // unvisited -> unknown - void mark_live(state s); // unknown -> live - void mark_dead(state s); // unknown -> dead - - // bool is_resolved(state s); // live or dead - // bool is_unresolved(state s); // unknown or unvisited - - /* - Edges are saved in both from and to maps. - A subset of edges are also marked as possibly being - part of a cycle by being stored in m_from_maybecycle. 
- */ - edge_rel m_from; - edge_rel m_to; - edge_rel m_from_maybecycle; - - void add_edge(state s1, state s2, bool maybecycle); - void remove_edge(state s1, state s2); - void rename_edge(state old1, state old2, state new1, state new2); - - state merge_states(state s1, state s2); - state merge_states(state_set& s_set); - - /* - Caching details - */ - unsigned m_max_size { 10000 }; - expr_ref_vector m_trail; - - /* - Core algorithmic search routines - - live state propagation - - dead state propagation - - cycle detection - */ - void mark_live_recursive(state s); - void mark_dead_recursive(state s); - state merge_all_cycles(state s1, state_set& s_to); - - /* - Methods on original expressions (before they are turned - into states) - */ - // Convert expression to state - state get_state(expr* e); - // Cycle-detection heuristic (sound but not complete) - bool can_be_in_cycle(expr* e1, expr* e2); - - public: - /* - Exposed methods: - - adding a state and all its transitions - - checking if a state is known to be live or dead - */ - void add_state(expr* e, bool live); - void add_all_transitions(expr* e1); - bool is_live(expr* e); - bool is_dead(expr* e); - - seen_states(ast_manager& m, seq_regex& parent): - m(m), m_parent(parent), - m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), - m_state_ufind(), m_from(), m_to(), m_from_maybecycle(), - m_trail(m) {} - }; + edge_rel m_from; + edge_rel m_to; + edge_rel m_from_maybecycle; + + void add_edge(state s1, state s2, bool maybecycle); + void remove_edge(state s1, state s2); + void rename_edge(state old1, state old2, state new1, state new2); + + state merge_states(state s1, state s2); + state merge_states(state_set& s_set); + /* + Caching details + */ + unsigned m_max_size { 10000 }; + expr_ref_vector m_trail; + + /* + Core algorithmic search routines + - live state propagation + - dead state propagation + - cycle detection + */ + void mark_live_recursive(state s); + void mark_dead_recursive(state s); + state 
merge_all_cycles(state s1, state_set& s_to); + + /* + Methods on original expressions (before they are turned + into states) + */ + // Convert expression to state + state get_state(expr* e); + // Cycle-detection heuristic (sound but not complete) + bool can_be_in_cycle(expr* e1, expr* e2); + + public: + /* + Exposed methods: + - adding a state and all its transitions + - checking if a state is known to be live or dead + */ + void add_state(expr* e, bool live); + void add_all_transitions(expr* e1); + bool is_live(expr* e); + bool is_dead(expr* e); + + state_graph(ast_manager& m, seq_regex& parent): + m(m), m_parent(parent), + m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), + m_state_ufind(), m_from(), m_to(), m_from_maybecycle(), + m_trail(m) {} + }; + + class seq_regex { /* Data about a constraint of the form (str.in_re s R) */ @@ -176,7 +182,7 @@ namespace smt { ast_manager& m; vector m_s_in_re; scoped_vector m_to_propagate; - seen_states m_seen_states; + state_graph m_state_graph; seq_util& u(); class seq_util::re& re(); @@ -211,7 +217,6 @@ namespace smt { expr_ref derivative_wrapper(expr* hd, expr* r); void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); - void get_all_derivatives(expr* r, expr_ref_vector& results); void get_cofactors(expr* r, expr_ref_pair_vector& result) { expr_ref_vector conds(m); @@ -220,6 +225,8 @@ namespace smt { public: + void get_all_derivatives(expr* r, expr_ref_vector& results); + seq_regex(theory_seq& th); void push_scope() { m_to_propagate.push_scope(); } @@ -247,4 +254,3 @@ namespace smt { }; }; - From 47f45faaee435d4a2a2a0c0c782ec3fcb31505d0 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sat, 27 Jun 2020 21:19:46 -0400 Subject: [PATCH 21/51] finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic --- src/smt/seq_regex.cpp | 118 ++++++++++++++++++++++++------------------ src/smt/seq_regex.h | 88 ++++++++++++++++--------------- 2 
files changed, 114 insertions(+), 92 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 9e28acf4289..48d9961e193 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -25,7 +25,8 @@ namespace smt { th(th), ctx(th.get_context()), m(th.get_manager()), - m_state_graph(m, *this) + m_state_graph(), + m_state_trail(m) {} seq_util& seq_regex::u() { return th.m_util; } @@ -679,7 +680,7 @@ namespace smt { *** Dead state elimination and state_graph class *** ****************************************************/ - void state_graph::add_state(state s) { + void state_graph::add_state_core(state s) { SASSERT(!m_seen.contains(s)); // Ensure corresponding var in connected components while (s >= m_state_ufind.get_num_vars()) { @@ -753,8 +754,7 @@ namespace smt { May already exist, in which case a nocycle edge overrides a cycle edge. */ - void state_graph::add_edge(state s1, state s2, - bool maybecycle) { + void state_graph::add_edge_core(state s1, state s2, bool maybecycle) { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); if (s1 == s2) return; @@ -784,7 +784,7 @@ namespace smt { SASSERT(m_from.find(old2)->contains(old1)); bool maybecycle = m_from_maybecycle.find(old2)->contains(old1); remove_edge(old1, old2); - add_edge(new1, new2, maybecycle); + add_edge_core(new1, new2, maybecycle); } /* @@ -865,8 +865,7 @@ namespace smt { if new edges from s1 to s_to will create at least one cycle, merge all states in the new SCC */ - auto state_graph::merge_all_cycles(state s1, state_set& s_to) - -> state { + auto state_graph::merge_all_cycles(state s1, state_set& s_to) -> state { // Mark s_to, then search backwards from s to mark the SCC // TODO: Implement full check // Simple placeholder for now: check if there is an edge both ways @@ -878,59 +877,78 @@ namespace smt { return s1; } - auto state_graph::get_state(expr* e) -> state { - return m_state_ufind.find(e->get_id()); + void state_graph::add_state(state s, bool live) { + if 
(m_seen.contains(s)) return; + add_state_core(s); + if (live) mark_live_recursive(s); + } + void state_graph::add_edge(state s1, state s2, bool maybecycle) { + SASSERT(m_seen.contains(s1)); + SASSERT(m_seen.contains(s2)); + s1 = m_state_ufind.find(s1); + s2 = m_state_ufind.find(s1); + add_edge_core(s1, s2, maybecycle); + if (m_live.contains(s2)) { + if (m_unvisited.contains(s1)) mark_unknown(s1); + mark_live_recursive(s1); + } + } + void state_graph::done_adding(state s) { + s = m_state_ufind.find(s); + if (m_unvisited.contains(s)) mark_unknown(s); + s = merge_all_cycles(s, *m_to.find(s)); + // check if dead + mark_dead_recursive(s); + } + + unsigned state_graph::get_size() { + return m_state_ufind.get_num_vars(); + } + + bool state_graph::is_live(state s) { + return m_live.contains(m_state_ufind.find(s)); + } + bool state_graph::is_dead(state s) { + return m_dead.contains(m_state_ufind.find(s)); + } + + // ********************************** + + unsigned seq_regex::get_state_id(expr* e) { + return e->get_id(); } - bool state_graph::can_be_in_cycle(expr *e1, expr *e2) { + bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) { // Simple placeholder. TODO: Implement full check return true; } - void state_graph::add_state(expr* e, bool live) { - unsigned s = e->get_id(); - if (m_seen.contains(s)) return; - if (s >= m_max_size) { - STRACE("seq_regex", tout << "Warning: max size of seen states reached!" << std::endl;); + /* + Update the state graph with expression r and all its derivatives. + */ + bool seq_regex::update_state_graph(expr* r) { + if (m_state_graph.get_size() >= m_max_state_graph_size) { + STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" 
<< std::endl;); STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); - return; + return false; } // Save e as expr_ref so it's not deallocated - m_trail.push_back(e); - // Add state - add_state(s); - if (live) mark_live_recursive(s); - } - void state_graph::add_all_transitions(expr* e1) { - // Precondition: e already corresponds to an existing state - SASSERT(m_seen.contains(e1->get_id())); - state s1 = get_state(e1); - if (!m_unvisited.contains(s1)) return; + m_state_trail.push_back(r); + // Add state, live if nullable + unsigned r_id = get_state_id(r); + bool r_nullable = m.is_true(is_nullable_wrapper(r)); + m_state_graph.add_state(r_id, r_nullable); // Add edges to all derivatives expr_ref_vector derivatives(m); - m_parent.get_all_derivatives(e1, derivatives); - mark_unknown(s1); - bool s1_live = false; - state_set s2_set = *(new state_set()); - for (auto const& e2: derivatives) { - state s2 = get_state(e2); - bool maybecycle = can_be_in_cycle(e1, e2); - add_edge(s1, s2, maybecycle); - if (m_live.contains(s2)) s1_live = true; - } - if (s1_live) { - mark_live_recursive(s1); - return; - } - s1 = merge_all_cycles(s1, s2_set); - // check if dead - mark_dead_recursive(s1); - } - - bool state_graph::is_live(expr* e) { - return m_live.contains(get_state(e)); - } - bool state_graph::is_dead(expr* e) { - return m_dead.contains(get_state(e)); + get_all_derivatives(r, derivatives); + for (auto const& dr: derivatives) { + unsigned dr_id = get_state_id(dr); + bool dr_nullable = m.is_true(is_nullable_wrapper(dr)); + m_state_graph.add_state(dr_id, dr_nullable); + bool maybecycle = can_be_in_cycle(r, dr); + m_state_graph.add_edge(r_id, dr_id, maybecycle); + } + m_state_graph.done_adding(r_id); + return true; } } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index ee809d65bda..84829f1c577 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -34,17 +34,21 @@ namespace smt { /* state_graph - Data structure which calculates live states and dead states. 
- - ---- - - Info saved about the set of states (regexes) seen so far. - - "States" here are strongly connected components -- states that - are mutually reachable from each other. States - are represented as unsigned integers. - - Used for the core incremental dead state elimination algorithm. + Data structure which is capable of incrementally tracking + live states and dead states. + + "States" are integers. States and edges are added to the data + structure incrementally. + - Some states are initially labeled as live. The data structure + tracks which other states are live (can reach a live state), dead + (can't reach a live state), or neither. + - Some edges are labeled as not contained in a cycle. This is to + optimize search if it is known by the user of the structure + that no cycle will ever contain this edge. + + Internally, we use union_find to identify states within an SCC, + and incrementally update SCCs, while propagating backwards + live and dead SCCs. Class invariants: - TODO @@ -54,12 +58,8 @@ namespace smt { typedef uint_set state_set; typedef uint_map edge_rel; typedef basic_union_find state_ufind; - // typedef uint_map exprs_of_state; private: - ast_manager& m; - seq_regex& m_parent; - /* All states are exactly one of: - live: known to be nonempty @@ -82,8 +82,8 @@ namespace smt { state_set m_seen; state_ufind m_state_ufind; - void add_state(state s); // unvisited + seen - void remove_state(state s); // * -> m_seen only + void add_state_core(state s); // unvisited + seen + void remove_state(state s); // * -> m_seen only void mark_unknown(state s); // unvisited -> unknown void mark_live(state s); // unknown -> live @@ -101,19 +101,13 @@ namespace smt { edge_rel m_to; edge_rel m_from_maybecycle; - void add_edge(state s1, state s2, bool maybecycle); + void add_edge_core(state s1, state s2, bool maybecycle); void remove_edge(state s1, state s2); void rename_edge(state old1, state old2, state new1, state new2); state merge_states(state s1, state s2); 
state merge_states(state_set& s_set); - /* - Caching details - */ - unsigned m_max_size { 10000 }; - expr_ref_vector m_trail; - /* Core algorithmic search routines - live state propagation @@ -124,31 +118,28 @@ namespace smt { void mark_dead_recursive(state s); state merge_all_cycles(state s1, state_set& s_to); - /* - Methods on original expressions (before they are turned - into states) - */ - // Convert expression to state - state get_state(expr* e); - // Cycle-detection heuristic (sound but not complete) - bool can_be_in_cycle(expr* e1, expr* e2); - public: /* Exposed methods: - adding a state and all its transitions - checking if a state is known to be live or dead + + ASSUMPTION: transitions from a state are added in order and after + all transitions are added, the state is marked as + finished. Also all states are added before the transitions. */ - void add_state(expr* e, bool live); - void add_all_transitions(expr* e1); - bool is_live(expr* e); - bool is_dead(expr* e); + void add_state(state s, bool live); + void add_edge(state s1, state s2, bool maybecycle); + void done_adding(state s); + unsigned get_size(); + + bool is_live(state s); + bool is_dead(state s); - state_graph(ast_manager& m, seq_regex& parent): - m(m), m_parent(parent), + state_graph(): m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), - m_state_ufind(), m_from(), m_to(), m_from_maybecycle(), - m_trail(m) {} + m_state_ufind(), m_from(), m_to(), m_from_maybecycle() + {} }; class seq_regex { @@ -182,7 +173,20 @@ namespace smt { ast_manager& m; vector m_s_in_re; scoped_vector m_to_propagate; - state_graph m_state_graph; + + /* + state_graph for dead state detection, + and associated methods + */ + state_graph m_state_graph; + expr_ref_vector m_state_trail; + unsigned m_max_state_graph_size { 10000 }; + // Convert expression to state + unsigned get_state_id(expr* e); + // Cycle-detection heuristic (sound but not complete) + bool can_be_in_cycle(expr* e1, expr* e2); + // Update the graph + 
bool update_state_graph(expr* r); seq_util& u(); class seq_util::re& re(); From ae3a91a78ad0aa3de54fcf6664e4fd2c1fdbf378 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sun, 28 Jun 2020 10:03:10 -0400 Subject: [PATCH 22/51] implement get_all_derivatives, add debug tracing --- src/smt/seq_regex.cpp | 88 ++++++++++++++++++++++++++++--------------- src/smt/seq_regex.h | 1 + 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 48d9961e193..a78f64a6f89 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -209,10 +209,14 @@ namespace smt { << "," << r->get_id() << ") ";); - if (re().is_empty(r)) { + if (re().is_empty(r) + || m_state_graph.is_dead(get_state_id(r))) { th.add_axiom(~lit); return true; } + if (!m.is_ite(r) && is_ground(r)) { + update_state_graph(r); + } if (block_unfolding(lit, idx)) return true; @@ -594,29 +598,23 @@ namespace smt { } void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) { - // Use get_cofactors method and check which conds are - // satisfiable - // TODO - return; - // - // get_cofactors(d, cofactors); - // for (auto const& p : cofactors) { - // if (is_member(p.second, u)) - // continue; - // expr_ref cond(p.first, m); - // seq_rw().elim_condition(hd, cond); - // rewrite(cond); - // if (m.is_false(cond)) - // continue; - // lits.reset(); - // lits.push_back(~lit); - // if (!m.is_true(cond)) { - // expr_ref ncond(mk_not(m, cond), m); - // lits.push_back(th.mk_literal(mk_forall(m, hd, ncond))); - // } - // expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, r), n); - // lits.push_back(th.mk_literal(is_empty1)); - // th.add_axiom(lits); + // Get derivative + sort* seq_sort = nullptr; + VERIFY(u().is_re(r, seq_sort)); + expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); + expr_ref hd = mk_first(r, n); + expr_ref d(m); + d = derivative_wrapper(hd, r); + // Use get_cofactors method and filter out unsatisfiable conds + 
expr_ref_pair_vector cofactors(m); + get_cofactors(d, cofactors); + for (auto const& p : cofactors) { + expr_ref cond(p.first, m); + seq_rw().elim_condition(hd, cond); + rewrite(cond); + if (m.is_false(cond)) continue; + results.push_back(p.second); + } } /* @@ -724,18 +722,21 @@ namespace smt { void state_graph::mark_unknown(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unvisited.contains(s)); + STRACE("seq_regex_brief", tout << "unk(" << s << ") ";); m_unvisited.remove(s); m_unknown.insert(s); } void state_graph::mark_live(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); + STRACE("seq_regex_brief", tout << "live(" << s << ") ";); m_unknown.remove(s); m_live.insert(s); } void state_graph::mark_dead(state s) { SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); + STRACE("seq_regex_brief", tout << "dead(" << s << ") ";); m_unknown.remove(s); m_dead.insert(s); } @@ -757,15 +758,18 @@ namespace smt { void state_graph::add_edge_core(state s1, state s2, bool maybecycle) { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";); if (s1 == s2) return; if (!m_to.find(s1)->contains(s2)) { // add new edge + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: new edge! ";); m_to.find(s1)->insert(s2); m_from.find(s2)->insert(s1); if (maybecycle) m_from_maybecycle.find(s2)->insert(s1); } else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) { // update existing edge + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: update edge! 
";); m_from_maybecycle.find(s2)->remove(s1); } } @@ -801,6 +805,7 @@ namespace smt { SASSERT(m_state_ufind.is_root(s2)); SASSERT(m_unknown.contains(s1)); SASSERT(m_unknown.contains(s2)); + STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";); m_state_ufind.merge(s1, s2); if (m_state_ufind.is_root(s1)) std::swap(s1, s2); // merge edges @@ -815,7 +820,7 @@ namespace smt { } auto state_graph::merge_states(state_set& s_set) -> state { SASSERT(s_set.num_elems() > 0); - state prev_s; + state prev_s = 0; // initialization here optional bool first_iter = true; for (auto s: s_set) { if (first_iter) { @@ -849,7 +854,9 @@ namespace smt { void state_graph::mark_dead_recursive(state s) { SASSERT(!m_unvisited.contains(s)); if (!m_unknown.contains(s)) return; + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); for (auto s_to: *m_to.find(s)) { + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: m_to searching: " << s_to << " ";); // unknown pointing to live should have been marked as live SASSERT(!m_live.contains(s_to)); if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return; @@ -879,14 +886,20 @@ namespace smt { void state_graph::add_state(state s, bool live) { if (m_seen.contains(s)) return; + STRACE("seq_regex_brief", tout << "add(" << s << "," << live << ") ";); add_state_core(s); - if (live) mark_live_recursive(s); + if (live) { + mark_unknown(s); + mark_live_recursive(s); + } } void state_graph::add_edge(state s1, state s2, bool maybecycle) { SASSERT(m_seen.contains(s1)); SASSERT(m_seen.contains(s2)); + STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 + << "," << maybecycle << ") ";); s1 = m_state_ufind.find(s1); - s2 = m_state_ufind.find(s1); + s2 = m_state_ufind.find(s2); add_edge_core(s1, s2, maybecycle); if (m_live.contains(s2)) { if (m_unvisited.contains(s1)) mark_unknown(s1); @@ -899,12 +912,16 @@ namespace smt { s = merge_all_cycles(s, *m_to.find(s)); // check if dead 
mark_dead_recursive(s); + STRACE("seq_regex_brief", tout << "done(" << s << ") ";); } unsigned state_graph::get_size() { return m_state_ufind.get_num_vars(); } + bool state_graph::is_seen(state s) { + return m_seen.contains(s); + } bool state_graph::is_live(state s) { return m_live.contains(m_state_ufind.find(s)); } @@ -926,28 +943,39 @@ namespace smt { Update the state graph with expression r and all its derivatives. */ bool seq_regex::update_state_graph(expr* r) { + unsigned r_id = get_state_id(r); + if (m_state_graph.is_seen(r_id)) return false; if (m_state_graph.get_size() >= m_max_state_graph_size) { STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;); STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); return false; } - // Save e as expr_ref so it's not deallocated + STRACE("seq_regex", tout << "Updating state graph for regex " + << mk_pp(r, m) << ") ";); + STRACE("seq_regex_brief", tout << std::endl + << "USG(" << r->get_id() << ") ";); + // Save r as expr_ref so it's not deallocated m_state_trail.push_back(r); // Add state, live if nullable - unsigned r_id = get_state_id(r); bool r_nullable = m.is_true(is_nullable_wrapper(r)); m_state_graph.add_state(r_id, r_nullable); // Add edges to all derivatives expr_ref_vector derivatives(m); + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); get_all_derivatives(r, derivatives); for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); - bool dr_nullable = m.is_true(is_nullable_wrapper(dr)); + STRACE("seq_regex_brief", tout << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); + expr_ref dr_n = is_nullable_wrapper(dr); + STRACE("seq_regex_brief", tout << "1... ";); + bool dr_nullable = m.is_true(dr_n); + STRACE("seq_regex_brief", tout << "2... 
";); m_state_graph.add_state(dr_id, dr_nullable); bool maybecycle = can_be_in_cycle(r, dr); m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.done_adding(r_id); + STRACE("seq_regex_brief", tout << std::endl;); return true; } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 84829f1c577..c48437329e6 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -133,6 +133,7 @@ namespace smt { void done_adding(state s); unsigned get_size(); + bool is_seen(state s); bool is_live(state s); bool is_dead(state s); From 1295529553b8e125e2ab299b51d476efbb7673d3 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sun, 28 Jun 2020 11:18:54 -0400 Subject: [PATCH 23/51] trace statements for debugging is_nullable loop bug --- src/ast/rewriter/seq_rewriter.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 253fb0ca050..47ebc856e81 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2182,17 +2182,20 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { } expr_ref seq_rewriter::is_nullable(expr* r) { - // STRACE("seq_regex_brief", tout << "n";); + STRACE("seq_verbose", tout << "is_nullable: " + << mk_pp(r, m()) << std::endl;); expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable_rec(r); - m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); + m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); } + STRACE("seq_verbose", tout << "is_nullable result: " + << mk_pp(result, m()) << std::endl;); return result; } expr_ref seq_rewriter::is_nullable_rec(expr* r) { - // STRACE("seq_regex_brief", tout << ".";); // recursive call + STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; sort* seq_sort = nullptr; @@ -2367,12 
+2370,16 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { Duplicate nested conditions are eliminated. */ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { + STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) + << "," << mk_pp(r, m()) << std::endl;); // STRACE("seq_regex_brief", tout << "d";); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result); } + STRACE("seq_verbose", tout << "derivative result: " + << mk_pp(result, m()) << std::endl;); return result; } @@ -2904,6 +2911,9 @@ Disabled rewrite: */ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { + STRACE("seq_verbose", tout << "mk_str_in_regexp: " << mk_pp(a, m()) + << ", " << mk_pp(b, m()) << std::endl;); + if (re().is_empty(b)) { result = m().mk_false(); return BR_DONE; @@ -2919,10 +2929,14 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { } if (str().is_empty(a)) { result = is_nullable(b); - if (str().is_in_re(result)) + if (str().is_in_re(result)) { + // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_DONE" << std::endl;); return BR_DONE; - else + } + else { + // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_REWRITE_FULL" << std::endl;); return BR_REWRITE_FULL; + } } expr_ref hd(m()), tl(m()); From 6d4008c3f5f55b65e4402d4552ca007f58e03103 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sun, 28 Jun 2020 12:31:17 -0400 Subject: [PATCH 24/51] fix is_nullable loop bug --- src/ast/rewriter/seq_rewriter.cpp | 51 ++++++++++++++++++++++++++----- src/ast/rewriter/seq_rewriter.h | 3 ++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 47ebc856e81..6c4ccb8ef5b 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2194,8 +2194,45 @@ expr_ref 
seq_rewriter::is_nullable(expr* r) { return result; } +void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) { + expr *s1 = nullptr, *r1 = nullptr; + if (str().is_in_re(a1, s1, r1)) { + SASSERT(str().is_empty(s1)); + result = re().mk_complement(r1); + result = re().mk_in_re(s1, result); + } + else { + m_br.mk_not(a1, result); + } +} +void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) { + expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; + if (str().is_in_re(a1, s1, r1) && + str().is_in_re(a2, s2, r2)) { + SASSERT(str().is_empty(s1)); + SASSERT(str().is_empty(s2)); + result = re().mk_inter(r1, r2); + result = re().mk_in_re(s1, result); + } + else { + m_br.mk_and(a1, a2, result); + } +} +void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) { + expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; + if (str().is_in_re(a1, s1, r1) && + str().is_in_re(a2, s2, r2)) { + SASSERT(str().is_empty(s1)); + SASSERT(str().is_empty(s2)); + result = re().mk_union(r1, r2); + result = re().mk_in_re(s1, result); + } + else { + m_br.mk_or(a1, a2, result); + } +} expr_ref seq_rewriter::is_nullable_rec(expr* r) { - STRACE("seq_regex_brief", tout << ".";); // recursive call + // STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; sort* seq_sort = nullptr; @@ -2203,15 +2240,15 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { zstring s1; expr_ref result(m()); if (re().is_concat(r, r1, r2) || - re().is_intersection(r, r1, r2)) { - m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + re().is_intersection(r, r1, r2)) { + mk_nullable_and(is_nullable(r1), is_nullable(r2), result); } else if (re().is_union(r, r1, r2)) { - m_br.mk_or(is_nullable(r1), is_nullable(r2), result); + mk_nullable_or(is_nullable(r1), is_nullable(r2), result); } else if (re().is_diff(r, r1, r2)) { - m_br.mk_not(is_nullable(r2), result); 
- m_br.mk_and(result, is_nullable(r1), result); + mk_nullable_not(is_nullable(r2), result); + mk_nullable_and(result, is_nullable(r1), result); } else if (re().is_star(r) || re().is_opt(r) || @@ -2233,7 +2270,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { result = is_nullable(r1); } else if (re().is_complement(r, r1)) { - m_br.mk_not(is_nullable(r1), result); + mk_nullable_not(is_nullable(r1), result); } else if (re().is_to_re(r, r1)) { result = is_nullable(r1); diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 1ac8d0157cd..c7c01eeacf0 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -191,6 +191,9 @@ class seq_rewriter { // Calculate derivative, memoized and enforcing a normal form expr_ref is_nullable_rec(expr* r); + void mk_nullable_not(expr* a1, expr_ref& result); + void mk_nullable_and(expr* a1, expr* a2, expr_ref& result); + void mk_nullable_or(expr* a1, expr* a2, expr_ref& result); expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); From d2cfb2a61294e9eac9ab951a6d0b21ffc0b2d831 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sun, 28 Jun 2020 15:03:51 -0400 Subject: [PATCH 25/51] comment out local nullable change and mark experimental --- src/ast/rewriter/seq_rewriter.cpp | 107 ++++++++++++++++-------------- src/ast/rewriter/seq_rewriter.h | 7 +- 2 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4af19969ab0..45f6cf5489c 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2192,43 +2192,44 @@ expr_ref seq_rewriter::is_nullable(expr* r) { return result; } -void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) { - expr *s1 = nullptr, *r1 = nullptr; - if (str().is_in_re(a1, s1, r1)) { - SASSERT(str().is_empty(s1)); - result = 
re().mk_complement(r1); - result = re().mk_in_re(s1, result); - } - else { - m_br.mk_not(a1, result); - } -} -void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) { - expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; - if (str().is_in_re(a1, s1, r1) && - str().is_in_re(a2, s2, r2)) { - SASSERT(str().is_empty(s1)); - SASSERT(str().is_empty(s2)); - result = re().mk_inter(r1, r2); - result = re().mk_in_re(s1, result); - } - else { - m_br.mk_and(a1, a2, result); - } -} -void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) { - expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; - if (str().is_in_re(a1, s1, r1) && - str().is_in_re(a2, s2, r2)) { - SASSERT(str().is_empty(s1)); - SASSERT(str().is_empty(s2)); - result = re().mk_union(r1, r2); - result = re().mk_in_re(s1, result); - } - else { - m_br.mk_or(a1, a2, result); - } -} +// @EXP (experimental change) +// void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) { +// expr *s1 = nullptr, *r1 = nullptr; +// if (str().is_in_re(a1, s1, r1)) { +// SASSERT(str().is_empty(s1)); +// result = re().mk_complement(r1); +// result = re().mk_in_re(s1, result); +// } +// else { +// m_br.mk_not(a1, result); +// } +// } +// void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) { +// expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; +// if (str().is_in_re(a1, s1, r1) && +// str().is_in_re(a2, s2, r2)) { +// SASSERT(str().is_empty(s1)); +// SASSERT(str().is_empty(s2)); +// result = re().mk_inter(r1, r2); +// result = re().mk_in_re(s1, result); +// } +// else { +// m_br.mk_and(a1, a2, result); +// } +// } +// void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) { +// expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; +// if (str().is_in_re(a1, s1, r1) && +// str().is_in_re(a2, s2, r2)) { +// SASSERT(str().is_empty(s1)); +// SASSERT(str().is_empty(s2)); +// result = re().mk_union(r1, r2); +// result 
= re().mk_in_re(s1, result); +// } +// else { +// m_br.mk_or(a1, a2, result); +// } +// } expr_ref seq_rewriter::is_nullable_rec(expr* r) { // STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); @@ -2239,14 +2240,21 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { expr_ref result(m()); if (re().is_concat(r, r1, r2) || re().is_intersection(r, r1, r2)) { - mk_nullable_and(is_nullable(r1), is_nullable(r2), result); + m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + // @EXP (experimental change) + // mk_nullable_and(is_nullable(r1), is_nullable(r2), result); } else if (re().is_union(r, r1, r2)) { - mk_nullable_or(is_nullable(r1), is_nullable(r2), result); + m_br.mk_or(is_nullable(r1), is_nullable(r2), result); + // @EXP (experimental change) + // mk_nullable_or(is_nullable(r1), is_nullable(r2), result); } else if (re().is_diff(r, r1, r2)) { - mk_nullable_not(is_nullable(r2), result); - mk_nullable_and(result, is_nullable(r1), result); + m_br.mk_not(is_nullable(r2), result); + m_br.mk_and(result, is_nullable(r1), result); + // @EXP (experimental change) + // mk_nullable_not(is_nullable(r2), result); + // mk_nullable_and(result, is_nullable(r1), result); } else if (re().is_star(r) || re().is_opt(r) || @@ -2268,7 +2276,9 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { result = is_nullable(r1); } else if (re().is_complement(r, r1)) { - mk_nullable_not(is_nullable(r1), result); + m_br.mk_not(is_nullable(r1), result); + // @EXP (experimental change) + // mk_nullable_not(is_nullable(r1), result); } else if (re().is_to_re(r, r1)) { result = is_nullable(r1); @@ -2493,7 +2503,8 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) { Apply a binary operation, preserving BDD normal form on derivative expressions. 
Preconditions: - - k is a binary op codes on REs: one of concat, intersection, or union + - k is a binary op code on REs: one of concat, intersection, or union + (not difference) - a and b are in BDD form Postcondition: @@ -2664,7 +2675,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2))); } else if (m().is_ite(r, p, r1, r2)) { - // Note: there is no BDD normalization here + // there is no BDD normalization here result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2)); return result; } @@ -2776,10 +2787,10 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { result = array.mk_select(2, args); return re_predicate(result, seq_sort); } - // stuck cases: is_derivative, variable, - // str.to_re if it can't be simplified into a head character and tail - // and re().is_reverse if the reverse is not applied to a string thta - // can be coerced into a tail character and a head + // stuck cases: re.derivative, variable, + // str.to_re if the head of the string can't be obtained, + // and re.reverse if not applied to a string or if the tail char + // of the string can't be obtained return expr_ref(re().mk_derivative(ele, r), m()); } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index c7c01eeacf0..a82e7a6ba65 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -191,9 +191,10 @@ class seq_rewriter { // Calculate derivative, memoized and enforcing a normal form expr_ref is_nullable_rec(expr* r); - void mk_nullable_not(expr* a1, expr_ref& result); - void mk_nullable_and(expr* a1, expr* a2, expr_ref& result); - void mk_nullable_or(expr* a1, expr* a2, expr_ref& result); + // @EXP (experimental change) + // void mk_nullable_not(expr* a1, expr_ref& result); + // void mk_nullable_and(expr* a1, expr* a2, expr_ref& result); + // void mk_nullable_or(expr* a1, expr* a2, expr_ref& result); expr_ref 
mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); From 336d6c8444ffe66090b17e467565de756cef0168 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Sun, 28 Jun 2020 15:19:35 -0400 Subject: [PATCH 26/51] pretty printing for state_graph --- src/smt/seq_regex.cpp | 51 +++++++++++++++++++++++++++++++++++-------- src/smt/seq_regex.h | 18 +++++++++++---- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index a78f64a6f89..6eda9612564 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -613,6 +613,7 @@ namespace smt { seq_rw().elim_condition(hd, cond); rewrite(cond); if (m.is_false(cond)) continue; + if (re().is_empty(p.second)) continue; results.push_back(p.second); } } @@ -758,18 +759,18 @@ namespace smt { void state_graph::add_edge_core(state s1, state s2, bool maybecycle) { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";); if (s1 == s2) return; if (!m_to.find(s1)->contains(s2)) { // add new edge - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: new edge! ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: new edge! ";); m_to.find(s1)->insert(s2); m_from.find(s2)->insert(s1); if (maybecycle) m_from_maybecycle.find(s2)->insert(s1); } else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) { // update existing edge - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: update edge! ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: update edge! 
";); m_from_maybecycle.find(s2)->remove(s1); } } @@ -854,9 +855,9 @@ namespace smt { void state_graph::mark_dead_recursive(state s) { SASSERT(!m_unvisited.contains(s)); if (!m_unknown.contains(s)) return; - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); for (auto s_to: *m_to.find(s)) { - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: m_to searching: " << s_to << " ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: m_to searching: " << s_to << " ";); // unknown pointing to live should have been marked as live SASSERT(!m_live.contains(s_to)); if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return; @@ -929,6 +930,39 @@ namespace smt { return m_dead.contains(m_state_ufind.find(s)); } + // void pretty_print_set(std::ofstream& of, state_set& s_set) { + // for (auto s: s_set) { + // of << " " << s; + // } + // of << std::endl; + // } + void state_graph::pretty_print(std::ofstream& of) { + of << "---------- State Graph ----------" << std::endl; + of << "Seen:"; + for (auto s: m_seen) { + of << " " << s; + state s_root = m_state_ufind.find(s); + if (s_root != s) + of << "(=" << s_root << ")"; + } + of << std::endl; + + of << "Live:" << m_live << std::endl; + of << "Dead:" << m_dead << std::endl; + of << "Unknown:" << m_unknown << std::endl; + of << "Unvisited:" << m_unvisited << std::endl; + + of << "Edges:" << std::endl; + for (auto s1: m_seen) { + if (m_state_ufind.is_root(s1)) { + of << " " << s1 << " -> " << *m_to.find(s1) << std::endl; + } + } + + of << "---------------------------------" << std::endl; + + } + // ********************************** unsigned seq_regex::get_state_id(expr* e) { @@ -961,21 +995,20 @@ namespace smt { m_state_graph.add_state(r_id, r_nullable); // Add edges to all derivatives expr_ref_vector derivatives(m); - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: 
getting all derivs: " << r_id << " ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); get_all_derivatives(r, derivatives); for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); - STRACE("seq_regex_brief", tout << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); + STRACE("seq_regex_debug", tout << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); expr_ref dr_n = is_nullable_wrapper(dr); - STRACE("seq_regex_brief", tout << "1... ";); bool dr_nullable = m.is_true(dr_n); - STRACE("seq_regex_brief", tout << "2... ";); m_state_graph.add_state(dr_id, dr_nullable); bool maybecycle = can_be_in_cycle(r, dr); m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.done_adding(r_id); STRACE("seq_regex_brief", tout << std::endl;); + STRACE("seq_regex_brief", m_state_graph.pretty_print(tout);); return true; } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index c48437329e6..3911e228b50 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -118,7 +118,16 @@ namespace smt { void mark_dead_recursive(state s); state merge_all_cycles(state s1, state_set& s_to); + /* + Pretty printing support + */ + // void pretty_print_set(std::ofstream& of, state_set& s_set); + public: + state_graph(): + m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), + m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {} + /* Exposed methods: - adding a state and all its transitions @@ -137,10 +146,11 @@ namespace smt { bool is_live(state s); bool is_dead(state s); - state_graph(): - m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), - m_state_ufind(), m_from(), m_to(), m_from_maybecycle() - {} + /* + Pretty printing + */ + void pretty_print(std::ofstream& of); + }; class seq_regex { From 005432650e37246233c116485478e61fe8b85856 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Mon, 29 Jun 2020 18:33:07 -0400 Subject: [PATCH 27/51] rewrite state graph to remove the fragile 
assumption that all edges from a state are added at a time --- src/smt/seq_regex.cpp | 219 +++++++++++++++++++++--------------------- src/smt/seq_regex.h | 92 +++++++++--------- 2 files changed, 156 insertions(+), 155 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 6eda9612564..bf034f6764c 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -608,12 +608,20 @@ namespace smt { // Use get_cofactors method and filter out unsatisfiable conds expr_ref_pair_vector cofactors(m); get_cofactors(d, cofactors); + STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;); for (auto const& p : cofactors) { + STRACE("seq_regex_debug", tout << "visiting cofactor: cond: " << mk_pp(p.first, m) << ", deriv: " << mk_pp(p.second, m) << std::endl;); expr_ref cond(p.first, m); + STRACE("seq_regex_debug", tout << "head: " << mk_pp(hd, m) << std::endl;); + STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); seq_rw().elim_condition(hd, cond); + STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); rewrite(cond); + STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); if (m.is_false(cond)) continue; + STRACE("seq_regex_debug", tout << "cofactor labeled true!" << std::endl;); if (re().is_empty(p.second)) continue; + STRACE("seq_regex_debug", tout << "added derivative!" 
<< std::endl;); results.push_back(p.second); } } @@ -680,86 +688,68 @@ namespace smt { ****************************************************/ void state_graph::add_state_core(state s) { + STRACE("seq_regex_brief", tout << "add(" << s << ") ";); SASSERT(!m_seen.contains(s)); - // Ensure corresponding var in connected components + // Ensure corresponding var in union find structure while (s >= m_state_ufind.get_num_vars()) { m_state_ufind.mk_var(); } // Initialize as unvisited m_seen.insert(s); - m_unvisited.insert(s); + m_unexplored.insert(s); m_to.insert(s, new state_set()); m_from.insert(s, new state_set()); m_from_maybecycle.insert(s, new state_set()); } - void state_graph::remove_state(state s) { + void state_graph::remove_state_core(state s) { // This is a partial deletion -- the state is still seen and can't be - // added again later + // added again later. + // The state should be unknown, and all edges to or from the state + // should already have been renamed. + STRACE("seq_regex_brief", tout << "del(" << s << ") ";); SASSERT(m_seen.contains(s)); SASSERT(!m_state_ufind.is_root(s)); + SASSERT(m_unknown.contains(s)); m_to.erase(s); m_from.erase(s); m_from_maybecycle.erase(s); - if (m_unvisited.contains(s)) { - UNREACHABLE(); // for testing TODO: remove - m_unvisited.remove(s); - } - else if (m_unknown.contains(s)) { - m_unknown.remove(s); - } - else if (m_dead.contains(s)) { - UNREACHABLE(); // for testing TODO: remove - m_unknown.remove(s); - } - else if (m_live.contains(s)) { - UNREACHABLE(); // for testing TODO: remove - m_live.remove(s); - } - else { - UNREACHABLE(); - } + m_unknown.remove(s); } - void state_graph::mark_unknown(state s) { - SASSERT(m_state_ufind.is_root(s)); - SASSERT(m_unvisited.contains(s)); + void state_graph::mark_unknown_core(state s) { STRACE("seq_regex_brief", tout << "unk(" << s << ") ";); - m_unvisited.remove(s); + SASSERT(m_state_ufind.is_root(s)); + SASSERT(m_unexplored.contains(s)); + m_unexplored.remove(s); 
m_unknown.insert(s); } - void state_graph::mark_live(state s) { + void state_graph::mark_live_core(state s) { + STRACE("seq_regex_brief", tout << "live(" << s << ") ";); SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); - STRACE("seq_regex_brief", tout << "live(" << s << ") ";); m_unknown.remove(s); m_live.insert(s); } - void state_graph::mark_dead(state s) { + void state_graph::mark_dead_core(state s) { + STRACE("seq_regex_brief", tout << "dead(" << s << ") ";); SASSERT(m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); - STRACE("seq_regex_brief", tout << "dead(" << s << ") ";); m_unknown.remove(s); m_dead.insert(s); } - // bool state_graph::is_resolved(state s) { - // SASSERT(m_state_ufind.is_root(s)); - // return (m_live.contains(s) || m_dead.contains(s)); - // } - // bool state_graph::is_unresolved(state s) { - // SASSERT(m_state_ufind.is_root(s)); - // return (m_unknown.contains(s) || m_unvisited.contains(s)); - // } - /* - Add edge to the graph - May already exist, in which case a nocycle edge overrides - a cycle edge. + Add edge to the graph. + - If the annotation 'maybecycle' is false, then the user is sure + that this edge will never be part of a cycle. + - May already exist, in which case maybecycle = false overrides + maybecycle = true. */ void state_graph::add_edge_core(state s1, state s2, bool maybecycle) { + STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 << "," + << (maybecycle ? 
"y" : "n") << ") ";); SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";); if (s1 == s2) return; if (!m_to.find(s1)->contains(s2)) { // add new edge @@ -774,21 +764,19 @@ namespace smt { m_from_maybecycle.find(s2)->remove(s1); } } - void state_graph::remove_edge(state s1, state s2) { + void state_graph::remove_edge_core(state s1, state s2) { SASSERT(m_to.find(s1)->contains(s2)); SASSERT(m_from.find(s2)->contains(s1)); m_to.find(s1)->remove(s2); m_from.find(s2)->remove(s1); - if (m_from_maybecycle.find(s2)->contains(s1)) { - m_from_maybecycle.find(s2)->remove(s1); - } + m_from_maybecycle.find(s2)->remove(s1); } - void state_graph::rename_edge(state old1, state old2, - state new1, state new2) { + void state_graph::rename_edge_core(state old1, state old2, + state new1, state new2) { SASSERT(m_to.find(old1)->contains(old2)); SASSERT(m_from.find(old2)->contains(old1)); bool maybecycle = m_from_maybecycle.find(old2)->contains(old1); - remove_edge(old1, old2); + remove_edge_core(old1, old2); add_edge_core(new1, new2, maybecycle); } @@ -798,8 +786,7 @@ namespace smt { Preconditions: the set should be nonempty, and every state in the set should be unknown (in particular, *not* unvisited). - Also, each state should - be current (not a previous SCC that was later merged into another). 
+ Also, each state should currently exist */ auto state_graph::merge_states(state s1, state s2) -> state { SASSERT(m_state_ufind.is_root(s1)); @@ -808,15 +795,15 @@ namespace smt { SASSERT(m_unknown.contains(s2)); STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";); m_state_ufind.merge(s1, s2); - if (m_state_ufind.is_root(s1)) std::swap(s1, s2); - // merge edges + if (m_state_ufind.is_root(s2)) std::swap(s1, s2); + // rename s2 to s1 in edges for (auto s_to: *m_to.find(s2)) { - rename_edge(s2, s_to, s1, s_to); + rename_edge_core(s2, s_to, s1, s_to); } for (auto s_from: *m_from.find(s2)) { - rename_edge(s_from, s2, s_from, s1); + rename_edge_core(s_from, s2, s_from, s1); } - remove_state(s2); + remove_state_core(s2); return s1; } auto state_graph::merge_states(state_set& s_set) -> state { @@ -827,9 +814,9 @@ namespace smt { if (first_iter) { prev_s = s; first_iter = false; - } else { - prev_s = merge_states(prev_s, s); + continue; } + prev_s = merge_states(prev_s, s); } return prev_s; } @@ -840,8 +827,10 @@ namespace smt { */ void state_graph::mark_live_recursive(state s) { SASSERT(m_live.contains(s) || m_unknown.contains(s)); + STRACE("seq_regex_debug", tout + << std::endl << " DEBUG: mark live recursive: " << s << " ";); if (m_live.contains(s)) return; - mark_live(s); + mark_live_core(s); for (auto s_from: *m_from.find(s)) { mark_live_recursive(s_from); } @@ -853,64 +842,70 @@ namespace smt { Precondition: s is live, dead, or unknown */ void state_graph::mark_dead_recursive(state s) { - SASSERT(!m_unvisited.contains(s)); + SASSERT(m_live.contains(s) || m_dead.contains(s) || + m_unknown.contains(s)); + STRACE("seq_regex_debug", tout + << std::endl << " DEBUG: mark dead recursive: " << s << " ";); if (!m_unknown.contains(s)) return; - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); for (auto s_to: *m_to.find(s)) { - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: m_to searching: " << s_to << " 
";); - // unknown pointing to live should have been marked as live + // unknown pointing to live should have been marked as live! SASSERT(!m_live.contains(s_to)); - if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return; + if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return; } // all states from s are dead - mark_dead(s); + mark_dead_core(s); for (auto s_from: *m_from.find(s)) { mark_dead_recursive(s_from); } } /* - if new edges from s1 to s_to will create at least one cycle, - merge all states in the new SCC + Merge all cycles of unknown states containing s1 into one state. + Return the new state + Precondition: s1 is unknown. */ - auto state_graph::merge_all_cycles(state s1, state_set& s_to) -> state { + auto state_graph::merge_all_cycles(state s) -> state { + SASSERT(m_unknown.contains(s)); // Mark s_to, then search backwards from s to mark the SCC // TODO: Implement full check // Simple placeholder for now: check if there is an edge both ways - for (auto s2: s_to) { - if (m_to.find(s2)->contains(s1)) { - s1 = merge_states(s1, s2); - } + state_set s_to_set = *m_to.find(s); // makes a copy. 
Reference could + // lead to a bug + for (auto s_to: s_to_set) { + if (m_to.find(s_to)->contains(s)) + s = merge_states(s, s_to); } - return s1; + return s; } - void state_graph::add_state(state s, bool live) { + /* + Exposed methods + */ + + void state_graph::add_state(state s) { if (m_seen.contains(s)) return; - STRACE("seq_regex_brief", tout << "add(" << s << "," << live << ") ";); add_state_core(s); - if (live) { - mark_unknown(s); - mark_live_recursive(s); - } + } + void state_graph::mark_live(state s) { + SASSERT(m_unexplored.contains(s) || m_live.contains(s)); + SASSERT(m_state_ufind.is_root(s)); + if (m_unexplored.contains(s)) mark_unknown_core(s); + mark_live_recursive(s); } void state_graph::add_edge(state s1, state s2, bool maybecycle) { - SASSERT(m_seen.contains(s1)); + SASSERT(m_unexplored.contains(s1) || m_live.contains(s1)); + SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_seen.contains(s2)); - STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 - << "," << maybecycle << ") ";); - s1 = m_state_ufind.find(s1); s2 = m_state_ufind.find(s2); add_edge_core(s1, s2, maybecycle); - if (m_live.contains(s2)) { - if (m_unvisited.contains(s1)) mark_unknown(s1); - mark_live_recursive(s1); - } + if (m_live.contains(s2)) mark_live(s1); } - void state_graph::done_adding(state s) { - s = m_state_ufind.find(s); - if (m_unvisited.contains(s)) mark_unknown(s); - s = merge_all_cycles(s, *m_to.find(s)); + void state_graph::mark_done(state s) { + SASSERT(m_unexplored.contains(s) || m_live.contains(s)); + SASSERT(m_state_ufind.is_root(s)); + if (m_live.contains(s)) return; + if (m_unexplored.contains(s)) mark_unknown_core(s); + s = merge_all_cycles(s); // check if dead mark_dead_recursive(s); STRACE("seq_regex_brief", tout << "done(" << s << ") ";); @@ -929,13 +924,11 @@ namespace smt { bool state_graph::is_dead(state s) { return m_dead.contains(m_state_ufind.find(s)); } + bool state_graph::is_done(state s) { + return (m_seen.contains(s) && + 
!m_unexplored.contains(m_state_ufind.find(s))); + } - // void pretty_print_set(std::ofstream& of, state_set& s_set) { - // for (auto s: s_set) { - // of << " " << s; - // } - // of << std::endl; - // } void state_graph::pretty_print(std::ofstream& of) { of << "---------- State Graph ----------" << std::endl; of << "Seen:"; @@ -950,7 +943,7 @@ namespace smt { of << "Live:" << m_live << std::endl; of << "Dead:" << m_dead << std::endl; of << "Unknown:" << m_unknown << std::endl; - of << "Unvisited:" << m_unvisited << std::endl; + of << "Unexplored:" << m_unexplored << std::endl; of << "Edges:" << std::endl; for (auto s1: m_seen) { @@ -978,7 +971,7 @@ namespace smt { */ bool seq_regex::update_state_graph(expr* r) { unsigned r_id = get_state_id(r); - if (m_state_graph.is_seen(r_id)) return false; + if (m_state_graph.is_done(r_id)) return false; if (m_state_graph.get_size() >= m_max_state_graph_size) { STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" 
<< std::endl;); STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); @@ -986,27 +979,31 @@ namespace smt { } STRACE("seq_regex", tout << "Updating state graph for regex " << mk_pp(r, m) << ") ";); - STRACE("seq_regex_brief", tout << std::endl - << "USG(" << r->get_id() << ") ";); + STRACE("seq_regex_brief", tout + << std::endl << "USG(" << r->get_id() << ") ";); // Save r as expr_ref so it's not deallocated m_state_trail.push_back(r); - // Add state, live if nullable - bool r_nullable = m.is_true(is_nullable_wrapper(r)); - m_state_graph.add_state(r_id, r_nullable); + // Add state + m_state_graph.add_state(r_id); + expr_ref r_nullable = is_nullable_wrapper(r); + if (m.is_true(r_nullable)) { + m_state_graph.mark_live(r_id); + return true; + } // Add edges to all derivatives expr_ref_vector derivatives(m); - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); + STRACE("seq_regex_debug", tout + << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); get_all_derivatives(r, derivatives); for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); - expr_ref dr_n = is_nullable_wrapper(dr); - bool dr_nullable = m.is_true(dr_n); - m_state_graph.add_state(dr_id, dr_nullable); + STRACE("seq_regex_debug", tout + << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); + m_state_graph.add_state(dr_id); bool maybecycle = can_be_in_cycle(r, dr); m_state_graph.add_edge(r_id, dr_id, maybecycle); } - m_state_graph.done_adding(r_id); + m_state_graph.mark_done(r_id); STRACE("seq_regex_brief", tout << std::endl;); STRACE("seq_regex_brief", m_state_graph.pretty_print(tout);); return true; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 3911e228b50..8f7252d9163 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -39,8 +39,10 @@ namespace smt { "States" are integers. 
States and edges are added to the data structure incrementally. - - Some states are initially labeled as live. The data structure - tracks which other states are live (can reach a live state), dead + - States can be marked as live + or as done -- to indicate that no more edges will be added and the + state will not be marked as live. The data structure then tracks + which other states are live (can reach a live state), dead (can't reach a live state), or neither. - Some edges are labeled as not contained in a cycle. This is to optimize search if it is known by the user of the structure @@ -49,9 +51,6 @@ namespace smt { Internally, we use union_find to identify states within an SCC, and incrementally update SCCs, while propagating backwards live and dead SCCs. - - Class invariants: - - TODO */ class state_graph { typedef unsigned state; @@ -61,90 +60,95 @@ namespace smt { private: /* - All states are exactly one of: - - live: known to be nonempty - - dead: known to be empty - - unknown: all outgoing transitions have been - added, but the state is not known - to be live or dead - - unvisited: outgoing transitions have not been added + All states are internally exactly one of: + - live: known to reach a live state + - dead: known to never reach a live state + - unknown: all outgoing edges have been added, but the + state is not known to be live or dead + - unexplored: not all outgoing edges have been added As SCCs are merged, some states become aliases, and a union find data structure collapses a now obsolete state to its current representative. m_seen keeps track of states we have seen, including obsolete states. 
+ + Invariants: + - TODO */ state_set m_live; state_set m_dead; state_set m_unknown; - state_set m_unvisited; + state_set m_unexplored; state_set m_seen; state_ufind m_state_ufind; - void add_state_core(state s); // unvisited + seen - void remove_state(state s); // * -> m_seen only - - void mark_unknown(state s); // unvisited -> unknown - void mark_live(state s); // unknown -> live - void mark_dead(state s); // unknown -> dead - - // bool is_resolved(state s); // live or dead - // bool is_unresolved(state s); // unknown or unvisited - /* Edges are saved in both from and to maps. A subset of edges are also marked as possibly being part of a cycle by being stored in m_from_maybecycle. + + Invariants: + - TODO */ edge_rel m_from; edge_rel m_to; edge_rel m_from_maybecycle; + /* + 'Core' functions that modify the plain graph, without + updating SCCs or propagating live/dead state information. + These are for internal use only. + */ + void add_state_core(state s); // unexplored + seen + void remove_state_core(state s); // unknown + seen -> seen + void mark_unknown_core(state s); // unexplored -> unknown + void mark_live_core(state s); // unknown -> live + void mark_dead_core(state s); // unknown -> dead + void add_edge_core(state s1, state s2, bool maybecycle); - void remove_edge(state s1, state s2); - void rename_edge(state old1, state old2, state new1, state new2); + void remove_edge_core(state s1, state s2); + void rename_edge_core(state old1, state old2, state new1, state new2); state merge_states(state s1, state s2); state merge_states(state_set& s_set); /* - Core algorithmic search routines + Algorithmic search routines - live state propagation - dead state propagation - - cycle detection + - cycle / strongly-connected component detection */ void mark_live_recursive(state s); void mark_dead_recursive(state s); - state merge_all_cycles(state s1, state_set& s_to); - - /* - Pretty printing support - */ - // void pretty_print_set(std::ofstream& of, state_set& s_set); + 
state merge_all_cycles(state s); public: state_graph(): - m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(), + m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(), m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {} /* - Exposed methods: - - adding a state and all its transitions - - checking if a state is known to be live or dead - - ASSUMPTION: transitions from a state are added in order and after - all transitions are added, the state is marked as - finished. Also all states are added before the transitions. + Exposed methods + + These methods may be called in any order, as long as: + - states are added before edges are added between them + - edges are not added from a done state + - a done state is not marked as live + - edges are not added creating a cycle containing an edge with + maybecycle = false */ - void add_state(state s, bool live); + void add_state(state s); void add_edge(state s1, state s2, bool maybecycle); - void done_adding(state s); - unsigned get_size(); + void mark_live(state s); + void mark_done(state s); bool is_seen(state s); bool is_live(state s); bool is_dead(state s); + bool is_done(state s); + + unsigned get_size(); /* Pretty printing From d4bdf5937752ce7b6a9c5a2550ceab2cb2abec33 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Mon, 29 Jun 2020 21:58:35 -0400 Subject: [PATCH 28/51] start of general cycle detection check + fix some comments --- src/smt/seq_regex.cpp | 13 +++++++++++-- src/smt/seq_regex.h | 7 ++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index bf034f6764c..fe7f868e27e 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -860,13 +860,22 @@ namespace smt { } /* - Merge all cycles of unknown states containing s1 into one state. + Merge all cycles of unknown states containing s into one state. Return the new state - Precondition: s1 is unknown. + Precondition: s is unknown. 
*/ auto state_graph::merge_all_cycles(state s) -> state { SASSERT(m_unknown.contains(s)); // Mark s_to, then search backwards from s to mark the SCC + // state_set visited = *(new state_set()); + // state_set marked = *(new state_set()); + // visited.insert(s); + // auto to_search = *(new vector>()) + // to_search.push_back(s, s) + // while (to_search.size() > 0) { + // auto p = to_search.pop_back(); + // } + // TODO: Implement full check // Simple placeholder for now: check if there is an edge both ways state_set s_to_set = *m_to.find(s); // makes a copy. Reference could diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 8f7252d9163..6a745b0853f 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -40,8 +40,9 @@ namespace smt { "States" are integers. States and edges are added to the data structure incrementally. - States can be marked as live - or as done -- to indicate that no more edges will be added and the - state will not be marked as live. The data structure then tracks + or as done -- to indicate that no more outgoing edges will be + added and the state will not be marked as live. The data + structure then tracks which other states are live (can reach a live state), dead (can't reach a live state), or neither. - Some edges are labeled as not contained in a cycle. 
This is to @@ -133,7 +134,7 @@ namespace smt { These methods may be called in any order, as long as: - states are added before edges are added between them - - edges are not added from a done state + - outgoing edges are not added from a done state - a done state is not marked as live - edges are not added creating a cycle containing an edge with maybecycle = false From 7f922e1df52fe721709b678dff8ffe8acb45ca5e Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Tue, 30 Jun 2020 12:44:46 -0400 Subject: [PATCH 29/51] implement full cycle detection procedure --- src/smt/seq_regex.cpp | 102 +++++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 36 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index fe7f868e27e..59147fd8ade 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -784,9 +784,12 @@ namespace smt { Merge two states or more generally a set of states into one, returning the new state. Also merges associated edges. - Preconditions: the set should be nonempty, and every state - in the set should be unknown (in particular, *not* unvisited). - Also, each state should currently exist + Preconditions: + - The set should be nonempty + - Every state in the set should be unknown + - Each state should currently exist + - If passing a set of states by reference, it should not be a set + from the edge relations, as merging states modifies edge relations. */ auto state_graph::merge_states(state s1, state s2) -> state { SASSERT(m_state_ufind.is_root(s1)); @@ -822,7 +825,7 @@ namespace smt { } /* - if s is not live, mark it, and recurse on all states into s + If s is not live, mark it, and recurse on all states into s Precondition: s is live or unknown */ void state_graph::mark_live_recursive(state s) { @@ -837,7 +840,7 @@ namespace smt { } /* - check if s is now known to be dead. If so, mark and recurse + Check if s is now known to be dead. If so, mark and recurse on all states into s. 
Precondition: s is live, dead, or unknown */ @@ -866,25 +869,51 @@ namespace smt { */ auto state_graph::merge_all_cycles(state s) -> state { SASSERT(m_unknown.contains(s)); - // Mark s_to, then search backwards from s to mark the SCC - // state_set visited = *(new state_set()); - // state_set marked = *(new state_set()); - // visited.insert(s); - // auto to_search = *(new vector>()) - // to_search.push_back(s, s) - // while (to_search.size() > 0) { - // auto p = to_search.pop_back(); - // } - - // TODO: Implement full check - // Simple placeholder for now: check if there is an edge both ways - state_set s_to_set = *m_to.find(s); // makes a copy. Reference could - // lead to a bug - for (auto s_to: s_to_set) { - if (m_to.find(s_to)->contains(s)) - s = merge_states(s, s_to); + // Visit states in a DFS backwards from s + state_set visited; // all backwards edges pushed + state_set resolved; // known in SCC or not + state_set scc; // known in SCC + resolved.insert(s); + scc.insert(s); + vector to_search; + to_search.push_back(s); + while (to_search.size() > 0) { + state x = to_search.back(); + if (!visited.contains(x)) { + visited.insert(x); + // recurse backwards only on maybecycle edges + // and only on unknown states + for (auto y: *m_from_maybecycle.find(x)) { + if (m_unknown.contains(y)) + to_search.push_back(y); + } + } + else if (!resolved.contains(x)) { + resolved.insert(x); + to_search.pop_back(); + // determine in SCC or not + for (auto y: *m_from_maybecycle.find(x)) { + if (scc.contains(y)) { + scc.insert(x); + break; + } + } + } + else { + to_search.pop_back(); + } } - return s; + // scc is the union of all cycles containing s + return merge_states(scc); + + // Previous simple placeholder: check if there is an edge both ways + // state_set s_to_set = *m_to.find(s); // makes a copy. 
Reference could + // // lead to a bug + // for (auto s_to: s_to_set) { + // if (m_to.find(s_to)->contains(s)) + // s = merge_states(s, s_to); + // } + // return s; } /* @@ -997,22 +1026,23 @@ namespace smt { expr_ref r_nullable = is_nullable_wrapper(r); if (m.is_true(r_nullable)) { m_state_graph.mark_live(r_id); - return true; } - // Add edges to all derivatives - expr_ref_vector derivatives(m); - STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); - get_all_derivatives(r, derivatives); - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); + else { + // Add edges to all derivatives + expr_ref_vector derivatives(m); STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); - m_state_graph.add_state(dr_id); - bool maybecycle = can_be_in_cycle(r, dr); - m_state_graph.add_edge(r_id, dr_id, maybecycle); + << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); + get_all_derivatives(r, derivatives); + for (auto const& dr: derivatives) { + unsigned dr_id = get_state_id(dr); + STRACE("seq_regex_debug", tout + << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); + m_state_graph.add_state(dr_id); + bool maybecycle = can_be_in_cycle(r, dr); + m_state_graph.add_edge(r_id, dr_id, maybecycle); + } + m_state_graph.mark_done(r_id); } - m_state_graph.mark_done(r_id); STRACE("seq_regex_brief", tout << std::endl;); STRACE("seq_regex_brief", m_state_graph.pretty_print(tout);); return true; From 12f7a1feeeaee82f238543403184b6a6fc4554b4 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Tue, 30 Jun 2020 21:47:17 -0400 Subject: [PATCH 30/51] normalize derivative conditions to form 'ele <= a' --- src/ast/rewriter/seq_rewriter.cpp | 145 +++++++++++++++++++++++++----- src/ast/rewriter/seq_rewriter.h | 2 +- src/smt/seq_regex.h | 3 +- 3 files changed, 124 insertions(+), 26 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp 
index 45f6cf5489c..41b7b06f95d 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2543,18 +2543,18 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { } // @EXP (experimental change) // Simplify if there is a relationship between ca and cb - // if (pred_implies(ca, cb)) { - // r1 = mk_der_op(k, a1, b1); - // } - // else if (pred_implies(ca, notcb)) { - // r1 = mk_der_op(k, a1, b2); - // } - // if (pred_implies(notca, cb)) { - // r2 = mk_der_op(k, a2, b1); - // } - // else if (pred_implies(notca, notcb)) { - // r2 = mk_der_op(k, a2, b2); - // } + if (pred_implies(ca, cb)) { + r1 = mk_der_op(k, a1, b1); + } + else if (pred_implies(ca, notcb)) { + r1 = mk_der_op(k, a1, b2); + } + if (pred_implies(notca, cb)) { + r2 = mk_der_op(k, a2, b1); + } + else if (pred_implies(notca, notcb)) { + r2 = mk_der_op(k, a2, b2); + } // --- End core logic } if (!r1) r1 = mk_der_op(k, a1, b); @@ -2637,6 +2637,73 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { return result; } +/* + Make an re_predicate with condition cond, enforcing derivative + normal form on how conditions are written. + + Rewrites everything to (ele <= x) constraints: + (ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none)) + (a = ele) => " + (a <= ele) => ite(ele <= a-1, none, epsilon) + (not p) => mk_der_compl(...) + (p and q) => mk_der_inter(...) + (p or q) => mk_der_union(...) 
+ + Postcondition: result is in BDD form +*/ +expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { + STRACE("seq_verbose", tout << "mk_der_cond: " + << mk_pp(cond, m()) << ", " << mk_pp(ele, m()) << std::endl;); + sort *ele_sort = nullptr; + VERIFY(u().is_seq(seq_sort, ele_sort)); + SASSERT(ele_sort == m().get_sort(ele)); + expr *c1 = nullptr, *c2 = nullptr, *ch1 = nullptr, *ch2 = nullptr; + unsigned ch = 0; + expr_ref result(m()), r1(m()), r2(m()); + if (m().is_eq(cond, ch1, ch2)) { + r1 = u().mk_le(ch1, ch2); + r1 = mk_der_cond(r1, ele, seq_sort); + r2 = u().mk_le(ch2, ch1); + r2 = mk_der_cond(r2, ele, seq_sort); + result = mk_der_inter(r1, r2); + } + else if (u().is_char_le(cond, ch1, ch2) && + u().is_const_char(ch1, ch) && (ch2 == ele)) { + if (ch > 0) { + result = u().mk_char(ch - 1); + result = u().mk_le(ele, result); + result = re_predicate(result, seq_sort); + result = mk_der_compl(result); + } + else { + result = m().mk_true(); + } + } + else if (m().is_not(cond, c1)) { + UNREACHABLE(); + result = mk_der_cond(c1, ele, seq_sort); + result = mk_der_compl(result); + } + else if (m().is_and(cond, c1, c2)) { + UNREACHABLE(); + r1 = mk_der_cond(c1, ele, seq_sort); + r2 = mk_der_cond(c2, ele, seq_sort); + result = mk_der_inter(r1, r2); + } + else if (m().is_or(cond, c1, c2)) { + UNREACHABLE(); + r1 = mk_der_cond(c1, ele, seq_sort); + r2 = mk_der_cond(c2, ele, seq_sort); + result = mk_der_union(r1, r2); + } + else { + result = re_predicate(cond, seq_sort); + } + STRACE("seq_verbose", tout << "mk_der_cond result: " + << mk_pp(result, m()) << std::endl;); + return result; +} + expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m()); @@ -2710,7 +2777,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail - return 
re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); + // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)); // @EXP (experimental change) // Write 'head is equal' as a range constraint: // (ele <= hd) and (hd <= ele) @@ -2718,6 +2785,13 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)), // re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl)) // ); + // @EXP (experimental change) + // Use mk_der_cond to normalize + STRACE("seq_verbose", tout << "deriv to_re" << std::endl;); + result = m().mk_eq(ele, hd); + result = mk_der_cond(result, ele, seq_sort); + result = mk_der_concat(result, re().mk_to_re(tl)); + return result; } else if (str().is_empty(r1)) { return mk_empty(); @@ -2740,7 +2814,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { - return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); + // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); // @EXP (experimental change) // Write 'tail is equal' as a range constraint: // (ele <= tl) and (tl <= ele) @@ -2748,6 +2822,13 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))), // re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd))) // ); + // @EXP (experimental change) + // Use mk_der_cond to normalize + STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;); + result = m().mk_eq(ele, tl); + result = mk_der_cond(result, ele, seq_sort); + result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd))); + return result; } else if (str().is_empty(r2)) { return mk_empty(); @@ -2760,13 +2841,17 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { if (s1.length() == 1 && s2.length() == 1) { expr_ref ch1(m_util.mk_char(s1[0]), m()); expr_ref 
ch2(m_util.mk_char(s2[0]), m()); + // return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort), + // re_predicate(m_util.mk_le(ele, ch2), seq_sort)); // @EXP (experimental change) - // expr_ref p1(m_util.mk_le(ch1, ele), m()); - // expr_ref p2(m_util.mk_le(ele, ch2), m()); - // expr_ref conj(m().mk_and(p1, p2), m()); - // return re_predicate(conj, seq_sort); - return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort), - re_predicate(m_util.mk_le(ele, ch2), seq_sort)); + // Use mk_der_cond to normalize + STRACE("seq_verbose", tout << "deriv range zstring" << std::endl;); + expr_ref p1(u().mk_le(ch1, ele), m()); + p1 = mk_der_cond(p1, ele, seq_sort); + expr_ref p2(u().mk_le(ele, ch2), m()); + p2 = mk_der_cond(p2, ele, seq_sort); + result = mk_der_inter(p1, p2); + return result; } else { return mk_empty(); @@ -2774,8 +2859,17 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { } expr* e1 = nullptr, *e2 = nullptr; if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort), - re_predicate(m_util.mk_le(ele, e2), seq_sort)); + // return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort), + // re_predicate(m_util.mk_le(ele, e2), seq_sort)); + // @EXP (experimental change) + // Use mk_der_cond to normalize + STRACE("seq_verbose", tout << "deriv range str" << std::endl;); + expr_ref p1(u().mk_le(e1, ele), m()); + p1 = mk_der_cond(p1, ele, seq_sort); + expr_ref p2(u().mk_le(ele, e2), m()); + p2 = mk_der_cond(p2, ele, seq_sort); + result = mk_der_inter(p1, p2); + return result; } } else if (re().is_full_char(r)) { @@ -2785,7 +2879,12 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { array_util array(m()); expr* args[2] = { p, ele }; result = array.mk_select(2, args); - return re_predicate(result, seq_sort); + // return re_predicate(result, seq_sort); + // @EXP (experimental change) + // Use mk_der_cond to normalize + // (It's a no-op in this case, however) + 
STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;); + return mk_der_cond(result, ele, seq_sort); } // stuck cases: re.derivative, variable, // str.to_re if the head of the string can't be obtained, diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index a82e7a6ba65..bbd5d3a345e 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -202,7 +202,7 @@ class seq_rewriter { expr_ref mk_der_union(expr* a, expr* b); expr_ref mk_der_inter(expr* a, expr* b); expr_ref mk_der_compl(expr* a); - expr_ref mk_der_reverse(expr* a); + expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort); bool lt_char(expr* ch1, expr* ch2); bool eq_char(expr* ch1, expr* ch2); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 6a745b0853f..161423efe74 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -191,8 +191,7 @@ namespace smt { scoped_vector m_to_propagate; /* - state_graph for dead state detection, - and associated methods + state_graph for dead state detection, and associated methods */ state_graph m_state_graph; expr_ref_vector m_state_trail; From 1543ca793823ad5321e8c5f1874fc52b49951bd1 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Tue, 30 Jun 2020 22:10:38 -0400 Subject: [PATCH 31/51] order derivative conditions by character code --- src/ast/rewriter/seq_rewriter.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 41b7b06f95d..99635a54529 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2519,8 +2519,15 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { return (a == b) ? 
a : m().mk_ite(c, a, b); }; // @EXP (experimental change) - // Use same ID for related predicates to improve simplifications - // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); }; + // Use character code to order conditions + auto get_id = [&](expr* e) { + expr *ch1 = nullptr, *ch2 = nullptr; + unsigned ch; + if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch)) + return ch; + re().is_complement(e, e); + return e->get_id(); + }; if (m().is_ite(a, ca, a1, a2)) { expr_ref r1(m()), r2(m()); expr_ref notca(m().mk_not(ca), m()); @@ -2534,7 +2541,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { return result; } // Order with higher IDs on the outside - if (ca->get_id() < cb->get_id()) { + if (get_id(ca) < get_id(cb)) { std::swap(a, b); std::swap(ca, cb); std::swap(notca, notcb); From 11bda7e916c86c3b834c50586f8089193513d6a9 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Tue, 30 Jun 2020 22:30:03 -0400 Subject: [PATCH 32/51] fix confusing names m_to and m_from --- src/smt/seq_regex.cpp | 65 +++++++++++++++++++------------------------ src/smt/seq_regex.h | 10 +++---- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 59147fd8ade..826d82a6eda 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -697,9 +697,9 @@ namespace smt { // Initialize as unvisited m_seen.insert(s); m_unexplored.insert(s); - m_to.insert(s, new state_set()); - m_from.insert(s, new state_set()); - m_from_maybecycle.insert(s, new state_set()); + m_targets.insert(s, new state_set()); + m_sources.insert(s, new state_set()); + m_sources_maybecycle.insert(s, new state_set()); } void state_graph::remove_state_core(state s) { // This is a partial deletion -- the state is still seen and can't be @@ -710,9 +710,9 @@ namespace smt { SASSERT(m_seen.contains(s)); SASSERT(!m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); - m_to.erase(s); - m_from.erase(s); - 
m_from_maybecycle.erase(s); + m_targets.erase(s); + m_sources.erase(s); + m_sources_maybecycle.erase(s); m_unknown.remove(s); } @@ -751,31 +751,31 @@ namespace smt { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); if (s1 == s2) return; - if (!m_to.find(s1)->contains(s2)) { + if (!m_targets.find(s1)->contains(s2)) { // add new edge STRACE("seq_regex_debug", tout << std::endl << " DEBUG: new edge! ";); - m_to.find(s1)->insert(s2); - m_from.find(s2)->insert(s1); - if (maybecycle) m_from_maybecycle.find(s2)->insert(s1); + m_targets.find(s1)->insert(s2); + m_sources.find(s2)->insert(s1); + if (maybecycle) m_sources_maybecycle.find(s2)->insert(s1); } - else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) { + else if (!maybecycle && m_sources_maybecycle.find(s2)->contains(s1)) { // update existing edge STRACE("seq_regex_debug", tout << std::endl << " DEBUG: update edge! ";); - m_from_maybecycle.find(s2)->remove(s1); + m_sources_maybecycle.find(s2)->remove(s1); } } void state_graph::remove_edge_core(state s1, state s2) { - SASSERT(m_to.find(s1)->contains(s2)); - SASSERT(m_from.find(s2)->contains(s1)); - m_to.find(s1)->remove(s2); - m_from.find(s2)->remove(s1); - m_from_maybecycle.find(s2)->remove(s1); + SASSERT(m_targets.find(s1)->contains(s2)); + SASSERT(m_sources.find(s2)->contains(s1)); + m_targets.find(s1)->remove(s2); + m_sources.find(s2)->remove(s1); + m_sources_maybecycle.find(s2)->remove(s1); } void state_graph::rename_edge_core(state old1, state old2, state new1, state new2) { - SASSERT(m_to.find(old1)->contains(old2)); - SASSERT(m_from.find(old2)->contains(old1)); - bool maybecycle = m_from_maybecycle.find(old2)->contains(old1); + SASSERT(m_targets.find(old1)->contains(old2)); + SASSERT(m_sources.find(old2)->contains(old1)); + bool maybecycle = m_sources_maybecycle.find(old2)->contains(old1); remove_edge_core(old1, old2); add_edge_core(new1, new2, maybecycle); } @@ -800,10 +800,10 @@ namespace smt { m_state_ufind.merge(s1, 
s2); if (m_state_ufind.is_root(s2)) std::swap(s1, s2); // rename s2 to s1 in edges - for (auto s_to: *m_to.find(s2)) { + for (auto s_to: *m_targets.find(s2)) { rename_edge_core(s2, s_to, s1, s_to); } - for (auto s_from: *m_from.find(s2)) { + for (auto s_from: *m_sources.find(s2)) { rename_edge_core(s_from, s2, s_from, s1); } remove_state_core(s2); @@ -834,7 +834,7 @@ namespace smt { << std::endl << " DEBUG: mark live recursive: " << s << " ";); if (m_live.contains(s)) return; mark_live_core(s); - for (auto s_from: *m_from.find(s)) { + for (auto s_from: *m_sources.find(s)) { mark_live_recursive(s_from); } } @@ -850,14 +850,14 @@ namespace smt { STRACE("seq_regex_debug", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); if (!m_unknown.contains(s)) return; - for (auto s_to: *m_to.find(s)) { + for (auto s_to: *m_targets.find(s)) { // unknown pointing to live should have been marked as live! SASSERT(!m_live.contains(s_to)); if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return; } // all states from s are dead mark_dead_core(s); - for (auto s_from: *m_from.find(s)) { + for (auto s_from: *m_sources.find(s)) { mark_dead_recursive(s_from); } } @@ -883,7 +883,7 @@ namespace smt { visited.insert(x); // recurse backwards only on maybecycle edges // and only on unknown states - for (auto y: *m_from_maybecycle.find(x)) { + for (auto y: *m_sources_maybecycle.find(x)) { if (m_unknown.contains(y)) to_search.push_back(y); } @@ -892,7 +892,7 @@ namespace smt { resolved.insert(x); to_search.pop_back(); // determine in SCC or not - for (auto y: *m_from_maybecycle.find(x)) { + for (auto y: *m_sources_maybecycle.find(x)) { if (scc.contains(y)) { scc.insert(x); break; @@ -905,15 +905,6 @@ namespace smt { } // scc is the union of all cycles containing s return merge_states(scc); - - // Previous simple placeholder: check if there is an edge both ways - // state_set s_to_set = *m_to.find(s); // makes a copy. 
Reference could - // // lead to a bug - // for (auto s_to: s_to_set) { - // if (m_to.find(s_to)->contains(s)) - // s = merge_states(s, s_to); - // } - // return s; } /* @@ -986,7 +977,7 @@ namespace smt { of << "Edges:" << std::endl; for (auto s1: m_seen) { if (m_state_ufind.is_root(s1)) { - of << " " << s1 << " -> " << *m_to.find(s1) << std::endl; + of << " " << s1 << " -> " << *m_targets.find(s1) << std::endl; } } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 161423efe74..743c149e8bb 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -87,14 +87,14 @@ namespace smt { /* Edges are saved in both from and to maps. A subset of edges are also marked as possibly being - part of a cycle by being stored in m_from_maybecycle. + part of a cycle by being stored in m_sources_maybecycle. Invariants: - TODO */ - edge_rel m_from; - edge_rel m_to; - edge_rel m_from_maybecycle; + edge_rel m_sources; + edge_rel m_targets; + edge_rel m_sources_maybecycle; /* 'Core' functions that modify the plain graph, without @@ -127,7 +127,7 @@ namespace smt { public: state_graph(): m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(), - m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {} + m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle() {} /* Exposed methods From 4b5a89ee0a5c5e385a5fb6be05fa6a6b0c05dfeb Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 07:34:20 -0400 Subject: [PATCH 33/51] assign increasing state IDs from 1 instead of using get_id on AST node --- src/smt/seq_regex.cpp | 19 +++++++++++++++---- src/smt/seq_regex.h | 12 ++++++++---- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 826d82a6eda..63b5e8afb42 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -26,7 +26,8 @@ namespace smt { ctx(th.get_context()), m(th.get_manager()), m_state_graph(), - m_state_trail(m) + m_expr_to_state(), + m_state_to_expr(m) {} seq_util& 
seq_regex::u() { return th.m_util; } @@ -988,8 +989,20 @@ namespace smt { // ********************************** unsigned seq_regex::get_state_id(expr* e) { - return e->get_id(); + // Assign increasing IDs starting from 1 + if (!m_expr_to_state.contains(e)) { + m_state_to_expr.push_back(e); + unsigned new_id = m_state_to_expr.size(); + m_expr_to_state.insert(e, new_id); + } + return m_expr_to_state.find(e); + } + expr* seq_regex::get_expr_from_id(unsigned id) { + SASSERT(id >= 1); + SASSERT(id <= m_state_to_expr.size()); + return m_state_to_expr.get(id); } + bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) { // Simple placeholder. TODO: Implement full check return true; @@ -1010,8 +1023,6 @@ namespace smt { << mk_pp(r, m) << ") ";); STRACE("seq_regex_brief", tout << std::endl << "USG(" << r->get_id() << ") ";); - // Save r as expr_ref so it's not deallocated - m_state_trail.push_back(r); // Add state m_state_graph.add_state(r_id); expr_ref r_nullable = is_nullable_wrapper(r); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 743c149e8bb..39ac16bd2ce 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -193,16 +193,20 @@ namespace smt { /* state_graph for dead state detection, and associated methods */ - state_graph m_state_graph; - expr_ref_vector m_state_trail; - unsigned m_max_state_graph_size { 10000 }; - // Convert expression to state + state_graph m_state_graph; + ptr_addr_map m_expr_to_state; + expr_ref_vector m_state_to_expr; + unsigned m_max_state_graph_size { 10000 }; + // Convert between expressions and states (IDs) unsigned get_state_id(expr* e); + expr* get_expr_from_id(unsigned id); // Cycle-detection heuristic (sound but not complete) bool can_be_in_cycle(expr* e1, expr* e2); // Update the graph bool update_state_graph(expr* r); + // ******************** + seq_util& u(); class seq_util::re& re(); class seq_util::str& str(); From e12bf862bd59f1f6c5f3fae0722f857b0da700c6 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 
Jul 2020 07:52:51 -0400 Subject: [PATCH 34/51] remove elim_condition call in get_dall_derivatives --- src/smt/seq_regex.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 63b5e8afb42..015fe4f3a3f 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -611,18 +611,8 @@ namespace smt { get_cofactors(d, cofactors); STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;); for (auto const& p : cofactors) { - STRACE("seq_regex_debug", tout << "visiting cofactor: cond: " << mk_pp(p.first, m) << ", deriv: " << mk_pp(p.second, m) << std::endl;); - expr_ref cond(p.first, m); - STRACE("seq_regex_debug", tout << "head: " << mk_pp(hd, m) << std::endl;); - STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); - seq_rw().elim_condition(hd, cond); - STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); - rewrite(cond); - STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;); - if (m.is_false(cond)) continue; - STRACE("seq_regex_debug", tout << "cofactor labeled true!" << std::endl;); - if (re().is_empty(p.second)) continue; - STRACE("seq_regex_debug", tout << "added derivative!" 
<< std::endl;); + if (m.is_false(p.first) || re().is_empty(p.second)) continue; + STRACE("seq_regex_debug", tout << "adding derivative: " << mk_pp(p.second, m) << std::endl;); results.push_back(p.second); } } From 938dc433261e21b134d25026bf9b194974ab7688 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 08:03:58 -0400 Subject: [PATCH 35/51] use u_map instead of uint_map to avoid memory leak --- src/smt/seq_regex.cpp | 56 +++++++++++++++++++++---------------------- src/smt/seq_regex.h | 9 ++++--- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 015fe4f3a3f..021e946b059 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -688,9 +688,9 @@ namespace smt { // Initialize as unvisited m_seen.insert(s); m_unexplored.insert(s); - m_targets.insert(s, new state_set()); - m_sources.insert(s, new state_set()); - m_sources_maybecycle.insert(s, new state_set()); + m_targets.insert(s, state_set()); + m_sources.insert(s, state_set()); + m_sources_maybecycle.insert(s, state_set()); } void state_graph::remove_state_core(state s) { // This is a partial deletion -- the state is still seen and can't be @@ -701,9 +701,9 @@ namespace smt { SASSERT(m_seen.contains(s)); SASSERT(!m_state_ufind.is_root(s)); SASSERT(m_unknown.contains(s)); - m_targets.erase(s); - m_sources.erase(s); - m_sources_maybecycle.erase(s); + m_targets.remove(s); + m_sources.remove(s); + m_sources_maybecycle.remove(s); m_unknown.remove(s); } @@ -742,31 +742,31 @@ namespace smt { SASSERT(m_state_ufind.is_root(s1)); SASSERT(m_state_ufind.is_root(s2)); if (s1 == s2) return; - if (!m_targets.find(s1)->contains(s2)) { + if (!m_targets.find(s1).contains(s2)) { // add new edge STRACE("seq_regex_debug", tout << std::endl << " DEBUG: new edge! 
";); - m_targets.find(s1)->insert(s2); - m_sources.find(s2)->insert(s1); - if (maybecycle) m_sources_maybecycle.find(s2)->insert(s1); + m_targets.find(s1).insert(s2); + m_sources.find(s2).insert(s1); + if (maybecycle) m_sources_maybecycle.find(s2).insert(s1); } - else if (!maybecycle && m_sources_maybecycle.find(s2)->contains(s1)) { + else if (!maybecycle && m_sources_maybecycle.find(s2).contains(s1)) { // update existing edge STRACE("seq_regex_debug", tout << std::endl << " DEBUG: update edge! ";); - m_sources_maybecycle.find(s2)->remove(s1); + m_sources_maybecycle.find(s2).remove(s1); } } void state_graph::remove_edge_core(state s1, state s2) { - SASSERT(m_targets.find(s1)->contains(s2)); - SASSERT(m_sources.find(s2)->contains(s1)); - m_targets.find(s1)->remove(s2); - m_sources.find(s2)->remove(s1); - m_sources_maybecycle.find(s2)->remove(s1); + SASSERT(m_targets.find(s1).contains(s2)); + SASSERT(m_sources.find(s2).contains(s1)); + m_targets.find(s1).remove(s2); + m_sources.find(s2).remove(s1); + m_sources_maybecycle.find(s2).remove(s1); } void state_graph::rename_edge_core(state old1, state old2, state new1, state new2) { - SASSERT(m_targets.find(old1)->contains(old2)); - SASSERT(m_sources.find(old2)->contains(old1)); - bool maybecycle = m_sources_maybecycle.find(old2)->contains(old1); + SASSERT(m_targets.find(old1).contains(old2)); + SASSERT(m_sources.find(old2).contains(old1)); + bool maybecycle = m_sources_maybecycle.find(old2).contains(old1); remove_edge_core(old1, old2); add_edge_core(new1, new2, maybecycle); } @@ -791,10 +791,10 @@ namespace smt { m_state_ufind.merge(s1, s2); if (m_state_ufind.is_root(s2)) std::swap(s1, s2); // rename s2 to s1 in edges - for (auto s_to: *m_targets.find(s2)) { + for (auto s_to: m_targets.find(s2)) { rename_edge_core(s2, s_to, s1, s_to); } - for (auto s_from: *m_sources.find(s2)) { + for (auto s_from: m_sources.find(s2)) { rename_edge_core(s_from, s2, s_from, s1); } remove_state_core(s2); @@ -825,7 +825,7 @@ namespace smt { 
<< std::endl << " DEBUG: mark live recursive: " << s << " ";); if (m_live.contains(s)) return; mark_live_core(s); - for (auto s_from: *m_sources.find(s)) { + for (auto s_from: m_sources.find(s)) { mark_live_recursive(s_from); } } @@ -841,14 +841,14 @@ namespace smt { STRACE("seq_regex_debug", tout << std::endl << " DEBUG: mark dead recursive: " << s << " ";); if (!m_unknown.contains(s)) return; - for (auto s_to: *m_targets.find(s)) { + for (auto s_to: m_targets.find(s)) { // unknown pointing to live should have been marked as live! SASSERT(!m_live.contains(s_to)); if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return; } // all states from s are dead mark_dead_core(s); - for (auto s_from: *m_sources.find(s)) { + for (auto s_from: m_sources.find(s)) { mark_dead_recursive(s_from); } } @@ -874,7 +874,7 @@ namespace smt { visited.insert(x); // recurse backwards only on maybecycle edges // and only on unknown states - for (auto y: *m_sources_maybecycle.find(x)) { + for (auto y: m_sources_maybecycle.find(x)) { if (m_unknown.contains(y)) to_search.push_back(y); } @@ -883,7 +883,7 @@ namespace smt { resolved.insert(x); to_search.pop_back(); // determine in SCC or not - for (auto y: *m_sources_maybecycle.find(x)) { + for (auto y: m_sources_maybecycle.find(x)) { if (scc.contains(y)) { scc.insert(x); break; @@ -968,7 +968,7 @@ namespace smt { of << "Edges:" << std::endl; for (auto s1: m_seen) { if (m_state_ufind.is_root(s1)) { - of << " " << s1 << " -> " << *m_targets.find(s1) << std::endl; + of << " " << s1 << " -> " << m_targets.find(s1) << std::endl; } } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 39ac16bd2ce..63e6673787e 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -18,7 +18,6 @@ Module Name: #include "util/scoped_vector.h" #include "util/uint_set.h" -#include "util/uint_map.h" #include "util/union_find.h" #include "ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" @@ -54,10 +53,10 @@ namespace smt { live and dead 
SCCs. */ class state_graph { - typedef unsigned state; - typedef uint_set state_set; - typedef uint_map edge_rel; - typedef basic_union_find state_ufind; + typedef unsigned state; + typedef uint_set state_set; + typedef u_map edge_rel; + typedef basic_union_find state_ufind; private: /* From 2a735b76e4893be3a91d2c8e2f359434fb8b066e Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 08:07:43 -0400 Subject: [PATCH 36/51] remove unnecessary call to is_ground --- src/smt/seq_regex.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 021e946b059..c62800d36a6 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -210,13 +210,16 @@ namespace smt { << "," << r->get_id() << ") ";); - if (re().is_empty(r) - || m_state_graph.is_dead(get_state_id(r))) { + if (re().is_empty(r)) { th.add_axiom(~lit); return true; } - if (!m.is_ite(r) && is_ground(r)) { + if (!m.is_ite(r)) { update_state_graph(r); + if (m_state_graph.is_dead(get_state_id(r))) { + th.add_axiom(~lit); + return true; + } } if (block_unfolding(lit, idx)) From 448e673e20cff7bfd650ffffd77fbbce5f383317 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 14:15:48 -0400 Subject: [PATCH 37/51] debugging --- src/ast/rewriter/seq_rewriter.cpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 99635a54529..c9a43e98a93 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2446,15 +2446,15 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) { */ bool seq_rewriter::lt_char(expr* ch1, expr* ch2) { unsigned u1, u2; - return (m_util.is_const_char(ch1, u1) && - m_util.is_const_char(ch2, u2) && + return (u().is_const_char(ch1, u1) && + u().is_const_char(ch2, u2) && (u1 < u2)); } bool seq_rewriter::eq_char(expr* ch1, expr* ch2) { unsigned u1, u2; 
return ((ch1 == ch2) || ( - m_util.is_const_char(ch1, u1) && - m_util.is_const_char(ch2, u2) && + u().is_const_char(ch1, u1) && + u().is_const_char(ch2, u2) && (u1 == u2) )); } @@ -2473,25 +2473,28 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) { - a and b are char <= constraints, or negations of char <= constraints */ bool seq_rewriter::pred_implies(expr* a, expr* b) { + STRACE("seq_verbose", tout << "pred_implies: " + << "," << mk_pp(a, m()) + << "," << mk_pp(b, m()) << std::endl;); expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr, *chb1 = nullptr, *chb2 = nullptr, *notb = nullptr; if (m().is_not(a, nota) && m().is_not(b, notb)) { return pred_implies(notb, nota); } - else if (m_util.is_char_le(a, cha1, cha2) && - m_util.is_char_le(b, chb1, chb2)) { + else if (u().is_char_le(a, cha1, cha2) && + u().is_char_le(b, chb1, chb2)) { return (le_char(chb1, cha1) && le_char(cha2, chb2)); } - else if (m_util.is_char_le(a, cha1, cha2) && + else if (u().is_char_le(a, cha1, cha2) && m().is_not(b, notb) && - m_util.is_char_le(notb, chb1, chb2)) { + u().is_char_le(notb, chb1, chb2)) { return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) || (lt_char(chb2, cha1) && le_char(cha2, chb1))); } - else if (m_util.is_char_le(b, chb1, chb2) && + else if (u().is_char_le(b, chb1, chb2) && m().is_not(a, nota) && - m_util.is_char_le(nota, cha1, cha2)) { + u().is_char_le(nota, cha1, cha2)) { return (le_char(chb1, cha2) && le_char(cha1, chb2)); } else { @@ -2511,6 +2514,9 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) { - result is in BDD form */ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { + STRACE("seq_verbose", tout << "mk_der_op_rec: " << k + << "," << mk_pp(a, m()) + << "," << mk_pp(b, m()) << std::endl;); // STRACE("seq_regex_brief", tout << ".";); // recursive call expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; @@ -2630,6 +2636,8 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* 
b) { } expr_ref seq_rewriter::mk_der_compl(expr* r) { + STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m()) + << std::endl;); // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { @@ -4377,6 +4385,9 @@ void seq_rewriter::op_cache::cleanup() { if (m_table.size() >= m_max_cache_size) { m_trail.reset(); m_table.reset(); + STRACE("seq_regex", tout << "Op cache reset!" << std::endl;); + STRACE("seq_regex_brief", tout << " (OP CACHE RESET)";); + // trace_and_reset_cache_counts(); } } From 0fd25e08f333c93cc0accd5d74f4557f48f413cb Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 17:39:37 -0400 Subject: [PATCH 38/51] small improvements to seq_regex_brief tracing --- src/smt/seq_regex.cpp | 108 +++++++++++++++++++++--------------------- src/smt/seq_regex.h | 6 ++- 2 files changed, 59 insertions(+), 55 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index c62800d36a6..26b52e2c850 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -122,12 +122,8 @@ namespace smt { VERIFY(str().is_in_re(e, s, r)); TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", - tout << "PIR(" - << s->get_id() - << "," - << r->get_id() - << ") ";); + STRACE("seq_regex_brief", tout << "PIR(" << mk_pp(s, m) << "," + << state_str(r) << ") ";); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -204,11 +200,9 @@ namespace smt { VERIFY(sk().is_accept(e, s, i, idx, r)); TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", - tout << std::endl << "P(" << mk_pp(s, m) - << "," << idx - << "," << r->get_id() - << ") ";); + STRACE("seq_regex_brief", tout << std::endl + << "P(" << mk_pp(s, m) << "@" << idx + << "," << state_str(r) << ") ";); if (re().is_empty(r)) { th.add_axiom(~lit); @@ -295,7 +289,7 
@@ namespace smt { d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); // timer tm; - // std::cout << d->get_id() << " " << tm.get_seconds() << std::endl; + // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl; //if (tm.get_seconds() > 0.3) // std::cout << d << std::endl; // std::cout.flush(); @@ -440,12 +434,8 @@ namespace smt { rewrite(result); STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); - STRACE("seq_regex_brief", - tout << "n(" - << r->get_id() - << "->" - << result->get_id() - << ") ";); + STRACE("seq_regex_brief", tout << "n(" << state_str(r) << ")=" + << mk_pp(result, m) << " ";); seq_rw().trace_and_reset_cache_counts(); return result; @@ -463,14 +453,8 @@ namespace smt { rewrite(result); STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - STRACE("seq_regex_brief", - tout << "d(" - << mk_pp(hd, m) - << "," - << r->get_id() - << "->" - << result->get_id() - << ") ";); + STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")=" + << state_str(result) << " ";); seq_rw().trace_and_reset_cache_counts(); /* If the following lines are enabled instead, we use the @@ -545,9 +529,10 @@ namespace smt { TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); STRACE("seq_regex_brief", - tout << std::endl << "PNE(" << e->get_id() - << "," << r->get_id() - << "," << u->get_id() + tout << std::endl << "PNE(" << expr_id_str(e) + << "," << state_str(r) + << "," << expr_id_str(u) + << "," << expr_id_str(n) << ") ";); expr_ref is_nullable = is_nullable_wrapper(r); @@ -558,8 +543,8 @@ namespace smt { expr_ref d(m); d = derivative_wrapper(hd, r); - STRACE("seq_regex_brief", tout << "(d subbed: " << d->get_id() << ") ";); - TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;); + // STRACE("seq_regex_brief", tout << "(d subbed: " << state_str(d) << ") ";); + // TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;); 
literal_vector lits; lits.push_back(~lit); @@ -633,10 +618,10 @@ namespace smt { TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); STRACE("seq_regex_brief", - tout << std::endl << "PE(" << e->get_id() - << "," << r->get_id() - << "," << u->get_id() - << "," << n->get_id() + tout << std::endl << "PE(" << expr_id_str(e) + << "," << state_str(r) + << "," << expr_id_str(u) + << "," << expr_id_str(n) << ") ";); if (m.is_true(is_nullable)) { @@ -952,32 +937,35 @@ namespace smt { !m_unexplored.contains(m_state_ufind.find(s))); } - void state_graph::pretty_print(std::ofstream& of) { - of << "---------- State Graph ----------" << std::endl; - of << "Seen:"; + /* + Pretty printing + */ + void state_graph::pretty_print(std::ostream& o) { + o << "---------- State Graph ----------" << std::endl + << "Seen:"; for (auto s: m_seen) { - of << " " << s; + o << " " << s; state s_root = m_state_ufind.find(s); if (s_root != s) - of << "(=" << s_root << ")"; + o << "(=" << s_root << ")"; } - of << std::endl; - - of << "Live:" << m_live << std::endl; - of << "Dead:" << m_dead << std::endl; - of << "Unknown:" << m_unknown << std::endl; - of << "Unexplored:" << m_unexplored << std::endl; - - of << "Edges:" << std::endl; + o << std::endl + << "Live:" << m_live << std::endl + << "Dead:" << m_dead << std::endl + << "Unknown:" << m_unknown << std::endl + << "Unexplored:" << m_unexplored << std::endl + << "Edges:" << std::endl; for (auto s1: m_seen) { if (m_state_ufind.is_root(s1)) { - of << " " << s1 << " -> " << m_targets.find(s1) << std::endl; + o << " " << s1 << " -> " << m_targets.find(s1) << std::endl; } } - - of << "---------------------------------" << std::endl; - + o << "---------------------------------" << std::endl; } + // std::ostream& operator<<(std::ostream& o, const state_graph& sg) { + // sg.pretty_print(o); + // return o; + // } // ********************************** @@ -987,6 +975,8 @@ namespace smt { m_state_to_expr.push_back(e); unsigned new_id = 
m_state_to_expr.size(); m_expr_to_state.insert(e, new_id); + STRACE("seq_regex_brief", tout << "new(" << expr_id_str(e) + << ")=" << state_str(e) << " ";); } return m_expr_to_state.find(e); } @@ -1014,10 +1004,10 @@ namespace smt { } STRACE("seq_regex", tout << "Updating state graph for regex " << mk_pp(r, m) << ") ";); - STRACE("seq_regex_brief", tout - << std::endl << "USG(" << r->get_id() << ") ";); // Add state m_state_graph.add_state(r_id); + STRACE("seq_regex_brief", tout << std::endl << "USG(" + << state_str(r) << ") ";); expr_ref r_nullable = is_nullable_wrapper(r); if (m.is_true(r_nullable)) { m_state_graph.mark_live(r_id); @@ -1043,4 +1033,14 @@ namespace smt { return true; } + std::string seq_regex::state_str(expr* e) { + if (m_expr_to_state.contains(e)) + return std::to_string(get_state_id(e)); + else + return expr_id_str(e); + } + std::string seq_regex::expr_id_str(expr* e) { + return std::string("id") + std::to_string(e->get_id()); + } + } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 63e6673787e..494a7842a09 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -153,7 +153,7 @@ namespace smt { /* Pretty printing */ - void pretty_print(std::ofstream& of); + void pretty_print(std::ostream& o); }; @@ -204,6 +204,10 @@ namespace smt { // Update the graph bool update_state_graph(expr* r); + // Printing for seq_regex_brief + std::string state_str(expr* e); + std::string expr_id_str(expr* e); + // ******************** seq_util& u(); From 1fc751f5bfa59edef4669af248ae3e27d3da06b7 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Wed, 1 Jul 2020 18:47:25 -0400 Subject: [PATCH 39/51] fix bug on evil2 example --- src/ast/rewriter/seq_rewriter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 272c25f0417..bd399f8bded 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2692,6 +2692,7 @@ expr_ref 
seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { } else { result = m().mk_true(); + result = re_predicate(result, seq_sort); } } else if (m().is_not(cond, c1)) { From 0fa8396f9e90d1638fc763761eef7e519f28d94f Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 2 Jul 2020 14:46:20 -0400 Subject: [PATCH 40/51] save work --- src/smt/seq_regex.cpp | 96 +++++++++++++++++++++++++++++++++++++++---- src/smt/seq_regex.h | 3 +- 2 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 26b52e2c850..7d5c385e670 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -204,24 +204,90 @@ namespace smt { << "P(" << mk_pp(s, m) << "@" << idx << "," << state_str(r) << ") ";); + expr* cond = nullptr, *tt = nullptr, *el = nullptr; if (re().is_empty(r)) { + STRACE("seq_regex_brief", tout << "f ";); th.add_axiom(~lit); return true; } - if (!m.is_ite(r)) { - update_state_graph(r); - if (m_state_graph.is_dead(get_state_id(r))) { - th.add_axiom(~lit); - return true; - } + else if (m.is_ite(r, cond, tt, el)) { + STRACE("seq_regex_brief", tout << "??? 
";); + return false; + + // literal lcond = th.mk_literal(cond); + // ctx.mark_as_relevant(lcond); + // trigger = lcond; + // expr_ref ncond(m), acc1(m), acc2(m), + // choice1(m), choice2(m), choice(m); + // ncond = m.mk_not(cond); + // acc1 = sk().mk_accept(s, a().mk_int(idx), tt); + // acc2 = sk().mk_accept(s, a().mk_int(idx), el); + // choice1 = m.mk_and(cond, acc1); + // choice2 = m.mk_and(ncond, acc2); + // choice = m.mk_or(choice1, choice2); + // th.propagate_lit(nullptr, 1, &lit, th.mk_literal(choice)); + // // th.propagate_lit(th.mk_literal(choice)); + // // literal_vector choice_lit; + // // choice_lit.push_back(th.mk_literal(choice)); + // // th.add_axiom(choice_lit); + // return true; } - if (block_unfolding(lit, idx)) + update_state_graph(r); + + if (m_state_graph.is_dead(get_state_id(r))) { + STRACE("seq_regex_brief", tout << "f ";); + th.add_axiom(~lit); return true; + } + + if (block_unfolding(lit, idx)) { + STRACE("seq_regex_brief", tout << "(blocked) ";); + return true; + } + + // Unfold + STRACE("seq_regex_brief", tout << "u ";); + expr_ref is_nullable = is_nullable_wrapper(r); + expr_ref hd = th.mk_nth(s, i); + expr_ref deriv(m); + deriv = derivative_wrapper(hd, r); - propagate_nullable(lit, s, idx, r); + literal_vector unfold_disj; + unfold_disj.push_back(~lit); + unfold_disj.push_back(th.mk_literal(is_nullable)); + expr_ref_pair_vector cofactors(m); + get_cofactors(deriv, cofactors); + for (auto const& p : cofactors) { + if (m.is_false(p.first) || re().is_empty(p.second)) continue; + expr_ref cond(p.first, m); + expr_ref deriv_leaf(p.second, m); + expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf); + expr_ref choice(m); + choice = m.mk_and(cond, acc); + unfold_disj.push_back(th.mk_literal(choice)); + STRACE("seq_regex_debug", tout << "adding choice: " + << mk_pp(choice, m) << std::endl;); + } + th.add_axiom(unfold_disj); + return true; + + // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m); + // head = 
th.mk_nth(s, i); + // deriv = derivative_wrapper(head, r); + // th.add_axiom(~lit, ~th.mk_literal(is_nullable)); + // + // acc_next = sk().mk_accept(s, a().mk_int(idx + 1), deriv); + // unfold = m.mk_or(is_nullable, acc_next); + // + // literal_vector unfold_lit; + // unfold_lit.push_back(th.mk_literal(unfold)); + // th.add_axiom(unfold_lit); + // return true; - return propagate_derivative(lit, e, s, i, idx, r, trigger); + // propagate_nullable(lit, s, idx, r); + // + // return propagate_derivative(lit, e, s, i, idx, r, trigger); } /** @@ -251,15 +317,18 @@ namespace smt { literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { + STRACE("seq_regex_brief", tout << "t ";); th.propagate_lit(nullptr, 1,&lit, len_s_ge_i); } else if (m.is_false(is_nullable)) { + STRACE("seq_regex_brief", tout << "f ";); th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1)); // @EXP (experimental change) //unsigned len = std::max(1u, re().min_length(r)); //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r))); } else { + STRACE("seq_regex_brief", tout << "? 
";); literal is_nullable_lit = th.mk_literal(is_nullable); ctx.mark_as_relevant(is_nullable_lit); literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); @@ -288,6 +357,11 @@ namespace smt { expr_ref head = th.mk_nth(s, i); d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); + + // TODO + // conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d))); + // th.add_axiom(conds); + // timer tm; // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl; //if (tm.get_seconds() > 0.3) @@ -306,14 +380,17 @@ namespace smt { literal lcond = th.mk_literal(subst(cond, sub)); switch (ctx.get_assignment(lcond)) { case l_true: + STRACE("seq_regex_brief", tout << "t ";); conds.push_back(~lcond); d = tt; break; case l_false: + STRACE("seq_regex_brief", tout << "f ";); conds.push_back(lcond); d = el; break; case l_undef: + STRACE("seq_regex_brief", tout << "? ";); #if 1 ctx.mark_as_relevant(lcond); trigger = lcond; @@ -449,6 +526,7 @@ namespace smt { expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); + // expr_ref result = seq_rw().mk_derivative(hd, r); expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 494a7842a09..5b27fd50b7b 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -199,7 +199,8 @@ namespace smt { // Convert between expressions and states (IDs) unsigned get_state_id(expr* e); expr* get_expr_from_id(unsigned id); - // Cycle-detection heuristic (sound but not complete) + // Cycle-detection heuristic + // Note: Doesn't need to be sound or complete (doesn't affect soundness) bool can_be_in_cycle(expr* e1, expr* e2); // Update the graph bool update_state_graph(expr* r); From 5623024d3e754842c923a9fab1464d2a11855220 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 2 Jul 2020 15:57:39 -0400 Subject: [PATCH 41/51] new propagate code --- 
src/smt/seq_regex.cpp | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 7d5c385e670..1850b3f415e 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -206,14 +206,17 @@ namespace smt { expr* cond = nullptr, *tt = nullptr, *el = nullptr; if (re().is_empty(r)) { - STRACE("seq_regex_brief", tout << "f ";); + STRACE("seq_regex_brief", tout << "(empty) ";); th.add_axiom(~lit); return true; } else if (m.is_ite(r, cond, tt, el)) { - STRACE("seq_regex_brief", tout << "??? ";); + STRACE("seq_regex_brief", tout << "(ite) ";); return false; + // @EXP (Experimental change) + // This code tries to unfold the derivative one step at a time + // and propagate the if the elses. // literal lcond = th.mk_literal(cond); // ctx.mark_as_relevant(lcond); // trigger = lcond; @@ -236,7 +239,7 @@ namespace smt { update_state_graph(r); if (m_state_graph.is_dead(get_state_id(r))) { - STRACE("seq_regex_brief", tout << "f ";); + STRACE("seq_regex_brief", tout << "(dead) ";); th.add_axiom(~lit); return true; } @@ -247,15 +250,25 @@ namespace smt { } // Unfold - STRACE("seq_regex_brief", tout << "u ";); - expr_ref is_nullable = is_nullable_wrapper(r); + STRACE("seq_regex_brief", tout << "(unfold) ";); + + // First axiom: accept(s, idx, r) => len(s) >= idx + literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); + th.add_axiom(~lit, len_s_ge_i); + + // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable + literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); + literal is_nullable = th.mk_literal(is_nullable_wrapper(r)); + th.add_axiom(~lit, ~len_s_le_i, is_nullable); + + // Third axiom: accept(s, idx, r) and not (len_s_le_i) => + // accept(s, idx+1, dr) for some derivative r + literal_vector accept_next; expr_ref hd = th.mk_nth(s, i); expr_ref deriv(m); deriv = derivative_wrapper(hd, r); - - literal_vector unfold_disj; - unfold_disj.push_back(~lit); - 
unfold_disj.push_back(th.mk_literal(is_nullable)); + accept_next.push_back(~lit); + accept_next.push_back(len_s_le_i); expr_ref_pair_vector cofactors(m); get_cofactors(deriv, cofactors); for (auto const& p : cofactors) { @@ -265,13 +278,15 @@ namespace smt { expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf); expr_ref choice(m); choice = m.mk_and(cond, acc); - unfold_disj.push_back(th.mk_literal(choice)); + accept_next.push_back(th.mk_literal(choice)); STRACE("seq_regex_debug", tout << "adding choice: " << mk_pp(choice, m) << std::endl;); } - th.add_axiom(unfold_disj); + th.add_axiom(accept_next); + + // Done (successful propagation) return true; - + // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m); // head = th.mk_nth(s, i); // deriv = derivative_wrapper(head, r); From 17e0ef19bec3d7489e2ecb5b3301a643e0ed5d65 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Thu, 2 Jul 2020 21:55:35 -0400 Subject: [PATCH 42/51] work in progress on using same seq sort for deriv calls --- src/ast/rewriter/seq_rewriter.cpp | 2 +- src/smt/seq_regex.cpp | 14 ++++++++++++++ src/smt/seq_regex.h | 7 +++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index bd399f8bded..1a94a947a45 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4392,7 +4392,7 @@ void seq_rewriter::op_cache::cleanup() { m_trail.reset(); m_table.reset(); STRACE("seq_regex", tout << "Op cache reset!" 
<< std::endl;); - STRACE("seq_regex_brief", tout << " (OP CACHE RESET)";); + STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";); // trace_and_reset_cache_counts(); } } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 1850b3f415e..f389b5e97f2 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -25,6 +25,7 @@ namespace smt { th(th), ctx(th.get_context()), m(th.get_manager()), + m_deriv_head(), m_state_graph(), m_expr_to_state(), m_state_to_expr(m) @@ -748,6 +749,19 @@ namespace smt { } } + expr_ref get_head_var(sort* seq_sort) { + expr_ref result(m); + if (m_deriv_head.contains(seq_sort)) { + result = m_deriv_head.find(seq_sort); + STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m);); + } + else { + result = m.mk_fresh_const("re.char", seq_sort); + STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m);); + } + return result; + } + expr_ref seq_regex::mk_first(expr* r, expr* n) { sort* elem_sort = nullptr, *seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5b27fd50b7b..bf86e4a6e52 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -189,6 +189,13 @@ namespace smt { vector m_s_in_re; scoped_vector m_to_propagate; + /* + ID for fresh variable for derivative + */ + ptr_addr_map m_deriv_head; + // Get var for head based on sort + expr_ref get_head_var(sort* seq_sort); + /* state_graph for dead state detection, and associated methods */ From 6cb1ef95eafc1d2ea428a85e856df2ee8a40bc28 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 3 Jul 2020 08:17:38 -0400 Subject: [PATCH 43/51] avoid re-computing derivatives: use same head var for every derivative call --- src/smt/seq_regex.cpp | 49 +++++++++++++++++++++++++++---------------- src/smt/seq_regex.h | 7 ------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index f389b5e97f2..8cd56672dae 100644 --- a/src/smt/seq_regex.cpp +++ 
b/src/smt/seq_regex.cpp @@ -25,7 +25,6 @@ namespace smt { th(th), ctx(th.get_context()), m(th.get_manager()), - m_deriv_head(), m_state_graph(), m_expr_to_state(), m_state_to_expr(m) @@ -212,12 +211,13 @@ namespace smt { return true; } else if (m.is_ite(r, cond, tt, el)) { + UNREACHABLE(); STRACE("seq_regex_brief", tout << "(ite) ";); return false; // @EXP (Experimental change) // This code tries to unfold the derivative one step at a time - // and propagate the if the elses. + // and propagate the if-then-elses. // literal lcond = th.mk_literal(cond); // ctx.mark_as_relevant(lcond); // trigger = lcond; @@ -250,7 +250,7 @@ namespace smt { return true; } - // Unfold + // Unfold the constraint into 3 axioms STRACE("seq_regex_brief", tout << "(unfold) ";); // First axiom: accept(s, idx, r) => len(s) >= idx @@ -285,7 +285,7 @@ namespace smt { } th.add_axiom(accept_next); - // Done (successful propagation) + // Propagated successfully return true; // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m); @@ -542,10 +542,20 @@ namespace smt { expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); - // expr_ref result = seq_rw().mk_derivative(hd, r); - expr_ref result = expr_ref(re().mk_derivative(hd, r), m); + // Use canonical variable for head; substitute with hd later + // sort* seq_sort = nullptr; + // VERIFY(u().is_re(r, seq_sort)); + // expr_ref hd_canon = get_head_var(sq_sort); + expr_ref hd_canon(m.mk_var(0, m.get_sort(hd)), m); + expr_ref result(re().mk_derivative(hd_canon, r), m); rewrite(result); + // Substitute + var_subst subst(m); + expr_ref_vector sub(m); + sub.push_back(hd); + result = subst(result, sub); + STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")=" << state_str(result) << " ";); @@ -749,18 +759,21 @@ namespace smt { } } - expr_ref 
get_head_var(sort* seq_sort) { - expr_ref result(m); - if (m_deriv_head.contains(seq_sort)) { - result = m_deriv_head.find(seq_sort); - STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m);); - } - else { - result = m.mk_fresh_const("re.char", seq_sort); - STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m);); - } - return result; - } + // @EXP: Experimental change + // Some code to compute a canonical head variable, but I think + // this stuff is unnecessary. + // expr_ref seq_regex::get_head_var(sort* seq_sort) { + // expr_ref result(m); + // if (m_deriv_head.contains(seq_sort)) { + // result = m_deriv_head.find(seq_sort); + // STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m);); + // } + // else { + // result = m.mk_fresh_const("re.char", seq_sort); + // STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m);); + // } + // return result; + // } expr_ref seq_regex::mk_first(expr* r, expr* n) { sort* elem_sort = nullptr, *seq_sort = nullptr; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index bf86e4a6e52..5b27fd50b7b 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -189,13 +189,6 @@ namespace smt { vector m_s_in_re; scoped_vector m_to_propagate; - /* - ID for fresh variable for derivative - */ - ptr_addr_map m_deriv_head; - // Get var for head based on sort - expr_ref get_head_var(sort* seq_sort); - /* state_graph for dead state detection, and associated methods */ From becbdbaaa71fca8a0176d7dcb00df535ae048388 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 3 Jul 2020 11:49:06 -0400 Subject: [PATCH 44/51] use min_length on regexes to prune search --- src/smt/seq_regex.cpp | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 8cd56672dae..4f9068aec77 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -253,9 +253,16 @@ namespace smt { // Unfold the constraint into 3 axioms 
STRACE("seq_regex_brief", tout << "(unfold) ";); - // First axiom: accept(s, idx, r) => len(s) >= idx - literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); - th.add_axiom(~lit, len_s_ge_i); + // @EXP: First axiom: accept(s, idx, r) => len(s) >= idx + min_len(r); + expr_ref s_to_re(re().mk_to_re(s), m); + expr_ref s_plus_r(re().mk_concat(s_to_re, r), m); + unsigned min_len = re().min_length(s_plus_r); + literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len); + th.add_axiom(~lit, len_s_ge_min); + + // // First axiom: accept(s, idx, r) => len(s) >= idx + // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); + // th.add_axiom(~lit, len_s_ge_i); // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); @@ -277,10 +284,17 @@ namespace smt { expr_ref cond(p.first, m); expr_ref deriv_leaf(p.second, m); expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf); - expr_ref choice(m); - choice = m.mk_and(cond, acc); - accept_next.push_back(th.mk_literal(choice)); - STRACE("seq_regex_debug", tout << "adding choice: " + expr_ref choice(m.mk_and(cond, acc), m); + literal choice_lit = th.mk_literal(choice); + accept_next.push_back(choice_lit); + // Prioritize unvisited states + // if (!m_state_graph.is_done(get_state_id(deriv_leaf))) { + // // @EXP Unsound test: only push if not done + // accept_next.push_back(choice_lit); + // // @EXP This didn't work -- just marking as relevant + // // ctx.mark_as_relevant(choice_lit); + // } + STRACE("seq_regex_debug", tout << "added choice: " << mk_pp(choice, m) << std::endl;); } th.add_axiom(accept_next); From a1da9ae6c0b932dce1d53dbf76590fdbe00664d7 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 3 Jul 2020 12:54:03 -0400 Subject: [PATCH 45/51] simple implementation of can_be_in_cycle using rank function idea --- src/smt/seq_regex.cpp | 46 ++++++++++++++++++++++++++++++++++++++++--- src/smt/seq_regex.h | 4 +++- 2 files changed, 46 insertions(+), 4 
deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 4f9068aec77..7b7e405af0b 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -1120,9 +1120,49 @@ namespace smt { return m_state_to_expr.get(id); } - bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) { - // Simple placeholder. TODO: Implement full check - return true; + + unsigned seq_regex::concat_length(expr* r) { + // length of the concatenations at the top level + expr *r1 = nullptr, *r2 = nullptr; + if (re().is_concat(r, r1, r2)) + return concat_length(r1) + concat_length(r2); + else + return 1; + } + + unsigned seq_regex::re_rank(expr* r) { + SASSERT(u.is_re(r)); + expr *r1 = nullptr, *r2 = nullptr, *s = nullptr; + unsigned lo = 0, hi = 0; + if (re().is_empty(r)) + return 0; + if (re().is_concat(r, r1, r2)) + return std::max(re_rank(r1) + concat_length(r2), re_rank(r2)); + if (re().is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) + return std::max(re_rank(r1), re_rank(r2)); + if (re().is_intersection(r, r1, r2) || re().is_diff(r, r1, r2)) + return re_rank(r1) + re_rank(r2); + if (re().is_plus(r, r1) || re().is_star(r, r1)) + return re_rank(r1) + 1; + if (re().is_loop(r, r1, lo) || re().is_loop(r, r1, lo, hi)) + return re_rank(r1) + lo; + if (re().is_reverse(r, r1) || re().is_opt(r, r1)) + // in reverse case, should be r1 is a string + return re_rank(r1); + if (re().is_to_re(r, s)) + return u().str.min_length(s); + // Else: range, pred, char, full_seq, derivative + return 1; + } + + bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) { + // @EXP (experimental change): Use a "rank" function, which is + // a pseudo-topological order on the state graph, to detect when r2 + // is a simpler regex than r1 + unsigned k1 = re_rank(r1); + unsigned k2 = re_rank(r2); + SASSERT(k1 >= k2); + return (k1 == k2); } /* diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5b27fd50b7b..e6b7afe41f0 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -201,7 +201,9 @@ 
namespace smt { expr* get_expr_from_id(unsigned id); // Cycle-detection heuristic // Note: Doesn't need to be sound or complete (doesn't affect soundness) - bool can_be_in_cycle(expr* e1, expr* e2); + unsigned concat_length(expr* r); + unsigned re_rank(expr* r); + bool can_be_in_cycle(expr* r1, expr* r2); // Update the graph bool update_state_graph(expr* r); From 20000126b5f5f7e185bd87b052bc59f911749240 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 3 Jul 2020 13:55:23 -0400 Subject: [PATCH 46/51] add a disabled experimental change --- src/smt/seq_regex.cpp | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 7b7e405af0b..93005fdfe1b 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -250,27 +250,30 @@ namespace smt { return true; } - // Unfold the constraint into 3 axioms STRACE("seq_regex_brief", tout << "(unfold) ";); - // @EXP: First axiom: accept(s, idx, r) => len(s) >= idx + min_len(r); + // First axiom: use min_length to prune search + // accept(s, idx, r) => len(s) >= idx + min_len(r) expr_ref s_to_re(re().mk_to_re(s), m); expr_ref s_plus_r(re().mk_concat(s_to_re, r), m); unsigned min_len = re().min_length(s_plus_r); literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len); th.add_axiom(~lit, len_s_ge_min); - // // First axiom: accept(s, idx, r) => len(s) >= idx + // Old first axiom: accept(s, idx, r) => len(s) >= idx // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); // th.add_axiom(~lit, len_s_ge_i); - // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable + // Second axiom: nullable check + // accept(s, idx, r) and len(s) <= idx => r nullable literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); literal is_nullable = th.mk_literal(is_nullable_wrapper(r)); th.add_axiom(~lit, ~len_s_le_i, is_nullable); - // Third axiom: accept(s, idx, r) and not (len_s_le_i) => - // accept(s, idx+1, dr) for some derivative r + // 
Third axiom: derivative unfolding + // accept(s, idx, r) and not (len_s_le_i) => + // OR_(cond, dr) cond and accept(s, idx+1, dr) + // over all derivatives dr and conditions cond on the head literal_vector accept_next; expr_ref hd = th.mk_nth(s, i); expr_ref deriv(m); @@ -283,6 +286,14 @@ namespace smt { if (m.is_false(p.first) || re().is_empty(p.second)) continue; expr_ref cond(p.first, m); expr_ref deriv_leaf(p.second, m); + + // @EXP (Experimental change) + // Skip searching when can_be_in_cycle returns true + // Result: Besides being unsound as written, this is not + // fine-grained enough. In case of intersections, many + // edges return true for can_be_in_cycle + // if (can_be_in_cycle(deriv, deriv_leaf)) continue; + expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf); expr_ref choice(m.mk_and(cond, acc), m); literal choice_lit = th.mk_literal(choice); @@ -1131,7 +1142,7 @@ namespace smt { } unsigned seq_regex::re_rank(expr* r) { - SASSERT(u.is_re(r)); + SASSERT(u().is_re(r)); expr *r1 = nullptr, *r2 = nullptr, *s = nullptr; unsigned lo = 0, hi = 0; if (re().is_empty(r)) @@ -1162,6 +1173,7 @@ namespace smt { unsigned k1 = re_rank(r1); unsigned k2 = re_rank(r2); SASSERT(k1 >= k2); + STRACE("seq_regex_brief", tout << "(k:" << k1 << "->" << k2 << ")";); return (k1 == k2); } From 3f55875cc00cb23c2db6d62f4f7003cb861c75db Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Fri, 3 Jul 2020 14:46:29 -0400 Subject: [PATCH 47/51] minor cleanup comments, etc. 
--- src/ast/rewriter/seq_rewriter.cpp | 65 +++++-------------------------- src/smt/seq_regex.cpp | 6 +-- src/smt/seq_regex.h | 5 ++- 3 files changed, 15 insertions(+), 61 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 1a94a947a45..d4d7468d21c 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2192,46 +2192,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) { return result; } -// @EXP (experimental change) -// void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) { -// expr *s1 = nullptr, *r1 = nullptr; -// if (str().is_in_re(a1, s1, r1)) { -// SASSERT(str().is_empty(s1)); -// result = re().mk_complement(r1); -// result = re().mk_in_re(s1, result); -// } -// else { -// m_br.mk_not(a1, result); -// } -// } -// void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) { -// expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; -// if (str().is_in_re(a1, s1, r1) && -// str().is_in_re(a2, s2, r2)) { -// SASSERT(str().is_empty(s1)); -// SASSERT(str().is_empty(s2)); -// result = re().mk_inter(r1, r2); -// result = re().mk_in_re(s1, result); -// } -// else { -// m_br.mk_and(a1, a2, result); -// } -// } -// void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) { -// expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr; -// if (str().is_in_re(a1, s1, r1) && -// str().is_in_re(a2, s2, r2)) { -// SASSERT(str().is_empty(s1)); -// SASSERT(str().is_empty(s2)); -// result = re().mk_union(r1, r2); -// result = re().mk_in_re(s1, result); -// } -// else { -// m_br.mk_or(a1, a2, result); -// } -// } expr_ref seq_rewriter::is_nullable_rec(expr* r) { - // STRACE("seq_regex_brief", tout << ".";); // recursive call SASSERT(m_util.is_re(r) || m_util.is_seq(r)); expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; sort* seq_sort = nullptr; @@ -2239,22 +2200,15 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { zstring s1; expr_ref 
result(m()); if (re().is_concat(r, r1, r2) || - re().is_intersection(r, r1, r2)) { + re().is_intersection(r, r1, r2)) { m_br.mk_and(is_nullable(r1), is_nullable(r2), result); - // @EXP (experimental change) - // mk_nullable_and(is_nullable(r1), is_nullable(r2), result); } else if (re().is_union(r, r1, r2)) { m_br.mk_or(is_nullable(r1), is_nullable(r2), result); - // @EXP (experimental change) - // mk_nullable_or(is_nullable(r1), is_nullable(r2), result); } else if (re().is_diff(r, r1, r2)) { m_br.mk_not(is_nullable(r2), result); m_br.mk_and(result, is_nullable(r1), result); - // @EXP (experimental change) - // mk_nullable_not(is_nullable(r2), result); - // mk_nullable_and(result, is_nullable(r1), result); } else if (re().is_star(r) || re().is_opt(r) || @@ -2277,8 +2231,6 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { } else if (re().is_complement(r, r1)) { m_br.mk_not(is_nullable(r1), result); - // @EXP (experimental change) - // mk_nullable_not(is_nullable(r1), result); } else if (re().is_to_re(r, r1)) { result = is_nullable(r1); @@ -2417,7 +2369,6 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) << "," << mk_pp(r, m()) << std::endl;); - // STRACE("seq_regex_brief", tout << "d";); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); if (!result) { result = mk_derivative_rec(ele, r); @@ -2517,7 +2468,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { STRACE("seq_verbose", tout << "mk_der_op_rec: " << k << "," << mk_pp(a, m()) << "," << mk_pp(b, m()) << std::endl;); - // STRACE("seq_regex_brief", tout << ".";); // recursive call expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; expr_ref result(m()); @@ -2638,7 +2588,6 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { expr_ref 
seq_rewriter::mk_der_compl(expr* r) { STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m()) << std::endl;); - // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; @@ -2721,7 +2670,6 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { } expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - // STRACE("seq_regex_brief", tout << ".";); // recursive call expr_ref result(m()); sort* seq_sort = nullptr, *ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); @@ -4393,7 +4341,6 @@ void seq_rewriter::op_cache::cleanup() { m_table.reset(); STRACE("seq_regex", tout << "Op cache reset!" << std::endl;); STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";); - // trace_and_reset_cache_counts(); } } @@ -4401,10 +4348,18 @@ void seq_rewriter::op_cache::cleanup() { unsigned seq_rewriter::op_cache::cache_hits = 0; unsigned seq_rewriter::op_cache::cache_misses = 0; +/* + Reset the tracing counts of # of cache hits and misses, and + report them. + + Suppress reporting in the cases of 0/0 or 1/1 hits. + + Hits and misses are tracked globally using static variables + m_op_cache.cache_hits and m_op_cache.cache_misses. 
+*/ void seq_rewriter::trace_and_reset_cache_counts() { unsigned hits = m_op_cache.cache_hits; unsigned misses = m_op_cache.cache_misses; - // Suppress tracing of "0/0 hits" or "1/1 hits" if (hits >= 2 || misses >= 1) { STRACE("seq_regex", tout << "Op cache hits: " << hits diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 93005fdfe1b..5544879253d 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -25,8 +25,6 @@ namespace smt { th(th), ctx(th.get_context()), m(th.get_manager()), - m_state_graph(), - m_expr_to_state(), m_state_to_expr(m) {} @@ -404,9 +402,9 @@ namespace smt { // th.add_axiom(conds); // timer tm; - // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl; + // std::cout << d->get_id() << " " << tm.get_seconds() << "\n"; //if (tm.get_seconds() > 0.3) - // std::cout << d << std::endl; + // std::cout << d << "\n"; // std::cout.flush(); literal_vector conds; conds.push_back(~lit); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index e6b7afe41f0..2b861fd1aad 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -87,7 +87,7 @@ namespace smt { Edges are saved in both from and to maps. A subset of edges are also marked as possibly being part of a cycle by being stored in m_sources_maybecycle. 
- + Invariants: - TODO */ @@ -136,7 +136,8 @@ namespace smt { - outgoing edges are not added from a done state - a done state is not marked as live - edges are not added creating a cycle containing an edge with - maybecycle = false + maybecycle = false (this is not necessary for soundness, but + prevents completeness for successfully detecting dead states) */ void add_state(state s); void add_edge(state s1, state s2, bool maybecycle); From 0c33f03b28b28d4667fbf70f2876579588e048ea Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Mon, 27 Jul 2020 18:57:27 -0400 Subject: [PATCH 48/51] seq_rewriter cleanup for PR --- src/ast/rewriter/seq_rewriter.cpp | 79 +++++++------------------------ src/ast/rewriter/seq_rewriter.h | 5 -- 2 files changed, 18 insertions(+), 66 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 9d1ea3cb1d0..74286b3b383 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2205,7 +2205,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) { expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); if (!result) { result = is_nullable_rec(r); - m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); + m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); } STRACE("seq_verbose", tout << "is_nullable result: " << mk_pp(result, m()) << std::endl;); @@ -2417,20 +2417,14 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) { */ bool seq_rewriter::lt_char(expr* ch1, expr* ch2) { unsigned u1, u2; - return (u().is_const_char(ch1, u1) && - u().is_const_char(ch2, u2) && - (u1 < u2)); + return u().is_const_char(ch1, u1) && + u().is_const_char(ch2, u2) && (u1 < u2); } bool seq_rewriter::eq_char(expr* ch1, expr* ch2) { - unsigned u1, u2; - return ((ch1 == ch2) || ( - u().is_const_char(ch1, u1) && - u().is_const_char(ch2, u2) && - (u1 == u2) - )); + return ch1 == ch2; } bool seq_rewriter::le_char(expr* ch1, expr* ch2) { - return (eq_char(ch1, ch2) || 
lt_char(ch1, ch2)); + return eq_char(ch1, ch2) || lt_char(ch1, ch2); } /* @@ -2441,7 +2435,7 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) { Return true if we deduce that a implies b, false if unknown. Current cases handled: - - a and b are char <= constraints, or negations of char <= constraints + - a and b are char <= constraints, or negations of char <= constraints */ bool seq_rewriter::pred_implies(expr* a, expr* b) { STRACE("seq_verbose", tout << "pred_implies: " @@ -2455,22 +2449,20 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) { } else if (u().is_char_le(a, cha1, cha2) && u().is_char_le(b, chb1, chb2)) { - return (le_char(chb1, cha1) && le_char(cha2, chb2)); + return le_char(chb1, cha1) && le_char(cha2, chb2); } else if (u().is_char_le(a, cha1, cha2) && m().is_not(b, notb) && u().is_char_le(notb, chb1, chb2)) { - return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) || - (lt_char(chb2, cha1) && le_char(cha2, chb1))); + return (le_char(chb2, cha1) && lt_char(cha2, chb1)) || + (lt_char(chb2, cha1) && le_char(cha2, chb1)); } else if (u().is_char_le(b, chb1, chb2) && m().is_not(a, nota) && u().is_char_le(nota, cha1, cha2)) { - return (le_char(chb1, cha2) && le_char(cha1, chb2)); - } - else { - return false; + return le_char(chb1, cha2) && le_char(cha1, chb2); } + return false; } /* @@ -2491,16 +2483,17 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; expr_ref result(m()); + // Simplify if-then-elses whenever possible auto mk_ite = [&](expr* c, expr* a, expr* b) { return (a == b) ? 
a : m().mk_ite(c, a, b); }; - // @EXP (experimental change) // Use character code to order conditions auto get_id = [&](expr* e) { expr *ch1 = nullptr, *ch2 = nullptr; unsigned ch; if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch)) return ch; + // Fallback: use expression ID (but use same ID for complement) re().is_complement(e, e); return e->get_id(); }; @@ -2524,7 +2517,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { std::swap(a1, b1); std::swap(a2, b2); } - // @EXP (experimental change) // Simplify if there is a relationship between ca and cb if (pred_implies(ca, cb)) { r1 = mk_der_op(k, a1, b1); @@ -2622,10 +2614,10 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } /* - Make an re_predicate with condition cond, enforcing derivative - normal form on how conditions are written. + Make an re_predicate with an arbitrary condition cond, enforcing + derivative normal form on how conditions are written. - Rewrites everything to (ele <= x) constraints: + Tries to rewrites everything to (ele <= x) constraints: (ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none)) (a = ele) => " (a <= ele) => ite(ele <= a-1, none, epsilon) @@ -2665,18 +2657,15 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { } } else if (m().is_not(cond, c1)) { - UNREACHABLE(); result = mk_der_cond(c1, ele, seq_sort); result = mk_der_compl(result); } else if (m().is_and(cond, c1, c2)) { - UNREACHABLE(); r1 = mk_der_cond(c1, ele, seq_sort); r2 = mk_der_cond(c2, ele, seq_sort); result = mk_der_inter(r1, r2); } else if (m().is_or(cond, c1, c2)) { - UNREACHABLE(); r1 = mk_der_cond(c1, ele, seq_sort); r2 = mk_der_cond(c2, ele, seq_sort); result = mk_der_union(r1, r2); @@ -2763,16 +2752,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail - // result = re().mk_to_re(tl); - // return re_and(m_br.mk_eq_rw(ele, 
hd), result); - // @EXP (experimental change) - // Write 'head is equal' as a range constraint: - // (ele <= hd) and (hd <= ele) - // return mk_der_inter( - // re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)), - // re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl)) - // ); - // @EXP (experimental change) // Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv to_re" << std::endl;); result = m().mk_eq(ele, hd); @@ -2803,15 +2782,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { - // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd))); - // @EXP (experimental change) - // Write 'tail is equal' as a range constraint: - // (ele <= tl) and (tl <= ele) - // return mk_der_inter( - // re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))), - // re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd))) - // ); - // @EXP (experimental change) // Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;); result = m().mk_eq(ele, tl); @@ -2830,9 +2800,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { if (s1.length() == 1 && s2.length() == 1) { expr_ref ch1(m_util.mk_char(s1[0]), m()); expr_ref ch2(m_util.mk_char(s2[0]), m()); - // return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort), - // re_predicate(m_util.mk_le(ele, ch2), seq_sort)); - // @EXP (experimental change) // Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv range zstring" << std::endl;); expr_ref p1(u().mk_le(ch1, ele), m()); @@ -2848,9 +2815,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { } expr* e1 = nullptr, *e2 = nullptr; if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - // return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort), - // re_predicate(m_util.mk_le(ele, e2), seq_sort)); - // @EXP 
(experimental change) // Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv range str" << std::endl;); expr_ref p1(u().mk_le(e1, ele), m()); @@ -2868,10 +2832,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { array_util array(m()); expr* args[2] = { p, ele }; result = array.mk_select(2, args); - // return re_predicate(result, seq_sort); - // @EXP (experimental change) // Use mk_der_cond to normalize - // (It's a no-op in this case, however) STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;); return mk_der_cond(result, ele, seq_sort); } @@ -3063,14 +3024,10 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { } if (str().is_empty(a)) { result = is_nullable(b); - if (str().is_in_re(result)) { - // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_DONE" << std::endl;); + if (str().is_in_re(result)) return BR_DONE; - } - else { - // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_REWRITE_FULL" << std::endl;); + else return BR_REWRITE_FULL; - } } expr_ref hd(m()), tl(m()); diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 106fb79f5aa..c36cd399965 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -140,7 +140,6 @@ class seq_rewriter { unsigned m_max_cache_size { 10000 }; expr_ref_vector m_trail; op_table m_table; - void cleanup(); public: @@ -189,10 +188,6 @@ class seq_rewriter { // Calculate derivative, memoized and enforcing a normal form expr_ref is_nullable_rec(expr* r); - // @EXP (experimental change) - // void mk_nullable_not(expr* a1, expr_ref& result); - // void mk_nullable_and(expr* a1, expr* a2, expr_ref& result); - // void mk_nullable_or(expr* a1, expr* a2, expr_ref& result); expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); From 47661925f18feed6ccd15c63e0fb7e6996456a64 Mon Sep 17 00:00:00 2001 
From: Caleb Stanford Date: Mon, 27 Jul 2020 19:27:06 -0400 Subject: [PATCH 49/51] remove cache hit/miss counts tracing --- src/ast/rewriter/seq_rewriter.cpp | 36 ------------------------------- src/ast/rewriter/seq_rewriter.h | 6 ------ 2 files changed, 42 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 74286b3b383..ec9984264b8 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4296,10 +4296,6 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { op_entry e(op, a, b, nullptr); m_table.find(e, e); - #ifdef _TRACE - (e.r) ? (cache_hits++) : (cache_misses++) ; - #endif - return e.r; } @@ -4319,35 +4315,3 @@ void seq_rewriter::op_cache::cleanup() { STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";); } } - -#ifdef _TRACE -unsigned seq_rewriter::op_cache::cache_hits = 0; -unsigned seq_rewriter::op_cache::cache_misses = 0; - -/* - Reset the tracing counts of # of cache hits and misses, and - report them. - - Suppress reporting in the cases of 0/0 or 1/1 hits. - - Hits and misses are tracked globally using static variables - m_op_cache.cache_hits and m_op_cache.cache_misses. -*/ -void seq_rewriter::trace_and_reset_cache_counts() { - unsigned hits = m_op_cache.cache_hits; - unsigned misses = m_op_cache.cache_misses; - if (hits >= 2 || misses >= 1) { - STRACE("seq_regex", - tout << "Op cache hits: " << hits - << " (out of " << (hits + misses) - << ")" << std::endl; - ); - STRACE("seq_regex_brief", - tout << "(" << hits << "/" << (hits + misses) - << " hits) "; - ); - } - m_op_cache.cache_hits = 0; - m_op_cache.cache_misses = 0; -} -#endif diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index c36cd399965..19091a6f24b 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -345,11 +345,5 @@ class seq_rewriter { // heuristic elimination of element from condition that comes form a derivative. 
// special case optimization for conjunctions of equalities, disequalities and ranges. void elim_condition(expr* elem, expr_ref& cond); - - #ifdef _TRACE - void trace_and_reset_cache_counts(); - #else - static inline void trace_and_reset_cache_counts() {} - #endif }; From 8761356680ba6e3e9f7124622f600836384bd370 Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Mon, 27 Jul 2020 19:33:58 -0400 Subject: [PATCH 50/51] remove changes not in the rewriter --- src/ast/seq_decl_plugin.cpp | 38 +- src/smt/seq_regex.cpp | 737 ++---------------------------------- src/smt/seq_regex.h | 178 +-------- src/smt/theory_seq.cpp | 1 - 4 files changed, 53 insertions(+), 901 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 3040ee8999a..9764ba18c27 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1316,21 +1316,22 @@ unsigned seq_util::re::min_length(expr* r) const { unsigned lo = 0, hi = 0; if (is_empty(r)) return UINT_MAX; - if (is_concat(r, r1, r2)) + if (is_concat(r, r1, r2)) return u.max_plus(min_length(r1), min_length(r2)); - if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) + if (m.is_ite(r, s, r1, r2)) return std::min(min_length(r1), min_length(r2)); - if (is_intersection(r, r1, r2)) - return std::max(min_length(r1), min_length(r2)); - if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_plus(r, r1)) + if (is_diff(r, r1, r2)) return min_length(r1); - if (is_loop(r, r1, lo) || is_loop(r, r1, lo, hi)) + if (is_union(r, r1, r2)) + return std::min(min_length(r1), min_length(r2)); + if (is_intersection(r, r1, r2)) + return std::max(min_length(r1), min_length(r2)); + if (is_loop(r, r1, lo, hi)) return u.max_mul(lo, min_length(r1)); - if (is_to_re(r, s)) - return u.str.min_length(s); - if (is_range(r) || is_of_pred(r) || is_full_char(r)) + if (is_range(r)) return 1; - // Else: star, option, complement, full_seq, derivative + if (is_to_re(r, s)) + return u.str.min_length(s); return 0; } @@ -1340,21 +1341,20 @@ 
unsigned seq_util::re::max_length(expr* r) const { unsigned lo = 0, hi = 0; if (is_empty(r)) return 0; - if (is_concat(r, r1, r2)) + if (is_concat(r, r1, r2)) return u.max_plus(max_length(r1), max_length(r2)); - if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) + if (m.is_ite(r, s, r1, r2)) return std::max(max_length(r1), max_length(r2)); - if (is_intersection(r, r1, r2)) - return std::min(max_length(r1), max_length(r2)); - if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1)) + if (is_diff(r, r1, r2)) return max_length(r1); + if (is_union(r, r1, r2)) + return std::max(max_length(r1), max_length(r2)); + if (is_intersection(r, r1, r2)) + return std::min(max_length(r1), max_length(r2)); if (is_loop(r, r1, lo, hi)) return u.max_mul(hi, max_length(r1)); - if (is_to_re(r, s)) + if (is_to_re(r, s)) return u.str.max_length(s); - if (is_range(r) || is_of_pred(r) || is_full_char(r)) - return 1; - // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative return UINT_MAX; } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 5544879253d..41a79ae9efb 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -24,8 +24,7 @@ namespace smt { seq_regex::seq_regex(theory_seq& th): th(th), ctx(th.get_context()), - m(th.get_manager()), - m_state_to_expr(m) + m(th.get_manager()) {} seq_util& seq_regex::u() { return th.m_util; } @@ -104,14 +103,14 @@ namespace smt { } /** - * Propagate the atom (str.in_re s r) + * Propagate the atom (str.in.re s r) * * Propagation implements the following inference rules * - * (not (str.in_re s r)) => (str.in_re s (complement r)) - * (str.in_re s r) => r != {} + * (not (str.in.re s r)) => (str.in.re s (complement r)) + * (str.in.re s r) => r != {} * - * (str.in_re s r) => (accept s 0 r) + * (str.in.re s r) => (accept s 0 r) */ void seq_regex::propagate_in_re(literal lit) { @@ -119,9 +118,7 @@ namespace smt { expr* e = ctx.bool_var2expr(lit.var()); VERIFY(str().is_in_re(e, s, r)); - TRACE("seq_regex", tout << 
"propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PIR(" << mk_pp(s, m) << "," - << state_str(r) << ") ";); + TRACE("seq", tout << "propagate " << lit.sign() << " " << mk_pp(e, m) << "\n";); // convert negative negative membership literals to positive // ~(s in R) => s in C(R) @@ -168,13 +165,11 @@ namespace smt { } void seq_regex::propagate_accept(literal lit) { - TRACE("seq_regex", tout << "propagate accept" << std::endl;); - STRACE("seq_regex_brief", tout << "PA ";); - + // std::cout << "PA "; literal t = null_literal; if (!propagate(lit, t)) m_to_propagate.push_back(propagation_lit(lit, t)); - } + } /** * Propagate the atom (accept s i r) @@ -197,136 +192,21 @@ namespace smt { unsigned idx = 0; VERIFY(sk().is_accept(e, s, i, idx, r)); - TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", tout << std::endl - << "P(" << mk_pp(s, m) << "@" << idx - << "," << state_str(r) << ") ";); - - expr* cond = nullptr, *tt = nullptr, *el = nullptr; - if (re().is_empty(r)) { - STRACE("seq_regex_brief", tout << "(empty) ";); - th.add_axiom(~lit); - return true; - } - else if (m.is_ite(r, cond, tt, el)) { - UNREACHABLE(); - STRACE("seq_regex_brief", tout << "(ite) ";); - return false; - - // @EXP (Experimental change) - // This code tries to unfold the derivative one step at a time - // and propagate the if-then-elses. 
- // literal lcond = th.mk_literal(cond); - // ctx.mark_as_relevant(lcond); - // trigger = lcond; - // expr_ref ncond(m), acc1(m), acc2(m), - // choice1(m), choice2(m), choice(m); - // ncond = m.mk_not(cond); - // acc1 = sk().mk_accept(s, a().mk_int(idx), tt); - // acc2 = sk().mk_accept(s, a().mk_int(idx), el); - // choice1 = m.mk_and(cond, acc1); - // choice2 = m.mk_and(ncond, acc2); - // choice = m.mk_or(choice1, choice2); - // th.propagate_lit(nullptr, 1, &lit, th.mk_literal(choice)); - // // th.propagate_lit(th.mk_literal(choice)); - // // literal_vector choice_lit; - // // choice_lit.push_back(th.mk_literal(choice)); - // // th.add_axiom(choice_lit); - // return true; - } + // std::cout << "\nP " << idx << " " << r->get_id() << " "; - update_state_graph(r); + TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); - if (m_state_graph.is_dead(get_state_id(r))) { - STRACE("seq_regex_brief", tout << "(dead) ";); + if (re().is_empty(r)) { th.add_axiom(~lit); return true; } - if (block_unfolding(lit, idx)) { - STRACE("seq_regex_brief", tout << "(blocked) ";); + if (block_unfolding(lit, idx)) return true; - } - - STRACE("seq_regex_brief", tout << "(unfold) ";); - - // First axiom: use min_length to prune search - // accept(s, idx, r) => len(s) >= idx + min_len(r) - expr_ref s_to_re(re().mk_to_re(s), m); - expr_ref s_plus_r(re().mk_concat(s_to_re, r), m); - unsigned min_len = re().min_length(s_plus_r); - literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len); - th.add_axiom(~lit, len_s_ge_min); - - // Old first axiom: accept(s, idx, r) => len(s) >= idx - // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); - // th.add_axiom(~lit, len_s_ge_i); - - // Second axiom: nullable check - // accept(s, idx, r) and len(s) <= idx => r nullable - literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); - literal is_nullable = th.mk_literal(is_nullable_wrapper(r)); - th.add_axiom(~lit, ~len_s_le_i, is_nullable); - - // Third axiom: derivative unfolding - // accept(s, 
idx, r) and not (len_s_le_i) => - // OR_(cond, dr) cond and accept(s, idx+1, dr) - // over all derivatives dr and conditions cond on the head - literal_vector accept_next; - expr_ref hd = th.mk_nth(s, i); - expr_ref deriv(m); - deriv = derivative_wrapper(hd, r); - accept_next.push_back(~lit); - accept_next.push_back(len_s_le_i); - expr_ref_pair_vector cofactors(m); - get_cofactors(deriv, cofactors); - for (auto const& p : cofactors) { - if (m.is_false(p.first) || re().is_empty(p.second)) continue; - expr_ref cond(p.first, m); - expr_ref deriv_leaf(p.second, m); - - // @EXP (Experimental change) - // Skip searching when can_be_in_cycle returns true - // Result: Besides being unsound as written, this is not - // fine-grained enough. In case of intersections, many - // edges return true for can_be_in_cycle - // if (can_be_in_cycle(deriv, deriv_leaf)) continue; - expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf); - expr_ref choice(m.mk_and(cond, acc), m); - literal choice_lit = th.mk_literal(choice); - accept_next.push_back(choice_lit); - // Prioritize unvisited states - // if (!m_state_graph.is_done(get_state_id(deriv_leaf))) { - // // @EXP Unsound test: only push if not done - // accept_next.push_back(choice_lit); - // // @EXP This didn't work -- just marking as relevant - // // ctx.mark_as_relevant(choice_lit); - // } - STRACE("seq_regex_debug", tout << "added choice: " - << mk_pp(choice, m) << std::endl;); - } - th.add_axiom(accept_next); - - // Propagated successfully - return true; - - // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m); - // head = th.mk_nth(s, i); - // deriv = derivative_wrapper(head, r); - // th.add_axiom(~lit, ~th.mk_literal(is_nullable)); - // - // acc_next = sk().mk_accept(s, a().mk_int(idx + 1), deriv); - // unfold = m.mk_or(is_nullable, acc_next); - // - // literal_vector unfold_lit; - // unfold_lit.push_back(th.mk_literal(unfold)); - // th.add_axiom(unfold_lit); - // return true; + 
propagate_nullable(lit, s, idx, r); - // propagate_nullable(lit, s, idx, r); - // - // return propagate_derivative(lit, e, s, i, idx, r, trigger); + return propagate_derivative(lit, e, s, i, idx, r, trigger); } /** @@ -349,25 +229,19 @@ namespace smt { */ void seq_regex::propagate_nullable(literal lit, expr* s, unsigned idx, expr* r) { - TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PN ";); - - expr_ref is_nullable = is_nullable_wrapper(r); - + // std::cout << "PN "; + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx); if (m.is_true(is_nullable)) { - STRACE("seq_regex_brief", tout << "t ";); th.propagate_lit(nullptr, 1,&lit, len_s_ge_i); } else if (m.is_false(is_nullable)) { - STRACE("seq_regex_brief", tout << "f ";); th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1)); - // @EXP (experimental change) //unsigned len = std::max(1u, re().min_length(r)); //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r))); } else { - STRACE("seq_regex_brief", tout << "? 
";); literal is_nullable_lit = th.mk_literal(is_nullable); ctx.mark_as_relevant(is_nullable_lit); literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); @@ -388,19 +262,12 @@ namespace smt { } bool seq_regex::propagate_derivative(literal lit, expr* e, expr* s, expr* i, unsigned idx, expr* r, literal& trigger) { - TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PD ";); - // (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds + // std::cout << "PD "; expr_ref d(m); expr_ref head = th.mk_nth(s, i); d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r); - - // TODO - // conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d))); - // th.add_axiom(conds); - // timer tm; // std::cout << d->get_id() << " " << tm.get_seconds() << "\n"; //if (tm.get_seconds() > 0.3) @@ -419,17 +286,14 @@ namespace smt { literal lcond = th.mk_literal(subst(cond, sub)); switch (ctx.get_assignment(lcond)) { case l_true: - STRACE("seq_regex_brief", tout << "t ";); conds.push_back(~lcond); d = tt; break; case l_false: - STRACE("seq_regex_brief", tout << "f ";); conds.push_back(lcond); d = el; break; case l_undef: - STRACE("seq_regex_brief", tout << "? ";); #if 1 ctx.mark_as_relevant(lcond); trigger = lcond; @@ -458,7 +322,6 @@ namespace smt { #endif } } - if (!is_ground(d)) { d = subst(d, sub); } @@ -466,9 +329,8 @@ namespace smt { if (!re().is_empty(d)) conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d))); th.add_axiom(conds); - TRACE("seq_regex", tout << "unfold " << head << std::endl << mk_pp(r, m) << std::endl;); - STRACE("seq_regex_brief", tout << "u ";); - + TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";); + // std::cout << "D "; return true; } @@ -490,7 +352,6 @@ namespace smt { * within the same Regex. 
*/ bool seq_regex::coallesce_in_re(literal lit) { - // @EXP (experimental change) return false; expr* s = nullptr, *r = nullptr; expr* e = ctx.bool_var2expr(lit.var()); @@ -511,7 +372,7 @@ namespace smt { th.m_trail_stack.push(vector_value_trail(m_s_in_re, i)); m_s_in_re[i].m_active = false; IF_VERBOSE(11, verbose_stream() << "Intersect " << regex << " " << - mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << std::endl;); + mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";); regex = re().mk_inter(entry.m_re, regex); rewrite(regex); lits.push_back(~entry.m_lit); @@ -541,79 +402,20 @@ namespace smt { } /* - Wrapper around calls to is_nullable from the seq rewriter. - */ - expr_ref seq_regex::is_nullable_wrapper(expr* r) { - STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;); - - expr_ref result = seq_rw().is_nullable(r); - rewrite(result); - - STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); - STRACE("seq_regex_brief", tout << "n(" << state_str(r) << ")=" - << mk_pp(result, m) << " ";); - seq_rw().trace_and_reset_cache_counts(); - - return result; - } - - /* - Wrapper around the regex symbolic derivative from the seq rewriter. + Wrapper around the regex symbolic derivative from the rewriter. Ensures that the derivative is written in a normalized BDD form with optimizations for if-then-else expressions involving the head. 
*/ expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { - STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); - - // Use canonical variable for head; substitute with hd later - // sort* seq_sort = nullptr; - // VERIFY(u().is_re(r, seq_sort)); - // expr_ref hd_canon = get_head_var(sq_sort); - expr_ref hd_canon(m.mk_var(0, m.get_sort(hd)), m); - expr_ref result(re().mk_derivative(hd_canon, r), m); + expr_ref result = expr_ref(re().mk_derivative(hd, r), m); rewrite(result); - - // Substitute - var_subst subst(m); - expr_ref_vector sub(m); - sub.push_back(hd); - result = subst(result, sub); - - STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")=" - << state_str(result) << " ";); - seq_rw().trace_and_reset_cache_counts(); - - /* If the following lines are enabled instead, we use the - same rewriter for the nullable and derivative calls. - However, it currently seems to cause a performance - bug as a side effect. - - The two seq rewriters used are at: - m_seq_rewrite - (returned by seq_rw()) - th.m_rewrite.m_imp->m_cfg.m_seq_rw - (private, can't be accessed directly) - - TODO: experiment with making them the same and see - if it results in significant speedup (due to fewer - cache misses). 
- */ - // expr_ref result = seq_rw().mk_derivative(hd, r); - // rewrite(result) - // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); - // seq_rw().trace_and_reset_cache_counts(); - return result; } void seq_regex::propagate_eq(expr* r1, expr* r2) { - TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PEQ ";); - sort* seq_sort = nullptr; VERIFY(u().is_re(r1, seq_sort)); - expr_ref r = symmetric_diff(r1, r2); + expr_ref r = symmetric_diff(r1, r2); expr_ref emp(re().mk_empty(m.get_sort(r)), m); expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); expr_ref is_empty = sk().mk_is_empty(r, emp, n); @@ -621,9 +423,6 @@ namespace smt { } void seq_regex::propagate_ne(expr* r1, expr* r2) { - TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;); - STRACE("seq_regex_brief", tout << "PNEQ ";); - sort* seq_sort = nullptr; VERIFY(u().is_re(r1, seq_sort)); expr_ref r = symmetric_diff(r1, r2); @@ -653,31 +452,18 @@ namespace smt { void seq_regex::propagate_is_non_empty(literal lit) { expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr; VERIFY(sk().is_is_non_empty(e, r, u, n)); - - TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", - tout << std::endl << "PNE(" << expr_id_str(e) - << "," << state_str(r) - << "," << expr_id_str(u) - << "," << expr_id_str(n) - << ") ";); - - expr_ref is_nullable = is_nullable_wrapper(r); + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); if (m.is_true(is_nullable)) return; literal null_lit = th.mk_literal(is_nullable); expr_ref hd = mk_first(r, n); expr_ref d(m); d = derivative_wrapper(hd, r); - - // STRACE("seq_regex_brief", tout << "(d subbed: " << state_str(d) << ") ";); - // TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;); - literal_vector lits; 
lits.push_back(~lit); if (null_lit != false_literal) lits.push_back(null_lit); - expr_ref_pair_vector cofactors(m); get_cofactors(d, cofactors); for (auto const& p : cofactors) { @@ -693,7 +479,6 @@ namespace smt { next_non_empty = m.mk_and(cond, next_non_empty); lits.push_back(th.mk_literal(next_non_empty)); } - th.add_axiom(lits); } @@ -713,25 +498,6 @@ namespace smt { } } - void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) { - // Get derivative - sort* seq_sort = nullptr; - VERIFY(u().is_re(r, seq_sort)); - expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); - expr_ref hd = mk_first(r, n); - expr_ref d(m); - d = derivative_wrapper(hd, r); - // Use get_cofactors method and filter out unsatisfiable conds - expr_ref_pair_vector cofactors(m); - get_cofactors(d, cofactors); - STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;); - for (auto const& p : cofactors) { - if (m.is_false(p.first) || re().is_empty(p.second)) continue; - STRACE("seq_regex_debug", tout << "adding derivative: " << mk_pp(p.second, m) << std::endl;); - results.push_back(p.second); - } - } - /* is_empty(r, u) => ~is_nullable(r) is_empty(r, u) => (forall x . 
~cond(x)) or is_empty(r1, u union r) for (cond, r) in min-terms(D(x,r)) @@ -741,16 +507,8 @@ namespace smt { void seq_regex::propagate_is_empty(literal lit) { expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr; VERIFY(sk().is_is_empty(e, r, u, n)); - expr_ref is_nullable = is_nullable_wrapper(r); - - TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;); - STRACE("seq_regex_brief", - tout << std::endl << "PE(" << expr_id_str(e) - << "," << state_str(r) - << "," << expr_id_str(u) - << "," << expr_id_str(n) - << ") ";); - + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); if (m.is_true(is_nullable)) { th.add_axiom(~lit); return; @@ -782,449 +540,10 @@ namespace smt { } } - // @EXP: Experimental change - // Some code to compute a canonical head variable, but I think - // this stuff is unnecessary. - // expr_ref seq_regex::get_head_var(sort* seq_sort) { - // expr_ref result(m); - // if (m_deriv_head.contains(seq_sort)) { - // result = m_deriv_head.find(seq_sort); - // STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m);); - // } - // else { - // result = m.mk_fresh_const("re.char", seq_sort); - // STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m);); - // } - // return result; - // } - expr_ref seq_regex::mk_first(expr* r, expr* n) { sort* elem_sort = nullptr, *seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); VERIFY(u().is_seq(seq_sort, elem_sort)); return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort); } - - /**************************************************** - *** Dead state elimination and state_graph class *** - ****************************************************/ - - void state_graph::add_state_core(state s) { - STRACE("seq_regex_brief", tout << "add(" << s << ") ";); - SASSERT(!m_seen.contains(s)); - // Ensure corresponding var in union find structure - while (s >= m_state_ufind.get_num_vars()) { - m_state_ufind.mk_var(); - } - // Initialize as 
unvisited - m_seen.insert(s); - m_unexplored.insert(s); - m_targets.insert(s, state_set()); - m_sources.insert(s, state_set()); - m_sources_maybecycle.insert(s, state_set()); - } - void state_graph::remove_state_core(state s) { - // This is a partial deletion -- the state is still seen and can't be - // added again later. - // The state should be unknown, and all edges to or from the state - // should already have been renamed. - STRACE("seq_regex_brief", tout << "del(" << s << ") ";); - SASSERT(m_seen.contains(s)); - SASSERT(!m_state_ufind.is_root(s)); - SASSERT(m_unknown.contains(s)); - m_targets.remove(s); - m_sources.remove(s); - m_sources_maybecycle.remove(s); - m_unknown.remove(s); - } - - void state_graph::mark_unknown_core(state s) { - STRACE("seq_regex_brief", tout << "unk(" << s << ") ";); - SASSERT(m_state_ufind.is_root(s)); - SASSERT(m_unexplored.contains(s)); - m_unexplored.remove(s); - m_unknown.insert(s); - } - void state_graph::mark_live_core(state s) { - STRACE("seq_regex_brief", tout << "live(" << s << ") ";); - SASSERT(m_state_ufind.is_root(s)); - SASSERT(m_unknown.contains(s)); - m_unknown.remove(s); - m_live.insert(s); - } - void state_graph::mark_dead_core(state s) { - STRACE("seq_regex_brief", tout << "dead(" << s << ") ";); - SASSERT(m_state_ufind.is_root(s)); - SASSERT(m_unknown.contains(s)); - m_unknown.remove(s); - m_dead.insert(s); - } - - /* - Add edge to the graph. - - If the annotation 'maybecycle' is false, then the user is sure - that this edge will never be part of a cycle. - - May already exist, in which case maybecycle = false overrides - maybecycle = true. - */ - void state_graph::add_edge_core(state s1, state s2, bool maybecycle) { - STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 << "," - << (maybecycle ? 
"y" : "n") << ") ";); - SASSERT(m_state_ufind.is_root(s1)); - SASSERT(m_state_ufind.is_root(s2)); - if (s1 == s2) return; - if (!m_targets.find(s1).contains(s2)) { - // add new edge - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: new edge! ";); - m_targets.find(s1).insert(s2); - m_sources.find(s2).insert(s1); - if (maybecycle) m_sources_maybecycle.find(s2).insert(s1); - } - else if (!maybecycle && m_sources_maybecycle.find(s2).contains(s1)) { - // update existing edge - STRACE("seq_regex_debug", tout << std::endl << " DEBUG: update edge! ";); - m_sources_maybecycle.find(s2).remove(s1); - } - } - void state_graph::remove_edge_core(state s1, state s2) { - SASSERT(m_targets.find(s1).contains(s2)); - SASSERT(m_sources.find(s2).contains(s1)); - m_targets.find(s1).remove(s2); - m_sources.find(s2).remove(s1); - m_sources_maybecycle.find(s2).remove(s1); - } - void state_graph::rename_edge_core(state old1, state old2, - state new1, state new2) { - SASSERT(m_targets.find(old1).contains(old2)); - SASSERT(m_sources.find(old2).contains(old1)); - bool maybecycle = m_sources_maybecycle.find(old2).contains(old1); - remove_edge_core(old1, old2); - add_edge_core(new1, new2, maybecycle); - } - - /* - Merge two states or more generally a set of states into one, - returning the new state. Also merges associated edges. - - Preconditions: - - The set should be nonempty - - Every state in the set should be unknown - - Each state should currently exist - - If passing a set of states by reference, it should not be a set - from the edge relations, as merging states modifies edge relations. 
- */ - auto state_graph::merge_states(state s1, state s2) -> state { - SASSERT(m_state_ufind.is_root(s1)); - SASSERT(m_state_ufind.is_root(s2)); - SASSERT(m_unknown.contains(s1)); - SASSERT(m_unknown.contains(s2)); - STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";); - m_state_ufind.merge(s1, s2); - if (m_state_ufind.is_root(s2)) std::swap(s1, s2); - // rename s2 to s1 in edges - for (auto s_to: m_targets.find(s2)) { - rename_edge_core(s2, s_to, s1, s_to); - } - for (auto s_from: m_sources.find(s2)) { - rename_edge_core(s_from, s2, s_from, s1); - } - remove_state_core(s2); - return s1; - } - auto state_graph::merge_states(state_set& s_set) -> state { - SASSERT(s_set.num_elems() > 0); - state prev_s = 0; // initialization here optional - bool first_iter = true; - for (auto s: s_set) { - if (first_iter) { - prev_s = s; - first_iter = false; - continue; - } - prev_s = merge_states(prev_s, s); - } - return prev_s; - } - - /* - If s is not live, mark it, and recurse on all states into s - Precondition: s is live or unknown - */ - void state_graph::mark_live_recursive(state s) { - SASSERT(m_live.contains(s) || m_unknown.contains(s)); - STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: mark live recursive: " << s << " ";); - if (m_live.contains(s)) return; - mark_live_core(s); - for (auto s_from: m_sources.find(s)) { - mark_live_recursive(s_from); - } - } - - /* - Check if s is now known to be dead. If so, mark and recurse - on all states into s. - Precondition: s is live, dead, or unknown - */ - void state_graph::mark_dead_recursive(state s) { - SASSERT(m_live.contains(s) || m_dead.contains(s) || - m_unknown.contains(s)); - STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: mark dead recursive: " << s << " ";); - if (!m_unknown.contains(s)) return; - for (auto s_to: m_targets.find(s)) { - // unknown pointing to live should have been marked as live! 
- SASSERT(!m_live.contains(s_to)); - if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return; - } - // all states from s are dead - mark_dead_core(s); - for (auto s_from: m_sources.find(s)) { - mark_dead_recursive(s_from); - } - } - - /* - Merge all cycles of unknown states containing s into one state. - Return the new state - Precondition: s is unknown. - */ - auto state_graph::merge_all_cycles(state s) -> state { - SASSERT(m_unknown.contains(s)); - // Visit states in a DFS backwards from s - state_set visited; // all backwards edges pushed - state_set resolved; // known in SCC or not - state_set scc; // known in SCC - resolved.insert(s); - scc.insert(s); - vector to_search; - to_search.push_back(s); - while (to_search.size() > 0) { - state x = to_search.back(); - if (!visited.contains(x)) { - visited.insert(x); - // recurse backwards only on maybecycle edges - // and only on unknown states - for (auto y: m_sources_maybecycle.find(x)) { - if (m_unknown.contains(y)) - to_search.push_back(y); - } - } - else if (!resolved.contains(x)) { - resolved.insert(x); - to_search.pop_back(); - // determine in SCC or not - for (auto y: m_sources_maybecycle.find(x)) { - if (scc.contains(y)) { - scc.insert(x); - break; - } - } - } - else { - to_search.pop_back(); - } - } - // scc is the union of all cycles containing s - return merge_states(scc); - } - - /* - Exposed methods - */ - - void state_graph::add_state(state s) { - if (m_seen.contains(s)) return; - add_state_core(s); - } - void state_graph::mark_live(state s) { - SASSERT(m_unexplored.contains(s) || m_live.contains(s)); - SASSERT(m_state_ufind.is_root(s)); - if (m_unexplored.contains(s)) mark_unknown_core(s); - mark_live_recursive(s); - } - void state_graph::add_edge(state s1, state s2, bool maybecycle) { - SASSERT(m_unexplored.contains(s1) || m_live.contains(s1)); - SASSERT(m_state_ufind.is_root(s1)); - SASSERT(m_seen.contains(s2)); - s2 = m_state_ufind.find(s2); - add_edge_core(s1, s2, maybecycle); - if 
(m_live.contains(s2)) mark_live(s1); - } - void state_graph::mark_done(state s) { - SASSERT(m_unexplored.contains(s) || m_live.contains(s)); - SASSERT(m_state_ufind.is_root(s)); - if (m_live.contains(s)) return; - if (m_unexplored.contains(s)) mark_unknown_core(s); - s = merge_all_cycles(s); - // check if dead - mark_dead_recursive(s); - STRACE("seq_regex_brief", tout << "done(" << s << ") ";); - } - - unsigned state_graph::get_size() { - return m_state_ufind.get_num_vars(); - } - - bool state_graph::is_seen(state s) { - return m_seen.contains(s); - } - bool state_graph::is_live(state s) { - return m_live.contains(m_state_ufind.find(s)); - } - bool state_graph::is_dead(state s) { - return m_dead.contains(m_state_ufind.find(s)); - } - bool state_graph::is_done(state s) { - return (m_seen.contains(s) && - !m_unexplored.contains(m_state_ufind.find(s))); - } - - /* - Pretty printing - */ - void state_graph::pretty_print(std::ostream& o) { - o << "---------- State Graph ----------" << std::endl - << "Seen:"; - for (auto s: m_seen) { - o << " " << s; - state s_root = m_state_ufind.find(s); - if (s_root != s) - o << "(=" << s_root << ")"; - } - o << std::endl - << "Live:" << m_live << std::endl - << "Dead:" << m_dead << std::endl - << "Unknown:" << m_unknown << std::endl - << "Unexplored:" << m_unexplored << std::endl - << "Edges:" << std::endl; - for (auto s1: m_seen) { - if (m_state_ufind.is_root(s1)) { - o << " " << s1 << " -> " << m_targets.find(s1) << std::endl; - } - } - o << "---------------------------------" << std::endl; - } - // std::ostream& operator<<(std::ostream& o, const state_graph& sg) { - // sg.pretty_print(o); - // return o; - // } - - // ********************************** - - unsigned seq_regex::get_state_id(expr* e) { - // Assign increasing IDs starting from 1 - if (!m_expr_to_state.contains(e)) { - m_state_to_expr.push_back(e); - unsigned new_id = m_state_to_expr.size(); - m_expr_to_state.insert(e, new_id); - STRACE("seq_regex_brief", tout << "new(" 
<< expr_id_str(e) - << ")=" << state_str(e) << " ";); - } - return m_expr_to_state.find(e); - } - expr* seq_regex::get_expr_from_id(unsigned id) { - SASSERT(id >= 1); - SASSERT(id <= m_state_to_expr.size()); - return m_state_to_expr.get(id); - } - - - unsigned seq_regex::concat_length(expr* r) { - // length of the concatenations at the top level - expr *r1 = nullptr, *r2 = nullptr; - if (re().is_concat(r, r1, r2)) - return concat_length(r1) + concat_length(r2); - else - return 1; - } - - unsigned seq_regex::re_rank(expr* r) { - SASSERT(u().is_re(r)); - expr *r1 = nullptr, *r2 = nullptr, *s = nullptr; - unsigned lo = 0, hi = 0; - if (re().is_empty(r)) - return 0; - if (re().is_concat(r, r1, r2)) - return std::max(re_rank(r1) + concat_length(r2), re_rank(r2)); - if (re().is_union(r, r1, r2) || m.is_ite(r, s, r1, r2)) - return std::max(re_rank(r1), re_rank(r2)); - if (re().is_intersection(r, r1, r2) || re().is_diff(r, r1, r2)) - return re_rank(r1) + re_rank(r2); - if (re().is_plus(r, r1) || re().is_star(r, r1)) - return re_rank(r1) + 1; - if (re().is_loop(r, r1, lo) || re().is_loop(r, r1, lo, hi)) - return re_rank(r1) + lo; - if (re().is_reverse(r, r1) || re().is_opt(r, r1)) - // in reverse case, should be r1 is a string - return re_rank(r1); - if (re().is_to_re(r, s)) - return u().str.min_length(s); - // Else: range, pred, char, full_seq, derivative - return 1; - } - - bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) { - // @EXP (experimental change): Use a "rank" function, which is - // a pseudo-topological order on the state graph, to detect when r2 - // is a simpler regex than r1 - unsigned k1 = re_rank(r1); - unsigned k2 = re_rank(r2); - SASSERT(k1 >= k2); - STRACE("seq_regex_brief", tout << "(k:" << k1 << "->" << k2 << ")";); - return (k1 == k2); - } - - /* - Update the state graph with expression r and all its derivatives. 
- */ - bool seq_regex::update_state_graph(expr* r) { - unsigned r_id = get_state_id(r); - if (m_state_graph.is_done(r_id)) return false; - if (m_state_graph.get_size() >= m_max_state_graph_size) { - STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;); - STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); - return false; - } - STRACE("seq_regex", tout << "Updating state graph for regex " - << mk_pp(r, m) << ") ";); - // Add state - m_state_graph.add_state(r_id); - STRACE("seq_regex_brief", tout << std::endl << "USG(" - << state_str(r) << ") ";); - expr_ref r_nullable = is_nullable_wrapper(r); - if (m.is_true(r_nullable)) { - m_state_graph.mark_live(r_id); - } - else { - // Add edges to all derivatives - expr_ref_vector derivatives(m); - STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: getting all derivs: " << r_id << " ";); - get_all_derivatives(r, derivatives); - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - STRACE("seq_regex_debug", tout - << std::endl << " DEBUG: traversing deriv: " << dr_id << " ";); - m_state_graph.add_state(dr_id); - bool maybecycle = can_be_in_cycle(r, dr); - m_state_graph.add_edge(r_id, dr_id, maybecycle); - } - m_state_graph.mark_done(r_id); - } - STRACE("seq_regex_brief", tout << std::endl;); - STRACE("seq_regex_brief", m_state_graph.pretty_print(tout);); - return true; - } - - std::string seq_regex::state_str(expr* e) { - if (m_expr_to_state.contains(e)) - return std::to_string(get_state_id(e)); - else - return expr_id_str(e); - } - std::string seq_regex::expr_id_str(expr* e) { - return std::string("id") + std::to_string(e->get_id()); - } - } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 2b861fd1aad..1d77cf81dbe 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -17,8 +17,6 @@ Module Name: #pragma once #include "util/scoped_vector.h" -#include "util/uint_set.h" -#include "util/union_find.h" #include 
"ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" #include "smt/smt_context.h" @@ -28,140 +26,7 @@ namespace smt { class theory_seq; - class seq_regex; - - /* - state_graph - - Data structure which is capable of incrementally tracking - live states and dead states. - - "States" are integers. States and edges are added to the data - structure incrementally. - - States can be marked as live - or as done -- to indicate that no more outgoing edges will be - added and the state will not be marked as live. The data - structure then tracks - which other states are live (can reach a live state), dead - (can't reach a live state), or neither. - - Some edges are labeled as not contained in a cycle. This is to - optimize search if it is known by the user of the structure - that no cycle will ever contain this edge. - - Internally, we use union_find to identify states within an SCC, - and incrementally update SCCs, while propagating backwards - live and dead SCCs. - */ - class state_graph { - typedef unsigned state; - typedef uint_set state_set; - typedef u_map edge_rel; - typedef basic_union_find state_ufind; - - private: - /* - All states are internally exactly one of: - - live: known to reach a live state - - dead: known to never reach a live state - - unknown: all outgoing edges have been added, but the - state is not known to be live or dead - - unexplored: not all outgoing edges have been added - - As SCCs are merged, some states become aliases, and a - union find data structure collapses a now obsolete - state to its current representative. m_seen keeps track - of states we have seen, including obsolete states. - - Invariants: - - TODO - */ - state_set m_live; - state_set m_dead; - state_set m_unknown; - state_set m_unexplored; - - state_set m_seen; - state_ufind m_state_ufind; - - /* - Edges are saved in both from and to maps. - A subset of edges are also marked as possibly being - part of a cycle by being stored in m_sources_maybecycle. 
- - Invariants: - - TODO - */ - edge_rel m_sources; - edge_rel m_targets; - edge_rel m_sources_maybecycle; - - /* - 'Core' functions that modify the plain graph, without - updating SCCs or propagating live/dead state information. - These are for internal use only. - */ - void add_state_core(state s); // unexplored + seen - void remove_state_core(state s); // unknown + seen -> seen - void mark_unknown_core(state s); // unexplored -> unknown - void mark_live_core(state s); // unknown -> live - void mark_dead_core(state s); // unknown -> dead - - void add_edge_core(state s1, state s2, bool maybecycle); - void remove_edge_core(state s1, state s2); - void rename_edge_core(state old1, state old2, state new1, state new2); - - state merge_states(state s1, state s2); - state merge_states(state_set& s_set); - - /* - Algorithmic search routines - - live state propagation - - dead state propagation - - cycle / strongly-connected component detection - */ - void mark_live_recursive(state s); - void mark_dead_recursive(state s); - state merge_all_cycles(state s); - - public: - state_graph(): - m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(), - m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle() {} - - /* - Exposed methods - - These methods may be called in any order, as long as: - - states are added before edges are added between them - - outgoing edges are not added from a done state - - a done state is not marked as live - - edges are not added creating a cycle containing an edge with - maybecycle = false (this is not necessary for soundness, but - prevents completeness for successfully detecting dead states) - */ - void add_state(state s); - void add_edge(state s1, state s2, bool maybecycle); - void mark_live(state s); - void mark_done(state s); - - bool is_seen(state s); - bool is_live(state s); - bool is_dead(state s); - bool is_done(state s); - - unsigned get_size(); - - /* - Pretty printing - */ - void pretty_print(std::ostream& o); - - }; - class 
seq_regex { - /* - Data about a constraint of the form (str.in_re s R) - */ struct s_in_re { literal m_lit; expr* m_s; @@ -171,11 +36,6 @@ namespace smt { m_lit(l), m_s(s), m_re(r), m_active(true) {} }; - /* - Data about a literal for the solver to propagate - The trigger guards whether the literal is ready - to be addressed yet -- see seq_regex::can_propagate - */ struct propagation_lit { literal m_lit; literal m_trigger; @@ -184,35 +44,11 @@ namespace smt { propagation_lit(): m_lit(null_literal), m_trigger(null_literal) {} }; - theory_seq& th; - context& ctx; - ast_manager& m; - vector m_s_in_re; - scoped_vector m_to_propagate; - - /* - state_graph for dead state detection, and associated methods - */ - state_graph m_state_graph; - ptr_addr_map m_expr_to_state; - expr_ref_vector m_state_to_expr; - unsigned m_max_state_graph_size { 10000 }; - // Convert between expressions and states (IDs) - unsigned get_state_id(expr* e); - expr* get_expr_from_id(unsigned id); - // Cycle-detection heuristic - // Note: Doesn't need to be sound or complete (doesn't affect soundness) - unsigned concat_length(expr* r); - unsigned re_rank(expr* r); - bool can_be_in_cycle(expr* r1, expr* r2); - // Update the graph - bool update_state_graph(expr* r); - - // Printing for seq_regex_brief - std::string state_str(expr* e); - std::string expr_id_str(expr* e); - - // ******************** + theory_seq& th; + context& ctx; + ast_manager& m; + vector m_s_in_re; + scoped_vector m_to_propagate; seq_util& u(); class seq_util::re& re(); @@ -243,7 +79,6 @@ namespace smt { expr_ref symmetric_diff(expr* r1, expr* r2); - expr_ref is_nullable_wrapper(expr* r); expr_ref derivative_wrapper(expr* hd, expr* r); void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); @@ -255,8 +90,6 @@ namespace smt { public: - void get_all_derivatives(expr* r, expr_ref_vector& results); - seq_regex(theory_seq& th); void push_scope() { m_to_propagate.push_scope(); } @@ -284,3 +117,4 @@ namespace smt 
{ }; }; + diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 16507cd24af..a835c4634aa 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -3379,7 +3379,6 @@ void theory_seq::relevant_eh(app* n) { expr* arg = nullptr; if (m_sk.is_tail(n, arg)) { - // TODO: HERE add_length_limit(arg, m_max_unfolding_depth, true); } From 95d65f5e126c6bcb30bfeaec7683d65c9b63f8fa Mon Sep 17 00:00:00 2001 From: Caleb Stanford Date: Mon, 27 Jul 2020 19:39:09 -0400 Subject: [PATCH 51/51] remove cache hit/miss count tracing --- src/ast/rewriter/seq_rewriter.cpp | 3 +-- src/ast/rewriter/seq_rewriter.h | 6 +----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index ec9984264b8..afc000c33ae 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4311,7 +4311,6 @@ void seq_rewriter::op_cache::cleanup() { if (m_table.size() >= m_max_cache_size) { m_trail.reset(); m_table.reset(); - STRACE("seq_regex", tout << "Op cache reset!" << std::endl;); - STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";); + STRACE("seq_verbose", tout << "Derivative op cache reset" << std::endl;); } } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 19091a6f24b..5fc3febb526 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -146,11 +146,6 @@ class seq_rewriter { op_cache(ast_manager& m); expr* find(decl_kind op, expr* a, expr* b); void insert(decl_kind op, expr* a, expr* b, expr* r); - - #ifdef _TRACE - static unsigned cache_hits; - static unsigned cache_misses; - #endif }; seq_util m_util; @@ -345,5 +340,6 @@ class seq_rewriter { // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. void elim_condition(expr* elem, expr_ref& cond); + };