From 2e676ff2bd37cae8806ba07f11aa2480d5cea99c Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Tue, 9 Jun 2020 16:39:43 -0400
Subject: [PATCH 01/51] std::cout debugging statements

---
 src/ast/rewriter/seq_rewriter.cpp | 8 ++++++++
 src/smt/seq_regex.cpp             | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 4dde8e4c283..d2726b91132 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2181,6 +2181,7 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
+    std::cout << "n";
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable(r);
@@ -2364,6 +2365,7 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
+    std::cout << "d";
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
@@ -2449,6 +2451,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
+    std::cout << "."; // Recursive call
     expr_ref _a(a, m()), _b(b, m());
     expr_ref result(m());
     switch (k) {
@@ -2476,6 +2479,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
+    std::cout << "."; // Recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2490,6 +2494,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
+    std::cout << "."; // Recursive call
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4069,6 +4074,9 @@ seq_rewriter::op_cache::op_cache(ast_manager& m):
 expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
     m_table.find(e, e);
+    if (!(e.r)) {
+        std::cout << "!"; // Cache miss
+    }
     return e.r;
 }
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index e667d0f5b96..be2aa91067b 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -101,6 +101,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var());
         VERIFY(str().is_in_re(e, s, r));
 
+        std::cout << "PI ";
         TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
 
         // convert negative negative membership literals to positive
@@ -142,6 +143,7 @@ namespace smt {
     }
 
     void seq_regex::propagate_accept(literal lit) {
+        std::cout << "PA ";
         if (!propagate(lit))
             m_to_propagate.push_back(lit);
     }
@@ -167,6 +169,7 @@ namespace smt {
         unsigned idx = 0;
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
+        std::cout << "P ";
         TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
 
         if (re().is_empty(r)) {
@@ -356,12 +359,14 @@ namespace smt {
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
+        std::cout << "D ";
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
         return result;
     }
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
+        std::cout << "PEQ ";
         expr_ref r = symmetric_diff(r1, r2);       
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_empty = sk().mk_is_empty(r, emp);
@@ -369,6 +374,7 @@ namespace smt {
     }
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
+        std::cout << "PNEQ ";
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_non_empty = sk().mk_is_non_empty(r, emp);
@@ -393,6 +399,7 @@ namespace smt {
      *
      */
     void seq_regex::propagate_is_non_empty(literal lit) {
+        std::cout << "PN ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_non_empty(e, r, u));
         expr_ref is_nullable = seq_rw().is_nullable(r);
@@ -448,6 +455,7 @@ namespace smt {
       is_empty(r, u) is true if r is a member of u
      */
     void seq_regex::propagate_is_empty(literal lit) {
+        std::cout << "PE ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
         expr_ref is_nullable = seq_rw().is_nullable(r);

From 03b05f2781d3d5b0afd41dabf506fff311b7cab0 Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Tue, 9 Jun 2020 18:23:02 -0400
Subject: [PATCH 02/51] comment out std::cout debugging as this is now a shared
 fork

---
 src/ast/rewriter/seq_rewriter.cpp | 18 ++++++++++--------
 src/smt/seq_regex.cpp             | 16 ++++++++--------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index d2726b91132..81521507950 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2181,12 +2181,13 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    std::cout << "n";
+    // std::cout << "n";
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable(r);
         m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);        
     }
+    // std::cout << " ";
     return result;
 }
 
@@ -2365,12 +2366,13 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
-    std::cout << "d";
+    // std::cout << "d";
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
         m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
     }
+    // std::cout << " ";
     return result;
 }
 
@@ -2451,7 +2453,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
-    std::cout << "."; // Recursive call
+    // std::cout << "."; // Recursive call
     expr_ref _a(a, m()), _b(b, m());
     expr_ref result(m());
     switch (k) {
@@ -2479,7 +2481,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
-    std::cout << "."; // Recursive call
+    // std::cout << "."; // Recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2494,7 +2496,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
-    std::cout << "."; // Recursive call
+    // std::cout << "."; // Recursive call
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4074,9 +4076,9 @@ seq_rewriter::op_cache::op_cache(ast_manager& m):
 expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
     m_table.find(e, e);
-    if (!(e.r)) {
-        std::cout << "!"; // Cache miss
-    }
+    // if (!(e.r)) {
+    //     std::cout << "!"; // Cache miss
+    // }
     return e.r;
 }
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index be2aa91067b..1ca53fcdf26 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -101,7 +101,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var());
         VERIFY(str().is_in_re(e, s, r));
 
-        std::cout << "PI ";
+        // std::cout << "PI ";
         TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
 
         // convert negative negative membership literals to positive
@@ -143,7 +143,7 @@ namespace smt {
     }
 
     void seq_regex::propagate_accept(literal lit) {
-        std::cout << "PA ";
+        // std::cout << "PA ";
         if (!propagate(lit))
             m_to_propagate.push_back(lit);
     }
@@ -169,7 +169,7 @@ namespace smt {
         unsigned idx = 0;
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
-        std::cout << "P ";
+        // std::cout << "P ";
         TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
 
         if (re().is_empty(r)) {
@@ -359,14 +359,14 @@ namespace smt {
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
-        std::cout << "D ";
+        // std::cout << "D ";
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
         return result;
     }
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
-        std::cout << "PEQ ";
+        // std::cout << "PEQ ";
         expr_ref r = symmetric_diff(r1, r2);       
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_empty = sk().mk_is_empty(r, emp);
@@ -374,7 +374,7 @@ namespace smt {
     }
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
-        std::cout << "PNEQ ";
+        // std::cout << "PNEQ ";
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_non_empty = sk().mk_is_non_empty(r, emp);
@@ -399,7 +399,7 @@ namespace smt {
      *
      */
     void seq_regex::propagate_is_non_empty(literal lit) {
-        std::cout << "PN ";
+        // std::cout << "PN ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_non_empty(e, r, u));
         expr_ref is_nullable = seq_rw().is_nullable(r);
@@ -455,7 +455,7 @@ namespace smt {
       is_empty(r, u) is true if r is a member of u
      */
     void seq_regex::propagate_is_empty(literal lit) {
-        std::cout << "PE ";
+        // std::cout << "PE ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
         expr_ref is_nullable = seq_rw().is_nullable(r);

From 231f0d65936691c959c4746b42c054097564643c Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Fri, 12 Jun 2020 12:54:27 -0400
Subject: [PATCH 03/51] convert std::cout to TRACE statements for seq_rewriter
 and seq_regex

---
 src/ast/rewriter/seq_rewriter.cpp | 21 +++++++++---------
 src/smt/seq_regex.cpp             | 36 +++++++++++++++++++------------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 5fd17730d55..24085632ff0 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2182,13 +2182,13 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    // std::cout << "n";
+    STRACE("seq_regex_verbose", tout << "nullable";);
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable(r);
         m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);        
     }
-    // std::cout << " ";
+    STRACE("seq_regex_verbose", tout << std::endl;);
     return result;
 }
 
@@ -2367,13 +2367,13 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
-    // std::cout << "d";
+    STRACE("seq_regex_verbose", tout << "derivative";);
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
         m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
     }
-    // std::cout << " ";
+    STRACE("seq_regex_verbose", tout << std::endl;);
     return result;
 }
 
@@ -2454,7 +2454,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
-    // std::cout << "."; // Recursive call
     expr_ref _a(a, m()), _b(b, m());
     expr_ref result(m());
     switch (k) {
@@ -2482,7 +2481,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
-    // std::cout << "."; // Recursive call
+    STRACE("seq_regex_verbose", tout << " (rec)";);
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2497,7 +2496,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
-    // std::cout << "."; // Recursive call
+    STRACE("seq_regex_verbose", tout << " (rec)";);
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4077,9 +4076,11 @@ seq_rewriter::op_cache::op_cache(ast_manager& m):
 expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
     m_table.find(e, e);
-    // if (!(e.r)) {
-    //     std::cout << "!"; // Cache miss
-    // }
+
+    if (!(e.r)) {
+        STRACE("seq_regex_verbose", tout << " (cache miss)";);
+    }
+
     return e.r;
 }
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 8155af79046..f61a1f02cb3 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -101,8 +101,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var());
         VERIFY(str().is_in_re(e, s, r));
 
-        // std::cout << "PI ";
-        TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
+        TRACE("seq_regex", tout << "propagate in RE: " << mk_pp(e, m) << std::endl;);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -143,7 +142,7 @@ namespace smt {
     }
 
     void seq_regex::propagate_accept(literal lit) {
-        // std::cout << "PA ";
+        TRACE("seq_regex", tout << "propagate accept" << std::endl;);
         if (!propagate(lit))
             m_to_propagate.push_back(lit);
     }
@@ -169,8 +168,7 @@ namespace smt {
         unsigned idx = 0;
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
-        // std::cout << "P ";
-        TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
+        TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;);
 
         if (re().is_empty(r)) {
             th.add_axiom(~lit);
@@ -195,6 +193,9 @@ namespace smt {
     void seq_regex::propagate_nullable(literal lit, expr* e, expr* s, unsigned idx, expr* r) {
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
+
+        TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
+
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
             th.propagate_lit(nullptr, 1,&lit, len_s_ge_i);
@@ -225,11 +226,13 @@ namespace smt {
         expr_ref d(m);
         expr_ref head = th.mk_nth(s, i);
 
+        TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;);
+
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
         // timer tm;
-        // std::cout << d->get_id() << " " << tm.get_seconds() << "\n";
+        // std::cout << d->get_id() << " " << tm.get_seconds() << std::endl;
         //if (tm.get_seconds() > 0.3) 
-        //    std::cout << d << "\n";
+        //    std::cout << d << std::endl;
         // std::cout.flush();
         literal_vector conds;
         conds.push_back(~lit);
@@ -285,7 +288,7 @@ namespace smt {
         if (!re().is_empty(d)) 
             conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)));
         th.add_axiom(conds);        
-        TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";);
+        TRACE("seq_regex", tout << "unfold " << head << std::endl << mk_pp(r, m) << std::endl;);
         return true;
     }
 
@@ -327,7 +330,7 @@ namespace smt {
             th.m_trail_stack.push(vector_value_trail<theory_seq, s_in_re, true>(m_s_in_re, i));
             m_s_in_re[i].m_active = false;
             IF_VERBOSE(11, verbose_stream() << "Intersect " << regex << " " << 
-                       mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";);
+                       mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << std::endl;);
             regex = re().mk_inter(entry.m_re, regex);
             rewrite(regex);
             lits.push_back(~entry.m_lit);
@@ -362,14 +365,16 @@ namespace smt {
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
-        // std::cout << "D ";
+        STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
+        STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
+        // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;);
         return result;
     }
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
-        // std::cout << "PEQ ";
+        TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
         expr_ref r = symmetric_diff(r1, r2);       
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_empty = sk().mk_is_empty(r, emp);
@@ -377,7 +382,7 @@ namespace smt {
     }
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
-        // std::cout << "PNEQ ";
+        TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_non_empty = sk().mk_is_non_empty(r, emp);
@@ -402,9 +407,11 @@ namespace smt {
      *
      */
     void seq_regex::propagate_is_non_empty(literal lit) {
-        // std::cout << "PN ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_non_empty(e, r, u));
+
+        TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
+
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
         if (m.is_true(is_nullable))
@@ -458,9 +465,10 @@ namespace smt {
       is_empty(r, u) is true if r is a member of u
      */
     void seq_regex::propagate_is_empty(literal lit) {
-        // std::cout << "PE ";
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
+        TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
+
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
         if (m.is_true(is_nullable)) {

From fb7ffe96fd691786c5859a90972d31f939c46891 Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Fri, 12 Jun 2020 20:34:02 -0400
Subject: [PATCH 04/51] add cases to min_length and max_length for regexes

---
 src/ast/seq_decl_plugin.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index a33b5a254ae..fccc8a09266 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1327,6 +1327,12 @@ unsigned seq_util::re::min_length(expr* r) const {
         return u.max_mul(lo, min_length(r1));
     if (is_to_re(r, s)) 
         return u.str.min_length(s);
+    if (is_reverse(r, s) || is_plus(r, s))
+        return min_length(s);
+    if (is_range(r) || is_of_pred(r) || is_full_char(r))
+        return 1;
+    if (is_empty(r))
+        return UINT_MAX;
     return 0;
 }
 
@@ -1350,6 +1356,12 @@ unsigned seq_util::re::max_length(expr* r) const {
         return u.max_mul(hi, max_length(r1));
     if (is_to_re(r, s)) 
         return u.str.max_length(s);
+    if (is_reverse(r, s) || is_plus(r, s))
+        return max_length(s);
+    if (is_range(r) || is_of_pred(r) || is_full_char(r))
+        return 1;
+    if (is_empty(r))
+        return 0;
     return UINT_MAX;
 }
 

From 20962e2332f99d1260b0dd661088e63e682eec0b Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Fri, 12 Jun 2020 20:34:37 -0400
Subject: [PATCH 05/51] bug fix

---
 src/smt/seq_regex.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index f61a1f02cb3..120c1ae8b12 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -424,6 +424,7 @@ namespace smt {
         lits.push_back(~lit);
         if (null_lit != false_literal) 
             lits.push_back(null_lit);
+
         expr_ref_pair_vector cofactors(m);
         get_cofactors(d, cofactors);
         for (auto const& p : cofactors) {
@@ -453,8 +454,8 @@ namespace smt {
             conds.pop_back();
         }
         else {
-            cond = mk_and(conds);
-            result.push_back(cond, r);
+            expr_ref conj = mk_and(conds);
+            result.push_back(conj, r);
         }
     }
 

From 8b129ce4d0470246d9a47e012eac1d574f22e7d6 Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Tue, 16 Jun 2020 11:56:34 -0400
Subject: [PATCH 06/51] update min_length and max_length functions for REs

---
 src/ast/seq_decl_plugin.cpp | 40 ++++++++++++++-----------------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index fccc8a09266..e6a970fd4c8 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1313,26 +1313,21 @@ unsigned seq_util::re::min_length(expr* r) const {
     unsigned lo = 0, hi = 0;
     if (is_empty(r))
         return UINT_MAX;
-    if (is_concat(r, r1, r2)) 
+    if (is_concat(r, r1, r2))
         return u.max_plus(min_length(r1), min_length(r2));
-    if (m.is_ite(r, s, r1, r2)) 
+    if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
         return std::min(min_length(r1), min_length(r2));
-    if (is_diff(r, r1, r2))
-        return min_length(r1);
-    if (is_union(r, r1, r2)) 
-        return std::min(min_length(r1), min_length(r2));
-    if (is_intersection(r, r1, r2)) 
+    if (is_intersection(r, r1, r2))
         return std::max(min_length(r1), min_length(r2));
-    if (is_loop(r, r1, lo, hi))
+    if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_plus(r, r1))
+        return min_length(r1);
+    if (is_loop(r, r1, lo) || is_loop(r, r1, lo, hi))
         return u.max_mul(lo, min_length(r1));
-    if (is_to_re(r, s)) 
+    if (is_to_re(r, s))
         return u.str.min_length(s);
-    if (is_reverse(r, s) || is_plus(r, s))
-        return min_length(s);
     if (is_range(r) || is_of_pred(r) || is_full_char(r))
         return 1;
-    if (is_empty(r))
-        return UINT_MAX;
+    // Else: star, option, complement, full_seq, derivative
     return 0;
 }
 
@@ -1342,26 +1337,21 @@ unsigned seq_util::re::max_length(expr* r) const {
     unsigned lo = 0, hi = 0;
     if (is_empty(r))
         return 0;
-    if (is_concat(r, r1, r2)) 
+    if (is_concat(r, r1, r2))
         return u.max_plus(max_length(r1), max_length(r2));
-    if (m.is_ite(r, s, r1, r2)) 
+    if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
         return std::max(max_length(r1), max_length(r2));
-    if (is_diff(r, r1, r2))
-        return max_length(r1);
-    if (is_union(r, r1, r2)) 
-        return std::max(max_length(r1), max_length(r2));
-    if (is_intersection(r, r1, r2)) 
+    if (is_intersection(r, r1, r2))
         return std::min(max_length(r1), max_length(r2));
+    if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1))
+        return max_length(r1);
     if (is_loop(r, r1, lo, hi))
         return u.max_mul(hi, max_length(r1));
-    if (is_to_re(r, s)) 
+    if (is_to_re(r, s))
         return u.str.max_length(s);
-    if (is_reverse(r, s) || is_plus(r, s))
-        return max_length(s);
     if (is_range(r) || is_of_pred(r) || is_full_char(r))
         return 1;
-    if (is_empty(r))
-        return 0;
+    // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative
     return UINT_MAX;
 }
 

From a98ca80b00bfca1c781f0602ba56fb3fe041a887 Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Tue, 16 Jun 2020 20:46:18 -0400
Subject: [PATCH 07/51] initial pass on simplifying derivative normal forms by
 eliminating redundant predicates locally

---
 src/ast/rewriter/seq_rewriter.cpp | 126 +++++++++++++++++++++++++-----
 src/ast/rewriter/seq_rewriter.h   |   4 +
 2 files changed, 110 insertions(+), 20 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 3166b1f5ab3..1cf421bacbb 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2389,11 +2389,70 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
     return mk_der_op(OP_RE_CONCAT, r1, r2);
 }
 
+/*
+    Utility functions to decide char <, ==, and <=.
+    Return true if deduced, false if unknown.
+*/
+bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
+    unsigned u1, u2;
+    return (m_util.is_const_char(ch1, u1) &&
+            m_util.is_const_char(ch2, u2) &&
+            (u1 < u2));
+}
+bool seq_rewriter::eq_char(expr* ch1, expr* ch2) {
+    unsigned u1, u2;
+    return ((ch1 == ch2) || (
+        m_util.is_const_char(ch1, u1) &&
+        m_util.is_const_char(ch2, u2) &&
+        (u1 == u2)
+    ));
+}
+bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
+    return (eq_char(ch1, ch2) || lt_char(ch1, ch2));
+}
+
+/*
+    Utility function to decide if a simple predicate (ones that appear
+    as the conditions in if-then-else expressions in derivatives)
+    implies another.
+
+    Return true if we deduce that a implies b, false if unknown.
+
+    Current cases handled:
+        - a and b are char <= constraints, or negations of char <= constraints
+*/
+bool seq_rewriter::pred_implies(expr* a, expr* b) {
+    expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr,
+         *chb1 = nullptr, *chb2 = nullptr, *notb = nullptr;
+    if (m().is_not(a, nota) &&
+        m().is_not(b, notb)) {
+        return pred_implies(notb, nota);
+    }
+    else if (m_util.is_char_le(a, cha1, cha2) &&
+             m_util.is_char_le(b, chb1, chb2)) {
+        return (le_char(chb1, cha1) && le_char(cha2, chb2));
+    }
+    else if (m_util.is_char_le(a, cha1, cha2) &&
+             m().is_not(b, notb) &&
+             m_util.is_char_le(notb, chb1, chb2)) {
+        return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
+                (lt_char(chb2, cha1) && le_char(cha2, chb1)));
+    }
+    else if (m_util.is_char_le(b, chb1, chb2) &&
+             m().is_not(a, nota) &&
+             m_util.is_char_le(nota, cha1, cha2)) {
+        return (le_char(chb1, cha2) && le_char(cha1, chb2));
+    }
+    else {
+        return false;
+    }
+}
+
 /*
     Apply a binary operation, preserving BDD normal form on derivative expressions.
 
     Preconditions:
-        - k is a binary op code on REs (concat, intersection, or union)
+        - k is a binary op code on REs: one of concat, intersection, or union
         - a and b are in BDD form
 
     Postcondition:
@@ -2406,23 +2465,43 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     auto mk_ite = [&](expr* c, expr* a, expr* b) {
         return (a == b) ? a : m().mk_ite(c, a, b);
     };
+    // TODO
+    // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); };
     if (m().is_ite(a, ca, a1, a2)) {
+        expr_ref r1(m()), r2(m());
         if (m().is_ite(b, cb, b1, b2)) {
+            // --- Core logic for combining two BDDs
             if (ca == cb) {
                 expr_ref r1 = mk_der_op(k, a1, b1);
                 expr_ref r2 = mk_der_op(k, a2, b2);
                 result = mk_ite(ca, r1, r2);
                 return result;
             }
-            else if (ca->get_id() < cb->get_id()) {
-                expr_ref r1 = mk_der_op(k, a, b1);
-                expr_ref r2 = mk_der_op(k, a, b2);
-                result = mk_ite(cb, r1, r2);
-                return result;
+            // Order with higher IDs on the outside
+            if (ca->get_id() < cb->get_id()) {
+                std::swap(a, b);
+                std::swap(ca, cb);
+                std::swap(a1, b1);
+                std::swap(a2, b2);
+            }
+            // Simplify if there is a relationship between ca and cb
+            if (pred_implies(ca, cb)) {
+                r1 = mk_der_op(k, a1, b1);
+            }
+            else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) {
+                r1 = mk_der_op(k, a1, b2);
+            }
+            if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) {
+                r2 = mk_der_op(k, a2, b1);
+            }
+            else if (pred_implies(expr_ref(m().mk_not(ca), m()),
+                                  expr_ref(m().mk_not(cb), m()))) {
+                r2 = mk_der_op(k, a2, b2);
             }
+            // --- End core logic
         }
-        expr_ref r1 = mk_der_op(k, a1, b);
-        expr_ref r2 = mk_der_op(k, a2, b);
+        if (!r1) r1 = mk_der_op(k, a1, b);
+        if (!r2) r2 = mk_der_op(k, a2, b);
         result = mk_ite(ca, r1, r2);
         return result;
     }
@@ -2539,7 +2618,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2)));
     }
     else if (m().is_ite(r, p, r1, r2)) {
-        // there is no BDD normalization here
+        // Note: there is no BDD normalization here
         result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2));
         return result;
     }
@@ -2574,13 +2653,18 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         expr_ref hd(m()), tl(m());
         if (get_head_tail(r1, hd, tl)) {
             // head must be equal; if so, derivative is tail
-            return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
+            // Write 'head is equal' as a range constraint:
+            // (ele <= hd) and (hd <= ele)
+            return mk_der_inter(
+                re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)),
+                re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl))
+            );
         }
         else if (str().is_empty(r1)) {
             return mk_empty();
         }
-        else {
 #if 0
+        else {
             hd = str().mk_nth_i(r1, m_autil.mk_int(0));
             tl = str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1)));
             result = 
@@ -2588,10 +2672,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
                            mk_empty(),
                            re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl)));
             return result;
-#else
-            return expr_ref(re().mk_derivative(ele, r), m());
-#endif
         }
+#endif
     }
     else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) {
         // Reverses are rewritten so that the only derivative case is
@@ -2599,14 +2681,16 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         // This is analagous to the previous is_to_re case.
         expr_ref hd(m()), tl(m());
         if (get_head_tail_reversed(r2, hd, tl)) {
-            return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
+            // Write 'tail is equal' as a range constraint:
+            // (ele <= tl) and (tl <= ele)
+            return mk_der_inter(
+                re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))),
+                re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd)))
+            );
         }
         else if (str().is_empty(r2)) {
             return mk_empty();
         }
-        else {
-            return expr_ref(re().mk_derivative(ele, r), m());
-        }
     }
     else if (re().is_range(r, r1, r2)) {
         // r1, r2 are sequences.
@@ -2637,8 +2721,10 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         result = array.mk_select(2, args);
         return re_predicate(result, seq_sort);
     }
-    // stuck cases: re().is_derivative, variable, ...
-    // and re().is_reverse if the reverse is not applied to a string
+    // stuck cases: is_derivative, variable,
+    // str.to_re if it can't be simplified into a head character and tail
+    // and re().is_reverse if the reverse is not applied to a string that
+    // can be coerced into a tail character and a head
     return expr_ref(re().mk_derivative(ele, r), m());
 }
 
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 1cba724442e..6b9d88a3a38 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -194,6 +194,10 @@ class seq_rewriter {
     expr_ref mk_der_compl(expr* a);
     expr_ref mk_der_reverse(expr* a);
 
+    bool lt_char(expr* ch1, expr* ch2);
+    bool eq_char(expr* ch1, expr* ch2);
+    bool le_char(expr* ch1, expr* ch2);
+    bool pred_implies(expr* a, expr* b);
     bool are_complements(expr* r1, expr* r2) const;
     bool is_subset(expr* r1, expr* r2) const;
 

From 42cb8b6874f646c728c83c7996bba15776a4db4a Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Wed, 17 Jun 2020 13:07:27 -0400
Subject: [PATCH 08/51] add seq_regex_brief trace statements

---
 src/smt/seq_regex.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 6a00fb0ab64..8a64872f5fa 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -102,6 +102,7 @@ namespace smt {
         VERIFY(str().is_in_re(e, s, r));
 
         TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PIR";);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -149,6 +150,7 @@ namespace smt {
 
     void seq_regex::propagate_accept(literal lit) {
         TRACE("seq_regex", tout << "propagate accept" << std::endl;);
+        STRACE("seq_regex_brief", tout << " PA";);
         if (!propagate(lit))
             m_to_propagate.push_back(lit);
     }
@@ -175,6 +177,11 @@ namespace smt {
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
         TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;);
+        STRACE("seq_regex_brief",
+            tout << std::endl << "P(" << mk_pp(s, m)
+                              << "," << idx
+                              << "," << r // pointer
+                              << ")";);
 
         if (re().is_empty(r)) {
             th.add_axiom(~lit);
@@ -213,6 +220,7 @@ namespace smt {
         rewrite(is_nullable);
 
         TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PN";);
 
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
@@ -247,6 +255,7 @@ namespace smt {
         expr_ref head = th.mk_nth(s, i);
 
         TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PD";);
 
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
         // timer tm;
@@ -386,6 +395,7 @@ namespace smt {
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " D";);
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
@@ -395,7 +405,8 @@ namespace smt {
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
         TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
-        expr_ref r = symmetric_diff(r1, r2);       
+        STRACE("seq_regex_brief", tout << " PEQ";);
+        expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_empty = sk().mk_is_empty(r, emp);
         th.add_axiom(~th.mk_eq(r1, r2, false), th.mk_literal(is_empty));
@@ -403,6 +414,7 @@ namespace smt {
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
         TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PNEQ";);
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_non_empty = sk().mk_is_non_empty(r, emp);
@@ -431,6 +443,7 @@ namespace smt {
         VERIFY(sk().is_is_non_empty(e, r, u));
 
         TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PNE";);
 
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
@@ -489,6 +502,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
         TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " PE";);
 
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);

From 177b04affce617f41049e0d785c883e2b9cfdaef Mon Sep 17 00:00:00 2001
From: calebstanford-msr <t-casta@microsoft.com>
Date: Wed, 17 Jun 2020 19:51:20 -0400
Subject: [PATCH 09/51] working on debugging ref count issue

---
 src/ast/rewriter/seq_rewriter.cpp | 22 ++++++++++++++--------
 src/smt/seq_regex.cpp             |  2 ++
 src/smt/theory_seq.cpp            |  1 +
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 1cf421bacbb..c4fed412c0f 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2472,31 +2472,37 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
         if (m().is_ite(b, cb, b1, b2)) {
             // --- Core logic for combining two BDDs
             if (ca == cb) {
-                expr_ref r1 = mk_der_op(k, a1, b1);
-                expr_ref r2 = mk_der_op(k, a2, b2);
+                r1 = mk_der_op(k, a1, b1);
+                r2 = mk_der_op(k, a2, b2);
                 result = mk_ite(ca, r1, r2);
                 return result;
             }
             // Order with higher IDs on the outside
-            if (ca->get_id() < cb->get_id()) {
-                std::swap(a, b);
-                std::swap(ca, cb);
-                std::swap(a1, b1);
-                std::swap(a2, b2);
-            }
+            // if (ca->get_id() < cb->get_id()) {
+            //     std::swap(a, b);
+            //     std::swap(ca, cb);
+            //     std::swap(a1, b1);
+            //     std::swap(a2, b2);
+            // }
             // Simplify if there is a relationship between ca and cb
             if (pred_implies(ca, cb)) {
                 r1 = mk_der_op(k, a1, b1);
+                // prevent memory ref count error
+                expr_ref _b2(b2, m());
             }
             else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) {
                 r1 = mk_der_op(k, a1, b2);
+                expr_ref _b2(b1, m());
             }
             if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) {
                 r2 = mk_der_op(k, a2, b1);
+                // prevent memory ref count error
+                expr_ref _b2(b2, m());
             }
             else if (pred_implies(expr_ref(m().mk_not(ca), m()),
                                   expr_ref(m().mk_not(cb), m()))) {
                 r2 = mk_der_op(k, a2, b2);
+                expr_ref _b2(b1, m());
             }
             // --- End core logic
         }
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 8a64872f5fa..8703f617f98 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -228,6 +228,7 @@ namespace smt {
         }
         else if (m.is_false(is_nullable)) {
             th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1));
+            // th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r)));
         }
         else {
             literal is_nullable_lit = th.mk_literal(is_nullable);
@@ -395,6 +396,7 @@ namespace smt {
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
+        // STRACE("seq_regex_brief", tout << "derivative: " << mk_pp(r, m) << std::endl;);
         STRACE("seq_regex_brief", tout << " D";);
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp
index d3de47c61ff..295256ed7e0 100644
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@@ -3328,6 +3328,7 @@ void theory_seq::relevant_eh(app* n) {
 
     expr* arg = nullptr;
     if (m_sk.is_tail(n, arg)) {
+        // TODO: HERE
         add_length_limit(arg, m_max_unfolding_depth, true);
     }
 

From 06bc1cd955b008fc2bc2bb58cea4ae3f2437c477 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 19 Jun 2020 10:07:42 -0400
Subject: [PATCH 10/51] fix ref count bug and convert trace statements to
 seq_regex_brief

---
 src/ast/rewriter/seq_rewriter.cpp | 29 ++++++++++++-----------------
 src/smt/seq_regex.cpp             | 31 ++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index c4fed412c0f..1fe1851efc3 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2182,17 +2182,17 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable(expr* r) {
-    STRACE("seq_regex_verbose", tout << "nullable";);
+    STRACE("seq_regex_brief", tout << "n";);
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable_rec(r);
         m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);        
     }
-    STRACE("seq_regex_verbose", tout << std::endl;);
     return result;
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
+    STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
     expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr;
     sort* seq_sort = nullptr;
@@ -2367,13 +2367,12 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
-    STRACE("seq_regex_verbose", tout << "derivative";);
+    STRACE("seq_regex_brief", tout << "d";);
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
         m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
     }
-    STRACE("seq_regex_verbose", tout << std::endl;);
     return result;
 }
 
@@ -2459,6 +2458,7 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
         - result is in BDD form
 */
 expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
+    STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
     expr_ref result(m());
@@ -2469,8 +2469,10 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); };
     if (m().is_ite(a, ca, a1, a2)) {
         expr_ref r1(m()), r2(m());
+        expr_ref notca(m().mk_not(ca), m());
         if (m().is_ite(b, cb, b1, b2)) {
             // --- Core logic for combining two BDDs
+            expr_ref notcb(m().mk_not(cb), m());
             if (ca == cb) {
                 r1 = mk_der_op(k, a1, b1);
                 r2 = mk_der_op(k, a2, b2);
@@ -2487,22 +2489,15 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
             // Simplify if there is a relationship between ca and cb
             if (pred_implies(ca, cb)) {
                 r1 = mk_der_op(k, a1, b1);
-                // prevent memory ref count error
-                expr_ref _b2(b2, m());
             }
-            else if (pred_implies(ca, expr_ref(m().mk_not(cb), m()))) {
+            else if (pred_implies(ca, notcb)) {
                 r1 = mk_der_op(k, a1, b2);
-                expr_ref _b2(b1, m());
             }
-            if (pred_implies(expr_ref(m().mk_not(ca), m()), cb)) {
+            if (pred_implies(notca, cb)) {
                 r2 = mk_der_op(k, a2, b1);
-                // prevent memory ref count error
-                expr_ref _b2(b2, m());
             }
-            else if (pred_implies(expr_ref(m().mk_not(ca), m()),
-                                  expr_ref(m().mk_not(cb), m()))) {
+            else if (pred_implies(notca, notcb)) {
                 r2 = mk_der_op(k, a2, b2);
-                expr_ref _b2(b1, m());
             }
             // --- End core logic
         }
@@ -2572,7 +2567,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
-    STRACE("seq_regex_verbose", tout << " (rec)";);
+    STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2587,7 +2582,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
-    STRACE("seq_regex_verbose", tout << " (rec)";);
+    STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4186,7 +4181,7 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     m_table.find(e, e);
 
     if (!(e.r)) {
-        STRACE("seq_regex_verbose", tout << " (cache miss)";);
+        STRACE("seq_regex_brief", tout << "!";); // cache miss
     }
 
     return e.r;
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 8703f617f98..fe6067a8d6b 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -102,7 +102,7 @@ namespace smt {
         VERIFY(str().is_in_re(e, s, r));
 
         TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PIR";);
+        STRACE("seq_regex_brief", tout << "PIR ";);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -150,7 +150,7 @@ namespace smt {
 
     void seq_regex::propagate_accept(literal lit) {
         TRACE("seq_regex", tout << "propagate accept" << std::endl;);
-        STRACE("seq_regex_brief", tout << " PA";);
+        STRACE("seq_regex_brief", tout << "PA ";);
         if (!propagate(lit))
             m_to_propagate.push_back(lit);
     }
@@ -180,8 +180,8 @@ namespace smt {
         STRACE("seq_regex_brief",
             tout << std::endl << "P(" << mk_pp(s, m)
                               << "," << idx
-                              << "," << r // pointer
-                              << ")";);
+                              << "," << r->get_id()
+                              << ") ";);
 
         if (re().is_empty(r)) {
             th.add_axiom(~lit);
@@ -216,11 +216,13 @@ namespace smt {
      */
 
     void seq_regex::propagate_nullable(literal lit, expr* s, unsigned idx, expr* r) {
+        TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << "PN ";);
+
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
 
-        TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PN";);
+        STRACE("seq_regex_brief", tout << " ";);
 
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
@@ -256,7 +258,7 @@ namespace smt {
         expr_ref head = th.mk_nth(s, i);
 
         TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PD";);
+        STRACE("seq_regex_brief", tout << "PD ";);
 
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
         // timer tm;
@@ -311,6 +313,9 @@ namespace smt {
 #endif
             }
         }
+
+        STRACE("seq_regex_brief", tout << "cont ";);
+
         if (!is_ground(d)) {
             d = subst(d, sub);
         }
@@ -396,18 +401,18 @@ namespace smt {
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
-        // STRACE("seq_regex_brief", tout << "derivative: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " D";);
+        STRACE("seq_regex_brief", tout << "D ";);
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << " ";);
         // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;);
         return result;
     }
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
         TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PEQ";);
+        STRACE("seq_regex_brief", tout << "PEQ ";);
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_empty = sk().mk_is_empty(r, emp);
@@ -416,7 +421,7 @@ namespace smt {
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
         TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PNEQ";);
+        STRACE("seq_regex_brief", tout << "PNEQ ";);
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref is_non_empty = sk().mk_is_non_empty(r, emp);
@@ -445,7 +450,7 @@ namespace smt {
         VERIFY(sk().is_is_non_empty(e, r, u));
 
         TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PNE";);
+        STRACE("seq_regex_brief", tout << "PNE ";);
 
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
@@ -504,7 +509,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
         TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " PE";);
+        STRACE("seq_regex_brief", tout << "PE ";);
 
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);

From 94d9db3507802c7b5b42e8203c6d542ba86eb6ed Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 19 Jun 2020 12:19:58 -0400
Subject: [PATCH 11/51] add compact tracing for cache hits/misses

---
 src/ast/rewriter/seq_rewriter.cpp | 37 +++++++++++++++++++++++--------
 src/ast/rewriter/seq_rewriter.h   | 17 +++++++++++---
 src/smt/seq_regex.cpp             |  9 +++++---
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index f2807fc6ab4..1573782cbb2 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2182,7 +2182,7 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable(expr* r) {
-    STRACE("seq_regex_brief", tout << "n";);
+    // STRACE("seq_regex_brief", tout << "n";);
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable_rec(r);
@@ -2192,7 +2192,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) {
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    STRACE("seq_regex_brief", tout << ".";); // recursive call
+    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
     expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr;
     sort* seq_sort = nullptr;
@@ -2367,7 +2367,7 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
-    STRACE("seq_regex_brief", tout << "d";);
+    // STRACE("seq_regex_brief", tout << "d";);
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
@@ -2458,7 +2458,7 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
         - result is in BDD form
 */
 expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
-    STRACE("seq_regex_brief", tout << ".";); // recursive call
+    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
     expr_ref result(m());
@@ -2568,7 +2568,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
-    STRACE("seq_regex_brief", tout << ".";); // recursive call
+    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2583,7 +2583,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
-    STRACE("seq_regex_brief", tout << ".";); // recursive call
+    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4175,19 +4175,38 @@ bool seq_rewriter::reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs,
 seq_rewriter::op_cache::op_cache(ast_manager& m):
     m(m),
     m_trail(m)
+    #ifdef _TRACE
+    , cache_hits(0), cache_misses(0)
+    #endif
 {}
 
 expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
     m_table.find(e, e);
 
-    if (!(e.r)) {
-        STRACE("seq_regex_brief", tout << "!";); // cache miss
-    }
+    #ifdef _TRACE
+    (e.r) ? (cache_hits++) : (cache_misses++) ;
+    #endif
 
     return e.r;
 }
 
+#ifdef _TRACE
+void seq_rewriter::trace_and_reset_cache() {
+    unsigned hits = m_op_cache.cache_hits;
+    unsigned misses = m_op_cache.cache_misses;
+    // Suppress tracing of "0/0 hits" or "1/1 hits"
+    if (hits >= 2 || misses >= 1) {
+        STRACE("seq_regex_brief",
+            tout << "(" << hits << "/" << (hits + misses)
+                 << " hits) ";
+        );
+    }
+    m_op_cache.cache_hits = 0;
+    m_op_cache.cache_misses = 0;
+}
+#endif
+
 void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) {
     cleanup();
     if (a) m_trail.push_back(a);
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 6b9d88a3a38..4531778f026 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -142,12 +142,18 @@ class seq_rewriter {
         unsigned        m_max_cache_size { 10000 };
         expr_ref_vector m_trail;
         op_table        m_table;
+
         void cleanup();
 
     public:
         op_cache(ast_manager& m);
         expr* find(decl_kind op, expr* a, expr* b);
         void insert(decl_kind op, expr* a, expr* b, expr* r);
+
+        #ifdef _TRACE
+        unsigned        cache_hits;
+        unsigned        cache_misses;
+        #endif
     };
 
     seq_util       m_util;
@@ -184,7 +190,7 @@ class seq_rewriter {
     expr_ref mk_seq_concat(expr* a, expr* b);    
 
     // Calculate derivative, memoized and enforcing a normal form
-    expr_ref mk_derivative(expr* ele, expr* r);
+    expr_ref is_nullable_rec(expr* r);
     expr_ref mk_derivative_rec(expr* ele, expr* r);
     expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
     expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);
@@ -289,7 +295,6 @@ class seq_rewriter {
     class seq_util::str& str() { return u().str; }
     class seq_util::str const& str() const { return u().str; }
 
-    expr_ref is_nullable_rec(expr* r);
     void intersect(unsigned lo, unsigned hi, svector<std::pair<unsigned, unsigned>>& ranges);
 
 public:
@@ -336,13 +341,19 @@ class seq_rewriter {
 
     void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs);
 
-    // Check for acceptance of the empty string
+    // Expose derivative and nullability check
     expr_ref is_nullable(expr* r);
+    expr_ref mk_derivative(expr* ele, expr* r);
 
     // heuristic elimination of element from condition that comes form a derivative.
     // special case optimization for conjunctions of equalities, disequalities and ranges.
     void elim_condition(expr* elem, expr_ref& cond);
 
+    #ifdef _TRACE
+    void trace_and_reset_cache();
+    #else
+    static inline void trace_and_reset_cache() {}
+    #endif
 };
 
 #endif
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 9e0acc3ffcb..de7c3a4698d 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -241,7 +241,7 @@ namespace smt {
         expr_ref is_nullable = seq_rw().is_nullable(r);
         rewrite(is_nullable);
 
-        STRACE("seq_regex_brief", tout << " ";);
+        seq_rw().trace_and_reset_cache();
 
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
@@ -423,10 +423,13 @@ namespace smt {
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "D ";);
-        expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
+
+        expr_ref result = seq_rw().mk_derivative(hd, r);
         rewrite(result);
+
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << " ";);
+        seq_rw().trace_and_reset_cache();
+
         // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;);
         return result;
     }

From 7f53dcaea12bb0794974fd7c11736411f9a3f9aa Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sat, 20 Jun 2020 14:15:40 -0400
Subject: [PATCH 12/51] seq_regex fix cache hit/miss tracing and wrapper around
 is_nullable

---
 src/ast/rewriter/seq_rewriter.cpp | 45 ++++++++++++++------------
 src/ast/rewriter/seq_rewriter.h   |  4 +--
 src/smt/seq_regex.cpp             | 54 ++++++++++++++++++++++++-------
 src/smt/seq_regex.h               |  1 +
 4 files changed, 69 insertions(+), 35 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 1573782cbb2..79195ab7fa9 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -4173,12 +4173,7 @@ bool seq_rewriter::reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs,
 } 
 
 seq_rewriter::op_cache::op_cache(ast_manager& m):
-    m(m),
-    m_trail(m)
-    #ifdef _TRACE
-    , cache_hits(0), cache_misses(0)
-    #endif
-{}
+    m(m), m_trail(m) {}
 
 expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
@@ -4191,12 +4186,35 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     return e.r;
 }
 
+void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) {
+    cleanup();
+    if (a) m_trail.push_back(a);
+    if (b) m_trail.push_back(b);
+    if (r) m_trail.push_back(r);
+    m_table.insert(op_entry(op, a, b, r));
+}
+
+void seq_rewriter::op_cache::cleanup() {
+    if (m_table.size() >= m_max_cache_size) {
+        m_trail.reset();
+        m_table.reset();
+    }
+}
+
 #ifdef _TRACE
+unsigned seq_rewriter::op_cache::cache_hits = 0;
+unsigned seq_rewriter::op_cache::cache_misses = 0;
+
 void seq_rewriter::trace_and_reset_cache() {
     unsigned hits = m_op_cache.cache_hits;
     unsigned misses = m_op_cache.cache_misses;
     // Suppress tracing of "0/0 hits" or "1/1 hits"
     if (hits >= 2 || misses >= 1) {
+        STRACE("seq_regex",
+            tout << "Op cache hits: " << hits
+                 << " (out of " << (hits + misses)
+                 << ")" << std::endl;
+        );
         STRACE("seq_regex_brief",
             tout << "(" << hits << "/" << (hits + misses)
                  << " hits) ";
@@ -4206,18 +4224,3 @@ void seq_rewriter::trace_and_reset_cache() {
     m_op_cache.cache_misses = 0;
 }
 #endif
-
-void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) {
-    cleanup();
-    if (a) m_trail.push_back(a);
-    if (b) m_trail.push_back(b);
-    if (r) m_trail.push_back(r);
-    m_table.insert(op_entry(op, a, b, r));
-}
-
-void seq_rewriter::op_cache::cleanup() {
-    if (m_table.size() >= m_max_cache_size) {
-        m_trail.reset();
-        m_table.reset();
-    }
-}
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 4531778f026..19c4590b48b 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -151,8 +151,8 @@ class seq_rewriter {
         void insert(decl_kind op, expr* a, expr* b, expr* r);
 
         #ifdef _TRACE
-        unsigned        cache_hits;
-        unsigned        cache_misses;
+        static unsigned cache_hits;
+        static unsigned cache_misses;
         #endif
     };
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index de7c3a4698d..d600881f219 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -238,10 +238,7 @@ namespace smt {
         TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "PN ";);
 
-        expr_ref is_nullable = seq_rw().is_nullable(r);
-        rewrite(is_nullable);
-
-        seq_rw().trace_and_reset_cache();
+        expr_ref is_nullable = is_nullable_wrapper(r);
 
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
@@ -416,21 +413,56 @@ namespace smt {
     }
 
     /*
-        Wrapper around the regex symbolic derivative from the rewriter.
+        Wrapper around calls to is_nullable from the seq rewriter.
+    */
+    expr_ref seq_regex::is_nullable_wrapper(expr* r) {
+        STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex_brief", tout << "n ";);
+
+        expr_ref result = seq_rw().is_nullable(r);
+        rewrite(result);
+
+        STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
+        seq_rw().trace_and_reset_cache();
+
+        return result;
+    }
+
+    /*
+        Wrapper around the regex symbolic derivative from the seq rewriter.
         Ensures that the derivative is written in a normalized BDD form
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "D ";);
+        STRACE("seq_regex_brief", tout << "d ";);
 
-        expr_ref result = seq_rw().mk_derivative(hd, r);
+        expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
 
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
         seq_rw().trace_and_reset_cache();
 
-        // IF_VERBOSE(10, verbose_stream() << std::endl << "Calculated derivative of: " << expr_ref(r, m) << " was: " << result << std::endl;);
+        /*  If the following lines are enabled instead, we use the
+            same rewriter for the nullable and derivative calls.
+            However, it currently seems to cause a performance
+            bug as a side effect.
+
+            The two seq rewriters used are at:
+                m_seq_rewrite
+                    (returned by seq_rw())
+                th.m_rewrite.m_imp->m_cfg.m_seq_rw
+                    (private, can't be accessed directly)
+
+            TODO: experiment with making them the same and see
+            if it results in significant speedup (due to fewer
+            cache misses).
+           */
+        // expr_ref result = seq_rw().mk_derivative(hd, r);
+        // rewrite(result)
+        // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
+        // seq_rw().trace_and_reset_cache();
+
         return result;
     }
 
@@ -476,8 +508,7 @@ namespace smt {
         TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "PNE ";);
 
-        expr_ref is_nullable = seq_rw().is_nullable(r);
-        rewrite(is_nullable);
+        expr_ref is_nullable = is_nullable_wrapper(r);
         if (m.is_true(is_nullable))
             return;
         literal null_lit = th.mk_literal(is_nullable);
@@ -535,8 +566,7 @@ namespace smt {
         TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "PE ";);
 
-        expr_ref is_nullable = seq_rw().is_nullable(r);
-        rewrite(is_nullable);
+        expr_ref is_nullable = is_nullable_wrapper(r);
         if (m.is_true(is_nullable)) {
             th.add_axiom(~lit);
             return;
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index f339d36c929..38720c5db17 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -79,6 +79,7 @@ namespace smt {
 
         expr_ref symmetric_diff(expr* r1, expr* r2);
 
+        expr_ref is_nullable_wrapper(expr* r);
         expr_ref derivative_wrapper(expr* hd, expr* r);
 
         void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result);

From ed804dc224da81d2a4d397e681371f4e81a34c9c Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 24 Jun 2020 19:21:39 -0400
Subject: [PATCH 13/51] minor

---
 src/ast/rewriter/seq_rewriter.cpp |  7 ++++-
 src/ast/rewriter/seq_rewriter.h   |  4 +--
 src/smt/seq_regex.cpp             | 45 +++++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 79195ab7fa9..37e96b1d31b 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2487,6 +2487,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
                 std::swap(a1, b1);
                 std::swap(a2, b2);
             }
+            // @EXP (experimental change)
             // Simplify if there is a relationship between ca and cb
             if (pred_implies(ca, cb)) {
                 r1 = mk_der_op(k, a1, b1);
@@ -2655,6 +2656,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         expr_ref hd(m()), tl(m());
         if (get_head_tail(r1, hd, tl)) {
             // head must be equal; if so, derivative is tail
+            // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
+            // @EXP (experimental change)
             // Write 'head is equal' as a range constraint:
             // (ele <= hd) and (hd <= ele)
             return mk_der_inter(
@@ -2683,6 +2686,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         // This is analagous to the previous is_to_re case.
         expr_ref hd(m()), tl(m());
         if (get_head_tail_reversed(r2, hd, tl)) {
+            // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
+            // @EXP (experimental change)
             // Write 'tail is equal' as a range constraint:
             // (ele <= tl) and (tl <= ele)
             return mk_der_inter(
@@ -4205,7 +4210,7 @@ void seq_rewriter::op_cache::cleanup() {
 unsigned seq_rewriter::op_cache::cache_hits = 0;
 unsigned seq_rewriter::op_cache::cache_misses = 0;
 
-void seq_rewriter::trace_and_reset_cache() {
+void seq_rewriter::trace_and_reset_cache_counts() {
     unsigned hits = m_op_cache.cache_hits;
     unsigned misses = m_op_cache.cache_misses;
     // Suppress tracing of "0/0 hits" or "1/1 hits"
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 19c4590b48b..1ac8d0157cd 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -350,9 +350,9 @@ class seq_rewriter {
     void elim_condition(expr* elem, expr_ref& cond);
 
     #ifdef _TRACE
-    void trace_and_reset_cache();
+    void trace_and_reset_cache_counts();
     #else
-    static inline void trace_and_reset_cache() {}
+    static inline void trace_and_reset_cache_counts() {}
     #endif
 };
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 50b74c20db3..c882314edc7 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -424,7 +424,7 @@ namespace smt {
         rewrite(result);
 
         STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
-        seq_rw().trace_and_reset_cache();
+        seq_rw().trace_and_reset_cache_counts();
 
         return result;
     }
@@ -435,14 +435,21 @@ namespace smt {
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
-        STRACE("seq_regex", tout << "derivative: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "d ";);
+        STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
 
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
 
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        seq_rw().trace_and_reset_cache();
+        STRACE("seq_regex_brief",
+            tout << "d("
+                 << mk_pp(hd, m)
+                 << ","
+                 << r->get_id()
+                 << "->"
+                 << result->get_id()
+                 << ") ";);
+        seq_rw().trace_and_reset_cache_counts();
 
         /*  If the following lines are enabled instead, we use the
             same rewriter for the nullable and derivative calls.
@@ -462,7 +469,7 @@ namespace smt {
         // expr_ref result = seq_rw().mk_derivative(hd, r);
         // rewrite(result)
         // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        // seq_rw().trace_and_reset_cache();
+        // seq_rw().trace_and_reset_cache_counts();
 
         return result;
     }
@@ -507,7 +514,11 @@ namespace smt {
         VERIFY(sk().is_is_non_empty(e, r, u));
 
         TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PNE ";);
+        STRACE("seq_regex_brief",
+            tout << std::endl << "PNE(" << e->get_id()
+                              << "," << r->get_id()
+                              << "," << u->get_id()
+                              << ") ";);
 
         expr_ref is_nullable = is_nullable_wrapper(r);
         if (m.is_true(is_nullable))
@@ -515,7 +526,16 @@ namespace smt {
         literal null_lit = th.mk_literal(is_nullable);
         expr_ref hd = mk_first(r);
         expr_ref d(m);
-        d = derivative_wrapper(hd, r);
+        d = derivative_wrapper(m.mk_var(0, m.get_sort(hd)), r);
+
+        var_subst subst(m);
+        expr_ref_vector sub(m);
+        sub.push_back(hd);
+        d = subst(d, sub);
+
+        STRACE("seq_regex_brief", tout << "(d subbed: " << d->get_id() << ") ";);
+        TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;);
+
         literal_vector lits;
         lits.push_back(~lit);
         if (null_lit != false_literal) 
@@ -536,6 +556,11 @@ namespace smt {
                 next_non_empty = m.mk_and(cond, next_non_empty);
             lits.push_back(th.mk_literal(next_non_empty));
         }
+
+        TRACE("seq_regex", tout << "solved lits: " << mk_pp(lits) << std::endl;);
+        // STRACE("seq_regex_brief", tout << "(d solved: " << d->get_id() << ") ";);
+        // NOTE(review): unclear whether mk_pp accepts a literal_vector; confirm the TRACE above builds
+
         th.add_axiom(lits);
     }
 
@@ -565,7 +590,11 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr;
         VERIFY(sk().is_is_empty(e, r, u));
         TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PE ";);
+        STRACE("seq_regex_brief",
+            tout << std::endl << "PE(" << e->get_id()
+                              << "," << r->get_id()
+                              << "," << u->get_id()
+                              << ") ";);
 
         expr_ref is_nullable = is_nullable_wrapper(r);
         if (m.is_true(is_nullable)) {

From a7df4e572fc8cf27a427042d6607db639893ca63 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 25 Jun 2020 13:23:08 -0400
Subject: [PATCH 14/51] label and disable more experimental changes for testing

---
 src/ast/rewriter/seq_rewriter.cpp | 49 +++++++++++++++++--------------
 src/smt/seq_regex.cpp             |  2 +-
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 561767a5b91..c7f4c4d494a 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2489,18 +2489,18 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
             }
             // @EXP (experimental change)
             // Simplify if there is a relationship between ca and cb
-            if (pred_implies(ca, cb)) {
-                r1 = mk_der_op(k, a1, b1);
-            }
-            else if (pred_implies(ca, notcb)) {
-                r1 = mk_der_op(k, a1, b2);
-            }
-            if (pred_implies(notca, cb)) {
-                r2 = mk_der_op(k, a2, b1);
-            }
-            else if (pred_implies(notca, notcb)) {
-                r2 = mk_der_op(k, a2, b2);
-            }
+            // if (pred_implies(ca, cb)) {
+            //     r1 = mk_der_op(k, a1, b1);
+            // }
+            // else if (pred_implies(ca, notcb)) {
+            //     r1 = mk_der_op(k, a1, b2);
+            // }
+            // if (pred_implies(notca, cb)) {
+            //     r2 = mk_der_op(k, a2, b1);
+            // }
+            // else if (pred_implies(notca, notcb)) {
+            //     r2 = mk_der_op(k, a2, b2);
+            // }
             // --- End core logic
         }
         if (!r1) r1 = mk_der_op(k, a1, b);
@@ -2656,14 +2656,14 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         expr_ref hd(m()), tl(m());
         if (get_head_tail(r1, hd, tl)) {
             // head must be equal; if so, derivative is tail
-            // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
+            return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
             // @EXP (experimental change)
             // Write 'head is equal' as a range constraint:
             // (ele <= hd) and (hd <= ele)
-            return mk_der_inter(
-                re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)),
-                re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl))
-            );
+            // return mk_der_inter(
+            //     re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)),
+            //     re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl))
+            // );
         }
         else if (str().is_empty(r1)) {
             return mk_empty();
@@ -2686,14 +2686,14 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         // This is analagous to the previous is_to_re case.
         expr_ref hd(m()), tl(m());
         if (get_head_tail_reversed(r2, hd, tl)) {
-            // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
+            return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
             // @EXP (experimental change)
             // Write 'tail is equal' as a range constraint:
             // (ele <= tl) and (tl <= ele)
-            return mk_der_inter(
-                re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))),
-                re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd)))
-            );
+            // return mk_der_inter(
+            //     re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))),
+            //     re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd)))
+            // );
         }
         else if (str().is_empty(r2)) {
             return mk_empty();
@@ -2706,6 +2706,11 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
             if (s1.length() == 1 && s2.length() == 1) {
                 expr_ref ch1(m_util.mk_char(s1[0]), m());
                 expr_ref ch2(m_util.mk_char(s2[0]), m());
+                // @EXP (experimental change)
+                // expr_ref p1(m_util.mk_le(ch1, ele), m());
+                // expr_ref p2(m_util.mk_le(ele, ch2), m());
+                // expr_ref conj(m().mk_and(p1, p2), m());
+                // return re_predicate(conj, seq_sort);
                 return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort),
                                     re_predicate(m_util.mk_le(ele, ch2), seq_sort));
             }
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index cc5217b277a..9e1b2ebd3c3 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -364,6 +364,7 @@ namespace smt {
      * within the same Regex.
      */
     bool seq_regex::coallesce_in_re(literal lit) {
+        // @EXP (experimental change)
         return false;
         expr* s = nullptr, *r = nullptr;
         expr* e = ctx.bool_var2expr(lit.var());
@@ -633,7 +634,6 @@ namespace smt {
         sort* elem_sort = nullptr, *seq_sort = nullptr;
         VERIFY(u().is_re(r, seq_sort));
         VERIFY(u().is_seq(seq_sort, elem_sort));
-        sort* domain[2] = { m.get_sort(n), a().mk_int() };
         return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort);
     }
 }

From e074fb3544cb99e804010442065938b40d65b933 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 25 Jun 2020 13:59:44 -0400
Subject: [PATCH 15/51] minor documentation / tracing

---
 src/smt/seq_regex.cpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 9e1b2ebd3c3..e8fa6e38fd2 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -103,14 +103,14 @@ namespace smt {
     }
 
     /**
-     * Propagate the atom (str.in.re s r)
+     * Propagate the atom (str.in_re s r)
      * 
      * Propagation implements the following inference rules
      * 
-     * (not (str.in.re s r)) => (str.in.re s (complement r))
-     * (str.in.re s r) => r != {}
+     * (not (str.in_re s r)) => (str.in_re s (complement r))
+     * (str.in_re s r) => r != {}
      * 
-     * (str.in.re s r) => (accept s 0 r)
+     * (str.in_re s r) => (accept s 0 r)
      */
 
     void seq_regex::propagate_in_re(literal lit) {
@@ -119,7 +119,12 @@ namespace smt {
         VERIFY(str().is_in_re(e, s, r));
 
         TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PIR ";);
+        STRACE("seq_regex_brief",
+            tout << "PIR("
+                 << s->get_id()
+                 << ","
+                 << r->get_id()
+                 << ") ";);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -419,12 +424,17 @@ namespace smt {
     */
     expr_ref seq_regex::is_nullable_wrapper(expr* r) {
         STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "n ";);
 
         expr_ref result = seq_rw().is_nullable(r);
         rewrite(result);
 
         STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
+        STRACE("seq_regex_brief",
+            tout << "n("
+                 << r->get_id()
+                 << "->"
+                 << result->get_id()
+                 << ") ";);
         seq_rw().trace_and_reset_cache_counts();
 
         return result;

From 4e2ba58f6064077a1a6e38c3928b0c50640e5abc Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 25 Jun 2020 15:57:09 -0400
Subject: [PATCH 16/51] a few more @EXP annotations

---
 src/ast/rewriter/seq_rewriter.cpp | 3 ++-
 src/smt/seq_regex.cpp             | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index c7f4c4d494a..253fb0ca050 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2465,7 +2465,8 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     auto mk_ite = [&](expr* c, expr* a, expr* b) {
         return (a == b) ? a : m().mk_ite(c, a, b);
     };
-    // TODO
+    // @EXP (experimental change)
+    // Use same ID for related predicates to improve simplifications
     // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); };
     if (m().is_ite(a, ca, a1, a2)) {
         expr_ref r1(m()), r2(m());
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index e8fa6e38fd2..3cf4adab64a 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -251,6 +251,7 @@ namespace smt {
         }
         else if (m.is_false(is_nullable)) {
             th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1));
+            // @EXP (experimental change)
             //unsigned len = std::max(1u, re().min_length(r));
             //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r)));
         }

From f610d3a0b9c8567d8c0e5c6e5d8b0bccdce1009e Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 25 Jun 2020 18:41:29 -0400
Subject: [PATCH 17/51] dead state elimination skeleton code

---
 src/smt/seq_regex.h | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 826727347a0..1b3eada1fc7 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -17,6 +17,8 @@ Module Name:
 #pragma once
 
 #include "util/scoped_vector.h"
+#include "util/obj_ref_hashtable.h"
+#include "util/union_find.h"
 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/seq_rewriter.h"
 #include "smt/smt_context.h"
@@ -27,6 +29,79 @@ namespace smt {
     class theory_seq;
 
     class seq_regex {
+        /*
+            Info saved about the set of states (regexes) seen so far
+        */
+        class seen_states {
+            typedef expr state;
+            typedef obj_ref_map<ast_manager, state, bool> state_set;
+            typedef obj_ref_map<ast_manager, state, state_set> edge_rel;
+            typedef basic_union_find state_union_find;
+
+        private:
+            /*
+                All seen states are exactly one of:
+                - alive:      known to be nonempty
+                - dead:       known to be empty
+                - unknown:    all outgoing transitions have been
+                              seen, but the state is not known
+                              to be alive or dead
+                - unvisited:  not all outgoing transitions have
+                              been seen
+            */
+            state_set         m_seen;
+            state_set         m_alive;
+            state_set         m_dead;
+            state_set         m_unknown;
+            state_set         m_unvisited;
+
+            void mark_unknown(state s); // unvisited -> unknown
+            void mark_alive(state s);   // unknown -> alive
+            void mark_dead(state s);    // unknown -> dead
+
+            /*
+                A graph of strongly connected
+                components is kept on unknown states
+            */
+            state_union_find  m_cnctd_cmpnts;
+            edge_rel          m_from;
+            edge_rel          m_to;
+
+            void merge_states(state_set s);
+
+            /*
+                Caching details
+            */
+            unsigned          m_max_cache_size { 10000 };
+            expr_ref_vector   m_trail;
+
+            /*
+                Core cycle-detection routine
+            */
+            // Heuristic
+            bool can_be_in_cycle(state s1, state s2);
+            // Full check
+            void find_cycle(state s1, state s2);
+
+        public:
+            /*
+                Exposed methods:
+                    - adding a state
+                    - adding a transition from a state
+                    - marking a state as visited (no more transitions)
+                    - checking if a state is known to be alive or dead
+            */
+            void add_state(state s);
+            void add_transition(state s1, state s2);
+
+            bool is_alive(state s);
+            bool is_dead(state s);
+        };
+
+        /*
+            Struct representing data about a constraint of
+            the form (str.in_re s R)
+        */
         struct s_in_re {
             literal m_lit;
             expr*   m_s;

From 4e5873eaf612b3fdc59e22872b965306f4a44bbf Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 26 Jun 2020 22:25:00 -0400
Subject: [PATCH 18/51] progress on dead state elimination

---
 src/smt/seq_regex.cpp | 158 ++++++++++++++++++++++++++++++++++++++++++
 src/smt/seq_regex.h   | 106 ++++++++++++++++++----------
 2 files changed, 229 insertions(+), 35 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 3cf4adab64a..ce9a7b5d90d 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -647,4 +647,162 @@ namespace smt {
         VERIFY(u().is_seq(seq_sort, elem_sort));
         return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort);
     }
+
+    /****************************************************
+     *** Dead state elimination and seen_states class ***
+     ****************************************************/
+
+    seq_regex::seen_states::state seq_regex::seen_states::get_state(expr* e) {
+        return m_state_ufind.find(e->get_id());
+    }
+
+    void seq_regex::seen_states::mark_unknown(state s) {
+        SASSERT(m_unvisited.contains(s));
+        m_unvisited.remove(s);
+        m_unknown.insert(s);
+    }
+    void seq_regex::seen_states::mark_alive(state s) {
+        SASSERT(m_unknown.contains(s));
+        m_unknown.remove(s);
+        m_alive.insert(s);
+    }
+    void seq_regex::seen_states::mark_dead(state s) {
+        SASSERT(m_unknown.contains(s));
+        m_unknown.remove(s);
+        m_dead.insert(s);
+    }
+
+    bool seq_regex::seen_states::is_resolved(state s) {
+        return (m_alive.contains(s) || m_dead.contains(s));
+    }
+    bool seq_regex::seen_states::is_unresolved(state s) {
+        return (m_unknown.contains(s) || m_unvisited.contains(s));
+    }
+
+    /*
+        Merge two states or more generally a set of states into one,
+        returning the new state.
+
+        Preconditions: the set should be nonempty, and every state
+        in the set should be unresolved. Also, each state should
+        be current (not a previous SCC that was later merged into another).
+
+        Removes the old state from m_unknown or m_unvisited,
+        but leaves it in m_seen.
+    */
+    seq_regex::seen_states::state
+            seq_regex::seen_states::merge_states(state s1, state s2) {
+        SASSERT(is_unresolved(s1));
+        SASSERT(is_unresolved(s2));
+        SASSERT(m_state_ufind.is_root(s1));
+        SASSERT(m_state_ufind.is_root(s2));
+        m_state_ufind.merge(s1, s2);
+        if (m_state_ufind.is_root(s1)) std::swap(s1, s2);
+        // Remove old state s2
+        if (m_unknown.contains(s2)) {
+            m_unknown.remove(s2);
+        } else {
+            m_unvisited.remove(s2);
+        }
+        return s1;
+    }
+    seq_regex::seen_states::state
+            seq_regex::seen_states::merge_states(state_set& s_set) {
+        SASSERT(s_set.num_elems() > 0);
+        state prev_s;
+        bool first_iter = true;
+        for (auto const& s: s_set) {
+            if (first_iter) {
+                prev_s = s;
+                first_iter = false;
+            } else {
+                prev_s = merge_states(prev_s, s);
+            }
+        }
+        return prev_s;
+    }
+
+    bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) {
+        // Simple placeholder. TODO: Implement full check
+        return true;
+    }
+    void seq_regex::seen_states::find_and_merge_cycles(state s1, state s2) {
+        // Search backwards from s1 to see if (s1, s2) creates a cycle.
+        if (s1 == s2) return;
+        // TODO: Implement full check
+        // Simple placeholder for now: check if this is a loop or if there
+        // is an edge both ways
+        if (m_to.find(s2)->contains(s1)) {
+            merge_states(s1, s2);
+        }
+    }
+
+    void seq_regex::seen_states::add_state(expr* e) {
+        unsigned id = e->get_id();
+        if (m_seen.contains(id)) return;
+        if (m_seen.num_elems() >= m_max_size) {
+            STRACE("seq_regex", tout << "Warning: max size of seen states reached!" << std::endl;);
+            STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
+            return;
+        }
+        // Save e as expr_ref so it's not deleted
+        m_trail.push_back(e);
+        // Ensure corresponding var in connected components
+        while (id >= m_state_ufind.get_num_vars()) {
+            m_state_ufind.mk_var();
+        }
+        // Initialize as unvisited
+        m_seen.insert(id);
+        m_unvisited.insert(id);
+        m_to.insert(id, new state_set());
+        m_from_cycle.insert(id, new state_set());
+        m_from_nocycle.insert(id, new state_set());
+    }
+    void seq_regex::seen_states::add_transition(expr* e1, expr* e2) {
+        // Precondition: e1 and e2 already correspond to existing states
+        SASSERT(m_seen.contains(e1->get_id()));
+        SASSERT(m_seen.contains(e2->get_id()));
+        state s1 = get_state(e1);
+        state s2 = get_state(e2);
+        if (s1 == s2) {
+            return;
+        }
+        // TODO:
+        // If e1 is dead, assert e1 is marked dead
+        // If e1 is live, add edge and return
+        // If e2 is live, mark e1 live, propagate backwards
+        else if (!can_be_in_cycle(e1, e2)) {
+            // Don't need to check for cycles here
+            if (m_from_nocycle.find(s2)->contains(s1)) {
+                return;
+            }
+            else if (m_from_cycle.find(s2)->contains(s1)) {
+                // edge s1 -> s2 exists with a 'cycle' label: relabel it as 'nocycle'
+                m_from_cycle.find(s2)->remove(s1);
+                m_from_nocycle.find(s2)->insert(s1);
+            }
+            else {
+                // add edge
+                m_to.find(s1)->insert(s2);
+                m_from_nocycle.find(s2)->insert(s1);
+            }
+        }
+        else if (m_to.find(s1)->contains(s2)) {
+            return;
+        }
+        else {
+            // Need to check for cycles here
+            m_to.find(s1)->insert(s2);
+            m_from_cycle.find(s2)->insert(s1);
+            find_and_merge_cycles(s1, s2);
+        }
+    }
+
+    bool seq_regex::seen_states::is_alive(expr* e) {
+        return m_alive.contains(get_state(e));
+    }
+    bool seq_regex::seen_states::is_dead(expr* e) {
+        return m_dead.contains(get_state(e));
+    }
+
 }
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 1b3eada1fc7..05e4ee5ca9e 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -17,7 +17,8 @@ Module Name:
 #pragma once
 
 #include "util/scoped_vector.h"
-#include "util/obj_ref_hashtable.h"
+#include "util/uint_set.h"
+#include "util/uint_map.h"
 #include "util/union_find.h"
 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/seq_rewriter.h"
@@ -30,17 +31,29 @@ namespace smt {
 
     class seq_regex {
         /*
-            Info saved about the set of states (regexes) seen so far
+            seen_states
+
+            Info saved about the set of states (regexes) seen so far.
+
+            "States" here are strongly connected components -- states that
+            are mutually reachable from each other. States
+            are represented as unsigned integers.
+
+            Used for the core incremental dead state elimination algorithm.
+
+            Class invariants:
+                - TODO
         */
         class seen_states {
-            typedef expr state;
-            typedef obj_ref_map<ast_manager, state, bool> state_set;
-            typedef obj_ref_map<ast_manager, state, state_set> edge_rel;
-            typedef basic_union_find state_union_find;
+            typedef unsigned              state;
+            typedef uint_set              state_set;
+            typedef uint_map<state_set>   edge_rel;
+            typedef basic_union_find      state_ufind;
+            // typedef uint_map<expr_ref_vector>  exprs_of_state;
 
         private:
             /*
-                All seen states are exactly one of:
+                All states are exactly one of:
                 - alive:      known to be nonempty
                 - dead:       known to be empty
                 - unknown:    all outgoing transitions have been
@@ -48,59 +61,77 @@ namespace smt {
                               to be alive or dead
                 - unvisited:  not all outgoing transitions have
                               been seen
+
+                The set m_seen keeps all of these and in addition,
+                seen states that have been merged and no longer reprsent
+                a current SCC.
             */
-            state_set         m_seen;
-            state_set         m_alive;
-            state_set         m_dead;
-            state_set         m_unknown;
-            state_set         m_unvisited;
+            state_set   m_seen;
+            state_set   m_alive;
+            state_set   m_dead;
+            state_set   m_unknown;
+            state_set   m_unvisited;
 
             void mark_unknown(state s); // unvisited -> unknown
             void mark_alive(state s);   // unknown -> alive
             void mark_dead(state s);    // unknown -> dead
 
+            bool is_resolved(state s);   // alive or dead
+            bool is_unresolved(state s); // unknown or unvisited
+
             /*
-                A graph of strongly connected
-                components is kept on unknown states
+                Initially a state is represented by an expression ID.
+                A union find data structure collapses an ID to a state.
+
+                Edges are saved in both from and to maps.
+                Additionally edges from are divided into those possibly
+                in a cycle, and those not in a cycle.
             */
-            state_union_find  m_cnctd_cmpnts;
-            edge_rel          m_from;
-            edge_rel          m_to;
+            state_ufind   m_state_ufind;
+
+            state get_state(expr* e);
+            state merge_states(state s1, state s2);
+            state merge_states(state_set& s_set);
 
-            void merge_states(state_set s);
+            edge_rel      m_from_cycle;
+            edge_rel      m_from_nocycle;
+            edge_rel      m_to;
 
             /*
                 Caching details
             */
-            unsigned          m_max_cache_size { 10000 };
+            unsigned          m_max_size { 10000 };
             expr_ref_vector   m_trail;
 
             /*
                 Core cycle-detection routine
             */
-            // Heuristic
-            bool can_be_in_cycle(state s1, state s2);
-            // Full check
-            void find_cycle(state s1, state s2);
+            // Heuristic on syntactic expressions
+            bool can_be_in_cycle(expr* e1, expr* e2);
+            // Full check: if new edge (s1, s2) will create at least one cycle,
+            // merge all states in the new SCC
+            void find_and_merge_cycles(state s1, state s2);
 
         public:
             /*
-                Exposed methods:
-                    - adding a state
-                    - adding a transition from a state
-                    - marking a state as visited (no more transitions)
-                    - checking if a state is known to be alive or dead
+                Main exposed methods:
+                - adding a state
+                - adding a transition from a state
+                - checking if a state is known to be alive or dead
             */
-            void add_state(state s);
-            void add_transition(state s1, state s2);
-
-            bool is_alive(state s);
-            bool is_dead(state s);
+            void add_state(expr* e);
+            void add_transition(expr* e1, expr* e2);
+            bool is_alive(expr* e);
+            bool is_dead(expr* e);
+
+            seen_states(ast_manager& m):
+                m_seen(), m_alive(), m_dead(), m_unknown(), m_unvisited(),
+                m_state_ufind(), m_from_cycle(), m_from_nocycle(), m_to(),
+                m_trail(m) {}
         };
 
         /*
-            Struct representing data about a constraint of
-            the form (str.in_re s R)
+            Data about a constraint of the form (str.in_re s R)
         */
         struct s_in_re {
             literal m_lit;
@@ -111,6 +142,11 @@ namespace smt {
             m_lit(l), m_s(s), m_re(r), m_active(true) {}
         };
 
+        /*
+            Data about a literal for the solver to propagate
+            The trigger guards whether the literal is ready
+            to be addressed yet -- see seq_regex::can_propagate
+        */
         struct propagation_lit {
             literal m_lit;
             literal m_trigger;

From 1f1f127bdf4a59711e252fba63593b9c9cc7f8c7 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sat, 27 Jun 2020 18:57:40 -0400
Subject: [PATCH 19/51] more progress on dead state elimination

---
 src/smt/seq_regex.cpp | 302 ++++++++++++++++++++++++++++++------------
 src/smt/seq_regex.h   | 108 ++++++++-------
 2 files changed, 278 insertions(+), 132 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index ce9a7b5d90d..43b510f08be 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -24,7 +24,8 @@ namespace smt {
     seq_regex::seq_regex(theory_seq& th):
         th(th),
         ctx(th.get_context()),
-        m(th.get_manager())
+        m(th.get_manager()),
+        m_seen_states(m, *this)
     {}
 
     seq_util& seq_regex::u() { return th.m_util; }
@@ -591,6 +592,32 @@ namespace smt {
         }
     }
 
+    void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
+        // Use get_cofactors method and check which conds are
+        // satisfiable
+        // TODO
+        return;
+        // 
+        // get_cofactors(d, cofactors);        
+        // for (auto const& p : cofactors) {
+        //     if (is_member(p.second, u))
+        //         continue;
+        //     expr_ref cond(p.first, m);
+        //     seq_rw().elim_condition(hd, cond);
+        //     rewrite(cond);
+        //     if (m.is_false(cond))
+        //         continue;
+        //     lits.reset();
+        //     lits.push_back(~lit);
+        //     if (!m.is_true(cond)) {
+        //         expr_ref ncond(mk_not(m, cond), m);
+        //         lits.push_back(th.mk_literal(mk_forall(m, hd, ncond)));
+        //     }
+        //     expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, r), n);    
+        //     lits.push_back(th.mk_literal(is_empty1)); 
+        //     th.add_axiom(lits);
+    }
+
     /*
       is_empty(r, u) => ~is_nullable(r)
       is_empty(r, u) => (forall x . ~cond(x)) or is_empty(r1, u union r)    for (cond, r) in min-terms(D(x,r))      
@@ -652,66 +679,145 @@ namespace smt {
      *** Dead state elimination and seen_states class ***
      ****************************************************/
 
-    seq_regex::seen_states::state seq_regex::seen_states::get_state(expr* e) {
-        return m_state_ufind.find(e->get_id());
+    void seq_regex::seen_states::add_state(state s) {
+        SASSERT(!m_seen.contains(s));
+        // Ensure corresponding var in connected components
+        while (s >= m_state_ufind.get_num_vars()) {
+            m_state_ufind.mk_var();
+        }
+        // Initialize as unvisited
+        m_seen.insert(s);
+        m_unvisited.insert(s);
+        m_to.insert(s, new state_set());
+        m_from.insert(s, new state_set());
+        m_from_maybecycle.insert(s, new state_set());
+    }
+    void seq_regex::seen_states::remove_state(state s) {
+        // This is a partial deletion -- the state is still seen and can't be
+        // added again later
+        SASSERT(m_seen.contains(s));
+        SASSERT(!m_state_ufind.is_root(s));
+        m_to.erase(s);
+        m_from.erase(s);
+        m_from_maybecycle.erase(s);
+        if (m_unvisited.contains(s)) {
+            UNREACHABLE(); // for testing TODO: remove
+            m_unvisited.remove(s);
+        }
+        else if (m_unknown.contains(s)) {
+            m_unknown.remove(s);
+        }
+        else if (m_dead.contains(s)) {
+            UNREACHABLE(); // for testing TODO: remove
+            m_dead.remove(s); // s is in m_dead on this branch, not m_unknown
+        }
+        else if (m_live.contains(s)) {
+            UNREACHABLE(); // for testing TODO: remove
+            m_live.remove(s);
+        }
+        else {
+            UNREACHABLE();
+        }
+    }
 
     void seq_regex::seen_states::mark_unknown(state s) {
+        SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unvisited.contains(s));
         m_unvisited.remove(s);
         m_unknown.insert(s);
     }
-    void seq_regex::seen_states::mark_alive(state s) {
+    void seq_regex::seen_states::mark_live(state s) {
+        SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
         m_unknown.remove(s);
-        m_alive.insert(s);
+        m_live.insert(s);
     }
     void seq_regex::seen_states::mark_dead(state s) {
+        SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
         m_unknown.remove(s);
         m_dead.insert(s);
     }
 
-    bool seq_regex::seen_states::is_resolved(state s) {
-        return (m_alive.contains(s) || m_dead.contains(s));
+    // bool seq_regex::seen_states::is_resolved(state s) {
+    //     SASSERT(m_state_ufind.is_root(s));
+    //     return (m_live.contains(s) || m_dead.contains(s));
+    // }
+    // bool seq_regex::seen_states::is_unresolved(state s) {
+    //     SASSERT(m_state_ufind.is_root(s));
+    //     return (m_unknown.contains(s) || m_unvisited.contains(s));
+    // }
+
+    /*
+        Add edge to the graph
+        May already exist, in which case a nocycle edge overrides
+        a cycle edge.
+    */
+    void seq_regex::seen_states::add_edge(state s1, state s2,
+                                          bool maybecycle) {
+        SASSERT(m_state_ufind.is_root(s1));
+        SASSERT(m_state_ufind.is_root(s2));
+        if (s1 == s2) return;
+        if (!m_to.find(s1)->contains(s2)) {
+            // add new edge
+            m_to.find(s1)->insert(s2);
+            m_from.find(s2)->insert(s1);
+            if (maybecycle) m_from_maybecycle.find(s2)->insert(s1);
+        }
+        else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) {
+            // update existing edge
+            m_from_maybecycle.find(s2)->remove(s1);
+        }
+    }
+    void seq_regex::seen_states::remove_edge(state s1, state s2) {
+        SASSERT(m_to.find(s1)->contains(s2));
+        SASSERT(m_from.find(s2)->contains(s1));
+        m_to.find(s1)->remove(s2);
+        m_from.find(s2)->remove(s1);
+        if (m_from_maybecycle.find(s2)->contains(s1)) {
+            m_from_maybecycle.find(s2)->remove(s1);
+        }
     }
-    bool seq_regex::seen_states::is_unresolved(state s) {
-        return (m_unknown.contains(s) || m_unvisited.contains(s));
+    void seq_regex::seen_states::rename_edge(state old1, state old2,
+                                             state new1, state new2) {
+        SASSERT(m_to.find(old1)->contains(old2));
+        SASSERT(m_from.find(old2)->contains(old1));
+        bool maybecycle = m_from_maybecycle.find(old2)->contains(old1);
+        remove_edge(old1, old2);
+        add_edge(new1, new2, maybecycle);
     }
 
     /*
         Merge two states or more generally a set of states into one,
-        returning the new state.
+        returning the new state. Also merges associated edges.
 
         Preconditions: the set should be nonempty, and every state
-        in the set should be unresolved. Also, each state should
+        in the set should be unknown (in particular, *not* unvisited).
+        Also, each state should
         be current (not a previous SCC that was later merged into another).
-
-        Removes the old state from m_unknown or m_univisited,
-        but leaves it in m_seen.
     */
-    seq_regex::seen_states::state
-            seq_regex::seen_states::merge_states(state s1, state s2) {
-        SASSERT(is_unresolved(s1));
-        SASSERT(is_unresolved(s2));
+    auto seq_regex::seen_states::merge_states(state s1, state s2) -> state {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
+        SASSERT(m_unknown.contains(s1));
+        SASSERT(m_unknown.contains(s2));
         m_state_ufind.merge(s1, s2);
         if (m_state_ufind.is_root(s1)) std::swap(s1, s2);
-        // Remove old state s2
-        if (m_unknown.contains(s2)) {
-            m_unknown.remove(s2);
-        } else {
-            m_unvisited.remove(s2);
+        // merge edges
+        for (auto s_to: *m_to.find(s2)) {
+            rename_edge(s2, s_to, s1, s_to);
+        }
+        for (auto s_from: *m_from.find(s2)) {
+            rename_edge(s_from, s2, s_from, s1);
         }
+        remove_state(s2);
         return s1;
     }
-    seq_regex::seen_states::state
-            seq_regex::seen_states::merge_states(state_set& s_set) {
+    auto seq_regex::seen_states::merge_states(state_set& s_set) -> state {
         SASSERT(s_set.num_elems() > 0);
         state prev_s;
         bool first_iter = true;
-        for (auto const& s: s_set) {
+        for (auto s: s_set) {
             if (first_iter) {
                 prev_s = s;
                 first_iter = false;
@@ -722,84 +828,106 @@ namespace smt {
         return prev_s;
     }
 
-    bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) {
-        // Simple placeholder. TODO: Implement full check
-        return true;
+    /*
+        if s is not live, mark it, and recurse on all states into s
+        Precondition: s is live or unknown
+    */
+    void seq_regex::seen_states::mark_live_recursive(state s) {
+        SASSERT(m_live.contains(s) || m_unknown.contains(s));
+        if (m_live.contains(s)) return;
+        mark_live(s);
+        for (auto s_from: *m_from.find(s)) {
+            mark_live_recursive(s_from);
+        }
     }
-    void seq_regex::seen_states::find_and_merge_cycles(state s1, state s2) {
-        // Search backwards from s1 to see if (s1, s2) creates a cycle.
-        if (s1 == s2) return;
+
+    /*
+        check if s is now known to be dead. If so, mark and recurse
+        on all states into s.
+        Precondition: s is live, dead, or unknown
+    */
+    void seq_regex::seen_states::mark_dead_recursive(state s) {
+        SASSERT(!m_unvisited.contains(s));
+        if (!m_unknown.contains(s)) return;
+        for (auto s_to: *m_to.find(s)) {
+            // unknown pointing to live should have been marked as live
+            SASSERT(!m_live.contains(s_to));
+            if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return;
+        }
+        // all states from s are dead
+        mark_dead(s);
+        for (auto s_from: *m_from.find(s)) {
+            mark_dead_recursive(s_from);
+        }
+    }
+
+    /*
+        if new edges from s1 to s_to will create at least one cycle,
+        merge all states in the new SCC
+    */
+    auto seq_regex::seen_states::merge_all_cycles(state s1, state_set& s_to)
+                                                  -> state {
+        // Mark s_to, then search backwards from s to mark the SCC
         // TODO: Implement full check
-        // Simple placeholder for now: check if this is a loop or if there
-        // is an edge both ways
-        if (m_to.find(s2)->contains(s1)) {
-            merge_states(s1, s2);
+        // Simple placeholder for now: check if there is an edge both ways
+        for (auto s2: s_to) {
+            if (m_to.find(s2)->contains(s1)) {
+                s1 = merge_states(s1, s2);
+            }
         }
+        return s1;
     }
 
-    void seq_regex::seen_states::add_state(expr* e) {
-        unsigned id = e->get_id();
-        if (m_seen.contains(id)) return;
-        if (m_seen.num_elems() >= m_max_size) {
+    auto seq_regex::seen_states::get_state(expr* e) -> state {
+        return m_state_ufind.find(e->get_id());
+    }
+    bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) {
+        // Simple placeholder. TODO: Implement full check
+        return true;
+    }
+
+    void seq_regex::seen_states::add_state(expr* e, bool live) {
+        unsigned s = e->get_id();
+        if (m_seen.contains(s)) return;
+        if (s >= m_max_size) {
             STRACE("seq_regex", tout << "Warning: max size of seen states reached!" << std::endl;);
             STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
             return;
         }
-        // Save e as expr_ref so it's not deleted
+        // Save e as expr_ref so it's not deallocated
         m_trail.push_back(e);
-        // Ensure corresponding var in connected components
-        while (id >= m_state_ufind.get_num_vars()) {
-            m_state_ufind.mk_var();
-        }
-        // Initialize as unvisited
-        m_seen.insert(id);
-        m_unvisited.insert(id);
-        m_to.insert(id, new state_set());
-        m_from_cycle.insert(id, new state_set());
-        m_from_nocycle.insert(id, new state_set());
-    }
-    void seq_regex::seen_states::add_transition(expr* e1, expr* e2) {
-        // Precondition: e1 and e2 already correspond to existing states
+        // Add state
+        add_state(s);
+        if (live) mark_live_recursive(s);
+    }
+    void seq_regex::seen_states::add_all_transitions(expr* e1) {
+        // Precondition: e already corresponds to an existing state
         SASSERT(m_seen.contains(e1->get_id()));
-        SASSERT(m_seen.contains(e2->get_id()));
         state s1 = get_state(e1);
-        state s2 = get_state(e2);
-        if (s1 == s2) {
-            return;
+        if (!m_unvisited.contains(s1)) return;
+        // Add edges to all derivatives
+        expr_ref_vector derivatives(m);
+        m_parent.get_all_derivatives(e1, derivatives);
+        mark_unknown(s1);
+        bool s1_live = false;
+        state_set s2_set; // NOTE(review): never populated below -- confirm intent
+        for (auto const& e2: derivatives) {
+            state s2 = get_state(e2);
+            bool maybecycle = can_be_in_cycle(e1, e2);
+            add_edge(s1, s2, maybecycle);
+            if (m_live.contains(s2)) s1_live = true;
         }
-        // TODO:
-        // If e1 is dead, assert e1 is marked dead
-        // If e1 is live, add edge and return
-        // If e2 is live, mark e1 live, propagate backwards
-        else if (!can_be_in_cycle(e1, e2)) {
-            // Don't need to check for cycles here
-            if (m_from_nocycle.find(s2)->contains(s1)) {
-                return;
-            }
-            else if (m_from_cycle.find(s2)->contains(s2)) {
-                // update edge label
-                m_from_cycle.find(s2)->remove(s2);
-                m_from_nocycle.find(s2)->insert(s1);
-            }
-            else {
-                // add edge
-                m_to.find(s1)->insert(s2);
-                m_from_nocycle.find(s2)->insert(s1);
-            }
-        }
-        else if (m_to.find(s1)->contains(s2)) {
+        if (s1_live) {
+            mark_live_recursive(s1);
             return;
         }
-        else {
-            // Need to check for cycles here
-            m_to.find(s1)->insert(s2);
-            m_from_cycle.find(s2)->insert(s1);
-            find_and_merge_cycles(s1, s2);
-        }
+        s1 = merge_all_cycles(s1, s2_set);
+        // check if dead
+        mark_dead_recursive(s1);
     }
 
-    bool seq_regex::seen_states::is_alive(expr* e) {
-        return m_alive.contains(get_state(e));
+    bool seq_regex::seen_states::is_live(expr* e) {
+        return m_live.contains(get_state(e));
     }
     bool seq_regex::seen_states::is_dead(expr* e) {
         return m_dead.contains(get_state(e));
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 05e4ee5ca9e..4b1c4028095 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -52,51 +52,57 @@ namespace smt {
             // typedef uint_map<expr_ref_vector>  exprs_of_state;
 
         private:
+            ast_manager& m;
+            seq_regex& m_parent;
+
             /*
                 All states are exactly one of:
-                - alive:      known to be nonempty
+                - live:       known to be nonempty
                 - dead:       known to be empty
                 - unknown:    all outgoing transitions have been
-                              seen, but the state is not known
-                              to be alive or dead
-                - unvisited:  not all outgoing transitions have
-                              been seen
-
-                The set m_seen keeps all of these and in addition,
-                seen states that have been merged and no longer reprsent
-                a current SCC.
+                              added, but the state is not known
+                              to be live or dead
+                - unvisited:  outgoing transitions have not been added
+
+                As SCCs are merged, some states become aliases, and a
+                union find data structure collapses a now obsolete
+                state to its current representative. m_seen keeps track
+                of states we have seen, including obsolete states.
             */
-            state_set   m_seen;
-            state_set   m_alive;
+            state_set   m_live;
             state_set   m_dead;
             state_set   m_unknown;
             state_set   m_unvisited;
 
+            state_set     m_seen;
+            state_ufind   m_state_ufind;
+
+            void add_state(state s);    // unvisited + seen
+            void remove_state(state s); // * -> m_seen only
+
             void mark_unknown(state s); // unvisited -> unknown
-            void mark_alive(state s);   // unknown -> alive
+            void mark_live(state s);    // unknown -> live
             void mark_dead(state s);    // unknown -> dead
 
-            bool is_resolved(state s);   // alive or dead
-            bool is_unresolved(state s); // unknown or unvisited
+            // bool is_resolved(state s);   // live or dead
+            // bool is_unresolved(state s); // unknown or unvisited
 
             /*
-                Initially a state is represented by an expression ID.
-                A union find data structure collapses an ID to a state.
-
                 Edges are saved in both from and to maps.
-                Additionally edges from are divided into those possibly
-                in a cycle, and those not in a cycle.
+                A subset of edges are also marked as possibly being
+                part of a cycle by being stored in m_from_maybecycle.
             */
-            state_ufind   m_state_ufind;
+            edge_rel   m_from;
+            edge_rel   m_to;
+            edge_rel   m_from_maybecycle;
+
+            void add_edge(state s1, state s2, bool maybecycle);
+            void remove_edge(state s1, state s2);
+            void rename_edge(state old1, state old2, state new1, state new2);
 
-            state get_state(expr* e);
             state merge_states(state s1, state s2);
             state merge_states(state_set& s_set);
 
-            edge_rel      m_from_cycle;
-            edge_rel      m_from_nocycle;
-            edge_rel      m_to;
-
             /*
                 Caching details
             */
@@ -104,29 +110,39 @@ namespace smt {
             expr_ref_vector   m_trail;
 
             /*
-                Core cycle-detection routine
+                Core algorithmic search routines
+                - live state propagation
+                - dead state propagation
+                - cycle detection
+            */
+            void mark_live_recursive(state s);
+            void mark_dead_recursive(state s);
+            state merge_all_cycles(state s1, state_set& s_to);
+
+            /*
+                Methods on original expressions (before they are turned
+                into states)
             */
-            // Heuristic on syntactic expressions
+            // Convert expression to state
+            state get_state(expr* e);
+            // Cycle-detection heuristic (sound but not complete)
             bool can_be_in_cycle(expr* e1, expr* e2);
-            // Full check: if new edge (s1, s2) will create at least one cycle,
-            // merge all states in the new SCC
-            void find_and_merge_cycles(state s1, state s2);
 
         public:
             /*
-                Main exposed methods:
-                - adding a state
-                - adding a transition from a state
-                - checking if a state is known to be alive or dead
+                Exposed methods:
+                - adding a state and all its transitions
+                - checking if a state is known to be live or dead
             */
-            void add_state(expr* e);
-            void add_transition(expr* e1, expr* e2);
-            bool is_alive(expr* e);
+            void add_state(expr* e, bool live);
+            void add_all_transitions(expr* e1);
+            bool is_live(expr* e);
             bool is_dead(expr* e);
 
-            seen_states(ast_manager& m):
-                m_seen(), m_alive(), m_dead(), m_unknown(), m_unvisited(),
-                m_state_ufind(), m_from_cycle(), m_from_nocycle(), m_to(),
+            seen_states(ast_manager& m, seq_regex& parent):
+                m(m), m_parent(parent),
+                m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
+                m_state_ufind(), m_from(), m_to(), m_from_maybecycle(),
                 m_trail(m) {}
         };
 
@@ -155,11 +171,12 @@ namespace smt {
             propagation_lit(): m_lit(null_literal), m_trigger(null_literal) {}
         };
 
-        theory_seq&      th;
-        context&         ctx;
-        ast_manager&     m;
-        vector<s_in_re> m_s_in_re;
-        scoped_vector<propagation_lit> m_to_propagate;
+        theory_seq&                      th;
+        context&                         ctx;
+        ast_manager&                     m;
+        vector<s_in_re>                  m_s_in_re;
+        scoped_vector<propagation_lit>   m_to_propagate;
+        seen_states                      m_seen_states;
 
         seq_util& u();
         class seq_util::re& re();
@@ -194,6 +211,7 @@ namespace smt {
         expr_ref derivative_wrapper(expr* hd, expr* r);
 
         void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result);
+        void get_all_derivatives(expr* r, expr_ref_vector& results);
 
         void get_cofactors(expr* r, expr_ref_pair_vector& result) {
             expr_ref_vector conds(m);

From d96a274a79efe56f7cbe78b8717d1f54cf85a3ee Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sat, 27 Jun 2020 20:16:21 -0400
Subject: [PATCH 20/51] refactor dead state class to separate self-contained
 state_graph class

---
 src/smt/seq_regex.cpp |  46 ++++-----
 src/smt/seq_regex.h   | 232 ++++++++++++++++++++++--------------------
 2 files changed, 142 insertions(+), 136 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 43b510f08be..9e28acf4289 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,7 +25,7 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
-        m_seen_states(m, *this)
+        m_state_graph(m, *this)
     {}
 
     seq_util& seq_regex::u() { return th.m_util; }
@@ -676,10 +676,10 @@ namespace smt {
     }
 
     /****************************************************
-     *** Dead state elimination and seen_states class ***
+     *** Dead state elimination and state_graph class ***
      ****************************************************/
 
-    void seq_regex::seen_states::add_state(state s) {
+    void state_graph::add_state(state s) {
         SASSERT(!m_seen.contains(s));
         // Ensure corresponding var in connected components
         while (s >= m_state_ufind.get_num_vars()) {
@@ -692,7 +692,7 @@ namespace smt {
         m_from.insert(s, new state_set());
         m_from_maybecycle.insert(s, new state_set());
     }
-    void seq_regex::seen_states::remove_state(state s) {
+    void state_graph::remove_state(state s) {
         // This is a partial deletion -- the state is still seen and can't be
         // added again later
         SASSERT(m_seen.contains(s));
@@ -720,30 +720,30 @@ namespace smt {
         }
     }
 
-    void seq_regex::seen_states::mark_unknown(state s) {
+    void state_graph::mark_unknown(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unvisited.contains(s));
         m_unvisited.remove(s);
         m_unknown.insert(s);
     }
-    void seq_regex::seen_states::mark_live(state s) {
+    void state_graph::mark_live(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
         m_unknown.remove(s);
         m_live.insert(s);
     }
-    void seq_regex::seen_states::mark_dead(state s) {
+    void state_graph::mark_dead(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
         m_unknown.remove(s);
         m_dead.insert(s);
     }
 
-    // bool seq_regex::seen_states::is_resolved(state s) {
+    // bool state_graph::is_resolved(state s) {
     //     SASSERT(m_state_ufind.is_root(s));
     //     return (m_live.contains(s) || m_dead.contains(s));
     // }
-    // bool seq_regex::seen_states::is_unresolved(state s) {
+    // bool state_graph::is_unresolved(state s) {
     //     SASSERT(m_state_ufind.is_root(s));
     //     return (m_unknown.contains(s) || m_unvisited.contains(s));
     // }
@@ -753,7 +753,7 @@ namespace smt {
         May already exist, in which case a nocycle edge overrides
         a cycle edge.
     */
-    void seq_regex::seen_states::add_edge(state s1, state s2,
+    void state_graph::add_edge(state s1, state s2,
                                           bool maybecycle) {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
@@ -769,7 +769,7 @@ namespace smt {
             m_from_maybecycle.find(s2)->remove(s1);
         }
     }
-    void seq_regex::seen_states::remove_edge(state s1, state s2) {
+    void state_graph::remove_edge(state s1, state s2) {
         SASSERT(m_to.find(s1)->contains(s2));
         SASSERT(m_from.find(s2)->contains(s1));
         m_to.find(s1)->remove(s2);
@@ -778,7 +778,7 @@ namespace smt {
             m_from_maybecycle.find(s2)->remove(s1);
         }
     }
-    void seq_regex::seen_states::rename_edge(state old1, state old2,
+    void state_graph::rename_edge(state old1, state old2,
                                              state new1, state new2) {
         SASSERT(m_to.find(old1)->contains(old2));
         SASSERT(m_from.find(old2)->contains(old1));
@@ -796,7 +796,7 @@ namespace smt {
         Also, each state should
         be current (not a previous SCC that was later merged into another).
     */
-    auto seq_regex::seen_states::merge_states(state s1, state s2) -> state {
+    auto state_graph::merge_states(state s1, state s2) -> state {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
         SASSERT(m_unknown.contains(s1));
@@ -813,7 +813,7 @@ namespace smt {
         remove_state(s2);
         return s1;
     }
-    auto seq_regex::seen_states::merge_states(state_set& s_set) -> state {
+    auto state_graph::merge_states(state_set& s_set) -> state {
         SASSERT(s_set.num_elems() > 0);
         state prev_s;
         bool first_iter = true;
@@ -832,7 +832,7 @@ namespace smt {
         if s is not live, mark it, and recurse on all states into s
         Precondition: s is live or unknown
     */
-    void seq_regex::seen_states::mark_live_recursive(state s) {
+    void state_graph::mark_live_recursive(state s) {
         SASSERT(m_live.contains(s) || m_unknown.contains(s));
         if (m_live.contains(s)) return;
         mark_live(s);
@@ -846,7 +846,7 @@ namespace smt {
         on all states into s.
         Precondition: s is live, dead, or unknown
     */
-    void seq_regex::seen_states::mark_dead_recursive(state s) {
+    void state_graph::mark_dead_recursive(state s) {
         SASSERT(!m_unvisited.contains(s));
         if (!m_unknown.contains(s)) return;
         for (auto s_to: *m_to.find(s)) {
@@ -865,7 +865,7 @@ namespace smt {
         if new edges from s1 to s_to will create at least one cycle,
         merge all states in the new SCC
     */
-    auto seq_regex::seen_states::merge_all_cycles(state s1, state_set& s_to)
+    auto state_graph::merge_all_cycles(state s1, state_set& s_to)
                                                   -> state {
         // Mark s_to, then search backwards from s to mark the SCC
         // TODO: Implement full check
@@ -878,15 +878,15 @@ namespace smt {
         return s1;
     }
 
-    auto seq_regex::seen_states::get_state(expr* e) -> state {
+    auto state_graph::get_state(expr* e) -> state {
         return m_state_ufind.find(e->get_id());
     }
-    bool seq_regex::seen_states::can_be_in_cycle(expr *e1, expr *e2) {
+    bool state_graph::can_be_in_cycle(expr *e1, expr *e2) {
         // Simple placeholder. TODO: Implement full check
         return true;
     }
 
-    void seq_regex::seen_states::add_state(expr* e, bool live) {
+    void state_graph::add_state(expr* e, bool live) {
         unsigned s = e->get_id();
         if (m_seen.contains(s)) return;
         if (s >= m_max_size) {
@@ -900,7 +900,7 @@ namespace smt {
         add_state(s);
         if (live) mark_live_recursive(s);
     }
-    void seq_regex::seen_states::add_all_transitions(expr* e1) {
+    void state_graph::add_all_transitions(expr* e1) {
         // Precondition: e already corresponds to an existing state
         SASSERT(m_seen.contains(e1->get_id()));
         state s1 = get_state(e1);
@@ -926,10 +926,10 @@ namespace smt {
         mark_dead_recursive(s1);
     }
 
-    bool seq_regex::seen_states::is_live(expr* e) {
+    bool state_graph::is_live(expr* e) {
         return m_live.contains(get_state(e));
     }
-    bool seq_regex::seen_states::is_dead(expr* e) {
+    bool state_graph::is_dead(expr* e) {
         return m_dead.contains(get_state(e));
     }
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 4b1c4028095..ee809d65bda 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -29,123 +29,129 @@ namespace smt {
 
     class theory_seq;
 
-    class seq_regex {
+    class seq_regex;
+
+    /*
+        state_graph
+
+        Data structure which calculates live states and dead states.
+
+        ----
+
+        Info saved about the set of states (regexes) seen so far.
+
+        "States" here are strongly connected components -- states that
+        are mutually reachable from each other. States
+        are represented as unsigned integers.
+
+        Used for the core incremental dead state elimination algorithm.
+
+        Class invariants:
+            - TODO
+    */
+    class state_graph {
+        typedef unsigned              state;
+        typedef uint_set              state_set;
+        typedef uint_map<state_set>   edge_rel;
+        typedef basic_union_find      state_ufind;
+        // typedef uint_map<expr_ref_vector>  exprs_of_state;
+
+    private:
+        ast_manager& m;
+        seq_regex& m_parent;
+
         /*
-            seen_states
+            All states are exactly one of:
+            - live:       known to be nonempty
+            - dead:       known to be empty
+            - unknown:    all outgoing transitions have been
+                          added, but the state is not known
+                          to be live or dead
+            - unvisited:  outgoing transitions have not been added
+
+            As SCCs are merged, some states become aliases, and a
+            union find data structure collapses a now obsolete
+            state to its current representative. m_seen keeps track
+            of states we have seen, including obsolete states.
+        */
+        state_set   m_live;
+        state_set   m_dead;
+        state_set   m_unknown;
+        state_set   m_unvisited;
+
+        state_set     m_seen;
+        state_ufind   m_state_ufind;
 
-            Info saved about the set of states (regexes) seen so far.
+        void add_state(state s);    // unvisited + seen
+        void remove_state(state s); // * -> m_seen only
 
-            "States" here are strongly connected components -- states that
-            are mutually reachable from each other. States
-            are represented as unsigned integers.
+        void mark_unknown(state s); // unvisited -> unknown
+        void mark_live(state s);    // unknown -> live
+        void mark_dead(state s);    // unknown -> dead
 
-            Used for the core incremental dead state elimination algorithm.
+        // bool is_resolved(state s);   // live or dead
+        // bool is_unresolved(state s); // unknown or unvisited
 
-            Class invariants:
-                - TODO
+        /*
+            Edges are saved in both from and to maps.
+            A subset of edges are also marked as possibly being
+            part of a cycle by being stored in m_from_maybecycle.
         */
-        class seen_states {
-            typedef unsigned              state;
-            typedef uint_set              state_set;
-            typedef uint_map<state_set>   edge_rel;
-            typedef basic_union_find      state_ufind;
-            // typedef uint_map<expr_ref_vector>  exprs_of_state;
-
-        private:
-            ast_manager& m;
-            seq_regex& m_parent;
-
-            /*
-                All states are exactly one of:
-                - live:       known to be nonempty
-                - dead:       known to be empty
-                - unknown:    all outgoing transitions have been
-                              added, but the state is not known
-                              to be live or dead
-                - unvisited:  outgoing transitions have not been added
-
-                As SCCs are merged, some states become aliases, and a
-                union find data structure collapses a now obsolete
-                state to its current representative. m_seen keeps track
-                of states we have seen, including obsolete states.
-            */
-            state_set   m_live;
-            state_set   m_dead;
-            state_set   m_unknown;
-            state_set   m_unvisited;
-
-            state_set     m_seen;
-            state_ufind   m_state_ufind;
-
-            void add_state(state s);    // unvisited + seen
-            void remove_state(state s); // * -> m_seen only
-
-            void mark_unknown(state s); // unvisited -> unknown
-            void mark_live(state s);    // unknown -> live
-            void mark_dead(state s);    // unknown -> dead
-
-            // bool is_resolved(state s);   // live or dead
-            // bool is_unresolved(state s); // unknown or unvisited
-
-            /*
-                Edges are saved in both from and to maps.
-                A subset of edges are also marked as possibly being
-                part of a cycle by being stored in m_from_maybecycle.
-            */
-            edge_rel   m_from;
-            edge_rel   m_to;
-            edge_rel   m_from_maybecycle;
-
-            void add_edge(state s1, state s2, bool maybecycle);
-            void remove_edge(state s1, state s2);
-            void rename_edge(state old1, state old2, state new1, state new2);
-
-            state merge_states(state s1, state s2);
-            state merge_states(state_set& s_set);
-
-            /*
-                Caching details
-            */
-            unsigned          m_max_size { 10000 };
-            expr_ref_vector   m_trail;
-
-            /*
-                Core algorithmic search routines
-                - live state propagation
-                - dead state propagation
-                - cycle detection
-            */
-            void mark_live_recursive(state s);
-            void mark_dead_recursive(state s);
-            state merge_all_cycles(state s1, state_set& s_to);
-
-            /*
-                Methods on original expressions (before they are turned
-                into states)
-            */
-            // Convert expression to state
-            state get_state(expr* e);
-            // Cycle-detection heuristic (sound but not complete)
-            bool can_be_in_cycle(expr* e1, expr* e2);
-
-        public:
-            /*
-                Exposed methods:
-                - adding a state and all its transitions
-                - checking if a state is known to be live or dead
-            */
-            void add_state(expr* e, bool live);
-            void add_all_transitions(expr* e1);
-            bool is_live(expr* e);
-            bool is_dead(expr* e);
-
-            seen_states(ast_manager& m, seq_regex& parent):
-                m(m), m_parent(parent),
-                m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
-                m_state_ufind(), m_from(), m_to(), m_from_maybecycle(),
-                m_trail(m) {}
-        };
+        edge_rel   m_from;
+        edge_rel   m_to;
+        edge_rel   m_from_maybecycle;
+
+        void add_edge(state s1, state s2, bool maybecycle);
+        void remove_edge(state s1, state s2);
+        void rename_edge(state old1, state old2, state new1, state new2);
+
+        state merge_states(state s1, state s2);
+        state merge_states(state_set& s_set);
 
+        /*
+            Caching details
+        */
+        unsigned          m_max_size { 10000 };
+        expr_ref_vector   m_trail;
+
+        /*
+            Core algorithmic search routines
+            - live state propagation
+            - dead state propagation
+            - cycle detection
+        */
+        void mark_live_recursive(state s);
+        void mark_dead_recursive(state s);
+        state merge_all_cycles(state s1, state_set& s_to);
+
+        /*
+            Methods on original expressions (before they are turned
+            into states)
+        */
+        // Convert expression to state
+        state get_state(expr* e);
+        // Cycle-detection heuristic (sound but not complete)
+        bool can_be_in_cycle(expr* e1, expr* e2);
+
+    public:
+        /*
+            Exposed methods:
+            - adding a state and all its transitions
+            - checking if a state is known to be live or dead
+        */
+        void add_state(expr* e, bool live);
+        void add_all_transitions(expr* e1);
+        bool is_live(expr* e);
+        bool is_dead(expr* e);
+
+        state_graph(ast_manager& m, seq_regex& parent):
+            m(m), m_parent(parent),
+            m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
+            m_state_ufind(), m_from(), m_to(), m_from_maybecycle(),
+            m_trail(m) {}
+    };
+
+    class seq_regex {
         /*
             Data about a constraint of the form (str.in_re s R)
         */
@@ -176,7 +182,7 @@ namespace smt {
         ast_manager&                     m;
         vector<s_in_re>                  m_s_in_re;
         scoped_vector<propagation_lit>   m_to_propagate;
-        seen_states                      m_seen_states;
+        state_graph                      m_state_graph;
 
         seq_util& u();
         class seq_util::re& re();
@@ -211,7 +217,6 @@ namespace smt {
         expr_ref derivative_wrapper(expr* hd, expr* r);
 
         void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result);
-        void get_all_derivatives(expr* r, expr_ref_vector& results);
 
         void get_cofactors(expr* r, expr_ref_pair_vector& result) {
             expr_ref_vector conds(m);
@@ -220,6 +225,8 @@ namespace smt {
 
     public:
 
+        void get_all_derivatives(expr* r, expr_ref_vector& results);
+
         seq_regex(theory_seq& th);
 
         void push_scope() { m_to_propagate.push_scope(); }
@@ -247,4 +254,3 @@ namespace smt {
     };
 
 };
-

From 47f45faaee435d4a2a2a0c0c782ec3fcb31505d0 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sat, 27 Jun 2020 21:19:46 -0400
Subject: [PATCH 21/51] finish factoring state_graph to only work with unsigned
 values, and implement separate functionality for expr* logic

---
 src/smt/seq_regex.cpp | 118 ++++++++++++++++++++++++------------------
 src/smt/seq_regex.h   |  88 ++++++++++++++++---------------
 2 files changed, 114 insertions(+), 92 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 9e28acf4289..48d9961e193 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,7 +25,8 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
-        m_state_graph(m, *this)
+        m_state_graph(),
+        m_state_trail(m)
     {}
 
     seq_util& seq_regex::u() { return th.m_util; }
@@ -679,7 +680,7 @@ namespace smt {
      *** Dead state elimination and state_graph class ***
      ****************************************************/
 
-    void state_graph::add_state(state s) {
+    void state_graph::add_state_core(state s) {
         SASSERT(!m_seen.contains(s));
         // Ensure corresponding var in connected components
         while (s >= m_state_ufind.get_num_vars()) {
@@ -753,8 +754,7 @@ namespace smt {
         May already exist, in which case a nocycle edge overrides
         a cycle edge.
     */
-    void state_graph::add_edge(state s1, state s2,
-                                          bool maybecycle) {
+    void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
         if (s1 == s2) return;
@@ -784,7 +784,7 @@ namespace smt {
         SASSERT(m_from.find(old2)->contains(old1));
         bool maybecycle = m_from_maybecycle.find(old2)->contains(old1);
         remove_edge(old1, old2);
-        add_edge(new1, new2, maybecycle);
+        add_edge_core(new1, new2, maybecycle);
     }
 
     /*
@@ -865,8 +865,7 @@ namespace smt {
         if new edges from s1 to s_to will create at least one cycle,
         merge all states in the new SCC
     */
-    auto state_graph::merge_all_cycles(state s1, state_set& s_to)
-                                                  -> state {
+    auto state_graph::merge_all_cycles(state s1, state_set& s_to) -> state {
         // Mark s_to, then search backwards from s to mark the SCC
         // TODO: Implement full check
         // Simple placeholder for now: check if there is an edge both ways
@@ -878,59 +877,78 @@ namespace smt {
         return s1;
     }
 
-    auto state_graph::get_state(expr* e) -> state {
-        return m_state_ufind.find(e->get_id());
+    void state_graph::add_state(state s, bool live) {
+        if (m_seen.contains(s)) return;
+        add_state_core(s);
+        if (live) mark_live_recursive(s);
+    }
+    void state_graph::add_edge(state s1, state s2, bool maybecycle) {
+        SASSERT(m_seen.contains(s1));
+        SASSERT(m_seen.contains(s2));
+        s1 = m_state_ufind.find(s1);
+        s2 = m_state_ufind.find(s1);
+        add_edge_core(s1, s2, maybecycle);
+        if (m_live.contains(s2)) {
+            if (m_unvisited.contains(s1)) mark_unknown(s1);
+            mark_live_recursive(s1);
+        }
+    }
+    void state_graph::done_adding(state s) {
+        s = m_state_ufind.find(s);
+        if (m_unvisited.contains(s)) mark_unknown(s);
+        s = merge_all_cycles(s, *m_to.find(s));
+        // check if dead
+        mark_dead_recursive(s);
+    }
+
+    unsigned state_graph::get_size() {
+        return m_state_ufind.get_num_vars();
+    }
+
+    bool state_graph::is_live(state s) {
+        return m_live.contains(m_state_ufind.find(s));
+    }
+    bool state_graph::is_dead(state s) {
+        return m_dead.contains(m_state_ufind.find(s));
+    }
+
+    // **********************************
+
+    unsigned seq_regex::get_state_id(expr* e) {
+        return e->get_id();
     }
-    bool state_graph::can_be_in_cycle(expr *e1, expr *e2) {
+    bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) {
         // Simple placeholder. TODO: Implement full check
         return true;
     }
 
-    void state_graph::add_state(expr* e, bool live) {
-        unsigned s = e->get_id();
-        if (m_seen.contains(s)) return;
-        if (s >= m_max_size) {
-            STRACE("seq_regex", tout << "Warning: max size of seen states reached!" << std::endl;);
+    /*
+        Update the state graph with expression r and all its derivatives.
+    */
+    bool seq_regex::update_state_graph(expr* r) {
+        if (m_state_graph.get_size() >= m_max_state_graph_size) {
+            STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;);
             STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
-            return;
+            return false;
         }
         // Save e as expr_ref so it's not deallocated
-        m_trail.push_back(e);
-        // Add state
-        add_state(s);
-        if (live) mark_live_recursive(s);
-    }
-    void state_graph::add_all_transitions(expr* e1) {
-        // Precondition: e already corresponds to an existing state
-        SASSERT(m_seen.contains(e1->get_id()));
-        state s1 = get_state(e1);
-        if (!m_unvisited.contains(s1)) return;
+        m_state_trail.push_back(r);
+        // Add state, live if nullable
+        unsigned r_id = get_state_id(r);
+        bool r_nullable = m.is_true(is_nullable_wrapper(r));
+        m_state_graph.add_state(r_id, r_nullable);
         // Add edges to all derivatives
         expr_ref_vector derivatives(m);
-        m_parent.get_all_derivatives(e1, derivatives);
-        mark_unknown(s1);
-        bool s1_live = false;
-        state_set s2_set = *(new state_set());
-        for (auto const& e2: derivatives) {
-            state s2 = get_state(e2);
-            bool maybecycle = can_be_in_cycle(e1, e2);
-            add_edge(s1, s2, maybecycle);
-            if (m_live.contains(s2)) s1_live = true;
-        }
-        if (s1_live) {
-            mark_live_recursive(s1);
-            return;
-        }
-        s1 = merge_all_cycles(s1, s2_set);
-        // check if dead
-        mark_dead_recursive(s1);
-    }
-
-    bool state_graph::is_live(expr* e) {
-        return m_live.contains(get_state(e));
-    }
-    bool state_graph::is_dead(expr* e) {
-        return m_dead.contains(get_state(e));
+        get_all_derivatives(r, derivatives);
+        for (auto const& dr: derivatives) {
+            unsigned dr_id = get_state_id(dr);
+            bool dr_nullable = m.is_true(is_nullable_wrapper(dr));
+            m_state_graph.add_state(dr_id, dr_nullable);
+            bool maybecycle = can_be_in_cycle(r, dr);
+            m_state_graph.add_edge(r_id, dr_id, maybecycle);
+        }
+        m_state_graph.done_adding(r_id);
+        return true;
     }
 
 }
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index ee809d65bda..84829f1c577 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -34,17 +34,21 @@ namespace smt {
     /*
         state_graph
 
-        Data structure which calculates live states and dead states.
-
-        ----
-
-        Info saved about the set of states (regexes) seen so far.
-
-        "States" here are strongly connected components -- states that
-        are mutually reachable from each other. States
-        are represented as unsigned integers.
-
-        Used for the core incremental dead state elimination algorithm.
+        Data structure which is capable of incrementally tracking
+        live states and dead states.
+
+        "States" are integers. States and edges are added to the data
+        structure incrementally.
+        - Some states are initially labeled as live. The data structure
+          tracks which other states are live (can reach a live state), dead
+          (can't reach a live state), or neither.
+        - Some edges are labeled as not contained in a cycle. This is to
+          optimize search if it is known by the user of the structure
+          that no cycle will ever contain this edge.
+
+        Internally, we use union_find to identify states within an SCC,
+        and incrementally update SCCs, while propagating backwards
+        live and dead SCCs.
 
         Class invariants:
             - TODO
@@ -54,12 +58,8 @@ namespace smt {
         typedef uint_set              state_set;
         typedef uint_map<state_set>   edge_rel;
         typedef basic_union_find      state_ufind;
-        // typedef uint_map<expr_ref_vector>  exprs_of_state;
 
     private:
-        ast_manager& m;
-        seq_regex& m_parent;
-
         /*
             All states are exactly one of:
             - live:       known to be nonempty
@@ -82,8 +82,8 @@ namespace smt {
         state_set     m_seen;
         state_ufind   m_state_ufind;
 
-        void add_state(state s);    // unvisited + seen
-        void remove_state(state s); // * -> m_seen only
+        void add_state_core(state s); // unvisited + seen
+        void remove_state(state s);   // * -> m_seen only
 
         void mark_unknown(state s); // unvisited -> unknown
         void mark_live(state s);    // unknown -> live
@@ -101,19 +101,13 @@ namespace smt {
         edge_rel   m_to;
         edge_rel   m_from_maybecycle;
 
-        void add_edge(state s1, state s2, bool maybecycle);
+        void add_edge_core(state s1, state s2, bool maybecycle);
         void remove_edge(state s1, state s2);
         void rename_edge(state old1, state old2, state new1, state new2);
 
         state merge_states(state s1, state s2);
         state merge_states(state_set& s_set);
 
-        /*
-            Caching details
-        */
-        unsigned          m_max_size { 10000 };
-        expr_ref_vector   m_trail;
-
         /*
             Core algorithmic search routines
             - live state propagation
@@ -124,31 +118,28 @@ namespace smt {
         void mark_dead_recursive(state s);
         state merge_all_cycles(state s1, state_set& s_to);
 
-        /*
-            Methods on original expressions (before they are turned
-            into states)
-        */
-        // Convert expression to state
-        state get_state(expr* e);
-        // Cycle-detection heuristic (sound but not complete)
-        bool can_be_in_cycle(expr* e1, expr* e2);
-
     public:
         /*
             Exposed methods:
             - adding a state and all its transitions
             - checking if a state is known to be live or dead
+
+            ASSUMPTION: transitions from a state are added in order and after
+            all transitions are added, the state is marked as
+            finished. Also all states are added before the transitions.
         */
-        void add_state(expr* e, bool live);
-        void add_all_transitions(expr* e1);
-        bool is_live(expr* e);
-        bool is_dead(expr* e);
+        void add_state(state s, bool live);
+        void add_edge(state s1, state s2, bool maybecycle);
+        void done_adding(state s);
+        unsigned get_size();
+
+        bool is_live(state s);
+        bool is_dead(state s);
 
-        state_graph(ast_manager& m, seq_regex& parent):
-            m(m), m_parent(parent),
+        state_graph():
             m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
-            m_state_ufind(), m_from(), m_to(), m_from_maybecycle(),
-            m_trail(m) {}
+            m_state_ufind(), m_from(), m_to(), m_from_maybecycle()
+            {}
     };
 
     class seq_regex {
@@ -182,7 +173,20 @@ namespace smt {
         ast_manager&                     m;
         vector<s_in_re>                  m_s_in_re;
         scoped_vector<propagation_lit>   m_to_propagate;
-        state_graph                      m_state_graph;
+
+        /*
+            state_graph for dead state detection,
+            and associated methods
+        */
+        state_graph       m_state_graph;
+        expr_ref_vector   m_state_trail;
+        unsigned          m_max_state_graph_size { 10000 };
+        // Convert expression to state
+        unsigned get_state_id(expr* e);
+        // Cycle-detection heuristic (sound but not complete)
+        bool can_be_in_cycle(expr* e1, expr* e2);
+        // Update the graph
+        bool update_state_graph(expr* r);
 
         seq_util& u();
         class seq_util::re& re();

From ae3a91a78ad0aa3de54fcf6664e4fd2c1fdbf378 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sun, 28 Jun 2020 10:03:10 -0400
Subject: [PATCH 22/51] implement get_all_derivatives, add debug tracing

---
 src/smt/seq_regex.cpp | 88 ++++++++++++++++++++++++++++---------------
 src/smt/seq_regex.h   |  1 +
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 48d9961e193..a78f64a6f89 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -209,10 +209,14 @@ namespace smt {
                               << "," << r->get_id()
                               << ") ";);
 
-        if (re().is_empty(r)) {
+        if (re().is_empty(r)
+            || m_state_graph.is_dead(get_state_id(r))) {
             th.add_axiom(~lit);
             return true;
         }
+        if (!m.is_ite(r) && is_ground(r)) {
+            update_state_graph(r);
+        }
 
         if (block_unfolding(lit, idx))
             return true;
@@ -594,29 +598,23 @@ namespace smt {
     }
 
     void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
-        // Use get_cofactors method and check which conds are
-        // satisfiable
-        // TODO
-        return;
-        // 
-        // get_cofactors(d, cofactors);        
-        // for (auto const& p : cofactors) {
-        //     if (is_member(p.second, u))
-        //         continue;
-        //     expr_ref cond(p.first, m);
-        //     seq_rw().elim_condition(hd, cond);
-        //     rewrite(cond);
-        //     if (m.is_false(cond))
-        //         continue;
-        //     lits.reset();
-        //     lits.push_back(~lit);
-        //     if (!m.is_true(cond)) {
-        //         expr_ref ncond(mk_not(m, cond), m);
-        //         lits.push_back(th.mk_literal(mk_forall(m, hd, ncond)));
-        //     }
-        //     expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, r), n);    
-        //     lits.push_back(th.mk_literal(is_empty1)); 
-        //     th.add_axiom(lits);
+        // Get derivative
+        sort* seq_sort = nullptr;
+        VERIFY(u().is_re(r, seq_sort));
+        expr_ref n(m.mk_fresh_const("re.char", seq_sort), m);
+        expr_ref hd = mk_first(r, n);
+        expr_ref d(m);
+        d = derivative_wrapper(hd, r);
+        // Use get_cofactors method and filter out unsatisfiable conds
+        expr_ref_pair_vector cofactors(m);
+        get_cofactors(d, cofactors);
+        for (auto const& p : cofactors) {
+            expr_ref cond(p.first, m);
+            seq_rw().elim_condition(hd, cond);
+            rewrite(cond);
+            if (m.is_false(cond)) continue;
+            results.push_back(p.second);
+        }
     }
 
     /*
@@ -724,18 +722,21 @@ namespace smt {
     void state_graph::mark_unknown(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unvisited.contains(s));
+        STRACE("seq_regex_brief", tout << "unk(" << s << ") ";);
         m_unvisited.remove(s);
         m_unknown.insert(s);
     }
     void state_graph::mark_live(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
+        STRACE("seq_regex_brief", tout << "live(" << s << ") ";);
         m_unknown.remove(s);
         m_live.insert(s);
     }
     void state_graph::mark_dead(state s) {
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
+        STRACE("seq_regex_brief", tout << "dead(" << s << ") ";);
         m_unknown.remove(s);
         m_dead.insert(s);
     }
@@ -757,15 +758,18 @@ namespace smt {
     void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
+        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";);
         if (s1 == s2) return;
         if (!m_to.find(s1)->contains(s2)) {
             // add new edge
+            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: new edge! ";);
             m_to.find(s1)->insert(s2);
             m_from.find(s2)->insert(s1);
             if (maybecycle) m_from_maybecycle.find(s2)->insert(s1);
         }
         else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) {
             // update existing edge
+            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: update edge! ";);
             m_from_maybecycle.find(s2)->remove(s1);
         }
     }
@@ -801,6 +805,7 @@ namespace smt {
         SASSERT(m_state_ufind.is_root(s2));
         SASSERT(m_unknown.contains(s1));
         SASSERT(m_unknown.contains(s2));
+        STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";);
         m_state_ufind.merge(s1, s2);
         if (m_state_ufind.is_root(s1)) std::swap(s1, s2);
         // merge edges
@@ -815,7 +820,7 @@ namespace smt {
     }
     auto state_graph::merge_states(state_set& s_set) -> state {
         SASSERT(s_set.num_elems() > 0);
-        state prev_s;
+        state prev_s = 0; // initialization here optional
         bool first_iter = true;
         for (auto s: s_set) {
             if (first_iter) {
@@ -849,7 +854,9 @@ namespace smt {
     void state_graph::mark_dead_recursive(state s) {
         SASSERT(!m_unvisited.contains(s));
         if (!m_unknown.contains(s)) return;
+        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         for (auto s_to: *m_to.find(s)) {
+            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: m_to searching: " << s_to << " ";);
             // unknown pointing to live should have been marked as live
             SASSERT(!m_live.contains(s_to));
             if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return;
@@ -879,14 +886,20 @@ namespace smt {
 
     void state_graph::add_state(state s, bool live) {
         if (m_seen.contains(s)) return;
+        STRACE("seq_regex_brief", tout << "add(" << s << "," << live << ") ";);
         add_state_core(s);
-        if (live) mark_live_recursive(s);
+        if (live) {
+            mark_unknown(s);
+            mark_live_recursive(s);
+        }
     }
     void state_graph::add_edge(state s1, state s2, bool maybecycle) {
         SASSERT(m_seen.contains(s1));
         SASSERT(m_seen.contains(s2));
+        STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2
+                                       << "," << maybecycle << ") ";);
         s1 = m_state_ufind.find(s1);
-        s2 = m_state_ufind.find(s1);
+        s2 = m_state_ufind.find(s2);
         add_edge_core(s1, s2, maybecycle);
         if (m_live.contains(s2)) {
             if (m_unvisited.contains(s1)) mark_unknown(s1);
@@ -899,12 +912,16 @@ namespace smt {
         s = merge_all_cycles(s, *m_to.find(s));
         // check if dead
         mark_dead_recursive(s);
+        STRACE("seq_regex_brief", tout << "done(" << s << ") ";);
     }
 
     unsigned state_graph::get_size() {
         return m_state_ufind.get_num_vars();
     }
 
+    bool state_graph::is_seen(state s) {
+        return m_seen.contains(s);
+    }
     bool state_graph::is_live(state s) {
         return m_live.contains(m_state_ufind.find(s));
     }
@@ -926,28 +943,39 @@ namespace smt {
         Update the state graph with expression r and all its derivatives.
     */
     bool seq_regex::update_state_graph(expr* r) {
+        unsigned r_id = get_state_id(r);
+        if (m_state_graph.is_seen(r_id)) return false;
         if (m_state_graph.get_size() >= m_max_state_graph_size) {
             STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;);
             STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
             return false;
         }
-        // Save e as expr_ref so it's not deallocated
+        STRACE("seq_regex", tout << "Updating state graph for regex "
+                                 << mk_pp(r, m) << ") ";);
+        STRACE("seq_regex_brief", tout << std::endl
+                                       << "USG(" << r->get_id() << ") ";);
+        // Save r as expr_ref so it's not deallocated
         m_state_trail.push_back(r);
         // Add state, live if nullable
-        unsigned r_id = get_state_id(r);
         bool r_nullable = m.is_true(is_nullable_wrapper(r));
         m_state_graph.add_state(r_id, r_nullable);
         // Add edges to all derivatives
         expr_ref_vector derivatives(m);
+        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
         get_all_derivatives(r, derivatives);
         for (auto const& dr: derivatives) {
             unsigned dr_id = get_state_id(dr);
-            bool dr_nullable = m.is_true(is_nullable_wrapper(dr));
+            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
+            expr_ref dr_n = is_nullable_wrapper(dr);
+            STRACE("seq_regex_brief", tout << "1... ";);
+            bool dr_nullable = m.is_true(dr_n);
+            STRACE("seq_regex_brief", tout << "2... ";);
             m_state_graph.add_state(dr_id, dr_nullable);
             bool maybecycle = can_be_in_cycle(r, dr);
             m_state_graph.add_edge(r_id, dr_id, maybecycle);
         }
         m_state_graph.done_adding(r_id);
+        STRACE("seq_regex_brief", tout << std::endl;);
         return true;
     }
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 84829f1c577..c48437329e6 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -133,6 +133,7 @@ namespace smt {
         void done_adding(state s);
         unsigned get_size();
 
+        bool is_seen(state s);
         bool is_live(state s);
         bool is_dead(state s);
 

From 1295529553b8e125e2ab299b51d476efbb7673d3 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sun, 28 Jun 2020 11:18:54 -0400
Subject: [PATCH 23/51] trace statements for debugging is_nullable loop bug

---
 src/ast/rewriter/seq_rewriter.cpp | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 253fb0ca050..47ebc856e81 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2182,17 +2182,20 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::is_nullable(expr* r) {
-    // STRACE("seq_regex_brief", tout << "n";);
+    STRACE("seq_verbose", tout << "is_nullable: "
+                               << mk_pp(r, m()) << std::endl;);
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable_rec(r);
-        m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);        
+        m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);
     }
+    STRACE("seq_verbose", tout << "is_nullable result: "
+                               << mk_pp(result, m()) << std::endl;);
     return result;
 }
 
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    // STRACE("seq_regex_brief", tout << ".";); // recursive call
+    STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
     expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr;
     sort* seq_sort = nullptr;
@@ -2367,12 +2370,16 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
         Duplicate nested conditions are eliminated.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
+    STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m())
+                               << "," << mk_pp(r, m()) << std::endl;);
     // STRACE("seq_regex_brief", tout << "d";);
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
         m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
     }
+    STRACE("seq_verbose", tout << "derivative result: "
+                               << mk_pp(result, m()) << std::endl;);
     return result;
 }
 
@@ -2904,6 +2911,9 @@ Disabled rewrite:
 */
 br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
 
+    STRACE("seq_verbose", tout << "mk_str_in_regexp: " << mk_pp(a, m())
+                               << ", " << mk_pp(b, m()) << std::endl;);
+
     if (re().is_empty(b)) {
         result = m().mk_false();
         return BR_DONE;
@@ -2919,10 +2929,14 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
     }
     if (str().is_empty(a)) {
         result = is_nullable(b);
-        if (str().is_in_re(result))
+        if (str().is_in_re(result)) {
+            // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_DONE" << std::endl;);
             return BR_DONE;
-        else
+        }
+        else {
+            // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_REWRITE_FULL" << std::endl;);
             return BR_REWRITE_FULL;
+        }
     }
 
     expr_ref hd(m()), tl(m());

From 6d4008c3f5f55b65e4402d4552ca007f58e03103 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sun, 28 Jun 2020 12:31:17 -0400
Subject: [PATCH 24/51] fix is_nullable loop bug

---
 src/ast/rewriter/seq_rewriter.cpp | 51 ++++++++++++++++++++++++++-----
 src/ast/rewriter/seq_rewriter.h   |  3 ++
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 47ebc856e81..6c4ccb8ef5b 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2194,8 +2194,45 @@ expr_ref seq_rewriter::is_nullable(expr* r) {
     return result;
 }
 
+void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) {
+    expr *s1 = nullptr, *r1 = nullptr;
+    if (str().is_in_re(a1, s1, r1)) {
+        SASSERT(str().is_empty(s1));
+        result = re().mk_complement(r1);
+        result = re().mk_in_re(s1, result);
+    }
+    else {
+        m_br.mk_not(a1, result);
+    }
+}
+void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) {
+    expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
+    if (str().is_in_re(a1, s1, r1) &&
+        str().is_in_re(a2, s2, r2)) {
+        SASSERT(str().is_empty(s1));
+        SASSERT(str().is_empty(s2));
+        result = re().mk_inter(r1, r2);
+        result = re().mk_in_re(s1, result);
+    }
+    else {
+        m_br.mk_and(a1, a2, result);
+    }
+}
+void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) {
+    expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
+    if (str().is_in_re(a1, s1, r1) &&
+        str().is_in_re(a2, s2, r2)) {
+        SASSERT(str().is_empty(s1));
+        SASSERT(str().is_empty(s2));
+        result = re().mk_union(r1, r2);
+        result = re().mk_in_re(s1, result);
+    }
+    else {
+        m_br.mk_or(a1, a2, result);
+    }
+}
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    STRACE("seq_regex_brief", tout << ".";); // recursive call
+    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
     expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr;
     sort* seq_sort = nullptr;
@@ -2203,15 +2240,15 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
     zstring s1;
     expr_ref result(m());
     if (re().is_concat(r, r1, r2) ||
-        re().is_intersection(r, r1, r2)) { 
-        m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
+        re().is_intersection(r, r1, r2)) {
+        mk_nullable_and(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_union(r, r1, r2)) {
-        m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
+        mk_nullable_or(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_diff(r, r1, r2)) {
-        m_br.mk_not(is_nullable(r2), result);
-        m_br.mk_and(result, is_nullable(r1), result);
+        mk_nullable_not(is_nullable(r2), result);
+        mk_nullable_and(result, is_nullable(r1), result);
     }
     else if (re().is_star(r) || 
         re().is_opt(r) ||
@@ -2233,7 +2270,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
         result = is_nullable(r1);
     }
     else if (re().is_complement(r, r1)) {
-        m_br.mk_not(is_nullable(r1), result);
+        mk_nullable_not(is_nullable(r1), result);
     }
     else if (re().is_to_re(r, r1)) {        
         result = is_nullable(r1);
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 1ac8d0157cd..c7c01eeacf0 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -191,6 +191,9 @@ class seq_rewriter {
 
     // Calculate derivative, memoized and enforcing a normal form
     expr_ref is_nullable_rec(expr* r);
+    void mk_nullable_not(expr* a1, expr_ref& result);
+    void mk_nullable_and(expr* a1, expr* a2, expr_ref& result);
+    void mk_nullable_or(expr* a1, expr* a2, expr_ref& result);
     expr_ref mk_derivative_rec(expr* ele, expr* r);
     expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
     expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);

From d2cfb2a61294e9eac9ab951a6d0b21ffc0b2d831 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sun, 28 Jun 2020 15:03:51 -0400
Subject: [PATCH 25/51] comment out local nullable change and mark experimental

---
 src/ast/rewriter/seq_rewriter.cpp | 107 ++++++++++++++++--------------
 src/ast/rewriter/seq_rewriter.h   |   7 +-
 2 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 4af19969ab0..45f6cf5489c 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2192,43 +2192,44 @@ expr_ref seq_rewriter::is_nullable(expr* r) {
     return result;
 }
 
-void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) {
-    expr *s1 = nullptr, *r1 = nullptr;
-    if (str().is_in_re(a1, s1, r1)) {
-        SASSERT(str().is_empty(s1));
-        result = re().mk_complement(r1);
-        result = re().mk_in_re(s1, result);
-    }
-    else {
-        m_br.mk_not(a1, result);
-    }
-}
-void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) {
-    expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
-    if (str().is_in_re(a1, s1, r1) &&
-        str().is_in_re(a2, s2, r2)) {
-        SASSERT(str().is_empty(s1));
-        SASSERT(str().is_empty(s2));
-        result = re().mk_inter(r1, r2);
-        result = re().mk_in_re(s1, result);
-    }
-    else {
-        m_br.mk_and(a1, a2, result);
-    }
-}
-void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) {
-    expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
-    if (str().is_in_re(a1, s1, r1) &&
-        str().is_in_re(a2, s2, r2)) {
-        SASSERT(str().is_empty(s1));
-        SASSERT(str().is_empty(s2));
-        result = re().mk_union(r1, r2);
-        result = re().mk_in_re(s1, result);
-    }
-    else {
-        m_br.mk_or(a1, a2, result);
-    }
-}
+// @EXP (experimental change)
+// void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) {
+//     expr *s1 = nullptr, *r1 = nullptr;
+//     if (str().is_in_re(a1, s1, r1)) {
+//         SASSERT(str().is_empty(s1));
+//         result = re().mk_complement(r1);
+//         result = re().mk_in_re(s1, result);
+//     }
+//     else {
+//         m_br.mk_not(a1, result);
+//     }
+// }
+// void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) {
+//     expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
+//     if (str().is_in_re(a1, s1, r1) &&
+//         str().is_in_re(a2, s2, r2)) {
+//         SASSERT(str().is_empty(s1));
+//         SASSERT(str().is_empty(s2));
+//         result = re().mk_inter(r1, r2);
+//         result = re().mk_in_re(s1, result);
+//     }
+//     else {
+//         m_br.mk_and(a1, a2, result);
+//     }
+// }
+// void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) {
+//     expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
+//     if (str().is_in_re(a1, s1, r1) &&
+//         str().is_in_re(a2, s2, r2)) {
+//         SASSERT(str().is_empty(s1));
+//         SASSERT(str().is_empty(s2));
+//         result = re().mk_union(r1, r2);
+//         result = re().mk_in_re(s1, result);
+//     }
+//     else {
+//         m_br.mk_or(a1, a2, result);
+//     }
+// }
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
     // STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
@@ -2239,14 +2240,21 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
     expr_ref result(m());
     if (re().is_concat(r, r1, r2) ||
         re().is_intersection(r, r1, r2)) {
-        mk_nullable_and(is_nullable(r1), is_nullable(r2), result);
+        m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
+        // @EXP (experimental change)
+        // mk_nullable_and(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_union(r, r1, r2)) {
-        mk_nullable_or(is_nullable(r1), is_nullable(r2), result);
+        m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
+        // @EXP (experimental change)
+        // mk_nullable_or(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_diff(r, r1, r2)) {
-        mk_nullable_not(is_nullable(r2), result);
-        mk_nullable_and(result, is_nullable(r1), result);
+        m_br.mk_not(is_nullable(r2), result);
+        m_br.mk_and(result, is_nullable(r1), result);
+        // @EXP (experimental change)
+        // mk_nullable_not(is_nullable(r2), result);
+        // mk_nullable_and(result, is_nullable(r1), result);
     }
     else if (re().is_star(r) || 
         re().is_opt(r) ||
@@ -2268,7 +2276,9 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
         result = is_nullable(r1);
     }
     else if (re().is_complement(r, r1)) {
-        mk_nullable_not(is_nullable(r1), result);
+        m_br.mk_not(is_nullable(r1), result);
+        // @EXP (experimental change)
+        // mk_nullable_not(is_nullable(r1), result);
     }
     else if (re().is_to_re(r, r1)) {        
         result = is_nullable(r1);
@@ -2493,7 +2503,8 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
     Apply a binary operation, preserving BDD normal form on derivative expressions.
 
     Preconditions:
-        - k is a binary op codes on REs: one of concat, intersection, or union
+        - k is a binary op code on REs: one of concat, intersection, or union
+          (not difference)
         - a and b are in BDD form
 
     Postcondition:
@@ -2664,7 +2675,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2)));
     }
     else if (m().is_ite(r, p, r1, r2)) {
-        // Note: there is no BDD normalization here
+        // there is no BDD normalization here
         result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2));
         return result;
     }
@@ -2776,10 +2787,10 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         result = array.mk_select(2, args);
         return re_predicate(result, seq_sort);
     }
-    // stuck cases: is_derivative, variable,
-    // str.to_re if it can't be simplified into a head character and tail
-    // and re().is_reverse if the reverse is not applied to a string thta
-    // can be coerced into a tail character and a head
+    // stuck cases: re.derivative, variable,
+    // str.to_re if the head of the string can't be obtained,
+    // and re.reverse if not applied to a string or if the tail char
+    // of the string can't be obtained
     return expr_ref(re().mk_derivative(ele, r), m());
 }
 
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index c7c01eeacf0..a82e7a6ba65 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -191,9 +191,10 @@ class seq_rewriter {
 
     // Calculate derivative, memoized and enforcing a normal form
     expr_ref is_nullable_rec(expr* r);
-    void mk_nullable_not(expr* a1, expr_ref& result);
-    void mk_nullable_and(expr* a1, expr* a2, expr_ref& result);
-    void mk_nullable_or(expr* a1, expr* a2, expr_ref& result);
+    // @EXP (experimental change)
+    // void mk_nullable_not(expr* a1, expr_ref& result);
+    // void mk_nullable_and(expr* a1, expr* a2, expr_ref& result);
+    // void mk_nullable_or(expr* a1, expr* a2, expr_ref& result);
     expr_ref mk_derivative_rec(expr* ele, expr* r);
     expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
     expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);

From 336d6c8444ffe66090b17e467565de756cef0168 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Sun, 28 Jun 2020 15:19:35 -0400
Subject: [PATCH 26/51] pretty printing for state_graph

---
 src/smt/seq_regex.cpp | 51 +++++++++++++++++++++++++++++++++++--------
 src/smt/seq_regex.h   | 18 +++++++++++----
 2 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index a78f64a6f89..6eda9612564 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -613,6 +613,7 @@ namespace smt {
             seq_rw().elim_condition(hd, cond);
             rewrite(cond);
             if (m.is_false(cond)) continue;
+            if (re().is_empty(p.second)) continue;
             results.push_back(p.second);
         }
     }
@@ -758,18 +759,18 @@ namespace smt {
     void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
-        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";);
+        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";);
         if (s1 == s2) return;
         if (!m_to.find(s1)->contains(s2)) {
             // add new edge
-            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: new edge! ";);
+            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: new edge! ";);
             m_to.find(s1)->insert(s2);
             m_from.find(s2)->insert(s1);
             if (maybecycle) m_from_maybecycle.find(s2)->insert(s1);
         }
         else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) {
             // update existing edge
-            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: update edge! ";);
+            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: update edge! ";);
             m_from_maybecycle.find(s2)->remove(s1);
         }
     }
@@ -854,9 +855,9 @@ namespace smt {
     void state_graph::mark_dead_recursive(state s) {
         SASSERT(!m_unvisited.contains(s));
         if (!m_unknown.contains(s)) return;
-        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
+        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         for (auto s_to: *m_to.find(s)) {
-            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: m_to searching: " << s_to << " ";);
+            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: m_to searching: " << s_to << " ";);
             // unknown pointing to live should have been marked as live
             SASSERT(!m_live.contains(s_to));
             if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return;
@@ -929,6 +930,39 @@ namespace smt {
         return m_dead.contains(m_state_ufind.find(s));
     }
 
+    // void pretty_print_set(std::ofstream& of, state_set& s_set) {
+    //     for (auto s: s_set) {
+    //         of << " " << s;
+    //     }
+    //     of << std::endl;
+    // }
+    void state_graph::pretty_print(std::ofstream& of) {
+        of << "---------- State Graph ----------" << std::endl;
+        of << "Seen:";
+        for (auto s: m_seen) {
+            of << " " << s;
+            state s_root = m_state_ufind.find(s);
+            if (s_root != s)
+                of << "(=" << s_root << ")";
+        }
+        of << std::endl;
+
+        of << "Live:" << m_live << std::endl;
+        of << "Dead:" << m_dead << std::endl;
+        of << "Unknown:" << m_unknown << std::endl;
+        of << "Unvisited:" << m_unvisited << std::endl;
+
+        of << "Edges:" << std::endl;
+        for (auto s1: m_seen) {
+            if (m_state_ufind.is_root(s1)) {
+                of << "  " << s1 << " -> " << *m_to.find(s1) << std::endl;
+            }
+        }
+
+        of << "---------------------------------" << std::endl;
+
+    }
+
     // **********************************
 
     unsigned seq_regex::get_state_id(expr* e) {
@@ -961,21 +995,20 @@ namespace smt {
         m_state_graph.add_state(r_id, r_nullable);
         // Add edges to all derivatives
         expr_ref_vector derivatives(m);
-        STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
+        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
         get_all_derivatives(r, derivatives);
         for (auto const& dr: derivatives) {
             unsigned dr_id = get_state_id(dr);
-            STRACE("seq_regex_brief", tout << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
+            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
             expr_ref dr_n = is_nullable_wrapper(dr);
-            STRACE("seq_regex_brief", tout << "1... ";);
             bool dr_nullable = m.is_true(dr_n);
-            STRACE("seq_regex_brief", tout << "2... ";);
             m_state_graph.add_state(dr_id, dr_nullable);
             bool maybecycle = can_be_in_cycle(r, dr);
             m_state_graph.add_edge(r_id, dr_id, maybecycle);
         }
         m_state_graph.done_adding(r_id);
         STRACE("seq_regex_brief", tout << std::endl;);
+        STRACE("seq_regex_brief", m_state_graph.pretty_print(tout););
         return true;
     }
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index c48437329e6..3911e228b50 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -118,7 +118,16 @@ namespace smt {
         void mark_dead_recursive(state s);
         state merge_all_cycles(state s1, state_set& s_to);
 
+        /*
+            Pretty printing support
+        */
+        // void pretty_print_set(std::ofstream& of, state_set& s_set);
+
     public:
+        state_graph():
+            m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
+            m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {}
+
         /*
             Exposed methods:
             - adding a state and all its transitions
@@ -137,10 +146,11 @@ namespace smt {
         bool is_live(state s);
         bool is_dead(state s);
 
-        state_graph():
-            m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
-            m_state_ufind(), m_from(), m_to(), m_from_maybecycle()
-            {}
+        /*
+            Pretty printing
+        */
+        void pretty_print(std::ofstream& of);
+
     };
 
     class seq_regex {

From 005432650e37246233c116485478e61fe8b85856 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 29 Jun 2020 18:33:07 -0400
Subject: [PATCH 27/51] rewrite state graph to remove the fragile assumption
 that all edges from a state are added at a time

---
 src/smt/seq_regex.cpp | 219 +++++++++++++++++++++---------------------
 src/smt/seq_regex.h   |  92 +++++++++---------
 2 files changed, 156 insertions(+), 155 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 6eda9612564..bf034f6764c 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -608,12 +608,20 @@ namespace smt {
         // Use get_cofactors method and filter out unsatisfiable conds
         expr_ref_pair_vector cofactors(m);
         get_cofactors(d, cofactors);
+        STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;);
         for (auto const& p : cofactors) {
+            STRACE("seq_regex_debug", tout << "visiting cofactor: cond: " << mk_pp(p.first, m) << ", deriv: " << mk_pp(p.second, m) << std::endl;);
             expr_ref cond(p.first, m);
+            STRACE("seq_regex_debug", tout << "head: " << mk_pp(hd, m) << std::endl;);
+            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
             seq_rw().elim_condition(hd, cond);
+            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
             rewrite(cond);
+            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
             if (m.is_false(cond)) continue;
+            STRACE("seq_regex_debug", tout << "cofactor labeled true!" << std::endl;);
             if (re().is_empty(p.second)) continue;
+            STRACE("seq_regex_debug", tout << "added derivative!" << std::endl;);
             results.push_back(p.second);
         }
     }
@@ -680,86 +688,68 @@ namespace smt {
      ****************************************************/
 
     void state_graph::add_state_core(state s) {
+        STRACE("seq_regex_brief", tout << "add(" << s << ") ";);
         SASSERT(!m_seen.contains(s));
-        // Ensure corresponding var in connected components
+        // Ensure corresponding var in union find structure
         while (s >= m_state_ufind.get_num_vars()) {
             m_state_ufind.mk_var();
         }
         // Initialize as unvisited
         m_seen.insert(s);
-        m_unvisited.insert(s);
+        m_unexplored.insert(s);
         m_to.insert(s, new state_set());
         m_from.insert(s, new state_set());
         m_from_maybecycle.insert(s, new state_set());
     }
-    void state_graph::remove_state(state s) {
+    void state_graph::remove_state_core(state s) {
         // This is a partial deletion -- the state is still seen and can't be
-        // added again later
+        // added again later.
+        // The state should be unknown, and all edges to or from the state
+        // should already have been renamed.
+        STRACE("seq_regex_brief", tout << "del(" << s << ") ";);
         SASSERT(m_seen.contains(s));
         SASSERT(!m_state_ufind.is_root(s));
+        SASSERT(m_unknown.contains(s));
         m_to.erase(s);
         m_from.erase(s);
         m_from_maybecycle.erase(s);
-        if (m_unvisited.contains(s)) {
-            UNREACHABLE(); // for testing TODO: remove
-            m_unvisited.remove(s);
-        }
-        else if (m_unknown.contains(s)) {
-            m_unknown.remove(s);
-        }
-        else if (m_dead.contains(s)) {
-            UNREACHABLE(); // for testing TODO: remove
-            m_unknown.remove(s);
-        }
-        else if (m_live.contains(s)) {
-            UNREACHABLE(); // for testing TODO: remove
-            m_live.remove(s);
-        }
-        else {
-            UNREACHABLE();
-        }
+        m_unknown.remove(s);
     }
 
-    void state_graph::mark_unknown(state s) {
-        SASSERT(m_state_ufind.is_root(s));
-        SASSERT(m_unvisited.contains(s));
+    void state_graph::mark_unknown_core(state s) {
         STRACE("seq_regex_brief", tout << "unk(" << s << ") ";);
-        m_unvisited.remove(s);
+        SASSERT(m_state_ufind.is_root(s));
+        SASSERT(m_unexplored.contains(s));
+        m_unexplored.remove(s);
         m_unknown.insert(s);
     }
-    void state_graph::mark_live(state s) {
+    void state_graph::mark_live_core(state s) {
+        STRACE("seq_regex_brief", tout << "live(" << s << ") ";);
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
-        STRACE("seq_regex_brief", tout << "live(" << s << ") ";);
         m_unknown.remove(s);
         m_live.insert(s);
     }
-    void state_graph::mark_dead(state s) {
+    void state_graph::mark_dead_core(state s) {
+        STRACE("seq_regex_brief", tout << "dead(" << s << ") ";);
         SASSERT(m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
-        STRACE("seq_regex_brief", tout << "dead(" << s << ") ";);
         m_unknown.remove(s);
         m_dead.insert(s);
     }
 
-    // bool state_graph::is_resolved(state s) {
-    //     SASSERT(m_state_ufind.is_root(s));
-    //     return (m_live.contains(s) || m_dead.contains(s));
-    // }
-    // bool state_graph::is_unresolved(state s) {
-    //     SASSERT(m_state_ufind.is_root(s));
-    //     return (m_unknown.contains(s) || m_unvisited.contains(s));
-    // }
-
     /*
-        Add edge to the graph
-        May already exist, in which case a nocycle edge overrides
-        a cycle edge.
+        Add edge to the graph.
+        - If the annotation 'maybecycle' is false, then the user is sure
+          that this edge will never be part of a cycle.
+        - May already exist, in which case maybecycle = false overrides
+          maybecycle = true.
     */
     void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
+        STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 << ","
+                                       << (maybecycle ? "y" : "n") << ") ";);
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
-        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: add edge core " << s1 << "," << s2 << "," << maybecycle << " ";);
         if (s1 == s2) return;
         if (!m_to.find(s1)->contains(s2)) {
             // add new edge
@@ -774,21 +764,19 @@ namespace smt {
             m_from_maybecycle.find(s2)->remove(s1);
         }
     }
-    void state_graph::remove_edge(state s1, state s2) {
+    void state_graph::remove_edge_core(state s1, state s2) {
         SASSERT(m_to.find(s1)->contains(s2));
         SASSERT(m_from.find(s2)->contains(s1));
         m_to.find(s1)->remove(s2);
         m_from.find(s2)->remove(s1);
-        if (m_from_maybecycle.find(s2)->contains(s1)) {
-            m_from_maybecycle.find(s2)->remove(s1);
-        }
+        m_from_maybecycle.find(s2)->remove(s1);
     }
-    void state_graph::rename_edge(state old1, state old2,
-                                             state new1, state new2) {
+    void state_graph::rename_edge_core(state old1, state old2,
+                                       state new1, state new2) {
         SASSERT(m_to.find(old1)->contains(old2));
         SASSERT(m_from.find(old2)->contains(old1));
         bool maybecycle = m_from_maybecycle.find(old2)->contains(old1);
-        remove_edge(old1, old2);
+        remove_edge_core(old1, old2);
         add_edge_core(new1, new2, maybecycle);
     }
 
@@ -798,8 +786,7 @@ namespace smt {
 
         Preconditions: the set should be nonempty, and every state
         in the set should be unknown (in particular, *not* unvisited).
-        Also, each state should
-        be current (not a previous SCC that was later merged into another).
+        Also, each state should currently exist
     */
     auto state_graph::merge_states(state s1, state s2) -> state {
         SASSERT(m_state_ufind.is_root(s1));
@@ -808,15 +795,15 @@ namespace smt {
         SASSERT(m_unknown.contains(s2));
         STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";);
         m_state_ufind.merge(s1, s2);
-        if (m_state_ufind.is_root(s1)) std::swap(s1, s2);
-        // merge edges
+        if (m_state_ufind.is_root(s2)) std::swap(s1, s2);
+        // rename s2 to s1 in edges
         for (auto s_to: *m_to.find(s2)) {
-            rename_edge(s2, s_to, s1, s_to);
+            rename_edge_core(s2, s_to, s1, s_to);
         }
         for (auto s_from: *m_from.find(s2)) {
-            rename_edge(s_from, s2, s_from, s1);
+            rename_edge_core(s_from, s2, s_from, s1);
         }
-        remove_state(s2);
+        remove_state_core(s2);
         return s1;
     }
     auto state_graph::merge_states(state_set& s_set) -> state {
@@ -827,9 +814,9 @@ namespace smt {
             if (first_iter) {
                 prev_s = s;
                 first_iter = false;
-            } else {
-                prev_s = merge_states(prev_s, s);
+                continue;
             }
+            prev_s = merge_states(prev_s, s);
         }
         return prev_s;
     }
@@ -840,8 +827,10 @@ namespace smt {
     */
     void state_graph::mark_live_recursive(state s) {
         SASSERT(m_live.contains(s) || m_unknown.contains(s));
+        STRACE("seq_regex_debug", tout
+            << std::endl << "  DEBUG: mark live recursive: " << s << " ";);
         if (m_live.contains(s)) return;
-        mark_live(s);
+        mark_live_core(s);
         for (auto s_from: *m_from.find(s)) {
             mark_live_recursive(s_from);
         }
@@ -853,64 +842,70 @@ namespace smt {
         Precondition: s is live, dead, or unknown
     */
     void state_graph::mark_dead_recursive(state s) {
-        SASSERT(!m_unvisited.contains(s));
+        SASSERT(m_live.contains(s) || m_dead.contains(s) ||
+                m_unknown.contains(s));
+        STRACE("seq_regex_debug", tout
+            << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         if (!m_unknown.contains(s)) return;
-        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         for (auto s_to: *m_to.find(s)) {
-            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: m_to searching: " << s_to << " ";);
-            // unknown pointing to live should have been marked as live
+            // unknown pointing to live should have been marked as live!
             SASSERT(!m_live.contains(s_to));
-            if (m_unknown.contains(s_to) || m_unvisited.contains(s_to)) return;
+            if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return;
         }
         // all states from s are dead
-        mark_dead(s);
+        mark_dead_core(s);
         for (auto s_from: *m_from.find(s)) {
             mark_dead_recursive(s_from);
         }
     }
 
     /*
-        if new edges from s1 to s_to will create at least one cycle,
-        merge all states in the new SCC
+        Merge all cycles of unknown states containing s1 into one state.
+        Return the new state
+        Precondition: s1 is unknown.
     */
-    auto state_graph::merge_all_cycles(state s1, state_set& s_to) -> state {
+    auto state_graph::merge_all_cycles(state s) -> state {
+        SASSERT(m_unknown.contains(s));
         // Mark s_to, then search backwards from s to mark the SCC
         // TODO: Implement full check
         // Simple placeholder for now: check if there is an edge both ways
-        for (auto s2: s_to) {
-            if (m_to.find(s2)->contains(s1)) {
-                s1 = merge_states(s1, s2);
-            }
+        state_set s_to_set = *m_to.find(s); // makes a copy. Reference could
+                                            // lead to a bug
+        for (auto s_to: s_to_set) {
+            if (m_to.find(s_to)->contains(s))
+                s = merge_states(s, s_to);
         }
-        return s1;
+        return s;
     }
 
-    void state_graph::add_state(state s, bool live) {
+    /*
+        Exposed methods
+    */
+
+    void state_graph::add_state(state s) {
         if (m_seen.contains(s)) return;
-        STRACE("seq_regex_brief", tout << "add(" << s << "," << live << ") ";);
         add_state_core(s);
-        if (live) {
-            mark_unknown(s);
-            mark_live_recursive(s);
-        }
+    }
+    void state_graph::mark_live(state s) {
+        SASSERT(m_unexplored.contains(s) || m_live.contains(s));
+        SASSERT(m_state_ufind.is_root(s));
+        if (m_unexplored.contains(s)) mark_unknown_core(s);
+        mark_live_recursive(s);
     }
     void state_graph::add_edge(state s1, state s2, bool maybecycle) {
-        SASSERT(m_seen.contains(s1));
+        SASSERT(m_unexplored.contains(s1) || m_live.contains(s1));
+        SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_seen.contains(s2));
-        STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2
-                                       << "," << maybecycle << ") ";);
-        s1 = m_state_ufind.find(s1);
         s2 = m_state_ufind.find(s2);
         add_edge_core(s1, s2, maybecycle);
-        if (m_live.contains(s2)) {
-            if (m_unvisited.contains(s1)) mark_unknown(s1);
-            mark_live_recursive(s1);
-        }
+        if (m_live.contains(s2)) mark_live(s1);
     }
-    void state_graph::done_adding(state s) {
-        s = m_state_ufind.find(s);
-        if (m_unvisited.contains(s)) mark_unknown(s);
-        s = merge_all_cycles(s, *m_to.find(s));
+    void state_graph::mark_done(state s) {
+        SASSERT(m_unexplored.contains(s) || m_live.contains(s));
+        SASSERT(m_state_ufind.is_root(s));
+        if (m_live.contains(s)) return;
+        if (m_unexplored.contains(s)) mark_unknown_core(s);
+        s = merge_all_cycles(s);
         // check if dead
         mark_dead_recursive(s);
         STRACE("seq_regex_brief", tout << "done(" << s << ") ";);
@@ -929,13 +924,11 @@ namespace smt {
     bool state_graph::is_dead(state s) {
         return m_dead.contains(m_state_ufind.find(s));
     }
+    bool state_graph::is_done(state s) {
+        return (m_seen.contains(s) &&
+                !m_unexplored.contains(m_state_ufind.find(s)));
+    }
 
-    // void pretty_print_set(std::ofstream& of, state_set& s_set) {
-    //     for (auto s: s_set) {
-    //         of << " " << s;
-    //     }
-    //     of << std::endl;
-    // }
     void state_graph::pretty_print(std::ofstream& of) {
         of << "---------- State Graph ----------" << std::endl;
         of << "Seen:";
@@ -950,7 +943,7 @@ namespace smt {
         of << "Live:" << m_live << std::endl;
         of << "Dead:" << m_dead << std::endl;
         of << "Unknown:" << m_unknown << std::endl;
-        of << "Unvisited:" << m_unvisited << std::endl;
+        of << "Unexplored:" << m_unexplored << std::endl;
 
         of << "Edges:" << std::endl;
         for (auto s1: m_seen) {
@@ -978,7 +971,7 @@ namespace smt {
     */
     bool seq_regex::update_state_graph(expr* r) {
         unsigned r_id = get_state_id(r);
-        if (m_state_graph.is_seen(r_id)) return false;
+        if (m_state_graph.is_done(r_id)) return false;
         if (m_state_graph.get_size() >= m_max_state_graph_size) {
             STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;);
             STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
@@ -986,27 +979,31 @@ namespace smt {
         }
         STRACE("seq_regex", tout << "Updating state graph for regex "
                                  << mk_pp(r, m) << ") ";);
-        STRACE("seq_regex_brief", tout << std::endl
-                                       << "USG(" << r->get_id() << ") ";);
+        STRACE("seq_regex_brief", tout
+            << std::endl << "USG(" << r->get_id() << ") ";);
         // Save r as expr_ref so it's not deallocated
         m_state_trail.push_back(r);
-        // Add state, live if nullable
-        bool r_nullable = m.is_true(is_nullable_wrapper(r));
-        m_state_graph.add_state(r_id, r_nullable);
+        // Add state
+        m_state_graph.add_state(r_id);
+        expr_ref r_nullable = is_nullable_wrapper(r);
+        if (m.is_true(r_nullable)) {
+            m_state_graph.mark_live(r_id);
+            return true;
+        }
         // Add edges to all derivatives
         expr_ref_vector derivatives(m);
-        STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
+        STRACE("seq_regex_debug", tout
+            << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
         get_all_derivatives(r, derivatives);
         for (auto const& dr: derivatives) {
             unsigned dr_id = get_state_id(dr);
-            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
-            expr_ref dr_n = is_nullable_wrapper(dr);
-            bool dr_nullable = m.is_true(dr_n);
-            m_state_graph.add_state(dr_id, dr_nullable);
+            STRACE("seq_regex_debug", tout
+                << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
+            m_state_graph.add_state(dr_id);
             bool maybecycle = can_be_in_cycle(r, dr);
             m_state_graph.add_edge(r_id, dr_id, maybecycle);
         }
-        m_state_graph.done_adding(r_id);
+        m_state_graph.mark_done(r_id);
         STRACE("seq_regex_brief", tout << std::endl;);
         STRACE("seq_regex_brief", m_state_graph.pretty_print(tout););
         return true;
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 3911e228b50..8f7252d9163 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -39,8 +39,10 @@ namespace smt {
 
         "States" are integers. States and edges are added to the data
         structure incrementally.
-        - Some states are initially labeled as live. The data structure
-          tracks which other states are live (can reach a live state), dead
+        - States can be marked as live
+          or as done -- to indicate that no more edges will be added and the
+          state will not be marked as live. The data structure then tracks
+          which other states are live (can reach a live state), dead
           (can't reach a live state), or neither.
         - Some edges are labeled as not contained in a cycle. This is to
           optimize search if it is known by the user of the structure
@@ -49,9 +51,6 @@ namespace smt {
         Internally, we use union_find to identify states within an SCC,
         and incrementally update SCCs, while propagating backwards
         live and dead SCCs.
-
-        Class invariants:
-            - TODO
     */
     class state_graph {
         typedef unsigned              state;
@@ -61,90 +60,95 @@ namespace smt {
 
     private:
         /*
-            All states are exactly one of:
-            - live:       known to be nonempty
-            - dead:       known to be empty
-            - unknown:    all outgoing transitions have been
-                          added, but the state is not known
-                          to be live or dead
-            - unvisited:  outgoing transitions have not been added
+            All states are internally exactly one of:
+            - live:       known to reach a live state
+            - dead:       known to never reach a live state
+            - unknown:    all outgoing edges have been added, but the
+                          state is not known to be live or dead
+            - unexplored: not all outgoing edges have been added
 
             As SCCs are merged, some states become aliases, and a
             union find data structure collapses a now obsolete
             state to its current representative. m_seen keeps track
             of states we have seen, including obsolete states.
+
+            Invariants:
+            - TODO
         */
         state_set   m_live;
         state_set   m_dead;
         state_set   m_unknown;
-        state_set   m_unvisited;
+        state_set   m_unexplored;
 
         state_set     m_seen;
         state_ufind   m_state_ufind;
 
-        void add_state_core(state s); // unvisited + seen
-        void remove_state(state s);   // * -> m_seen only
-
-        void mark_unknown(state s); // unvisited -> unknown
-        void mark_live(state s);    // unknown -> live
-        void mark_dead(state s);    // unknown -> dead
-
-        // bool is_resolved(state s);   // live or dead
-        // bool is_unresolved(state s); // unknown or unvisited
-
         /*
             Edges are saved in both from and to maps.
             A subset of edges are also marked as possibly being
             part of a cycle by being stored in m_from_maybecycle.
+            
+            Invariants:
+            - TODO
         */
         edge_rel   m_from;
         edge_rel   m_to;
         edge_rel   m_from_maybecycle;
 
+        /*
+            'Core' functions that modify the plain graph, without
+            updating SCCs or propagating live/dead state information.
+            These are for internal use only.
+        */
+        void add_state_core(state s);    // unexplored + seen
+        void remove_state_core(state s); // unknown + seen -> seen
+        void mark_unknown_core(state s); // unexplored -> unknown
+        void mark_live_core(state s);    // unknown -> live
+        void mark_dead_core(state s);    // unknown -> dead
+
         void add_edge_core(state s1, state s2, bool maybecycle);
-        void remove_edge(state s1, state s2);
-        void rename_edge(state old1, state old2, state new1, state new2);
+        void remove_edge_core(state s1, state s2);
+        void rename_edge_core(state old1, state old2, state new1, state new2);
 
         state merge_states(state s1, state s2);
         state merge_states(state_set& s_set);
 
         /*
-            Core algorithmic search routines
+            Algorithmic search routines
             - live state propagation
             - dead state propagation
-            - cycle detection
+            - cycle / strongly-connected component detection
         */
         void mark_live_recursive(state s);
         void mark_dead_recursive(state s);
-        state merge_all_cycles(state s1, state_set& s_to);
-
-        /*
-            Pretty printing support
-        */
-        // void pretty_print_set(std::ofstream& of, state_set& s_set);
+        state merge_all_cycles(state s);
 
     public:
         state_graph():
-            m_live(), m_dead(), m_unknown(), m_unvisited(), m_seen(),
+            m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(),
             m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {}
 
         /*
-            Exposed methods:
-            - adding a state and all its transitions
-            - checking if a state is known to be live or dead
-
-            ASSUMPTION: transitions from a state are added in order and after
-            all transitions are added, the state is marked as
-            finished. Also all states are added before the transitions.
+            Exposed methods
+
+            These methods may be called in any order, as long as:
+            - states are added before edges are added between them
+            - edges are not added from a done state
+            - a done state is not marked as live
+            - edges are not added creating a cycle containing an edge with
+              maybecycle = false
         */
-        void add_state(state s, bool live);
+        void add_state(state s);
         void add_edge(state s1, state s2, bool maybecycle);
-        void done_adding(state s);
-        unsigned get_size();
+        void mark_live(state s);
+        void mark_done(state s);
 
         bool is_seen(state s);
         bool is_live(state s);
         bool is_dead(state s);
+        bool is_done(state s);
+
+        unsigned get_size();
 
         /*
             Pretty printing

From d4bdf5937752ce7b6a9c5a2550ceab2cb2abec33 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 29 Jun 2020 21:58:35 -0400
Subject: [PATCH 28/51] start of general cycle detection check + fix some
 comments

---
 src/smt/seq_regex.cpp | 13 +++++++++++--
 src/smt/seq_regex.h   |  7 ++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index bf034f6764c..fe7f868e27e 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -860,13 +860,22 @@ namespace smt {
     }
 
     /*
-        Merge all cycles of unknown states containing s1 into one state.
+        Merge all cycles of unknown states containing s into one state.
         Return the new state
-        Precondition: s1 is unknown.
+        Precondition: s is unknown.
     */
     auto state_graph::merge_all_cycles(state s) -> state {
         SASSERT(m_unknown.contains(s));
         // Mark s_to, then search backwards from s to mark the SCC
+        // state_set visited = *(new state_set());
+        // state_set marked = *(new state_set());
+        // visited.insert(s);
+        // auto to_search = *(new vector<pair<state, state>>())
+        // to_search.push_back(s, s)
+        // while (to_search.size() > 0) {
+        //     auto p = to_search.pop_back();
+        // }
+
         // TODO: Implement full check
         // Simple placeholder for now: check if there is an edge both ways
         state_set s_to_set = *m_to.find(s); // makes a copy. Reference could
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 8f7252d9163..6a745b0853f 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -40,8 +40,9 @@ namespace smt {
         "States" are integers. States and edges are added to the data
         structure incrementally.
         - States can be marked as live
-          or as done -- to indicate that no more edges will be added and the
-          state will not be marked as live. The data structure then tracks
+          or as done -- to indicate that no more outgoing edges will be
+          added and the state will not be marked as live. The data
+          structure then tracks
           which other states are live (can reach a live state), dead
           (can't reach a live state), or neither.
         - Some edges are labeled as not contained in a cycle. This is to
@@ -133,7 +134,7 @@ namespace smt {
 
             These methods may be called in any order, as long as:
             - states are added before edges are added between them
-            - edges are not added from a done state
+            - outgoing edges are not added from a done state
             - a done state is not marked as live
             - edges are not added creating a cycle containing an edge with
               maybecycle = false

From 7f922e1df52fe721709b678dff8ffe8acb45ca5e Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Tue, 30 Jun 2020 12:44:46 -0400
Subject: [PATCH 29/51] implement full cycle detection procedure

---
 src/smt/seq_regex.cpp | 102 +++++++++++++++++++++++++++---------------
 1 file changed, 66 insertions(+), 36 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index fe7f868e27e..59147fd8ade 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -784,9 +784,12 @@ namespace smt {
         Merge two states or more generally a set of states into one,
         returning the new state. Also merges associated edges.
 
-        Preconditions: the set should be nonempty, and every state
-        in the set should be unknown (in particular, *not* unvisited).
-        Also, each state should currently exist
+        Preconditions:
+        - The set should be nonempty
+        - Every state in the set should be unknown
+        - Each state should currently exist
+        - If passing a set of states by reference, it should not be a set
+          from the edge relations, as merging states modifies edge relations.
     */
     auto state_graph::merge_states(state s1, state s2) -> state {
         SASSERT(m_state_ufind.is_root(s1));
@@ -822,7 +825,7 @@ namespace smt {
     }
 
     /*
-        if s is not live, mark it, and recurse on all states into s
+        If s is not live, mark it, and recurse on all states into s
         Precondition: s is live or unknown
     */
     void state_graph::mark_live_recursive(state s) {
@@ -837,7 +840,7 @@ namespace smt {
     }
 
     /*
-        check if s is now known to be dead. If so, mark and recurse
+        Check if s is now known to be dead. If so, mark and recurse
         on all states into s.
         Precondition: s is live, dead, or unknown
     */
@@ -866,25 +869,51 @@ namespace smt {
     */
     auto state_graph::merge_all_cycles(state s) -> state {
         SASSERT(m_unknown.contains(s));
-        // Mark s_to, then search backwards from s to mark the SCC
-        // state_set visited = *(new state_set());
-        // state_set marked = *(new state_set());
-        // visited.insert(s);
-        // auto to_search = *(new vector<pair<state, state>>())
-        // to_search.push_back(s, s)
-        // while (to_search.size() > 0) {
-        //     auto p = to_search.pop_back();
-        // }
-
-        // TODO: Implement full check
-        // Simple placeholder for now: check if there is an edge both ways
-        state_set s_to_set = *m_to.find(s); // makes a copy. Reference could
-                                            // lead to a bug
-        for (auto s_to: s_to_set) {
-            if (m_to.find(s_to)->contains(s))
-                s = merge_states(s, s_to);
+        // Visit states in a DFS backwards from s
+        state_set visited;  // all backwards edges pushed
+        state_set resolved; // known in SCC or not
+        state_set scc;      // known in SCC
+        resolved.insert(s);
+        scc.insert(s);
+        vector<state> to_search;
+        to_search.push_back(s);
+        while (to_search.size() > 0) {
+            state x = to_search.back();
+            if (!visited.contains(x)) {
+                visited.insert(x);
+                // recurse backwards only on maybecycle edges
+                // and only on unknown states
+                for (auto y: *m_from_maybecycle.find(x)) {
+                    if (m_unknown.contains(y))
+                        to_search.push_back(y);
+                }
+            }
+            else if (!resolved.contains(x)) {
+                resolved.insert(x);
+                to_search.pop_back();
+                // determine in SCC or not
+                for (auto y: *m_from_maybecycle.find(x)) {
+                    if (scc.contains(y)) {
+                        scc.insert(x);
+                        break;
+                    }
+                }
+            }
+            else {
+                to_search.pop_back();
+            }
         }
-        return s;
+        // scc is the union of all cycles containing s
+        return merge_states(scc);
+
+        // Previous simple placeholder: check if there is an edge both ways
+        // state_set s_to_set = *m_to.find(s); // makes a copy. Reference could
+        //                                     // lead to a bug
+        // for (auto s_to: s_to_set) {
+        //     if (m_to.find(s_to)->contains(s))
+        //         s = merge_states(s, s_to);
+        // }
+        // return s;
     }
 
     /*
@@ -997,22 +1026,23 @@ namespace smt {
         expr_ref r_nullable = is_nullable_wrapper(r);
         if (m.is_true(r_nullable)) {
             m_state_graph.mark_live(r_id);
-            return true;
         }
-        // Add edges to all derivatives
-        expr_ref_vector derivatives(m);
-        STRACE("seq_regex_debug", tout
-            << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
-        get_all_derivatives(r, derivatives);
-        for (auto const& dr: derivatives) {
-            unsigned dr_id = get_state_id(dr);
+        else {
+            // Add edges to all derivatives
+            expr_ref_vector derivatives(m);
             STRACE("seq_regex_debug", tout
-                << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
-            m_state_graph.add_state(dr_id);
-            bool maybecycle = can_be_in_cycle(r, dr);
-            m_state_graph.add_edge(r_id, dr_id, maybecycle);
+                << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
+            get_all_derivatives(r, derivatives);
+            for (auto const& dr: derivatives) {
+                unsigned dr_id = get_state_id(dr);
+                STRACE("seq_regex_debug", tout
+                    << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
+                m_state_graph.add_state(dr_id);
+                bool maybecycle = can_be_in_cycle(r, dr);
+                m_state_graph.add_edge(r_id, dr_id, maybecycle);
+            }
+            m_state_graph.mark_done(r_id);
         }
-        m_state_graph.mark_done(r_id);
         STRACE("seq_regex_brief", tout << std::endl;);
         STRACE("seq_regex_brief", m_state_graph.pretty_print(tout););
         return true;

From 12f7a1feeeaee82f238543403184b6a6fc4554b4 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Tue, 30 Jun 2020 21:47:17 -0400
Subject: [PATCH 30/51] normalize derivative conditions to form 'ele <= a'

---
 src/ast/rewriter/seq_rewriter.cpp | 145 +++++++++++++++++++++++++-----
 src/ast/rewriter/seq_rewriter.h   |   2 +-
 src/smt/seq_regex.h               |   3 +-
 3 files changed, 124 insertions(+), 26 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 45f6cf5489c..41b7b06f95d 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2543,18 +2543,18 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
             }
             // @EXP (experimental change)
             // Simplify if there is a relationship between ca and cb
-            // if (pred_implies(ca, cb)) {
-            //     r1 = mk_der_op(k, a1, b1);
-            // }
-            // else if (pred_implies(ca, notcb)) {
-            //     r1 = mk_der_op(k, a1, b2);
-            // }
-            // if (pred_implies(notca, cb)) {
-            //     r2 = mk_der_op(k, a2, b1);
-            // }
-            // else if (pred_implies(notca, notcb)) {
-            //     r2 = mk_der_op(k, a2, b2);
-            // }
+            if (pred_implies(ca, cb)) {
+                r1 = mk_der_op(k, a1, b1);
+            }
+            else if (pred_implies(ca, notcb)) {
+                r1 = mk_der_op(k, a1, b2);
+            }
+            if (pred_implies(notca, cb)) {
+                r2 = mk_der_op(k, a2, b1);
+            }
+            else if (pred_implies(notca, notcb)) {
+                r2 = mk_der_op(k, a2, b2);
+            }
             // --- End core logic
         }
         if (!r1) r1 = mk_der_op(k, a1, b);
@@ -2637,6 +2637,73 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
     return result;
 }
 
+/*
+    Make an re_predicate with condition cond, enforcing derivative
+    normal form on how conditions are written.
+
+    Rewrites everything to (ele <= x) constraints:
+    (ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none))
+    (a = ele) => "
+    (a <= ele) => ite(ele <= a-1, none, epsilon)
+    (not p)   => mk_der_compl(...)
+    (p and q) => mk_der_inter(...)
+    (p or q)  => mk_der_union(...)
+
+    Postcondition: result is in BDD form
+*/
+expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
+    STRACE("seq_verbose", tout << "mk_der_cond: "
+        <<  mk_pp(cond, m()) << ", " << mk_pp(ele, m()) << std::endl;);
+    sort *ele_sort = nullptr;
+    VERIFY(u().is_seq(seq_sort, ele_sort));
+    SASSERT(ele_sort == m().get_sort(ele));
+    expr *c1 = nullptr, *c2 = nullptr, *ch1 = nullptr, *ch2 = nullptr;
+    unsigned ch = 0;
+    expr_ref result(m()), r1(m()), r2(m());
+    if (m().is_eq(cond, ch1, ch2)) {
+        r1 = u().mk_le(ch1, ch2);
+        r1 = mk_der_cond(r1, ele, seq_sort);
+        r2 = u().mk_le(ch2, ch1);
+        r2 = mk_der_cond(r2, ele, seq_sort);
+        result = mk_der_inter(r1, r2);
+    }
+    else if (u().is_char_le(cond, ch1, ch2) &&
+             u().is_const_char(ch1, ch) && (ch2 == ele)) {
+        if (ch > 0) {
+            result = u().mk_char(ch - 1);
+            result = u().mk_le(ele, result);
+            result = re_predicate(result, seq_sort);
+            result = mk_der_compl(result);
+        }
+        else {
+            result = m().mk_true();
+        }
+    }
+    else if (m().is_not(cond, c1)) {
+        UNREACHABLE();
+        result = mk_der_cond(c1, ele, seq_sort);
+        result = mk_der_compl(result);
+    }
+    else if (m().is_and(cond, c1, c2)) {
+        UNREACHABLE();
+        r1 = mk_der_cond(c1, ele, seq_sort);
+        r2 = mk_der_cond(c2, ele, seq_sort);
+        result = mk_der_inter(r1, r2);
+    }
+    else if (m().is_or(cond, c1, c2)) {
+        UNREACHABLE();
+        r1 = mk_der_cond(c1, ele, seq_sort);
+        r2 = mk_der_cond(c2, ele, seq_sort);
+        result = mk_der_union(r1, r2);
+    }
+    else {
+        result = re_predicate(cond, seq_sort);
+    }
+    STRACE("seq_verbose", tout << "mk_der_cond result: "
+        <<  mk_pp(result, m()) << std::endl;);
+    return result;
+}
+
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
     // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m());
@@ -2710,7 +2777,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         expr_ref hd(m()), tl(m());
         if (get_head_tail(r1, hd, tl)) {
             // head must be equal; if so, derivative is tail
-            return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
+            // return re_and(m_br.mk_eq_rw(ele, hd), re().mk_to_re(tl));
             // @EXP (experimental change)
             // Write 'head is equal' as a range constraint:
             // (ele <= hd) and (hd <= ele)
@@ -2718,6 +2785,13 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
             //     re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)),
             //     re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl))
             // );
+            // @EXP (experimental change)
+            // Use mk_der_cond to normalize
+            STRACE("seq_verbose", tout << "deriv to_re" << std::endl;);
+            result = m().mk_eq(ele, hd);
+            result = mk_der_cond(result, ele, seq_sort);
+            result = mk_der_concat(result, re().mk_to_re(tl));
+            return result;
         }
         else if (str().is_empty(r1)) {
             return mk_empty();
@@ -2740,7 +2814,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         // This is analagous to the previous is_to_re case.
         expr_ref hd(m()), tl(m());
         if (get_head_tail_reversed(r2, hd, tl)) {
-            return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
+            // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
             // @EXP (experimental change)
             // Write 'tail is equal' as a range constraint:
             // (ele <= tl) and (tl <= ele)
@@ -2748,6 +2822,13 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
             //     re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))),
             //     re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd)))
             // );
+            // @EXP (experimental change)
+            // Use mk_der_cond to normalize
+            STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
+            result = m().mk_eq(ele, tl);
+            result = mk_der_cond(result, ele, seq_sort);
+            result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
+            return result;
         }
         else if (str().is_empty(r2)) {
             return mk_empty();
@@ -2760,13 +2841,17 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
             if (s1.length() == 1 && s2.length() == 1) {
                 expr_ref ch1(m_util.mk_char(s1[0]), m());
                 expr_ref ch2(m_util.mk_char(s2[0]), m());
+                // return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort),
+                //                     re_predicate(m_util.mk_le(ele, ch2), seq_sort));
                 // @EXP (experimental change)
-                // expr_ref p1(m_util.mk_le(ch1, ele), m());
-                // expr_ref p2(m_util.mk_le(ele, ch2), m());
-                // expr_ref conj(m().mk_and(p1, p2), m());
-                // return re_predicate(conj, seq_sort);
-                return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort),
-                                    re_predicate(m_util.mk_le(ele, ch2), seq_sort));
+                // Use mk_der_cond to normalize
+                STRACE("seq_verbose", tout << "deriv range zstring" << std::endl;);
+                expr_ref p1(u().mk_le(ch1, ele), m());
+                p1 = mk_der_cond(p1, ele, seq_sort);
+                expr_ref p2(u().mk_le(ele, ch2), m());
+                p2 = mk_der_cond(p2, ele, seq_sort);
+                result = mk_der_inter(p1, p2);
+                return result;
             }
             else {
                 return mk_empty();
@@ -2774,8 +2859,17 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         }
         expr* e1 = nullptr, *e2 = nullptr;
         if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) {
-            return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort),
-                                re_predicate(m_util.mk_le(ele, e2), seq_sort));
+            // return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort),
+            //                     re_predicate(m_util.mk_le(ele, e2), seq_sort));
+            // @EXP (experimental change)
+            // Use mk_der_cond to normalize
+            STRACE("seq_verbose", tout << "deriv range str" << std::endl;);
+            expr_ref p1(u().mk_le(e1, ele), m());
+            p1 = mk_der_cond(p1, ele, seq_sort);
+            expr_ref p2(u().mk_le(ele, e2), m());
+            p2 = mk_der_cond(p2, ele, seq_sort);
+            result = mk_der_inter(p1, p2);
+            return result;
         }
     }
     else if (re().is_full_char(r)) {
@@ -2785,7 +2879,12 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         array_util array(m());
         expr* args[2] = { p, ele };
         result = array.mk_select(2, args);
-        return re_predicate(result, seq_sort);
+        // return re_predicate(result, seq_sort);
+        // @EXP (experimental change)
+        // Use mk_der_cond to normalize
+        // (It's a no-op in this case, however)
+        STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
+        return mk_der_cond(result, ele, seq_sort);
     }
     // stuck cases: re.derivative, variable,
     // str.to_re if the head of the string can't be obtained,
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index a82e7a6ba65..bbd5d3a345e 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -202,7 +202,7 @@ class seq_rewriter {
     expr_ref mk_der_union(expr* a, expr* b);
     expr_ref mk_der_inter(expr* a, expr* b);
     expr_ref mk_der_compl(expr* a);
-    expr_ref mk_der_reverse(expr* a);
+    expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
 
     bool lt_char(expr* ch1, expr* ch2);
     bool eq_char(expr* ch1, expr* ch2);
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 6a745b0853f..161423efe74 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -191,8 +191,7 @@ namespace smt {
         scoped_vector<propagation_lit>   m_to_propagate;
 
         /*
-            state_graph for dead state detection,
-            and associated methods
+            state_graph for dead state detection, and associated methods
         */
         state_graph       m_state_graph;
         expr_ref_vector   m_state_trail;

From 1543ca793823ad5321e8c5f1874fc52b49951bd1 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Tue, 30 Jun 2020 22:10:38 -0400
Subject: [PATCH 31/51] order derivative conditions by character code

---
 src/ast/rewriter/seq_rewriter.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 41b7b06f95d..99635a54529 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2519,8 +2519,15 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
         return (a == b) ? a : m().mk_ite(c, a, b);
     };
     // @EXP (experimental change)
-    // Use same ID for related predicates to improve simplifications
-    // auto get_id = [&](expr* e) { re().is_complement(e, e); return e->get_id(); };
+    // Use character code to order conditions
+    auto get_id = [&](expr* e) {
+        expr *ch1 = nullptr, *ch2 = nullptr;
+        unsigned ch;
+        if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch))
+            return ch;
+        re().is_complement(e, e);
+        return e->get_id();
+    };
     if (m().is_ite(a, ca, a1, a2)) {
         expr_ref r1(m()), r2(m());
         expr_ref notca(m().mk_not(ca), m());
@@ -2534,7 +2541,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
                 return result;
             }
             // Order with higher IDs on the outside
-            if (ca->get_id() < cb->get_id()) {
+            if (get_id(ca) < get_id(cb)) {
                 std::swap(a, b);
                 std::swap(ca, cb);
                 std::swap(notca, notcb);

From 11bda7e916c86c3b834c50586f8089193513d6a9 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Tue, 30 Jun 2020 22:30:03 -0400
Subject: [PATCH 32/51] fix confusing names m_to and m_from

---
 src/smt/seq_regex.cpp | 65 +++++++++++++++++++------------------------
 src/smt/seq_regex.h   | 10 +++----
 2 files changed, 33 insertions(+), 42 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 59147fd8ade..826d82a6eda 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -697,9 +697,9 @@ namespace smt {
         // Initialize as unvisited
         m_seen.insert(s);
         m_unexplored.insert(s);
-        m_to.insert(s, new state_set());
-        m_from.insert(s, new state_set());
-        m_from_maybecycle.insert(s, new state_set());
+        m_targets.insert(s, new state_set());
+        m_sources.insert(s, new state_set());
+        m_sources_maybecycle.insert(s, new state_set());
     }
     void state_graph::remove_state_core(state s) {
         // This is a partial deletion -- the state is still seen and can't be
@@ -710,9 +710,9 @@ namespace smt {
         SASSERT(m_seen.contains(s));
         SASSERT(!m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
-        m_to.erase(s);
-        m_from.erase(s);
-        m_from_maybecycle.erase(s);
+        m_targets.erase(s);
+        m_sources.erase(s);
+        m_sources_maybecycle.erase(s);
         m_unknown.remove(s);
     }
 
@@ -751,31 +751,31 @@ namespace smt {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
         if (s1 == s2) return;
-        if (!m_to.find(s1)->contains(s2)) {
+        if (!m_targets.find(s1)->contains(s2)) {
             // add new edge
             STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: new edge! ";);
-            m_to.find(s1)->insert(s2);
-            m_from.find(s2)->insert(s1);
-            if (maybecycle) m_from_maybecycle.find(s2)->insert(s1);
+            m_targets.find(s1)->insert(s2);
+            m_sources.find(s2)->insert(s1);
+            if (maybecycle) m_sources_maybecycle.find(s2)->insert(s1);
         }
-        else if (!maybecycle && m_from_maybecycle.find(s2)->contains(s1)) {
+        else if (!maybecycle && m_sources_maybecycle.find(s2)->contains(s1)) {
             // update existing edge
             STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: update edge! ";);
-            m_from_maybecycle.find(s2)->remove(s1);
+            m_sources_maybecycle.find(s2)->remove(s1);
         }
     }
     void state_graph::remove_edge_core(state s1, state s2) {
-        SASSERT(m_to.find(s1)->contains(s2));
-        SASSERT(m_from.find(s2)->contains(s1));
-        m_to.find(s1)->remove(s2);
-        m_from.find(s2)->remove(s1);
-        m_from_maybecycle.find(s2)->remove(s1);
+        SASSERT(m_targets.find(s1)->contains(s2));
+        SASSERT(m_sources.find(s2)->contains(s1));
+        m_targets.find(s1)->remove(s2);
+        m_sources.find(s2)->remove(s1);
+        m_sources_maybecycle.find(s2)->remove(s1);
     }
     void state_graph::rename_edge_core(state old1, state old2,
                                        state new1, state new2) {
-        SASSERT(m_to.find(old1)->contains(old2));
-        SASSERT(m_from.find(old2)->contains(old1));
-        bool maybecycle = m_from_maybecycle.find(old2)->contains(old1);
+        SASSERT(m_targets.find(old1)->contains(old2));
+        SASSERT(m_sources.find(old2)->contains(old1));
+        bool maybecycle = m_sources_maybecycle.find(old2)->contains(old1);
         remove_edge_core(old1, old2);
         add_edge_core(new1, new2, maybecycle);
     }
@@ -800,10 +800,10 @@ namespace smt {
         m_state_ufind.merge(s1, s2);
         if (m_state_ufind.is_root(s2)) std::swap(s1, s2);
         // rename s2 to s1 in edges
-        for (auto s_to: *m_to.find(s2)) {
+        for (auto s_to: *m_targets.find(s2)) {
             rename_edge_core(s2, s_to, s1, s_to);
         }
-        for (auto s_from: *m_from.find(s2)) {
+        for (auto s_from: *m_sources.find(s2)) {
             rename_edge_core(s_from, s2, s_from, s1);
         }
         remove_state_core(s2);
@@ -834,7 +834,7 @@ namespace smt {
             << std::endl << "  DEBUG: mark live recursive: " << s << " ";);
         if (m_live.contains(s)) return;
         mark_live_core(s);
-        for (auto s_from: *m_from.find(s)) {
+        for (auto s_from: *m_sources.find(s)) {
             mark_live_recursive(s_from);
         }
     }
@@ -850,14 +850,14 @@ namespace smt {
         STRACE("seq_regex_debug", tout
             << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         if (!m_unknown.contains(s)) return;
-        for (auto s_to: *m_to.find(s)) {
+        for (auto s_to: *m_targets.find(s)) {
             // unknown pointing to live should have been marked as live!
             SASSERT(!m_live.contains(s_to));
             if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return;
         }
         // all states from s are dead
         mark_dead_core(s);
-        for (auto s_from: *m_from.find(s)) {
+        for (auto s_from: *m_sources.find(s)) {
             mark_dead_recursive(s_from);
         }
     }
@@ -883,7 +883,7 @@ namespace smt {
                 visited.insert(x);
                 // recurse backwards only on maybecycle edges
                 // and only on unknown states
-                for (auto y: *m_from_maybecycle.find(x)) {
+                for (auto y: *m_sources_maybecycle.find(x)) {
                     if (m_unknown.contains(y))
                         to_search.push_back(y);
                 }
@@ -892,7 +892,7 @@ namespace smt {
                 resolved.insert(x);
                 to_search.pop_back();
                 // determine in SCC or not
-                for (auto y: *m_from_maybecycle.find(x)) {
+                for (auto y: *m_sources_maybecycle.find(x)) {
                     if (scc.contains(y)) {
                         scc.insert(x);
                         break;
@@ -905,15 +905,6 @@ namespace smt {
         }
         // scc is the union of all cycles containing s
         return merge_states(scc);
-
-        // Previous simple placeholder: check if there is an edge both ways
-        // state_set s_to_set = *m_to.find(s); // makes a copy. Reference could
-        //                                     // lead to a bug
-        // for (auto s_to: s_to_set) {
-        //     if (m_to.find(s_to)->contains(s))
-        //         s = merge_states(s, s_to);
-        // }
-        // return s;
     }
 
     /*
@@ -986,7 +977,7 @@ namespace smt {
         of << "Edges:" << std::endl;
         for (auto s1: m_seen) {
             if (m_state_ufind.is_root(s1)) {
-                of << "  " << s1 << " -> " << *m_to.find(s1) << std::endl;
+                of << "  " << s1 << " -> " << *m_targets.find(s1) << std::endl;
             }
         }
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 161423efe74..743c149e8bb 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -87,14 +87,14 @@ namespace smt {
         /*
             Edges are saved in both from and to maps.
             A subset of edges are also marked as possibly being
-            part of a cycle by being stored in m_from_maybecycle.
+            part of a cycle by being stored in m_sources_maybecycle.
             
             Invariants:
             - TODO
         */
-        edge_rel   m_from;
-        edge_rel   m_to;
-        edge_rel   m_from_maybecycle;
+        edge_rel   m_sources;
+        edge_rel   m_targets;
+        edge_rel   m_sources_maybecycle;
 
         /*
             'Core' functions that modify the plain graph, without
@@ -127,7 +127,7 @@ namespace smt {
     public:
         state_graph():
             m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(),
-            m_state_ufind(), m_from(), m_to(), m_from_maybecycle() {}
+            m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle() {}
 
         /*
             Exposed methods

From 4b5a89ee0a5c5e385a5fb6be05fa6a6b0c05dfeb Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 07:34:20 -0400
Subject: [PATCH 33/51] assign increasing state IDs from 1 instead of using
 get_id on AST node

---
 src/smt/seq_regex.cpp | 19 +++++++++++++++----
 src/smt/seq_regex.h   | 12 ++++++++----
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 826d82a6eda..63b5e8afb42 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -26,7 +26,8 @@ namespace smt {
         ctx(th.get_context()),
         m(th.get_manager()),
         m_state_graph(),
-        m_state_trail(m)
+        m_expr_to_state(),
+        m_state_to_expr(m)
     {}
 
     seq_util& seq_regex::u() { return th.m_util; }
@@ -988,8 +989,20 @@ namespace smt {
     // **********************************
 
     unsigned seq_regex::get_state_id(expr* e) {
-        return e->get_id();
+        // Assign increasing IDs starting from 1
+        if (!m_expr_to_state.contains(e)) {
+            m_state_to_expr.push_back(e);
+            unsigned new_id = m_state_to_expr.size();
+            m_expr_to_state.insert(e, new_id);
+        }
+        return m_expr_to_state.find(e);
+    }
+    expr* seq_regex::get_expr_from_id(unsigned id) {
+        SASSERT(id >= 1);
+        SASSERT(id <= m_state_to_expr.size());
+        return m_state_to_expr.get(id);
     }
+
     bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) {
         // Simple placeholder. TODO: Implement full check
         return true;
@@ -1010,8 +1023,6 @@ namespace smt {
                                  << mk_pp(r, m) << ") ";);
         STRACE("seq_regex_brief", tout
             << std::endl << "USG(" << r->get_id() << ") ";);
-        // Save r as expr_ref so it's not deallocated
-        m_state_trail.push_back(r);
         // Add state
         m_state_graph.add_state(r_id);
         expr_ref r_nullable = is_nullable_wrapper(r);
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 743c149e8bb..39ac16bd2ce 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -193,16 +193,20 @@ namespace smt {
         /*
             state_graph for dead state detection, and associated methods
         */
-        state_graph       m_state_graph;
-        expr_ref_vector   m_state_trail;
-        unsigned          m_max_state_graph_size { 10000 };
-        // Convert expression to state
+        state_graph                    m_state_graph;
+        ptr_addr_map<expr, unsigned>   m_expr_to_state;
+        expr_ref_vector                m_state_to_expr;
+        unsigned                       m_max_state_graph_size { 10000 };
+        // Convert between expressions and states (IDs)
         unsigned get_state_id(expr* e);
+        expr* get_expr_from_id(unsigned id);
         // Cycle-detection heuristic (sound but not complete)
         bool can_be_in_cycle(expr* e1, expr* e2);
         // Update the graph
         bool update_state_graph(expr* r);
 
+        // ********************
+
         seq_util& u();
         class seq_util::re& re();
         class seq_util::str& str();

From e12bf862bd59f1f6c5f3fae0722f857b0da700c6 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 07:52:51 -0400
Subject: [PATCH 34/51] remove elim_condition call in get_all_derivatives

---
 src/smt/seq_regex.cpp | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 63b5e8afb42..015fe4f3a3f 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -611,18 +611,8 @@ namespace smt {
         get_cofactors(d, cofactors);
         STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;);
         for (auto const& p : cofactors) {
-            STRACE("seq_regex_debug", tout << "visiting cofactor: cond: " << mk_pp(p.first, m) << ", deriv: " << mk_pp(p.second, m) << std::endl;);
-            expr_ref cond(p.first, m);
-            STRACE("seq_regex_debug", tout << "head: " << mk_pp(hd, m) << std::endl;);
-            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
-            seq_rw().elim_condition(hd, cond);
-            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
-            rewrite(cond);
-            STRACE("seq_regex_debug", tout << "cond: " << mk_pp(cond, m) << std::endl;);
-            if (m.is_false(cond)) continue;
-            STRACE("seq_regex_debug", tout << "cofactor labeled true!" << std::endl;);
-            if (re().is_empty(p.second)) continue;
-            STRACE("seq_regex_debug", tout << "added derivative!" << std::endl;);
+            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
+            STRACE("seq_regex_debug", tout << "adding derivative: " << mk_pp(p.second, m) << std::endl;);
             results.push_back(p.second);
         }
     }

From 938dc433261e21b134d25026bf9b194974ab7688 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 08:03:58 -0400
Subject: [PATCH 35/51] use u_map instead of uint_map to avoid memory leak

---
 src/smt/seq_regex.cpp | 56 +++++++++++++++++++++----------------------
 src/smt/seq_regex.h   |  9 ++++---
 2 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 015fe4f3a3f..021e946b059 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -688,9 +688,9 @@ namespace smt {
         // Initialize as unvisited
         m_seen.insert(s);
         m_unexplored.insert(s);
-        m_targets.insert(s, new state_set());
-        m_sources.insert(s, new state_set());
-        m_sources_maybecycle.insert(s, new state_set());
+        m_targets.insert(s, state_set());
+        m_sources.insert(s, state_set());
+        m_sources_maybecycle.insert(s, state_set());
     }
     void state_graph::remove_state_core(state s) {
         // This is a partial deletion -- the state is still seen and can't be
@@ -701,9 +701,9 @@ namespace smt {
         SASSERT(m_seen.contains(s));
         SASSERT(!m_state_ufind.is_root(s));
         SASSERT(m_unknown.contains(s));
-        m_targets.erase(s);
-        m_sources.erase(s);
-        m_sources_maybecycle.erase(s);
+        m_targets.remove(s);
+        m_sources.remove(s);
+        m_sources_maybecycle.remove(s);
         m_unknown.remove(s);
     }
 
@@ -742,31 +742,31 @@ namespace smt {
         SASSERT(m_state_ufind.is_root(s1));
         SASSERT(m_state_ufind.is_root(s2));
         if (s1 == s2) return;
-        if (!m_targets.find(s1)->contains(s2)) {
+        if (!m_targets.find(s1).contains(s2)) {
             // add new edge
             STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: new edge! ";);
-            m_targets.find(s1)->insert(s2);
-            m_sources.find(s2)->insert(s1);
-            if (maybecycle) m_sources_maybecycle.find(s2)->insert(s1);
+            m_targets.find(s1).insert(s2);
+            m_sources.find(s2).insert(s1);
+            if (maybecycle) m_sources_maybecycle.find(s2).insert(s1);
         }
-        else if (!maybecycle && m_sources_maybecycle.find(s2)->contains(s1)) {
+        else if (!maybecycle && m_sources_maybecycle.find(s2).contains(s1)) {
             // update existing edge
             STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: update edge! ";);
-            m_sources_maybecycle.find(s2)->remove(s1);
+            m_sources_maybecycle.find(s2).remove(s1);
         }
     }
     void state_graph::remove_edge_core(state s1, state s2) {
-        SASSERT(m_targets.find(s1)->contains(s2));
-        SASSERT(m_sources.find(s2)->contains(s1));
-        m_targets.find(s1)->remove(s2);
-        m_sources.find(s2)->remove(s1);
-        m_sources_maybecycle.find(s2)->remove(s1);
+        SASSERT(m_targets.find(s1).contains(s2));
+        SASSERT(m_sources.find(s2).contains(s1));
+        m_targets.find(s1).remove(s2);
+        m_sources.find(s2).remove(s1);
+        m_sources_maybecycle.find(s2).remove(s1);
     }
     void state_graph::rename_edge_core(state old1, state old2,
                                        state new1, state new2) {
-        SASSERT(m_targets.find(old1)->contains(old2));
-        SASSERT(m_sources.find(old2)->contains(old1));
-        bool maybecycle = m_sources_maybecycle.find(old2)->contains(old1);
+        SASSERT(m_targets.find(old1).contains(old2));
+        SASSERT(m_sources.find(old2).contains(old1));
+        bool maybecycle = m_sources_maybecycle.find(old2).contains(old1);
         remove_edge_core(old1, old2);
         add_edge_core(new1, new2, maybecycle);
     }
@@ -791,10 +791,10 @@ namespace smt {
         m_state_ufind.merge(s1, s2);
         if (m_state_ufind.is_root(s2)) std::swap(s1, s2);
         // rename s2 to s1 in edges
-        for (auto s_to: *m_targets.find(s2)) {
+        for (auto s_to: m_targets.find(s2)) {
             rename_edge_core(s2, s_to, s1, s_to);
         }
-        for (auto s_from: *m_sources.find(s2)) {
+        for (auto s_from: m_sources.find(s2)) {
             rename_edge_core(s_from, s2, s_from, s1);
         }
         remove_state_core(s2);
@@ -825,7 +825,7 @@ namespace smt {
             << std::endl << "  DEBUG: mark live recursive: " << s << " ";);
         if (m_live.contains(s)) return;
         mark_live_core(s);
-        for (auto s_from: *m_sources.find(s)) {
+        for (auto s_from: m_sources.find(s)) {
             mark_live_recursive(s_from);
         }
     }
@@ -841,14 +841,14 @@ namespace smt {
         STRACE("seq_regex_debug", tout
             << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
         if (!m_unknown.contains(s)) return;
-        for (auto s_to: *m_targets.find(s)) {
+        for (auto s_to: m_targets.find(s)) {
             // unknown pointing to live should have been marked as live!
             SASSERT(!m_live.contains(s_to));
             if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return;
         }
         // all states from s are dead
         mark_dead_core(s);
-        for (auto s_from: *m_sources.find(s)) {
+        for (auto s_from: m_sources.find(s)) {
             mark_dead_recursive(s_from);
         }
     }
@@ -874,7 +874,7 @@ namespace smt {
                 visited.insert(x);
                 // recurse backwards only on maybecycle edges
                 // and only on unknown states
-                for (auto y: *m_sources_maybecycle.find(x)) {
+                for (auto y: m_sources_maybecycle.find(x)) {
                     if (m_unknown.contains(y))
                         to_search.push_back(y);
                 }
@@ -883,7 +883,7 @@ namespace smt {
                 resolved.insert(x);
                 to_search.pop_back();
                 // determine in SCC or not
-                for (auto y: *m_sources_maybecycle.find(x)) {
+                for (auto y: m_sources_maybecycle.find(x)) {
                     if (scc.contains(y)) {
                         scc.insert(x);
                         break;
@@ -968,7 +968,7 @@ namespace smt {
         of << "Edges:" << std::endl;
         for (auto s1: m_seen) {
             if (m_state_ufind.is_root(s1)) {
-                of << "  " << s1 << " -> " << *m_targets.find(s1) << std::endl;
+                of << "  " << s1 << " -> " << m_targets.find(s1) << std::endl;
             }
         }
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 39ac16bd2ce..63e6673787e 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -18,7 +18,6 @@ Module Name:
 
 #include "util/scoped_vector.h"
 #include "util/uint_set.h"
-#include "util/uint_map.h"
 #include "util/union_find.h"
 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/seq_rewriter.h"
@@ -54,10 +53,10 @@ namespace smt {
         live and dead SCCs.
     */
     class state_graph {
-        typedef unsigned              state;
-        typedef uint_set              state_set;
-        typedef uint_map<state_set>   edge_rel;
-        typedef basic_union_find      state_ufind;
+        typedef unsigned           state;
+        typedef uint_set           state_set;
+        typedef u_map<state_set>   edge_rel;
+        typedef basic_union_find   state_ufind;
 
     private:
         /*

From 2a735b76e4893be3a91d2c8e2f359434fb8b066e Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 08:07:43 -0400
Subject: [PATCH 36/51] remove unnecessary call to is_ground

---
 src/smt/seq_regex.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 021e946b059..c62800d36a6 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -210,13 +210,16 @@ namespace smt {
                               << "," << r->get_id()
                               << ") ";);
 
-        if (re().is_empty(r)
-            || m_state_graph.is_dead(get_state_id(r))) {
+        if (re().is_empty(r)) {
             th.add_axiom(~lit);
             return true;
         }
-        if (!m.is_ite(r) && is_ground(r)) {
+        if (!m.is_ite(r)) {
             update_state_graph(r);
+            if (m_state_graph.is_dead(get_state_id(r))) {
+                th.add_axiom(~lit);
+                return true;
+            }
         }
 
         if (block_unfolding(lit, idx))

From 448e673e20cff7bfd650ffffd77fbbce5f383317 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 14:15:48 -0400
Subject: [PATCH 37/51] debugging

---
 src/ast/rewriter/seq_rewriter.cpp | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 99635a54529..c9a43e98a93 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2446,15 +2446,15 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
 */
 bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
     unsigned u1, u2;
-    return (m_util.is_const_char(ch1, u1) &&
-            m_util.is_const_char(ch2, u2) &&
+    return (u().is_const_char(ch1, u1) &&
+            u().is_const_char(ch2, u2) &&
             (u1 < u2));
 }
 bool seq_rewriter::eq_char(expr* ch1, expr* ch2) {
     unsigned u1, u2;
     return ((ch1 == ch2) || (
-        m_util.is_const_char(ch1, u1) &&
-        m_util.is_const_char(ch2, u2) &&
+        u().is_const_char(ch1, u1) &&
+        u().is_const_char(ch2, u2) &&
         (u1 == u2)
     ));
 }
@@ -2473,25 +2473,28 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
         - a and b are char <= constraints, or negations of char <= constraints
 */
 bool seq_rewriter::pred_implies(expr* a, expr* b) {
+    STRACE("seq_verbose", tout << "pred_implies: "
+                               << "," << mk_pp(a, m())
+                               << "," << mk_pp(b, m()) << std::endl;);
     expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr,
          *chb1 = nullptr, *chb2 = nullptr, *notb = nullptr;
     if (m().is_not(a, nota) &&
         m().is_not(b, notb)) {
         return pred_implies(notb, nota);
     }
-    else if (m_util.is_char_le(a, cha1, cha2) &&
-             m_util.is_char_le(b, chb1, chb2)) {
+    else if (u().is_char_le(a, cha1, cha2) &&
+             u().is_char_le(b, chb1, chb2)) {
         return (le_char(chb1, cha1) && le_char(cha2, chb2));
     }
-    else if (m_util.is_char_le(a, cha1, cha2) &&
+    else if (u().is_char_le(a, cha1, cha2) &&
              m().is_not(b, notb) &&
-             m_util.is_char_le(notb, chb1, chb2)) {
+             u().is_char_le(notb, chb1, chb2)) {
         return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
                 (lt_char(chb2, cha1) && le_char(cha2, chb1)));
     }
-    else if (m_util.is_char_le(b, chb1, chb2) &&
+    else if (u().is_char_le(b, chb1, chb2) &&
              m().is_not(a, nota) &&
-             m_util.is_char_le(nota, cha1, cha2)) {
+             u().is_char_le(nota, cha1, cha2)) {
         return (le_char(chb1, cha2) && le_char(cha1, chb2));
     }
     else {
@@ -2511,6 +2514,9 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
         - result is in BDD form
 */
 expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
+    STRACE("seq_verbose", tout << "mk_der_op_rec: " << k
+                               << "," << mk_pp(a, m())
+                               << "," << mk_pp(b, m()) << std::endl;);
     // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
@@ -2630,6 +2636,8 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 }
 
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
+    STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m())
+                               << std::endl;);
     // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
@@ -4377,6 +4385,9 @@ void seq_rewriter::op_cache::cleanup() {
     if (m_table.size() >= m_max_cache_size) {
         m_trail.reset();
         m_table.reset();
+        STRACE("seq_regex", tout << "Op cache reset!" << std::endl;);
+        STRACE("seq_regex_brief", tout << " (OP CACHE RESET)";);
+        // trace_and_reset_cache_counts();
     }
 }
 

From 0fd25e08f333c93cc0accd5d74f4557f48f413cb Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 17:39:37 -0400
Subject: [PATCH 38/51] small improvements to seq_regex_brief tracing

---
 src/smt/seq_regex.cpp | 108 +++++++++++++++++++++---------------------
 src/smt/seq_regex.h   |   6 ++-
 2 files changed, 59 insertions(+), 55 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index c62800d36a6..26b52e2c850 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -122,12 +122,8 @@ namespace smt {
         VERIFY(str().is_in_re(e, s, r));
 
         TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << "PIR("
-                 << s->get_id()
-                 << ","
-                 << r->get_id()
-                 << ") ";);
+        STRACE("seq_regex_brief", tout << "PIR(" << mk_pp(s, m) << ","
+                                       << state_str(r) << ") ";);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -204,11 +200,9 @@ namespace smt {
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
         TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << std::endl << "P(" << mk_pp(s, m)
-                              << "," << idx
-                              << "," << r->get_id()
-                              << ") ";);
+        STRACE("seq_regex_brief", tout << std::endl
+                                       << "P(" << mk_pp(s, m) << "@" << idx
+                                       << "," << state_str(r) << ") ";);
 
         if (re().is_empty(r)) {
             th.add_axiom(~lit);
@@ -295,7 +289,7 @@ namespace smt {
 
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
         // timer tm;
-        // std::cout << d->get_id() << " " << tm.get_seconds() << std::endl;
+        // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl;
         //if (tm.get_seconds() > 0.3) 
         //    std::cout << d << std::endl;
         // std::cout.flush();
@@ -440,12 +434,8 @@ namespace smt {
         rewrite(result);
 
         STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << "n("
-                 << r->get_id()
-                 << "->"
-                 << result->get_id()
-                 << ") ";);
+        STRACE("seq_regex_brief", tout << "n(" << state_str(r) << ")="
+                                       << mk_pp(result, m) << " ";);
         seq_rw().trace_and_reset_cache_counts();
 
         return result;
@@ -463,14 +453,8 @@ namespace smt {
         rewrite(result);
 
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << "d("
-                 << mk_pp(hd, m)
-                 << ","
-                 << r->get_id()
-                 << "->"
-                 << result->get_id()
-                 << ") ";);
+        STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")="
+                                       << state_str(result) << " ";);
         seq_rw().trace_and_reset_cache_counts();
 
         /*  If the following lines are enabled instead, we use the
@@ -545,9 +529,10 @@ namespace smt {
 
         TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
         STRACE("seq_regex_brief",
-            tout << std::endl << "PNE(" << e->get_id()
-                              << "," << r->get_id()
-                              << "," << u->get_id()
+            tout << std::endl << "PNE(" << expr_id_str(e)
+                              << "," << state_str(r)
+                              << "," << expr_id_str(u)
+                              << "," << expr_id_str(n)
                               << ") ";);
 
         expr_ref is_nullable = is_nullable_wrapper(r);
@@ -558,8 +543,8 @@ namespace smt {
         expr_ref d(m);
         d = derivative_wrapper(hd, r);
 
-        STRACE("seq_regex_brief", tout << "(d subbed: " << d->get_id() << ") ";);
-        TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;);
+        // STRACE("seq_regex_brief", tout << "(d subbed: " << state_str(d) << ") ";);
+        // TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;);
 
         literal_vector lits;
         lits.push_back(~lit);
@@ -633,10 +618,10 @@ namespace smt {
 
         TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
         STRACE("seq_regex_brief",
-            tout << std::endl << "PE(" << e->get_id()
-                              << "," << r->get_id()
-                              << "," << u->get_id()
-                              << "," << n->get_id()
+            tout << std::endl << "PE(" << expr_id_str(e)
+                              << "," << state_str(r)
+                              << "," << expr_id_str(u)
+                              << "," << expr_id_str(n)
                               << ") ";);
 
         if (m.is_true(is_nullable)) {
@@ -952,32 +937,35 @@ namespace smt {
                 !m_unexplored.contains(m_state_ufind.find(s)));
     }
 
-    void state_graph::pretty_print(std::ofstream& of) {
-        of << "---------- State Graph ----------" << std::endl;
-        of << "Seen:";
+    /*
+        Pretty printing
+    */
+    void state_graph::pretty_print(std::ostream& o) {
+        o << "---------- State Graph ----------" << std::endl
+          << "Seen:";
         for (auto s: m_seen) {
-            of << " " << s;
+            o << " " << s;
             state s_root = m_state_ufind.find(s);
             if (s_root != s)
-                of << "(=" << s_root << ")";
+                o << "(=" << s_root << ")";
         }
-        of << std::endl;
-
-        of << "Live:" << m_live << std::endl;
-        of << "Dead:" << m_dead << std::endl;
-        of << "Unknown:" << m_unknown << std::endl;
-        of << "Unexplored:" << m_unexplored << std::endl;
-
-        of << "Edges:" << std::endl;
+        o << std::endl
+          << "Live:" << m_live << std::endl
+          << "Dead:" << m_dead << std::endl
+          << "Unknown:" << m_unknown << std::endl
+          << "Unexplored:" << m_unexplored << std::endl
+          << "Edges:" << std::endl;
         for (auto s1: m_seen) {
             if (m_state_ufind.is_root(s1)) {
-                of << "  " << s1 << " -> " << m_targets.find(s1) << std::endl;
+                o << "  " << s1 << " -> " << m_targets.find(s1) << std::endl;
             }
         }
-
-        of << "---------------------------------" << std::endl;
-
+        o << "---------------------------------" << std::endl;
     }
+    // std::ostream& operator<<(std::ostream& o, const state_graph& sg) {
+    //     sg.pretty_print(o);
+    //     return o;
+    // }
 
     // **********************************
 
@@ -987,6 +975,8 @@ namespace smt {
             m_state_to_expr.push_back(e);
             unsigned new_id = m_state_to_expr.size();
             m_expr_to_state.insert(e, new_id);
+            STRACE("seq_regex_brief", tout << "new(" << expr_id_str(e)
+                                           << ")=" << state_str(e) << " ";);
         }
         return m_expr_to_state.find(e);
     }
@@ -1014,10 +1004,10 @@ namespace smt {
         }
         STRACE("seq_regex", tout << "Updating state graph for regex "
                                  << mk_pp(r, m) << ") ";);
-        STRACE("seq_regex_brief", tout
-            << std::endl << "USG(" << r->get_id() << ") ";);
         // Add state
         m_state_graph.add_state(r_id);
+        STRACE("seq_regex_brief", tout << std::endl << "USG("
+                                       << state_str(r) << ") ";);
         expr_ref r_nullable = is_nullable_wrapper(r);
         if (m.is_true(r_nullable)) {
             m_state_graph.mark_live(r_id);
@@ -1043,4 +1033,14 @@ namespace smt {
         return true;
     }
 
+    std::string seq_regex::state_str(expr* e) {
+        if (m_expr_to_state.contains(e))
+            return std::to_string(get_state_id(e));
+        else
+            return expr_id_str(e);
+    }
+    std::string seq_regex::expr_id_str(expr* e) {
+        return std::string("id") + std::to_string(e->get_id());
+    }
+
 }
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 63e6673787e..494a7842a09 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -153,7 +153,7 @@ namespace smt {
         /*
             Pretty printing
         */
-        void pretty_print(std::ofstream& of);
+        void pretty_print(std::ostream& o);
 
     };
 
@@ -204,6 +204,10 @@ namespace smt {
         // Update the graph
         bool update_state_graph(expr* r);
 
+        // Printing for seq_regex_brief
+        std::string state_str(expr* e);
+        std::string expr_id_str(expr* e);
+
         // ********************
 
         seq_util& u();

From 1fc751f5bfa59edef4669af248ae3e27d3da06b7 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Wed, 1 Jul 2020 18:47:25 -0400
Subject: [PATCH 39/51] fix bug on evil2 example

---
 src/ast/rewriter/seq_rewriter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 272c25f0417..bd399f8bded 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2692,6 +2692,7 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
         }
         else {
             result = m().mk_true();
+            result = re_predicate(result, seq_sort);
         }
     }
     else if (m().is_not(cond, c1)) {

From 0fa8396f9e90d1638fc763761eef7e519f28d94f Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 2 Jul 2020 14:46:20 -0400
Subject: [PATCH 40/51] save work

---
 src/smt/seq_regex.cpp | 96 +++++++++++++++++++++++++++++++++++++++----
 src/smt/seq_regex.h   |  3 +-
 2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 26b52e2c850..7d5c385e670 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -204,24 +204,90 @@ namespace smt {
                                        << "P(" << mk_pp(s, m) << "@" << idx
                                        << "," << state_str(r) << ") ";);
 
+        expr* cond = nullptr, *tt = nullptr, *el = nullptr;
         if (re().is_empty(r)) {
+            STRACE("seq_regex_brief", tout << "f ";);
             th.add_axiom(~lit);
             return true;
         }
-        if (!m.is_ite(r)) {
-            update_state_graph(r);
-            if (m_state_graph.is_dead(get_state_id(r))) {
-                th.add_axiom(~lit);
-                return true;
-            }
+        else if (m.is_ite(r, cond, tt, el)) {
+            STRACE("seq_regex_brief", tout << "??? ";);
+            return false;
+
+            // literal lcond = th.mk_literal(cond);
+            // ctx.mark_as_relevant(lcond);
+            // trigger = lcond;
+            // expr_ref ncond(m), acc1(m), acc2(m),
+            //          choice1(m), choice2(m), choice(m);
+            // ncond = m.mk_not(cond);
+            // acc1 = sk().mk_accept(s, a().mk_int(idx), tt);
+            // acc2 = sk().mk_accept(s, a().mk_int(idx), el);
+            // choice1 = m.mk_and(cond, acc1);
+            // choice2 = m.mk_and(ncond, acc2);
+            // choice = m.mk_or(choice1, choice2);
+            // th.propagate_lit(nullptr, 1, &lit, th.mk_literal(choice));
+            // // th.propagate_lit(th.mk_literal(choice));
+            // // literal_vector choice_lit;
+            // // choice_lit.push_back(th.mk_literal(choice));
+            // // th.add_axiom(choice_lit);
+            // return true;
         }
 
-        if (block_unfolding(lit, idx))
+        update_state_graph(r);
+
+        if (m_state_graph.is_dead(get_state_id(r))) {
+            STRACE("seq_regex_brief", tout << "f ";);
+            th.add_axiom(~lit);
             return true;
+        }
+
+        if (block_unfolding(lit, idx)) {
+            STRACE("seq_regex_brief", tout << "(blocked) ";);
+            return true;
+        }
+
+        // Unfold
+        STRACE("seq_regex_brief", tout << "u ";);
+        expr_ref is_nullable = is_nullable_wrapper(r);
+        expr_ref hd = th.mk_nth(s, i);
+        expr_ref deriv(m);
+        deriv = derivative_wrapper(hd, r);
 
-        propagate_nullable(lit, s, idx, r);
+        literal_vector unfold_disj;
+        unfold_disj.push_back(~lit);
+        unfold_disj.push_back(th.mk_literal(is_nullable));
+        expr_ref_pair_vector cofactors(m);
+        get_cofactors(deriv, cofactors);
+        for (auto const& p : cofactors) {
+            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
+            expr_ref cond(p.first, m);
+            expr_ref deriv_leaf(p.second, m);
+            expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
+            expr_ref choice(m);
+            choice = m.mk_and(cond, acc);
+            unfold_disj.push_back(th.mk_literal(choice));
+            STRACE("seq_regex_debug", tout << "adding choice: "
+                                           << mk_pp(choice, m) << std::endl;);
+        }
+        th.add_axiom(unfold_disj);
+        return true;
+        
+        // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m);
+        // head = th.mk_nth(s, i);
+        // deriv = derivative_wrapper(head, r);
+        // th.add_axiom(~lit, ~th.mk_literal(is_nullable));
+        // 
+        // acc_next = sk().mk_accept(s, a().mk_int(idx + 1), deriv);
+        // unfold = m.mk_or(is_nullable, acc_next);
+        // 
+        // literal_vector unfold_lit;
+        // unfold_lit.push_back(th.mk_literal(unfold));
+        // th.add_axiom(unfold_lit);
+        // return true;
 
-        return propagate_derivative(lit, e, s, i, idx, r, trigger);
+        // propagate_nullable(lit, s, idx, r);
+        // 
+        // return propagate_derivative(lit, e, s, i, idx, r, trigger);
     }
 
     /**
@@ -251,15 +317,18 @@ namespace smt {
 
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
+            STRACE("seq_regex_brief", tout << "t ";);
             th.propagate_lit(nullptr, 1,&lit, len_s_ge_i);
         }
         else if (m.is_false(is_nullable)) {
+            STRACE("seq_regex_brief", tout << "f ";);
             th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1));
             // @EXP (experimental change)
             //unsigned len = std::max(1u, re().min_length(r));
             //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r)));
         }
         else {
+            STRACE("seq_regex_brief", tout << "? ";);
             literal is_nullable_lit = th.mk_literal(is_nullable);
             ctx.mark_as_relevant(is_nullable_lit);
             literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
@@ -288,6 +357,11 @@ namespace smt {
         expr_ref head = th.mk_nth(s, i);
 
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
+
+        // TODO
+        // conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)));
+        // th.add_axiom(conds);
+
         // timer tm;
         // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl;
         //if (tm.get_seconds() > 0.3) 
@@ -306,14 +380,17 @@ namespace smt {
             literal lcond = th.mk_literal(subst(cond, sub));
             switch (ctx.get_assignment(lcond)) {
             case l_true:
+                STRACE("seq_regex_brief", tout << "t ";);
                 conds.push_back(~lcond);
                 d = tt;
                 break;
             case l_false:
+                STRACE("seq_regex_brief", tout << "f ";);
                 conds.push_back(lcond);
                 d = el;
                 break;
             case l_undef:
+                STRACE("seq_regex_brief", tout << "? ";);
 #if 1
                 ctx.mark_as_relevant(lcond);
                 trigger = lcond;
@@ -449,6 +526,7 @@ namespace smt {
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
 
+        // expr_ref result = seq_rw().mk_derivative(hd, r);
         expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 494a7842a09..5b27fd50b7b 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -199,7 +199,8 @@ namespace smt {
         // Convert between expressions and states (IDs)
         unsigned get_state_id(expr* e);
         expr* get_expr_from_id(unsigned id);
-        // Cycle-detection heuristic (sound but not complete)
+        // Cycle-detection heuristic
+        // Note: Doesn't need to be sound or complete (doesn't affect soundness)
         bool can_be_in_cycle(expr* e1, expr* e2);
         // Update the graph
         bool update_state_graph(expr* r);

From 5623024d3e754842c923a9fab1464d2a11855220 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 2 Jul 2020 15:57:39 -0400
Subject: [PATCH 41/51] new propagate code

---
 src/smt/seq_regex.cpp | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 7d5c385e670..1850b3f415e 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -206,14 +206,17 @@ namespace smt {
 
         expr* cond = nullptr, *tt = nullptr, *el = nullptr;
         if (re().is_empty(r)) {
-            STRACE("seq_regex_brief", tout << "f ";);
+            STRACE("seq_regex_brief", tout << "(empty) ";);
             th.add_axiom(~lit);
             return true;
         }
         else if (m.is_ite(r, cond, tt, el)) {
-            STRACE("seq_regex_brief", tout << "??? ";);
+            STRACE("seq_regex_brief", tout << "(ite) ";);
             return false;
 
+            // @EXP (Experimental change)
+            // This code tries to unfold the derivative one step at a time
+            // and propagate the if the elses.
             // literal lcond = th.mk_literal(cond);
             // ctx.mark_as_relevant(lcond);
             // trigger = lcond;
@@ -236,7 +239,7 @@ namespace smt {
         update_state_graph(r);
 
         if (m_state_graph.is_dead(get_state_id(r))) {
-            STRACE("seq_regex_brief", tout << "f ";);
+            STRACE("seq_regex_brief", tout << "(dead) ";);
             th.add_axiom(~lit);
             return true;
         }
@@ -247,15 +250,25 @@ namespace smt {
         }
 
         // Unfold
-        STRACE("seq_regex_brief", tout << "u ";);
-        expr_ref is_nullable = is_nullable_wrapper(r);
+        STRACE("seq_regex_brief", tout << "(unfold) ";);
+
+        // First axiom: accept(s, idx, r) => len(s) >= idx
+        literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
+        th.add_axiom(~lit, len_s_ge_i);
+
+        // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable
+        literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
+        literal is_nullable = th.mk_literal(is_nullable_wrapper(r));
+        th.add_axiom(~lit, ~len_s_le_i, is_nullable);
+
+        // Third axiom: accept(s, idx, r) and not (len_s_le_i) =>
+        //              accept(s, idx+1, dr) for some derivative r
+        literal_vector accept_next;
         expr_ref hd = th.mk_nth(s, i);
         expr_ref deriv(m);
         deriv = derivative_wrapper(hd, r);
-
-        literal_vector unfold_disj;
-        unfold_disj.push_back(~lit);
-        unfold_disj.push_back(th.mk_literal(is_nullable));
+        accept_next.push_back(~lit);
+        accept_next.push_back(len_s_le_i);
         expr_ref_pair_vector cofactors(m);
         get_cofactors(deriv, cofactors);
         for (auto const& p : cofactors) {
@@ -265,13 +278,15 @@ namespace smt {
             expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
             expr_ref choice(m);
             choice = m.mk_and(cond, acc);
-            unfold_disj.push_back(th.mk_literal(choice));
+            accept_next.push_back(th.mk_literal(choice));
             STRACE("seq_regex_debug", tout << "adding choice: "
                                            << mk_pp(choice, m) << std::endl;);
         }
-        th.add_axiom(unfold_disj);
+        th.add_axiom(accept_next);
+
+        // Done (successful propagation)
         return true;
-        
+
         // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m);
         // head = th.mk_nth(s, i);
         // deriv = derivative_wrapper(head, r);

From 17e0ef19bec3d7489e2ecb5b3301a643e0ed5d65 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 2 Jul 2020 21:55:35 -0400
Subject: [PATCH 42/51] work in progress on using same seq sort for deriv calls

---
 src/ast/rewriter/seq_rewriter.cpp |  2 +-
 src/smt/seq_regex.cpp             | 14 ++++++++++++++
 src/smt/seq_regex.h               |  7 +++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index bd399f8bded..1a94a947a45 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -4392,7 +4392,7 @@ void seq_rewriter::op_cache::cleanup() {
         m_trail.reset();
         m_table.reset();
         STRACE("seq_regex", tout << "Op cache reset!" << std::endl;);
-        STRACE("seq_regex_brief", tout << " (OP CACHE RESET)";);
+        STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";);
         // trace_and_reset_cache_counts();
     }
 }
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 1850b3f415e..f389b5e97f2 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,6 +25,7 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
+        m_deriv_head(),
         m_state_graph(),
         m_expr_to_state(),
         m_state_to_expr(m)
@@ -748,6 +749,19 @@ namespace smt {
         }        
     }
 
+    expr_ref get_head_var(sort* seq_sort) {
+        expr_ref result(m);
+        if (m_deriv_head.contains(seq_sort)) {
+            result = m_deriv_head.find(seq_sort);
+            STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m););
+        }
+        else {
+            result = m.mk_fresh_const("re.char", seq_sort);
+            STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m););
+        }
+        return result;
+    }
+
     expr_ref seq_regex::mk_first(expr* r, expr* n) {
         sort* elem_sort = nullptr, *seq_sort = nullptr;
         VERIFY(u().is_re(r, seq_sort));
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 5b27fd50b7b..bf86e4a6e52 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -189,6 +189,13 @@ namespace smt {
         vector<s_in_re>                  m_s_in_re;
         scoped_vector<propagation_lit>   m_to_propagate;
 
+        /*
+            ID for fresh variable for derivative
+        */
+        ptr_addr_map<sort, expr_ref>   m_deriv_head;
+        // Get var for head based on sort
+        expr_ref get_head_var(sort* seq_sort);
+
         /*
             state_graph for dead state detection, and associated methods
         */

From 6cb1ef95eafc1d2ea428a85e856df2ee8a40bc28 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 3 Jul 2020 08:17:38 -0400
Subject: [PATCH 43/51] avoid re-computing derivatives: use same head var for
 every derivative call

---
 src/smt/seq_regex.cpp | 49 +++++++++++++++++++++++++++----------------
 src/smt/seq_regex.h   |  7 -------
 2 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index f389b5e97f2..8cd56672dae 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,7 +25,6 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
-        m_deriv_head(),
         m_state_graph(),
         m_expr_to_state(),
         m_state_to_expr(m)
@@ -212,12 +211,13 @@ namespace smt {
             return true;
         }
         else if (m.is_ite(r, cond, tt, el)) {
+            UNREACHABLE();
             STRACE("seq_regex_brief", tout << "(ite) ";);
             return false;
 
             // @EXP (Experimental change)
             // This code tries to unfold the derivative one step at a time
-            // and propagate the if the elses.
+            // and propagate the if-then-elses.
             // literal lcond = th.mk_literal(cond);
             // ctx.mark_as_relevant(lcond);
             // trigger = lcond;
@@ -250,7 +250,7 @@ namespace smt {
             return true;
         }
 
-        // Unfold
+        // Unfold the constraint into 3 axioms
         STRACE("seq_regex_brief", tout << "(unfold) ";);
 
         // First axiom: accept(s, idx, r) => len(s) >= idx
@@ -285,7 +285,7 @@ namespace smt {
         }
         th.add_axiom(accept_next);
 
-        // Done (successful propagation)
+        // Propagated successfully
         return true;
 
         // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m);
@@ -542,10 +542,20 @@ namespace smt {
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
         STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
 
-        // expr_ref result = seq_rw().mk_derivative(hd, r);
-        expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
+        // Use canonical variable for head; substitute with hd later
+        // sort* seq_sort = nullptr;
+        // VERIFY(u().is_re(r, seq_sort));
+        // expr_ref hd_canon = get_head_var(sq_sort);
+        expr_ref hd_canon(m.mk_var(0, m.get_sort(hd)), m);
+        expr_ref result(re().mk_derivative(hd_canon, r), m);
         rewrite(result);
 
+        // Substitute
+        var_subst subst(m);
+        expr_ref_vector sub(m);
+        sub.push_back(hd);
+        result = subst(result, sub);
+
         STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")="
                                        << state_str(result) << " ";);
@@ -749,18 +759,21 @@ namespace smt {
         }        
     }
 
-    expr_ref get_head_var(sort* seq_sort) {
-        expr_ref result(m);
-        if (m_deriv_head.contains(seq_sort)) {
-            result = m_deriv_head.find(seq_sort);
-            STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m););
-        }
-        else {
-            result = m.mk_fresh_const("re.char", seq_sort);
-            STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m););
-        }
-        return result;
-    }
+    // @EXP: Experimental change
+    // Some code to compute a canonical head variable, but I think
+    // this stuff is unnecessary.
+    // expr_ref seq_regex::get_head_var(sort* seq_sort) {
+    //     expr_ref result(m);
+    //     if (m_deriv_head.contains(seq_sort)) {
+    //         result = m_deriv_head.find(seq_sort);
+    //         STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m););
+    //     }
+    //     else {
+    //         result = m.mk_fresh_const("re.char", seq_sort);
+    //         STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m););
+    //     }
+    //     return result;
+    // }
 
     expr_ref seq_regex::mk_first(expr* r, expr* n) {
         sort* elem_sort = nullptr, *seq_sort = nullptr;
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index bf86e4a6e52..5b27fd50b7b 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -189,13 +189,6 @@ namespace smt {
         vector<s_in_re>                  m_s_in_re;
         scoped_vector<propagation_lit>   m_to_propagate;
 
-        /*
-            ID for fresh variable for derivative
-        */
-        ptr_addr_map<sort, expr_ref>   m_deriv_head;
-        // Get var for head based on sort
-        expr_ref get_head_var(sort* seq_sort);
-
         /*
             state_graph for dead state detection, and associated methods
         */

From becbdbaaa71fca8a0176d7dcb00df535ae048388 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 3 Jul 2020 11:49:06 -0400
Subject: [PATCH 44/51] use min_length on regexes to prune search

---
 src/smt/seq_regex.cpp | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 8cd56672dae..4f9068aec77 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -253,9 +253,16 @@ namespace smt {
         // Unfold the constraint into 3 axioms
         STRACE("seq_regex_brief", tout << "(unfold) ";);
 
-        // First axiom: accept(s, idx, r) => len(s) >= idx
-        literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
-        th.add_axiom(~lit, len_s_ge_i);
+        // @EXP: First axiom: accept(s, idx, r) => len(s) >= idx + min_len(r);
+        expr_ref s_to_re(re().mk_to_re(s), m);
+        expr_ref s_plus_r(re().mk_concat(s_to_re, r), m);
+        unsigned min_len = re().min_length(s_plus_r);
+        literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len);
+        th.add_axiom(~lit, len_s_ge_min);
+
+        // // First axiom: accept(s, idx, r) => len(s) >= idx
+        // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
+        // th.add_axiom(~lit, len_s_ge_i);
 
         // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable
         literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
@@ -277,10 +284,17 @@ namespace smt {
             expr_ref cond(p.first, m);
             expr_ref deriv_leaf(p.second, m);
             expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
-            expr_ref choice(m);
-            choice = m.mk_and(cond, acc);
-            accept_next.push_back(th.mk_literal(choice));
-            STRACE("seq_regex_debug", tout << "adding choice: "
+            expr_ref choice(m.mk_and(cond, acc), m);
+            literal choice_lit = th.mk_literal(choice);
+            accept_next.push_back(choice_lit);
+            // Prioritize unvisited states
+            // if (!m_state_graph.is_done(get_state_id(deriv_leaf))) {
+            //     // @EXP Unsound test: only push if not done
+            //     accept_next.push_back(choice_lit);
+            //     // @EXP This didn't work -- just marking as relevant
+            //     // ctx.mark_as_relevant(choice_lit);
+            // }
+            STRACE("seq_regex_debug", tout << "added choice: "
                                            << mk_pp(choice, m) << std::endl;);
         }
         th.add_axiom(accept_next);

From a1da9ae6c0b932dce1d53dbf76590fdbe00664d7 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 3 Jul 2020 12:54:03 -0400
Subject: [PATCH 45/51] simple implementation of can_be_in_cycle using rank
 function idea

---
 src/smt/seq_regex.cpp | 46 ++++++++++++++++++++++++++++++++++++++++---
 src/smt/seq_regex.h   |  4 +++-
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 4f9068aec77..7b7e405af0b 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -1120,9 +1120,49 @@ namespace smt {
         return m_state_to_expr.get(id);
     }
 
-    bool seq_regex::can_be_in_cycle(expr *e1, expr *e2) {
-        // Simple placeholder. TODO: Implement full check
-        return true;
+
+    unsigned seq_regex::concat_length(expr* r) {
+        // length of the concatenations at the top level
+        expr *r1 = nullptr, *r2 = nullptr;
+        if (re().is_concat(r, r1, r2))
+            return concat_length(r1) + concat_length(r2);
+        else
+            return 1;
+    }
+
+    unsigned seq_regex::re_rank(expr* r) {
+        SASSERT(u.is_re(r));
+        expr *r1 = nullptr, *r2 = nullptr, *s = nullptr;
+        unsigned lo = 0, hi = 0;
+        if (re().is_empty(r))
+            return 0;
+        if (re().is_concat(r, r1, r2))
+            return std::max(re_rank(r1) + concat_length(r2), re_rank(r2));
+        if (re().is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
+            return std::max(re_rank(r1), re_rank(r2));
+        if (re().is_intersection(r, r1, r2) || re().is_diff(r, r1, r2))
+            return re_rank(r1) + re_rank(r2);
+        if (re().is_plus(r, r1) || re().is_star(r, r1))
+            return re_rank(r1) + 1;
+        if (re().is_loop(r, r1, lo) || re().is_loop(r, r1, lo, hi))
+            return re_rank(r1) + lo;
+        if (re().is_reverse(r, r1) || re().is_opt(r, r1))
+            // in reverse case, should be r1 is a string
+            return re_rank(r1);
+        if (re().is_to_re(r, s))
+            return u().str.min_length(s);
+        // Else: range, pred, char, full_seq, derivative
+        return 1;
+    }
+
+    bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) {
+        // @EXP (experimental change): Use a "rank" function, which is
+        // a pseudo-topological order on the state graph, to detect when r2
+        // is a simpler regex than r1
+        unsigned k1 = re_rank(r1);
+        unsigned k2 = re_rank(r2);
+        SASSERT(k1 >= k2);
+        return (k1 == k2);
     }
 
     /*
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 5b27fd50b7b..e6b7afe41f0 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -201,7 +201,9 @@ namespace smt {
         expr* get_expr_from_id(unsigned id);
         // Cycle-detection heuristic
         // Note: Doesn't need to be sound or complete (doesn't affect soundness)
-        bool can_be_in_cycle(expr* e1, expr* e2);
+        unsigned concat_length(expr* r);
+        unsigned re_rank(expr* r);
+        bool can_be_in_cycle(expr* r1, expr* r2);
         // Update the graph
         bool update_state_graph(expr* r);
 

From 20000126b5f5f7e185bd87b052bc59f911749240 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 3 Jul 2020 13:55:23 -0400
Subject: [PATCH 46/51] add a disabled experimental change

---
 src/smt/seq_regex.cpp | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 7b7e405af0b..93005fdfe1b 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -250,27 +250,30 @@ namespace smt {
             return true;
         }
 
-        // Unfold the constraint into 3 axioms
         STRACE("seq_regex_brief", tout << "(unfold) ";);
 
-        // @EXP: First axiom: accept(s, idx, r) => len(s) >= idx + min_len(r);
+        // First axiom: use min_length to prune search
+        // accept(s, idx, r) => len(s) >= idx + min_len(r)
         expr_ref s_to_re(re().mk_to_re(s), m);
         expr_ref s_plus_r(re().mk_concat(s_to_re, r), m);
         unsigned min_len = re().min_length(s_plus_r);
         literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len);
         th.add_axiom(~lit, len_s_ge_min);
 
-        // // First axiom: accept(s, idx, r) => len(s) >= idx
+        // Old first axiom: accept(s, idx, r) => len(s) >= idx
         // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         // th.add_axiom(~lit, len_s_ge_i);
 
-        // Second axiom: accept(s, idx, r) and len(s) <= idx => r nullable
+        // Second axiom: nullable check
+        // accept(s, idx, r) and len(s) <= idx => r nullable
         literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
         literal is_nullable = th.mk_literal(is_nullable_wrapper(r));
         th.add_axiom(~lit, ~len_s_le_i, is_nullable);
 
-        // Third axiom: accept(s, idx, r) and not (len_s_le_i) =>
-        //              accept(s, idx+1, dr) for some derivative r
+        // Third axiom: derivative unfolding
+        // accept(s, idx, r) and not (len_s_le_i) =>
+        //     OR_(cond, dr) cond and accept(s, idx+1, dr)
+        // over all derivatives dr and conditions cond on the head
         literal_vector accept_next;
         expr_ref hd = th.mk_nth(s, i);
         expr_ref deriv(m);
@@ -283,6 +286,14 @@ namespace smt {
             if (m.is_false(p.first) || re().is_empty(p.second)) continue;
             expr_ref cond(p.first, m);
             expr_ref deriv_leaf(p.second, m);
+
+            // @EXP (Experimental change)
+            // Skip searching when can_be_in_cycle returns true
+            // Result: Besides being unsound as written, this is not
+            // fine-grained enough. In case of intersections, many
+            // edges return true for can_be_in_cycle
+            // if (can_be_in_cycle(deriv, deriv_leaf)) continue;
+
             expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
             expr_ref choice(m.mk_and(cond, acc), m);
             literal choice_lit = th.mk_literal(choice);
@@ -1131,7 +1142,7 @@ namespace smt {
     }
 
     unsigned seq_regex::re_rank(expr* r) {
-        SASSERT(u.is_re(r));
+        SASSERT(u().is_re(r));
         expr *r1 = nullptr, *r2 = nullptr, *s = nullptr;
         unsigned lo = 0, hi = 0;
         if (re().is_empty(r))
@@ -1162,6 +1173,7 @@ namespace smt {
         unsigned k1 = re_rank(r1);
         unsigned k2 = re_rank(r2);
         SASSERT(k1 >= k2);
+        STRACE("seq_regex_brief", tout << "(k:" << k1 << "->" << k2 << ")";);
         return (k1 == k2);
     }
 

From 3f55875cc00cb23c2db6d62f4f7003cb861c75db Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Fri, 3 Jul 2020 14:46:29 -0400
Subject: [PATCH 47/51] minor cleanup comments, etc.

---
 src/ast/rewriter/seq_rewriter.cpp | 65 +++++--------------------------
 src/smt/seq_regex.cpp             |  6 +--
 src/smt/seq_regex.h               |  5 ++-
 3 files changed, 15 insertions(+), 61 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 1a94a947a45..d4d7468d21c 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2192,46 +2192,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) {
     return result;
 }
 
-// @EXP (experimental change)
-// void seq_rewriter::mk_nullable_not(expr* a1, expr_ref& result) {
-//     expr *s1 = nullptr, *r1 = nullptr;
-//     if (str().is_in_re(a1, s1, r1)) {
-//         SASSERT(str().is_empty(s1));
-//         result = re().mk_complement(r1);
-//         result = re().mk_in_re(s1, result);
-//     }
-//     else {
-//         m_br.mk_not(a1, result);
-//     }
-// }
-// void seq_rewriter::mk_nullable_and(expr* a1, expr* a2, expr_ref& result) {
-//     expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
-//     if (str().is_in_re(a1, s1, r1) &&
-//         str().is_in_re(a2, s2, r2)) {
-//         SASSERT(str().is_empty(s1));
-//         SASSERT(str().is_empty(s2));
-//         result = re().mk_inter(r1, r2);
-//         result = re().mk_in_re(s1, result);
-//     }
-//     else {
-//         m_br.mk_and(a1, a2, result);
-//     }
-// }
-// void seq_rewriter::mk_nullable_or(expr* a1, expr* a2, expr_ref& result) {
-//     expr *s1 = nullptr, *s2 = nullptr, *r1 = nullptr, *r2 = nullptr;
-//     if (str().is_in_re(a1, s1, r1) &&
-//         str().is_in_re(a2, s2, r2)) {
-//         SASSERT(str().is_empty(s1));
-//         SASSERT(str().is_empty(s2));
-//         result = re().mk_union(r1, r2);
-//         result = re().mk_in_re(s1, result);
-//     }
-//     else {
-//         m_br.mk_or(a1, a2, result);
-//     }
-// }
 expr_ref seq_rewriter::is_nullable_rec(expr* r) {
-    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     SASSERT(m_util.is_re(r) || m_util.is_seq(r));
     expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr;
     sort* seq_sort = nullptr;
@@ -2239,22 +2200,15 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
     zstring s1;
     expr_ref result(m());
     if (re().is_concat(r, r1, r2) ||
-        re().is_intersection(r, r1, r2)) {
+        re().is_intersection(r, r1, r2)) { 
         m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
-        // @EXP (experimental change)
-        // mk_nullable_and(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_union(r, r1, r2)) {
         m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
-        // @EXP (experimental change)
-        // mk_nullable_or(is_nullable(r1), is_nullable(r2), result);
     }
     else if (re().is_diff(r, r1, r2)) {
         m_br.mk_not(is_nullable(r2), result);
         m_br.mk_and(result, is_nullable(r1), result);
-        // @EXP (experimental change)
-        // mk_nullable_not(is_nullable(r2), result);
-        // mk_nullable_and(result, is_nullable(r1), result);
     }
     else if (re().is_star(r) || 
         re().is_opt(r) ||
@@ -2277,8 +2231,6 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
     }
     else if (re().is_complement(r, r1)) {
         m_br.mk_not(is_nullable(r1), result);
-        // @EXP (experimental change)
-        // mk_nullable_not(is_nullable(r1), result);
     }
     else if (re().is_to_re(r, r1)) {        
         result = is_nullable(r1);
@@ -2417,7 +2369,6 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
     STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m())
                                << "," << mk_pp(r, m()) << std::endl;);
-    // STRACE("seq_regex_brief", tout << "d";);
     expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
     if (!result) {
         result = mk_derivative_rec(ele, r);
@@ -2517,7 +2468,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     STRACE("seq_verbose", tout << "mk_der_op_rec: " << k
                                << "," << mk_pp(a, m())
                                << "," << mk_pp(b, m()) << std::endl;);
-    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
     expr_ref result(m());
@@ -2638,7 +2588,6 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
 expr_ref seq_rewriter::mk_der_compl(expr* r) {
     STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m())
                                << std::endl;);
-    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
@@ -2721,7 +2670,6 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
 }
 
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
-    // STRACE("seq_regex_brief", tout << ".";); // recursive call
     expr_ref result(m());
     sort* seq_sort = nullptr, *ele_sort = nullptr;
     VERIFY(m_util.is_re(r, seq_sort));
@@ -4393,7 +4341,6 @@ void seq_rewriter::op_cache::cleanup() {
         m_table.reset();
         STRACE("seq_regex", tout << "Op cache reset!" << std::endl;);
         STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";);
-        // trace_and_reset_cache_counts();
     }
 }
 
@@ -4401,10 +4348,18 @@ void seq_rewriter::op_cache::cleanup() {
 unsigned seq_rewriter::op_cache::cache_hits = 0;
 unsigned seq_rewriter::op_cache::cache_misses = 0;
 
+/*
+    Reset the tracing counts of # of cache hits and misses, and
+    report them.
+
+    Suppress reporting in the cases of 0/0 or 1/1 hits.
+
+    Hits and misses are tracked globally using static variables
+    m_op_cache.cache_hits and m_op_cache.cache_misses.
+*/
 void seq_rewriter::trace_and_reset_cache_counts() {
     unsigned hits = m_op_cache.cache_hits;
     unsigned misses = m_op_cache.cache_misses;
-    // Suppress tracing of "0/0 hits" or "1/1 hits"
     if (hits >= 2 || misses >= 1) {
         STRACE("seq_regex",
             tout << "Op cache hits: " << hits
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 93005fdfe1b..5544879253d 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,8 +25,6 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
-        m_state_graph(),
-        m_expr_to_state(),
         m_state_to_expr(m)
     {}
 
@@ -404,9 +402,9 @@ namespace smt {
         // th.add_axiom(conds);
 
         // timer tm;
-        // std::cout << state_str(d) << " " << tm.get_seconds() << std::endl;
+        // std::cout << d->get_id() << " " << tm.get_seconds() << "\n";
         //if (tm.get_seconds() > 0.3) 
-        //    std::cout << d << std::endl;
+        //    std::cout << d << "\n";
         // std::cout.flush();
         literal_vector conds;
         conds.push_back(~lit);
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index e6b7afe41f0..2b861fd1aad 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -87,7 +87,7 @@ namespace smt {
             Edges are saved in both from and to maps.
             A subset of edges are also marked as possibly being
             part of a cycle by being stored in m_sources_maybecycle.
-            
+
             Invariants:
             - TODO
         */
@@ -136,7 +136,8 @@ namespace smt {
             - outgoing edges are not added from a done state
             - a done state is not marked as live
             - edges are not added creating a cycle containing an edge with
-              maybecycle = false
+              maybecycle = false (this is not necessary for soundness, but
+              prevents completeness for successfully detecting dead states)
         */
         void add_state(state s);
         void add_edge(state s1, state s2, bool maybecycle);

From 0c33f03b28b28d4667fbf70f2876579588e048ea Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 27 Jul 2020 18:57:27 -0400
Subject: [PATCH 48/51] seq_rewriter cleanup for PR

---
 src/ast/rewriter/seq_rewriter.cpp | 79 +++++++------------------------
 src/ast/rewriter/seq_rewriter.h   |  5 --
 2 files changed, 18 insertions(+), 66 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 9d1ea3cb1d0..74286b3b383 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -2205,7 +2205,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) {
     expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
     if (!result) {
         result = is_nullable_rec(r);
-        m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);
+        m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);        
     }
     STRACE("seq_verbose", tout << "is_nullable result: "
                                << mk_pp(result, m()) << std::endl;);
@@ -2417,20 +2417,14 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
 */
 bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
     unsigned u1, u2;
-    return (u().is_const_char(ch1, u1) &&
-            u().is_const_char(ch2, u2) &&
-            (u1 < u2));
+    return u().is_const_char(ch1, u1) &&
+           u().is_const_char(ch2, u2) && (u1 < u2);
 }
 bool seq_rewriter::eq_char(expr* ch1, expr* ch2) {
-    unsigned u1, u2;
-    return ((ch1 == ch2) || (
-        u().is_const_char(ch1, u1) &&
-        u().is_const_char(ch2, u2) &&
-        (u1 == u2)
-    ));
+    return ch1 == ch2;
 }
 bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
-    return (eq_char(ch1, ch2) || lt_char(ch1, ch2));
+    return eq_char(ch1, ch2) || lt_char(ch1, ch2);
 }
 
 /*
@@ -2441,7 +2435,7 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
     Return true if we deduce that a implies b, false if unknown.
 
     Current cases handled:
-        - a and b are char <= constraints, or negations of char <= constraints
+    - a and b are char <= constraints, or negations of char <= constraints
 */
 bool seq_rewriter::pred_implies(expr* a, expr* b) {
     STRACE("seq_verbose", tout << "pred_implies: "
@@ -2455,22 +2449,20 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
     }
     else if (u().is_char_le(a, cha1, cha2) &&
              u().is_char_le(b, chb1, chb2)) {
-        return (le_char(chb1, cha1) && le_char(cha2, chb2));
+        return le_char(chb1, cha1) && le_char(cha2, chb2);
     }
     else if (u().is_char_le(a, cha1, cha2) &&
              m().is_not(b, notb) &&
              u().is_char_le(notb, chb1, chb2)) {
-        return ((le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
-                (lt_char(chb2, cha1) && le_char(cha2, chb1)));
+        return (le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
+               (lt_char(chb2, cha1) && le_char(cha2, chb1));
     }
     else if (u().is_char_le(b, chb1, chb2) &&
              m().is_not(a, nota) &&
              u().is_char_le(nota, cha1, cha2)) {
-        return (le_char(chb1, cha2) && le_char(cha1, chb2));
-    }
-    else {
-        return false;
+        return le_char(chb1, cha2) && le_char(cha1, chb2);
     }
+    return false;
 }
 
 /*
@@ -2491,16 +2483,17 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
     expr_ref result(m());
+    // Simplify if-then-elses whenever possible
     auto mk_ite = [&](expr* c, expr* a, expr* b) {
         return (a == b) ? a : m().mk_ite(c, a, b);
     };
-    // @EXP (experimental change)
     // Use character code to order conditions
     auto get_id = [&](expr* e) {
         expr *ch1 = nullptr, *ch2 = nullptr;
         unsigned ch;
         if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch))
             return ch;
+        // Fallback: use expression ID (but use same ID for complement)
         re().is_complement(e, e);
         return e->get_id();
     };
@@ -2524,7 +2517,6 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
                 std::swap(a1, b1);
                 std::swap(a2, b2);
             }
-            // @EXP (experimental change)
             // Simplify if there is a relationship between ca and cb
             if (pred_implies(ca, cb)) {
                 r1 = mk_der_op(k, a1, b1);
@@ -2622,10 +2614,10 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
 }
 
 /*
-    Make an re_predicate with condition cond, enforcing derivative
-    normal form on how conditions are written.
+    Make an re_predicate with an arbitrary condition cond, enforcing
+    derivative normal form on how conditions are written.
 
-    Rewrites everything to (ele <= x) constraints:
+    Tries to rewrite everything to (ele <= x) constraints:
     (ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none))
     (a = ele) => "
     (a <= ele) => ite(ele <= a-1, none, epsilon)
@@ -2665,18 +2657,15 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
         }
     }
     else if (m().is_not(cond, c1)) {
-        UNREACHABLE();
         result = mk_der_cond(c1, ele, seq_sort);
         result = mk_der_compl(result);
     }
     else if (m().is_and(cond, c1, c2)) {
-        UNREACHABLE();
         r1 = mk_der_cond(c1, ele, seq_sort);
         r2 = mk_der_cond(c2, ele, seq_sort);
         result = mk_der_inter(r1, r2);
     }
     else if (m().is_or(cond, c1, c2)) {
-        UNREACHABLE();
         r1 = mk_der_cond(c1, ele, seq_sort);
         r2 = mk_der_cond(c2, ele, seq_sort);
         result = mk_der_union(r1, r2);
@@ -2763,16 +2752,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         expr_ref hd(m()), tl(m());
         if (get_head_tail(r1, hd, tl)) {
             // head must be equal; if so, derivative is tail
-            // result = re().mk_to_re(tl);
-            // return re_and(m_br.mk_eq_rw(ele, hd), result);
-            // @EXP (experimental change)
-            // Write 'head is equal' as a range constraint:
-            // (ele <= hd) and (hd <= ele)
-            // return mk_der_inter(
-            //     re_and(m_util.mk_le(ele, hd), re().mk_to_re(tl)),
-            //     re_and(m_util.mk_le(hd, ele), re().mk_to_re(tl))
-            // );
-            // @EXP (experimental change)
             // Use mk_der_cond to normalize
             STRACE("seq_verbose", tout << "deriv to_re" << std::endl;);
             result = m().mk_eq(ele, hd);
@@ -2803,15 +2782,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         // This is analagous to the previous is_to_re case.
         expr_ref hd(m()), tl(m());
         if (get_head_tail_reversed(r2, hd, tl)) {
-            // return re_and(m_br.mk_eq_rw(ele, tl), re().mk_reverse(re().mk_to_re(hd)));
-            // @EXP (experimental change)
-            // Write 'tail is equal' as a range constraint:
-            // (ele <= tl) and (tl <= ele)
-            // return mk_der_inter(
-            //     re_and(m_util.mk_le(ele, tl), re().mk_reverse(re().mk_to_re(hd))),
-            //     re_and(m_util.mk_le(tl, ele), re().mk_reverse(re().mk_to_re(hd)))
-            // );
-            // @EXP (experimental change)
             // Use mk_der_cond to normalize
             STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
             result = m().mk_eq(ele, tl);
@@ -2830,9 +2800,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
             if (s1.length() == 1 && s2.length() == 1) {
                 expr_ref ch1(m_util.mk_char(s1[0]), m());
                 expr_ref ch2(m_util.mk_char(s2[0]), m());
-                // return mk_der_inter(re_predicate(m_util.mk_le(ch1, ele), seq_sort),
-                //                     re_predicate(m_util.mk_le(ele, ch2), seq_sort));
-                // @EXP (experimental change)
                 // Use mk_der_cond to normalize
                 STRACE("seq_verbose", tout << "deriv range zstring" << std::endl;);
                 expr_ref p1(u().mk_le(ch1, ele), m());
@@ -2848,9 +2815,6 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         }
         expr* e1 = nullptr, *e2 = nullptr;
         if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) {
-            // return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort),
-            //                     re_predicate(m_util.mk_le(ele, e2), seq_sort));
-            // @EXP (experimental change)
             // Use mk_der_cond to normalize
             STRACE("seq_verbose", tout << "deriv range str" << std::endl;);
             expr_ref p1(u().mk_le(e1, ele), m());
@@ -2868,10 +2832,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         array_util array(m());
         expr* args[2] = { p, ele };
         result = array.mk_select(2, args);
-        // return re_predicate(result, seq_sort);
-        // @EXP (experimental change)
         // Use mk_der_cond to normalize
-        // (It's a no-op in this case, however)
         STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
         return mk_der_cond(result, ele, seq_sort);
     }
@@ -3063,14 +3024,10 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
     }
     if (str().is_empty(a)) {
         result = is_nullable(b);
-        if (str().is_in_re(result)) {
-            // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_DONE" << std::endl;);
+        if (str().is_in_re(result))
             return BR_DONE;
-        }
-        else {
-            // STRACE("seq_regex_brief", tout << "mk_str_in_regexp: ...BR_REWRITE_FULL" << std::endl;);
+        else
             return BR_REWRITE_FULL;
-        }
     }
 
     expr_ref hd(m()), tl(m());
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 106fb79f5aa..c36cd399965 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -140,7 +140,6 @@ class seq_rewriter {
         unsigned        m_max_cache_size { 10000 };
         expr_ref_vector m_trail;
         op_table        m_table;
-
         void cleanup();
 
     public:
@@ -189,10 +188,6 @@ class seq_rewriter {
 
     // Calculate derivative, memoized and enforcing a normal form
     expr_ref is_nullable_rec(expr* r);
-    // @EXP (experimental change)
-    // void mk_nullable_not(expr* a1, expr_ref& result);
-    // void mk_nullable_and(expr* a1, expr* a2, expr_ref& result);
-    // void mk_nullable_or(expr* a1, expr* a2, expr_ref& result);
     expr_ref mk_derivative_rec(expr* ele, expr* r);
     expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
     expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);

From 47661925f18feed6ccd15c63e0fb7e6996456a64 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 27 Jul 2020 19:27:06 -0400
Subject: [PATCH 49/51] remove cache hit/miss counts tracing

---
 src/ast/rewriter/seq_rewriter.cpp | 36 -------------------------------
 src/ast/rewriter/seq_rewriter.h   |  6 ------
 2 files changed, 42 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 74286b3b383..ec9984264b8 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -4296,10 +4296,6 @@ expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
     op_entry e(op, a, b, nullptr);
     m_table.find(e, e);
 
-    #ifdef _TRACE
-    (e.r) ? (cache_hits++) : (cache_misses++) ;
-    #endif
-
     return e.r;
 }
 
@@ -4319,35 +4315,3 @@ void seq_rewriter::op_cache::cleanup() {
         STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";);
     }
 }
-
-#ifdef _TRACE
-unsigned seq_rewriter::op_cache::cache_hits = 0;
-unsigned seq_rewriter::op_cache::cache_misses = 0;
-
-/*
-    Reset the tracing counts of # of cache hits and misses, and
-    report them.
-
-    Suppress reporting in the cases of 0/0 or 1/1 hits.
-
-    Hits and misses are tracked globally using static variables
-    m_op_cache.cache_hits and m_op_cache.cache_misses.
-*/
-void seq_rewriter::trace_and_reset_cache_counts() {
-    unsigned hits = m_op_cache.cache_hits;
-    unsigned misses = m_op_cache.cache_misses;
-    if (hits >= 2 || misses >= 1) {
-        STRACE("seq_regex",
-            tout << "Op cache hits: " << hits
-                 << " (out of " << (hits + misses)
-                 << ")" << std::endl;
-        );
-        STRACE("seq_regex_brief",
-            tout << "(" << hits << "/" << (hits + misses)
-                 << " hits) ";
-        );
-    }
-    m_op_cache.cache_hits = 0;
-    m_op_cache.cache_misses = 0;
-}
-#endif
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index c36cd399965..19091a6f24b 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -345,11 +345,5 @@ class seq_rewriter {
     // heuristic elimination of element from condition that comes form a derivative.
     // special case optimization for conjunctions of equalities, disequalities and ranges.
     void elim_condition(expr* elem, expr_ref& cond);
-
-    #ifdef _TRACE
-    void trace_and_reset_cache_counts();
-    #else
-    static inline void trace_and_reset_cache_counts() {}
-    #endif
 };
 

From 8761356680ba6e3e9f7124622f600836384bd370 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 27 Jul 2020 19:33:58 -0400
Subject: [PATCH 50/51] remove changes not in the rewriter

---
 src/ast/seq_decl_plugin.cpp |  38 +-
 src/smt/seq_regex.cpp       | 737 ++----------------------------------
 src/smt/seq_regex.h         | 178 +--------
 src/smt/theory_seq.cpp      |   1 -
 4 files changed, 53 insertions(+), 901 deletions(-)

diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index 3040ee8999a..9764ba18c27 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1316,21 +1316,22 @@ unsigned seq_util::re::min_length(expr* r) const {
     unsigned lo = 0, hi = 0;
     if (is_empty(r))
         return UINT_MAX;
-    if (is_concat(r, r1, r2))
+    if (is_concat(r, r1, r2)) 
         return u.max_plus(min_length(r1), min_length(r2));
-    if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
+    if (m.is_ite(r, s, r1, r2)) 
         return std::min(min_length(r1), min_length(r2));
-    if (is_intersection(r, r1, r2))
-        return std::max(min_length(r1), min_length(r2));
-    if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_plus(r, r1))
+    if (is_diff(r, r1, r2))
         return min_length(r1);
-    if (is_loop(r, r1, lo) || is_loop(r, r1, lo, hi))
+    if (is_union(r, r1, r2)) 
+        return std::min(min_length(r1), min_length(r2));
+    if (is_intersection(r, r1, r2)) 
+        return std::max(min_length(r1), min_length(r2));
+    if (is_loop(r, r1, lo, hi))
         return u.max_mul(lo, min_length(r1));
-    if (is_to_re(r, s))
-        return u.str.min_length(s);
-    if (is_range(r) || is_of_pred(r) || is_full_char(r))
+    if (is_range(r)) 
         return 1;
-    // Else: star, option, complement, full_seq, derivative
+    if (is_to_re(r, s)) 
+        return u.str.min_length(s);
     return 0;
 }
 
@@ -1340,21 +1341,20 @@ unsigned seq_util::re::max_length(expr* r) const {
     unsigned lo = 0, hi = 0;
     if (is_empty(r))
         return 0;
-    if (is_concat(r, r1, r2))
+    if (is_concat(r, r1, r2)) 
         return u.max_plus(max_length(r1), max_length(r2));
-    if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
+    if (m.is_ite(r, s, r1, r2)) 
         return std::max(max_length(r1), max_length(r2));
-    if (is_intersection(r, r1, r2))
-        return std::min(max_length(r1), max_length(r2));
-    if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1))
+    if (is_diff(r, r1, r2))
         return max_length(r1);
+    if (is_union(r, r1, r2)) 
+        return std::max(max_length(r1), max_length(r2));
+    if (is_intersection(r, r1, r2)) 
+        return std::min(max_length(r1), max_length(r2));
     if (is_loop(r, r1, lo, hi))
         return u.max_mul(hi, max_length(r1));
-    if (is_to_re(r, s))
+    if (is_to_re(r, s)) 
         return u.str.max_length(s);
-    if (is_range(r) || is_of_pred(r) || is_full_char(r))
-        return 1;
-    // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative
     return UINT_MAX;
 }
 
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 5544879253d..41a79ae9efb 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -24,8 +24,7 @@ namespace smt {
     seq_regex::seq_regex(theory_seq& th):
         th(th),
         ctx(th.get_context()),
-        m(th.get_manager()),
-        m_state_to_expr(m)
+        m(th.get_manager())
     {}
 
     seq_util& seq_regex::u() { return th.m_util; }
@@ -104,14 +103,14 @@ namespace smt {
     }
 
     /**
-     * Propagate the atom (str.in_re s r)
+     * Propagate the atom (str.in.re s r)
      * 
      * Propagation implements the following inference rules
      * 
-     * (not (str.in_re s r)) => (str.in_re s (complement r))
-     * (str.in_re s r) => r != {}
+     * (not (str.in.re s r)) => (str.in.re s (complement r))
+     * (str.in.re s r) => r != {}
      * 
-     * (str.in_re s r) => (accept s 0 r)
+     * (str.in.re s r) => (accept s 0 r)
      */
 
     void seq_regex::propagate_in_re(literal lit) {
@@ -119,9 +118,7 @@ namespace smt {
         expr* e = ctx.bool_var2expr(lit.var());
         VERIFY(str().is_in_re(e, s, r));
 
-        TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PIR(" << mk_pp(s, m) << ","
-                                       << state_str(r) << ") ";);
+        TRACE("seq", tout << "propagate " << lit.sign() << " " << mk_pp(e, m) << "\n";);
 
         // convert negative negative membership literals to positive
         // ~(s in R) => s in C(R)
@@ -168,13 +165,11 @@ namespace smt {
     }
 
     void seq_regex::propagate_accept(literal lit) {
-        TRACE("seq_regex", tout << "propagate accept" << std::endl;);
-        STRACE("seq_regex_brief", tout << "PA ";);
-
+        // std::cout << "PA ";
         literal t = null_literal;
         if (!propagate(lit, t))
             m_to_propagate.push_back(propagation_lit(lit, t));
-    }
+                                     }
 
     /**
      * Propagate the atom (accept s i r)
@@ -197,136 +192,21 @@ namespace smt {
         unsigned idx = 0;
         VERIFY(sk().is_accept(e, s, i, idx, r));
 
-        TRACE("seq_regex", tout << "propagate: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << std::endl
-                                       << "P(" << mk_pp(s, m) << "@" << idx
-                                       << "," << state_str(r) << ") ";);
-
-        expr* cond = nullptr, *tt = nullptr, *el = nullptr;
-        if (re().is_empty(r)) {
-            STRACE("seq_regex_brief", tout << "(empty) ";);
-            th.add_axiom(~lit);
-            return true;
-        }
-        else if (m.is_ite(r, cond, tt, el)) {
-            UNREACHABLE();
-            STRACE("seq_regex_brief", tout << "(ite) ";);
-            return false;
-
-            // @EXP (Experimental change)
-            // This code tries to unfold the derivative one step at a time
-            // and propagate the if-then-elses.
-            // literal lcond = th.mk_literal(cond);
-            // ctx.mark_as_relevant(lcond);
-            // trigger = lcond;
-            // expr_ref ncond(m), acc1(m), acc2(m),
-            //          choice1(m), choice2(m), choice(m);
-            // ncond = m.mk_not(cond);
-            // acc1 = sk().mk_accept(s, a().mk_int(idx), tt);
-            // acc2 = sk().mk_accept(s, a().mk_int(idx), el);
-            // choice1 = m.mk_and(cond, acc1);
-            // choice2 = m.mk_and(ncond, acc2);
-            // choice = m.mk_or(choice1, choice2);
-            // th.propagate_lit(nullptr, 1, &lit, th.mk_literal(choice));
-            // // th.propagate_lit(th.mk_literal(choice));
-            // // literal_vector choice_lit;
-            // // choice_lit.push_back(th.mk_literal(choice));
-            // // th.add_axiom(choice_lit);
-            // return true;
-        }
+        // std::cout << "\nP " << idx << " " << r->get_id() << " ";
 
-        update_state_graph(r);
+        TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
 
-        if (m_state_graph.is_dead(get_state_id(r))) {
-            STRACE("seq_regex_brief", tout << "(dead) ";);
+        if (re().is_empty(r)) {
             th.add_axiom(~lit);
             return true;
         }
 
-        if (block_unfolding(lit, idx)) {
-            STRACE("seq_regex_brief", tout << "(blocked) ";);
+        if (block_unfolding(lit, idx))
             return true;
-        }
-
-        STRACE("seq_regex_brief", tout << "(unfold) ";);
-
-        // First axiom: use min_length to prune search
-        // accept(s, idx, r) => len(s) >= idx + min_len(r)
-        expr_ref s_to_re(re().mk_to_re(s), m);
-        expr_ref s_plus_r(re().mk_concat(s_to_re, r), m);
-        unsigned min_len = re().min_length(s_plus_r);
-        literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len);
-        th.add_axiom(~lit, len_s_ge_min);
-
-        // Old first axiom: accept(s, idx, r) => len(s) >= idx
-        // literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
-        // th.add_axiom(~lit, len_s_ge_i);
-
-        // Second axiom: nullable check
-        // accept(s, idx, r) and len(s) <= idx => r nullable
-        literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
-        literal is_nullable = th.mk_literal(is_nullable_wrapper(r));
-        th.add_axiom(~lit, ~len_s_le_i, is_nullable);
-
-        // Third axiom: derivative unfolding
-        // accept(s, idx, r) and not (len_s_le_i) =>
-        //     OR_(cond, dr) cond and accept(s, idx+1, dr)
-        // over all derivatives dr and conditions cond on the head
-        literal_vector accept_next;
-        expr_ref hd = th.mk_nth(s, i);
-        expr_ref deriv(m);
-        deriv = derivative_wrapper(hd, r);
-        accept_next.push_back(~lit);
-        accept_next.push_back(len_s_le_i);
-        expr_ref_pair_vector cofactors(m);
-        get_cofactors(deriv, cofactors);
-        for (auto const& p : cofactors) {
-            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
-            expr_ref cond(p.first, m);
-            expr_ref deriv_leaf(p.second, m);
-
-            // @EXP (Experimental change)
-            // Skip searching when can_be_in_cycle returns true
-            // Result: Besides being unsound as written, this is not
-            // fine-grained enough. In case of intersections, many
-            // edges return true for can_be_in_cycle
-            // if (can_be_in_cycle(deriv, deriv_leaf)) continue;
 
-            expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
-            expr_ref choice(m.mk_and(cond, acc), m);
-            literal choice_lit = th.mk_literal(choice);
-            accept_next.push_back(choice_lit);
-            // Prioritize unvisited states
-            // if (!m_state_graph.is_done(get_state_id(deriv_leaf))) {
-            //     // @EXP Unsound test: only push if not done
-            //     accept_next.push_back(choice_lit);
-            //     // @EXP This didn't work -- just marking as relevant
-            //     // ctx.mark_as_relevant(choice_lit);
-            // }
-            STRACE("seq_regex_debug", tout << "added choice: "
-                                           << mk_pp(choice, m) << std::endl;);
-        }
-        th.add_axiom(accept_next);
-
-        // Propagated successfully
-        return true;
-
-        // expr_ref is_nullable(m), head(m), deriv(m), acc_next(m), unfold(m);
-        // head = th.mk_nth(s, i);
-        // deriv = derivative_wrapper(head, r);
-        // th.add_axiom(~lit, ~th.mk_literal(is_nullable));
-        // 
-        // acc_next = sk().mk_accept(s, a().mk_int(idx + 1), deriv);
-        // unfold = m.mk_or(is_nullable, acc_next);
-        // 
-        // literal_vector unfold_lit;
-        // unfold_lit.push_back(th.mk_literal(unfold));
-        // th.add_axiom(unfold_lit);
-        // return true;
+        propagate_nullable(lit, s, idx, r);
 
-        // propagate_nullable(lit, s, idx, r);
-        // 
-        // return propagate_derivative(lit, e, s, i, idx, r, trigger);
+        return propagate_derivative(lit, e, s, i, idx, r, trigger);
     }
 
     /**
@@ -349,25 +229,19 @@ namespace smt {
      */
 
     void seq_regex::propagate_nullable(literal lit, expr* s, unsigned idx, expr* r) {
-        TRACE("seq_regex", tout << "propagate nullable: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PN ";);
-
-        expr_ref is_nullable = is_nullable_wrapper(r);
-
+        // std::cout << "PN ";
+        expr_ref is_nullable = seq_rw().is_nullable(r);
+        rewrite(is_nullable);
         literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
         if (m.is_true(is_nullable)) {
-            STRACE("seq_regex_brief", tout << "t ";);
             th.propagate_lit(nullptr, 1,&lit, len_s_ge_i);
         }
         else if (m.is_false(is_nullable)) {
-            STRACE("seq_regex_brief", tout << "f ";);
             th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1));
-            // @EXP (experimental change)
             //unsigned len = std::max(1u, re().min_length(r));
             //th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r)));
         }
         else {
-            STRACE("seq_regex_brief", tout << "? ";);
             literal is_nullable_lit = th.mk_literal(is_nullable);
             ctx.mark_as_relevant(is_nullable_lit);
             literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
@@ -388,19 +262,12 @@ namespace smt {
     }
     
     bool seq_regex::propagate_derivative(literal lit, expr* e, expr* s, expr* i, unsigned idx, expr* r, literal& trigger) {
-        TRACE("seq_regex", tout << "propagate derivative: " << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PD ";);
-
         // (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds
+        // std::cout << "PD ";
         expr_ref d(m);
         expr_ref head = th.mk_nth(s, i);
 
         d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
-
-        // TODO
-        // conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)));
-        // th.add_axiom(conds);
-
         // timer tm;
         // std::cout << d->get_id() << " " << tm.get_seconds() << "\n";
         //if (tm.get_seconds() > 0.3) 
@@ -419,17 +286,14 @@ namespace smt {
             literal lcond = th.mk_literal(subst(cond, sub));
             switch (ctx.get_assignment(lcond)) {
             case l_true:
-                STRACE("seq_regex_brief", tout << "t ";);
                 conds.push_back(~lcond);
                 d = tt;
                 break;
             case l_false:
-                STRACE("seq_regex_brief", tout << "f ";);
                 conds.push_back(lcond);
                 d = el;
                 break;
             case l_undef:
-                STRACE("seq_regex_brief", tout << "? ";);
 #if 1
                 ctx.mark_as_relevant(lcond);
                 trigger = lcond;
@@ -458,7 +322,6 @@ namespace smt {
 #endif
             }
         }
-
         if (!is_ground(d)) {
             d = subst(d, sub);
         }
@@ -466,9 +329,8 @@ namespace smt {
         if (!re().is_empty(d)) 
             conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)));
         th.add_axiom(conds);        
-        TRACE("seq_regex", tout << "unfold " << head << std::endl << mk_pp(r, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "u ";);
-
+        TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";);
+        // std::cout << "D ";
         return true;
     }
 
@@ -490,7 +352,6 @@ namespace smt {
      * within the same Regex.
      */
     bool seq_regex::coallesce_in_re(literal lit) {
-        // @EXP (experimental change)
         return false;
         expr* s = nullptr, *r = nullptr;
         expr* e = ctx.bool_var2expr(lit.var());
@@ -511,7 +372,7 @@ namespace smt {
             th.m_trail_stack.push(vector_value_trail<theory_seq, s_in_re, true>(m_s_in_re, i));
             m_s_in_re[i].m_active = false;
             IF_VERBOSE(11, verbose_stream() << "Intersect " << regex << " " << 
-                       mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << std::endl;);
+                       mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";);
             regex = re().mk_inter(entry.m_re, regex);
             rewrite(regex);
             lits.push_back(~entry.m_lit);
@@ -541,79 +402,20 @@ namespace smt {
     }
 
     /*
-        Wrapper around calls to is_nullable from the seq rewriter.
-    */
-    expr_ref seq_regex::is_nullable_wrapper(expr* r) {
-        STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;);
-
-        expr_ref result = seq_rw().is_nullable(r);
-        rewrite(result);
-
-        STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "n(" << state_str(r) << ")="
-                                       << mk_pp(result, m) << " ";);
-        seq_rw().trace_and_reset_cache_counts();
-
-        return result;
-    }
-
-    /*
-        Wrapper around the regex symbolic derivative from the seq rewriter.
+        Wrapper around the regex symbolic derivative from the rewriter.
         Ensures that the derivative is written in a normalized BDD form
         with optimizations for if-then-else expressions involving the head.
     */
     expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
-        STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
-
-        // Use canonical variable for head; substitute with hd later
-        // sort* seq_sort = nullptr;
-        // VERIFY(u().is_re(r, seq_sort));
-        // expr_ref hd_canon = get_head_var(sq_sort);
-        expr_ref hd_canon(m.mk_var(0, m.get_sort(hd)), m);
-        expr_ref result(re().mk_derivative(hd_canon, r), m);
+        expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
         rewrite(result);
-
-        // Substitute
-        var_subst subst(m);
-        expr_ref_vector sub(m);
-        sub.push_back(hd);
-        result = subst(result, sub);
-
-        STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")="
-                                       << state_str(result) << " ";);
-        seq_rw().trace_and_reset_cache_counts();
-
-        /*  If the following lines are enabled instead, we use the
-            same rewriter for the nullable and derivative calls.
-            However, it currently seems to cause a performance
-            bug as a side effect.
-
-            The two seq rewriters used are at:
-                m_seq_rewrite
-                    (returned by seq_rw())
-                th.m_rewrite.m_imp->m_cfg.m_seq_rw
-                    (private, can't be accessed directly)
-
-            TODO: experiment with making them the same and see
-            if it results in significant speedup (due to fewer
-            cache misses).
-           */
-        // expr_ref result = seq_rw().mk_derivative(hd, r);
-        // rewrite(result)
-        // STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
-        // seq_rw().trace_and_reset_cache_counts();
-
         return result;
     }
 
     void seq_regex::propagate_eq(expr* r1, expr* r2) {
-        TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PEQ ";);
-
         sort* seq_sort = nullptr;
         VERIFY(u().is_re(r1, seq_sort));
-        expr_ref r = symmetric_diff(r1, r2);
+        expr_ref r = symmetric_diff(r1, r2);       
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); 
         expr_ref is_empty = sk().mk_is_empty(r, emp, n);
@@ -621,9 +423,6 @@ namespace smt {
     }
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
-        TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
-        STRACE("seq_regex_brief", tout << "PNEQ ";);
-
         sort* seq_sort = nullptr;
         VERIFY(u().is_re(r1, seq_sort));
         expr_ref r = symmetric_diff(r1, r2);
@@ -653,31 +452,18 @@ namespace smt {
     void seq_regex::propagate_is_non_empty(literal lit) {
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr;
         VERIFY(sk().is_is_non_empty(e, r, u, n));
-
-        TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << std::endl << "PNE(" << expr_id_str(e)
-                              << "," << state_str(r)
-                              << "," << expr_id_str(u)
-                              << "," << expr_id_str(n)
-                              << ") ";);
-
-        expr_ref is_nullable = is_nullable_wrapper(r);
+        expr_ref is_nullable = seq_rw().is_nullable(r);
+        rewrite(is_nullable);
         if (m.is_true(is_nullable))
             return;
         literal null_lit = th.mk_literal(is_nullable);
         expr_ref hd = mk_first(r, n);
         expr_ref d(m);
         d = derivative_wrapper(hd, r);
-
-        // STRACE("seq_regex_brief", tout << "(d subbed: " << state_str(d) << ") ";);
-        // TRACE("seq_regex", tout << "d subbed: " << mk_pp(d, m) << std::endl;);
-
         literal_vector lits;
         lits.push_back(~lit);
         if (null_lit != false_literal) 
             lits.push_back(null_lit);
-
         expr_ref_pair_vector cofactors(m);
         get_cofactors(d, cofactors);
         for (auto const& p : cofactors) {
@@ -693,7 +479,6 @@ namespace smt {
                 next_non_empty = m.mk_and(cond, next_non_empty);
             lits.push_back(th.mk_literal(next_non_empty));
         }
-
         th.add_axiom(lits);
     }
 
@@ -713,25 +498,6 @@ namespace smt {
         }
     }
 
-    void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
-        // Get derivative
-        sort* seq_sort = nullptr;
-        VERIFY(u().is_re(r, seq_sort));
-        expr_ref n(m.mk_fresh_const("re.char", seq_sort), m);
-        expr_ref hd = mk_first(r, n);
-        expr_ref d(m);
-        d = derivative_wrapper(hd, r);
-        // Use get_cofactors method and filter out unsatisfiable conds
-        expr_ref_pair_vector cofactors(m);
-        get_cofactors(d, cofactors);
-        STRACE("seq_regex_debug", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;);
-        for (auto const& p : cofactors) {
-            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
-            STRACE("seq_regex_debug", tout << "adding derivative: " << mk_pp(p.second, m) << std::endl;);
-            results.push_back(p.second);
-        }
-    }
-
     /*
       is_empty(r, u) => ~is_nullable(r)
       is_empty(r, u) => (forall x . ~cond(x)) or is_empty(r1, u union r)    for (cond, r) in min-terms(D(x,r))      
@@ -741,16 +507,8 @@ namespace smt {
     void seq_regex::propagate_is_empty(literal lit) {
         expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr;
         VERIFY(sk().is_is_empty(e, r, u, n));
-        expr_ref is_nullable = is_nullable_wrapper(r);
-
-        TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
-        STRACE("seq_regex_brief",
-            tout << std::endl << "PE(" << expr_id_str(e)
-                              << "," << state_str(r)
-                              << "," << expr_id_str(u)
-                              << "," << expr_id_str(n)
-                              << ") ";);
-
+        expr_ref is_nullable = seq_rw().is_nullable(r);
+        rewrite(is_nullable);
         if (m.is_true(is_nullable)) {
             th.add_axiom(~lit);
             return;
@@ -782,449 +540,10 @@ namespace smt {
         }        
     }
 
-    // @EXP: Experimental change
-    // Some code to compute a canonical head variable, but I think
-    // this stuff is unnecessary.
-    // expr_ref seq_regex::get_head_var(sort* seq_sort) {
-    //     expr_ref result(m);
-    //     if (m_deriv_head.contains(seq_sort)) {
-    //         result = m_deriv_head.find(seq_sort);
-    //         STRACE("seq_regex_brief", tout << " ghv=" << mk_pp(result, m););
-    //     }
-    //     else {
-    //         result = m.mk_fresh_const("re.char", seq_sort);
-    //         STRACE("seq_regex_brief", tout << " NEWghv=" << mk_pp(result, m););
-    //     }
-    //     return result;
-    // }
-
     expr_ref seq_regex::mk_first(expr* r, expr* n) {
         sort* elem_sort = nullptr, *seq_sort = nullptr;
         VERIFY(u().is_re(r, seq_sort));
         VERIFY(u().is_seq(seq_sort, elem_sort));
         return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort);
     }
-
-    /****************************************************
-     *** Dead state elimination and state_graph class ***
-     ****************************************************/
-
-    void state_graph::add_state_core(state s) {
-        STRACE("seq_regex_brief", tout << "add(" << s << ") ";);
-        SASSERT(!m_seen.contains(s));
-        // Ensure corresponding var in union find structure
-        while (s >= m_state_ufind.get_num_vars()) {
-            m_state_ufind.mk_var();
-        }
-        // Initialize as unvisited
-        m_seen.insert(s);
-        m_unexplored.insert(s);
-        m_targets.insert(s, state_set());
-        m_sources.insert(s, state_set());
-        m_sources_maybecycle.insert(s, state_set());
-    }
-    void state_graph::remove_state_core(state s) {
-        // This is a partial deletion -- the state is still seen and can't be
-        // added again later.
-        // The state should be unknown, and all edges to or from the state
-        // should already have been renamed.
-        STRACE("seq_regex_brief", tout << "del(" << s << ") ";);
-        SASSERT(m_seen.contains(s));
-        SASSERT(!m_state_ufind.is_root(s));
-        SASSERT(m_unknown.contains(s));
-        m_targets.remove(s);
-        m_sources.remove(s);
-        m_sources_maybecycle.remove(s);
-        m_unknown.remove(s);
-    }
-
-    void state_graph::mark_unknown_core(state s) {
-        STRACE("seq_regex_brief", tout << "unk(" << s << ") ";);
-        SASSERT(m_state_ufind.is_root(s));
-        SASSERT(m_unexplored.contains(s));
-        m_unexplored.remove(s);
-        m_unknown.insert(s);
-    }
-    void state_graph::mark_live_core(state s) {
-        STRACE("seq_regex_brief", tout << "live(" << s << ") ";);
-        SASSERT(m_state_ufind.is_root(s));
-        SASSERT(m_unknown.contains(s));
-        m_unknown.remove(s);
-        m_live.insert(s);
-    }
-    void state_graph::mark_dead_core(state s) {
-        STRACE("seq_regex_brief", tout << "dead(" << s << ") ";);
-        SASSERT(m_state_ufind.is_root(s));
-        SASSERT(m_unknown.contains(s));
-        m_unknown.remove(s);
-        m_dead.insert(s);
-    }
-
-    /*
-        Add edge to the graph.
-        - If the annotation 'maybecycle' is false, then the user is sure
-          that this edge will never be part of a cycle.
-        - May already exist, in which case maybecycle = false overrides
-          maybecycle = true.
-    */
-    void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
-        STRACE("seq_regex_brief", tout << "add(" << s1 << "," << s2 << ","
-                                       << (maybecycle ? "y" : "n") << ") ";);
-        SASSERT(m_state_ufind.is_root(s1));
-        SASSERT(m_state_ufind.is_root(s2));
-        if (s1 == s2) return;
-        if (!m_targets.find(s1).contains(s2)) {
-            // add new edge
-            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: new edge! ";);
-            m_targets.find(s1).insert(s2);
-            m_sources.find(s2).insert(s1);
-            if (maybecycle) m_sources_maybecycle.find(s2).insert(s1);
-        }
-        else if (!maybecycle && m_sources_maybecycle.find(s2).contains(s1)) {
-            // update existing edge
-            STRACE("seq_regex_debug", tout << std::endl << "  DEBUG: update edge! ";);
-            m_sources_maybecycle.find(s2).remove(s1);
-        }
-    }
-    void state_graph::remove_edge_core(state s1, state s2) {
-        SASSERT(m_targets.find(s1).contains(s2));
-        SASSERT(m_sources.find(s2).contains(s1));
-        m_targets.find(s1).remove(s2);
-        m_sources.find(s2).remove(s1);
-        m_sources_maybecycle.find(s2).remove(s1);
-    }
-    void state_graph::rename_edge_core(state old1, state old2,
-                                       state new1, state new2) {
-        SASSERT(m_targets.find(old1).contains(old2));
-        SASSERT(m_sources.find(old2).contains(old1));
-        bool maybecycle = m_sources_maybecycle.find(old2).contains(old1);
-        remove_edge_core(old1, old2);
-        add_edge_core(new1, new2, maybecycle);
-    }
-
-    /*
-        Merge two states or more generally a set of states into one,
-        returning the new state. Also merges associated edges.
-
-        Preconditions:
-        - The set should be nonempty
-        - Every state in the set should be unknown
-        - Each state should currently exist
-        - If passing a set of states by reference, it should not be a set
-          from the edge relations, as merging states modifies edge relations.
-    */
-    auto state_graph::merge_states(state s1, state s2) -> state {
-        SASSERT(m_state_ufind.is_root(s1));
-        SASSERT(m_state_ufind.is_root(s2));
-        SASSERT(m_unknown.contains(s1));
-        SASSERT(m_unknown.contains(s2));
-        STRACE("seq_regex_brief", tout << "merge(" << s1 << "," << s2 << ") ";);
-        m_state_ufind.merge(s1, s2);
-        if (m_state_ufind.is_root(s2)) std::swap(s1, s2);
-        // rename s2 to s1 in edges
-        for (auto s_to: m_targets.find(s2)) {
-            rename_edge_core(s2, s_to, s1, s_to);
-        }
-        for (auto s_from: m_sources.find(s2)) {
-            rename_edge_core(s_from, s2, s_from, s1);
-        }
-        remove_state_core(s2);
-        return s1;
-    }
-    auto state_graph::merge_states(state_set& s_set) -> state {
-        SASSERT(s_set.num_elems() > 0);
-        state prev_s = 0; // initialization here optional
-        bool first_iter = true;
-        for (auto s: s_set) {
-            if (first_iter) {
-                prev_s = s;
-                first_iter = false;
-                continue;
-            }
-            prev_s = merge_states(prev_s, s);
-        }
-        return prev_s;
-    }
-
-    /*
-        If s is not live, mark it, and recurse on all states into s
-        Precondition: s is live or unknown
-    */
-    void state_graph::mark_live_recursive(state s) {
-        SASSERT(m_live.contains(s) || m_unknown.contains(s));
-        STRACE("seq_regex_debug", tout
-            << std::endl << "  DEBUG: mark live recursive: " << s << " ";);
-        if (m_live.contains(s)) return;
-        mark_live_core(s);
-        for (auto s_from: m_sources.find(s)) {
-            mark_live_recursive(s_from);
-        }
-    }
-
-    /*
-        Check if s is now known to be dead. If so, mark and recurse
-        on all states into s.
-        Precondition: s is live, dead, or unknown
-    */
-    void state_graph::mark_dead_recursive(state s) {
-        SASSERT(m_live.contains(s) || m_dead.contains(s) ||
-                m_unknown.contains(s));
-        STRACE("seq_regex_debug", tout
-            << std::endl << "  DEBUG: mark dead recursive: " << s << " ";);
-        if (!m_unknown.contains(s)) return;
-        for (auto s_to: m_targets.find(s)) {
-            // unknown pointing to live should have been marked as live!
-            SASSERT(!m_live.contains(s_to));
-            if (m_unknown.contains(s_to) || m_unexplored.contains(s_to)) return;
-        }
-        // all states from s are dead
-        mark_dead_core(s);
-        for (auto s_from: m_sources.find(s)) {
-            mark_dead_recursive(s_from);
-        }
-    }
-
-    /*
-        Merge all cycles of unknown states containing s into one state.
-        Return the new state
-        Precondition: s is unknown.
-    */
-    auto state_graph::merge_all_cycles(state s) -> state {
-        SASSERT(m_unknown.contains(s));
-        // Visit states in a DFS backwards from s
-        state_set visited;  // all backwards edges pushed
-        state_set resolved; // known in SCC or not
-        state_set scc;      // known in SCC
-        resolved.insert(s);
-        scc.insert(s);
-        vector<state> to_search;
-        to_search.push_back(s);
-        while (to_search.size() > 0) {
-            state x = to_search.back();
-            if (!visited.contains(x)) {
-                visited.insert(x);
-                // recurse backwards only on maybecycle edges
-                // and only on unknown states
-                for (auto y: m_sources_maybecycle.find(x)) {
-                    if (m_unknown.contains(y))
-                        to_search.push_back(y);
-                }
-            }
-            else if (!resolved.contains(x)) {
-                resolved.insert(x);
-                to_search.pop_back();
-                // determine in SCC or not
-                for (auto y: m_sources_maybecycle.find(x)) {
-                    if (scc.contains(y)) {
-                        scc.insert(x);
-                        break;
-                    }
-                }
-            }
-            else {
-                to_search.pop_back();
-            }
-        }
-        // scc is the union of all cycles containing s
-        return merge_states(scc);
-    }
-
-    /*
-        Exposed methods
-    */
-
-    void state_graph::add_state(state s) {
-        if (m_seen.contains(s)) return;
-        add_state_core(s);
-    }
-    void state_graph::mark_live(state s) {
-        SASSERT(m_unexplored.contains(s) || m_live.contains(s));
-        SASSERT(m_state_ufind.is_root(s));
-        if (m_unexplored.contains(s)) mark_unknown_core(s);
-        mark_live_recursive(s);
-    }
-    void state_graph::add_edge(state s1, state s2, bool maybecycle) {
-        SASSERT(m_unexplored.contains(s1) || m_live.contains(s1));
-        SASSERT(m_state_ufind.is_root(s1));
-        SASSERT(m_seen.contains(s2));
-        s2 = m_state_ufind.find(s2);
-        add_edge_core(s1, s2, maybecycle);
-        if (m_live.contains(s2)) mark_live(s1);
-    }
-    void state_graph::mark_done(state s) {
-        SASSERT(m_unexplored.contains(s) || m_live.contains(s));
-        SASSERT(m_state_ufind.is_root(s));
-        if (m_live.contains(s)) return;
-        if (m_unexplored.contains(s)) mark_unknown_core(s);
-        s = merge_all_cycles(s);
-        // check if dead
-        mark_dead_recursive(s);
-        STRACE("seq_regex_brief", tout << "done(" << s << ") ";);
-    }
-
-    unsigned state_graph::get_size() {
-        return m_state_ufind.get_num_vars();
-    }
-
-    bool state_graph::is_seen(state s) {
-        return m_seen.contains(s);
-    }
-    bool state_graph::is_live(state s) {
-        return m_live.contains(m_state_ufind.find(s));
-    }
-    bool state_graph::is_dead(state s) {
-        return m_dead.contains(m_state_ufind.find(s));
-    }
-    bool state_graph::is_done(state s) {
-        return (m_seen.contains(s) &&
-                !m_unexplored.contains(m_state_ufind.find(s)));
-    }
-
-    /*
-        Pretty printing
-    */
-    void state_graph::pretty_print(std::ostream& o) {
-        o << "---------- State Graph ----------" << std::endl
-          << "Seen:";
-        for (auto s: m_seen) {
-            o << " " << s;
-            state s_root = m_state_ufind.find(s);
-            if (s_root != s)
-                o << "(=" << s_root << ")";
-        }
-        o << std::endl
-          << "Live:" << m_live << std::endl
-          << "Dead:" << m_dead << std::endl
-          << "Unknown:" << m_unknown << std::endl
-          << "Unexplored:" << m_unexplored << std::endl
-          << "Edges:" << std::endl;
-        for (auto s1: m_seen) {
-            if (m_state_ufind.is_root(s1)) {
-                o << "  " << s1 << " -> " << m_targets.find(s1) << std::endl;
-            }
-        }
-        o << "---------------------------------" << std::endl;
-    }
-    // std::ostream& operator<<(std::ostream& o, const state_graph& sg) {
-    //     sg.pretty_print(o);
-    //     return o;
-    // }
-
-    // **********************************
-
-    unsigned seq_regex::get_state_id(expr* e) {
-        // Assign increasing IDs starting from 1
-        if (!m_expr_to_state.contains(e)) {
-            m_state_to_expr.push_back(e);
-            unsigned new_id = m_state_to_expr.size();
-            m_expr_to_state.insert(e, new_id);
-            STRACE("seq_regex_brief", tout << "new(" << expr_id_str(e)
-                                           << ")=" << state_str(e) << " ";);
-        }
-        return m_expr_to_state.find(e);
-    }
-    expr* seq_regex::get_expr_from_id(unsigned id) {
-        SASSERT(id >= 1);
-        SASSERT(id <= m_state_to_expr.size());
-        return m_state_to_expr.get(id);
-    }
-
-
-    unsigned seq_regex::concat_length(expr* r) {
-        // length of the concatenations at the top level
-        expr *r1 = nullptr, *r2 = nullptr;
-        if (re().is_concat(r, r1, r2))
-            return concat_length(r1) + concat_length(r2);
-        else
-            return 1;
-    }
-
-    unsigned seq_regex::re_rank(expr* r) {
-        SASSERT(u().is_re(r));
-        expr *r1 = nullptr, *r2 = nullptr, *s = nullptr;
-        unsigned lo = 0, hi = 0;
-        if (re().is_empty(r))
-            return 0;
-        if (re().is_concat(r, r1, r2))
-            return std::max(re_rank(r1) + concat_length(r2), re_rank(r2));
-        if (re().is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
-            return std::max(re_rank(r1), re_rank(r2));
-        if (re().is_intersection(r, r1, r2) || re().is_diff(r, r1, r2))
-            return re_rank(r1) + re_rank(r2);
-        if (re().is_plus(r, r1) || re().is_star(r, r1))
-            return re_rank(r1) + 1;
-        if (re().is_loop(r, r1, lo) || re().is_loop(r, r1, lo, hi))
-            return re_rank(r1) + lo;
-        if (re().is_reverse(r, r1) || re().is_opt(r, r1))
-            // in reverse case, should be r1 is a string
-            return re_rank(r1);
-        if (re().is_to_re(r, s))
-            return u().str.min_length(s);
-        // Else: range, pred, char, full_seq, derivative
-        return 1;
-    }
-
-    bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) {
-        // @EXP (experimental change): Use a "rank" function, which is
-        // a pseudo-topological order on the state graph, to detect when r2
-        // is a simpler regex than r1
-        unsigned k1 = re_rank(r1);
-        unsigned k2 = re_rank(r2);
-        SASSERT(k1 >= k2);
-        STRACE("seq_regex_brief", tout << "(k:" << k1 << "->" << k2 << ")";);
-        return (k1 == k2);
-    }
-
-    /*
-        Update the state graph with expression r and all its derivatives.
-    */
-    bool seq_regex::update_state_graph(expr* r) {
-        unsigned r_id = get_state_id(r);
-        if (m_state_graph.is_done(r_id)) return false;
-        if (m_state_graph.get_size() >= m_max_state_graph_size) {
-            STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;);
-            STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
-            return false;
-        }
-        STRACE("seq_regex", tout << "Updating state graph for regex "
-                                 << mk_pp(r, m) << ") ";);
-        // Add state
-        m_state_graph.add_state(r_id);
-        STRACE("seq_regex_brief", tout << std::endl << "USG("
-                                       << state_str(r) << ") ";);
-        expr_ref r_nullable = is_nullable_wrapper(r);
-        if (m.is_true(r_nullable)) {
-            m_state_graph.mark_live(r_id);
-        }
-        else {
-            // Add edges to all derivatives
-            expr_ref_vector derivatives(m);
-            STRACE("seq_regex_debug", tout
-                << std::endl << "  DEBUG: getting all derivs: " << r_id << " ";);
-            get_all_derivatives(r, derivatives);
-            for (auto const& dr: derivatives) {
-                unsigned dr_id = get_state_id(dr);
-                STRACE("seq_regex_debug", tout
-                    << std::endl << "  DEBUG: traversing deriv: " << dr_id << " ";);
-                m_state_graph.add_state(dr_id);
-                bool maybecycle = can_be_in_cycle(r, dr);
-                m_state_graph.add_edge(r_id, dr_id, maybecycle);
-            }
-            m_state_graph.mark_done(r_id);
-        }
-        STRACE("seq_regex_brief", tout << std::endl;);
-        STRACE("seq_regex_brief", m_state_graph.pretty_print(tout););
-        return true;
-    }
-
-    std::string seq_regex::state_str(expr* e) {
-        if (m_expr_to_state.contains(e))
-            return std::to_string(get_state_id(e));
-        else
-            return expr_id_str(e);
-    }
-    std::string seq_regex::expr_id_str(expr* e) {
-        return std::string("id") + std::to_string(e->get_id());
-    }
-
 }
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 2b861fd1aad..1d77cf81dbe 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -17,8 +17,6 @@ Module Name:
 #pragma once
 
 #include "util/scoped_vector.h"
-#include "util/uint_set.h"
-#include "util/union_find.h"
 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/seq_rewriter.h"
 #include "smt/smt_context.h"
@@ -28,140 +26,7 @@ namespace smt {
 
     class theory_seq;
 
-    class seq_regex;
-
-    /*
-        state_graph
-
-        Data structure which is capable of incrementally tracking
-        live states and dead states.
-
-        "States" are integers. States and edges are added to the data
-        structure incrementally.
-        - States can be marked as live
-          or as done -- to indicate that no more outgoing edges will be
-          added and the state will not be marked as live. The data
-          structure then tracks
-          which other states are live (can reach a live state), dead
-          (can't reach a live state), or neither.
-        - Some edges are labeled as not contained in a cycle. This is to
-          optimize search if it is known by the user of the structure
-          that no cycle will ever contain this edge.
-
-        Internally, we use union_find to identify states within an SCC,
-        and incrementally update SCCs, while propagating backwards
-        live and dead SCCs.
-    */
-    class state_graph {
-        typedef unsigned           state;
-        typedef uint_set           state_set;
-        typedef u_map<state_set>   edge_rel;
-        typedef basic_union_find   state_ufind;
-
-    private:
-        /*
-            All states are internally exactly one of:
-            - live:       known to reach a live state
-            - dead:       known to never reach a live state
-            - unknown:    all outgoing edges have been added, but the
-                          state is not known to be live or dead
-            - unexplored: not all outgoing edges have been added
-
-            As SCCs are merged, some states become aliases, and a
-            union find data structure collapses a now obsolete
-            state to its current representative. m_seen keeps track
-            of states we have seen, including obsolete states.
-
-            Invariants:
-            - TODO
-        */
-        state_set   m_live;
-        state_set   m_dead;
-        state_set   m_unknown;
-        state_set   m_unexplored;
-
-        state_set     m_seen;
-        state_ufind   m_state_ufind;
-
-        /*
-            Edges are saved in both from and to maps.
-            A subset of edges are also marked as possibly being
-            part of a cycle by being stored in m_sources_maybecycle.
-
-            Invariants:
-            - TODO
-        */
-        edge_rel   m_sources;
-        edge_rel   m_targets;
-        edge_rel   m_sources_maybecycle;
-
-        /*
-            'Core' functions that modify the plain graph, without
-            updating SCCs or propagating live/dead state information.
-            These are for internal use only.
-        */
-        void add_state_core(state s);    // unexplored + seen
-        void remove_state_core(state s); // unknown + seen -> seen
-        void mark_unknown_core(state s); // unexplored -> unknown
-        void mark_live_core(state s);    // unknown -> live
-        void mark_dead_core(state s);    // unknown -> dead
-
-        void add_edge_core(state s1, state s2, bool maybecycle);
-        void remove_edge_core(state s1, state s2);
-        void rename_edge_core(state old1, state old2, state new1, state new2);
-
-        state merge_states(state s1, state s2);
-        state merge_states(state_set& s_set);
-
-        /*
-            Algorithmic search routines
-            - live state propagation
-            - dead state propagation
-            - cycle / strongly-connected component detection
-        */
-        void mark_live_recursive(state s);
-        void mark_dead_recursive(state s);
-        state merge_all_cycles(state s);
-
-    public:
-        state_graph():
-            m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(),
-            m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle() {}
-
-        /*
-            Exposed methods
-
-            These methods may be called in any order, as long as:
-            - states are added before edges are added between them
-            - outgoing edges are not added from a done state
-            - a done state is not marked as live
-            - edges are not added creating a cycle containing an edge with
-              maybecycle = false (this is not necessary for soundness, but
-              prevents completeness for successfully detecting dead states)
-        */
-        void add_state(state s);
-        void add_edge(state s1, state s2, bool maybecycle);
-        void mark_live(state s);
-        void mark_done(state s);
-
-        bool is_seen(state s);
-        bool is_live(state s);
-        bool is_dead(state s);
-        bool is_done(state s);
-
-        unsigned get_size();
-
-        /*
-            Pretty printing
-        */
-        void pretty_print(std::ostream& o);
-
-    };
-
     class seq_regex {
-        /*
-            Data about a constraint of the form (str.in_re s R)
-        */
         struct s_in_re {
             literal m_lit;
             expr*   m_s;
@@ -171,11 +36,6 @@ namespace smt {
             m_lit(l), m_s(s), m_re(r), m_active(true) {}
         };
 
-        /*
-            Data about a literal for the solver to propagate
-            The trigger guards whether the literal is ready
-            to be addressed yet -- see seq_regex::can_propagate
-        */
         struct propagation_lit {
             literal m_lit;
             literal m_trigger;
@@ -184,35 +44,11 @@ namespace smt {
             propagation_lit(): m_lit(null_literal), m_trigger(null_literal) {}
         };
 
-        theory_seq&                      th;
-        context&                         ctx;
-        ast_manager&                     m;
-        vector<s_in_re>                  m_s_in_re;
-        scoped_vector<propagation_lit>   m_to_propagate;
-
-        /*
-            state_graph for dead state detection, and associated methods
-        */
-        state_graph                    m_state_graph;
-        ptr_addr_map<expr, unsigned>   m_expr_to_state;
-        expr_ref_vector                m_state_to_expr;
-        unsigned                       m_max_state_graph_size { 10000 };
-        // Convert between expressions and states (IDs)
-        unsigned get_state_id(expr* e);
-        expr* get_expr_from_id(unsigned id);
-        // Cycle-detection heuristic
-        // Note: Doesn't need to be sound or complete (doesn't affect soundness)
-        unsigned concat_length(expr* r);
-        unsigned re_rank(expr* r);
-        bool can_be_in_cycle(expr* r1, expr* r2);
-        // Update the graph
-        bool update_state_graph(expr* r);
-
-        // Printing for seq_regex_brief
-        std::string state_str(expr* e);
-        std::string expr_id_str(expr* e);
-
-        // ********************
+        theory_seq&      th;
+        context&         ctx;
+        ast_manager&     m;
+        vector<s_in_re> m_s_in_re;
+        scoped_vector<propagation_lit> m_to_propagate;
 
         seq_util& u();
         class seq_util::re& re();
@@ -243,7 +79,6 @@ namespace smt {
 
         expr_ref symmetric_diff(expr* r1, expr* r2);
 
-        expr_ref is_nullable_wrapper(expr* r);
         expr_ref derivative_wrapper(expr* hd, expr* r);
 
         void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result);
@@ -255,8 +90,6 @@ namespace smt {
 
     public:
 
-        void get_all_derivatives(expr* r, expr_ref_vector& results);
-
         seq_regex(theory_seq& th);
 
         void push_scope() { m_to_propagate.push_scope(); }
@@ -284,3 +117,4 @@ namespace smt {
     };
 
 };
+
diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp
index 16507cd24af..a835c4634aa 100644
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@@ -3379,7 +3379,6 @@ void theory_seq::relevant_eh(app* n) {
 
     expr* arg = nullptr;
     if (m_sk.is_tail(n, arg)) {
-        // TODO: HERE
         add_length_limit(arg, m_max_unfolding_depth, true);
     }
 

From 95d65f5e126c6bcb30bfeaec7683d65c9b63f8fa Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Mon, 27 Jul 2020 19:39:09 -0400
Subject: [PATCH 51/51] remove cache hit/miss count tracing

---
 src/ast/rewriter/seq_rewriter.cpp | 3 +--
 src/ast/rewriter/seq_rewriter.h   | 6 +-----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index ec9984264b8..afc000c33ae 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -4311,7 +4311,6 @@ void seq_rewriter::op_cache::cleanup() {
     if (m_table.size() >= m_max_cache_size) {
         m_trail.reset();
         m_table.reset();
-        STRACE("seq_regex", tout << "Op cache reset!" << std::endl;);
-        STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";);
+        STRACE("seq_verbose", tout << "Derivative op cache reset" << std::endl;);
     }
 }
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 19091a6f24b..5fc3febb526 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -146,11 +146,6 @@ class seq_rewriter {
         op_cache(ast_manager& m);
         expr* find(decl_kind op, expr* a, expr* b);
         void insert(decl_kind op, expr* a, expr* b, expr* r);
-
-        #ifdef _TRACE
-        static unsigned cache_hits;
-        static unsigned cache_misses;
-        #endif
     };
 
     seq_util       m_util;
@@ -345,5 +340,6 @@ class seq_rewriter {
     // heuristic elimination of element from condition that comes form a derivative.
     // special case optimization for conjunctions of equalities, disequalities and ranges.
     void elim_condition(expr* elem, expr_ref& cond);
+
 };