Skip to content

Commit

Permalink
pp support for regex expressions is more-or-less standard syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
veanes committed Aug 13, 2020
1 parent 8439057 commit 3ab75bd
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 0 deletions.
188 changes: 188 additions & 0 deletions src/ast/seq_decl_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1463,3 +1463,191 @@ bool seq_util::re::is_epsilon(expr* r) const {
app* seq_util::re::mk_epsilon(sort* seq_sort) {
return mk_to_re(u.str.mk_empty(seq_sort));
}

/*
Provides a standard pretty printed view of the regex r when possible.
*/
std::string seq_util::re::pp(expr* r) {
SASSERT(u.is_re(r));
std::ostringstream buffer;
pp_compact_to_buffer(buffer, r);
return buffer.str();
}

void seq_util::re::pp_compact_to_buffer(std::ostringstream& buffer, expr* r) {
SASSERT(u.is_re(r));
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
unsigned lo = 0, hi = 0;
if (is_full_char(r))
buffer << ".";
else if (is_full_seq(r))
buffer << ".*";
else if (is_to_re(r, s))
pp_compact_helper_seq(buffer, s);
else if (is_range(r, s, s2))
pp_compact_helper_range(buffer, s, s2);
else if (is_epsilon(r))
buffer << "()";
else if (is_empty(r))
buffer << "[]";
else if (is_concat(r, r1, r2)) {
pp_compact_to_buffer(buffer, r1);
pp_compact_to_buffer(buffer, r2);
}
else if (is_union(r, r1, r2)) {
pp_compact_to_buffer(buffer, r1);
buffer << "|";
pp_compact_to_buffer(buffer, r2);
}
else if (is_intersection(r, r1, r2)) {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")&(";
pp_compact_to_buffer(buffer, r2);
buffer << ")";
}
else if (is_complement(r, r1)) {
buffer << "~";
if (pp_can_skip_parenth(r1))
pp_compact_to_buffer(buffer, r1);
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")";
}
}
else if (is_plus(r, r1))
if (pp_can_skip_parenth(r1)) {
pp_compact_to_buffer(buffer, r1);
buffer << "+";
}
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")+";
}
else if (is_star(r, r1))
if (pp_can_skip_parenth(r1)) {
pp_compact_to_buffer(buffer, r1);
buffer << "*";
}
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")*";
}
else if (is_loop(r, r1, lo))
if (pp_can_skip_parenth(r1))
{
pp_compact_to_buffer(buffer, r1);
buffer << "{" << std::to_string(lo) << ",}";
}
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << "){" << std::to_string(lo) << ",}";
}
else if (is_loop(r, r1, lo, hi))
if (pp_can_skip_parenth(r1))
{
pp_compact_to_buffer(buffer, r1);
buffer << "{" << std::to_string(lo) << "," << std::to_string(hi) << "}";
}
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << "){" << std::to_string(lo) << "," << std::to_string(hi) << "}";
}
else if (is_diff(r, r1, r2)) {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")\\(";
pp_compact_to_buffer(buffer, r2);
buffer << ")";
}
else if (m.is_ite(r, s, r1, r2)) {
buffer << "if(" << mk_pp(s, m) << ",";
pp_compact_to_buffer(buffer, r1);
buffer << ",";
pp_compact_to_buffer(buffer, r2);
buffer << ")";
}
else if (is_opt(r, r1))
if (pp_can_skip_parenth(r1)) {
pp_compact_to_buffer(buffer, r1);
buffer << "?";
}
else {
buffer << "(";
pp_compact_to_buffer(buffer, r1);
buffer << ")?";
}
else if (is_reverse(r, r1)) {
buffer << "reverse(";
pp_compact_to_buffer(buffer, r1);
buffer << ")";
}
else
// Else: derivative or is_of_pred
buffer << mk_pp(r, m);
}

void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) {
SASSERT(u.is_seq(s));
if (m.is_value(s)) {
SASSERT(s->get_kind() == ast_kind::AST_APP);
if (u.str.is_concat(s)) {
expr_ref_vector es(m);
u.str.get_concat(s, es);
for (unsigned i = 0; i < es.size(); i++)
if (u.str.is_unit(es.get(i)))
pp_seq_unit(buffer, es.get(i));
else
buffer << mk_pp(es.get(i), m);
}
else
pp_seq_unit(buffer, s);
}
else
buffer << mk_pp(s, m);
}

void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2) {
buffer << "[";
if (u.str.is_unit(s1))
pp_seq_unit(buffer, s1);
else
buffer << mk_pp(s1, m);
buffer << "-";
if (u.str.is_unit(s2))
pp_seq_unit(buffer, s2);
else
buffer << mk_pp(s1, m);
buffer << "]";
}

bool seq_util::re::pp_can_skip_parenth(expr* r) {
expr* s;
return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r));
}

void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) {
expr* e;
if (u.str.is_unit(s, e)) {
rational r;
unsigned sz;
if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) {
unsigned n = r.get_unsigned();
if (32 < n && n < 127)
buffer << (char)n;
else if (n < 10)
buffer << "\\x0" << std::hex << n;
else
buffer << "\\x" << std::hex << n;
}
else
buffer << mk_pp(s, m);
}
else
buffer << mk_pp(s, m);
}
7 changes: 7 additions & 0 deletions src/ast/seq_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,12 @@ class seq_util {
seq_util& u;
ast_manager& m;
family_id m_fid;
void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s);
void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2);
bool seq_util::re::pp_can_skip_parenth(expr* r);
void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s);
void pp_compact_to_buffer(std::ostringstream& buffer, expr* r);

public:
re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {}

Expand Down Expand Up @@ -476,6 +482,7 @@ class seq_util {
unsigned max_length(expr* r) const;
bool is_epsilon(expr* r) const;
app* mk_epsilon(sort* seq_sort);
std::string pp(expr* r);
};
str str;
re re;
Expand Down
5 changes: 5 additions & 0 deletions src/smt/seq_regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,8 @@ namespace smt {
}
STRACE("seq_regex", tout << "Updating state graph for regex "
<< mk_pp(r, m) << ") ";);
if (!m_state_graph.is_seen(r_id))
STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << re().pp(r) << std::endl;);
// Add state
m_state_graph.add_state(r_id);
STRACE("seq_regex_brief", tout << std::endl << "USG("
Expand All @@ -669,6 +671,9 @@ namespace smt {
unsigned dr_id = get_state_id(dr);
STRACE("seq_regex_verbose", tout
<< std::endl << " traversing deriv: " << dr_id << " ";);
if (!m_state_graph.is_seen(dr_id))
STRACE("state_graph", tout << "state(" << dr_id << ") = " << re().pp(dr) << std::endl;);
// Add state
m_state_graph.add_state(dr_id);
bool maybecycle = can_be_in_cycle(r, dr);
m_state_graph.add_edge(r_id, dr_id, maybecycle);
Expand Down

0 comments on commit 3ab75bd

Please sign in to comment.