Skip to content

Commit

Permalink
re info extension (#4659)
Browse files Browse the repository at this point in the history
* made loop info calculation more accurate

* made loop info calculation more accurate

* updated formattig and added const declarations
  • Loading branch information
veanes authored Aug 22, 2020
1 parent a58b8ce commit 5e5ef50
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 35 deletions.
47 changes: 31 additions & 16 deletions src/ast/seq_decl_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
info i1, i2;
lbool nullable(l_false);
unsigned min_length(0), lower_bound(0);
unsigned min_length(0), lower_bound(0), upper_bound(UINT_MAX);
bool is_value(false);
bool normalized(false);
if (e->get_family_id() == u.get_family_id()) {
Expand Down Expand Up @@ -1688,8 +1688,11 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
return i1.complement();
case OP_RE_LOOP:
i1 = get_info_rec(e->get_arg(0));
lower_bound = e->get_decl()->get_parameter(0).get_int();
return i1.loop(lower_bound);
if (e->get_decl()->get_num_parameters() >= 1)
lower_bound = e->get_decl()->get_parameter(0).get_int();
if (e->get_decl()->get_num_parameters() == 2)
upper_bound = e->get_decl()->get_parameter(1).get_int();
return i1.loop(lower_bound, upper_bound);
case OP_RE_DIFF:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
Expand Down Expand Up @@ -1742,16 +1745,17 @@ seq_util::rex::info seq_util::rex::info::star() const {

seq_util::rex::info seq_util::rex::info::plus() const {
if (is_known()) {
//r+ is not normalized if r is nullable, the normalized form would be r*
return info(classical, classical, interpreted, nonbranching, (nullable == l_false ? normalized : false), monadic, false, nullable, min_length, star_height + 1);
//plus never occurs in a normalized regex
return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1);
}
else
return *this;
}

seq_util::rex::info seq_util::rex::info::opt() const {
//if is_known() is false then all mentioned properties will remain false
return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height);
//optional construct never occurs in a normalized regex
return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height);
}

seq_util::rex::info seq_util::rex::info::complement() const {
Expand All @@ -1764,7 +1768,7 @@ seq_util::rex::info seq_util::rex::info::complement() const {
return *this;
}

seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool lhs_is_concat) const {
seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const {
if (is_known()) {
if (rhs.is_known()) {
unsigned m = min_length + rhs.min_length;
Expand All @@ -1788,7 +1792,7 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool
return *this;
}

seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const {
seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
if (is_known() || rhs.is_known()) {
//works correctly if one of the arguments is unknown
return info(classical & rhs.classical,
Expand All @@ -1806,7 +1810,7 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const {
return rhs;
}

seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const {
seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
if (is_known()) {
if (rhs.is_known()) {
return info(false,
Expand All @@ -1827,7 +1831,7 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const {
return *this;
}

seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const {
seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
if (is_known()) {
if (rhs.is_known()) {
return info(false,
Expand All @@ -1848,7 +1852,7 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const {
return *this;
}

seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const {
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
if (is_known()) {
if (i.is_known()) {
unsigned ite_min_length = std::min(min_length, i.min_length);
Expand All @@ -1863,16 +1867,27 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const {
return *this;
}

seq_util::rex::info seq_util::rex::info::loop(unsigned lower) const {
seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const {
if (is_known()) {
//r{l,m} is not normalized if r is nullable but l > 0
unsigned m = min_length * lower;
if (m > 0 && (m < min_length || m < lower))
m = UINT_MAX;
bool loop_normalized = (nullable == l_false && lower > 0 ? false : normalized);
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
//TBD: pass also upper bound and if upper bound is UINT_MAX then this is * and the loop is thus not normalized
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
if (upper == UINT_MAX) {
//this means the loop is r{lower,*} and is therefore not normalized
//normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r*
return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1);
}
else {
bool loop_normalized = normalized;
//r{lower,upper} is not normalized if r is nullable but lower > 0
//r{0,1} is not normalized: it should be ()|r
//r{1,1} is not normalized: it should be r
//r{lower,upper} is not normalized if lower > upper it should then be [] (empty)
if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper)
loop_normalized = false;
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
}
}
else
return *this;
Expand Down
38 changes: 19 additions & 19 deletions src/ast/seq_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,27 +413,27 @@ class seq_util {
public:
struct info {
/* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
lbool known{ l_undef };
lbool known { l_undef };
/* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
bool classical{ false };
bool classical { false };
/* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
bool standard{ false };
bool standard { false };
/* There are no uninterpreted symbols. */
bool interpreted{ false };
bool interpreted { false };
/* No if-then-else is used. */
bool nonbranching{ false };
bool nonbranching { false };
/* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
bool normalized{ false };
bool normalized { false };
/* All bounded loops have a body that is a singleton. */
bool monadic{ false };
bool monadic { false };
/* Positive Boolean combination of ranges or predicates or singleton sequences. */
bool singleton{ false };
bool singleton { false };
/* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
lbool nullable{ l_undef };
lbool nullable { l_undef };
/* Lower bound on the length of all accepted words. */
unsigned min_length{ 0 };
unsigned min_length { 0 };
/* Maximum nesting depth of Kleene stars. */
unsigned star_height{ 0 };
unsigned star_height { 0 };

/*
Default constructor of invalid info.
Expand Down Expand Up @@ -480,21 +480,21 @@ class seq_util {
info plus() const;
info opt() const;
info complement() const;
info concat(info & rhs, bool lhs_is_concat) const;
info disj(info& rhs) const;
info conj(info& rhs) const;
info diff(info& rhs) const;
info orelse(info& rhs) const;
info loop(unsigned lower) const;
info concat(info const& rhs, bool lhs_is_concat) const;
info disj(info const& rhs) const;
info conj(info const& rhs) const;
info diff(info const& rhs) const;
info orelse(info const& rhs) const;
info loop(unsigned lower, unsigned upper) const;
};
private:
seq_util& u;
ast_manager& m;
family_id m_fid;
vector<info> mutable m_infos;
expr_ref_vector mutable m_info_pinned;
info invalid_info{ info(l_undef) };
info unknown_info{ info(l_false) };
info invalid_info { info(l_undef) };
info unknown_info { info(l_false) };

bool has_valid_info(expr* r) const;
info get_info_rec(expr* r) const;
Expand Down

0 comments on commit 5e5ef50

Please sign in to comment.