diff --git a/src/api/api_seq.cpp b/src/api/api_seq.cpp index 6d3524544d1..311e238cbee 100644 --- a/src/api/api_seq.cpp +++ b/src/api/api_seq.cpp @@ -163,12 +163,32 @@ extern "C" { SET_ERROR_CODE(Z3_INVALID_ARG, "expression is not a string literal"); return ""; } - mk_c(c)->m_char_buffer.reset(); + auto& buffer = mk_c(c)->m_char_buffer; + buffer.reset(); + svector buff; for (unsigned i = 0; i < str.length(); ++i) { - mk_c(c)->m_char_buffer.push_back((char)str[i]); + unsigned ch = str[i]; + if (ch >= 256) { + buff.reset(); + buffer.push_back('\\'); + buffer.push_back('\\'); // possibly replace by native non-escaped version? + buffer.push_back('u'); + buffer.push_back('{'); + while (ch > 0) { + buff.push_back('0' + (ch & 0xF)); + ch /= 16; + } + for (unsigned j = buff.size(); j-- > 0; ) { + buffer.push_back(buff[j]); + } + buffer.push_back('}'); + } + else { + buffer.push_back((char)ch); + } } - *length = str.length(); - return mk_c(c)->m_char_buffer.c_ptr(); + *length = buffer.size(); + return buffer.c_ptr(); Z3_CATCH_RETURN(""); } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4eed120b5a8..0cb6f7394ee 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -1739,7 +1739,7 @@ br_status seq_rewriter::mk_str_lt(expr* a, expr* b, expr_ref& result) { br_status seq_rewriter::mk_str_from_code(expr* a, expr_ref& result) { rational r; if (m_autil.is_numeral(a, r)) { - if (r.is_neg() || r > m_util.str.max_char_value()) { + if (r.is_neg() || r > zstring::max_char()) { result = m_util.str.mk_string(symbol("")); } else { diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 4ffe77d1abe..3360e300e41 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -158,28 +158,18 @@ static bool is_escape_char(char const *& s, unsigned& result) { return false; } -zstring::zstring() {} - zstring::zstring(char const* s) { - unsigned mask = 0xFF; // TBD for Unicode while (*s) { - unsigned ch; + unsigned ch = 0; if (is_escape_char(s, ch)) { - m_buffer.push_back(ch & mask); + m_buffer.push_back(ch); } else { - m_buffer.push_back(*s & mask); + m_buffer.push_back(*s); ++s; } } -} - -zstring::zstring(zstring const& other) { - m_buffer = other.m_buffer; -} - -zstring::zstring(unsigned sz, unsigned const* s) { - m_buffer.append(sz, s); + SASSERT(well_formed()); } zstring::zstring(unsigned num_bits, bool const* ch) { @@ -189,10 +179,18 @@ zstring::zstring(unsigned num_bits, bool const* ch) { n |= (((unsigned)ch[i]) << i); } m_buffer.push_back(n); + SASSERT(well_formed()); +} + +bool zstring::well_formed() const { + for (unsigned ch : m_buffer) { + if (ch > max_char()) + return false; + } + return true; } zstring::zstring(unsigned ch) { - SASSERT(ch <= 196607); m_buffer.push_back(ch); } @@ -267,20 +265,6 @@ std::string zstring::encode() const { return strm.str(); } -std::string zstring::as_string() const { - std::ostringstream strm; - char buffer[100]; - unsigned offset = 0; - for (unsigned i = 0; i < m_buffer.size(); ++i) { - if (offset == 99) { _flush(); } - unsigned char ch = m_buffer[i]; - buffer[offset++] = (char)(ch); - } - _flush(); - return strm.str(); -} - - bool zstring::suffixof(zstring const& other) const { if (length() > other.length()) return false; bool suffix = true; @@ -1079,13 +1063,12 @@ app* seq_util::str::mk_string(zstring const& s) const { return u.seq.mk_string(s); } -app* seq_util::str::mk_char(zstring const& s, unsigned idx) const { - // TBD: change to unicode +app* seq_util::str::mk_char(zstring const& s, unsigned idx) const { + // TBD for unicode return u.bv().mk_numeral(s[idx], 8); } -app* seq_util::str::mk_char(char ch) const { - // TBD: change to unicode +app* seq_util::str::mk_char(char ch) const { zstring s(ch); return mk_char(s, 0); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 81e9009dcea..578e76a4d4b 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -17,6 +17,8 @@ Revision History: Updated to string sequences 2015-12-5 + Add SMTLIB 2.6 support 2020-5-17 + --*/ #ifndef SEQ_DECL_PLUGIN_H_ #define SEQ_DECL_PLUGIN_H_ @@ -103,17 +105,18 @@ enum seq_op_kind { class zstring { private: buffer m_buffer; + bool well_formed() const; public: - zstring(); + static unsigned max_char() { return 196607; } + zstring() {} zstring(char const* s); - zstring(unsigned sz, unsigned const* s); - zstring(zstring const& other); + zstring(unsigned sz, unsigned const* s) { m_buffer.append(sz, s); SASSERT(well_formed()); } + zstring(zstring const& other): m_buffer(other.m_buffer) {} zstring(unsigned num_bits, bool const* ch); zstring(unsigned ch); zstring& operator=(zstring const& other); zstring replace(zstring const& src, zstring const& dst) const; std::string encode() const; - std::string as_string() const; unsigned length() const { return m_buffer.size(); } unsigned operator[](unsigned i) const { return m_buffer[i]; } bool empty() const { return m_buffer.empty(); } @@ -257,9 +260,6 @@ class seq_util { public: str(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} - unsigned min_char_value() const { return 0; } - unsigned max_char_value() const { return 196607; } - sort* mk_seq(sort* s) const { parameter param(s); return m.mk_sort(m_fid, SEQ_SORT, 1, ¶m); } sort* mk_string_sort() const { return m.mk_sort(m_fid, _STRING_SORT, 0, nullptr); } app* mk_empty(sort* s) const { return m.mk_const(m.mk_func_decl(m_fid, OP_SEQ_EMPTY, 0, nullptr, 0, (expr*const*)nullptr, s)); } diff --git a/src/smt/seq_axioms.cpp b/src/smt/seq_axioms.cpp index 620653c8c25..dccf8c81fe2 100644 --- a/src/smt/seq_axioms.cpp +++ b/src/smt/seq_axioms.cpp @@ -802,7 +802,7 @@ void seq_axioms::add_str_to_code_axiom(expr* n) { VERIFY(seq.str.is_to_code(n, e)); literal len_is1 = mk_eq(mk_len(e), a.mk_int(1)); add_axiom(~len_is1, mk_ge(n, 0)); - add_axiom(~len_is1, mk_le(n, seq.str.max_char_value())); + add_axiom(~len_is1, mk_le(n, zstring::max_char())); add_axiom(len_is1, mk_eq(n, a.mk_int(-1))); } @@ -815,7 +815,7 @@ void seq_axioms::add_str_from_code_axiom(expr* n) { expr* e = nullptr; VERIFY(seq.str.is_from_code(n, e)); literal ge = mk_ge(e, 0); - literal le = mk_le(e, seq.str.max_char_value()); + literal le = mk_le(e, zstring::max_char()); literal emp = mk_literal(seq.str.mk_is_empty(n)); add_axiom(~ge, ~le, mk_eq(mk_len(n), a.mk_int(1))); add_axiom(~ge, ~le, mk_eq(seq.str.mk_to_code(n), e));