Skip to content

Commit

Permalink
Added 16 bit string-encoding (#5540)
Browse files Browse the repository at this point in the history
  • Loading branch information
CEisenhofer authored Sep 9, 2021
1 parent e70f501 commit 47fdd6c
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 35 deletions.
1 change: 0 additions & 1 deletion src/ast/char_decl_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ Module Name:

char_decl_plugin::char_decl_plugin():
m_charc_sym("Char") {
m_unicode = gparams::get_value("unicode") != "false";
}

char_decl_plugin::~char_decl_plugin() {
Expand Down
6 changes: 2 additions & 4 deletions src/ast/char_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ enum char_op_kind {
class char_decl_plugin : public decl_plugin {
sort* m_char { nullptr };
symbol m_charc_sym;
bool m_unicode { true };

void set_manager(ast_manager * m, family_id id) override;

Expand Down Expand Up @@ -96,8 +95,7 @@ class char_decl_plugin : public decl_plugin {
MATCH_UNARY(is_to_int);
MATCH_BINARY(is_le);

bool unicode() const { return m_unicode; }
unsigned max_char() const { return m_unicode ? zstring::unicode_max_char() : zstring::ascii_max_char(); }
unsigned num_bits() const { return m_unicode ? zstring::unicode_num_bits() : zstring::ascii_num_bits(); }
static unsigned max_char() { return zstring::max_char(); }

static unsigned num_bits() { return zstring::num_bits(); }
};
2 changes: 0 additions & 2 deletions src/ast/seq_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ class seq_decl_plugin : public decl_plugin {

void finalize() override;

bool unicode() const { return get_char_plugin().unicode(); }

decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); }

sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override;
Expand Down
15 changes: 11 additions & 4 deletions src/params/context_params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,15 @@ void context_params::set(char const * param, char const * value) {
else if (p == "smtlib2_compliant") {
set_bool(m_smtlib2_compliant, param, value);
}
else if (p == "unicode") {
set_bool(m_unicode, param, value);
else if (p == "encoding") {
if (strcmp(value, "unicode") == 0 || strcmp(value, "bmp") == 0 || strcmp(value, "ascii") == 0) {
m_encoding = value;
}
else {
std::stringstream strm;
strm << "invalid value '" << value << "' for parameter '" << param << "' (supported: unicode, bmp, ascii)";
throw default_exception(strm.str());
}
}
else {
param_descrs d;
Expand Down Expand Up @@ -140,7 +147,7 @@ void context_params::updt_params(params_ref const & p) {
m_debug_ref_count = p.get_bool("debug_ref_count", m_debug_ref_count);
m_smtlib2_compliant = p.get_bool("smtlib2_compliant", m_smtlib2_compliant);
m_statistics = p.get_bool("stats", m_statistics);
m_unicode = p.get_bool("unicode", m_unicode);
m_encoding = p.get_str("encoding", m_encoding.c_str());
}

void context_params::collect_param_descrs(param_descrs & d) {
Expand All @@ -157,7 +164,7 @@ void context_params::collect_param_descrs(param_descrs & d) {
d.insert("debug_ref_count", CPK_BOOL, "debug support for AST reference counting", "false");
d.insert("smtlib2_compliant", CPK_BOOL, "enable/disable SMT-LIB 2.0 compliance", "false");
d.insert("stats", CPK_BOOL, "enable/disable statistics", "false");
d.insert("unicode", CPK_BOOL, "use unicode strings instead of ASCII strings");
d.insert("encoding", CPK_STRING, "string encoding used internally: unicode|bmp|ascii", "unicode");
// statistics are hidden as they are controlled by the /st option.
collect_solver_param_descrs(d);
}
Expand Down
31 changes: 16 additions & 15 deletions src/params/context_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Module Name:
#pragma once

#include "util/params.h"
#include "util/zstring.h"

class context_params {
void set_bool(bool & opt, char const * param, char const * value);
Expand All @@ -28,21 +29,21 @@ class context_params {
unsigned m_rlimit { 0 };

public:
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
bool m_unicode { true };
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
std::string m_encoding { "unicode" };

unsigned rlimit() const { return m_rlimit; }
context_params();
Expand Down
22 changes: 14 additions & 8 deletions src/util/zstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d;
}
else if (*(s+3+i) == '}') {
if (result > 255 && !uses_unicode())
return false;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 4 + i;
return true;
Expand All @@ -65,7 +63,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d2;
result = 16*result + d3;
result = 16*result + d4;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 6;
return true;
Expand All @@ -87,14 +85,22 @@ zstring::zstring(char const* s) {
SASSERT(well_formed());
}


bool zstring::uses_unicode() const {
return gparams::get_value("unicode") != "false";
string_encoding zstring::get_encoding() {
if (gparams::get_value("encoding") == "unicode") {
return unicode;
}
if (gparams::get_value("encoding") == "bmp") {
return bmp;
}
if (gparams::get_value("encoding") == "ascii") {
return ascii;
}
return unicode;
}

bool zstring::well_formed() const {
for (unsigned ch : m_buffer) {
if (ch > unicode_max_char()) {
if (ch > max_char()) {
IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";);
return false;
}
Expand Down
32 changes: 31 additions & 1 deletion src/util/zstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,47 @@ Module Name:
#include "util/buffer.h"
#include "util/rational.h"

enum string_encoding {
ascii, // exactly 8 bits
unicode,
bmp // basic multilingual plane; exactly 16 bits
};

class zstring {
private:
buffer<uint32_t> m_buffer;
bool well_formed() const;
bool uses_unicode() const;
bool is_escape_char(char const *& s, unsigned& result);
public:
static unsigned unicode_max_char() { return 196607; }
static unsigned unicode_num_bits() { return 18; }
static unsigned bmp_max_char() { return 65535; }
static unsigned bmp_num_bits() { return 16; }
static unsigned ascii_max_char() { return 255; }
static unsigned ascii_num_bits() { return 8; }
static unsigned max_char() {
switch (get_encoding()) {
case unicode:
return unicode_max_char();
case bmp:
return bmp_max_char();
case ascii:
return ascii_max_char();
}
return unicode_max_char();
}
static unsigned num_bits() {
switch (get_encoding()) {
case unicode:
return unicode_num_bits();
case bmp:
return bmp_num_bits();
case ascii:
return ascii_num_bits();
}
return unicode_num_bits();
}
static string_encoding get_encoding();
zstring() = default;
zstring(char const* s);
zstring(const std::string &str) : zstring(str.c_str()) {}
Expand Down

0 comments on commit 47fdd6c

Please sign in to comment.