Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added 16 bit string-encoding #5540

Merged
merged 2 commits into from
Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/ast/char_decl_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ Module Name:

char_decl_plugin::char_decl_plugin():
m_charc_sym("Char") {
m_unicode = gparams::get_value("unicode") != "false";
}

char_decl_plugin::~char_decl_plugin() {
Expand Down
6 changes: 2 additions & 4 deletions src/ast/char_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ enum char_op_kind {
class char_decl_plugin : public decl_plugin {
sort* m_char { nullptr };
symbol m_charc_sym;
bool m_unicode { true };

void set_manager(ast_manager * m, family_id id) override;

Expand Down Expand Up @@ -96,8 +95,7 @@ class char_decl_plugin : public decl_plugin {
MATCH_UNARY(is_to_int);
MATCH_BINARY(is_le);

bool unicode() const { return m_unicode; }
unsigned max_char() const { return m_unicode ? zstring::unicode_max_char() : zstring::ascii_max_char(); }
unsigned num_bits() const { return m_unicode ? zstring::unicode_num_bits() : zstring::ascii_num_bits(); }
static unsigned max_char() { return zstring::max_char(); }

static unsigned num_bits() { return zstring::num_bits(); }
};
2 changes: 0 additions & 2 deletions src/ast/seq_decl_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ class seq_decl_plugin : public decl_plugin {

void finalize() override;

bool unicode() const { return get_char_plugin().unicode(); }

decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); }

sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override;
Expand Down
15 changes: 11 additions & 4 deletions src/params/context_params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,15 @@ void context_params::set(char const * param, char const * value) {
else if (p == "smtlib2_compliant") {
set_bool(m_smtlib2_compliant, param, value);
}
else if (p == "unicode") {
set_bool(m_unicode, param, value);
else if (p == "encoding") {
if (strcmp(value, "unicode") == 0 || strcmp(value, "bmp") == 0 || strcmp(value, "ascii") == 0) {
m_encoding = value;
}
else {
std::stringstream strm;
strm << "invalid value '" << value << "' for parameter '" << param << "' (supported: unicode, bmp, ascii)";
throw default_exception(strm.str());
}
}
else {
param_descrs d;
Expand Down Expand Up @@ -140,7 +147,7 @@ void context_params::updt_params(params_ref const & p) {
m_debug_ref_count = p.get_bool("debug_ref_count", m_debug_ref_count);
m_smtlib2_compliant = p.get_bool("smtlib2_compliant", m_smtlib2_compliant);
m_statistics = p.get_bool("stats", m_statistics);
m_unicode = p.get_bool("unicode", m_unicode);
m_encoding = p.get_str("encoding", m_encoding.c_str());
}

void context_params::collect_param_descrs(param_descrs & d) {
Expand All @@ -157,7 +164,7 @@ void context_params::collect_param_descrs(param_descrs & d) {
d.insert("debug_ref_count", CPK_BOOL, "debug support for AST reference counting", "false");
d.insert("smtlib2_compliant", CPK_BOOL, "enable/disable SMT-LIB 2.0 compliance", "false");
d.insert("stats", CPK_BOOL, "enable/disable statistics", "false");
d.insert("unicode", CPK_BOOL, "use unicode strings instead of ASCII strings");
d.insert("encoding", CPK_STRING, "string encoding used internally: unicode|bmp|ascii", "unicode");
// statistics are hidden as they are controlled by the /st option.
collect_solver_param_descrs(d);
}
Expand Down
31 changes: 16 additions & 15 deletions src/params/context_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Module Name:
#pragma once

#include "util/params.h"
#include "util/zstring.h"

class context_params {
void set_bool(bool & opt, char const * param, char const * value);
Expand All @@ -28,21 +29,21 @@ class context_params {
unsigned m_rlimit { 0 };

public:
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
bool m_unicode { true };
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
std::string m_encoding { "unicode" };

unsigned rlimit() const { return m_rlimit; }
context_params();
Expand Down
22 changes: 14 additions & 8 deletions src/util/zstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d;
}
else if (*(s+3+i) == '}') {
if (result > 255 && !uses_unicode())
return false;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 4 + i;
return true;
Expand All @@ -65,7 +63,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d2;
result = 16*result + d3;
result = 16*result + d4;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 6;
return true;
Expand All @@ -87,14 +85,22 @@ zstring::zstring(char const* s) {
SASSERT(well_formed());
}


bool zstring::uses_unicode() const {
return gparams::get_value("unicode") != "false";
string_encoding zstring::get_encoding() {
if (gparams::get_value("encoding") == "unicode") {
return unicode;
}
if (gparams::get_value("encoding") == "bmp") {
return bmp;
}
if (gparams::get_value("encoding") == "ascii") {
return ascii;
}
return unicode;
}

bool zstring::well_formed() const {
for (unsigned ch : m_buffer) {
if (ch > unicode_max_char()) {
if (ch > max_char()) {
IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";);
return false;
}
Expand Down
32 changes: 31 additions & 1 deletion src/util/zstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,47 @@ Module Name:
#include "util/buffer.h"
#include "util/rational.h"

enum string_encoding {
ascii, // exactly 8 bits
unicode,
bmp // basic multilingual plane; exactly 16 bits
};

class zstring {
private:
buffer<uint32_t> m_buffer;
bool well_formed() const;
bool uses_unicode() const;
bool is_escape_char(char const *& s, unsigned& result);
public:
static unsigned unicode_max_char() { return 196607; }
static unsigned unicode_num_bits() { return 18; }
static unsigned bmp_max_char() { return 65535; }
static unsigned bmp_num_bits() { return 16; }
static unsigned ascii_max_char() { return 255; }
static unsigned ascii_num_bits() { return 8; }
static unsigned max_char() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatting...

switch (get_encoding()) {
case unicode:
return unicode_max_char();
case bmp:
return bmp_max_char();
case ascii:
return ascii_max_char();
}
return unicode_max_char();
}
static unsigned num_bits() {
switch (get_encoding()) {
case unicode:
return unicode_num_bits();
case bmp:
return bmp_num_bits();
case ascii:
return ascii_num_bits();
}
return unicode_num_bits();
}
static string_encoding get_encoding();
zstring() = default;
zstring(char const* s);
zstring(const std::string &str) : zstring(str.c_str()) {}
Expand Down