Skip to content

Commit

Permalink
Merge pull request #187 from eileencodes/escape-entities
Browse files Browse the repository at this point in the history
Added option to do entity encoding
  • Loading branch information
brianmario authored Jul 27, 2018
2 parents c85c858 + 8d0a552 commit 4a0fb0b
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 4 deletions.
42 changes: 39 additions & 3 deletions ext/yajl/yajl_encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,18 @@ yajl_string_encode2(const yajl_print_t print,
unsigned int htmlSafe)
{
unsigned int beg = 0;
unsigned int end = 0;
unsigned int end = 0;
unsigned int increment = 0;
char hexBuf[7];
char entityBuffer[7];
hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
hexBuf[6] = 0;

entityBuffer[0] = '\\'; entityBuffer[1] = 'u'; entityBuffer[2] = '2'; entityBuffer[3] = '0';
entityBuffer[6] = 0;

while (end < len) {
increment = 1;
const char * escaped = NULL;
switch (str[end]) {
case '\r': escaped = "\\r"; break;
Expand All @@ -76,10 +82,39 @@ yajl_string_encode2(const yajl_print_t print,
case '\b': escaped = "\\b"; break;
case '\t': escaped = "\\t"; break;
case '/':
if (htmlSafe) {
if (htmlSafe == 1 || htmlSafe == 2) {
escaped = "\\/";
}
break;
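/* Escape U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR), whose UTF-8 encodings are the byte sequences 0xE2 0x80 0xA8 and 0xE2 0x80 0xA9 */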
case 0xe2:
if (htmlSafe == 2) {
if (len - end >= 2 && str[end + 1] == 0x80) {
if (str[end + 2] == 0xa8) {
increment = 3;
entityBuffer[4] = '2';
entityBuffer[5] = '8';
escaped = entityBuffer;
break;
}

if (str[end + 2] == 0xa9) {
increment = 3;
entityBuffer[4] = '2';
entityBuffer[5] = '9';
escaped = entityBuffer;
break;
}
}
}
case '<':
case '>':
case '&':
if (htmlSafe == 2) {
CharToHex(str[end], hexBuf + 4);
escaped = hexBuf;
}
break;
default:
if ((unsigned char) str[end] < 32) {
CharToHex(str[end], hexBuf + 4);
Expand All @@ -90,7 +125,8 @@ yajl_string_encode2(const yajl_print_t print,
if (escaped != NULL) {
print(ctx, (const char *) (str + beg), end - beg);
print(ctx, escaped, (unsigned int)strlen(escaped));
beg = ++end;
end += increment;
beg = end;
} else {
++end;
}
Expand Down
6 changes: 6 additions & 0 deletions ext/yajl/yajl_ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1030,9 +1030,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
actualIndent = indentString;
}
}

if (rb_hash_aref(opts, sym_html_safe) == Qtrue) {
htmlSafe = 1;
}

if (rb_hash_aref(opts, sym_entities) == Qtrue) {
htmlSafe = 2;
}
}
if (!indentString) {
indentString = defaultIndentString;
Expand Down Expand Up @@ -1356,6 +1361,7 @@ void Init_yajl() {
sym_pretty = ID2SYM(rb_intern("pretty"));
sym_indent = ID2SYM(rb_intern("indent"));
sym_html_safe = ID2SYM(rb_intern("html_safe"));
sym_entities = ID2SYM(rb_intern("entities"));
sym_terminator = ID2SYM(rb_intern("terminator"));
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
Expand Down
2 changes: 1 addition & 1 deletion ext/yajl/yajl_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ static rb_encoding *utf8Encoding;
static VALUE cStandardError, cParseError, cEncodeError, mYajl, cParser, cProjector, cEncoder;
static ID intern_io_read, intern_call, intern_keys, intern_to_s,
intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe, sym_entities;

#define GetParser(obj, sval) Data_Get_Struct(obj, yajl_parser_wrapper, sval);
#define GetEncoder(obj, sval) Data_Get_Struct(obj, yajl_encoder_wrapper, sval);
Expand Down
16 changes: 16 additions & 0 deletions spec/encoding/encoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,27 @@ def to_s
expect(safe_encoder.encode("</script>")).to eql("\"<\\/script>\"")
end

it "should not encode characters with entities by default" do
  # With no options given, U+2028, U+2029, '>', '<' and '&' are expected
  # to appear in the output unchanged, only wrapped in JSON quotes.
  dumped = Yajl.dump("\u2028\u2029><&")
  expect(dumped).to eql("\"\u2028\u2029><&\"")
end

it "should encode characters with entities when enabled" do
  # With entities: true, each of the five characters is expected as a
  # \uXXXX escape, in the same order as the input.
  dumped = Yajl.dump("\u2028\u2029><&", :entities => true)
  expect(dumped).to eql("\"\\u2028\\u2029\\u003E\\u003C\\u0026\"")
end

it "should default to *not* escaping / characters" do
  # An encoder built without options must not produce the "\/" form.
  default_json = Yajl::Encoder.new.encode("</script>")
  expect(default_json).not_to eql("\"<\\/script>\"")
end

it "should encode slashes when enabled" do
  without_entities = Yajl::Encoder.new(:entities => false)
  with_entities = Yajl::Encoder.new(:entities => true)

  # :entities => false must not yield the slash-escaped form.
  plain_json = without_entities.encode("</script>")
  expect(plain_json).not_to eql("\"<\\/script>\"")

  # :entities => true is expected to escape '/' as "\/" and to emit
  # '<' and '>' as \u003C and \u003E.
  entity_json = with_entities.encode("</script>")
  expect(entity_json).to eql("\"\\u003C\\/script\\u003E\"")
end

it "return value of #to_json must be a string" do
expect {
Yajl::Encoder.encode(TheMindKiller.new)
Expand Down

0 comments on commit 4a0fb0b

Please sign in to comment.