Skip to content

Commit

Permalink
parser.rl: parse_string implement a fast path
Browse files Browse the repository at this point in the history
If we assume most strings don't contain any escape sequences, we can avoid
a lot of costly operations whenever that assumption holds.

Before:

```
== Parsing activitypub.json (58160 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json   884.000 i/100ms
                  oj   789.000 i/100ms
          Oj::Parser   943.000 i/100ms
           rapidjson   584.000 i/100ms
Calculating -------------------------------------
                json      8.897k (± 1.3%) i/s  (112.40 μs/i) -     45.084k in   5.068520s
                  oj      7.967k (± 1.5%) i/s  (125.52 μs/i) -     40.239k in   5.051985s
          Oj::Parser      9.564k (± 1.4%) i/s  (104.56 μs/i) -     48.093k in   5.029626s
           rapidjson      5.947k (± 1.4%) i/s  (168.16 μs/i) -     29.784k in   5.009437s

Comparison:
                json:     8896.5 i/s
          Oj::Parser:     9563.8 i/s - 1.08x  faster
                  oj:     7966.8 i/s - 1.12x  slower
           rapidjson:     5946.7 i/s - 1.50x  slower

== Parsing twitter.json (567916 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json    83.000 i/100ms
                  oj    64.000 i/100ms
          Oj::Parser    77.000 i/100ms
           rapidjson    54.000 i/100ms
Calculating -------------------------------------
                json    823.083 (± 1.8%) i/s    (1.21 ms/i) -      4.150k in   5.043805s
                  oj    632.538 (± 1.4%) i/s    (1.58 ms/i) -      3.200k in   5.060073s
          Oj::Parser    769.122 (± 1.8%) i/s    (1.30 ms/i) -      3.850k in   5.007501s
           rapidjson    548.494 (± 1.5%) i/s    (1.82 ms/i) -      2.754k in   5.022153s

Comparison:
                json:      823.1 i/s
          Oj::Parser:      769.1 i/s - 1.07x  slower
                  oj:      632.5 i/s - 1.30x  slower
           rapidjson:      548.5 i/s - 1.50x  slower

== Parsing citm_catalog.json (1727030 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json    41.000 i/100ms
                  oj    34.000 i/100ms
          Oj::Parser    45.000 i/100ms
           rapidjson    39.000 i/100ms
Calculating -------------------------------------
                json    427.162 (± 1.2%) i/s    (2.34 ms/i) -      2.173k in   5.087666s
                  oj    351.463 (± 2.8%) i/s    (2.85 ms/i) -      1.768k in   5.035149s
          Oj::Parser    461.849 (± 3.7%) i/s    (2.17 ms/i) -      2.340k in   5.074461s
           rapidjson    395.155 (± 1.8%) i/s    (2.53 ms/i) -      1.989k in   5.034927s

Comparison:
                json:      427.2 i/s
          Oj::Parser:      461.8 i/s - 1.08x  faster
           rapidjson:      395.2 i/s - 1.08x  slower
                  oj:      351.5 i/s - 1.22x  slower
```

After:

```
== Parsing activitypub.json (58160 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json   953.000 i/100ms
                  oj   813.000 i/100ms
          Oj::Parser   956.000 i/100ms
           rapidjson   563.000 i/100ms
Calculating -------------------------------------
                json      9.525k (± 1.2%) i/s  (104.98 μs/i) -     47.650k in   5.003252s
                  oj      8.117k (± 0.5%) i/s  (123.20 μs/i) -     40.650k in   5.008283s
          Oj::Parser      9.590k (± 3.2%) i/s  (104.27 μs/i) -     48.756k in   5.089794s
           rapidjson      6.020k (± 0.9%) i/s  (166.10 μs/i) -     30.402k in   5.050155s

Comparison:
                json:     9525.3 i/s
          Oj::Parser:     9590.1 i/s - same-ish: difference falls within error
                  oj:     8116.7 i/s - 1.17x  slower
           rapidjson:     6020.5 i/s - 1.58x  slower

== Parsing twitter.json (567916 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json    87.000 i/100ms
                  oj    64.000 i/100ms
          Oj::Parser    75.000 i/100ms
           rapidjson    55.000 i/100ms
Calculating -------------------------------------
                json    866.563 (± 0.8%) i/s    (1.15 ms/i) -      4.350k in   5.020138s
                  oj    643.567 (± 0.8%) i/s    (1.55 ms/i) -      3.264k in   5.072101s
          Oj::Parser    777.346 (± 3.5%) i/s    (1.29 ms/i) -      3.900k in   5.023933s
           rapidjson    557.158 (± 0.7%) i/s    (1.79 ms/i) -      2.805k in   5.034731s

Comparison:
                json:      866.6 i/s
          Oj::Parser:      777.3 i/s - 1.11x  slower
                  oj:      643.6 i/s - 1.35x  slower
           rapidjson:      557.2 i/s - 1.56x  slower

== Parsing citm_catalog.json (1727030 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
                json    41.000 i/100ms
                  oj    35.000 i/100ms
          Oj::Parser    40.000 i/100ms
           rapidjson    39.000 i/100ms
Calculating -------------------------------------
                json    429.216 (± 1.2%) i/s    (2.33 ms/i) -      2.173k in   5.063351s
                  oj    354.755 (± 1.1%) i/s    (2.82 ms/i) -      1.785k in   5.032374s
          Oj::Parser    465.114 (± 3.7%) i/s    (2.15 ms/i) -      2.360k in   5.081634s
           rapidjson    387.135 (± 1.3%) i/s    (2.58 ms/i) -      1.950k in   5.037787s

Comparison:
                json:      429.2 i/s
          Oj::Parser:      465.1 i/s - 1.08x  faster
           rapidjson:      387.1 i/s - 1.11x  slower
                  oj:      354.8 i/s - 1.21x  slower
```
  • Loading branch information
byroot committed Nov 6, 2024
1 parent 35324a1 commit 96bd97c
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 65 deletions.
145 changes: 94 additions & 51 deletions ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2302,6 +2302,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}

/*
 * Fast path for turning the raw bytes in [string, stringEnd) into a Ruby
 * value. Nothing is unescaped here: the bytes are handed on exactly as given.
 *
 * When is_name is set, json->name_cache is consulted first (symbol lookup if
 * symbolize is set, string lookup otherwise) and a non-zero result is
 * returned directly. In every other case the value comes from build_string(),
 * which receives intern and symbolize unchanged.
 */
static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
    size_t length;
    VALUE key;

    /* Only names (is_name) go through the cache. */
    if (!is_name) {
        return build_string(string, stringEnd, intern, symbolize);
    }

    length = stringEnd - string;

    if (RB_UNLIKELY(symbolize)) {
        key = rsymbol_cache_fetch(&json->name_cache, string, length);
    } else {
        key = rstring_cache_fetch(&json->name_cache, string, length);
    }

    /* A zero result is treated as a miss; fall back to building the value. */
    if (RB_LIKELY(key)) {
        return key;
    }

    return build_string(string, stringEnd, intern, symbolize);
}

static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
Expand All @@ -2323,7 +2343,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
}

pe = memchr(p, '\\', bufferSize);
if (RB_LIKELY(pe == NULL)) {
if (RB_UNLIKELY(pe == NULL)) {
return build_string(string, stringEnd, intern, symbolize);
}

Expand Down Expand Up @@ -2424,15 +2444,15 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
}


#line 2428 "parser.c"
#line 2448 "parser.c"
enum {JSON_string_start = 1};
enum {JSON_string_first_final = 8};
enum {JSON_string_first_final = 9};
enum {JSON_string_error = 0};

enum {JSON_string_en_main = 1};


#line 1019 "parser.rl"
#line 1051 "parser.rl"


static int
Expand All @@ -2453,15 +2473,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string;


#line 2457 "parser.c"
#line 2477 "parser.c"
{
cs = JSON_string_start;
}

#line 1039 "parser.rl"
#line 1071 "parser.rl"
json->memo = p;

#line 2465 "parser.c"
#line 2485 "parser.c"
{
if ( p == pe )
goto _test_eof;
Expand All @@ -2486,47 +2506,56 @@ case 2:
goto st0;
goto st2;
tr2:
#line 1006 "parser.rl"
#line 1033 "parser.rl"
{
*result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
{p = (( p + 1))-1;}
p--;
{p++; cs = 9; goto _out;}
}
#line 1026 "parser.rl"
{
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
p--;
{p++; cs = 8; goto _out;}
} else {
{p = (( p + 1))-1;}
}
{p = (( p + 1))-1;}
p--;
{p++; cs = 9; goto _out;}
}
#line 1016 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} }
goto st8;
st8:
goto st9;
tr6:
#line 1026 "parser.rl"
{
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
{p = (( p + 1))-1;}
p--;
{p++; cs = 9; goto _out;}
}
goto st9;
st9:
if ( ++p == pe )
goto _test_eof8;
case 8:
#line 2507 "parser.c"
goto _test_eof9;
case 9:
#line 2538 "parser.c"
goto st0;
st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
if ( (*p) == 117 )
goto st4;
goto st5;
if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
goto st0;
goto st2;
goto st4;
st4:
if ( ++p == pe )
goto _test_eof4;
case 4:
if ( (*p) < 65 ) {
if ( 48 <= (*p) && (*p) <= 57 )
goto st5;
} else if ( (*p) > 70 ) {
if ( 97 <= (*p) && (*p) <= 102 )
goto st5;
} else
goto st5;
goto st0;
switch( (*p) ) {
case 34: goto tr6;
case 92: goto st3;
}
if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
goto st0;
goto st4;
st5:
if ( ++p == pe )
goto _test_eof5;
Expand Down Expand Up @@ -2559,27 +2588,41 @@ case 6:
case 7:
if ( (*p) < 65 ) {
if ( 48 <= (*p) && (*p) <= 57 )
goto st2;
goto st8;
} else if ( (*p) > 70 ) {
if ( 97 <= (*p) && (*p) <= 102 )
goto st8;
} else
goto st8;
goto st0;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
if ( (*p) < 65 ) {
if ( 48 <= (*p) && (*p) <= 57 )
goto st4;
} else if ( (*p) > 70 ) {
if ( 97 <= (*p) && (*p) <= 102 )
goto st2;
goto st4;
} else
goto st2;
goto st4;
goto st0;
}
_test_eof2: cs = 2; goto _test_eof;
_test_eof8: cs = 8; goto _test_eof;
_test_eof9: cs = 9; goto _test_eof;
_test_eof3: cs = 3; goto _test_eof;
_test_eof4: cs = 4; goto _test_eof;
_test_eof5: cs = 5; goto _test_eof;
_test_eof6: cs = 6; goto _test_eof;
_test_eof7: cs = 7; goto _test_eof;
_test_eof8: cs = 8; goto _test_eof;

_test_eof: {}
_out: {}
}

#line 1041 "parser.rl"
#line 1073 "parser.rl"

if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
Expand Down Expand Up @@ -2732,15 +2775,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}


#line 2736 "parser.c"
#line 2779 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};

enum {JSON_en_main = 1};


#line 1207 "parser.rl"
#line 1239 "parser.rl"


/*
Expand Down Expand Up @@ -2769,16 +2812,16 @@ static VALUE cParser_parse(VALUE self)
json->stack = &stack;


#line 2773 "parser.c"
#line 2816 "parser.c"
{
cs = JSON_start;
}

#line 1235 "parser.rl"
#line 1267 "parser.rl"
p = json->source;
pe = p + json->len;

#line 2782 "parser.c"
#line 2825 "parser.c"
{
if ( p == pe )
goto _test_eof;
Expand Down Expand Up @@ -2812,7 +2855,7 @@ case 1:
cs = 0;
goto _out;
tr2:
#line 1199 "parser.rl"
#line 1231 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
Expand All @@ -2822,7 +2865,7 @@ cs = 0;
if ( ++p == pe )
goto _test_eof10;
case 10:
#line 2826 "parser.c"
#line 2869 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
Expand Down Expand Up @@ -2911,7 +2954,7 @@ case 9:
_out: {}
}

#line 1238 "parser.rl"
#line 1270 "parser.rl"

if (json->stack_handle) {
rvalue_stack_eagerly_release(json->stack_handle);
Expand Down Expand Up @@ -2947,16 +2990,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
json->stack = &stack;


#line 2951 "parser.c"
#line 2994 "parser.c"
{
cs = JSON_start;
}

#line 1273 "parser.rl"
#line 1305 "parser.rl"
p = json->source;
pe = p + json->len;

#line 2960 "parser.c"
#line 3003 "parser.c"
{
if ( p == pe )
goto _test_eof;
Expand Down Expand Up @@ -2990,7 +3033,7 @@ case 1:
cs = 0;
goto _out;
tr2:
#line 1199 "parser.rl"
#line 1231 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
Expand All @@ -3000,7 +3043,7 @@ cs = 0;
if ( ++p == pe )
goto _test_eof10;
case 10:
#line 3004 "parser.c"
#line 3047 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
Expand Down Expand Up @@ -3089,7 +3132,7 @@ case 9:
_out: {}
}

#line 1276 "parser.rl"
#line 1308 "parser.rl"

if (json->stack_handle) {
rvalue_stack_eagerly_release(json->stack_handle);
Expand Down
Loading

0 comments on commit 96bd97c

Please sign in to comment.