Skip to content

Commit

Permalink
Scheme parsing fix (https) and extending (ws, wss) (#1570)
Browse files Browse the repository at this point in the history
Contributes to #755

Signed-off-by: Aleksey Mikhaylov <[email protected]>
  • Loading branch information
ttaym authored Feb 25, 2022
1 parent 5f2851d commit 46dd34d
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 133 deletions.
315 changes: 182 additions & 133 deletions fw/http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,31 +380,31 @@ do { \
__FSM_I_MATCH_MOVE_fixup_finish(alphabet, to, flag, {})

/* Conditional transition from state @st to @st_next. */
#define __FSM_TX_COND(st, condition, st_next, field) \
__FSM_STATE(st) { \
#define __FSM_TX_COND(st, condition, st_next, field, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_f(st_next, field); \
TFW_PARSER_BLOCK(st); \
}

#define __FSM_TX_COND_nofixup(st, condition, st_next) \
__FSM_STATE(st) { \
#define __FSM_TX_COND_nofixup(st, condition, st_next, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_nofixup(st_next); \
TFW_PARSER_BLOCK(st); \
}

/* Automaton transition from state @st to @st_next on character @ch. */
#define __FSM_TX(st, ch, st_next) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr)
#define __FSM_TX_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next)
#define __FSM_TX(st, ch, st_next, ...) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr, __VA_ARGS__)
#define __FSM_TX_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next, __VA_ARGS__)

/* Case-insensitive version of __FSM_TX(). */
#define __FSM_TX_LC(st, ch, st_next, field) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field)
#define __FSM_TX_LC_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next)
#define __FSM_TX_LC(st, ch, st_next, field, ...) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field, __VA_ARGS__)
#define __FSM_TX_LC_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next, __VA_ARGS__)

/*
* Automaton transition with alphabet checking and fallback state.
Expand Down Expand Up @@ -3837,119 +3837,7 @@ tfw_http_parse_req(void *req_data, unsigned char *data, size_t len,
__FSM_STATE(Req_Uri, hot) {
if (likely(c == '/'))
__FSM_JMP(Req_UriMark);

if (likely(__data_available(p, 7)
&& C4_INT_LCM(p, 'h', 't', 't', 'p')
&& *(p + 4) == ':' && *(p + 5) == '/'
&& *(p + 6) == '/'))
__FSM_MOVE_nofixup_n(Req_UriAuthorityStart, 7);

/* "http://" slow path - step char-by-char. */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);

TFW_PARSER_BLOCK(Req_Uri);
}

/*
* URI host part.
* RFC 3986 chapter 3.2: authority = [userinfo@]host[:port]
*
* Authority parsing: it can be "host" or "userinfo@host" (port is
* parsed later). At the beginning we don't know, which of variants we
* have. So we fill req->host, and if we get '@', we copy host to
* req->userinfo, reset req->host and fill it.
*/
__FSM_STATE(Req_UriAuthorityStart) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
* The case where "Host:" header value is empty.
* A special TfwStr{} string is created that has
* a valid pointer and the length of zero.
*/
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

__FSM_STATE(Req_UriAuthority) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* copy current host to userinfo */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityIPv6) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

__FSM_STATE(Req_UriAuthorityResetHost) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityEnd) {
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* Authority End */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/* Host port in URI */
__FSM_STATE(Req_UriPort) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
__FSM_JMP(Req_UriRareForms);
}

__FSM_STATE(Req_UriMark, hot) {
Expand Down Expand Up @@ -4735,13 +4623,174 @@ Req_Method_1CharStep: __attribute__((cold))
__FSM_MOVE_nofixup_n(Req_MUSpace, 0);
}

/* process URI scheme: "http://" */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp);
__FSM_TX_nofixup(Req_UriSchHttp, ':', Req_UriSchHttpColon);
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart);
/*
 * Request targets that are not in origin form: Req_Uri jumps here when
 * the first URI character is not '/'. Only the asterisk form of an
 * OPTIONS request is recognized directly; any other input is handed to
 * the absolute-form state.
 */
__FSM_STATE(Req_UriRareForms, cold) {
/* There is also the authority form as in RFC7230#section-5.3.3,
 * but it is only used with CONNECT, which is not supported. */
/* Asterisk form as in RFC7230#section-5.3.4 */
if (req->method == TFW_HTTP_METH_OPTIONS && c == '*')
__FSM_MOVE_nofixup(Req_UriMarkEnd);
/* Absolute form as in RFC7230#section-5.3.2 */
__FSM_JMP(Req_UriAbsoluteForm);
}

/*
 * First character of an absolute-form URI: dispatch on the first letter
 * of the scheme, compared through TFW_LC(). 'h' leads to the
 * http/https chain and 'w' to the ws/wss chain; any other character
 * blocks the request.
 */
__FSM_STATE(Req_UriAbsoluteForm, cold) {
/* Rare form so there is no need to speed-up matching with
 * fast path prefixing */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);
else if (TFW_LC(c) == 'w')
__FSM_MOVE_nofixup(Req_UriSchW);

/* Neither 'h' nor 'w': the scheme is not supported. */
TFW_PARSER_BLOCK(Req_UriAbsoluteForm);
}

/*
 * Process the URI scheme one character per state. The supported schemes
 * are "http", "https", "ws" and "wss"; scheme letters are compared via
 * TFW_LC(), while ':' and '/' are compared exactly. Every chain ends in
 * Req_UriAuthorityStart after "://". All states are tagged cold.
 */
/* path for 'http://' and 'https://' */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp, cold);
/*
 * After "http": ':' continues as plain http, 's' continues as https.
 * NOTE(review): the cases have no 'break'; this presumably relies on
 * __FSM_MOVE_nofixup() transferring control out of the state - confirm
 * against the macro definition.
 */
__FSM_STATE(Req_UriSchHttp, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchHttpColon);
case 's':
__FSM_MOVE_nofixup(Req_UriSchHttps);
}
TFW_PARSER_BLOCK(Req_UriSchHttp);
}
/* http: expect "//" after the colon */
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart,
cold);
/* https: expect "://" after the 's' */
__FSM_TX_nofixup(Req_UriSchHttps, ':', Req_UriSchHttpsColon, cold);
__FSM_TX_nofixup(Req_UriSchHttpsColon, '/', Req_UriSchHttpsColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpsColonSlash, '/', Req_UriAuthorityStart,
cold);
/* path for 'ws://' and 'wss://' */
__FSM_TX_LC_nofixup(Req_UriSchW, 's', Req_UriSchWs, cold);
/*
 * After "ws": ':' continues as plain ws, a second 's' continues as wss.
 * Same no-'break' structure as Req_UriSchHttp.
 */
__FSM_STATE(Req_UriSchWs, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchWsColon);
case 's':
__FSM_MOVE_nofixup(Req_UriSchWss);
}
TFW_PARSER_BLOCK(Req_UriSchWs);
}
/* ws: expect "//" after the colon */
__FSM_TX_nofixup(Req_UriSchWsColon, '/', Req_UriSchWsColonSlash, cold);
__FSM_TX_nofixup(Req_UriSchWsColonSlash, '/', Req_UriAuthorityStart,
cold);
/* wss: expect "://" after the second 's' */
__FSM_TX_nofixup(Req_UriSchWss, ':', Req_UriSchWssColon, cold);
__FSM_TX_nofixup(Req_UriSchWssColon, '/', Req_UriSchWssColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchWssColonSlash, '/', Req_UriAuthorityStart,
cold);

/*
 * URI host part.
 * RFC 3986 chapter 3.2: authority = [userinfo@]host[:port]
 *
 * Authority parsing: it can be "host" or "userinfo@host" (port is
 * parsed later). At the beginning we don't know which of the variants
 * we have. So we fill req->host, and if we get '@', we copy host to
 * req->userinfo, reset req->host and fill it.
 *
 * This state handles the first character after "scheme://":
 *   - alphanumeric, '.' or '-': open req->host and go to Req_UriAuthority;
 *   - '/': empty authority, continue with the path in Req_UriMark;
 *   - '[': open req->host and go to the bracketed (IPv6) state;
 *   - anything else: block the request.
 */
__FSM_STATE(Req_UriAuthorityStart, cold) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
 * The case where the host in the URI is empty.
 * A special TfwStr{} string is created that has
 * a valid pointer and the length of zero.
 */
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
/* '/' is not consumed here; Req_UriMark sees the same character. */
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

/*
 * Body of the authority. Alphanumerics, '.' and '-' keep extending
 * req->host. An '@' means that what was collected so far was userinfo:
 * it is moved to req->userinfo and host collection restarts in
 * Req_UriAuthorityResetHost. Any other character is passed, unconsumed,
 * to Req_UriAuthorityEnd.
 */
__FSM_STATE(Req_UriAuthority, cold) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
/* Only one userinfo part is allowed. */
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* copy current host to userinfo */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
/* reset req->host so it can be filled with the real host */
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
 * Inside a bracketed host (entered after '['). Hex digits and ':' are
 * accepted and added to req->host; ']' closes the literal and moves to
 * Req_UriAuthorityEnd. Any other character blocks the request.
 */
__FSM_STATE(Req_UriAuthorityIPv6, cold) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

/*
 * First character of the host after "userinfo@". Mirrors
 * Req_UriAuthorityStart for the alphanumeric/'.'/'-' and '[' cases, but
 * any other character is forwarded to Req_UriAuthorityEnd instead of
 * blocking in this state.
 */
__FSM_STATE(Req_UriAuthorityResetHost, cold) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
 * End of the host part. A ':' starts the port; otherwise req->host is
 * finished and the parser continues with the path ('/') or, when a
 * space follows, with the HTTP version. Any other character blocks the
 * request.
 */
__FSM_STATE(Req_UriAuthorityEnd, cold) {
/*
 * NOTE(review): the ':' is moved with &req->host, so the port
 * presumably becomes part of the host field - confirm against
 * __FSM_MOVE_f().
 */
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* Authority End */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
/* '/' is left for Req_UriMark to process. */
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
/* URI consists of scheme and authority only, no path. */
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/*
 * Host port in URI.
 * Decimal digits are consumed with &req->host as the field. The first
 * non-digit finishes req->host and must be either '/' (path follows,
 * handled by Req_UriMark) or ' ' (HTTP version follows); anything else
 * blocks the request.
 */
__FSM_STATE(Req_UriPort, cold) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
}

/* Parse HTTP version (1.1 and 1.0 are supported). */
__FSM_TX_nofixup(Req_HttpVerT1, 'T', Req_HttpVerT2);
Expand Down Expand Up @@ -5731,7 +5780,7 @@ do { \
__FSM_I_field_chunk_flags(fld, TFW_STR_HDR_VALUE); \
__FSM_EXIT(CSTR_POSTPONE); \
}

#define H2_TRY_STR_LAMBDA_fixup(str, fld, lambda, curr_st, next_st) \
H2_TRY_STR_2LAMBDA_fixup(str, fld, {}, lambda, curr_st, next_st)

Expand Down
19 changes: 19 additions & 0 deletions fw/t/unit/test_http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,25 @@ TEST(http_parser, parses_req_uri)
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET https://[email protected] HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET ws://[email protected] HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET wss://[email protected] HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("OPTIONS * HTTP/1.1\r\n\r\n");

EXPECT_BLOCK_REQ("GET sch://[email protected] HTTP/1.1\r\n\r\n");

EXPECT_BLOCK_REQ("GET \x7f HTTP/1.1\r\n"
"Host: test\r\n"
"\r\n");
Expand Down

0 comments on commit 46dd34d

Please sign in to comment.