diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2f56a9ac8..e07f192b2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -61,7 +61,8 @@ set(qpid_dispatch_SOURCES adaptors/amqp/server_config.c observers/protocol_observer.c observers/tcp_observer.c - observers/http1_observer.c + observers/http1/http1_observer.c + observers/http1/http1_decoder.c observers/http2_observer.c alloc.c alloc_pool.c diff --git a/src/buffer_field_api.h b/src/buffer_field_api.h index 1fe6fbf29..eb01b079e 100644 --- a/src/buffer_field_api.h +++ b/src/buffer_field_api.h @@ -79,7 +79,9 @@ static inline size_t qd_buffer_field_extend(qd_buffer_field_t *bfield, size_t am /* qd_buffer_field_ncopy * * Copy up to n octets from bfield to dest, advance bfield by the number of - * octets copied + * octets copied. + * + * NOTE: dest will not be null terminated! If you are trying to extract a C string use qd_buffer_field_strdup! * * @return total of octets copied - may be < n if len(bfield) < n */ diff --git a/src/observers/http1/http1_decoder.c b/src/observers/http1/http1_decoder.c new file mode 100644 index 000000000..ff612e783 --- /dev/null +++ b/src/observers/http1/http1_decoder.c @@ -0,0 +1,965 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "http1_decoder.h" + +#include "qpid/dispatch/alloc_pool.h" +#include "qpid/dispatch/discriminator.h" +#include "buffer_field_api.h" + +#include +#include +#include +#include +#include + +#define DEBUG_DECODER 0 // 1 == turn on debug printf + +#define HTTP1_MAX_LINE_LEN 65535 // (arbitrary) limit line parsing to avoid DOS attacks + +// +// This file contains code for decoding an HTTP/1.x data stream. See decoder.h for details. +// + +typedef struct h1_decode_request_state_t h1_decode_request_state_t; +typedef struct decoder_t decoder_t; + + +static const uint8_t CR_TOKEN = '\r'; +static const uint8_t LF_TOKEN = '\n'; + +// RFC9112 linear white space +static const char * const LWS_CHARS = " \t"; + + +// Classify the response codes +#define IS_INFO_RESPONSE(code) ((code) / 100 == 1) // 1xx +#define IS_SUCCESS_RESPONSE(code) ((code) / 100 == 2) // 2xx + +// true if response code indicates that the response will NOT contain a body +// 204 = No Content +// 205 = Reset Content +// 304 = Not Modified +#define NO_BODY_RESPONSE(code) \ + ((code) == 204 || \ + (code) == 205 || \ + (code) == 304 || \ + IS_INFO_RESPONSE(code)) + + +typedef enum { + HTTP1_DECODE_REQUEST = 0, // parsing request start line + HTTP1_DECODE_RESPONSE, // parsing response start line + HTTP1_DECODE_HEADERS, // parsing headers + HTTP1_DECODE_BODY, // parsing fixed-length body + HTTP1_DECODE_CHUNK_HEADER, // parsing chunk encoded body header + HTTP1_DECODE_CHUNK_DATA, // parsing chunk encoded body data + HTTP1_DECODE_CHUNK_TRAILER, // parsing chunk encoded body trailer + HTTP1_DECODE_ERROR, // parse error (terminal state) +} http1_decoder_state_t; + +#if DEBUG_DECODER +const char * const decoder_state[] = { + "HTTP1_DECODE_REQUEST", + "HTTP1_DECODE_RESPONSE", + "HTTP1_DECODE_HEADERS", + "HTTP1_DECODE_BODY", + "HTTP1_DECODE_CHUNK_HEADER", + "HTTP1_DECODE_CHUNK_DATA", + "HTTP1_DECODE_CHUNK_TRAILER", + "HTTP1_DECODE_ERROR" +}; +#endif + +// A re-sizeable buffer for holding received HTTP data while parsing. +// Currently this is used for start lines, headers, and chunk headers. +// Null terminated. +// +typedef struct parse_buffer_t { + unsigned char *data; + uint32_t mem_size; // size of allocated memory for data, not content length! + uint32_t length; // amount of data in *data (not including null terminator). +} parse_buffer_t; + + +// State for a single request-response transaction. +// +// A new state is created when a new request appears in the client stream. Once the corresponding response message has +// completed this state is released. +// +struct h1_decode_request_state_t { + DEQ_LINKS(h1_decode_request_state_t); + qd_http1_decoder_connection_t *hconn; + uintptr_t user_context; + +#if 0 // TBD + uint64_t client_octets; // # total octets arriving from client for this request + uint64_t server_octets; // # total octets arriving from server for this response +#endif + + int32_t response_code; // sent by server + + bool head_request:1; // HEAD method + bool connect_request:1; // CONNECT method + bool request_complete:1; // true when request message done encoding/decoding + bool response_complete:1; // true when response message done encoding/decoding + bool close_expected:1; // if true do not signal request_complete cb until closed +}; +DEQ_DECLARE(h1_decode_request_state_t, h1_decode_request_state_list_t); +ALLOC_DECLARE(h1_decode_request_state_t); +ALLOC_DEFINE(h1_decode_request_state_t); + + +// Decodes an incoming stream of HTTP/1.x traffic. +// +// The decoder only copies incoming data where necessary. This includes start line data, headers, and the chunk +// boundaries. Raw body data is not copied. +// +struct decoder_t { + + qd_http1_decoder_connection_t *hconn; // parent connection + h1_decode_request_state_t *hrs; // current request/response state + http1_decoder_state_t state; + parse_buffer_t buffer; + + int64_t body_length; // content length or current chunk length + + bool is_client:1; + bool is_chunked:1; + bool is_http10:1; + + // decoded headers + bool hdr_transfer_encoding:1; + bool hdr_content_length:1; +}; + + +// The HTTP/1.1 connection +// +struct qd_http1_decoder_connection_t { + uintptr_t user_context; + const qd_http1_decoder_config_t *config; + + // Pipelining allows a client to send multiple requests before receiving any responses. Pending request are stored + // in order: new requests are added to TAIL. The in-progress response is at HEAD. + // + h1_decode_request_state_list_t hrs_queue; + + const char *parse_error; // if set parser has failed + decoder_t client; // client stream decoder + decoder_t server; // server stream decoder +}; +ALLOC_DECLARE(qd_http1_decoder_connection_t); +ALLOC_DEFINE(qd_http1_decoder_connection_t); + + +// Expand the parse buffer up to required octets. +// +static void ensure_buffer_size(parse_buffer_t *b, size_t required) +{ + if (b->mem_size < required) { + b->mem_size = required; + if (!b->data) + b->data = qd_malloc(b->mem_size); + else + b->data = qd_realloc(b->data, b->mem_size); + } +} + + +static inline void decoder_new_state(decoder_t *decoder, http1_decoder_state_t new_state) +{ +#if DEBUG_DECODER + fprintf(stdout, "%s decoder state %s -> %s\n", decoder->is_client ? "client" : "server", + decoder_state[decoder->state], decoder_state[new_state]); +#endif + decoder->state = new_state; +} + + +// A protocol parsing error has occurred. There is no recovery - we're dead in the water. +// +static void parser_error(qd_http1_decoder_connection_t *hconn, const char *reason) +{ + if (!hconn->parse_error) { + hconn->parse_error = reason; + decoder_new_state(&hconn->client, HTTP1_DECODE_ERROR); + decoder_new_state(&hconn->server, HTTP1_DECODE_ERROR); + if (hconn->config->protocol_error) + hconn->config->protocol_error(hconn, reason); + } +} + + +// reset the rx decoder state machine for re-use after a message completes decoding +// +static void decoder_reset(decoder_t *decoder) +{ + if (decoder->state == HTTP1_DECODE_ERROR) + // Once the decoder has failed parsing it cannot reliably be re-used on the stream + // since we have lost synchonization + return; + + decoder_new_state(decoder, decoder->is_client ? HTTP1_DECODE_REQUEST : HTTP1_DECODE_RESPONSE); + + decoder->hrs = 0; + decoder->body_length = 0; + decoder->buffer.length = 0; + + // do not change is_client! + + decoder->is_chunked = false; + decoder->is_http10 = false; + decoder->hdr_transfer_encoding = false; + decoder->hdr_content_length = false; +} + + +// Create a new request state - this is done when a new http request arrives. +// +static h1_decode_request_state_t *h1_decode_request_state(qd_http1_decoder_connection_t *hconn) +{ + h1_decode_request_state_t *hrs = new_h1_decode_request_state_t(); + ZERO(hrs); + DEQ_ITEM_INIT(hrs); + hrs->hconn = hconn; + return hrs; +} + + +// Free a request/response state +// +static void h1_decode_request_state_free(h1_decode_request_state_t *hrs) +{ + if (hrs) { + free_h1_decode_request_state_t(hrs); + } +} + + +// Create a new connection state instance. +// +qd_http1_decoder_connection_t *h1_decode_connection(const qd_http1_decoder_config_t *config, uintptr_t user_context) +{ + assert(config); + + qd_http1_decoder_connection_t *hconn = new_qd_http1_decoder_connection_t(); + ZERO(hconn); + + hconn->user_context = user_context; + hconn->config = config; + DEQ_INIT(hconn->hrs_queue); + + // init the decoders + + hconn->client.is_client = true; + hconn->client.hconn = hconn; + decoder_reset(&hconn->client); + + hconn->server.is_client = false; + hconn->server.hconn = hconn; + decoder_reset(&hconn->server); + + return hconn; +} + + +uintptr_t h1_decode_connection_get_context(const qd_http1_decoder_connection_t *hconn) +{ + assert(hconn); + return hconn->user_context; +} + + +// Free the connection +// +void h1_decode_connection_free(qd_http1_decoder_connection_t *conn) +{ + if (conn) { + h1_decode_request_state_t *hrs = DEQ_HEAD(conn->hrs_queue); + while (hrs) { + DEQ_REMOVE_HEAD(conn->hrs_queue); + h1_decode_request_state_free(hrs); + hrs = DEQ_HEAD(conn->hrs_queue); + } + + decoder_reset(&conn->client); + free(conn->client.buffer.data); + + decoder_reset(&conn->server); + free(conn->server.buffer.data); + + free_qd_http1_decoder_connection_t(conn); + } +} + + +static void debug_print_line(const char *prefix, const char *line) +{ +#if DEBUG_DECODER + fprintf(stdout, "%s: '%s'\n", prefix, line); + fflush(stdout); +#endif +} + + +// Read incoming http line starting at 'data' into the decoders parse buffer. Stop when a CRLF is parsed. +// +// Returns a pointer to the null-terminated line and advances *data/*length past the line. The returned line has the +// trailin CRLF stripped off. +// +// Returns null if the line is incomplete (need more data) or a parse error occurs. +// +// NOTE WELL: When a non-null pointer is returned the parsed line is stored in the decoder parse buffer. This parse +// buffer will be overwritten on the next call to read_line()! +// +static char *read_line(decoder_t *decoder, const unsigned char **data, size_t *length) +{ + const unsigned char *ptr = *data; + const unsigned char *end = *data + *length; + + while (ptr != end) { + if (*ptr == 0 || *ptr > 0x7E) { // invalid USASCII + parser_error(decoder->hconn, "protocol error: non USASCII data"); + return 0; + } + + if (*ptr == LF_TOKEN) { + break; + } + + ptr += 1; + } + + // at this point if ptr == end no LF found and we need to store all the data + + const size_t to_copy = (ptr == end) ? *length : ptr - *data + 1; // +1: skip LF as well + const size_t total = decoder->buffer.length + to_copy; + + // avoid infinite-line DOS attack by failing if the line is too long + if (total > HTTP1_MAX_LINE_LEN) { + parser_error(decoder->hconn, "protocol error: received line too long"); + return 0; + } + + ensure_buffer_size(&decoder->buffer, total + 1); + memcpy(&decoder->buffer.data[decoder->buffer.length], *data, to_copy); + decoder->buffer.length += to_copy; + *data += to_copy; + *length -= to_copy; + + if (ptr == end) { + // need more (will append at decoder->buffer.data[decoder->buffer.length]) + return 0; + } + + // At this point we know that buffer contains at least a LF (so buffer.length >= 1). + // Strip trailing CRLF and null terminate the buffer + + assert(decoder->buffer.length > 0); + unsigned char *trim = &decoder->buffer.data[decoder->buffer.length - 1]; + while (*trim == LF_TOKEN || *trim == CR_TOKEN) { + *trim-- = '\0'; + if (trim == &decoder->buffer.data[0]) + break; + } + decoder->buffer.length = 0; // reset line parser + return (char *) &decoder->buffer.data[0]; +} + + +// Return the first non-linear whitespace character in line. +// Will return empty string if line contains only whitespace. +// RFC7230 defines OWS as zero or more spaces or horizontal tabs +// +static char *trim_whitespace(char *line) +{ + while (*line && (*line == ' ' || *line == '\t')) + line++; + return line; +} + +// Remove any trailing linear whitespace characters in line. +// +static void truncate_whitespace(char *line) +{ + char *eol = &line[strlen((char *) line)]; + if (eol-- == line) + return; // empty line + while (*eol == ' ' || *eol == '\t') { + *eol = '\0'; + if (eol-- == line) + break; + } +} + + +// Called when incoming message is complete. +// Returns false on parse error, else true. +// +static bool message_done(qd_http1_decoder_connection_t *hconn, decoder_t *decoder) +{ + h1_decode_request_state_t *hrs = decoder->hrs; + + assert(hrs); + + if (!decoder->is_client) { + // Informational 1xx response codes are NOT terminal - further responses are allowed! + if (IS_INFO_RESPONSE(hrs->response_code)) { + hrs->response_code = 0; + } else { + hrs->response_complete = true; + } + } else { + hrs->request_complete = true; + } + + // signal the message receive is complete + int rc = hconn->config->message_done(hconn, hrs->user_context, decoder->is_client); + if (!rc) { + parser_error(hconn, "message_done callback failed"); + return false; + } + + decoder_reset(decoder); // resets decode state to parse_request or parse_response + + if (hrs->response_complete && hrs->request_complete) { + int rc = hconn->config->transaction_complete(hconn, hrs->user_context); + assert(DEQ_HEAD(hconn->hrs_queue) == hrs); // expect completed in-order + DEQ_REMOVE_HEAD(hconn->hrs_queue); + h1_decode_request_state_free(hrs); + hrs = 0; + if (!rc) { + parser_error(hconn, "transaction_complete callback failed"); + return false; + } + } + + return true; +} + + +////////////////////// +// Parse start line // +////////////////////// + + +// parse the HTTP/1.1 request line: +// "method SP request-target SP HTTP-version CRLF" +// +static bool parse_request_line(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + char *line = read_line(decoder, data, length); + if (!line) + return false; // need more data + + if (*line == '\0') { + // RFC9112 ignore blank lines before the request. Continue parsing data. + return !!(*length); + } + + debug_print_line("request line:", line); + + char *saveptr = 0; + char *method = strtok_r(line, LWS_CHARS, &saveptr); + char *target = strtok_r(0, LWS_CHARS, &saveptr); + char *version = strtok_r(0, LWS_CHARS, &saveptr); + + if (!method || !target || !version) { + parser_error(hconn, "Malformed request line"); + return false; + } + + uint32_t minor = 0; + if (strcmp(version, "HTTP/1.1") == 0) { + minor = 1; + } else if (strcmp(version, "HTTP/1.0") == 0) { + minor = 0; + decoder->is_http10 = true; + } else { + parser_error(hconn, "Unsupported HTTP/1.x version"); + return false; + } + + h1_decode_request_state_t *hrs = h1_decode_request_state(hconn); + DEQ_INSERT_TAIL(hconn->hrs_queue, hrs); + + // check for methods that do not support body content in the response: + if (strcmp(method, "HEAD") == 0) + hrs->head_request = true; + else if (strcmp(method, "CONNECT") == 0) + hrs->connect_request = true; + + decoder->hrs = hrs; + + int rc = hconn->config->rx_request(hconn, method, target, 1, minor, &hrs->user_context); + if (rc) { + parser_error(hconn, "rx_request callback failed"); + return false; + } + decoder_new_state(decoder, HTTP1_DECODE_HEADERS); + return !!(*length); +} + + +// parse the HTTP/1.1 response line +// "HTTP-version SP status-code [SP reason-phrase] CRLF" +// +static bool parse_response_line(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + char *line = read_line(decoder, data, length); + if (!line) + return false; // need more data + + const size_t in_octets = strlen(line); + char *eol = &line[in_octets]; // eol points to null terminator + + if (in_octets == 0) { + // RFC9112 ignore blank lines before the response + return !!(*length); + } + + debug_print_line("response line:", line); + + // Responses arrive in the same order as requests are generated so this new + // response corresponds to head hrs + + h1_decode_request_state_t *hrs = DEQ_HEAD(hconn->hrs_queue); + if (!hrs) { + // receiving a response without a corresponding request + parser_error(hconn, "Spurious HTTP response received"); + return false; + } + + assert(!decoder->hrs); // state machine violation + decoder->hrs = hrs; + + char *saveptr = 0; + char *version = strtok_r(line, LWS_CHARS, &saveptr); + char *status_code = strtok_r(0, LWS_CHARS, &saveptr); + + if (!version || !status_code) { + parser_error(hconn, "Malformed response line"); + return false; + } + + uint32_t minor = 0; + if (strcmp(version, "HTTP/1.1") == 0) { + minor = 1; + } else if (strcmp(version, "HTTP/1.0") == 0) { + minor = 0; + decoder->is_http10 = true; + } else { + parser_error(hconn, "Unsupported HTTP/1.x version"); + return false; + } + + char *eoc = 0; // set to octet past status_code + errno = 0; + hrs->response_code = strtol(status_code, &eoc, 10); + if (errno || hrs->response_code < 100 || hrs->response_code > 999 || *eoc != '\0') { + // expect 3 digit decimal terminated by a null + parser_error(hconn, "Bad response code"); + return false; + } + + // The reason phrase is optional and may contain spaces. + + while (eoc < eol) { + // There is something trailing the status code. + eoc = trim_whitespace(eoc); + } + + int rc = hconn->config->rx_response(hconn, hrs->user_context, hrs->response_code, *eoc ? eoc : 0, 1, minor); + if (rc) { + parser_error(hconn, "rx_response callback failed"); + return false; + } + + decoder_new_state(decoder, HTTP1_DECODE_HEADERS); + return !!(*length); +} + + +//////////////////// +// Header parsing // +//////////////////// + + +// Called after the last incoming header was decoded and passed to the +// application. +// +// Returns true on success, false on error +// +static bool headers_done(qd_http1_decoder_connection_t *hconn, struct decoder_t *decoder) +{ + h1_decode_request_state_t *hrs = decoder->hrs; + assert(hrs); + + int rc = hconn->config->rx_headers_done(hconn, hrs->user_context, decoder->is_client); + if (rc) { + parser_error(hconn, "rx_headers_done callback failed"); + return false; + } + + // Determine if a body is present. See RFC9112 Message Body Length + + if (!decoder->is_client) { // parsing a response message + // Cases 1 and 2: + if ((hrs->head_request || NO_BODY_RESPONSE(hrs->response_code)) + || (hrs->connect_request && IS_SUCCESS_RESPONSE(hrs->response_code)) + ) { + // No response body regardless of headers, fake zero content length: + decoder->hdr_transfer_encoding = false; + decoder->is_chunked = false; + decoder->hdr_content_length = true; + decoder->body_length = 0; + decoder_new_state(decoder, HTTP1_DECODE_BODY); + return true; + } + } + + if (decoder->hdr_transfer_encoding) { + // Case 3: Transfer-encoding invalidates Content-Length: + decoder->hdr_content_length = 0; + decoder->body_length = 0; + + // Case 4a: use chunked: + if (decoder->is_chunked) { + decoder_new_state(decoder, HTTP1_DECODE_CHUNK_HEADER); + return true; + + } else if (!decoder->is_client) { + // Case 4b: non-chunked response, body is terminated by connection close + // Hack an infinite content length so we receive "forever" + decoder->hdr_transfer_encoding = false; + decoder->hdr_content_length = true; + decoder->body_length = INT64_MAX; + decoder_new_state(decoder, HTTP1_DECODE_BODY); + return true; + + } else { + // Case 4c: invalid request + parser_error(hconn, "Unrecognized Transfer-Encoding"); + return false; + } + + } else if (!decoder->hdr_content_length) { // No Content-length nor Transfer-Encoding + // Case 8: Response without explict length: body until connection closed: + if (!decoder->is_client) { + // Hack an infinite content length so we receive forever + decoder->hdr_content_length = true; + decoder->body_length = INT64_MAX; + decoder_new_state(decoder, HTTP1_DECODE_BODY); + return true; + } + decoder->body_length = 0; // Case 7: assume zero length body + } + + // case 6: use content-length + decoder_new_state(decoder, HTTP1_DECODE_BODY); + return true; +} + + +// Process a received header to determine message body length, etc. +// Returns false if parse error occurs. +// +static bool process_header(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, char *key, char *value) +{ + if (strlen(key) > strlen("Transfer-Encoding")) + // this is not the key we are looking for... + return true; + + if (strcasecmp("Content-Length", key) == 0) { + int64_t old = decoder->body_length; + char *eoc = 0; + + errno = 0; + decoder->body_length = strtol(value, &eoc, 10); + if (errno || eoc == value || decoder->body_length < 0) { + parser_error(hconn, "Malformed Content-Length value"); + return false; + } + if (old && old != decoder->body_length) { + parser_error(hconn, "Invalid duplicate Content-Length header"); + return false; + } + decoder->hdr_content_length = true; + + } else if (!decoder->is_http10 && strcasecmp("Transfer-Encoding", key) == 0) { + decoder->hdr_transfer_encoding = true; + char *saveptr = 0; + char *token = strtok_r(value, " ,", &saveptr); + while (token) { + if (strcasecmp("chunked", token) == 0) { + decoder->is_chunked = true; + break; + } + token = strtok_r(0, " ,", &saveptr); + } + } + + return true; +} + + +// Parse an HTTP header line. +// See RFC7230 for details. If header line folding (obs-folding) is detected, +// replace the folding with spaces. +// +static bool parse_header(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + char *line = read_line(decoder, data, length); + if (!line) + // need more data + return false; + + size_t in_octets = strlen(line); + char *eol = &line[in_octets]; // eol points to null terminator + + debug_print_line("header:", line); + + if (in_octets == 0) { + // empty header == end of headers + bool ok = headers_done(hconn, decoder); + if (!ok) + return false; + return !!(*length); + } + + // TODO: support obsolete line folding. For now I punt: + if (*line == ' ' || *line == '\t') + return !!(*length); + + // parse out key/value + + char *saveptr = 0; + char *key = strtok_r(line, ":", &saveptr); + + if (!key) { + parser_error(hconn, "Malformed header key"); + return false; + } + + // According to RFC9112, the key is immediately followed by ':'. Value may start and end with whitespace which must + // be removed before value can be processed. + + char *value = &key[strlen(key)]; // value points to null at end of key + if (value < eol) { + value++; // skip to start of value + value = trim_whitespace(value); + truncate_whitespace(value); + } + + assert(decoder->hrs); + int rc = hconn->config->rx_header(hconn, decoder->hrs->user_context, decoder->is_client, key, value); + if (rc) { + parser_error(hconn, "rx_header callback failed"); + return false; + } + + if (!process_header(hconn, decoder, key, value)) + return false; + + return !!(*length); +} + + +///////////////// +// Body Parser // +///////////////// + + +// Helper used by both content-length and chunked encoded bodies. Caller must check decoder->body_length to determine if +// all the expected data has been consumed (decoder->body_length == 0) and *length to see if any **data remains +// +static bool consume_body(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + size_t amount = MIN(*length, decoder->body_length); + + assert(decoder->hrs); + if (amount && hconn->config->rx_body) { + int rc = hconn->config->rx_body(hconn, decoder->hrs->user_context, decoder->is_client, *data, amount); + if (rc) { + parser_error(hconn, "rx_body callback failed"); + return false; + } + } + + *data += amount; + *length -= amount; + return true; +} + + +// parsing the start of a chunked header: +// (chunk-ext) CRLF +// chunk-ext = *( BWS ";" BWS chunk-ext-name +// [ BWS "=" BWS chunk-ext-val ] )*(OWS*(;)) +// +static bool parse_chunk_header(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + char *line = read_line(decoder, data, length); + if (!line) + return false; + + char *eoc = 0; + errno = 0; + decoder->body_length = strtol(line, &eoc, 16); + if (errno || eoc == line || decoder->body_length < 0) { + parser_error(hconn, "Invalid chunk length"); + return false; + } + + if (decoder->body_length) + decoder_new_state(decoder, HTTP1_DECODE_CHUNK_DATA); + else + decoder_new_state(decoder, HTTP1_DECODE_CHUNK_TRAILER); + + return !!(*length); +} + + +// Parse the data section of a chunk +// +static bool parse_chunk_data(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + bool ok = consume_body(hconn, decoder, data, length); + if (!ok) + return false; + + if (decoder->body_length == 0) { // end of chunk data + // consume CRLF at end of body + char *line = read_line(decoder, data, length); + if (!line) + return false; // need more data + + if (*line) { + // expected bare line, something is wrong + parser_error(hconn, "Unexpected chunk body end"); + return false; + } + + decoder_new_state(decoder, HTTP1_DECODE_CHUNK_HEADER); + } + + return !!(*length); +} + + +// Keep reading chunk trailers until the terminating empty line is read +// +static bool parse_chunk_trailer(qd_http1_decoder_connection_t *hconn, decoder_t *decoder, const unsigned char **data, size_t *length) +{ + char *line = read_line(decoder, data, length); + if (!line) + return false; // need more + + if (*line) { // non empty line == chunk trailer + + // update incoming data with strlen(line) + 2; + + } else { // end of trailers and message + + // update incoming data with +2 + + bool ok = message_done(hconn, decoder); + if (!ok) + return false; + } + + return !!(*length); +} + + +// Parse a message body using content-length +// +static bool parse_body(qd_http1_decoder_connection_t *hconn, struct decoder_t *decoder, const unsigned char **data, size_t *length) +{ + bool ok = consume_body(hconn, decoder, data, length); + if (!ok) + return false; + + if (decoder->body_length == 0) { // no more body + bool ok = message_done(hconn, decoder); + if (!ok) + return false; + } + + return !!(*length); +} + + +// Push inbound network data into the http1 protocol engine. +// +// This is the main decode loop. All callbacks take place in the context of this call. +// +// +int h1_decode_connection_rx_data(qd_http1_decoder_connection_t *hconn, bool from_client, const unsigned char *data, size_t length) +{ + bool more = true; + + if (hconn->parse_error) { + return -1; + } + + struct decoder_t *decoder = from_client ? &hconn->client : &hconn->server; + while (more) { +#if DEBUG_DECODER + fprintf(stdout, "hconn: %p State: %s data length=%zu\n", (void *) hconn, decoder_state[decoder->state], length); +#endif + switch (decoder->state) { + case HTTP1_DECODE_REQUEST: + more = parse_request_line(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_RESPONSE: + more = parse_response_line(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_HEADERS: + more = parse_header(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_BODY: + more = parse_body(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_CHUNK_HEADER: + more = parse_chunk_header(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_CHUNK_DATA: + more = parse_chunk_data(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_CHUNK_TRAILER: + more = parse_chunk_trailer(hconn, decoder, &data, &length); + break; + + case HTTP1_DECODE_ERROR: + more = false; + break; + } + } + + return !!hconn->parse_error ? -1 : 0; +} diff --git a/src/observers/http1/http1_decoder.h b/src/observers/http1/http1_decoder.h new file mode 100644 index 000000000..ff75d3dd7 --- /dev/null +++ b/src/observers/http1/http1_decoder.h @@ -0,0 +1,137 @@ +#ifndef __http1_decoder_h__ +#define __http1_decoder_h__ 1 +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include +#include +#include + + +// HTTP/1.x Decoder Library +// +// This library provides an API for decoding HTTP/1.x protocol streams +// +// The decoder takes input containing HTTP/1.x data read from the TCP connection and issues callbacks as various parts +// (headers, body, status) of the HTTP/1.x message are parsed. +// +// This library provides two classes: +// +// * qd_http1_decoder_connection_t - a context for a single TCP connection over which HTTP/1.x messages are exchanged +// between a client and a server. +// +// * qd_http1_decoder_config.h - a structure to configure the callbacks for a given qd_http1_decoder_connection_t. A +// pointer to an initialized instances of this must be passed to the qd_http1_decoder_connection() constructor. It is +// expected that the instance's lifecycle will remain valid until return of the call to the +// qd_http1_decoder_connection_free() destructor. +// + +typedef struct qd_http1_decoder_config_t qd_http1_decoder_config_t; +typedef struct qd_http1_decoder_connection_t qd_http1_decoder_connection_t; + +struct qd_http1_decoder_config_t { + // + // Decoder callbacks + // + // Callbacks invoked when parsing incoming raw connection data. These callbacks are invoked from the + // h1_decode_connection_rx_data() call. These callbacks should return 0 on success or non-zero on error. A non-zero + // return code is used as the return code from h1_decode_connection_rx_data() + // + + // New HTTP request received. The caller should set a request_context that will be passed back in all callbacks + // pertaining to this request/response transaction. The method and target are not preserved on return from this + // call. The callback must copy the data associated with these values if they need to be saved. + // + int (*rx_request)(qd_http1_decoder_connection_t *hconn, + const char *method, + const char *target, + uint32_t version_major, + uint32_t version_minor, + uintptr_t *request_context); + + // HTTP response received. The request_context comes from the rx_request() callback issued for the corresponding + // request. Note well that if status_code is Informational (1xx) then this response is NOT the last response for the + // current request (See RFC7231, 6.2 Informational 1xx). The transaction_complete callback will be called after the + // LAST response for the given request has been received. The reason_phrase is not preserved on return from this + // call. The callback must copy the data associated with this value if it needs to be saved. + // + int (*rx_response)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context, + int status_code, + const char *reason_phrase, + uint32_t version_major, + uint32_t version_minor); + + // Invoked for each HTTP header received. from_client is true if the header was read from the stream sent by the + // client, false if the stream is sent from the server. Neither key nor value is preserved on return from this + // call. The callback must copy the data associated with these values if they need to be saved. + // + int (*rx_header)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context, bool from_client, + const char *key, const char *value); + + // Invoked after the last HTTP header (after the last rx_header() callback for this message). + // + int (*rx_headers_done)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context, bool from_client); + + // (Optional) invoked as the HTTP1 message body is parsed. length is set to the number of data octets in the + // body buffer. The body buffer lifecycle ends on return from this call - if the caller needs to preserve the body + // data it must copy it. + // + int (*rx_body)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context, bool from_client, const unsigned char *body, size_t length); + + // Invoked after a received HTTP message has been completely parsed. + // + int (*message_done)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context, bool from_client); + + // Invoked when the HTTP request/response messages have been completely encoded/decoded and the transaction is complete. + // The request_context will never be used by the decoder again on return from this call. + // + int (*transaction_complete)(qd_http1_decoder_connection_t *hconn, uintptr_t request_context); + + // Invoked if the decoder is unable to parse the incoming stream. No further callbacks will occur for this + // connection. The h1_docode_connection_rx_data() will return a non-zero value. It is expected that the user will + // clean up all context(s) associated with the connection and any in-progress transactions. + // + void (*protocol_error)(qd_http1_decoder_connection_t *hconn, const char *reason); + +}; + + +// Create a new connection and assign it a context. The config object lifecycle must exist for the duration of the +// qd_http1_decoder_connection_t. +// +qd_http1_decoder_connection_t *qd_http1_decoder_connection(const qd_http1_decoder_config_t *config, uintptr_t context); + +// Obtain the connection context given in the h1_decode_connection() call. +// +uintptr_t qd_http1_decoder_connection_get_context(const qd_http1_decoder_connection_t *conn); + +// Release the codec. Any outstanding request/response state is released immediately, including any in-progress +// requests. +// +void qd_http1_decoder_connection_free(qd_http1_decoder_connection_t *conn); + +// Push inbound network data into the http1 decoder. All callbacks occur during this call. The return value is zero on +// success. If a parse error occurs then the protocol_error callback will be invoked and a non-zero value is returned. +// +// Errors are not recoverable: further calls will return a non-zero value. +// +int qd_http1_decoder_connection_rx_data(qd_http1_decoder_connection_t *conn, bool from_client, const unsigned char *data, size_t len); + +#endif // __http1_decoder_h__ diff --git a/src/observers/http1/http1_observer.c b/src/observers/http1/http1_observer.c new file mode 100644 index 000000000..b152e6282 --- /dev/null +++ b/src/observers/http1/http1_observer.c @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "observers/private.h" + +#include "http1_decoder.h" + +#include + +/** + + ### START RECORD + ### + # connection level record (listener based) + vflow_start_record(VFLOW_RECORD_FLOW, li->vflow); + # connection level record (connector) + vflow_start_record(VFLOW_RECORD_FLOW, connector->vflow); + # Client side request start flow + vflow_start_record(VFLOW_RECORD_FLOW, hconn->vflow) + # Server side request start flow + vflow_start_record(VFLOW_RECORD_FLOW, hconn->vflow) + + ### END RECORD + ### + vflow_end_record(hreq->vflow); End of request vflow + + # connection level flow end + vflow_end_record(hconn->vflow); End of connection vflow + + ### ATTRIBUTES CLIENT-FACING CONNECTION LEVEL + ### + ./http1_client.c:213: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, 0); + ./http1_client.c:214: vflow_add_rate(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, VFLOW_ATTRIBUTE_OCTET_RATE); + ./http1_client.c:499: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, hconn->in_http1_octets); + ./http1_client.c:597: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, hconn->in_http1_octets); + ./http1_client.c:691: qd_set_vflow_netaddr_string(hconn->vflow, hconn->raw_conn, true); + # moves any connection condition to vflow on DISCONNECT: + ./http1_client.c:713: qd_set_condition_on_vflow(hconn->raw_conn, hconn->vflow); + + ### ATTRIBUTES SERVER-FACING CONNECTION LEVEL + ### + ./http1_server.c:276: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, 0); + ./http1_server.c:277: vflow_add_rate(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, VFLOW_ATTRIBUTE_OCTET_RATE); + ./http1_server.c:603: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, hconn->in_http1_octets); + ./http1_server.c:707: vflow_set_uint64(hconn->vflow, VFLOW_ATTRIBUTE_OCTETS, hconn->in_http1_octets); + # gets addr from raw conn + ./http1_server.c:831: qd_set_vflow_netaddr_string(hconn->vflow, hconn->raw_conn, false); + # moves connection condition to vflow on DISCONNECT + ./http1_server.c:856: qd_set_condition_on_vflow(hconn->raw_conn, hconn->vflow); + + + ### ATTRIBUTES CLIENT-FACING PER-REQUEST LEVEL + ### + ./http1_client.c:954: vflow_set_uint64(hreq->base.vflow, VFLOW_ATTRIBUTE_OCTETS, 0); + ./http1_client.c:1209: vflow_set_uint64(hreq->base.vflow, VFLOW_ATTRIBUTE_OCTETS, in_octets); + + ./http1_client.c:955: vflow_set_string(hreq->base.vflow, VFLOW_ATTRIBUTE_METHOD, method); + ./http1_client.c:2074: vflow_set_string(hreq->base.vflow, VFLOW_ATTRIBUTE_RESULT, code_str); + ./http1_client.c:2076: vflow_set_string(hreq->base.vflow, VFLOW_ATTRIBUTE_REASON, reason_phrase); + + ./http1_client.c:969: vflow_serialize_identity(hreq->base.vflow, hreq->request_props); + + ./http1_client.c:959: vflow_latency_start(hreq->base.vflow); + ./http1_client.c:1855: vflow_latency_end(hreq->base.vflow); + + ### ATTRIBUTES SERVER-FACING PER-REQUEST LEVEL + ### + ./http1_server.c:1381: vflow_set_uint64(hreq->base.vflow, VFLOW_ATTRIBUTE_OCTETS, in_octets); + ./http1_server.c:1566: vflow_set_uint64(hreq->base.vflow, VFLOW_ATTRIBUTE_OCTETS, 0); + + ./http1_server.c:1702: vflow_set_ref_from_parsed(hreq->base.vflow, VFLOW_ATTRIBUTE_COUNTERFLOW, value); + + ./http1_server.c:1572: vflow_latency_start(hreq->base.vflow); + ./http1_server.c:1119: vflow_latency_end(hreq->base.vflow); + + ### WHAT ABOUT VFLOW_ATTRIBUTE_PROTOCOL ?? + + +https://www.rfc-editor.org/rfc/rfc9112.html (HTTP/1.1) +https://www.rfc-editor.org/rfc/rfc1945 (HTTP/1.0) +https://www.rfc-editor.org/rfc/rfc9110 (HTTP Semantics) + + */ + + +static void http1_observe(qdpo_transport_handle_t *th, bool from_client, const unsigned char *data, size_t length) +{ + qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, + "[C%" PRIu64 "] HTTP/1.1 observer classifying protocol: %zu %s octets", th->conn_id, length, from_client ? "client" : "server"); +} + + + +void qdpo_http1_init(qdpo_transport_handle_t *th) +{ + qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, "[C%" PRIu64 "] HTTP/1.1 observer initialized", th->conn_id); + + th->protocol = QD_PROTOCOL_HTTP1; + th->observe = http1_observe; + th->http1.tbd = 42; // whatever; + +} + +void qdpo_http1_final(qdpo_transport_handle_t *th) +{ + qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, "[C%" PRIu64 "] HTTP/1.1 observer finalized", th->conn_id); + th->observe = 0; +} + diff --git a/src/observers/http1_observer.c b/src/observers/http1_observer.c deleted file mode 100644 index 8468c44eb..000000000 --- a/src/observers/http1_observer.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "private.h" - -#include - - -static void http1_observe(qdpo_transport_handle_t *th, bool from_client, const unsigned char *data, size_t length) -{ - qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, - "[C%" PRIu64 "] HTTP/1.1 observer classifying protocol: %zu %s octets", th->conn_id, length, from_client ? "client" : "server"); -} - - - -void qdpo_http1_init(qdpo_transport_handle_t *th) -{ - qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, "[C%" PRIu64 "] HTTP/1.1 observer initialized", th->conn_id); - - th->protocol = QD_PROTOCOL_HTTP1; - th->observe = http1_observe; - th->http1.tbd = 42; // whatever; - -} - -void qdpo_http1_final(qdpo_transport_handle_t *th) -{ - qd_log(LOG_HTTP_ADAPTOR, QD_LOG_DEBUG, "[C%" PRIu64 "] HTTP/1.1 observer finalized", th->conn_id); - th->observe = 0; -} -