1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-09 00:00:04 +01:00

h1: header parsing optimizations

This commit is contained in:
Andy Green 2018-03-07 18:15:17 +08:00
parent b49630e515
commit 1aed8c8127
7 changed files with 281 additions and 250 deletions

View file

@ -952,7 +952,7 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_C_COMPILER_ID
set (GCOV_FLAGS "-fprofile-arcs -ftest-coverage -O0")
endif()
if (UNIX AND NOT LWS_WITH_ESP32)
set(CMAKE_C_FLAGS "-Wall -Wsign-compare -Wignored-qualifiers -Wtype-limits -Wuninitialized -Werror ${VISIBILITY_FLAG} -Wundef ${GCOV_FLAGS} ${CMAKE_C_FLAGS}" )
set(CMAKE_C_FLAGS "-O3 -Wall -Wsign-compare -Wignored-qualifiers -Wtype-limits -Wuninitialized -Werror ${VISIBILITY_FLAG} -Wundef ${GCOV_FLAGS} ${CMAKE_C_FLAGS}" )
else()
set(CMAKE_C_FLAGS "-Wall -Wsign-compare -Wignored-qualifiers -Wtype-limits -Wuninitialized -Werror ${VISIBILITY_FLAG} ${GCOV_FLAGS} ${CMAKE_C_FLAGS}" )
endif()

View file

@ -393,6 +393,8 @@ client_http_body_sent:
len = 1;
while (wsi->ah->parser_state != WSI_PARSING_COMPLETE &&
len > 0) {
int plen = 1;
n = lws_ssl_capable_read(wsi, &c, 1);
lws_latency(context, wsi, "send lws_issue_raw", n,
n == 1);
@ -405,7 +407,7 @@ client_http_body_sent:
return 0;
}
if (lws_parse(wsi, c)) {
if (lws_parse(wsi, &c, &plen)) {
lwsl_warn("problems parsing header\n");
goto bail3;
}
@ -882,6 +884,7 @@ check_extensions:
/* old first guy points back to us now */
wsi->same_vh_protocol_next->same_vh_protocol_prev =
&wsi->same_vh_protocol_next;
wsi->on_same_vh_list = 1;
lws_vhost_unlock(wsi->vhost);

View file

@ -779,7 +779,7 @@ int lws_hpack_interpret(struct lws *wsi, unsigned char c)
struct allocated_headers *ah = wsi->ah;
unsigned int prev;
unsigned char c1;
int n, m;
int n, m, plen;
if (!h2n)
return -1;
@ -1138,7 +1138,9 @@ pre_data:
"Uppercase literal hpack hdr");
return 1;
}
if (!h2n->unknown_header && lws_parse(wsi, c1))
plen = 1;
if (!h2n->unknown_header &&
lws_parse(wsi, &c1, &plen))
h2n->unknown_header = 1;
}
swallow:
@ -1172,7 +1174,9 @@ swallow:
if (ah->parser_state == WSI_TOKEN_NAME_PART) {
/* h2 headers come without the colon */
n = lws_parse(wsi, ':');
c1 = ':';
plen = 1;
n = lws_parse(wsi, &c1, &plen);
(void)n;
}

View file

@ -551,6 +551,8 @@ lws_same_vh_protocol_insert(struct lws *wsi, int n)
wsi->same_vh_protocol_next->same_vh_protocol_prev =
&wsi->same_vh_protocol_next;
wsi->on_same_vh_list = 1;
lws_vhost_unlock(wsi->vhost);
}
@ -566,7 +568,7 @@ lws_same_vh_protocol_remove(struct lws *wsi)
*/
lwsl_info("%s: removing same prot wsi %p\n", __func__, wsi);
if (!wsi->vhost)
if (!wsi->vhost || !wsi->on_same_vh_list)
return;
lws_vhost_lock(wsi->vhost);
@ -589,6 +591,7 @@ lws_same_vh_protocol_remove(struct lws *wsi)
wsi->same_vh_protocol_prev = NULL;
wsi->same_vh_protocol_next = NULL;
wsi->on_same_vh_list = 0;
lws_vhost_unlock(wsi->vhost);
}

View file

@ -1950,6 +1950,7 @@ struct lws {
unsigned int seen_zero_length_recv:1;
unsigned int rxflow_will_be_applied:1;
unsigned int event_pipe:1;
unsigned int on_same_vh_list:1;
unsigned int could_have_pending:1; /* detect back-to-back writes */
@ -2073,7 +2074,7 @@ LWS_EXTERN int LWS_WARN_UNUSED_RESULT
lws_client_rx_sm(struct lws *wsi, unsigned char c);
LWS_EXTERN int LWS_WARN_UNUSED_RESULT
lws_parse(struct lws *wsi, unsigned char c);
lws_parse(struct lws *wsi, unsigned char *buf, int *len);
LWS_EXTERN int LWS_WARN_UNUSED_RESULT
lws_parse_urldecode(struct lws *wsi, uint8_t *_c);

View file

@ -21,45 +21,12 @@
#include "private-libwebsockets.h"
const unsigned char lextable[] = {
static const unsigned char lextable[] = {
#include "lextable.h"
};
#define FAIL_CHAR 0x08
/*
 * Advance the header-name lexer by one input character.
 *
 * pos is the current index into lextable[] and c is the next character of
 * the name being matched; ASCII upper case is folded to lower case first,
 * so matching is case-insensitive.
 *
 * Table entries at pos are interpreted as follows:
 *  - bit 7 set:      single-byte entry; the low 7 bits must equal c
 *  - == FAIL_CHAR:   no alternative left, the match fails
 *  - <  FAIL_CHAR:   terminal marker, pos itself is returned
 *  - anything else:  3-byte entry, a character followed by a 16-bit
 *                    offset (low byte first) relative to pos
 *
 * Returns the new table position, or -1 if c cannot continue any known
 * name from pos.
 */
int LWS_WARN_UNUSED_RESULT
lextable_decode(int pos, char c)
{
	unsigned char entry;

	if (c >= 'A' && c <= 'Z')
		c += 'a' - 'A';

	/* each pass that does not return skips one unmatched 3-byte entry */
	for (;; pos += 3) {
		entry = lextable[pos];

		if (entry & 0x80) {
			/* single-byte entry: mismatch is an immediate failure */
			if ((entry & 0x7f) != c)
				return -1;

			/* matched: step past it, unless what follows is a fail */
			if (lextable[pos + 1] == FAIL_CHAR)
				return -1;

			return pos + 1;
		}

		if (entry == FAIL_CHAR)
			return -1;

		/* terminal marker: stay here, caller reads the token index */
		if (entry < FAIL_CHAR)
			return pos;

		/* 3-byte entry: on a match, jump by the stored offset */
		if (entry == c)
			return pos + lextable[pos + 1] +
			       (lextable[pos + 2] << 8);
	}
}
static struct allocated_headers *
_lws_create_ah(struct lws_context_per_thread *pt, ah_data_idx_t data_size)
{
@ -241,14 +208,14 @@ lws_header_table_attach(struct lws *wsi, int autoservice)
(void *)wsi, (void *)wsi->ah, wsi->tsi,
pt->ah_count_in_use);
lws_pt_lock(pt, __func__);
/* if we are already bound to one, just clear it down */
if (wsi->ah) {
lwsl_info("%s: cleardown\n", __func__);
goto reset;
}
lws_pt_lock(pt, __func__);
n = pt->ah_count_in_use == context->max_http_header_pool;
#if defined(LWS_WITH_PEER_LIMITS)
if (!n) {
@ -294,8 +261,6 @@ lws_header_table_attach(struct lws *wsi, int autoservice)
lwsl_info("%s: did attach wsi %p: ah %p: count %d (on exit)\n", __func__,
(void *)wsi, (void *)wsi->ah, pt->ah_count_in_use);
lws_pt_unlock(pt);
reset:
/* and reset the rx state */
@ -304,6 +269,8 @@ reset:
__lws_header_table_reset(wsi, autoservice);
lws_pt_unlock(pt);
#ifndef LWS_NO_CLIENT
if (wsi->state == LWSS_CLIENT_UNCONNECTED)
if (!lws_client_connect_via_info2(wsi))
@ -615,7 +582,7 @@ char *lws_hdr_simple_ptr(struct lws *wsi, enum lws_token_indexes h)
return wsi->ah->data + wsi->ah->frags[n].offset;
}
int LWS_WARN_UNUSED_RESULT
static int LWS_WARN_UNUSED_RESULT
lws_pos_in_bounds(struct lws *wsi)
{
if (wsi->ah->pos <
@ -915,244 +882,295 @@ static const unsigned char methods[] = {
WSI_TOKEN_HEAD_URI,
};
/*
 * possible returns: -1 fail, 0 ok, or 2 to transition to raw
*/
int LWS_WARN_UNUSED_RESULT
lws_parse(struct lws *wsi, unsigned char c)
lws_parse(struct lws *wsi, unsigned char *buf, int *len)
{
struct allocated_headers *ah = wsi->ah;
struct lws_context *context = wsi->context;
unsigned int n, m;
int r;
unsigned char c;
int r, pos;
assert(wsi->ah);
switch (ah->parser_state) {
default:
do {
(*len)--;
c = *buf++;
lwsl_parser("WSI_TOK_(%d) '%c'\n", ah->parser_state, c);
switch (ah->parser_state) {
default:
/* collect into malloc'd buffers */
/* optional initial space swallow */
if (!ah->frags[ah->frag_index[ah->parser_state]].len &&
c == ' ')
break;
lwsl_parser("WSI_TOK_(%d) '%c'\n", ah->parser_state, c);
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (ah->parser_state == methods[m])
/* collect into malloc'd buffers */
/* optional initial space swallow */
if (!ah->frags[ah->frag_index[ah->parser_state]].len &&
c == ' ')
break;
if (m == ARRAY_SIZE(methods))
/* it was not any of the methods */
goto check_eol;
/* special URI processing... end at space */
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (ah->parser_state == methods[m])
break;
if (m == ARRAY_SIZE(methods))
/* it was not any of the methods */
goto check_eol;
if (c == ' ') {
/* enforce starting with / */
if (!ah->frags[ah->nfrag].len)
if (issue_char(wsi, '/') < 0)
return -1;
/* special URI processing... end at space */
if (ah->ups == URIPS_SEEN_SLASH_DOT_DOT) {
/*
* back up one dir level if possible
* safe against header fragmentation because
* the method URI can only be in 1 fragment
*/
if (ah->frags[ah->nfrag].len > 2) {
ah->pos--;
ah->frags[ah->nfrag].len--;
do {
if (c == ' ') {
/* enforce starting with / */
if (!ah->frags[ah->nfrag].len)
if (issue_char(wsi, '/') < 0)
return -1;
if (ah->ups == URIPS_SEEN_SLASH_DOT_DOT) {
/*
* back up one dir level if possible
* safe against header fragmentation because
* the method URI can only be in 1 fragment
*/
if (ah->frags[ah->nfrag].len > 2) {
ah->pos--;
ah->frags[ah->nfrag].len--;
} while (ah->frags[ah->nfrag].len > 1 &&
ah->data[ah->pos] != '/');
do {
ah->pos--;
ah->frags[ah->nfrag].len--;
} while (ah->frags[ah->nfrag].len > 1 &&
ah->data[ah->pos] != '/');
}
}
/* begin parsing HTTP version: */
if (issue_char(wsi, '\0') < 0)
return -1;
ah->parser_state = WSI_TOKEN_HTTP;
goto start_fragment;
}
/* begin parsing HTTP version: */
if (issue_char(wsi, '\0') < 0)
return -1;
ah->parser_state = WSI_TOKEN_HTTP;
goto start_fragment;
}
r = lws_parse_urldecode(wsi, &c);
switch (r) {
case LPUR_CONTINUE:
break;
case LPUR_SWALLOW:
goto swallow;
case LPUR_FORBID:
goto forbid;
case LPUR_EXCESSIVE:
goto excessive;
default:
return -1;
}
check_eol:
/* bail at EOL */
if (ah->parser_state != WSI_TOKEN_CHALLENGE &&
c == '\x0d') {
if (ah->ues != URIES_IDLE)
r = lws_parse_urldecode(wsi, &c);
switch (r) {
case LPUR_CONTINUE:
break;
case LPUR_SWALLOW:
goto swallow;
case LPUR_FORBID:
goto forbid;
case LPUR_EXCESSIVE:
goto excessive;
default:
return -1;
}
check_eol:
/* bail at EOL */
if (ah->parser_state != WSI_TOKEN_CHALLENGE &&
c == '\x0d') {
if (ah->ues != URIES_IDLE)
goto forbid;
c = '\0';
ah->parser_state = WSI_TOKEN_SKIPPING_SAW_CR;
lwsl_parser("*\n");
}
c = '\0';
ah->parser_state = WSI_TOKEN_SKIPPING_SAW_CR;
lwsl_parser("*\n");
}
n = issue_char(wsi, c);
if ((int)n < 0)
return -1;
if (n > 0)
ah->parser_state = WSI_TOKEN_SKIPPING;
n = issue_char(wsi, c);
if ((int)n < 0)
return -1;
if (n > 0)
ah->parser_state = WSI_TOKEN_SKIPPING;
swallow:
/* per-protocol end of headers management */
if (ah->parser_state == WSI_TOKEN_CHALLENGE)
goto set_parsing_complete;
break;
/* collecting and checking a name part */
case WSI_TOKEN_NAME_PART:
lwsl_parser("WSI_TOKEN_NAME_PART '%c' 0x%02X (mode=%d) "
"wsi->lextable_pos=%d\n", c, c, wsi->mode,
ah->lextable_pos);
ah->lextable_pos = lextable_decode(ah->lextable_pos, c);
/*
* Server needs to look out for unknown methods...
*/
if (ah->lextable_pos < 0 &&
(wsi->mode == LWSCM_HTTP_SERVING)) {
/* this is not a header we know about */
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (ah->frag_index[methods[m]]) {
/*
* already had the method, no idea what
* this crap from the client is, ignore
*/
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
}
/*
* hm it's an unknown http method from a client in fact,
* it cannot be valid http
*/
if (m == ARRAY_SIZE(methods)) {
/*
* are we set up to accept raw in these cases?
*/
if (lws_check_opt(wsi->vhost->options,
LWS_SERVER_OPTION_FALLBACK_TO_RAW))
return 2; /* transition to raw */
lwsl_info("Unknown method - dropping\n");
goto forbid;
}
break;
}
/*
* ...otherwise for a client, let him ignore unknown headers
* coming from the server
*/
if (ah->lextable_pos < 0) {
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
}
if (lextable[ah->lextable_pos] < FAIL_CHAR) {
/* terminal state */
n = ((unsigned int)lextable[ah->lextable_pos] << 8) |
lextable[ah->lextable_pos + 1];
lwsl_parser("known hdr %d\n", n);
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (n == methods[m] &&
ah->frag_index[methods[m]]) {
lwsl_warn("Duplicated method\n");
return -1;
}
/*
* WSORIGIN is protocol equiv to ORIGIN,
* JWebSocket likes to send it, map to ORIGIN
*/
if (n == WSI_TOKEN_SWORIGIN)
n = WSI_TOKEN_ORIGIN;
ah->parser_state = (enum lws_token_indexes)
(WSI_TOKEN_GET_URI + n);
ah->ups = URIPS_IDLE;
if (context->token_limits)
ah->current_token_limit = context->
token_limits->token_limit[
ah->parser_state];
else
ah->current_token_limit =
wsi->context->max_http_header_data;
/* per-protocol end of headers management */
if (ah->parser_state == WSI_TOKEN_CHALLENGE)
goto set_parsing_complete;
break;
goto start_fragment;
}
break;
/* collecting and checking a name part */
case WSI_TOKEN_NAME_PART:
lwsl_parser("WSI_TOKEN_NAME_PART '%c' 0x%02X (mode=%d) "
"wsi->lextable_pos=%d\n", c, c, wsi->mode,
ah->lextable_pos);
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
pos = ah->lextable_pos;
while (1) {
if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
if ((lextable[pos] & 0x7f) != c) {
nope:
ah->lextable_pos = -1;
break;
}
/* fall thru */
pos++;
if (lextable[pos] == FAIL_CHAR)
goto nope;
ah->lextable_pos = pos;
break;
}
if (lextable[pos] == FAIL_CHAR)
goto nope;
/* b7 = 0, end or 3-byte */
if (lextable[pos] < FAIL_CHAR) { /* terminal marker */
ah->lextable_pos = pos;
break;
}
if (lextable[pos] == c) { /* goto */
ah->lextable_pos = pos + (lextable[pos + 1]) +
(lextable[pos + 2] << 8);
break;
}
/* fall thru goto */
pos += 3;
/* continue */
}
/*
* Server needs to look out for unknown methods...
*/
if (ah->lextable_pos < 0 &&
(wsi->mode == LWSCM_HTTP_SERVING)) {
/* this is not a header we know about */
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (ah->frag_index[methods[m]]) {
/*
* already had the method, no idea what
* this crap from the client is, ignore
*/
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
}
/*
* hm it's an unknown http method from a client in fact,
* it cannot be valid http
*/
if (m == ARRAY_SIZE(methods)) {
/*
* are we set up to accept raw in these cases?
*/
if (lws_check_opt(wsi->vhost->options,
LWS_SERVER_OPTION_FALLBACK_TO_RAW))
return 2; /* transition to raw */
lwsl_info("Unknown method - dropping\n");
goto forbid;
}
break;
}
/*
* ...otherwise for a client, let him ignore unknown headers
* coming from the server
*/
if (ah->lextable_pos < 0) {
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
}
if (lextable[ah->lextable_pos] < FAIL_CHAR) {
/* terminal state */
n = ((unsigned int)lextable[ah->lextable_pos] << 8) |
lextable[ah->lextable_pos + 1];
lwsl_parser("known hdr %d\n", n);
for (m = 0; m < ARRAY_SIZE(methods); m++)
if (n == methods[m] &&
ah->frag_index[methods[m]]) {
lwsl_warn("Duplicated method\n");
return -1;
}
/*
* WSORIGIN is protocol equiv to ORIGIN,
* JWebSocket likes to send it, map to ORIGIN
*/
if (n == WSI_TOKEN_SWORIGIN)
n = WSI_TOKEN_ORIGIN;
ah->parser_state = (enum lws_token_indexes)
(WSI_TOKEN_GET_URI + n);
ah->ups = URIPS_IDLE;
if (context->token_limits)
ah->current_token_limit = context->
token_limits->token_limit[
ah->parser_state];
else
ah->current_token_limit =
wsi->context->max_http_header_data;
if (ah->parser_state == WSI_TOKEN_CHALLENGE)
goto set_parsing_complete;
goto start_fragment;
}
break;
start_fragment:
ah->nfrag++;
ah->nfrag++;
excessive:
if (ah->nfrag == ARRAY_SIZE(ah->frags)) {
lwsl_warn("More hdr frags than we can deal with\n");
return -1;
}
if (ah->nfrag == ARRAY_SIZE(ah->frags)) {
lwsl_warn("More hdr frags than we can deal with\n");
return -1;
}
ah->frags[ah->nfrag].offset = ah->pos;
ah->frags[ah->nfrag].len = 0;
ah->frags[ah->nfrag].nfrag = 0;
ah->frags[ah->nfrag].flags = 2;
ah->frags[ah->nfrag].offset = ah->pos;
ah->frags[ah->nfrag].len = 0;
ah->frags[ah->nfrag].nfrag = 0;
ah->frags[ah->nfrag].flags = 2;
n = ah->frag_index[ah->parser_state];
if (!n) { /* first fragment */
ah->frag_index[ah->parser_state] = ah->nfrag;
ah->hdr_token_idx = ah->parser_state;
n = ah->frag_index[ah->parser_state];
if (!n) { /* first fragment */
ah->frag_index[ah->parser_state] = ah->nfrag;
ah->hdr_token_idx = ah->parser_state;
break;
}
/* continuation */
while (ah->frags[n].nfrag)
n = ah->frags[n].nfrag;
ah->frags[n].nfrag = ah->nfrag;
if (issue_char(wsi, ' ') < 0)
return -1;
break;
/* skipping arg part of a name we didn't recognize */
case WSI_TOKEN_SKIPPING:
lwsl_parser("WSI_TOKEN_SKIPPING '%c'\n", c);
if (c == '\x0d')
ah->parser_state = WSI_TOKEN_SKIPPING_SAW_CR;
break;
case WSI_TOKEN_SKIPPING_SAW_CR:
lwsl_parser("WSI_TOKEN_SKIPPING_SAW_CR '%c'\n", c);
if (ah->ues != URIES_IDLE)
goto forbid;
if (c == '\x0a') {
ah->parser_state = WSI_TOKEN_NAME_PART;
ah->lextable_pos = 0;
} else
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
/* we're done, ignore anything else */
case WSI_PARSING_COMPLETE:
lwsl_parser("WSI_PARSING_COMPLETE '%c'\n", c);
break;
}
/* continuation */
while (ah->frags[n].nfrag)
n = ah->frags[n].nfrag;
ah->frags[n].nfrag = ah->nfrag;
if (issue_char(wsi, ' ') < 0)
return -1;
break;
/* skipping arg part of a name we didn't recognize */
case WSI_TOKEN_SKIPPING:
lwsl_parser("WSI_TOKEN_SKIPPING '%c'\n", c);
if (c == '\x0d')
ah->parser_state = WSI_TOKEN_SKIPPING_SAW_CR;
break;
case WSI_TOKEN_SKIPPING_SAW_CR:
lwsl_parser("WSI_TOKEN_SKIPPING_SAW_CR '%c'\n", c);
if (ah->ues != URIES_IDLE)
goto forbid;
if (c == '\x0a') {
ah->parser_state = WSI_TOKEN_NAME_PART;
ah->lextable_pos = 0;
} else
ah->parser_state = WSI_TOKEN_SKIPPING;
break;
/* we're done, ignore anything else */
case WSI_PARSING_COMPLETE:
lwsl_parser("WSI_PARSING_COMPLETE '%c'\n", c);
break;
}
} while (*len);
return 0;

View file

@ -1320,7 +1320,7 @@ lws_handshake_server(struct lws *wsi, unsigned char **buf, size_t len)
{
struct lws_context *context = lws_get_context(wsi);
struct lws_context_per_thread *pt = &context->pt[(int)wsi->tsi];
int protocol_len, n = 0, hit, non_space_char_found = 0, m;
int protocol_len, n = 0, hit, non_space_char_found = 0, m, i;
unsigned char *obuf = *buf;
char protocol_list[128];
char protocol_name[64];
@ -1337,7 +1337,7 @@ lws_handshake_server(struct lws *wsi, unsigned char **buf, size_t len)
assert(0);
}
while (len--) {
while (len) {
if (wsi->mode != LWSCM_HTTP_SERVING &&
wsi->mode != LWSCM_HTTP2_SERVING &&
wsi->mode != LWSCM_HTTP_SERVING_ACCEPTED) {
@ -1345,7 +1345,9 @@ lws_handshake_server(struct lws *wsi, unsigned char **buf, size_t len)
goto bail_nuke_ah;
}
m = lws_parse(wsi, *(*buf)++);
i = (int)len;
m = lws_parse(wsi, *buf, &i);
(*buf) += (int)len - i;
if (m) {
if (m == 2) {
/*