mirror of
https://github.com/warmcat/libwebsockets.git
synced 2025-03-23 00:00:06 +01:00

Lws now strips out http headers releated to h2, ws and unusual headers based on cmake config settings for those features... it saves some heap for the ah and reduces the table size in .rodata. It's possible code might have some external dependency on the original header indexes, but, eg, you don't enable h2 so those indexes are optimized with the h2 ones taken out. This introduces a cmake option "LWS_HTTP_HEADERS_ALL", default-OFF, that defeats the header table optimization for compatibility with older versions in the case the client software can't be adapted to use the lws-exported matching header enums. You probably don't need this.
500 lines
10 KiB
C
500 lines
10 KiB
C
/*
|
|
* minilex.c
|
|
*
|
|
* High efficiency lexical state parser
|
|
*
|
|
* Copyright (C)2011-2020 Andy Green <andy@warmcat.com>
|
|
*
|
|
* Licensed under MIT
|
|
*
|
|
* Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
|
|
*
|
|
* Run it twice to test parsing on the generated table on stderr
|
|
*
|
|
* Whoo this got a bit complicated by lws-buildtime deselection of some
|
|
* headers optionally. There are 3 x vars, UNCOMMON, WS, H2 so we make
|
|
* eight copies of the lextable selected by the appropriate #if defined()
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* get all the strings */
|
|
|
|
#define LWS_ROLE_WS 1
|
|
#define LWS_WITH_HTTP_UNCOMMON_HEADERS 1
|
|
#define LWS_ROLE_H2 1
|
|
|
|
#include "lextable-strings.h"
|
|
|
|
#undef LWS_ROLE_WS
|
|
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
#undef LWS_ROLE_H2
|
|
|
|
/* bitfield for the 8 versions as to which strings exist... index layout
|
|
*
|
|
* b0 b1 b2
|
|
* 0 =
|
|
* 1 = uncommon
|
|
* 2 = ws
|
|
* 3 = uncommon ws
|
|
* 4 = h2
|
|
* 5 = uncommon h2
|
|
* 6 = ws h2
|
|
* 7 = uncommon ws h2
|
|
*/
|
|
|
|
unsigned char filter_array[] = {
|
|
0xff, /* get */
|
|
0xff, /* post */
|
|
0xaa, /* options */
|
|
0xff, /* host */
|
|
0xff, /* connection */
|
|
0xff, /* upgrade */
|
|
0xff, /* origin */
|
|
0xcc, /* sec-ws-draft */
|
|
0xff, /* crlf */
|
|
0xcc, /* sec-ws-ext */
|
|
0xcc, /* sec-ws-key1 */
|
|
0xcc, /* sec-ws-key2 */
|
|
0xcc, /* sec-ws-protocol */
|
|
0xcc, /* sec-ws-accept */
|
|
0xcc, /* sec-ws-nonce */
|
|
0xff, /* http/1.1 */
|
|
0xf0, /* http2-settings */
|
|
0xff, /* accept */
|
|
0xaa, /* access-control-req-hdrs */
|
|
0xff, /* if-modified-since */
|
|
0xff, /* if-none-match */
|
|
0xff, /* accept-encoding */
|
|
0xff, /* accept-language */
|
|
0xff, /* pragma */
|
|
0xff, /* cache-control */
|
|
0xff, /* authorization */
|
|
0xff, /* cookie */
|
|
0xff, /* content-length */
|
|
0xff, /* content-type */
|
|
0xff, /* date */
|
|
0xff, /* range */
|
|
0xfa, /* referer */
|
|
0xcc, /* sec-ws-key */
|
|
0xcc, /* sec-ws-version */
|
|
0xcc, /* sec-sc-origin */
|
|
0xf0, /* authority */
|
|
0xf0, /* method */
|
|
0xf0, /* path */
|
|
0xf0, /* scheme */
|
|
0xf0, /* status */
|
|
0xfa, /* accept-charset */
|
|
0xff, /* accept-ranges */
|
|
0xfa, /* access-control-allow-origin */
|
|
0xff, /* age */
|
|
0xff, /* allow */
|
|
0xff, /* content-disposition */
|
|
0xff, /* content-encoding */
|
|
0xff, /* content-language */
|
|
0xff, /* content-location */
|
|
0xff, /* content-range */
|
|
0xff, /* etag */
|
|
0xff, /* expect */
|
|
0xff, /* expires */
|
|
0xff, /* from */
|
|
0xff, /* if-match */
|
|
0xff, /* if-range */
|
|
0xff, /* if-unmodified-since */
|
|
0xff, /* last-modified */
|
|
0xff, /* link */
|
|
0xff, /* location */
|
|
0xfa, /* max-forwards */
|
|
0xfa, /* proxy-authenticate */
|
|
0xfa, /* proxy-authorization */
|
|
0xff, /* refresh */
|
|
0xff, /* retry-after */
|
|
0xff, /* server */
|
|
0xff, /* set-cookie */
|
|
0xfa, /* strict-transport-security */
|
|
0xff, /* transfer-encoding */
|
|
0xfa, /* user-agent */
|
|
0xfa, /* vary */
|
|
0xfa, /* via */
|
|
0xfa, /* www-authenticate */
|
|
0xaa, /* patch */
|
|
0xaa, /* put */
|
|
0xaa, /* delete */
|
|
0xff, /* uri-args */
|
|
0xaa, /* proxy */
|
|
0xaa, /* x-real-ip */
|
|
0xff, /* http/1.0 */
|
|
0xff, /* x-forwarded-for */
|
|
0xff, /* connect */
|
|
0xff, /* head */
|
|
0xfa, /* te */
|
|
0xfa, /* replay-nonce */
|
|
0xf0, /* protocol */
|
|
0xff, /* x-auth-token */
|
|
0xff /* not matchable */
|
|
};
|
|
|
|
static unsigned char lws_header_implies_psuedoheader_map[] = {
|
|
0x07, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00 /* <-64 */,
|
|
0x0e /* <- 72 */, 0x04 /* <- 80 */, 0, 0, 0, 0
|
|
};
|
|
|
|
/*
|
|
* b7 = 0 = 1-byte seq
|
|
* 0x08 = fail
|
|
* 2-byte seq
|
|
* 0x00 - 0x07, then terminal as given in 2nd byte
|
|
3-byte seq
|
|
* no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
|
|
* = 1 = 1-byte seq
|
|
* no match: die, match go fwd 1 byte
|
|
*/
|
|
|
|
unsigned char lextable[][2000] = {
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
#define LWS_ROLE_WS 1
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#undef LWS_ROLE_WS
|
|
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
#define LWS_ROLE_H2 1
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
|
|
#define LWS_ROLE_WS 1
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
#define LWS_WITH_HTTP_UNCOMMON_HEADERS 1
|
|
{
|
|
#include "lextable.h"
|
|
},
|
|
};
|
|
|
|
#define PARALLEL 30
|
|
|
|
struct state {
|
|
char c[PARALLEL];
|
|
int state[PARALLEL];
|
|
int count;
|
|
int bytepos;
|
|
|
|
int real_pos;
|
|
};
|
|
|
|
static unsigned char pseudomap[8][16];
|
|
|
|
struct state state[1000];
|
|
int next = 1;
|
|
|
|
#define FAIL_CHAR 0x08
|
|
|
|
int lextable_decode(int version, int pos, char c)
|
|
{
|
|
while (1) {
|
|
if (lextable[version][pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
|
|
if ((lextable[version][pos] & 0x7f) != c)
|
|
return -1;
|
|
/* fall thru */
|
|
pos++;
|
|
if (lextable[version][pos] == FAIL_CHAR)
|
|
return -1;
|
|
return pos;
|
|
} else { /* b7 = 0, end or 3-byte */
|
|
if (lextable[version][pos] < FAIL_CHAR) /* terminal marker */
|
|
return pos;
|
|
|
|
if (lextable[version][pos] == c) /* goto */
|
|
return pos + (lextable[version][pos + 1]) +
|
|
(lextable[version][pos + 2] << 8);
|
|
/* fall thru goto */
|
|
pos += 3;
|
|
/* continue */
|
|
}
|
|
}
|
|
}
|
|
|
|
int issue(int version)
|
|
{
|
|
const char *rset[200];
|
|
int n = 0;
|
|
int m;
|
|
int prev;
|
|
int walk;
|
|
int saw;
|
|
int y;
|
|
int j;
|
|
int pos = 0;
|
|
|
|
int setmembers = 0;
|
|
|
|
memset(rset, 0, sizeof(rset));
|
|
|
|
if (version == 7)
|
|
printf("#if defined(LWS_HTTP_HEADERS_ALL) || (%cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
|
|
"%cdefined(LWS_ROLE_WS) && "
|
|
"%cdefined(LWS_ROLE_H2))\n", version & 1 ? ' ' : '!',
|
|
version & 2 ? ' ' : '!', version & 4 ? ' ' : '!');
|
|
else
|
|
printf("#if !defined(LWS_HTTP_HEADERS_ALL) && %cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
|
|
"%cdefined(LWS_ROLE_WS) && "
|
|
"%cdefined(LWS_ROLE_H2)\n", version & 1 ? ' ' : '!',
|
|
version & 2 ? ' ' : '!', version & 4 ? ' ' : '!');
|
|
|
|
/*
|
|
* let's create version's view of the set of strings
|
|
*/
|
|
|
|
for (n = 0; n < sizeof(set) / sizeof(set[0]); n++)
|
|
if (filter_array[n] & (1 << version)) {
|
|
printf("\t/* %d: %d: %s */\n", setmembers, n, set[n]);
|
|
if (lws_header_implies_psuedoheader_map[n >> 3] & (1 << (n & 7)))
|
|
pseudomap[version][(setmembers >> 3)] |= 1 << (setmembers & 7);
|
|
rset[setmembers++] = set[n];
|
|
}
|
|
|
|
n = 0;
|
|
while (n < setmembers) {
|
|
|
|
m = 0;
|
|
walk = 0;
|
|
prev = 0;
|
|
|
|
if (rset[n][0] == '\0') {
|
|
n++;
|
|
continue;
|
|
}
|
|
|
|
while (rset[n][m]) {
|
|
|
|
saw = 0;
|
|
for (y = 0; y < state[walk].count; y++)
|
|
if (state[walk].c[y] == rset[n][m]) {
|
|
/* exists -- go forward */
|
|
walk = state[walk].state[y];
|
|
saw = 1;
|
|
break;
|
|
}
|
|
|
|
if (saw)
|
|
goto again;
|
|
|
|
/* something we didn't see before */
|
|
|
|
state[walk].c[state[walk].count] = rset[n][m];
|
|
|
|
state[walk].state[state[walk].count] = next;
|
|
state[walk].count++;
|
|
walk = next++;
|
|
again:
|
|
m++;
|
|
}
|
|
|
|
state[walk].c[0] = n++;
|
|
state[walk].state[0] = 0; /* terminal marker */
|
|
state[walk].count = 1;
|
|
}
|
|
|
|
walk = 0;
|
|
for (n = 0; n < next; n++) {
|
|
state[n].bytepos = walk;
|
|
walk += (2 * state[n].count);
|
|
}
|
|
|
|
/* compute everyone's position first */
|
|
|
|
pos = 0;
|
|
walk = 0;
|
|
for (n = 0; n < next; n++) {
|
|
|
|
state[n].real_pos = pos;
|
|
|
|
for (m = 0; m < state[n].count; m++) {
|
|
|
|
if (state[n].state[m] == 0)
|
|
pos += 2; /* terminal marker */
|
|
else { /* c is a character */
|
|
if ((state[state[n].state[m]].bytepos -
|
|
walk) == 2)
|
|
pos++;
|
|
else {
|
|
pos += 3;
|
|
if (m == state[n].count - 1)
|
|
pos++; /* fail */
|
|
}
|
|
}
|
|
walk += 2;
|
|
}
|
|
}
|
|
|
|
walk = 0;
|
|
pos = 0;
|
|
for (n = 0; n < next; n++) {
|
|
for (m = 0; m < state[n].count; m++) {
|
|
|
|
if (!m)
|
|
fprintf(stdout, "/* pos %04x: %3d */ ",
|
|
state[n].real_pos, n);
|
|
else
|
|
fprintf(stdout, " ");
|
|
|
|
y = state[n].c[m];
|
|
saw = state[n].state[m];
|
|
|
|
if (saw == 0) { // c is a terminal then
|
|
|
|
if (y > 0x7ff) {
|
|
fprintf(stderr, "terminal too big\n");
|
|
return 2;
|
|
}
|
|
|
|
fprintf(stdout, " 0x%02X, 0x%02X "
|
|
" "
|
|
"/* - terminal marker %2d - */,\n",
|
|
y >> 8, y & 0xff, y & 0x7f);
|
|
pos += 2;
|
|
walk += 2;
|
|
continue;
|
|
}
|
|
|
|
/* c is a character */
|
|
|
|
prev = y &0x7f;
|
|
if (prev < 32 || prev > 126)
|
|
prev = '.';
|
|
|
|
|
|
if ((state[saw].bytepos - walk) == 2) {
|
|
fprintf(stdout, " 0x%02X /* '%c' -> */,\n",
|
|
y | 0x80, prev);
|
|
pos++;
|
|
walk += 2;
|
|
continue;
|
|
}
|
|
|
|
j = state[saw].real_pos - pos;
|
|
|
|
if (j > 0xffff) {
|
|
fprintf(stderr,
|
|
"Jump > 64K bytes ahead (%d to %d)\n",
|
|
state[n].real_pos, state[saw].real_pos);
|
|
return 1;
|
|
}
|
|
fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X, 0x%02X "
|
|
"/* (to 0x%04X state %3d) */,\n",
|
|
y, prev,
|
|
j & 0xff, j >> 8,
|
|
state[saw].real_pos, saw);
|
|
pos += 3;
|
|
|
|
if (m == state[n].count - 1) {
|
|
fprintf(stdout,
|
|
" 0x%02X, /* fail */\n",
|
|
FAIL_CHAR);
|
|
pos++; /* fail */
|
|
}
|
|
|
|
walk += 2;
|
|
}
|
|
}
|
|
|
|
fprintf(stdout, "/* total size %d bytes */\n", pos);
|
|
|
|
printf("#endif\n\n");
|
|
|
|
/*
|
|
* Try to parse every legal input string
|
|
*/
|
|
|
|
for (n = 0; n < setmembers; n++) {
|
|
walk = 0;
|
|
m = 0;
|
|
y = -1;
|
|
|
|
if (rset[n][0] == '\0')
|
|
continue;
|
|
|
|
fprintf(stderr, " trying %d '%s'\n", n, rset[n]);
|
|
|
|
while (rset[n][m]) {
|
|
walk = lextable_decode(version, walk, rset[n][m]);
|
|
if (walk < 0) {
|
|
fprintf(stderr, "failed\n");
|
|
return 3;
|
|
}
|
|
|
|
if (lextable[version][walk] < FAIL_CHAR) {
|
|
y = (lextable[version][walk] << 8) +
|
|
lextable[version][walk + 1];
|
|
break;
|
|
}
|
|
m++;
|
|
}
|
|
|
|
if (y != n) {
|
|
fprintf(stderr, "decode failed %d\n", y);
|
|
return 4;
|
|
}
|
|
}
|
|
|
|
fprintf(stderr, "All decode OK\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
int m, n;
|
|
|
|
for (n = 0; n < 8; n++) {
|
|
issue(n);
|
|
}
|
|
|
|
printf("\n/*\n");
|
|
|
|
for (n = 0; n < 8; n++) {
|
|
|
|
if (n == 7)
|
|
printf("#if defined(LWS_HTTP_HEADERS_ALL) || (%cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
|
|
"%cdefined(LWS_ROLE_WS) && "
|
|
"%cdefined(LWS_ROLE_H2))\n", n & 1 ? ' ' : '!',
|
|
n & 2 ? ' ' : '!', n & 4 ? ' ' : '!');
|
|
else
|
|
printf("#if !defined(LWS_HTTP_HEADERS_ALL) && %cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
|
|
"%cdefined(LWS_ROLE_WS) && "
|
|
"%cdefined(LWS_ROLE_H2)\n", n & 1 ? ' ' : '!',
|
|
n & 2 ? ' ' : '!', n & 4 ? ' ' : '!');
|
|
|
|
printf("static uint8_t lws_header_implies_psuedoheader_map[] = {\n\t");
|
|
|
|
for (m = 0; m < sizeof(pseudomap[n]); m++)
|
|
printf("0x%02x,", pseudomap[n][m]);
|
|
|
|
printf("\n};\n");
|
|
|
|
printf("#endif\n");
|
|
}
|
|
|
|
printf("*/\n");
|
|
|
|
fprintf(stderr, "did all the variants\n");
|
|
}
|