1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-23 00:00:06 +01:00
libwebsockets/lib/roles/http/minilex.c
Andy Green 99e778ac08 http: add cmake option for header reduction defeat
Lws now strips out http headers related to h2, ws and unusual headers
based on cmake config settings for those features... it saves some heap
for the ah and reduces the table size in .rodata.

It's possible code might have some external dependency on the original
header indexes, but, eg, you don't enable h2 so those indexes are
optimized with the h2 ones taken out.

This introduces a cmake option "LWS_HTTP_HEADERS_ALL", default-OFF, that
defeats the header table optimization for compatibility with older
versions in the case the client software can't be adapted to use the
lws-exported matching header enums.

You probably don't need this.
2020-04-23 17:08:57 +01:00

500 lines
10 KiB
C

/*
* minilex.c
*
* High efficiency lexical state parser
*
* Copyright (C)2011-2020 Andy Green <andy@warmcat.com>
*
* Licensed under MIT
*
* Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
*
* Run it twice to test parsing on the generated table on stderr
*
* Whoo this got a bit complicated by lws-buildtime deselection of some
* headers optionally. There are 3 x vars, UNCOMMON, WS, H2 so we make
* eight copies of the lextable selected by the appropriate #if defined()
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* get all the strings */
#define LWS_ROLE_WS 1
#define LWS_WITH_HTTP_UNCOMMON_HEADERS 1
#define LWS_ROLE_H2 1
#include "lextable-strings.h"
#undef LWS_ROLE_WS
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
#undef LWS_ROLE_H2
/*
 * Bitfield, one entry per string and indexed in parallel with set[], saying
 * which of the 8 table versions contain that string: issue() keeps string n
 * for a version when (filter_array[n] & (1 << version)) is nonzero.
 *
 * Version index layout (b0 = uncommon headers, b1 = ws, b2 = h2):
 *
 * 0 =
 * 1 = uncommon
 * 2 = ws
 * 3 = uncommon ws
 * 4 = h2
 * 5 = uncommon h2
 * 6 = ws h2
 * 7 = uncommon ws h2
 *
 * So the masks used below read as:
 *
 * 0xff = present in every version
 * 0xaa = versions 1, 3, 5, 7: only when uncommon headers are enabled
 * 0xcc = versions 2, 3, 6, 7: only when ws is enabled
 * 0xf0 = versions 4, 5, 6, 7: only when h2 is enabled
 * 0xfa = versions 1, 3, 4, 5, 6, 7: when uncommon headers or h2 is enabled
 */
unsigned char filter_array[] = {
0xff, /* get */
0xff, /* post */
0xaa, /* options */
0xff, /* host */
0xff, /* connection */
0xff, /* upgrade */
0xff, /* origin */
0xcc, /* sec-ws-draft */
0xff, /* crlf */
0xcc, /* sec-ws-ext */
0xcc, /* sec-ws-key1 */
0xcc, /* sec-ws-key2 */
0xcc, /* sec-ws-protocol */
0xcc, /* sec-ws-accept */
0xcc, /* sec-ws-nonce */
0xff, /* http/1.1 */
0xf0, /* http2-settings */
0xff, /* accept */
0xaa, /* access-control-req-hdrs */
0xff, /* if-modified-since */
0xff, /* if-none-match */
0xff, /* accept-encoding */
0xff, /* accept-language */
0xff, /* pragma */
0xff, /* cache-control */
0xff, /* authorization */
0xff, /* cookie */
0xff, /* content-length */
0xff, /* content-type */
0xff, /* date */
0xff, /* range */
0xfa, /* referer */
0xcc, /* sec-ws-key */
0xcc, /* sec-ws-version */
0xcc, /* sec-sc-origin */
0xf0, /* authority */
0xf0, /* method */
0xf0, /* path */
0xf0, /* scheme */
0xf0, /* status */
0xfa, /* accept-charset */
0xff, /* accept-ranges */
0xfa, /* access-control-allow-origin */
0xff, /* age */
0xff, /* allow */
0xff, /* content-disposition */
0xff, /* content-encoding */
0xff, /* content-language */
0xff, /* content-location */
0xff, /* content-range */
0xff, /* etag */
0xff, /* expect */
0xff, /* expires */
0xff, /* from */
0xff, /* if-match */
0xff, /* if-range */
0xff, /* if-unmodified-since */
0xff, /* last-modified */
0xff, /* link */
0xff, /* location */
0xfa, /* max-forwards */
0xfa, /* proxy-authenticate */
0xfa, /* proxy-authorization */
0xff, /* refresh */
0xff, /* retry-after */
0xff, /* server */
0xff, /* set-cookie */
0xfa, /* strict-transport-security */
0xff, /* transfer-encoding */
0xfa, /* user-agent */
0xfa, /* vary */
0xfa, /* via */
0xfa, /* www-authenticate */
0xaa, /* patch */
0xaa, /* put */
0xaa, /* delete */
0xff, /* uri-args */
0xaa, /* proxy */
0xaa, /* x-real-ip */
0xff, /* http/1.0 */
0xff, /* x-forwarded-for */
0xff, /* connect */
0xff, /* head */
0xfa, /* te */
0xfa, /* replay-nonce */
0xf0, /* protocol */
0xff, /* x-auth-token */
0xff /* not matchable */
};
/*
 * Bitmap over the unfiltered string indices: string n in set[] is flagged
 * when bit (n & 7) of byte (n >> 3) is set.  issue() reads it to set the
 * matching bit, renumbered to the per-version string index, in
 * pseudomap[version].
 *
 * Each "<- N" comment follows the byte whose bit 0 is string index N.
 */
static unsigned char lws_header_implies_psuedoheader_map[] = {
0x07, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00 /* <-64 */,
0x0e /* <- 72 */, 0x04 /* <- 80 */, 0, 0, 0, 0
};
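/*
 * Table entry encoding, selected by the first byte of the entry:
 *
 * b7 = 0:
 *   0x08 = fail (1-byte seq)
 *   0x00 - 0x07 = 2-byte seq: terminal, the matched string index is given
 *     by the two bytes, high byte first
 *   anything else = 3-byte seq: character, then jump distance low byte,
 *     then jump distance high byte
 *     no match: go fwd 3 bytes, match: jump fwd by amt in +1/+2 bytes
 * b7 = 1: 1-byte seq, character is in the low 7 bits
 *   no match: die, match: go fwd 1 byte
 */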
/*
 * The previously generated tables, compiled in so issue() can self-test its
 * strings against them.  lextable.h is included eight times with a different
 * combination of the three feature macros defined each time; the order of
 * the inclusions matches the version index, so lextable[version] is:
 *
 * [0] none, [1] UNCOMMON, [2] WS, [3] UNCOMMON + WS,
 * [4] H2, [5] UNCOMMON + H2, [6] WS + H2, [7] UNCOMMON + WS + H2
 *
 * Each row is a fixed 2000 bytes; bytes past the end of a generated table
 * are zero-initialized.
 */
unsigned char lextable[][2000] = {
{
#include "lextable.h"
},
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
{
#include "lextable.h"
},
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
#define LWS_ROLE_WS 1
{
#include "lextable.h"
},
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
{
#include "lextable.h"
},
#undef LWS_ROLE_WS
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
#define LWS_ROLE_H2 1
{
#include "lextable.h"
},
#define LWS_WITH_HTTP_UNCOMMON_HEADERS
{
#include "lextable.h"
},
#undef LWS_WITH_HTTP_UNCOMMON_HEADERS
#define LWS_ROLE_WS 1
{
#include "lextable.h"
},
#define LWS_WITH_HTTP_UNCOMMON_HEADERS 1
{
#include "lextable.h"
},
};
#define PARALLEL 30
struct state {
char c[PARALLEL];
int state[PARALLEL];
int count;
int bytepos;
int real_pos;
};
static unsigned char pseudomap[8][16];
struct state state[1000];
int next = 1;
#define FAIL_CHAR 0x08
/*
 * Advance the parse of one input character through a compiled-in table.
 *
 * version: which row of lextable[] to use
 * pos:     current byte offset in that table (0 to start a new string)
 * c:       the next input character
 *
 * Returns the new table offset, or -1 if c cannot follow from pos.  When
 * the byte at the returned offset is below FAIL_CHAR, the offset is a
 * terminal and the caller reads the matched index from the two bytes there.
 */
int lextable_decode(int version, int pos, char c)
{
	const unsigned char *tab = lextable[version];

	while (1) {
		if (tab[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
			if ((tab[pos] & 0x7f) != c)
				return -1;
			/* matched: the following state starts at the next byte */
			pos++;
			if (tab[pos] == FAIL_CHAR)
				return -1;
			return pos;
		}

		/*
		 * b7 = 0: fail marker, terminal or 3-byte entry.
		 *
		 * A chain of 3-byte entries is closed by a FAIL_CHAR byte.
		 * It has to be recognized here, otherwise a mismatch walks
		 * past the end of the chain and goes on interpreting the
		 * bytes of the next state as if they were alternatives.
		 */
		if (tab[pos] == FAIL_CHAR)
			return -1;

		if (tab[pos] < FAIL_CHAR) /* terminal marker */
			return pos;

		if (tab[pos] == c) /* goto: 16-bit forward jump, low byte first */
			return pos + (tab[pos + 1]) + (tab[pos + 2] << 8);

		/* not this alternative, try the next 3-byte entry */
		pos += 3;
	}
}
int issue(int version)
{
const char *rset[200];
int n = 0;
int m;
int prev;
int walk;
int saw;
int y;
int j;
int pos = 0;
int setmembers = 0;
memset(rset, 0, sizeof(rset));
if (version == 7)
printf("#if defined(LWS_HTTP_HEADERS_ALL) || (%cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
"%cdefined(LWS_ROLE_WS) && "
"%cdefined(LWS_ROLE_H2))\n", version & 1 ? ' ' : '!',
version & 2 ? ' ' : '!', version & 4 ? ' ' : '!');
else
printf("#if !defined(LWS_HTTP_HEADERS_ALL) && %cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
"%cdefined(LWS_ROLE_WS) && "
"%cdefined(LWS_ROLE_H2)\n", version & 1 ? ' ' : '!',
version & 2 ? ' ' : '!', version & 4 ? ' ' : '!');
/*
* let's create version's view of the set of strings
*/
for (n = 0; n < sizeof(set) / sizeof(set[0]); n++)
if (filter_array[n] & (1 << version)) {
printf("\t/* %d: %d: %s */\n", setmembers, n, set[n]);
if (lws_header_implies_psuedoheader_map[n >> 3] & (1 << (n & 7)))
pseudomap[version][(setmembers >> 3)] |= 1 << (setmembers & 7);
rset[setmembers++] = set[n];
}
n = 0;
while (n < setmembers) {
m = 0;
walk = 0;
prev = 0;
if (rset[n][0] == '\0') {
n++;
continue;
}
while (rset[n][m]) {
saw = 0;
for (y = 0; y < state[walk].count; y++)
if (state[walk].c[y] == rset[n][m]) {
/* exists -- go forward */
walk = state[walk].state[y];
saw = 1;
break;
}
if (saw)
goto again;
/* something we didn't see before */
state[walk].c[state[walk].count] = rset[n][m];
state[walk].state[state[walk].count] = next;
state[walk].count++;
walk = next++;
again:
m++;
}
state[walk].c[0] = n++;
state[walk].state[0] = 0; /* terminal marker */
state[walk].count = 1;
}
walk = 0;
for (n = 0; n < next; n++) {
state[n].bytepos = walk;
walk += (2 * state[n].count);
}
/* compute everyone's position first */
pos = 0;
walk = 0;
for (n = 0; n < next; n++) {
state[n].real_pos = pos;
for (m = 0; m < state[n].count; m++) {
if (state[n].state[m] == 0)
pos += 2; /* terminal marker */
else { /* c is a character */
if ((state[state[n].state[m]].bytepos -
walk) == 2)
pos++;
else {
pos += 3;
if (m == state[n].count - 1)
pos++; /* fail */
}
}
walk += 2;
}
}
walk = 0;
pos = 0;
for (n = 0; n < next; n++) {
for (m = 0; m < state[n].count; m++) {
if (!m)
fprintf(stdout, "/* pos %04x: %3d */ ",
state[n].real_pos, n);
else
fprintf(stdout, " ");
y = state[n].c[m];
saw = state[n].state[m];
if (saw == 0) { // c is a terminal then
if (y > 0x7ff) {
fprintf(stderr, "terminal too big\n");
return 2;
}
fprintf(stdout, " 0x%02X, 0x%02X "
" "
"/* - terminal marker %2d - */,\n",
y >> 8, y & 0xff, y & 0x7f);
pos += 2;
walk += 2;
continue;
}
/* c is a character */
prev = y &0x7f;
if (prev < 32 || prev > 126)
prev = '.';
if ((state[saw].bytepos - walk) == 2) {
fprintf(stdout, " 0x%02X /* '%c' -> */,\n",
y | 0x80, prev);
pos++;
walk += 2;
continue;
}
j = state[saw].real_pos - pos;
if (j > 0xffff) {
fprintf(stderr,
"Jump > 64K bytes ahead (%d to %d)\n",
state[n].real_pos, state[saw].real_pos);
return 1;
}
fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X, 0x%02X "
"/* (to 0x%04X state %3d) */,\n",
y, prev,
j & 0xff, j >> 8,
state[saw].real_pos, saw);
pos += 3;
if (m == state[n].count - 1) {
fprintf(stdout,
" 0x%02X, /* fail */\n",
FAIL_CHAR);
pos++; /* fail */
}
walk += 2;
}
}
fprintf(stdout, "/* total size %d bytes */\n", pos);
printf("#endif\n\n");
/*
* Try to parse every legal input string
*/
for (n = 0; n < setmembers; n++) {
walk = 0;
m = 0;
y = -1;
if (rset[n][0] == '\0')
continue;
fprintf(stderr, " trying %d '%s'\n", n, rset[n]);
while (rset[n][m]) {
walk = lextable_decode(version, walk, rset[n][m]);
if (walk < 0) {
fprintf(stderr, "failed\n");
return 3;
}
if (lextable[version][walk] < FAIL_CHAR) {
y = (lextable[version][walk] << 8) +
lextable[version][walk + 1];
break;
}
m++;
}
if (y != n) {
fprintf(stderr, "decode failed %d\n", y);
return 4;
}
}
fprintf(stderr, "All decode OK\n");
return 0;
}
/*
 * Emit the tables for all 8 variants on stdout, followed by a commented-out
 * block holding each variant's lws_header_implies_psuedoheader_map[].
 *
 * Exit status is 0 unless generating a table failed for some variant.
 * issue() results 3 and 4 are deliberately not treated as failure: they come
 * from its self-test against the compiled-in lextable.h, which can only pass
 * once the tool has been rebuilt against its own output (the "run it twice"
 * in the file header), and the table on stdout is complete in that case.
 */
int main(void)
{
	int m, n, r;
	int failed = 0;

	for (n = 0; n < 8; n++) {
		r = issue(n);
		if (r && r != 3 && r != 4) {
			/* issue() bailed out midway: this variant is truncated */
			fprintf(stderr, "variant %d: table generation failed (%d)\n",
				n, r);
			failed = 1;
		}
	}

	printf("\n/*\n");

	for (n = 0; n < 8; n++) {
		if (n == 7)
			printf("#if defined(LWS_HTTP_HEADERS_ALL) || (%cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
			       "%cdefined(LWS_ROLE_WS) && "
			       "%cdefined(LWS_ROLE_H2))\n", n & 1 ? ' ' : '!',
			       n & 2 ? ' ' : '!', n & 4 ? ' ' : '!');
		else
			printf("#if !defined(LWS_HTTP_HEADERS_ALL) && %cdefined(LWS_WITH_HTTP_UNCOMMON_HEADERS) && "
			       "%cdefined(LWS_ROLE_WS) && "
			       "%cdefined(LWS_ROLE_H2)\n", n & 1 ? ' ' : '!',
			       n & 2 ? ' ' : '!', n & 4 ? ' ' : '!');

		printf("static uint8_t lws_header_implies_psuedoheader_map[] = {\n\t");

		for (m = 0; m < (int)sizeof(pseudomap[n]); m++)
			printf("0x%02x,", pseudomap[n][m]);

		printf("\n};\n");
		printf("#endif\n");
	}

	printf("*/\n");

	fprintf(stderr, "did all the variants\n");

	return failed;
}