improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
/*
|
|
|
|
* minilex.c
|
|
|
|
*
|
|
|
|
* High efficiency lexical state parser
|
|
|
|
*
|
2014-03-09 11:49:21 +08:00
|
|
|
* Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
*
|
|
|
|
* Licensed under LGPL2
|
|
|
|
*
|
|
|
|
* Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
|
|
|
|
*
|
|
|
|
* Run it twice: the second build compiles in the lextable.h generated by the first, and the parsing self-test on that table reports to stderr
|
|
|
|
*/
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2014-10-12 14:31:47 +08:00
|
|
|
#include "lextable-strings.h"
|
2013-01-18 01:55:48 +08:00
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
/*
|
|
|
|
* b7 = 0 = 1-byte seq
|
|
|
|
* 0x08 = fail
|
|
|
|
* 2-byte seq
|
|
|
|
* 0x00 - 0x07, then terminal as given in 2nd byte
|
|
|
|
* 3-byte seq
|
|
|
|
* no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
|
|
|
|
* = 1 = 1-byte seq
|
|
|
|
* no match: die, match go fwd 1 byte
|
|
|
|
*/
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
/*
 * The parse table that lextable_decode() walks.  Its contents are the
 * initializer list found in lextable.h, pulled in verbatim by the
 * preprocessor.
 */
unsigned char lextable[] = {
#include "lextable.h"
};
|
|
|
|
|
|
|
|
/* Maximum number of outgoing entries a single state can hold */
#define PARALLEL 30

/*
 * One node of the tree that main() builds from the strings in set[].
 */
struct state {
	/*
	 * Per entry: the input character that selects it.  In a terminal
	 * node, c[0] instead holds the index of the matched string.
	 */
	char c[PARALLEL];
	/*
	 * Per entry: index into the global state[] array of the node the
	 * character leads to; 0 marks the entry as a terminal.
	 */
	int state[PARALLEL];
	/* number of entries of c[] / state[] in use */
	int count;
	/*
	 * Provisional offset of this node counting 2 bytes per entry; main()
	 * uses it to tell whether a child directly follows an entry.
	 */
	int bytepos;
	/* byte offset of this node in the emitted table */
	int real_pos;
};

/* Node pool; state[0] is the root every string starts from */
struct state state[1000];
/* Index of the next unused node in state[] (0 is taken by the root) */
int next = 1;

/* Table byte that marks "no further alternatives, parse fails" */
#define FAIL_CHAR 0x08
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
int lextable_decode(int pos, char c)
|
|
|
|
{
|
2014-03-09 11:49:21 +08:00
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
while (1) {
|
2014-03-09 11:49:21 +08:00
|
|
|
if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
|
|
|
|
if ((lextable[pos] & 0x7f) != c)
|
|
|
|
return -1;
|
|
|
|
/* fall thru */
|
|
|
|
pos++;
|
|
|
|
if (lextable[pos] == FAIL_CHAR)
|
|
|
|
return -1;
|
2013-01-18 01:55:48 +08:00
|
|
|
return pos;
|
2014-03-09 11:49:21 +08:00
|
|
|
} else { /* b7 = 0, end or 3-byte */
|
|
|
|
if (lextable[pos] < FAIL_CHAR) /* terminal marker */
|
|
|
|
return pos;
|
|
|
|
|
|
|
|
if (lextable[pos] == c) /* goto */
|
|
|
|
return pos + (lextable[pos + 1]) +
|
|
|
|
(lextable[pos + 2] << 8);
|
|
|
|
/* fall thru goto */
|
|
|
|
pos += 3;
|
|
|
|
/* continue */
|
|
|
|
}
|
2013-01-18 01:55:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(void)
|
|
|
|
{
|
|
|
|
int n = 0;
|
|
|
|
int m = 0;
|
|
|
|
int prev;
|
|
|
|
char c;
|
|
|
|
int walk;
|
|
|
|
int saw;
|
|
|
|
int y;
|
2014-03-09 11:49:21 +08:00
|
|
|
int j;
|
|
|
|
int pos = 0;
|
2013-01-18 01:55:48 +08:00
|
|
|
|
|
|
|
while (n < sizeof(set) / sizeof(set[0])) {
|
|
|
|
|
|
|
|
m = 0;
|
|
|
|
walk = 0;
|
|
|
|
prev = 0;
|
|
|
|
|
2013-11-13 08:03:05 +08:00
|
|
|
if (set[n][0] == '\0') {
|
|
|
|
n++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
while (set[n][m]) {
|
|
|
|
|
|
|
|
saw = 0;
|
|
|
|
for (y = 0; y < state[walk].count; y++)
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
if (state[walk].c[y] == set[n][m]) {
|
|
|
|
/* exists -- go forward */
|
|
|
|
walk = state[walk].state[y];
|
2013-01-18 01:55:48 +08:00
|
|
|
saw = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (saw)
|
|
|
|
goto again;
|
|
|
|
|
|
|
|
/* something we didn't see before */
|
|
|
|
|
|
|
|
state[walk].c[state[walk].count] = set[n][m];
|
|
|
|
|
|
|
|
state[walk].state[state[walk].count] = next;
|
|
|
|
state[walk].count++;
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
walk = next++;
|
2013-01-18 01:55:48 +08:00
|
|
|
again:
|
|
|
|
m++;
|
|
|
|
}
|
|
|
|
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
state[walk].c[0] = n++;
|
2013-01-18 01:55:48 +08:00
|
|
|
state[walk].state[0] = 0; /* terminal marker */
|
|
|
|
state[walk].count = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
walk = 0;
|
|
|
|
for (n = 0; n < next; n++) {
|
|
|
|
state[n].bytepos = walk;
|
|
|
|
walk += (2 * state[n].count);
|
|
|
|
}
|
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
/* compute everyone's position first */
|
|
|
|
|
|
|
|
pos = 0;
|
|
|
|
walk = 0;
|
|
|
|
for (n = 0; n < next; n++) {
|
|
|
|
|
|
|
|
state[n].real_pos = pos;
|
|
|
|
|
|
|
|
for (m = 0; m < state[n].count; m++) {
|
|
|
|
|
|
|
|
if (state[n].state[m] == 0)
|
|
|
|
pos += 2; /* terminal marker */
|
|
|
|
else { /* c is a character */
|
|
|
|
if ((state[state[n].state[m]].bytepos -
|
|
|
|
walk) == 2)
|
|
|
|
pos++;
|
|
|
|
else {
|
|
|
|
pos += 3;
|
|
|
|
if (m == state[n].count - 1)
|
|
|
|
pos++; /* fail */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
walk += 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
walk = 0;
|
2014-03-09 11:49:21 +08:00
|
|
|
pos = 0;
|
2013-01-18 01:55:48 +08:00
|
|
|
for (n = 0; n < next; n++) {
|
|
|
|
for (m = 0; m < state[n].count; m++) {
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
|
|
|
|
if (!m)
|
2014-03-09 11:49:21 +08:00
|
|
|
fprintf(stdout, "/* pos %04x: %3d */ ",
|
|
|
|
state[n].real_pos, n);
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
else
|
2014-03-09 11:49:21 +08:00
|
|
|
fprintf(stdout, " ");
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
y = state[n].c[m];
|
|
|
|
saw = state[n].state[m];
|
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
if (saw == 0) { // c is a terminal then
|
|
|
|
|
|
|
|
if (y > 0x7ff) {
|
|
|
|
fprintf(stderr, "terminal too big\n");
|
|
|
|
return 2;
|
2013-01-18 01:55:48 +08:00
|
|
|
}
|
2014-03-09 11:49:21 +08:00
|
|
|
|
|
|
|
fprintf(stdout, " 0x%02X, 0x%02X "
|
|
|
|
" "
|
|
|
|
"/* - terminal marker %2d - */,\n",
|
|
|
|
y >> 8, y & 0xff, y & 0x7f);
|
|
|
|
pos += 2;
|
|
|
|
walk += 2;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* c is a character */
|
|
|
|
|
|
|
|
prev = y &0x7f;
|
|
|
|
if (prev < 32 || prev > 126)
|
|
|
|
prev = '.';
|
|
|
|
|
|
|
|
|
|
|
|
if ((state[saw].bytepos - walk) == 2) {
|
|
|
|
fprintf(stdout, " 0x%02X /* '%c' -> */,\n",
|
|
|
|
y | 0x80, prev);
|
|
|
|
pos++;
|
|
|
|
walk += 2;
|
|
|
|
continue;
|
2013-01-18 01:55:48 +08:00
|
|
|
}
|
2014-03-09 11:49:21 +08:00
|
|
|
|
|
|
|
j = state[saw].real_pos - pos;
|
|
|
|
|
|
|
|
if (j > 0xffff) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Jump > 64K bytes ahead (%d to %d)\n",
|
|
|
|
state[n].real_pos, state[saw].real_pos);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X, 0x%02X "
|
|
|
|
"/* (to 0x%04X state %3d) */,\n",
|
|
|
|
y, prev,
|
|
|
|
j & 0xff, j >> 8,
|
|
|
|
state[saw].real_pos, saw);
|
|
|
|
pos += 3;
|
|
|
|
|
|
|
|
if (m == state[n].count - 1) {
|
|
|
|
fprintf(stdout,
|
|
|
|
" 0x%02X, /* fail */\n",
|
|
|
|
FAIL_CHAR);
|
|
|
|
pos++; /* fail */
|
|
|
|
}
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
walk += 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
fprintf(stdout, "/* total size %d bytes */\n", pos);
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
|
|
|
|
/*
|
2014-03-09 11:49:21 +08:00
|
|
|
* Try to parse every legal input string
|
improve minilex use external parsing header
Clean up minilex
Move the header output to stdout
Introduce lexfile.h as the header output
Use lexfile.h in both minilex itself and lws
Add the following header support
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
Signed-off-by: Andy Green <andy.green@linaro.org>
2013-11-09 10:09:09 +08:00
|
|
|
*/
|
2013-01-18 01:55:48 +08:00
|
|
|
|
|
|
|
for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
|
|
|
|
walk = 0;
|
|
|
|
m = 0;
|
2014-03-09 11:49:21 +08:00
|
|
|
y = -1;
|
2013-01-18 01:55:48 +08:00
|
|
|
|
2013-11-13 08:03:05 +08:00
|
|
|
if (set[n][0] == '\0')
|
|
|
|
continue;
|
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
fprintf(stderr, " trying '%s'\n", set[n]);
|
2013-01-18 01:55:48 +08:00
|
|
|
|
|
|
|
while (set[n][m]) {
|
|
|
|
walk = lextable_decode(walk, set[n][m]);
|
|
|
|
if (walk < 0) {
|
|
|
|
fprintf(stderr, "failed\n");
|
2014-03-09 11:49:21 +08:00
|
|
|
return 3;
|
2013-01-18 01:55:48 +08:00
|
|
|
}
|
2014-03-09 11:49:21 +08:00
|
|
|
|
|
|
|
if (lextable[walk] < FAIL_CHAR) {
|
|
|
|
y = (lextable[walk] << 8) + lextable[walk + 1];
|
2013-01-18 01:55:48 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
m++;
|
|
|
|
}
|
2014-03-09 11:49:21 +08:00
|
|
|
|
|
|
|
if (y != n) {
|
|
|
|
fprintf(stderr, "decode failed %d\n", y);
|
|
|
|
return 4;
|
|
|
|
}
|
2013-01-18 01:55:48 +08:00
|
|
|
}
|
|
|
|
|
2014-03-09 11:49:21 +08:00
|
|
|
fprintf(stderr, "All decode OK\n");
|
|
|
|
|
2013-01-18 01:55:48 +08:00
|
|
|
return 0;
|
|
|
|
}
|