1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-09 00:00:04 +01:00

tokenizer: flags for dot not delim and floats as strings

This commit is contained in:
Andy Green 2018-11-13 09:34:10 +08:00
parent aa4143aebd
commit 97f9af5e3b
3 changed files with 77 additions and 9 deletions

View file

@ -21,7 +21,7 @@
* included from libwebsockets.h
*/
/* Do not treat - as a terminal character */
/* Do not treat - as a terminal character, so "my-token" is one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON (1 << 1)
@ -30,6 +30,12 @@
/* Allow more characters in the tokens and fewer delimiters... default is
* only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3)
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
#define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4)
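/* If something starts looking like a float, like 1.2, force it to be a string
* token instead. Combined with LWS_TOKENIZE_F_DOT_NONTERM, this lets you
* receive dotted-quads like 192.168.0.1 as single string tokens, and avoids
* illegal float format detection on input like 1.myserver.com. Without
* LWS_TOKENIZE_F_DOT_NONTERM the dots are still reported as delimiters. */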
#define LWS_TOKENIZE_F_NO_FLOATS (1 << 5)
typedef enum {

View file

@ -3031,10 +3031,22 @@ lws_tokenize(struct lws_tokenize *ts)
{
const char *rfc7230_delims = "(),/:;<=>?@[\\]{}";
lws_tokenize_state state = LWS_TOKZS_LEADING_WHITESPACE;
char c, flo = 0;
char c, flo = 0, d_minus = '-', d_dot = '.', s_minus = '\0',
s_dot = '\0';
signed char num = -1;
int utf8 = 0;
/* for speed, compute the effect of the flags outside the loop */
if (ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM) {
d_minus = '\0';
s_minus = '-';
}
if (ts->flags & LWS_TOKENIZE_F_DOT_NONTERM) {
d_dot = '\0';
s_dot = '.';
}
ts->token = NULL;
ts->token_len = 0;
@ -3046,8 +3058,6 @@ lws_tokenize(struct lws_tokenize *ts)
if (utf8 < 0)
return LWS_TOKZE_ERR_BROKEN_UTF8;
lwsl_debug("%s: %c (%d) %d\n", __func__, c, state, (int)ts->len);
if (!c)
break;
@ -3111,7 +3121,8 @@ lws_tokenize(struct lws_tokenize *ts)
/* aggregate . in a number as a float */
if (c == '.' && state == LWS_TOKZS_TOKEN && num == 1) {
if (c == '.' && !(ts->flags & LWS_TOKENIZE_F_NO_FLOATS) &&
state == LWS_TOKZS_TOKEN && num == 1) {
if (flo)
return LWS_TOKZE_ERR_MALFORMED_FLOAT;
flo = 1;
@ -3140,9 +3151,9 @@ lws_tokenize(struct lws_tokenize *ts)
strchr(rfc7230_delims, c) && c > 32) ||
((!(ts->flags & LWS_TOKENIZE_F_RFC7230_DELIMS) &&
(c < '0' || c > '9') && (c < 'A' || c > 'Z') &&
(c < 'a' || c > 'z') && c != '_') && !(c == '-' &&
(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))) ||
(c == '-' && !(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))
(c < 'a' || c > 'z') && c != '_') &&
c != s_minus && c != s_dot) ||
c == d_minus || c == d_dot
)) {
switch (state) {
case LWS_TOKZS_LEADING_WHITESPACE:

View file

@ -135,6 +135,28 @@ struct expected expected1[] = {
{ LWS_TOKZE_TOKEN, "", 3 },
{ LWS_TOKZE_DELIMITER, ",", 1 },
{ LWS_TOKZE_ERR_BROKEN_UTF8, "", 0 },
},
expected11[] = {
{ LWS_TOKZE_TOKEN, "1.myserver", 10 },
{ LWS_TOKZE_DELIMITER, ".", 1 },
{ LWS_TOKZE_TOKEN, "com", 3 },
{ LWS_TOKZE_ENDED, "", 0 },
},
expected12[] = {
{ LWS_TOKZE_TOKEN, "1.myserver.com", 14 },
{ LWS_TOKZE_ENDED, "", 0 },
},
expected13[] = {
{ LWS_TOKZE_TOKEN, "1.myserver.com", 14 },
{ LWS_TOKZE_ENDED, "", 0 },
},
expected14[] = {
{ LWS_TOKZE_INTEGER, "1", 1 },
{ LWS_TOKZE_DELIMITER, ".", 1 },
{ LWS_TOKZE_TOKEN, "myserver", 8 },
{ LWS_TOKZE_DELIMITER, ".", 1 },
{ LWS_TOKZE_TOKEN, "com", 3 },
{ LWS_TOKZE_ENDED, "", 0 },
}
;
@ -184,6 +206,26 @@ struct tests tests[] = {
expected10, LWS_ARRAY_SIZE(expected10),
LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_RFC7230_DELIMS
},
{
"1.myserver.com",
expected11, LWS_ARRAY_SIZE(expected11),
0
},
{
"1.myserver.com",
expected12, LWS_ARRAY_SIZE(expected12),
LWS_TOKENIZE_F_DOT_NONTERM
},
{
"1.myserver.com",
expected13, LWS_ARRAY_SIZE(expected13),
LWS_TOKENIZE_F_DOT_NONTERM | LWS_TOKENIZE_F_NO_FLOATS
},
{
"1.myserver.com",
expected14, LWS_ARRAY_SIZE(expected14),
LWS_TOKENIZE_F_NO_FLOATS
},
};
/*
@ -309,7 +351,16 @@ int main(int argc, const char **argv)
printf(" | ");
printf("LWS_TOKENIZE_F_RFC7230_DELIMS");
}
if (flags & LWS_TOKENIZE_F_DOT_NONTERM) {
if (flags & 15)
printf(" | ");
printf("LWS_TOKENIZE_F_DOT_NONTERM");
}
if (flags & LWS_TOKENIZE_F_NO_FLOATS) {
if (flags & 31)
printf(" | ");
printf("LWS_TOKENIZE_F_NO_FLOATS");
}
printf("\n\t},\n");
}