mirror of
https://github.com/warmcat/libwebsockets.git
synced 2025-03-09 00:00:04 +01:00
tokenizer: flags for dot not delim and floats as strings
This commit is contained in:
parent
aa4143aebd
commit
97f9af5e3b
3 changed files with 77 additions and 9 deletions
|
@ -21,7 +21,7 @@
|
|||
* included from libwebsockets.h
|
||||
*/
|
||||
|
||||
/* Do not treat - as a terminal character */
|
||||
/* Do not treat - as a terminal character, so "my-token" is one token */
|
||||
#define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0)
|
||||
/* Separately report aggregate colon-delimited tokens */
|
||||
#define LWS_TOKENIZE_F_AGG_COLON (1 << 1)
|
||||
|
@ -30,6 +30,12 @@
|
|||
/* Allow more characters in the tokens and fewer delimiters... default is
|
||||
* only alphanumeric + underscore in tokens */
|
||||
#define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3)
|
||||
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
|
||||
#define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4)
|
||||
/* If something starts looking like a float, like 1.2, force it to be a string token.
|
||||
* This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
|
||||
* avoids illegal float format detection like 1.myserver.com */
|
||||
#define LWS_TOKENIZE_F_NO_FLOATS (1 << 5)
|
||||
|
||||
typedef enum {
|
||||
|
||||
|
|
|
@ -3031,10 +3031,22 @@ lws_tokenize(struct lws_tokenize *ts)
|
|||
{
|
||||
const char *rfc7230_delims = "(),/:;<=>?@[\\]{}";
|
||||
lws_tokenize_state state = LWS_TOKZS_LEADING_WHITESPACE;
|
||||
char c, flo = 0;
|
||||
char c, flo = 0, d_minus = '-', d_dot = '.', s_minus = '\0',
|
||||
s_dot = '\0';
|
||||
signed char num = -1;
|
||||
int utf8 = 0;
|
||||
|
||||
/* for speed, compute the effect of the flags outside the loop */
|
||||
|
||||
if (ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM) {
|
||||
d_minus = '\0';
|
||||
s_minus = '-';
|
||||
}
|
||||
if (ts->flags & LWS_TOKENIZE_F_DOT_NONTERM) {
|
||||
d_dot = '\0';
|
||||
s_dot = '.';
|
||||
}
|
||||
|
||||
ts->token = NULL;
|
||||
ts->token_len = 0;
|
||||
|
||||
|
@ -3046,8 +3058,6 @@ lws_tokenize(struct lws_tokenize *ts)
|
|||
if (utf8 < 0)
|
||||
return LWS_TOKZE_ERR_BROKEN_UTF8;
|
||||
|
||||
lwsl_debug("%s: %c (%d) %d\n", __func__, c, state, (int)ts->len);
|
||||
|
||||
if (!c)
|
||||
break;
|
||||
|
||||
|
@ -3111,7 +3121,8 @@ lws_tokenize(struct lws_tokenize *ts)
|
|||
|
||||
/* aggregate . in a number as a float */
|
||||
|
||||
if (c == '.' && state == LWS_TOKZS_TOKEN && num == 1) {
|
||||
if (c == '.' && !(ts->flags & LWS_TOKENIZE_F_NO_FLOATS) &&
|
||||
state == LWS_TOKZS_TOKEN && num == 1) {
|
||||
if (flo)
|
||||
return LWS_TOKZE_ERR_MALFORMED_FLOAT;
|
||||
flo = 1;
|
||||
|
@ -3140,9 +3151,9 @@ lws_tokenize(struct lws_tokenize *ts)
|
|||
strchr(rfc7230_delims, c) && c > 32) ||
|
||||
((!(ts->flags & LWS_TOKENIZE_F_RFC7230_DELIMS) &&
|
||||
(c < '0' || c > '9') && (c < 'A' || c > 'Z') &&
|
||||
(c < 'a' || c > 'z') && c != '_') && !(c == '-' &&
|
||||
(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))) ||
|
||||
(c == '-' && !(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))
|
||||
(c < 'a' || c > 'z') && c != '_') &&
|
||||
c != s_minus && c != s_dot) ||
|
||||
c == d_minus || c == d_dot
|
||||
)) {
|
||||
switch (state) {
|
||||
case LWS_TOKZS_LEADING_WHITESPACE:
|
||||
|
|
|
@ -135,6 +135,28 @@ struct expected expected1[] = {
|
|||
{ LWS_TOKZE_TOKEN, "", 3 },
|
||||
{ LWS_TOKZE_DELIMITER, ",", 1 },
|
||||
{ LWS_TOKZE_ERR_BROKEN_UTF8, "", 0 },
|
||||
},
|
||||
expected11[] = {
|
||||
{ LWS_TOKZE_TOKEN, "1.myserver", 10 },
|
||||
{ LWS_TOKZE_DELIMITER, ".", 1 },
|
||||
{ LWS_TOKZE_TOKEN, "com", 3 },
|
||||
{ LWS_TOKZE_ENDED, "", 0 },
|
||||
},
|
||||
expected12[] = {
|
||||
{ LWS_TOKZE_TOKEN, "1.myserver.com", 14 },
|
||||
{ LWS_TOKZE_ENDED, "", 0 },
|
||||
},
|
||||
expected13[] = {
|
||||
{ LWS_TOKZE_TOKEN, "1.myserver.com", 14 },
|
||||
{ LWS_TOKZE_ENDED, "", 0 },
|
||||
},
|
||||
expected14[] = {
|
||||
{ LWS_TOKZE_INTEGER, "1", 1 },
|
||||
{ LWS_TOKZE_DELIMITER, ".", 1 },
|
||||
{ LWS_TOKZE_TOKEN, "myserver", 8 },
|
||||
{ LWS_TOKZE_DELIMITER, ".", 1 },
|
||||
{ LWS_TOKZE_TOKEN, "com", 3 },
|
||||
{ LWS_TOKZE_ENDED, "", 0 },
|
||||
}
|
||||
|
||||
;
|
||||
|
@ -184,6 +206,26 @@ struct tests tests[] = {
|
|||
expected10, LWS_ARRAY_SIZE(expected10),
|
||||
LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_RFC7230_DELIMS
|
||||
},
|
||||
{
|
||||
"1.myserver.com",
|
||||
expected11, LWS_ARRAY_SIZE(expected11),
|
||||
0
|
||||
},
|
||||
{
|
||||
"1.myserver.com",
|
||||
expected12, LWS_ARRAY_SIZE(expected12),
|
||||
LWS_TOKENIZE_F_DOT_NONTERM
|
||||
},
|
||||
{
|
||||
"1.myserver.com",
|
||||
expected13, LWS_ARRAY_SIZE(expected13),
|
||||
LWS_TOKENIZE_F_DOT_NONTERM | LWS_TOKENIZE_F_NO_FLOATS
|
||||
},
|
||||
{
|
||||
"1.myserver.com",
|
||||
expected14, LWS_ARRAY_SIZE(expected14),
|
||||
LWS_TOKENIZE_F_NO_FLOATS
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -309,7 +351,16 @@ int main(int argc, const char **argv)
|
|||
printf(" | ");
|
||||
printf("LWS_TOKENIZE_F_RFC7230_DELIMS");
|
||||
}
|
||||
|
||||
if (flags & LWS_TOKENIZE_F_DOT_NONTERM) {
|
||||
if (flags & 15)
|
||||
printf(" | ");
|
||||
printf("LWS_TOKENIZE_F_DOT_NONTERM");
|
||||
}
|
||||
if (flags & LWS_TOKENIZE_F_NO_FLOATS) {
|
||||
if (flags & 31)
|
||||
printf(" | ");
|
||||
printf("LWS_TOKENIZE_F_NO_FLOATS");
|
||||
}
|
||||
printf("\n\t},\n");
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue