diff --git a/include/libwebsockets/lws-tokenize.h b/include/libwebsockets/lws-tokenize.h index 88f3a62a8..26a57dfba 100644 --- a/include/libwebsockets/lws-tokenize.h +++ b/include/libwebsockets/lws-tokenize.h @@ -21,7 +21,7 @@ * included from libwebsockets.h */ -/* Do not treat - as a terminal character */ +/* Do not treat - as a terminal character, so "my-token" is one token */ #define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0) /* Separately report aggregate colon-delimited tokens */ #define LWS_TOKENIZE_F_AGG_COLON (1 << 1) @@ -30,6 +30,12 @@ /* Allow more characters in the tokens and less delimiters... default is * only alphanumeric + underscore in tokens */ #define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3) +/* Do not treat . as a terminal character, so "warmcat.com" is one token */ +#define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4) +/* If something starts looking like a float, like 1.2, force to be string token. + * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and + * avoids illegal float format detection like 1.myserver.com */ +#define LWS_TOKENIZE_F_NO_FLOATS (1 << 5) typedef enum { diff --git a/lib/core/libwebsockets.c b/lib/core/libwebsockets.c index a93537ae2..71c1a0971 100644 --- a/lib/core/libwebsockets.c +++ b/lib/core/libwebsockets.c @@ -3031,10 +3031,22 @@ lws_tokenize(struct lws_tokenize *ts) { const char *rfc7230_delims = "(),/:;<=>?@[\\]{}"; lws_tokenize_state state = LWS_TOKZS_LEADING_WHITESPACE; - char c, flo = 0; + char c, flo = 0, d_minus = '-', d_dot = '.', s_minus = '\0', + s_dot = '\0'; signed char num = -1; int utf8 = 0; + /* for speed, compute the effect of the flags outside the loop */ + + if (ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM) { + d_minus = '\0'; + s_minus = '-'; + } + if (ts->flags & LWS_TOKENIZE_F_DOT_NONTERM) { + d_dot = '\0'; + s_dot = '.'; + } + ts->token = NULL; ts->token_len = 0; @@ -3046,8 +3058,6 @@ lws_tokenize(struct lws_tokenize *ts) if (utf8 < 0) return LWS_TOKZE_ERR_BROKEN_UTF8; - lwsl_debug("%s: %c (%d) %d\n", __func__, c, state, (int)ts->len); - if (!c) break; @@ -3111,7 +3121,8 @@ lws_tokenize(struct lws_tokenize *ts) /* aggregate . in a number as a float */ - if (c == '.' && state == LWS_TOKZS_TOKEN && num == 1) { + if (c == '.' && !(ts->flags & LWS_TOKENIZE_F_NO_FLOATS) && + state == LWS_TOKZS_TOKEN && num == 1) { if (flo) return LWS_TOKZE_ERR_MALFORMED_FLOAT; flo = 1; @@ -3140,9 +3151,9 @@ lws_tokenize(struct lws_tokenize *ts) strchr(rfc7230_delims, c) && c > 32) || ((!(ts->flags & LWS_TOKENIZE_F_RFC7230_DELIMS) && (c < '0' || c > '9') && (c < 'A' || c > 'Z') && - (c < 'a' || c > 'z') && c != '_') && !(c == '-' && - (ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))) || - (c == '-' && !(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM)) + (c < 'a' || c > 'z') && c != '_') && + c != s_minus && c != s_dot) || + c == d_minus || c == d_dot )) { switch (state) { case LWS_TOKZS_LEADING_WHITESPACE: diff --git a/minimal-examples/api-tests/api-test-lws_tokenize/main.c b/minimal-examples/api-tests/api-test-lws_tokenize/main.c index bd15e72b4..a3a9bd641 100644 --- a/minimal-examples/api-tests/api-test-lws_tokenize/main.c +++ b/minimal-examples/api-tests/api-test-lws_tokenize/main.c @@ -135,6 +135,28 @@ struct expected expected1[] = { { LWS_TOKZE_TOKEN, "퟿", 3 }, { LWS_TOKZE_DELIMITER, ",", 1 }, { LWS_TOKZE_ERR_BROKEN_UTF8, "", 0 }, + }, + expected11[] = { + { LWS_TOKZE_TOKEN, "1.myserver", 10 }, + { LWS_TOKZE_DELIMITER, ".", 1 }, + { LWS_TOKZE_TOKEN, "com", 3 }, + { LWS_TOKZE_ENDED, "", 0 }, + }, + expected12[] = { + { LWS_TOKZE_TOKEN, "1.myserver.com", 14 }, + { LWS_TOKZE_ENDED, "", 0 }, + }, + expected13[] = { + { LWS_TOKZE_TOKEN, "1.myserver.com", 14 }, + { LWS_TOKZE_ENDED, "", 0 }, + }, + expected14[] = { + { LWS_TOKZE_INTEGER, "1", 1 }, + { LWS_TOKZE_DELIMITER, ".", 1 }, + { LWS_TOKZE_TOKEN, "myserver", 8 }, + { LWS_TOKZE_DELIMITER, ".", 1 }, + { LWS_TOKZE_TOKEN, "com", 3 }, + { LWS_TOKZE_ENDED, "", 0 }, } ; @@ -184,6 +206,26 @@ struct tests tests[] = { expected10, LWS_ARRAY_SIZE(expected10), LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_RFC7230_DELIMS }, + { + "1.myserver.com", + expected11, LWS_ARRAY_SIZE(expected11), + 0 + }, + { + "1.myserver.com", + expected12, LWS_ARRAY_SIZE(expected12), + LWS_TOKENIZE_F_DOT_NONTERM + }, + { + "1.myserver.com", + expected13, LWS_ARRAY_SIZE(expected13), + LWS_TOKENIZE_F_DOT_NONTERM | LWS_TOKENIZE_F_NO_FLOATS + }, + { + "1.myserver.com", + expected14, LWS_ARRAY_SIZE(expected14), + LWS_TOKENIZE_F_NO_FLOATS + }, }; /* @@ -309,7 +351,16 @@ int main(int argc, const char **argv) printf(" | "); printf("LWS_TOKENIZE_F_RFC7230_DELIMS"); } - + if (flags & LWS_TOKENIZE_F_DOT_NONTERM) { + if (flags & 15) + printf(" | "); + printf("LWS_TOKENIZE_F_DOT_NONTERM"); + } + if (flags & LWS_TOKENIZE_F_NO_FLOATS) { + if (flags & 31) + printf(" | "); + printf("LWS_TOKENIZE_F_NO_FLOATS"); + } printf("\n\t},\n"); }