1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-09 00:00:04 +01:00

lws_tokenize

This commit is contained in:
Andy Green 2018-10-09 10:29:42 +08:00
parent 47e10ab200
commit 6cd80f9fc7
9 changed files with 896 additions and 27 deletions

View file

@ -408,6 +408,7 @@ struct lws;
#include <libwebsockets/lws-lejp.h>
#include <libwebsockets/lws-stats.h>
#include <libwebsockets/lws-threadpool.h>
#include <libwebsockets/lws-tokenize.h>
#if defined(LWS_WITH_TLS)

View file

@ -0,0 +1,130 @@
/*
* libwebsockets - small server side websockets and web server implementation
*
* Copyright (C) 2010-2018 Andy Green <andy@warmcat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation:
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*
* included from libwebsockets.h
*/
/* Do not treat - as a terminal character */
#define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON (1 << 1)
/* Enforce sequencing for a simple token , token , token ... list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST (1 << 2)
/* Allow more characters in the tokens and fewer delimiters... default is
 * only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3)
/*
 * Result codes returned by lws_tokenize(): errors are < 0, end-of-text is 0,
 * and successful results have ordinal 1+.
 */
typedef enum {
	LWS_TOKZE_ERRS = 5, /* the number of errors defined */

	LWS_TOKZE_ERR_BROKEN_UTF8 = -5,		/* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING = -4,	/* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT = -3,	/* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS = -2,		/* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST = -1,		/* like ",tok", or, "tok,," */

	LWS_TOKZE_ENDED = 0,		/* no more content */

	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */

	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
	LWS_TOKZE_TOKEN,		/* a token appeared */
	LWS_TOKZE_INTEGER,		/* an integer appeared */
	LWS_TOKZE_FLOAT,		/* a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
					 * LWS_TOKENIZE_F_AGG_COLON flag) */
	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */
} lws_tokenize_elem;
/*
 * Delimiter-sequencing states used by lws_tokenize() when the
 * LWS_TOKENIZE_F_COMMA_SEP_LIST flag is given, eg, to disallow
 * "token,,token", "token,", and ",token"
 */
enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT,	/* nothing seen yet: content must come first */
	LWSTZ_DT_NEED_DELIM,		/* just saw content: a delimiter must follow */
	LWSTZ_DT_NEED_NEXT_CONTENT,	/* just saw a delimiter: content must follow */
};
struct lws_tokenize {
	const char *start;	/**< set to the start of the string to tokenize */
	const char *token;	/**< the start of an identified token or delimiter */
	int len;		/**< set to the length of the string to tokenize */
	int token_len;		/**< the length of the identified token or delimiter */

	int flags;		/**< optional LWS_TOKENIZE_F_ flags, or 0 */

	int delim;		/**< lws_tokenize_delimiter_tracking state, used
				 *   internally for LWS_TOKENIZE_F_COMMA_SEP_LIST;
				 *   set by lws_tokenize_init() */
};
/**
 * lws_tokenize_init() - prepares an lws_tokenize struct for tokenizing a string
*
* \param ts: the lws_tokenize struct to init
* \param start: the string to tokenize
* \param flags: LWS_TOKENIZE_F_ option flags
*
* This initializes the tokenize struct to point to the given string, and
* sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can
* override this requirement by setting ts.len yourself before using it.
*
* .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT.
*/
LWS_VISIBLE LWS_EXTERN void
lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
/**
* lws_tokenize() - breaks down a string into tokens and delimiters in-place
*
* \param ts: the lws_tokenize struct with information and state on what to do
*
* The \p ts struct should have its start, len and flags members initialized to
* reflect the string to be tokenized and any options.
*
* Then `lws_tokenize()` may be called repeatedly on the struct, returning one
* of `lws_tokenize_elem` each time, and with the struct's `token` and
* `token_len` members set to describe the content of the delimiter or token
* payload each time.
*
* There are no allocations during the process.
*
* returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached
* the end of the string).
*/
LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
lws_tokenize(struct lws_tokenize *ts);
/**
* lws_tokenize_cstr() - copy token string to NUL-terminated buffer
*
* \param ts: pointer to lws_tokenize struct to operate on
* \param str: destination buffer
 * \param max: bytes in destination buffer
*
* returns 0 if OK or nonzero if the string + NUL won't fit.
*/
LWS_VISIBLE LWS_EXTERN int
lws_tokenize_cstr(struct lws_tokenize *ts, char *str, int max);

View file

@ -2358,36 +2358,60 @@ __lws_rx_flow_control(struct lws *wsi)
return 0;
}
/*
 * Classification table for the utf-8 byte validator in lws_check_byte_utf8().
 *
 * Entries 0..20 are indexed by (leading byte - 0xe0) for multibyte leading
 * bytes 0xe0..0xf4; entries 21..23 are the continuation states s0/s2/s3.
 *
 * Each entry packs three fields used by lws_check_byte_utf8():
 *  - mask 0xf0: lowest acceptable value for the next continuation byte
 *  - mask 0x0c: ((width - 1) << 2); scales the size of the acceptable
 *    continuation range (checker allows 0x10 + ((s << 2) & 0x30) values)
 *  - mask 0x03: index (offset 21 in this table) of the state to move to
 *    after the continuation byte is accepted; state 0 means "complete"
 */
static const unsigned char e0f4[] = {
	0xa0 | ((2 - 1) << 2) | 1, /* e0 */
	0x80 | ((4 - 1) << 2) | 1, /* e1 */
	0x80 | ((4 - 1) << 2) | 1, /* e2 */
	0x80 | ((4 - 1) << 2) | 1, /* e3 */
	0x80 | ((4 - 1) << 2) | 1, /* e4 */
	0x80 | ((4 - 1) << 2) | 1, /* e5 */
	0x80 | ((4 - 1) << 2) | 1, /* e6 */
	0x80 | ((4 - 1) << 2) | 1, /* e7 */
	0x80 | ((4 - 1) << 2) | 1, /* e8 */
	0x80 | ((4 - 1) << 2) | 1, /* e9 */
	0x80 | ((4 - 1) << 2) | 1, /* ea */
	0x80 | ((4 - 1) << 2) | 1, /* eb */
	0x80 | ((4 - 1) << 2) | 1, /* ec */
	0x80 | ((2 - 1) << 2) | 1, /* ed */
	0x80 | ((4 - 1) << 2) | 1, /* ee */
	0x80 | ((4 - 1) << 2) | 1, /* ef */
	0x90 | ((3 - 1) << 2) | 2, /* f0 */
	0x80 | ((4 - 1) << 2) | 2, /* f1 */
	0x80 | ((4 - 1) << 2) | 2, /* f2 */
	0x80 | ((4 - 1) << 2) | 2, /* f3 */
	0x80 | ((1 - 1) << 2) | 2, /* f4 */

	0,			   /* s0 */
	0x80 | ((4 - 1) << 2) | 0, /* s2 */
	0x80 | ((4 - 1) << 2) | 1, /* s3 */
};
/*
 * lws_check_byte_utf8() - validate one byte of a utf-8 sequence
 *
 * \param state: 0 to start a new character, else the value returned by the
 *		 previous call for this character
 * \param c: the next byte to validate
 *
 * Returns -1 if the byte is illegal at this point, 0 if the character is
 * complete and valid, or a positive state to pass in with the next byte.
 */
LWS_EXTERN int
lws_check_byte_utf8(unsigned char state, unsigned char c)
{
	unsigned char s = state;

	if (!s) {
		/* start of a new character */
		if (c >= 0x80) {
			/* multibyte: leading byte must be 0xc2 .. 0xf4 */
			if (c < 0xc2 || c > 0xf4)
				return -1;
			if (c < 0xe0)
				/* two-byte char: one continuation byte left */
				return 0x80 | ((4 - 1) << 2);
			else
				/* three / four-byte: look up the constraints */
				return e0f4[c - 0xe0];
		}

		/* single-byte ASCII: already complete (s is 0) */
		return s;
	}

	/* continuation byte must lie in the range the state encodes */
	if (c < (s & 0xf0) || c >= (s & 0xf0) + 0x10 + ((s << 2) & 0x30))
		return -1;

	/* advance to the next state; 0 means the character completed */
	return e0f4[21 + (s & 3)];
}
LWS_EXTERN int
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len)
{
static const unsigned char e0f4[] = {
0xa0 | ((2 - 1) << 2) | 1, /* e0 */
0x80 | ((4 - 1) << 2) | 1, /* e1 */
0x80 | ((4 - 1) << 2) | 1, /* e2 */
0x80 | ((4 - 1) << 2) | 1, /* e3 */
0x80 | ((4 - 1) << 2) | 1, /* e4 */
0x80 | ((4 - 1) << 2) | 1, /* e5 */
0x80 | ((4 - 1) << 2) | 1, /* e6 */
0x80 | ((4 - 1) << 2) | 1, /* e7 */
0x80 | ((4 - 1) << 2) | 1, /* e8 */
0x80 | ((4 - 1) << 2) | 1, /* e9 */
0x80 | ((4 - 1) << 2) | 1, /* ea */
0x80 | ((4 - 1) << 2) | 1, /* eb */
0x80 | ((4 - 1) << 2) | 1, /* ec */
0x80 | ((2 - 1) << 2) | 1, /* ed */
0x80 | ((4 - 1) << 2) | 1, /* ee */
0x80 | ((4 - 1) << 2) | 1, /* ef */
0x90 | ((3 - 1) << 2) | 2, /* f0 */
0x80 | ((4 - 1) << 2) | 2, /* f1 */
0x80 | ((4 - 1) << 2) | 2, /* f2 */
0x80 | ((4 - 1) << 2) | 2, /* f3 */
0x80 | ((1 - 1) << 2) | 2, /* f4 */
0, /* s0 */
0x80 | ((4 - 1) << 2) | 0, /* s2 */
0x80 | ((4 - 1) << 2) | 1, /* s3 */
};
unsigned char s = *state;
while (len--) {
@ -2985,6 +3009,245 @@ lws_strncpy(char *dest, const char *src, size_t size)
return dest;
}
typedef enum {
LWS_TOKZS_LEADING_WHITESPACE,
LWS_TOKZS_QUOTED_STRING,
LWS_TOKZS_TOKEN,
LWS_TOKZS_TOKEN_POST_TERMINAL
} lws_tokenize_state;
/*
 * lws_tokenize() - report the next token, delimiter, end or error from ts
 *
 * Consumes bytes from ts->start (up to ts->len), validating utf-8 as it
 * goes, and returns one lws_tokenize_elem describing what was found;
 * ts->token / ts->token_len describe the payload where one exists.
 *
 * NOTE(review): defined here returning int, but the header declares it
 * returning lws_tokenize_elem - confirm the two should be aligned.
 */
int
lws_tokenize(struct lws_tokenize *ts)
{
	const char *rfc7230_delims = "(),/:;<=>?@[\\]{}";
	lws_tokenize_state state = LWS_TOKZS_LEADING_WHITESPACE;
	char c, num = -1, flo = 0;	/* num: -1 undecided, 1 numeric, 0 not */
	int utf8 = 0;			/* utf-8 validator state, 0 = complete */

	ts->token = NULL;
	ts->token_len = 0;

	while (ts->len) {
		c = *ts->start++;
		ts->len--;

		/* each byte must remain legal utf-8 */
		utf8 = lws_check_byte_utf8((unsigned char)utf8, c);
		if (utf8 < 0)
			return LWS_TOKZE_ERR_BROKEN_UTF8;

		lwsl_debug("%s: %c (%d) %d\n", __func__, c, state, (int)ts->len);

		if (!c)
			break;

		/* whitespace */

		if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
		    c == '\f') {
			switch (state) {
			case LWS_TOKZS_LEADING_WHITESPACE:
			case LWS_TOKZS_TOKEN_POST_TERMINAL:
				continue;
			case LWS_TOKZS_QUOTED_STRING:
				/* whitespace is literal inside quotes */
				ts->token_len++;
				continue;
			case LWS_TOKZS_TOKEN:
				/* we want to scan forward to look for = */
				state = LWS_TOKZS_TOKEN_POST_TERMINAL;
				continue;
			}
		}

		/* quoted string */

		if (c == '\"') {
			if (state == LWS_TOKZS_QUOTED_STRING)
				/* closing quote: report the contents */
				return LWS_TOKZE_QUOTED_STRING;

			/* starting a quoted string */

			if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) {
				if (ts->delim == LWSTZ_DT_NEED_DELIM)
					return LWS_TOKZE_ERR_COMMA_LIST;
				ts->delim = LWSTZ_DT_NEED_DELIM;
			}

			state = LWS_TOKZS_QUOTED_STRING;
			ts->token = ts->start;
			ts->token_len = 0;

			continue;
		}

		/* token= aggregation */

		if (c == '=' && (state == LWS_TOKZS_TOKEN_POST_TERMINAL ||
				 state == LWS_TOKZS_TOKEN)) {
			if (num == 1)
				/* a pure number is not a legal lvalue name */
				return LWS_TOKZE_ERR_NUM_ON_LHS;
			/* swallow the = */
			return LWS_TOKZE_TOKEN_NAME_EQUALS;
		}

		/* optional token: aggregation */

		if ((ts->flags & LWS_TOKENIZE_F_AGG_COLON) && c == ':' &&
		    (state == LWS_TOKZS_TOKEN_POST_TERMINAL ||
		     state == LWS_TOKZS_TOKEN))
			/* swallow the : */
			return LWS_TOKZE_TOKEN_NAME_COLON;

		/* aggregate . in a number as a float */

		if (c == '.' && state == LWS_TOKZS_TOKEN && num == 1) {
			if (flo)
				/* second . in one number, eg, 0.1.1 */
				return LWS_TOKZE_ERR_MALFORMED_FLOAT;
			flo = 1;
			ts->token_len++;
			continue;
		}

		/*
		 * Delimiter... by default anything that:
		 *
		 *  - isn't matched earlier, or
		 *  - isn't [A-Z, a-z, 0-9, _], and
		 *  - is not a partial utf8 char
		 *
		 * is a "delimiter", it marks the end of a token and is itself
		 * reported as a single LWS_TOKZE_DELIMITER each time.
		 *
		 * However with LWS_TOKENIZE_F_RFC7230_DELIMS flag, tokens may
		 * contain any noncontrol character that isn't defined in
		 * rfc7230_delims, and only characters listed there are treated
		 * as delimiters.
		 */

		if (!utf8 &&
		    ((ts->flags & LWS_TOKENIZE_F_RFC7230_DELIMS &&
		     strchr(rfc7230_delims, c) && c > 32) ||
		    ((!(ts->flags & LWS_TOKENIZE_F_RFC7230_DELIMS) &&
		     (c < '0' || c > '9') && (c < 'A' || c > 'Z') &&
		     (c < 'a' || c > 'z') && c != '_') && !(c == '-' &&
		     (ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))) ||
		    (c == '-' && !(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM))
		    )) {
			switch (state) {
			case LWS_TOKZS_LEADING_WHITESPACE:
				if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) {
					/* only , is legal between list items */
					if (c != ',' ||
					    ts->delim != LWSTZ_DT_NEED_DELIM)
						return LWS_TOKZE_ERR_COMMA_LIST;
					ts->delim = LWSTZ_DT_NEED_NEXT_CONTENT;
				}

				ts->token = ts->start - 1;
				ts->token_len = 1;
				return LWS_TOKZE_DELIMITER;

			case LWS_TOKZS_QUOTED_STRING:
				/* delimiters are literal inside quotes */
				ts->token_len++;
				continue;

			case LWS_TOKZS_TOKEN_POST_TERMINAL:
			case LWS_TOKZS_TOKEN:
				/* report the delimiter next time */
				ts->start--;
				ts->len++;
				goto token_or_numeric;
			}
		}

		/* anything that's not whitespace or delimiter is payload */

		switch (state) {
		case LWS_TOKZS_LEADING_WHITESPACE:

			if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) {
				if (ts->delim == LWSTZ_DT_NEED_DELIM)
					return LWS_TOKZE_ERR_COMMA_LIST;
				ts->delim = LWSTZ_DT_NEED_DELIM;
			}

			state = LWS_TOKZS_TOKEN;
			ts->token = ts->start - 1;
			ts->token_len = 1;
			/* first char decides whether this may be numeric */
			if (c < '0' || c > '9')
				num = 0;
			else
				if (num < 0)
					num = 1;
			continue;
		case LWS_TOKZS_QUOTED_STRING:
		case LWS_TOKZS_TOKEN:
			/* any non-digit disqualifies the token as numeric */
			if (c < '0' || c > '9')
				num = 0;
			else
				if (num < 0)
					num = 1;
			ts->token_len++;
			continue;
		case LWS_TOKZS_TOKEN_POST_TERMINAL:
			/* report the new token next time */
			ts->start--;
			ts->len++;
			goto token_or_numeric;
		}
	}

	/* we ran out of content */

	if (utf8) /* ended partway through a multibyte char */
		return LWS_TOKZE_ERR_BROKEN_UTF8;

	if (state == LWS_TOKZS_QUOTED_STRING)
		return LWS_TOKZE_ERR_UNTERM_STRING;

	if (state != LWS_TOKZS_TOKEN_POST_TERMINAL &&
	    state != LWS_TOKZS_TOKEN) {
		if ((ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) &&
		    ts->delim == LWSTZ_DT_NEED_NEXT_CONTENT)
			/* trailing comma with nothing after it */
			return LWS_TOKZE_ERR_COMMA_LIST;

		return LWS_TOKZE_ENDED;
	}

	/* report the pending token */

token_or_numeric:

	if (num != 1)
		return LWS_TOKZE_TOKEN;
	if (flo)
		return LWS_TOKZE_FLOAT;

	return LWS_TOKZE_INTEGER;
}
/*
 * lws_tokenize_cstr() - copy the current token to a NUL-terminated buffer
 *
 * \param ts: tokenizer whose ->token / ->token_len describe the last result
 * \param str: destination buffer
 * \param max: usable bytes in the destination buffer
 *
 * Returns 0 if OK, or nonzero if the token plus its terminating NUL will
 * not fit in \p max bytes (the buffer is left untouched in that case).
 */
LWS_VISIBLE LWS_EXTERN int
lws_tokenize_cstr(struct lws_tokenize *ts, char *str, int max)
{
	/*
	 * token_len bytes plus the NUL need exactly token_len + 1 bytes;
	 * the previous ">=" test wrongly rejected the exact-fit case
	 * (token_len + 1 == max) even though it satisfies the contract
	 */
	if (ts->token_len + 1 > max)
		return 1;

	memcpy(str, ts->token, ts->token_len);
	str[ts->token_len] = '\0';

	return 0;
}
LWS_VISIBLE LWS_EXTERN void
lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags)
{
ts->start = start;
ts->len = 0x7fffffff;
ts->flags = flags;
ts->delim = LWSTZ_DT_NEED_FIRST_CONTENT;
}
#if LWS_MAX_SMP > 1
void

View file

@ -1449,6 +1449,8 @@ lws_plat_inet_ntop(int af, const void *src, char *dst, int cnt);
LWS_EXTERN int LWS_WARN_UNUSED_RESULT
lws_plat_inet_pton(int af, const char *src, void *dst);
LWS_EXTERN int
lws_check_byte_utf8(unsigned char state, unsigned char c);
LWS_EXTERN int LWS_WARN_UNUSED_RESULT
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len);
LWS_EXTERN int alloc_file(struct lws_context *context, const char *filename, uint8_t **buf,

View file

@ -501,6 +501,10 @@ LWS_VISIBLE int lws_hdr_copy(struct lws *wsi, char *dst, int len,
int toklen = lws_hdr_total_length(wsi, h);
int n;
*dst = '\0';
if (!toklen)
return 0;
if (toklen >= len)
return -1;

View file

@ -0,0 +1,73 @@
cmake_minimum_required(VERSION 2.8)
include(CheckCSourceCompiles)
set(SAMP lws-api-test-lws_tokenize)
set(SRCS main.c)
# If we are being built as part of lws, confirm current build config supports
# reqconfig, else skip building ourselves.
#
# If we are being built externally, confirm installed lws was configured to
# support reqconfig, else error out with a helpful message about the problem.
#
#
# require_lws_config(reqconfig, _val, result): when building in-tree with
# LWS_WITH_MINIMAL_EXAMPLES, sets ${result} to 0 and skips this sample if
# the lws build option ${reqconfig} does not have the value ${_val}; when
# building standalone, compile-tests the installed lws headers instead and
# errors out with an explanation if they are incompatible.
#
MACRO(require_lws_config reqconfig _val result)

	# normalize the requested config option to rq = 0 / 1
	if (DEFINED ${reqconfig})
		if (${reqconfig})
			set (rq 1)
		else()
			set (rq 0)
		endif()
	else()
		set(rq 0)
	endif()

	# SAME = 1 when the current build config matches what we require
	if (${_val} EQUAL ${rq})
		set(SAME 1)
	else()
		set(SAME 0)
	endif()

	if (LWS_WITH_MINIMAL_EXAMPLES AND NOT ${SAME})
		# in-tree build with an incompatible config: skip quietly
		if (${_val})
			message("${SAMP}: skipping as lws being built without ${reqconfig}")
		else()
			message("${SAMP}: skipping as lws built with ${reqconfig}")
		endif()
		set(${result} 0)
	else()
		if (LWS_WITH_MINIMAL_EXAMPLES)
			set(MET ${SAME})
		else()
			# standalone build: probe the installed lws headers
			CHECK_C_SOURCE_COMPILES("#include <libwebsockets.h>\nint main(void) {\n#if defined(${reqconfig})\n return 0;\n#else\n fail;\n#endif\n return 0;\n}\n" HAS_${reqconfig})
			if (NOT DEFINED HAS_${reqconfig} OR NOT HAS_${reqconfig})
				set(HAS_${reqconfig} 0)
			else()
				set(HAS_${reqconfig} 1)
			endif()
			if ((HAS_${reqconfig} AND ${_val}) OR (NOT HAS_${reqconfig} AND NOT ${_val}))
				set(MET 1)
			else()
				set(MET 0)
			endif()
		endif()
		if (NOT MET)
			# standalone and incompatible: this is a hard error
			if (${_val})
				message(FATAL_ERROR "This project requires lws must have been configured with ${reqconfig}")
			else()
				message(FATAL_ERROR "Lws configuration of ${reqconfig} is incompatible with this project")
			endif()
		endif()
	endif()
ENDMACRO()
# build the test app, linking against whichever flavour of the lws
# library (shared or static) this build produces
add_executable(${SAMP} ${SRCS})

if (websockets_shared)
	target_link_libraries(${SAMP} websockets_shared)
	add_dependencies(${SAMP} websockets_shared)
else()
	target_link_libraries(${SAMP} websockets)
endif()

View file

@ -0,0 +1,37 @@
# lws api test lws_tokenize
Performs selftests for lws_tokenize
## build
```
$ cmake . && make
```
## usage
Commandline option|Meaning
---|---
-d <loglevel>|Debug verbosity in decimal, eg, -d15
-s "input string"|String to tokenize
-f 15|LWS_TOKENIZE_F_ flag values to apply to processing of -s
```
$ ./lws-api-test-lws_tokenize
[2018/10/09 09:14:17:4834] USER: LWS API selftest: lws_tokenize
[2018/10/09 09:14:17:4835] USER: Completed: PASS: 6, FAIL: 0
```
If the `-s string` option is given, the string is tokenized on stdout in
the format used to produce the tests in the sources
```
$ ./lws-api-test-lws_tokenize -s "hello: 1234,256"
[2018/10/09 09:14:17:4834] USER: LWS API selftest: lws_tokenize
{ LWS_TOKZE_TOKEN_NAME_COLON, "hello", 5 }
{ LWS_TOKZE_INTEGER, "1234", 4 }
{ LWS_TOKZE_DELIMITER, ",", 1 }
{ LWS_TOKZE_INTEGER, "256", 3 }
{ LWS_TOKZE_ENDED, "", 0 }
```

View file

@ -0,0 +1,335 @@
/*
* lws-api-test-lws_tokenize
*
* Copyright (C) 2018 Andy Green <andy@warmcat.com>
*
* This file is made available under the Creative Commons CC0 1.0
* Universal Public Domain Dedication.
*
* This demonstrates the most minimal http server you can make with lws.
*
* To keep it simple, it serves stuff from the subdirectory
* "./mount-origin" of the directory it was started in.
* You can change that by changing mount.origin below.
*/
#include <libwebsockets.h>
#include <string.h>
/* one expected result from a single lws_tokenize() call */
struct expected {
	lws_tokenize_elem e;	/* expected return code */
	const char *value;	/* expected token contents */
	int len;		/* expected token length in bytes */
};

/* one selftest: an input string and the exact sequence it must produce */
struct tests {
	const char *string;	/* the string to tokenize */
	struct expected *exp;	/* expected result sequence */
	int count;		/* number of entries in exp */
	int flags;		/* LWS_TOKENIZE_F_ flags to tokenize with */
};
/*
 * Expected tokenizer output sequences, one table per entry in tests[]
 * below.  Lengths are in bytes (the Greek tokens are multibyte utf-8).
 */
struct expected expected1[] = {
	{ LWS_TOKZE_TOKEN, "protocol-1", 10 },
	{ LWS_TOKZE_DELIMITER, ",", 1},
	{ LWS_TOKZE_TOKEN, "protocol_2", 10 },
	{ LWS_TOKZE_DELIMITER, ",", 1},
	{ LWS_TOKZE_TOKEN, "protocol3", 9 },
	{ LWS_TOKZE_ENDED, NULL, 0 },
},
expected2[] = {
	{ LWS_TOKZE_TOKEN_NAME_COLON, "Accept-Language", 15 },
	{ LWS_TOKZE_TOKEN, "fr-CH", 5 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "fr", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1},
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.9", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "en", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1},
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.8", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "de", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1},
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.7", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_DELIMITER, "*", 1 },
	{ LWS_TOKZE_DELIMITER, ";", 1 },
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.5", 3 },
	{ LWS_TOKZE_ENDED, NULL, 0 },
},
expected3[] = {
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "quoted", 6 },
	{ LWS_TOKZE_QUOTED_STRING, "things:", 7 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_INTEGER, "1234", 4 },
	{ LWS_TOKZE_ENDED, NULL, 0 },
},
expected4[] = {
	/* a leading comma in a comma-separated list is illegal */
	{ LWS_TOKZE_ERR_COMMA_LIST, ",", 1 },
},
expected5[] = {
	{ LWS_TOKZE_TOKEN, "brokenlist2", 11 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_ERR_COMMA_LIST, ",", 1 },
},
expected6[] = {
	{ LWS_TOKZE_TOKEN, "brokenlist3", 11 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_ERR_COMMA_LIST, ",", 1 },
},
expected7[] = {
	{ LWS_TOKZE_TOKEN, "fr", 2 },
	{ LWS_TOKZE_DELIMITER, "-", 1 },
	{ LWS_TOKZE_TOKEN, "CH", 2 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "fr", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1 },
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.9", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "en", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1 },
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.8", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "de", 2 },
	{ LWS_TOKZE_DELIMITER, ";", 1 },
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.7", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "*", 1 },
	{ LWS_TOKZE_DELIMITER, ";", 1 },
	{ LWS_TOKZE_TOKEN_NAME_EQUALS, "q", 1 },
	{ LWS_TOKZE_FLOAT, "0.5", 3 },
	{ LWS_TOKZE_ENDED, "", 0 },
},
expected8[] = {
	{ LWS_TOKZE_TOKEN, "Οὐχὶ", 10 },
	{ LWS_TOKZE_TOKEN, "ταὐτὰ", 12 },
	{ LWS_TOKZE_TOKEN, "παρίσταταί", 22 },
	{ LWS_TOKZE_TOKEN, "μοι", 6 },
	{ LWS_TOKZE_TOKEN, "γιγνώσκειν", 21 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	/* NOTE(review): token text appears lost in transcription; the
	 * 3-byte length matches "ὦ" from the input string - confirm
	 * against the upstream sources */
	{ LWS_TOKZE_TOKEN, "", 3 },
	{ LWS_TOKZE_TOKEN, "ἄνδρες", 13 },
	{ LWS_TOKZE_TOKEN, "᾿Αθηναῖοι", 20 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_TOKEN, "greek", 5 },
	{ LWS_TOKZE_ENDED, "", 0 },
},
expected9[] = {
	/*
	 * because the tokenizer scans ahead for = aggregation,
	 * it finds the broken utf8 before reporting the token
	 */
	{ LWS_TOKZE_ERR_BROKEN_UTF8, "", 0 },
},
expected10[] = {
	{ LWS_TOKZE_TOKEN, "badutf8-2", 9 },
	/* NOTE(review): token text appears lost in transcription; the
	 * 3-byte length matches the valid "\xed\x9f\xbf" utf-8 char in
	 * the input string - confirm against the upstream sources */
	{ LWS_TOKZE_TOKEN, "", 3 },
	{ LWS_TOKZE_DELIMITER, ",", 1 },
	{ LWS_TOKZE_ERR_BROKEN_UTF8, "", 0 },
}
;
/* the selftest inputs, their flags and their expected output tables */
struct tests tests[] = {
	{
		" protocol-1, protocol_2\t,\tprotocol3\n",
		expected1, LWS_ARRAY_SIZE(expected1),
		LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_AGG_COLON
	}, {
		"Accept-Language: fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5",
		expected2, LWS_ARRAY_SIZE(expected2),
		LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_AGG_COLON
	}, {
		"quoted = \"things:\", 1234",
		expected3, LWS_ARRAY_SIZE(expected3),
		LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_AGG_COLON
	}, {
		", brokenlist1",
		expected4, LWS_ARRAY_SIZE(expected4),
		LWS_TOKENIZE_F_COMMA_SEP_LIST
	}, {
		"brokenlist2,,",
		expected5, LWS_ARRAY_SIZE(expected5),
		LWS_TOKENIZE_F_COMMA_SEP_LIST
	}, {
		"brokenlist3,",
		expected6, LWS_ARRAY_SIZE(expected6),
		LWS_TOKENIZE_F_COMMA_SEP_LIST
	}, {
		"fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5",
		expected7, LWS_ARRAY_SIZE(expected7),
		LWS_TOKENIZE_F_RFC7230_DELIMS
	},
	{
		/* multibyte utf-8 tokens */
		" Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, greek",
		expected8, LWS_ARRAY_SIZE(expected8),
		LWS_TOKENIZE_F_RFC7230_DELIMS
	},
	{
		/* \x80 is never a legal utf-8 leading byte */
		"badutf8-1 \x80...",
		expected9, LWS_ARRAY_SIZE(expected9),
		LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_RFC7230_DELIMS
	},
	{
		/* a valid multibyte char followed by a broken one */
		"badutf8-2 \xed\x9f\xbf,\x80...",
		expected10, LWS_ARRAY_SIZE(expected10),
		LWS_TOKENIZE_F_MINUS_NONTERM | LWS_TOKENIZE_F_RFC7230_DELIMS
	},
};
/*
 * Human-readable names for lws_tokenize_elem values; add LWS_TOKZE_ERRS to
 * the element value (which may be negative by up to that amount) to index
 * this array.
 */
static const char *element_names[] = {
	"LWS_TOKZE_ERR_BROKEN_UTF8",
	"LWS_TOKZE_ERR_UNTERM_STRING",
	"LWS_TOKZE_ERR_MALFORMED_FLOAT",
	"LWS_TOKZE_ERR_NUM_ON_LHS",
	"LWS_TOKZE_ERR_COMMA_LIST",
	"LWS_TOKZE_ENDED",
	"LWS_TOKZE_DELIMITER",
	"LWS_TOKZE_TOKEN",
	"LWS_TOKZE_INTEGER",
	"LWS_TOKZE_FLOAT",
	"LWS_TOKZE_TOKEN_NAME_EQUALS",
	"LWS_TOKZE_TOKEN_NAME_COLON",
	"LWS_TOKZE_QUOTED_STRING",
};
/*
 * Runs the lws_tokenize selftests in tests[] against their expected result
 * tables and reports PASS/FAIL totals.  With -s "string" it additionally
 * tokenizes the given string (with the -f flags) and prints the results in
 * the same format used to author the tests[] tables above.
 */
int main(int argc, const char **argv)
{
	struct lws_tokenize ts;
	lws_tokenize_elem e;
	const char *p;
	int n, logs = LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE
			/* for LLL_ verbosity above NOTICE to be built into lws,
			 * lws must have been configured and built with
			 * -DCMAKE_BUILD_TYPE=DEBUG instead of =RELEASE */
			/* | LLL_INFO */ /* | LLL_PARSER */ /* | LLL_HEADER */
			/* | LLL_EXT */ /* | LLL_CLIENT */ /* | LLL_LATENCY */
			/* | LLL_DEBUG */;
	int fail = 0, ok = 0, flags = 0;

	if ((p = lws_cmdline_option(argc, argv, "-d")))
		logs = atoi(p);

	lws_set_log_level(logs, NULL);
	lwsl_user("LWS API selftest: lws_tokenize\n");

	if ((p = lws_cmdline_option(argc, argv, "-f")))
		flags = atoi(p);

	p = lws_cmdline_option(argc, argv, "-s");

	/* run every table-driven selftest */

	for (n = 0; n < (int)LWS_ARRAY_SIZE(tests); n++) {
		/* in_fail: failure count before this test started */
		int m = 0, in_fail = fail;
		struct expected *exp = tests[n].exp;

		ts.start = tests[n].string;
		ts.len = strlen(ts.start);
		ts.flags = tests[n].flags;

		do {
			e = lws_tokenize(&ts);

			lwsl_info("{ %s, \"%.*s\", %d }\n",
				  element_names[e + LWS_TOKZE_ERRS],
				  (int)ts.token_len, ts.token,
				  (int)ts.token_len);

			/* more results than the table expects? */
			if (m == (int)tests[n].count) {
				lwsl_notice("fail: expected end earlier\n");
				fail++;
				break;
			}

			if (e != exp->e) {
				lwsl_notice("fail... tok %s vs expected %s\n",
					element_names[e + LWS_TOKZE_ERRS],
					element_names[exp->e + LWS_TOKZE_ERRS]);
				fail++;
				break;
			}

			/* only successful results carry a token payload */
			if (e > 0 &&
			    (ts.token_len != exp->len ||
			     memcmp(exp->value, ts.token, exp->len))) {
				lwsl_notice("fail token mismatch\n");
				fail++;
				break;
			}

			m++;
			exp++;

		} while (e > 0);

		if (fail == in_fail)
			ok++;
	}

	if (p) {
		/*
		 * -s given: tokenize the commandline string, emitting it
		 * formatted as a new tests[] entry plus its expected table
		 */
		ts.start = p;
		ts.len = strlen(p);
		ts.flags = flags;

		printf("\t{\n\t\t\"%s\",\n"
		       "\t\texpected%d, LWS_ARRAY_SIZE(expected%d),\n\t\t",
		       p, (int)LWS_ARRAY_SIZE(tests) + 1,
		       (int)LWS_ARRAY_SIZE(tests) + 1);

		if (!flags)
			printf("0\n\t},\n");
		else {
			/*
			 * reconstruct the numeric flags as symbolic names;
			 * the (flags & 1) / & 3 / & 7 masks test whether any
			 * lower-ordinal flag was already printed, to decide
			 * if a " | " separator is needed
			 */
			if (flags & LWS_TOKENIZE_F_MINUS_NONTERM)
				printf("LWS_TOKENIZE_F_MINUS_NONTERM");
			if (flags & LWS_TOKENIZE_F_AGG_COLON) {
				if (flags & 1)
					printf(" | ");
				printf("LWS_TOKENIZE_F_AGG_COLON");
			}
			if (flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) {
				if (flags & 3)
					printf(" | ");
				printf("LWS_TOKENIZE_F_COMMA_SEP_LIST");
			}
			if (flags & LWS_TOKENIZE_F_RFC7230_DELIMS) {
				if (flags & 7)
					printf(" | ");
				printf("LWS_TOKENIZE_F_RFC7230_DELIMS");
			}
			printf("\n\t},\n");
		}

		printf("\texpected%d[] = {\n", (int)LWS_ARRAY_SIZE(tests) + 1);

		do {
			e = lws_tokenize(&ts);

			printf("\t\t{ %s, \"%.*s\", %d },\n",
			       element_names[e + LWS_TOKZE_ERRS],
			       (int)ts.token_len,
			       ts.token, (int)ts.token_len);

		} while (e > 0);

		printf("\t}\n");
	}

	lwsl_user("Completed: PASS: %d, FAIL: %d\n", ok, fail);

	return !(ok && !fail);
}

View file

@ -0,0 +1,24 @@
#!/bin/bash
#
# $1: path to minimal example binaries...
#     if lws is built with -DLWS_WITH_MINIMAL_EXAMPLES=1
#     that will be ./bin from your build dir
#
# $2: path for logs and results.  The results will go
#     in a subdir named after the directory this script
#     is in
#
# $3: offset for test index count
#
# $4: total test count
#
# $5: path to ./minimal-examples dir in lws
#
# Test return code 0: OK, 254: timed out, other: error indication

# pull in the shared dotest() helper from the lws selftest library
. $5/selftests-library.sh

# this directory contributes a single test
COUNT_TESTS=1

# run the api selftest binary; dotest accumulates into FAILS
dotest $1 $2 apiselftest

exit $FAILS