1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-09 00:00:04 +01:00

http: parser: straighten out %00 legality

https://github.com/warmcat/libwebsockets/issues/2262

This adds a README explaining what can be expected if your URLs contain
%00, and adds a safe helper for urlargs-by-name that is length-based.

Contains fix for extra NUL on some headers

https://github.com/warmcat/libwebsockets/issues/2267
This commit is contained in:
Andy Green 2021-04-08 15:26:23 +01:00
parent 354b29c747
commit 24abd699f6
7 changed files with 162 additions and 26 deletions

View file

@ -0,0 +1,33 @@
# Notes on http parser corner cases
## Dealing with %00
%00 is considered illegal in
- the path part of the URL. A lot of user code handles it as a NUL terminated string,
even though the header get apis are based around length. So it is disallowed to
avoid ambiguity.
- the name part of a urlarg, like ?name=value
%00 is valid in
- the value part of a urlarg, like ?name=value
When the parser sees %00 where it is not allowed, it simply drops the connection.
## Note on proper urlarg handling
urlargs are allowed to contain non-NUL terminated binary. So it is important to
use the length-based urlarg apis
- `lws_hdr_copy_fragment()`
- `lws_get_urlarg_by_name_safe()`
The non-length based urlarg api
- `lws_get_urlarg_by_name()`
...is soft-deprecated, it's still allowed but it will be fooled by the first %00
seen in the argument into truncating the argument. Use `lws_get_urlarg_by_name_safe()`
instead.

View file

@ -494,8 +494,35 @@ LWS_VISIBLE LWS_EXTERN int
lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name,
int nlen);
/**
* lws_get_urlarg_by_name_safe() - get copy and return length of y for x=y urlargs
*
* \param wsi: the connection to check
* \param name: the arg name, like "token" or "token="
* \param buf: the buffer to receive the urlarg (including the name= part)
* \param len: the length of the buffer to receive the urlarg
*
* Returns -1 if not present, else the length of y in the urlarg name=y. If
* zero or greater, then buf contains a copy of the string y. Any = after the
* name match is trimmed off if the name does not end with = itself.
*
* This returns the explicit length and so can deal with binary blobs that are
* percent-encoded. It also makes sure buf has a NUL just after the valid
* length so it can work with NUL-based apis if you don't care about truncation.
*
* buf may have been written even when -1 is returned indicating no match.
*
* Use this in place of lws_get_urlarg_by_name() that does not return an
* explicit length.
*
* Use lws_get_urlarg_by_name_safe() instead of this, which returns the length.
*/
LWS_VISIBLE LWS_EXTERN int
lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len);
/**
* lws_get_urlarg_by_name() - return pointer to arg value if present
*
* \param wsi: the connection to check
* \param name: the arg name, like "token="
* \param buf: the buffer to receive the urlarg (including the name= part)
@ -503,9 +530,16 @@ lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name,
*
* Returns NULL if not found or a pointer inside buf to just after the
* name= part.
*
* This assumed the argument can be represented with a NUL-terminated string.
* It can't correctly deal with binary values encoded with %XX, eg. %00 will
* be understood to terminate the string.
*
* Use lws_get_urlarg_by_name_safe() instead of this, which returns the length.
*/
LWS_VISIBLE LWS_EXTERN const char *
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len);
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len)
/* LWS_WARN_DEPRECATED */;
///@}
/*! \defgroup HTTP-headers-create HTTP headers: create

View file

@ -636,22 +636,49 @@ lws_parse_uri(char *p, const char **prot, const char **ads, int *port,
/* ... */
int
lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len)
{
int n = 0, fraglen, sl = (int)strlen(name);
do {
fraglen = lws_hdr_copy_fragment(wsi, buf, len,
WSI_TOKEN_HTTP_URI_ARGS, n);
if (fraglen < 0)
break;
if (fraglen + 1 < len &&
fraglen >= sl &&
!strncmp(buf, name, (size_t)sl)) {
/*
* If he left off the trailing =, trim it from the
* result
*/
if (name[sl - 1] != '=' &&
sl < fraglen &&
buf[sl] == '=')
sl++;
memmove(buf, buf + sl, (size_t)(fraglen - sl));
buf[fraglen - sl] = '\0';
return fraglen - sl;
}
n++;
} while (1);
return -1;
}
const char *
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len)
{
int n = 0;
size_t sl = strlen(name);
int n = lws_get_urlarg_by_name_safe(wsi, name, buf, len);
while (lws_hdr_copy_fragment(wsi, buf, len,
WSI_TOKEN_HTTP_URI_ARGS, n) >= 0) {
if (!strncmp(buf, name, sl))
return buf + sl;
n++;
}
return NULL;
return n < 0 ? NULL : buf;
}

View file

@ -742,8 +742,7 @@ issue_char(struct lws *wsi, unsigned char c)
if (!wsi->http.ah->current_token_limit ||
frag_len < wsi->http.ah->current_token_limit) {
wsi->http.ah->data[wsi->http.ah->pos++] = (char)c;
if (c)
wsi->http.ah->frags[wsi->http.ah->nfrag].len++;
wsi->http.ah->frags[wsi->http.ah->nfrag].len++;
return 0;
}
@ -812,14 +811,29 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c)
* leave /.dir or whatever alone
*/
if (!c && (!ah->frag_index[WSI_TOKEN_HTTP_URI_ARGS] ||
!ah->post_literal_equal)) {
/*
* Since user code is typically going to parse the path using
* NUL-terminated apis, it's too dangerous to allow NUL
* injection here.
*
* It's allowed in the urlargs, because the apis to access
* those only allow retreival with explicit length.
*/
lwsl_warn("%s: saw NUL outside of uri args\n", __func__);
return -1;
}
switch (ah->ups) {
case URIPS_IDLE:
if (!c)
return -1;
/* genuine delimiter */
if ((c == '&' || c == ';') && !enc) {
if (issue_char(wsi, '\0') < 0)
return -1;
/* don't account for it */
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
/* link to next fragment */
ah->frags[ah->nfrag].nfrag = (uint8_t)(ah->nfrag + 1);
ah->nfrag++;
@ -924,6 +938,9 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c)
if (issue_char(wsi, '\0') < 0)
return -1;
/* don't account for it */
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
/* move to using WSI_TOKEN_HTTP_URI_ARGS */
ah->nfrag++;
if (ah->nfrag >= LWS_ARRAY_SIZE(ah->frags))
@ -1055,6 +1072,8 @@ lws_parse(struct lws *wsi, unsigned char *buf, int *len)
/* begin parsing HTTP version: */
if (issue_char(wsi, '\0') < 0)
return LPR_FAIL;
/* don't account for it */
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
ah->parser_state = WSI_TOKEN_HTTP;
goto start_fragment;
}
@ -1096,6 +1115,17 @@ check_eol:
return LPR_FAIL;
if (n > 0)
ah->parser_state = WSI_TOKEN_SKIPPING;
else {
/*
* Explicit zeroes are legal in URI ARGS.
* They can only exist as a safety terminator
* after the valid part of the token contents
* for other types.
*/
if (!c && ah->parser_state != WSI_TOKEN_HTTP_URI_ARGS)
/* don't account for safety terminator */
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
}
swallow:
/* per-protocol end of headers management */

View file

@ -74,6 +74,19 @@ callback_dynamic_http(struct lws *wsi, enum lws_callback_reasons reason,
lwsl_notice("%s: HTTP: connection %s, path %s\n", __func__,
(const char *)buf, pss->path);
/*
* Demonstrates how to retreive a urlarg x=value
*/
{
char value[100];
int z = lws_get_urlarg_by_name_safe(wsi, "x", value,
sizeof(value) - 1);
if (z >= 0)
lwsl_hexdump_notice(value, (size_t)z);
}
/*
* prepare and write http headers... with regards to content-
* length, there are three approaches:

View file

@ -29,7 +29,6 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
uint8_t buf[LWS_PRE + LWS_RECOMMENDED_MIN_HEADER_SPACE],
*start = &buf[LWS_PRE], *p = start,
*end = &buf[sizeof(buf) - 1];
const char *val;
int n;
switch (reason) {
@ -46,13 +45,13 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
/* we just dump the decoded things to the log */
for (n = 0; n < (int)LWS_ARRAY_SIZE(param_names); n++) {
val = lws_get_urlarg_by_name(wsi, param_names[n],
int rv = lws_get_urlarg_by_name_safe(wsi, param_names[n],
(char *)buf, sizeof(buf));
if (!val)
if (rv < 0)
lwsl_user("%s: undefined\n", param_names[n]);
else
lwsl_user("%s: (len %d) '%s'\n", param_names[n],
(int)strlen((const char *)buf),buf);
(int)rv, buf);
}
/*

View file

@ -224,12 +224,12 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
* mirror instance name... defaults to "", but if URL includes
* "?mirror=xxx", will be "xxx"
*/
name[0] = '\0';
if (!lws_get_urlarg_by_name(wsi, "mirror", name,
sizeof(name) - 1))
if (lws_get_urlarg_by_name_safe(wsi, "mirror", name,
sizeof(name) - 1) < 0) {
lwsl_debug("get urlarg failed\n");
if (strchr(name, '='))
pn = strchr(name, '=') + 1;
name[0] = '\0';
}
//lwsl_notice("%s: mirror name '%s'\n", __func__, pn);