mirror of
https://github.com/warmcat/libwebsockets.git
synced 2025-03-09 00:00:04 +01:00
http: parser: straighten out %00 legality
https://github.com/warmcat/libwebsockets/issues/2262 This adds a README explaining what can be expected if your URLs contain %00, and adds a safe helper for urlargs-by-name that is length-based. Contains fix for extra NUL on some headers https://github.com/warmcat/libwebsockets/issues/2267
This commit is contained in:
parent
354b29c747
commit
24abd699f6
7 changed files with 162 additions and 26 deletions
33
READMEs/README.http_parser.md
Normal file
33
READMEs/README.http_parser.md
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Notes on http parser corner cases
|
||||
|
||||
## Dealing with %00
|
||||
|
||||
%00 is considered illegal in
|
||||
|
||||
- the path part of the URL. A lot of user code handles it as a NUL terminated string,
|
||||
even though the header get apis are based around length. So it is disallowed to
|
||||
avoid ambiguity.
|
||||
|
||||
- the name part of a urlarg, like ?name=value
|
||||
|
||||
%00 is valid in
|
||||
|
||||
- the value part of a urlarg, like ?name=value
|
||||
|
||||
When the parser sees %00 where it is not allowed, it simply drops the connection.
|
||||
|
||||
## Note on proper urlarg handling
|
||||
|
||||
urlargs are allowed to contain non-NUL terminated binary. So it is important to
|
||||
use the length-based urlarg apis
|
||||
|
||||
- `lws_hdr_copy_fragment()`
|
||||
- `lws_get_urlarg_by_name_safe()`
|
||||
|
||||
The non-length based urlarg api
|
||||
|
||||
- `lws_get_urlarg_by_name()`
|
||||
|
||||
...is soft-deprecated, it's still allowed but it will be fooled by the first %00
|
||||
seen in the argument into truncating the argument. Use `lws_get_urlarg_by_name_safe()`
|
||||
instead.
|
|
@ -494,8 +494,35 @@ LWS_VISIBLE LWS_EXTERN int
|
|||
lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name,
|
||||
int nlen);
|
||||
|
||||
/**
|
||||
* lws_get_urlarg_by_name_safe() - get copy and return length of y for x=y urlargs
|
||||
*
|
||||
* \param wsi: the connection to check
|
||||
* \param name: the arg name, like "token" or "token="
|
||||
* \param buf: the buffer to receive the urlarg (including the name= part)
|
||||
* \param len: the length of the buffer to receive the urlarg
|
||||
*
|
||||
* Returns -1 if not present, else the length of y in the urlarg name=y. If
|
||||
* zero or greater, then buf contains a copy of the string y. Any = after the
|
||||
* name match is trimmed off if the name does not end with = itself.
|
||||
*
|
||||
* This returns the explicit length and so can deal with binary blobs that are
|
||||
* percent-encoded. It also makes sure buf has a NUL just after the valid
|
||||
* length so it can work with NUL-based apis if you don't care about truncation.
|
||||
*
|
||||
* buf may have been written even when -1 is returned indicating no match.
|
||||
*
|
||||
* Use this in place of lws_get_urlarg_by_name() that does not return an
|
||||
* explicit length.
|
||||
*
|
||||
* Use lws_get_urlarg_by_name_safe() instead of this, which returns the length.
|
||||
*/
|
||||
LWS_VISIBLE LWS_EXTERN int
|
||||
lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len);
|
||||
|
||||
/**
|
||||
* lws_get_urlarg_by_name() - return pointer to arg value if present
|
||||
*
|
||||
* \param wsi: the connection to check
|
||||
* \param name: the arg name, like "token="
|
||||
* \param buf: the buffer to receive the urlarg (including the name= part)
|
||||
|
@ -503,9 +530,16 @@ lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name,
|
|||
*
|
||||
* Returns NULL if not found or a pointer inside buf to just after the
|
||||
* name= part.
|
||||
*
|
||||
* This assumed the argument can be represented with a NUL-terminated string.
|
||||
* It can't correctly deal with binary values encoded with %XX, eg. %00 will
|
||||
* be understood to terminate the string.
|
||||
*
|
||||
* Use lws_get_urlarg_by_name_safe() instead of this, which returns the length.
|
||||
*/
|
||||
LWS_VISIBLE LWS_EXTERN const char *
|
||||
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len);
|
||||
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len)
|
||||
/* LWS_WARN_DEPRECATED */;
|
||||
///@}
|
||||
|
||||
/*! \defgroup HTTP-headers-create HTTP headers: create
|
||||
|
|
|
@ -636,22 +636,49 @@ lws_parse_uri(char *p, const char **prot, const char **ads, int *port,
|
|||
|
||||
/* ... */
|
||||
|
||||
int
|
||||
lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len)
|
||||
{
|
||||
int n = 0, fraglen, sl = (int)strlen(name);
|
||||
|
||||
do {
|
||||
fraglen = lws_hdr_copy_fragment(wsi, buf, len,
|
||||
WSI_TOKEN_HTTP_URI_ARGS, n);
|
||||
|
||||
if (fraglen < 0)
|
||||
break;
|
||||
|
||||
if (fraglen + 1 < len &&
|
||||
fraglen >= sl &&
|
||||
!strncmp(buf, name, (size_t)sl)) {
|
||||
/*
|
||||
* If he left off the trailing =, trim it from the
|
||||
* result
|
||||
*/
|
||||
|
||||
if (name[sl - 1] != '=' &&
|
||||
sl < fraglen &&
|
||||
buf[sl] == '=')
|
||||
sl++;
|
||||
|
||||
memmove(buf, buf + sl, (size_t)(fraglen - sl));
|
||||
buf[fraglen - sl] = '\0';
|
||||
|
||||
return fraglen - sl;
|
||||
}
|
||||
|
||||
n++;
|
||||
} while (1);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char *
|
||||
lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len)
|
||||
{
|
||||
int n = 0;
|
||||
size_t sl = strlen(name);
|
||||
int n = lws_get_urlarg_by_name_safe(wsi, name, buf, len);
|
||||
|
||||
while (lws_hdr_copy_fragment(wsi, buf, len,
|
||||
WSI_TOKEN_HTTP_URI_ARGS, n) >= 0) {
|
||||
|
||||
if (!strncmp(buf, name, sl))
|
||||
return buf + sl;
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return n < 0 ? NULL : buf;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -742,8 +742,7 @@ issue_char(struct lws *wsi, unsigned char c)
|
|||
if (!wsi->http.ah->current_token_limit ||
|
||||
frag_len < wsi->http.ah->current_token_limit) {
|
||||
wsi->http.ah->data[wsi->http.ah->pos++] = (char)c;
|
||||
if (c)
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len++;
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -812,14 +811,29 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c)
|
|||
* leave /.dir or whatever alone
|
||||
*/
|
||||
|
||||
if (!c && (!ah->frag_index[WSI_TOKEN_HTTP_URI_ARGS] ||
|
||||
!ah->post_literal_equal)) {
|
||||
/*
|
||||
* Since user code is typically going to parse the path using
|
||||
* NUL-terminated apis, it's too dangerous to allow NUL
|
||||
* injection here.
|
||||
*
|
||||
* It's allowed in the urlargs, because the apis to access
|
||||
* those only allow retreival with explicit length.
|
||||
*/
|
||||
lwsl_warn("%s: saw NUL outside of uri args\n", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (ah->ups) {
|
||||
case URIPS_IDLE:
|
||||
if (!c)
|
||||
return -1;
|
||||
|
||||
/* genuine delimiter */
|
||||
if ((c == '&' || c == ';') && !enc) {
|
||||
if (issue_char(wsi, '\0') < 0)
|
||||
return -1;
|
||||
/* don't account for it */
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
|
||||
/* link to next fragment */
|
||||
ah->frags[ah->nfrag].nfrag = (uint8_t)(ah->nfrag + 1);
|
||||
ah->nfrag++;
|
||||
|
@ -924,6 +938,9 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c)
|
|||
if (issue_char(wsi, '\0') < 0)
|
||||
return -1;
|
||||
|
||||
/* don't account for it */
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
|
||||
|
||||
/* move to using WSI_TOKEN_HTTP_URI_ARGS */
|
||||
ah->nfrag++;
|
||||
if (ah->nfrag >= LWS_ARRAY_SIZE(ah->frags))
|
||||
|
@ -1055,6 +1072,8 @@ lws_parse(struct lws *wsi, unsigned char *buf, int *len)
|
|||
/* begin parsing HTTP version: */
|
||||
if (issue_char(wsi, '\0') < 0)
|
||||
return LPR_FAIL;
|
||||
/* don't account for it */
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
|
||||
ah->parser_state = WSI_TOKEN_HTTP;
|
||||
goto start_fragment;
|
||||
}
|
||||
|
@ -1096,6 +1115,17 @@ check_eol:
|
|||
return LPR_FAIL;
|
||||
if (n > 0)
|
||||
ah->parser_state = WSI_TOKEN_SKIPPING;
|
||||
else {
|
||||
/*
|
||||
* Explicit zeroes are legal in URI ARGS.
|
||||
* They can only exist as a safety terminator
|
||||
* after the valid part of the token contents
|
||||
* for other types.
|
||||
*/
|
||||
if (!c && ah->parser_state != WSI_TOKEN_HTTP_URI_ARGS)
|
||||
/* don't account for safety terminator */
|
||||
wsi->http.ah->frags[wsi->http.ah->nfrag].len--;
|
||||
}
|
||||
|
||||
swallow:
|
||||
/* per-protocol end of headers management */
|
||||
|
|
|
@ -74,6 +74,19 @@ callback_dynamic_http(struct lws *wsi, enum lws_callback_reasons reason,
|
|||
lwsl_notice("%s: HTTP: connection %s, path %s\n", __func__,
|
||||
(const char *)buf, pss->path);
|
||||
|
||||
/*
|
||||
* Demonstrates how to retreive a urlarg x=value
|
||||
*/
|
||||
|
||||
{
|
||||
char value[100];
|
||||
int z = lws_get_urlarg_by_name_safe(wsi, "x", value,
|
||||
sizeof(value) - 1);
|
||||
|
||||
if (z >= 0)
|
||||
lwsl_hexdump_notice(value, (size_t)z);
|
||||
}
|
||||
|
||||
/*
|
||||
* prepare and write http headers... with regards to content-
|
||||
* length, there are three approaches:
|
||||
|
|
|
@ -29,7 +29,6 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
|
|||
uint8_t buf[LWS_PRE + LWS_RECOMMENDED_MIN_HEADER_SPACE],
|
||||
*start = &buf[LWS_PRE], *p = start,
|
||||
*end = &buf[sizeof(buf) - 1];
|
||||
const char *val;
|
||||
int n;
|
||||
|
||||
switch (reason) {
|
||||
|
@ -46,13 +45,13 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
|
|||
/* we just dump the decoded things to the log */
|
||||
|
||||
for (n = 0; n < (int)LWS_ARRAY_SIZE(param_names); n++) {
|
||||
val = lws_get_urlarg_by_name(wsi, param_names[n],
|
||||
int rv = lws_get_urlarg_by_name_safe(wsi, param_names[n],
|
||||
(char *)buf, sizeof(buf));
|
||||
if (!val)
|
||||
if (rv < 0)
|
||||
lwsl_user("%s: undefined\n", param_names[n]);
|
||||
else
|
||||
lwsl_user("%s: (len %d) '%s'\n", param_names[n],
|
||||
(int)strlen((const char *)buf),buf);
|
||||
(int)rv, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -224,12 +224,12 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
|
|||
* mirror instance name... defaults to "", but if URL includes
|
||||
* "?mirror=xxx", will be "xxx"
|
||||
*/
|
||||
name[0] = '\0';
|
||||
if (!lws_get_urlarg_by_name(wsi, "mirror", name,
|
||||
sizeof(name) - 1))
|
||||
|
||||
if (lws_get_urlarg_by_name_safe(wsi, "mirror", name,
|
||||
sizeof(name) - 1) < 0) {
|
||||
lwsl_debug("get urlarg failed\n");
|
||||
if (strchr(name, '='))
|
||||
pn = strchr(name, '=') + 1;
|
||||
name[0] = '\0';
|
||||
}
|
||||
|
||||
//lwsl_notice("%s: mirror name '%s'\n", __func__, pn);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue