diff --git a/READMEs/README.http_parser.md b/READMEs/README.http_parser.md new file mode 100644 index 000000000..e77125cb8 --- /dev/null +++ b/READMEs/README.http_parser.md @@ -0,0 +1,33 @@ +# Notes on http parser corner cases + +## Dealing with %00 + +%00 is considered illegal in + + - the path part of the URL. A lot of user code handles it as a NUL terminated string, + even though the header get apis are based around length. So it is disallowed to + avoid ambiguity. + + - the name part of a urlarg, like ?name=value + +%00 is valid in + + - the value part of a urlarg, like ?name=value + +When the parser sees %00 where it is not allowed, it simply drops the connection. + +## Note on proper urlarg handling + +urlargs are allowed to contain non-NUL terminated binary. So it is important to +use the length-based urlarg apis + + - `lws_hdr_copy_fragment()` + - `lws_get_urlarg_by_name_safe()` + +The non-length based urlarg api + + - `lws_get_urlarg_by_name()` + +...is soft-deprecated, it's still allowed but it will be fooled by the first %00 +seen in the argument into truncating the argument. Use `lws_get_urlarg_by_name_safe()` +instead. diff --git a/include/libwebsockets/lws-http.h b/include/libwebsockets/lws-http.h index 2c85a9f80..d1ecacf8f 100644 --- a/include/libwebsockets/lws-http.h +++ b/include/libwebsockets/lws-http.h @@ -494,8 +494,35 @@ LWS_VISIBLE LWS_EXTERN int lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name, int nlen); +/** + * lws_get_urlarg_by_name_safe() - get copy and return length of y for x=y urlargs + * + * \param wsi: the connection to check + * \param name: the arg name, like "token" or "token=" + * \param buf: the buffer to receive the urlarg (including the name= part) + * \param len: the length of the buffer to receive the urlarg + * + * Returns -1 if not present, else the length of y in the urlarg name=y. If + * zero or greater, then buf contains a copy of the string y. Any = after the + * name match is trimmed off if the name does not end with = itself. + * + * This returns the explicit length and so can deal with binary blobs that are + * percent-encoded. It also makes sure buf has a NUL just after the valid + * length so it can work with NUL-based apis if you don't care about truncation. + * + * buf may have been written even when -1 is returned indicating no match. + * + * Use this in place of lws_get_urlarg_by_name() that does not return an + * explicit length. + * + * Use lws_get_urlarg_by_name_safe() instead of this, which returns the length. + */ +LWS_VISIBLE LWS_EXTERN int +lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len); + /** * lws_get_urlarg_by_name() - return pointer to arg value if present + * * \param wsi: the connection to check * \param name: the arg name, like "token=" * \param buf: the buffer to receive the urlarg (including the name= part) @@ -503,9 +530,16 @@ lws_hdr_custom_copy(struct lws *wsi, char *dst, int len, const char *name, * * Returns NULL if not found or a pointer inside buf to just after the * name= part. + * + * This assumed the argument can be represented with a NUL-terminated string. + * It can't correctly deal with binary values encoded with %XX, eg. %00 will + * be understood to terminate the string. + * + * Use lws_get_urlarg_by_name_safe() instead of this, which returns the length. */ LWS_VISIBLE LWS_EXTERN const char * -lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len); +lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len) +/* LWS_WARN_DEPRECATED */; ///@} /*! \defgroup HTTP-headers-create HTTP headers: create diff --git a/lib/core-net/wsi.c b/lib/core-net/wsi.c index c79ca6591..85f5b6c7b 100644 --- a/lib/core-net/wsi.c +++ b/lib/core-net/wsi.c @@ -636,22 +636,49 @@ lws_parse_uri(char *p, const char **prot, const char **ads, int *port, /* ... */ +int +lws_get_urlarg_by_name_safe(struct lws *wsi, const char *name, char *buf, int len) +{ + int n = 0, fraglen, sl = (int)strlen(name); + + do { + fraglen = lws_hdr_copy_fragment(wsi, buf, len, + WSI_TOKEN_HTTP_URI_ARGS, n); + + if (fraglen < 0) + break; + + if (fraglen + 1 < len && + fraglen >= sl && + !strncmp(buf, name, (size_t)sl)) { + /* + * If he left off the trailing =, trim it from the + * result + */ + + if (name[sl - 1] != '=' && + sl < fraglen && + buf[sl] == '=') + sl++; + + memmove(buf, buf + sl, (size_t)(fraglen - sl)); + buf[fraglen - sl] = '\0'; + + return fraglen - sl; + } + + n++; + } while (1); + + return -1; +} + const char * lws_get_urlarg_by_name(struct lws *wsi, const char *name, char *buf, int len) { - int n = 0; - size_t sl = strlen(name); + int n = lws_get_urlarg_by_name_safe(wsi, name, buf, len); - while (lws_hdr_copy_fragment(wsi, buf, len, - WSI_TOKEN_HTTP_URI_ARGS, n) >= 0) { - - if (!strncmp(buf, name, sl)) - return buf + sl; - - n++; - } - - return NULL; + return n < 0 ? NULL : buf; } diff --git a/lib/roles/http/parsers.c b/lib/roles/http/parsers.c index 2999faa2b..cb405fd78 100644 --- a/lib/roles/http/parsers.c +++ b/lib/roles/http/parsers.c @@ -742,8 +742,7 @@ issue_char(struct lws *wsi, unsigned char c) if (!wsi->http.ah->current_token_limit || frag_len < wsi->http.ah->current_token_limit) { wsi->http.ah->data[wsi->http.ah->pos++] = (char)c; - if (c) - wsi->http.ah->frags[wsi->http.ah->nfrag].len++; + wsi->http.ah->frags[wsi->http.ah->nfrag].len++; return 0; } @@ -812,14 +811,29 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c) * leave /.dir or whatever alone */ + if (!c && (!ah->frag_index[WSI_TOKEN_HTTP_URI_ARGS] || + !ah->post_literal_equal)) { + /* + * Since user code is typically going to parse the path using + * NUL-terminated apis, it's too dangerous to allow NUL + * injection here. + * + * It's allowed in the urlargs, because the apis to access + * those only allow retreival with explicit length. + */ + lwsl_warn("%s: saw NUL outside of uri args\n", __func__); + return -1; + } + switch (ah->ups) { case URIPS_IDLE: - if (!c) - return -1; + /* genuine delimiter */ if ((c == '&' || c == ';') && !enc) { if (issue_char(wsi, '\0') < 0) return -1; + /* don't account for it */ + wsi->http.ah->frags[wsi->http.ah->nfrag].len--; /* link to next fragment */ ah->frags[ah->nfrag].nfrag = (uint8_t)(ah->nfrag + 1); ah->nfrag++; @@ -924,6 +938,9 @@ lws_parse_urldecode(struct lws *wsi, uint8_t *_c) if (issue_char(wsi, '\0') < 0) return -1; + /* don't account for it */ + wsi->http.ah->frags[wsi->http.ah->nfrag].len--; + /* move to using WSI_TOKEN_HTTP_URI_ARGS */ ah->nfrag++; if (ah->nfrag >= LWS_ARRAY_SIZE(ah->frags)) @@ -1055,6 +1072,8 @@ lws_parse(struct lws *wsi, unsigned char *buf, int *len) /* begin parsing HTTP version: */ if (issue_char(wsi, '\0') < 0) return LPR_FAIL; + /* don't account for it */ + wsi->http.ah->frags[wsi->http.ah->nfrag].len--; ah->parser_state = WSI_TOKEN_HTTP; goto start_fragment; } @@ -1096,6 +1115,17 @@ check_eol: return LPR_FAIL; if (n > 0) ah->parser_state = WSI_TOKEN_SKIPPING; + else { + /* + * Explicit zeroes are legal in URI ARGS. + * They can only exist as a safety terminator + * after the valid part of the token contents + * for other types. + */ + if (!c && ah->parser_state != WSI_TOKEN_HTTP_URI_ARGS) + /* don't account for safety terminator */ + wsi->http.ah->frags[wsi->http.ah->nfrag].len--; + } swallow: /* per-protocol end of headers management */ diff --git a/minimal-examples/http-server/minimal-http-server-dynamic/minimal-http-server-dynamic.c b/minimal-examples/http-server/minimal-http-server-dynamic/minimal-http-server-dynamic.c index 2d5769e3b..7ead9fe89 100644 --- a/minimal-examples/http-server/minimal-http-server-dynamic/minimal-http-server-dynamic.c +++ b/minimal-examples/http-server/minimal-http-server-dynamic/minimal-http-server-dynamic.c @@ -74,6 +74,19 @@ callback_dynamic_http(struct lws *wsi, enum lws_callback_reasons reason, lwsl_notice("%s: HTTP: connection %s, path %s\n", __func__, (const char *)buf, pss->path); + /* + * Demonstrates how to retreive a urlarg x=value + */ + + { + char value[100]; + int z = lws_get_urlarg_by_name_safe(wsi, "x", value, + sizeof(value) - 1); + + if (z >= 0) + lwsl_hexdump_notice(value, (size_t)z); + } + /* * prepare and write http headers... with regards to content- * length, there are three approaches: diff --git a/minimal-examples/http-server/minimal-http-server-form-get/minimal-http-server-form-get.c b/minimal-examples/http-server/minimal-http-server-form-get/minimal-http-server-form-get.c index de149ac13..b0ce321e5 100644 --- a/minimal-examples/http-server/minimal-http-server-form-get/minimal-http-server-form-get.c +++ b/minimal-examples/http-server/minimal-http-server-form-get/minimal-http-server-form-get.c @@ -29,7 +29,6 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user, uint8_t buf[LWS_PRE + LWS_RECOMMENDED_MIN_HEADER_SPACE], *start = &buf[LWS_PRE], *p = start, *end = &buf[sizeof(buf) - 1]; - const char *val; int n; switch (reason) { @@ -46,13 +45,13 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user, /* we just dump the decoded things to the log */ for (n = 0; n < (int)LWS_ARRAY_SIZE(param_names); n++) { - val = lws_get_urlarg_by_name(wsi, param_names[n], + int rv = lws_get_urlarg_by_name_safe(wsi, param_names[n], (char *)buf, sizeof(buf)); - if (!val) + if (rv < 0) lwsl_user("%s: undefined\n", param_names[n]); else lwsl_user("%s: (len %d) '%s'\n", param_names[n], - (int)strlen((const char *)buf),buf); + (int)rv, buf); } /* diff --git a/plugins/protocol_lws_mirror.c b/plugins/protocol_lws_mirror.c index 213dc619f..9929109e9 100644 --- a/plugins/protocol_lws_mirror.c +++ b/plugins/protocol_lws_mirror.c @@ -224,12 +224,12 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason, * mirror instance name... defaults to "", but if URL includes * "?mirror=xxx", will be "xxx" */ - name[0] = '\0'; - if (!lws_get_urlarg_by_name(wsi, "mirror", name, - sizeof(name) - 1)) + + if (lws_get_urlarg_by_name_safe(wsi, "mirror", name, + sizeof(name) - 1) < 0) { lwsl_debug("get urlarg failed\n"); - if (strchr(name, '=')) - pn = strchr(name, '=') + 1; + name[0] = '\0'; + } //lwsl_notice("%s: mirror name '%s'\n", __func__, pn);