diff --git a/lib/client-parser.c b/lib/client-parser.c index ec35e136..93502819 100644 --- a/lib/client-parser.c +++ b/lib/client-parser.c @@ -291,67 +291,6 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c) c ^= wsi->u.ws.mask_nonce[ (wsi->u.ws.frame_mask_index++) & 3]; - /* if we skipped the 2-byte code at the start, UTF-8 after */ - if (wsi->u.ws.opcode == LWSWSOPC_CLOSE && - wsi->u.ws.rx_user_buffer_head == 2) - wsi->u.ws.check_utf8 = !!(wsi->context->options & - LWS_SERVER_OPTION_VALIDATE_UTF8); - - if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) { - static const unsigned char e0f4[] = { - 0xa0 | ((2 - 1) << 2) | 1, /* e0 */ - 0x80 | ((4 - 1) << 2) | 1, /* e1 */ - 0x80 | ((4 - 1) << 2) | 1, /* e2 */ - 0x80 | ((4 - 1) << 2) | 1, /* e3 */ - 0x80 | ((4 - 1) << 2) | 1, /* e4 */ - 0x80 | ((4 - 1) << 2) | 1, /* e5 */ - 0x80 | ((4 - 1) << 2) | 1, /* e6 */ - 0x80 | ((4 - 1) << 2) | 1, /* e7 */ - 0x80 | ((4 - 1) << 2) | 1, /* e8 */ - 0x80 | ((4 - 1) << 2) | 1, /* e9 */ - 0x80 | ((4 - 1) << 2) | 1, /* ea */ - 0x80 | ((4 - 1) << 2) | 1, /* eb */ - 0x80 | ((4 - 1) << 2) | 1, /* ec */ - 0x80 | ((2 - 1) << 2) | 1, /* ed */ - 0x80 | ((4 - 1) << 2) | 1, /* ee */ - 0x80 | ((4 - 1) << 2) | 1, /* ef */ - 0x90 | ((3 - 1) << 2) | 2, /* f0 */ - 0x80 | ((4 - 1) << 2) | 2, /* f1 */ - 0x80 | ((4 - 1) << 2) | 2, /* f2 */ - 0x80 | ((4 - 1) << 2) | 2, /* f3 */ - 0x80 | ((1 - 1) << 2) | 2, /* f4 */ - - 0, /* s0 */ - 0x80 | ((4 - 1) << 2) | 0, /* s2 */ - 0x80 | ((4 - 1) << 2) | 1, /* s3 */ - }; - - if (!wsi->u.ws.utf8) { - if (c >= 0x80) { - if (c < 0xc2 || c > 0xf4) - goto utf8_fail; - if (c < 0xe0) - wsi->u.ws.utf8 = 0x80 | - ((4 - 1) << 2); - else - wsi->u.ws.utf8 = e0f4[c - 0xe0]; - } - } else { - if (c < (wsi->u.ws.utf8 & 0xf0) || - c >= (wsi->u.ws.utf8 & 0xf0) + 0x10 + - ((wsi->u.ws.utf8 << 2) & 0x30)) - goto utf8_fail; - wsi->u.ws.utf8 = e0f4[21 + (wsi->u.ws.utf8 & 3)]; - } - - /* we are ending partway through utf-8 character? */ - if (wsi->u.ws.final && wsi->u.ws.rx_packet_length == 1 && - wsi->u.ws.utf8) { -utf8_fail: lwsl_info("utf8 error\n"); - return -1; - } - } - wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING + (wsi->u.ws.rx_user_buffer_head++)] = c; @@ -386,6 +325,14 @@ spill: switch (wsi->u.ws.opcode) { case LWSWSOPC_CLOSE: + pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[ + LWS_SEND_BUFFER_PRE_PADDING]; + if (wsi->context->options & LWS_SERVER_OPTION_VALIDATE_UTF8 && + wsi->u.ws.rx_user_buffer_head > 2 && + lws_check_utf8(&wsi->u.ws.utf8, pp + 2, + wsi->u.ws.rx_user_buffer_head - 2)) + goto utf8_fail; + /* is this an acknowledgement of our close? */ if (wsi->state == LWSS_AWAITING_CLOSE_ACK) { /* @@ -395,8 +342,7 @@ spill: lwsl_parser("seen server's close ack\n"); return -1; } - pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[ - LWS_SEND_BUFFER_PRE_PADDING]; + lwsl_parser("client sees server close len = %d\n", wsi->u.ws.rx_user_buffer_head); if (wsi->u.ws.rx_user_buffer_head >= 2) { @@ -526,11 +472,25 @@ ping_drop: &eff_buf, 0) < 0) /* fail */ return -1; + if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) { + if (lws_check_utf8(&wsi->u.ws.utf8, + (unsigned char *)eff_buf.token, + eff_buf.token_len)) + goto utf8_fail; + + /* we are ending partway through utf-8 character? */ + if (wsi->u.ws.final && wsi->u.ws.utf8) { +utf8_fail: lwsl_info("utf8 error\n"); + return -1; + } + } + if (eff_buf.token_len < 0 && callback_action != LWS_CALLBACK_CLIENT_RECEIVE_PONG) goto already_done; - eff_buf.token[eff_buf.token_len] = '\0'; + if (eff_buf.token) + eff_buf.token[eff_buf.token_len] = '\0'; if (!wsi->protocol->callback) goto already_done; diff --git a/lib/libwebsockets.c b/lib/libwebsockets.c index 82f317af..082bd43b 100644 --- a/lib/libwebsockets.c +++ b/lib/libwebsockets.c @@ -1009,3 +1009,60 @@ _lws_rx_flow_control(struct lws *wsi) return 0; } + +LWS_EXTERN int +lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len) +{ + static const unsigned char e0f4[] = { + 0xa0 | ((2 - 1) << 2) | 1, /* e0 */ + 0x80 | ((4 - 1) << 2) | 1, /* e1 */ + 0x80 | ((4 - 1) << 2) | 1, /* e2 */ + 0x80 | ((4 - 1) << 2) | 1, /* e3 */ + 0x80 | ((4 - 1) << 2) | 1, /* e4 */ + 0x80 | ((4 - 1) << 2) | 1, /* e5 */ + 0x80 | ((4 - 1) << 2) | 1, /* e6 */ + 0x80 | ((4 - 1) << 2) | 1, /* e7 */ + 0x80 | ((4 - 1) << 2) | 1, /* e8 */ + 0x80 | ((4 - 1) << 2) | 1, /* e9 */ + 0x80 | ((4 - 1) << 2) | 1, /* ea */ + 0x80 | ((4 - 1) << 2) | 1, /* eb */ + 0x80 | ((4 - 1) << 2) | 1, /* ec */ + 0x80 | ((2 - 1) << 2) | 1, /* ed */ + 0x80 | ((4 - 1) << 2) | 1, /* ee */ + 0x80 | ((4 - 1) << 2) | 1, /* ef */ + 0x90 | ((3 - 1) << 2) | 2, /* f0 */ + 0x80 | ((4 - 1) << 2) | 2, /* f1 */ + 0x80 | ((4 - 1) << 2) | 2, /* f2 */ + 0x80 | ((4 - 1) << 2) | 2, /* f3 */ + 0x80 | ((1 - 1) << 2) | 2, /* f4 */ + + 0, /* s0 */ + 0x80 | ((4 - 1) << 2) | 0, /* s2 */ + 0x80 | ((4 - 1) << 2) | 1, /* s3 */ + }; + unsigned char s = *state; + + while (len--) { + unsigned char c = *buf++; + + if (!s) { + if (c >= 0x80) { + if (c < 0xc2 || c > 0xf4) + return 1; + if (c < 0xe0) + s = 0x80 | ((4 - 1) << 2); + else + s = e0f4[c - 0xe0]; + } + } else { + if (c < (s & 0xf0) || + c >= (s & 0xf0) + 0x10 + ((s << 2) & 0x30)) + return 1; + s = e0f4[21 + (s & 3)]; + } + } + + *state = s; + + return 0; +} diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h index 3ef7a525..b443daf4 100644 --- a/lib/private-libwebsockets.h +++ b/lib/private-libwebsockets.h @@ -1312,6 +1312,9 @@ time_in_microseconds(void); LWS_EXTERN const char * lws_plat_inet_ntop(int af, const void *src, char *dst, int cnt); +LWS_EXTERN int +lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len); + #ifdef __cplusplus }; #endif