diff --git a/lib/client-parser.c b/lib/client-parser.c index 90fd305d..6d511ea1 100644 --- a/lib/client-parser.c +++ b/lib/client-parser.c @@ -41,6 +41,7 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c) case LWSWSOPC_TEXT_FRAME: case LWSWSOPC_BINARY_FRAME: wsi->u.ws.continuation_possible = 1; + wsi->u.ws.utf8 = 0; break; case LWSWSOPC_CONTINUATION: if (!wsi->u.ws.continuation_possible) { @@ -48,6 +49,9 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c) return -1; } break; + case LWSWSOPC_CLOSE: + wsi->u.ws.utf8 = 0; + break; case 3: case 4: case 5: @@ -274,15 +278,73 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c) return 1; } - if ((!wsi->u.ws.this_frame_masked) || wsi->u.ws.all_zero_nonce) - wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING + - (wsi->u.ws.rx_user_buffer_head++)] = c; - else - wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING + - (wsi->u.ws.rx_user_buffer_head++)] = - c ^ wsi->u.ws.mask_nonce[ + if (wsi->u.ws.this_frame_masked && !wsi->u.ws.all_zero_nonce) + c ^= wsi->u.ws.mask_nonce[ (wsi->u.ws.frame_mask_index++) & 3]; + if ((!wsi->u.ws.frame_is_binary && + wsi->u.ws.opcode == LWSWSOPC_CONTINUATION) || + wsi->u.ws.opcode == LWSWSOPC_TEXT_FRAME || + (wsi->u.ws.opcode == LWSWSOPC_CLOSE && + wsi->u.ws.rx_user_buffer_head > 2)) { + static const unsigned char e0f4[] = { + 0xa0 | ((2 - 1) << 2) | 1, /* e0 */ + 0x80 | ((4 - 1) << 2) | 1, /* e1 */ + 0x80 | ((4 - 1) << 2) | 1, /* e2 */ + 0x80 | ((4 - 1) << 2) | 1, /* e3 */ + 0x80 | ((4 - 1) << 2) | 1, /* e4 */ + 0x80 | ((4 - 1) << 2) | 1, /* e5 */ + 0x80 | ((4 - 1) << 2) | 1, /* e6 */ + 0x80 | ((4 - 1) << 2) | 1, /* e7 */ + 0x80 | ((4 - 1) << 2) | 1, /* e8 */ + 0x80 | ((4 - 1) << 2) | 1, /* e9 */ + 0x80 | ((4 - 1) << 2) | 1, /* ea */ + 0x80 | ((4 - 1) << 2) | 1, /* eb */ + 0x80 | ((4 - 1) << 2) | 1, /* ec */ + 0x80 | ((2 - 1) << 2) | 1, /* ed */ + 0x80 | ((4 - 1) << 2) | 1, /* ee */ + 0x80 | ((4 - 1) << 2) | 1, /* ef */ + 0x90 | ((3 - 1) << 2) | 2, /* f0 */ + 0x80 | ((4 - 1) << 2) | 2, /* f1 */ + 0x80 | ((4 - 1) << 2) | 2, /* f2 */ + 0x80 | ((4 - 1) << 2) | 2, /* f3 */ + 0x80 | ((1 - 1) << 2) | 2, /* f4 */ + + 0, /* s0 */ + 0x80 | ((4 - 1) << 2) | 0, /* s2 */ + 0x80 | ((4 - 1) << 2) | 1, /* s3 */ + }; + + if (!wsi->u.ws.utf8) { + if (c >= 0x80) { + if (c < 0xc2 || c > 0xf4) + goto utf8_fail; + if (c < 0xe0) + wsi->u.ws.utf8 = 0x80 | + ((4 - 1) << 2); + else + wsi->u.ws.utf8 = e0f4[c - 0xe0]; + } + } else { + if (c < (wsi->u.ws.utf8 & 0xf0) || + c >= (wsi->u.ws.utf8 & 0xf0) + 0x10 + + ((wsi->u.ws.utf8 << 2) & 0x30)) + goto utf8_fail; + wsi->u.ws.utf8 = e0f4[21 + (wsi->u.ws.utf8 & 3)]; + } + + /* we are ending partway through utf-8 character? */ + if (wsi->u.ws.final && + wsi->u.ws.rx_packet_length == 1 && + wsi->u.ws.utf8) { +utf8_fail: lwsl_info("utf8 error\n"); + return -1; + } + } + + wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING + + (wsi->u.ws.rx_user_buffer_head++)] = c; + if (--wsi->u.ws.rx_packet_length == 0) { /* spill because we have the whole frame */ wsi->lws_rx_parse_state = LWS_RXPS_NEW; diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h index 06dd64c6..d6b51759 100644 --- a/lib/private-libwebsockets.h +++ b/lib/private-libwebsockets.h @@ -847,6 +847,7 @@ struct _lws_websocket_related { unsigned char rsv; /* zero if no info, or length including 2-byte close code */ unsigned char close_in_ping_buffer_len; + unsigned char utf8; unsigned int final:1; unsigned int frame_is_binary:1;