utf8 check compatible with extensions

Signed-off-by: Andy Green <andy.green@linaro.org>
This commit is contained in:
Andy Green 2015-12-30 11:43:36 +08:00
parent 9b81d3c967
commit 86c1ef1e7c
3 changed files with 84 additions and 64 deletions

View file

@ -291,67 +291,6 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
c ^= wsi->u.ws.mask_nonce[
(wsi->u.ws.frame_mask_index++) & 3];
/* if we skipped the 2-byte code at the start, UTF-8 after */
if (wsi->u.ws.opcode == LWSWSOPC_CLOSE &&
wsi->u.ws.rx_user_buffer_head == 2)
wsi->u.ws.check_utf8 = !!(wsi->context->options &
LWS_SERVER_OPTION_VALIDATE_UTF8);
if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) {
static const unsigned char e0f4[] = {
0xa0 | ((2 - 1) << 2) | 1, /* e0 */
0x80 | ((4 - 1) << 2) | 1, /* e1 */
0x80 | ((4 - 1) << 2) | 1, /* e2 */
0x80 | ((4 - 1) << 2) | 1, /* e3 */
0x80 | ((4 - 1) << 2) | 1, /* e4 */
0x80 | ((4 - 1) << 2) | 1, /* e5 */
0x80 | ((4 - 1) << 2) | 1, /* e6 */
0x80 | ((4 - 1) << 2) | 1, /* e7 */
0x80 | ((4 - 1) << 2) | 1, /* e8 */
0x80 | ((4 - 1) << 2) | 1, /* e9 */
0x80 | ((4 - 1) << 2) | 1, /* ea */
0x80 | ((4 - 1) << 2) | 1, /* eb */
0x80 | ((4 - 1) << 2) | 1, /* ec */
0x80 | ((2 - 1) << 2) | 1, /* ed */
0x80 | ((4 - 1) << 2) | 1, /* ee */
0x80 | ((4 - 1) << 2) | 1, /* ef */
0x90 | ((3 - 1) << 2) | 2, /* f0 */
0x80 | ((4 - 1) << 2) | 2, /* f1 */
0x80 | ((4 - 1) << 2) | 2, /* f2 */
0x80 | ((4 - 1) << 2) | 2, /* f3 */
0x80 | ((1 - 1) << 2) | 2, /* f4 */
0, /* s0 */
0x80 | ((4 - 1) << 2) | 0, /* s2 */
0x80 | ((4 - 1) << 2) | 1, /* s3 */
};
if (!wsi->u.ws.utf8) {
if (c >= 0x80) {
if (c < 0xc2 || c > 0xf4)
goto utf8_fail;
if (c < 0xe0)
wsi->u.ws.utf8 = 0x80 |
((4 - 1) << 2);
else
wsi->u.ws.utf8 = e0f4[c - 0xe0];
}
} else {
if (c < (wsi->u.ws.utf8 & 0xf0) ||
c >= (wsi->u.ws.utf8 & 0xf0) + 0x10 +
((wsi->u.ws.utf8 << 2) & 0x30))
goto utf8_fail;
wsi->u.ws.utf8 = e0f4[21 + (wsi->u.ws.utf8 & 3)];
}
/* we are ending partway through utf-8 character? */
if (wsi->u.ws.final && wsi->u.ws.rx_packet_length == 1 &&
wsi->u.ws.utf8) {
utf8_fail: lwsl_info("utf8 error\n");
return -1;
}
}
wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
(wsi->u.ws.rx_user_buffer_head++)] = c;
@ -386,6 +325,14 @@ spill:
switch (wsi->u.ws.opcode) {
case LWSWSOPC_CLOSE:
pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[
LWS_SEND_BUFFER_PRE_PADDING];
if (wsi->context->options & LWS_SERVER_OPTION_VALIDATE_UTF8 &&
wsi->u.ws.rx_user_buffer_head > 2 &&
lws_check_utf8(&wsi->u.ws.utf8, pp + 2,
wsi->u.ws.rx_user_buffer_head - 2))
goto utf8_fail;
/* is this an acknowledgement of our close? */
if (wsi->state == LWSS_AWAITING_CLOSE_ACK) {
/*
@ -395,8 +342,7 @@ spill:
lwsl_parser("seen server's close ack\n");
return -1;
}
pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[
LWS_SEND_BUFFER_PRE_PADDING];
lwsl_parser("client sees server close len = %d\n",
wsi->u.ws.rx_user_buffer_head);
if (wsi->u.ws.rx_user_buffer_head >= 2) {
@ -526,11 +472,25 @@ ping_drop:
&eff_buf, 0) < 0) /* fail */
return -1;
if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) {
if (lws_check_utf8(&wsi->u.ws.utf8,
(unsigned char *)eff_buf.token,
eff_buf.token_len))
goto utf8_fail;
/* we are ending partway through utf-8 character? */
if (wsi->u.ws.final && wsi->u.ws.utf8) {
utf8_fail: lwsl_info("utf8 error\n");
return -1;
}
}
if (eff_buf.token_len < 0 &&
callback_action != LWS_CALLBACK_CLIENT_RECEIVE_PONG)
goto already_done;
eff_buf.token[eff_buf.token_len] = '\0';
if (eff_buf.token)
eff_buf.token[eff_buf.token_len] = '\0';
if (!wsi->protocol->callback)
goto already_done;

View file

@ -1009,3 +1009,60 @@ _lws_rx_flow_control(struct lws *wsi)
return 0;
}
/**
 * lws_check_utf8() - incrementally validate a run of bytes as UTF-8
 *
 * @state: in/out parser state carried between calls; 0 means "not inside a
 *         multibyte character".  It is written back only when the whole of
 *         buf was accepted, so after a failure it still holds the value it
 *         had on entry.
 * @buf:   bytes to validate
 * @len:   number of bytes at buf
 *
 * Returns 0 if every byte was acceptable, or 1 as soon as a byte is seen
 * that cannot appear at that position.  Lead bytes below 0xc2 or above 0xf4,
 * and second bytes that would form an overlong encoding, a surrogate
 * (ed a0..bf) or a value above U+10FFFF (f4 90..), are all rejected.
 *
 * A nonzero *state after a successful return means the input stopped
 * partway through a character; whether that is an error is up to the
 * caller, since more of the same message may still follow.
 */
LWS_EXTERN int
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len)
{
	/*
	 * Each nonzero entry describes what the NEXT byte has to look like:
	 *
	 *   bits 7..4  high nibble of the lowest value allowed
	 *   bits 3..2  (count of 0x10-wide rows allowed) - 1
	 *   bits 1..0  which of the three trailing "s" entries (at index 21
	 *              onwards) becomes the state after that byte
	 *
	 * The first 21 entries are indexed by lead byte - 0xe0.
	 */
	static const unsigned char e0f4[] = {
		0xa0 | ((2 - 1) << 2) | 1, /* e0 */
		0x80 | ((4 - 1) << 2) | 1, /* e1 */
		0x80 | ((4 - 1) << 2) | 1, /* e2 */
		0x80 | ((4 - 1) << 2) | 1, /* e3 */
		0x80 | ((4 - 1) << 2) | 1, /* e4 */
		0x80 | ((4 - 1) << 2) | 1, /* e5 */
		0x80 | ((4 - 1) << 2) | 1, /* e6 */
		0x80 | ((4 - 1) << 2) | 1, /* e7 */
		0x80 | ((4 - 1) << 2) | 1, /* e8 */
		0x80 | ((4 - 1) << 2) | 1, /* e9 */
		0x80 | ((4 - 1) << 2) | 1, /* ea */
		0x80 | ((4 - 1) << 2) | 1, /* eb */
		0x80 | ((4 - 1) << 2) | 1, /* ec */
		0x80 | ((2 - 1) << 2) | 1, /* ed */
		0x80 | ((4 - 1) << 2) | 1, /* ee */
		0x80 | ((4 - 1) << 2) | 1, /* ef */
		0x90 | ((3 - 1) << 2) | 2, /* f0 */
		0x80 | ((4 - 1) << 2) | 2, /* f1 */
		0x80 | ((4 - 1) << 2) | 2, /* f2 */
		0x80 | ((4 - 1) << 2) | 2, /* f3 */
		0x80 | ((1 - 1) << 2) | 2, /* f4 */

		0,			   /* s0 */
		0x80 | ((4 - 1) << 2) | 0, /* s2 */
		0x80 | ((4 - 1) << 2) | 1, /* s3 */
	};
	unsigned char pending = *state;
	size_t pos;

	for (pos = 0; pos < len; pos++) {
		unsigned char ch = buf[pos];

		if (pending) {
			/* inside a character: ch must lie in [floor, limit) */
			int floor = pending & 0xf0;
			int limit = floor + 0x10 + ((pending << 2) & 0x30);

			if (ch < floor || ch >= limit)
				return 1;

			pending = e0f4[21 + (pending & 3)];
			continue;
		}

		/* plain ASCII needs no follow-up */
		if (ch < 0x80)
			continue;

		/* stray continuation, overlong 2-byte lead, or beyond f4 */
		if (ch < 0xc2 || ch > 0xf4)
			return 1;

		if (ch >= 0xe0)
			pending = e0f4[ch - 0xe0];
		else
			/* c2..df: exactly one byte in 80..bf follows */
			pending = 0x80 | ((4 - 1) << 2);
	}

	*state = pending;

	return 0;
}

View file

@ -1312,6 +1312,9 @@ time_in_microseconds(void);
LWS_EXTERN const char *
lws_plat_inet_ntop(int af, const void *src, char *dst, int cnt);
/*
 * lws_check_utf8() - check len bytes at buf as a continuation of a UTF-8
 * stream whose parser state is held in *state.
 *
 * *state is 0 when not inside a multibyte character.  Returns 0 if all of
 * buf was acceptable, storing the new state in *state (nonzero if buf ended
 * partway through a character); returns 1 at the first unacceptable byte,
 * in which case *state is not written.
 */
LWS_EXTERN int
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len);
#ifdef __cplusplus
};
#endif