mirror of
https://github.com/warmcat/libwebsockets.git
synced 2025-03-30 00:00:16 +01:00
utf8 check compatible with extensions
Signed-off-by: Andy Green <andy.green@linaro.org>
This commit is contained in:
parent
9b81d3c967
commit
86c1ef1e7c
3 changed files with 84 additions and 64 deletions
|
@ -291,67 +291,6 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
|
||||||
c ^= wsi->u.ws.mask_nonce[
|
c ^= wsi->u.ws.mask_nonce[
|
||||||
(wsi->u.ws.frame_mask_index++) & 3];
|
(wsi->u.ws.frame_mask_index++) & 3];
|
||||||
|
|
||||||
/* if we skipped the 2-byte code at the start, UTF-8 after */
|
|
||||||
if (wsi->u.ws.opcode == LWSWSOPC_CLOSE &&
|
|
||||||
wsi->u.ws.rx_user_buffer_head == 2)
|
|
||||||
wsi->u.ws.check_utf8 = !!(wsi->context->options &
|
|
||||||
LWS_SERVER_OPTION_VALIDATE_UTF8);
|
|
||||||
|
|
||||||
if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) {
|
|
||||||
static const unsigned char e0f4[] = {
|
|
||||||
0xa0 | ((2 - 1) << 2) | 1, /* e0 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e1 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e2 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e3 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e4 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e5 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e6 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e7 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e8 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* e9 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* ea */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* eb */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* ec */
|
|
||||||
0x80 | ((2 - 1) << 2) | 1, /* ed */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* ee */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* ef */
|
|
||||||
0x90 | ((3 - 1) << 2) | 2, /* f0 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 2, /* f1 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 2, /* f2 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 2, /* f3 */
|
|
||||||
0x80 | ((1 - 1) << 2) | 2, /* f4 */
|
|
||||||
|
|
||||||
0, /* s0 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 0, /* s2 */
|
|
||||||
0x80 | ((4 - 1) << 2) | 1, /* s3 */
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!wsi->u.ws.utf8) {
|
|
||||||
if (c >= 0x80) {
|
|
||||||
if (c < 0xc2 || c > 0xf4)
|
|
||||||
goto utf8_fail;
|
|
||||||
if (c < 0xe0)
|
|
||||||
wsi->u.ws.utf8 = 0x80 |
|
|
||||||
((4 - 1) << 2);
|
|
||||||
else
|
|
||||||
wsi->u.ws.utf8 = e0f4[c - 0xe0];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (c < (wsi->u.ws.utf8 & 0xf0) ||
|
|
||||||
c >= (wsi->u.ws.utf8 & 0xf0) + 0x10 +
|
|
||||||
((wsi->u.ws.utf8 << 2) & 0x30))
|
|
||||||
goto utf8_fail;
|
|
||||||
wsi->u.ws.utf8 = e0f4[21 + (wsi->u.ws.utf8 & 3)];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we are ending partway through utf-8 character? */
|
|
||||||
if (wsi->u.ws.final && wsi->u.ws.rx_packet_length == 1 &&
|
|
||||||
wsi->u.ws.utf8) {
|
|
||||||
utf8_fail: lwsl_info("utf8 error\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
|
wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
|
||||||
(wsi->u.ws.rx_user_buffer_head++)] = c;
|
(wsi->u.ws.rx_user_buffer_head++)] = c;
|
||||||
|
|
||||||
|
@ -386,6 +325,14 @@ spill:
|
||||||
|
|
||||||
switch (wsi->u.ws.opcode) {
|
switch (wsi->u.ws.opcode) {
|
||||||
case LWSWSOPC_CLOSE:
|
case LWSWSOPC_CLOSE:
|
||||||
|
pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[
|
||||||
|
LWS_SEND_BUFFER_PRE_PADDING];
|
||||||
|
if (wsi->context->options & LWS_SERVER_OPTION_VALIDATE_UTF8 &&
|
||||||
|
wsi->u.ws.rx_user_buffer_head > 2 &&
|
||||||
|
lws_check_utf8(&wsi->u.ws.utf8, pp + 2,
|
||||||
|
wsi->u.ws.rx_user_buffer_head - 2))
|
||||||
|
goto utf8_fail;
|
||||||
|
|
||||||
/* is this an acknowledgement of our close? */
|
/* is this an acknowledgement of our close? */
|
||||||
if (wsi->state == LWSS_AWAITING_CLOSE_ACK) {
|
if (wsi->state == LWSS_AWAITING_CLOSE_ACK) {
|
||||||
/*
|
/*
|
||||||
|
@ -395,8 +342,7 @@ spill:
|
||||||
lwsl_parser("seen server's close ack\n");
|
lwsl_parser("seen server's close ack\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[
|
|
||||||
LWS_SEND_BUFFER_PRE_PADDING];
|
|
||||||
lwsl_parser("client sees server close len = %d\n",
|
lwsl_parser("client sees server close len = %d\n",
|
||||||
wsi->u.ws.rx_user_buffer_head);
|
wsi->u.ws.rx_user_buffer_head);
|
||||||
if (wsi->u.ws.rx_user_buffer_head >= 2) {
|
if (wsi->u.ws.rx_user_buffer_head >= 2) {
|
||||||
|
@ -526,11 +472,25 @@ ping_drop:
|
||||||
&eff_buf, 0) < 0) /* fail */
|
&eff_buf, 0) < 0) /* fail */
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) {
|
||||||
|
if (lws_check_utf8(&wsi->u.ws.utf8,
|
||||||
|
(unsigned char *)eff_buf.token,
|
||||||
|
eff_buf.token_len))
|
||||||
|
goto utf8_fail;
|
||||||
|
|
||||||
|
/* we are ending partway through utf-8 character? */
|
||||||
|
if (wsi->u.ws.final && wsi->u.ws.utf8) {
|
||||||
|
utf8_fail: lwsl_info("utf8 error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (eff_buf.token_len < 0 &&
|
if (eff_buf.token_len < 0 &&
|
||||||
callback_action != LWS_CALLBACK_CLIENT_RECEIVE_PONG)
|
callback_action != LWS_CALLBACK_CLIENT_RECEIVE_PONG)
|
||||||
goto already_done;
|
goto already_done;
|
||||||
|
|
||||||
eff_buf.token[eff_buf.token_len] = '\0';
|
if (eff_buf.token)
|
||||||
|
eff_buf.token[eff_buf.token_len] = '\0';
|
||||||
|
|
||||||
if (!wsi->protocol->callback)
|
if (!wsi->protocol->callback)
|
||||||
goto already_done;
|
goto already_done;
|
||||||
|
|
|
@ -1009,3 +1009,60 @@ _lws_rx_flow_control(struct lws *wsi)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LWS_EXTERN int
|
||||||
|
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len)
|
||||||
|
{
|
||||||
|
static const unsigned char e0f4[] = {
|
||||||
|
0xa0 | ((2 - 1) << 2) | 1, /* e0 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e1 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e2 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e3 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e4 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e5 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e6 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e7 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e8 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* e9 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* ea */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* eb */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* ec */
|
||||||
|
0x80 | ((2 - 1) << 2) | 1, /* ed */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* ee */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* ef */
|
||||||
|
0x90 | ((3 - 1) << 2) | 2, /* f0 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 2, /* f1 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 2, /* f2 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 2, /* f3 */
|
||||||
|
0x80 | ((1 - 1) << 2) | 2, /* f4 */
|
||||||
|
|
||||||
|
0, /* s0 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 0, /* s2 */
|
||||||
|
0x80 | ((4 - 1) << 2) | 1, /* s3 */
|
||||||
|
};
|
||||||
|
unsigned char s = *state;
|
||||||
|
|
||||||
|
while (len--) {
|
||||||
|
unsigned char c = *buf++;
|
||||||
|
|
||||||
|
if (!s) {
|
||||||
|
if (c >= 0x80) {
|
||||||
|
if (c < 0xc2 || c > 0xf4)
|
||||||
|
return 1;
|
||||||
|
if (c < 0xe0)
|
||||||
|
s = 0x80 | ((4 - 1) << 2);
|
||||||
|
else
|
||||||
|
s = e0f4[c - 0xe0];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (c < (s & 0xf0) ||
|
||||||
|
c >= (s & 0xf0) + 0x10 + ((s << 2) & 0x30))
|
||||||
|
return 1;
|
||||||
|
s = e0f4[21 + (s & 3)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*state = s;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -1312,6 +1312,9 @@ time_in_microseconds(void);
|
||||||
LWS_EXTERN const char *
|
LWS_EXTERN const char *
|
||||||
lws_plat_inet_ntop(int af, const void *src, char *dst, int cnt);
|
lws_plat_inet_ntop(int af, const void *src, char *dst, int cnt);
|
||||||
|
|
||||||
|
LWS_EXTERN int
|
||||||
|
lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue