diff --git a/include/libwebsockets/lws-tokenize.h b/include/libwebsockets/lws-tokenize.h
index d2383c789..319355b33 100644
--- a/include/libwebsockets/lws-tokenize.h
+++ b/include/libwebsockets/lws-tokenize.h
@@ -141,3 +141,116 @@ lws_tokenize(struct lws_tokenize *ts);
 
 LWS_VISIBLE LWS_EXTERN int
 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, int max);
+
+
+/*
+ * lws_strexp: flexible string expansion helper api
+ *
+ * This stateful helper can handle multiple separate input chunks and multiple
+ * output buffer loads with arbitrary boundaries between literals and expanded
+ * symbols.  This allows it to handle fragmented input as well as arbitrarily
+ * long symbol expansions that are bigger than the output buffer itself.
+ *
+ * A user callback is used to convert symbol names to the symbol value.
+ *
+ * A single byte buffer for input and another for output can process any
+ * length substitution then.  The state object is around 64 bytes on a 64-bit
+ * system and it only uses 8 bytes stack.
+ */
+
+
+/*
+ * The expansion callback appends as much of the value of the symbol \p name as
+ * fits at out + *pos, never writing at or beyond out + olen, and advances *pos
+ * and *exp_ofs by the amount it wrote.  *exp_ofs is how much of the value was
+ * already emitted by earlier calls for the same symbol.  It returns LSTRX_DONE
+ * once the whole value is out, LSTRX_FILLED_OUT if there is more to come after
+ * the output buffer is reset, or a negative LSTRX_FATAL_ code.
+ */
+typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
+				    size_t *pos, size_t olen, size_t *exp_ofs);
+
+typedef struct lws_strexp {
+	char			name[32];	/* symbol name being collected */
+	lws_strexp_expand_cb	cb;		/* expands name into out */
+	void			*priv;		/* opaque, passed to cb */
+	char			*out;		/* current output buffer */
+	size_t			olen;		/* size of out in bytes */
+	size_t			pos;		/* next write offset in out */
+
+	size_t			exp_ofs;	/* amount of expansion emitted */
+
+	uint8_t			name_pos;	/* next write offset in name */
+	char			state;		/* parser state */
+} lws_strexp_t;
+
+enum {
+	LSTRX_DONE,			/* it completed OK */
+	LSTRX_FILLED_OUT,		/* out buf filled and needs resetting */
+	LSTRX_FATAL_NAME_TOO_LONG = -1,	/* fatal */
+	LSTRX_FATAL_NAME_UNKNOWN  = -2,
+};
+
+
+/**
+ * lws_strexp_init() - initialize an lws_strexp_t for use
+ *
+ * \p exp: the exp object to init
+ * \p priv: the user's object pointer to pass to callback
+ * \p cb: the callback to expand named objects
+ * \p out: the start of the output buffer
+ * \p olen: the length of the output buffer in bytes
+ *
+ * Prepares an lws_strexp_t for use and sets the initial output buffer
+ */
+LWS_VISIBLE LWS_EXTERN void
+lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
+		 char *out, size_t olen);
+
+/**
+ * lws_strexp_reset_out() - reset the output buffer on an existing strexp
+ *
+ * \p exp: the exp object to give the new output buffer to
+ * \p out: the start of the output buffer
+ * \p olen: the length of the output buffer in bytes
+ *
+ * Provides a new output buffer for lws_strexp_expand() to continue to write
+ * into.  It can be the same as the old one if it has been copied out or used.
+ * The position of the next write will be reset to the start of the given buf.
+ */
+LWS_VISIBLE LWS_EXTERN void
+lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
+
+/**
+ * lws_strexp_expand() - copy / expand a string into the output buffer
+ *
+ * \p exp: the exp object for the copy / expansion
+ * \p in: the start of the next input data
+ * \p len: the length of the input data
+ * \p pused_in: pointer to write the amount of input used
+ * \p pused_out: pointer to write the amount of the output buffer used
+ *
+ * Copies in to the output buffer set in exp, expanding any ${name} tokens using
+ * the callback.  *pused_in is set to the number of input chars used and
+ * *pused_out to the number of bytes now in the output buffer.
+ *
+ * Returns LSTRX_DONE when all of the input was used, the output is then NUL-
+ * terminated at out[*pused_out].  A '$' that ends the input is held back, since
+ * the next input chunk may continue it with '{'.
+ *
+ * May return LSTRX_FILLED_OUT early with *pused_in <= len if the output buffer
+ * is filled, in that case it is not NUL-terminated.  Handle the output buffer
+ * and reset it with lws_strexp_reset_out() before calling again with adjusted
+ * in / len to continue.
+ *
+ * In the case of large expansions, the expansion itself may fill the output
+ * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT
+ * and will be called again to continue with its *exp_ofs parameter set
+ * appropriately.
+ *
+ * Negative LSTRX_FATAL_ returns are errors, from here or from the callback.
+ */
+LWS_VISIBLE LWS_EXTERN int
+lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
+		  size_t *pused_in, size_t *pused_out);
+
diff --git a/lib/core/libwebsockets.c b/lib/core/libwebsockets.c
index acf2443c0..11b638b92 100644
--- a/lib/core/libwebsockets.c
+++ b/lib/core/libwebsockets.c
@@ -911,6 +911,136 @@ lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags)
 	ts->delim = LWSTZ_DT_NEED_FIRST_CONTENT;
 }
 
+
+typedef enum {
+	LWS_EXPS_LITERAL,	/* copying input through to the output */
+	LWS_EXPS_OPEN_OR_LIT,	/* holding a '$', "${" or literal comes next */
+	LWS_EXPS_NAME_OR_CLOSE,	/* inside "${", collecting the name until '}' */
+	LWS_EXPS_DRAIN,		/* the callback is emitting the expansion */
+} lws_strexp_state;
+
+void
+lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
+		 char *out, size_t olen)
+{
+	memset(exp, 0, sizeof(*exp));
+	exp->cb = cb;
+	exp->out = out;
+	exp->olen = olen;
+	exp->state = LWS_EXPS_LITERAL;
+	exp->priv = priv;
+}
+
+void
+lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen)
+{
+	exp->out = out;
+	exp->olen = olen;
+	exp->pos = 0;
+}
+
+int
+lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
+		  size_t *pused_in, size_t *pused_out)
+{
+	size_t used = 0;
+	int n;
+
+	/*
+	 * Every state below relies on there being at least one free byte in
+	 * the output buffer, so refuse to start on one that is already full
+	 */
+	if (exp->pos >= exp->olen) {
+		*pused_in = 0;
+		*pused_out = exp->pos;
+		return LSTRX_FILLED_OUT;
+	}
+
+	while (used < len) {
+
+		switch (exp->state) {
+		case LWS_EXPS_LITERAL:
+			if (*in == '$') {
+				exp->state = LWS_EXPS_OPEN_OR_LIT;
+				break;
+			}
+
+			exp->out[exp->pos++] = *in;
+			break;
+
+		case LWS_EXPS_OPEN_OR_LIT:
+			if (*in == '{') {
+				exp->state = LWS_EXPS_NAME_OR_CLOSE;
+				exp->name_pos = 0;
+				/* each expansion starts from its beginning */
+				exp->exp_ofs = 0;
+				break;
+			}
+
+			/* not "${" then, emit the '$' we were holding back */
+			exp->out[exp->pos++] = '$';
+			/* a second '$' may start "${" itself, hold that now */
+			if (*in == '$')
+				break;
+
+			exp->state = LWS_EXPS_LITERAL;
+			if (exp->pos >= exp->olen) {
+				/* no room for *in, it is retried as literal */
+				*pused_in = used;
+				*pused_out = exp->pos;
+				return LSTRX_FILLED_OUT;
+			}
+			exp->out[exp->pos++] = *in;
+			break;
+
+		case LWS_EXPS_NAME_OR_CLOSE:
+			if (*in == '}') {
+				exp->name[exp->name_pos] = '\0';
+				exp->state = LWS_EXPS_DRAIN;
+				goto drain;
+			}
+			if (exp->name_pos >= sizeof(exp->name) - 1) {
+				*pused_in = used;
+				*pused_out = exp->pos;
+				return LSTRX_FATAL_NAME_TOO_LONG;
+			}
+
+			exp->name[exp->name_pos++] = *in;
+			break;
+
+		case LWS_EXPS_DRAIN:
+drain:
+			/* the '}' only counts as used once the value is out */
+			*pused_in = used;
+			n = exp->cb(exp->priv, exp->name, exp->out, &exp->pos,
+				    exp->olen, &exp->exp_ofs);
+			*pused_out = exp->pos;
+			if (n == LSTRX_FILLED_OUT || n < 0)
+				return n;
+
+			exp->state = LWS_EXPS_LITERAL;
+			break;
+		}
+
+		used++;
+		in++;
+
+		/* keep a byte free for the next write or the closing NUL */
+		if (exp->pos >= exp->olen) {
+			*pused_in = used;
+			*pused_out = exp->pos;
+			return LSTRX_FILLED_OUT;
+		}
+	}
+
+	exp->out[exp->pos] = '\0';
+	*pused_in = used;
+	*pused_out = exp->pos;
+
+	return LSTRX_DONE;
+}
+
+
 #if LWS_MAX_SMP > 1
 
 void
diff --git a/minimal-examples/api-tests/api-test-lws_tokenize/main.c b/minimal-examples/api-tests/api-test-lws_tokenize/main.c
index 4fb23dc42..b243caa66 100644
--- a/minimal-examples/api-tests/api-test-lws_tokenize/main.c
+++ b/minimal-examples/api-tests/api-test-lws_tokenize/main.c
@@ -277,6 +277,43 @@ static const char *element_names[] = {
 	"LWS_TOKZE_QUOTED_STRING",
 };
 
+
+/*
+ * Test expansion callback: only knows the symbol "test", which it replaces
+ * with "replacement_string", resuming from *exp_ofs when it could not emit
+ * everything in one go
+ */
+static int
+exp_cb1(void *priv, const char *name, char *out, size_t *pos, size_t olen,
+	size_t *exp_ofs)
+{
+	static const char replace[] = "replacement_string";
+	size_t total, budget;
+
+	(void)priv;
+
+	if (strcmp(name, "test"))
+		return LSTRX_FATAL_NAME_UNKNOWN;
+
+	/* what is left of the replacement vs what the output can take */
+	total = sizeof(replace) - 1 - *exp_ofs;
+	budget = olen - *pos;
+	if (total < budget)
+		budget = total;
+
+	memcpy(out + *pos, replace + *exp_ofs, budget);
+	*exp_ofs += budget;
+	*pos += budget;
+
+	if (budget == total)
+		return LSTRX_DONE;
+
+	return LSTRX_FILLED_OUT;
+}
+
+static const char *exp_inp1 = "this-is-a-${test}-for-strexp";
+static const char *exp_inp2 = "${test}-$$-${test}";
+
 int main(int argc, const char **argv)
 {
 	struct lws_tokenize ts;
@@ -301,6 +338,106 @@ int main(int argc, const char **argv)
 	if ((p = lws_cmdline_option(argc, argv, "-f")))
 		flags = atoi(p);
 
+	/* lws_strexp */
+
+	{
+		size_t in_len, used_in, used_out;
+		lws_strexp_t exp;
+		char obuf[128];
+		const char *ip;
+
+		/* 1: everything fits in one output buffer */
+
+		obuf[0] = '\0';
+		lws_strexp_init(&exp, NULL, exp_cb1, obuf, sizeof(obuf));
+		n = lws_strexp_expand(&exp, exp_inp1, strlen(exp_inp1),
+				      &used_in, &used_out);
+		if (n != LSTRX_DONE || used_in != strlen(exp_inp1) ||
+		    strcmp(obuf, "this-is-a-replacement_string-for-strexp")) {
+			lwsl_notice("%s: obuf %s\n", __func__, obuf);
+			lwsl_err("%s: lws_strexp test 1 failed: %d\n", __func__, n);
+
+			return 1;
+		}
+
+		/* 2: the expansion straddles 16-byte output buffers */
+
+		ip = exp_inp1;
+		in_len = strlen(ip);
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_init(&exp, NULL, exp_cb1, obuf, 16);
+		n = lws_strexp_expand(&exp, ip, in_len, &used_in, &used_out);
+		if (n != LSTRX_FILLED_OUT || used_in != 16 || used_out != 16) {
+			lwsl_err("a\n");
+			return 1;
+		}
+
+		ip += used_in;
+		in_len -= used_in;
+
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_reset_out(&exp, obuf, 16);
+
+		n = lws_strexp_expand(&exp, ip, in_len, &used_in, &used_out);
+		if (n != LSTRX_FILLED_OUT || used_in != 5 || used_out != 16) {
+			lwsl_err("b: n %d, used_in %d, used_out %d\n", n,
+					(int)used_in, (int)used_out);
+			return 2;
+		}
+
+		ip += used_in;
+		in_len -= used_in;
+
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_reset_out(&exp, obuf, 16);
+
+		n = lws_strexp_expand(&exp, ip, in_len, &used_in, &used_out);
+		if (n != LSTRX_DONE || used_in != 7 || used_out != 7) {
+			lwsl_err("c: n %d, used_in %d, used_out %d\n", n,
+					(int)used_in, (int)used_out);
+			return 2;
+		}
+
+		/* 3: repeated symbol, and "$$" must not grow an extra '$' */
+
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_init(&exp, NULL, exp_cb1, obuf, sizeof(obuf));
+		n = lws_strexp_expand(&exp, exp_inp2, strlen(exp_inp2),
+				      &used_in, &used_out);
+		if (n != LSTRX_DONE || used_in != strlen(exp_inp2) ||
+		    strcmp(obuf, "replacement_string-$$-replacement_string")) {
+			lwsl_err("d: n %d, obuf %s\n", n, obuf);
+			return 3;
+		}
+
+		/* 4: expansion ends exactly at the end of the output buffer */
+
+		ip = exp_inp1;
+		in_len = strlen(ip);
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_init(&exp, NULL, exp_cb1, obuf, 28);
+		n = lws_strexp_expand(&exp, ip, in_len, &used_in, &used_out);
+		if (n != LSTRX_FILLED_OUT || used_in != 17 || used_out != 28) {
+			lwsl_err("e: n %d, used_in %d, used_out %d\n", n,
+					(int)used_in, (int)used_out);
+			return 4;
+		}
+
+		ip += used_in;
+		in_len -= used_in;
+
+		memset(obuf, 0, sizeof(obuf));
+		lws_strexp_reset_out(&exp, obuf, 28);
+
+		n = lws_strexp_expand(&exp, ip, in_len, &used_in, &used_out);
+		if (n != LSTRX_DONE || used_in != 11 || used_out != 11 ||
+		    strcmp(obuf, "-for-strexp")) {
+			lwsl_err("f: n %d, used_in %d, used_out %d\n", n,
+					(int)used_in, (int)used_out);
+			return 4;
+		}
+	}
+
 	/* sanity check lws_strnncpy() */
 
 	lws_strnncpy(dotstar, "12345678", 4, sizeof(dotstar));