diff --git a/READMEs/README.lws_fixed3232.md b/READMEs/README.lws_fixed3232.md new file mode 100644 index 000000000..f7728dad2 --- /dev/null +++ b/READMEs/README.lws_fixed3232.md @@ -0,0 +1,87 @@ +# lws_fixed3232 Fixed point arithmetic + +Lws provides reasonably fast fixed-point 32:32 arithmetic functions so code +can be designed to work without floating-point support. + +The underlying type is + +``` +typedef struct lws_fixed3232 { + int32_t whole; /* signed 32-bit int */ + int32_t frac; /* proportion from 0 to (100M - 1) */ +} lws_fx_t; +``` + +Either or both of whole or frac may be negative, indicating that the +combined scalar is negative. This is to deal with numbers less than +0 but greater than -1 not being able to use whole to indicating +signedness, since it's zero. This scheme allows .whole to be used +as a signed `int32_t` naturally. + +## Fractional representation + +The fractional part counts parts per 100M and is restricted to the range +0 .. 99999999. For convenience a constant `LWS_FX_FRACTION_MSD` is +defined with the value 100M. + +It's possible to declare constants naturally, but leading zeroes are not +valid on the fractional part, since C parses a leading 0 as indicating +the number is octal. + +For the case of negative values less than 1, the fractional part bears the +sign. + +Eg to declare 12.5, 6.0, -6.0, 0.1 and -0.1 + +``` + static const lws_fx_t x[2] = { { 12,50000000 }, { 6,0 }, + { -6, 0 }, { 0, 10000000 }, { 0, -10000000 } }; +``` + +There are some helpers + +|Helper|Function| +|---|---| +|`lws_neg(a)`|nonzero if a is negative in whole or fractional part| +|`lws_fx_set(a,w,f)`|Convenience to set `lws_fx_t` a in code, notices if w is negative and also marks f the same| + +## API style + +The APIs are given the storage for the result along with the const args. +The result pointer is also returned from the operation to make operation +chaining more natural. + +## Available operations + +``` +const lws_fx_t * +lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +const lws_fx_t * +lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +const lws_fx_t * +lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +const lws_fx_t * +lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +const lws_fx_t * +lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a); + +int /* -1 if a < b, 1 if a > b, 0 if exactly equal */ +lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b); + +int /* return whole, or whole + 1 if frac is nonzero */ +lws_fx_roundup(const lws_fx_t *a); + +int /* return whole */ +lws_fx_rounddown(const lws_fx_t *a); + +const char * /* format an lws_fx_t into a buffer */ +lws_fx_string(const lws_fx_t *a, char *buf, size_t size +``` + +div and sqrt operations are iterative, up to 64 loops. Multiply is relatively cheap +since it devolves to four integer multiply-adds. Add and Sub are trivially cheap. + diff --git a/include/libwebsockets.h b/include/libwebsockets.h index e999b06cf..1f867207d 100644 --- a/include/libwebsockets.h +++ b/include/libwebsockets.h @@ -612,6 +612,50 @@ struct lws_tokens; struct lws_vhost; struct lws; +typedef struct lws_fixed3232 { + int32_t whole; /* signed 32-bit int */ + int32_t frac; /* signed frac proportion from 0 to (100M - 1) */ +} lws_fx_t; + +#define LWS_FX_FRACTION_MSD 100000000 +#define lws_neg(a) (a->whole < 0 || a->frac < 0) +#define lws_fx_set(a, x, y) { a.whole = x; a.frac = x < 0 ? -y : y; } +#define lws_fix64(a) (((int64_t)a->whole << 32) + \ + (((1ll << 32) * (a->frac < 0 ? -a->frac : a->frac)) / LWS_FX_FRACTION_MSD)) +#define lws_fix64_abs(a) \ + ((((int64_t)(a->whole < 0 ? (-a->whole) : a->whole) << 32) + \ + (((1ll << 32) * (a->frac < 0 ? -a->frac : a->frac)) / LWS_FX_FRACTION_MSD))) + +#define lws_fix3232(a, a64) { a->whole = (int32_t)(a64 >> 32); \ + a->frac = (int32_t)((100000000 * (a64 & 0xffffffff)) >> 32); } + +LWS_VISIBLE LWS_EXTERN const lws_fx_t * +lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +LWS_VISIBLE LWS_EXTERN const lws_fx_t * +lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +LWS_VISIBLE LWS_EXTERN const lws_fx_t * +lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +LWS_VISIBLE LWS_EXTERN const lws_fx_t * +lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b); + +LWS_VISIBLE LWS_EXTERN const lws_fx_t * +lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a); + +LWS_VISIBLE LWS_EXTERN int +lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b); + +LWS_VISIBLE LWS_EXTERN int +lws_fx_roundup(const lws_fx_t *a); + +LWS_VISIBLE LWS_EXTERN int +lws_fx_rounddown(const lws_fx_t *a); + +LWS_VISIBLE LWS_EXTERN const char * +lws_fx_string(const lws_fx_t *a, char *buf, size_t size); + #include #include diff --git a/lib/core/libwebsockets.c b/lib/core/libwebsockets.c index 865292d29..9524b3bca 100644 --- a/lib/core/libwebsockets.c +++ b/lib/core/libwebsockets.c @@ -1728,3 +1728,217 @@ lws_sigbits(uintptr_t u) return n; } + +const lws_fx_t * +lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b) +{ + int32_t w, sf; + + w = a->whole + b->whole; + sf = a->frac + b->frac; + if (sf >= 100000000) { + w++; + r->frac = sf - 100000000; + } else if (sf < -100000000) { + w--; + r->frac = sf + 100000000; + } else + r->frac = sf; + + r->whole = w; + + return r; +} + +const lws_fx_t * +lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b) +{ + int32_t w; + + if (a->whole >= b->whole) { + w = a->whole - b->whole; + if (a->frac >= b->frac) + r->frac = a->frac - b->frac; + else { + w--; + r->frac = (100000000 + a->frac) - b->frac; + } + } else { + w = -(b->whole - a->whole); + if (b->frac >= a->frac) + r->frac = b->frac - a->frac; + else { + w++; + r->frac = (100000000 + b->frac) - a->frac; + } + } + r->whole = w; + + return r; +} + +const lws_fx_t * +lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b) +{ + int64_t _c1, _c2; + int32_t w, t; + char neg = 0; + + assert(a->frac < LWS_FX_FRACTION_MSD); + assert(b->frac < LWS_FX_FRACTION_MSD); + + /* we can't use r as a temp, because it may alias on to a, b */ + + w = a->whole * b->whole; + + if (!lws_neg(a) && !lws_neg(b)) { + _c2 = (((int64_t)((int64_t)a->frac) * (int64_t)b->frac) / + LWS_FX_FRACTION_MSD); + _c1 = ((int64_t)a->frac * ((int64_t)b->whole)) + + (((int64_t)a->whole) * (int64_t)b->frac) + _c2; + w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD); + } else + if (lws_neg(a) && !lws_neg(b)) { + _c2 = (((int64_t)((int64_t)-a->frac) * (int64_t)b->frac) / + LWS_FX_FRACTION_MSD); + _c1 = ((int64_t)-a->frac * (-(int64_t)b->whole)) + + (((int64_t)a->whole) * (int64_t)b->frac) - _c2; + w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD); + neg = 1; + } else + if (!lws_neg(a) && lws_neg(b)) { + _c2 = (((int64_t)((int64_t)a->frac) * (int64_t)-b->frac) / + LWS_FX_FRACTION_MSD); + _c1 = ((int64_t)a->frac * ((int64_t)b->whole)) - + (((int64_t)a->whole) * (int64_t)-b->frac) - _c2; + w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD); + neg = 1; + } else { + _c2 = (((int64_t)((int64_t)-a->frac) * (int64_t)-b->frac) / + LWS_FX_FRACTION_MSD); + _c1 = ((int64_t)-a->frac * ((int64_t)b->whole)) + + (((int64_t)a->whole) * (int64_t)-b->frac) - _c2; + w -= (int32_t)(_c1 / LWS_FX_FRACTION_MSD); + } + + t = (int32_t)(_c1 % LWS_FX_FRACTION_MSD); + r->whole = w; /* don't need a,b any further... now we can write to r */ + if (neg ^ !!(t < 0)) + r->frac = -t; + else + r->frac = t; + + return r; +} + +const lws_fx_t * +lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b) +{ + int64_t _a = lws_fix64_abs(a), _b = lws_fix64_abs(b), q = 0, d, m; + + if (!_b) + _a = 0; + else { + int c = 64 / 2 + 1; + + while (_a && c >= 0) { + d = _a / _b; + m = (_a % _b); + if (m < 0) + m = -m; + _a = m << 1; + q += d << (c--); + } + _a = q >> 1; + } + + if (lws_neg(a) ^ lws_neg(b)) { + r->whole = -(int32_t)(_a >> 32); + r->frac = -(int32_t)((100000000 * (_a & 0xffffffff)) >> 32); + } else { + r->whole = (int32_t)(_a >> 32); + r->frac = (int32_t)((100000000 * (_a & 0xffffffff)) >> 32); + } + + return r; +} + +const lws_fx_t * +lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a) +{ + uint64_t t, q = 0, b = 1ull << 62, v = ((uint64_t)a->whole << 32) + + (((uint64_t)a->frac << 32) / LWS_FX_FRACTION_MSD); + + while (b > 0x40) { + t = q + b; + if (v >= t) { + v -= t; + q = t + b; + } + v <<= 1; + b >>= 1; + } + + r->whole = (int32_t)(q >> 48); + r->frac = (int32_t)((((q >> 16) & 0xffffffff) * + LWS_FX_FRACTION_MSD) >> 32); + + return r; +} + +/* returns < 0 if a < b, >0 if a > b, or 0 if exactly equal */ + +int +lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b) +{ + if (a->whole < b->whole) + return -1; + if (a->whole > b->whole) + return 1; + + if (a->frac < b->frac) + return -1; + + if (a->frac > b->frac) + return 1; + + return 0; +} + +int +lws_fx_roundup(const lws_fx_t *a) +{ + if (!a->frac) + return a->whole; + + if (lws_neg(a)) + return a->whole - 1; + + return a->whole + 1; +} + +LWS_VISIBLE LWS_EXTERN int +lws_fx_rounddown(const lws_fx_t *a) +{ + return a->whole; +} + +LWS_VISIBLE LWS_EXTERN const char * +lws_fx_string(const lws_fx_t *a, char *buf, size_t size) +{ + int n, m = 7; + + if (lws_neg(a)) + n = lws_snprintf(buf, size - 1, "-%d.%08d", (int)-a->whole, + (int)(a->frac < 0 ? -a->frac : a->frac)); + else + n = lws_snprintf(buf, size - 1, "%d.%08d", (int)a->whole, + (int)a->frac); + + while (m-- && buf[n - 1] == '0') + n--; + + buf[n] = '\0'; + + return buf; +} diff --git a/minimal-examples-lowlevel/api-tests/api-test-lws_tokenize/main.c b/minimal-examples-lowlevel/api-tests/api-test-lws_tokenize/main.c index b9bbcea9e..b03725196 100644 --- a/minimal-examples-lowlevel/api-tests/api-test-lws_tokenize/main.c +++ b/minimal-examples-lowlevel/api-tests/api-test-lws_tokenize/main.c @@ -327,6 +327,7 @@ int main(int argc, const char **argv) struct lws_tokenize ts; lws_tokenize_elem e; const char *p; + char b1[22]; int n, logs = LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE /* for LLL_ verbosity above NOTICE to be built into lws, * lws must have been configured and built with @@ -379,6 +380,332 @@ int main(int argc, const char **argv) memcert[info.client_ssl_ca_mem_len++] = '\0'; } #endif + { + /* lws_fx_t */ + + lws_fx_t r, a, b; + + a.whole = 1; + a.frac = 0; + + b.whole = 5; + b.frac = 50000000; + + lws_fx_add(&r, &a, &b); + + if (r.whole != 6 || r.frac != 50000000) { + lwsl_err("%s: fixed3232: test1 fail\n", __func__); + return 1; + } + + + lws_fx_sub(&r, &b, &a); + + if (r.whole != 4 || r.frac != 50000000) { + lwsl_err("%s: fixed3232: test2 fail\n", __func__); + return 1; + } + + + a.whole = -1; + a.frac = 0; + + b.whole = 5; + b.frac = 50000000; + + lws_fx_add(&r, &a, &b); + + if (r.whole != 4 || r.frac != 50000000) { + lwsl_err("%s: fixed3232: test3 fail\n", __func__); + return 1; + } + + + a.whole = -1; + a.frac = 0; + + b.whole = -1; + b.frac = 50000000; + + lws_fx_add(&r, &a, &b); + + if (r.whole != -2 || r.frac != 50000000) { + lwsl_err("%s: fixed3232: test4 fail\n", __func__); + return 1; + } + + lws_fx_set(a, -1, 70000000); + lws_fx_set(b, -1, 50000000); + + lws_fx_add(&r, &a, &b); + + if (r.whole != -3 || r.frac != -20000000) { + lwsl_err("%s: fixed3232: test4a fail: %s\n", __func__, + lws_fx_string(&r, b1, sizeof(b1))); + return 1; + } + + lws_fx_set(a, 1, 70000000); + lws_fx_set(b, -1, 50000000); + + lws_fx_add(&r, &a, &b); + + if (r.whole != 0 || r.frac != 20000000) { + lwsl_err("%s: fixed3232: test4b fail: %s\n", __func__, + lws_fx_string(&r, b1, sizeof(b1))); + return 1; + } + + lws_fx_set(a, -1, 70000000); + lws_fx_set(b, 1, 50000000); + + lws_fx_add(&r, &a, &b); + + if (r.whole != 0 || r.frac != -20000000) { + lwsl_err("%s: fixed3232: test4b1 fail: %s\n", __func__, + lws_fx_string(&r, b1, sizeof(b1))); + return 1; + } + + lws_fx_set(a, 3, 0); + lws_fx_set(b, 5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 15 || r.frac != 30000000) { + lwsl_err("%s: fixed3232: test5 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, -3, 0); + lws_fx_set(b, 5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != -15 || r.frac != -30000000) { + lwsl_err("%s: fixed3232: test6 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 3, 0); + lws_fx_set(b, -5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != -15 || r.frac != -30000000) { + lwsl_err("%s: fixed3232: test7 fail: %d.%08d\n", __func__, r.whole, r.frac); + return 1; + } + + + lws_fx_set(a, -3, 0); + lws_fx_set(b, -5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 15 || r.frac != 30000000) { + lwsl_err("%s: fixed3232: test8 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 3, 50000000); + lws_fx_set(b, 5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 17 || r.frac != 85000000) { + lwsl_err("%s: fixed3232: test9 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, -3, 50000000); + lws_fx_set(b, 5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != -17 || r.frac != -85000000) { + lwsl_err("%s: fixed3232: test10 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 1, 50000000); + lws_fx_set(b, 0, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 0 || r.frac != 15000000) { + lwsl_err("%s: fixed3232: test10a fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, -1, 50000000); + lws_fx_set(b, 0, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 0 || r.frac != -15000000) { + lwsl_err("%s: fixed3232: test10b fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 1, 50000000); + lws_fx_set(b, 0, -10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 0 || r.frac != -15000000) { + lwsl_err("%s: fixed3232: test10c fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 3, 50000000); + lws_fx_set(b, -5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != -17 || r.frac != -85000000) { + lwsl_err("%s: fixed3232: test11 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, -3, 50000000); + lws_fx_set(b, -5, 10000000); + + lws_fx_mul(&r, &a, &b); + + if (r.whole != 17 || r.frac != 85000000) { + lwsl_err("%s: fixed3232: test12 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 27, 0); + lws_fx_set(b, 9, 0); + + lws_fx_div(&r, &a, &b); + + if (r.whole != 3 || r.frac != 0) { + lwsl_err("%s: fixed3232: test13 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 33, 33333333); + lws_fx_set(b, 11, 0); + + lws_fx_div(&r, &a, &b); + + if (r.whole != 3 || r.frac != 3030302) { + lwsl_err("%s: fixed3232: test14 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, 33, 33333333); + lws_fx_set(b, 1, 10000000); + + lws_fx_div(&r, &a, &b); + + if (r.whole != 30 || r.frac != 30303030) { + lwsl_err("%s: fixed3232: test15 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + lws_fx_set(a, -33, 33333333); + lws_fx_set(b, 1, 10000000); + + lws_fx_div(&r, &a, &b); + + if (r.whole != -30 || r.frac != -30303030) { + lwsl_err("%s: fixed3232: test16 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = 33; + a.frac = 33333333; + + b.whole = -1; + b.frac = 10000000; + + lws_fx_div(&r, &a, &b); + + if (r.whole != -30 || r.frac != -30303030) { + lwsl_err("%s: fixed3232: test17 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = -33; + a.frac = 33333333; + + b.whole = -1; + b.frac = 10000000; + + lws_fx_div(&r, &a, &b); + + if (r.whole != 30 || r.frac != 30303030) { + lwsl_err("%s: fixed3232: test18 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + + a.whole = 64; + a.frac = 0; + + lws_fx_sqrt(&r, &a); + + if (r.whole != 8 || r.frac != 0) { + lwsl_err("%s: fixed3232: test19 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = 16512; + a.frac = 25000000; + + lws_fx_sqrt(&r, &a); + + if (r.whole != 128 || r.frac != 50000000) { + lwsl_err("%s: fixed3232: test20 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = 22; + a.frac = 80000000; + + b.whole = 18; + b.frac = 0; + + lws_fx_sub(&r, &a, &b); + + if (r.whole != 4 || r.frac != 80000000) { + lwsl_err("%s: fixed3232: test21 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = 2; + a.frac = 80000000; + + b.whole = 18; + b.frac = 0; + + lws_fx_sub(&r, &a, &b); + + if (r.whole != -15 || r.frac != 20000000) { + lwsl_err("%s: fixed3232: test22 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + + a.whole = 24; + a.frac = 20000000; + + b.whole = 24; + b.frac = 0; + + lws_fx_sub(&r, &a, &b); + + if (r.whole != 0 || r.frac != 20000000) { + lwsl_err("%s: fixed3232: test23 fail: %d.%08u\n", __func__, r.whole, r.frac); + return 1; + } + } + + cx = lws_create_context(&info); /* lws_strexp */