1
0
Fork 0
mirror of https://github.com/warmcat/libwebsockets.git synced 2025-03-09 00:00:04 +01:00

lws_fx: fixed point 3232 arithmetic

This introduces a fixed precision signed 32.32 fractional type that can
work on devices without an FPU.

The integer part works as an int32_t, the fractional part represents the
fractional proportion expressed as part of 100M, so 8 fractional decimal
digit precision which is more than enough for many applications.

Add and Sub are reasonably fast as they are scaled on to a combined
uint64_t, Multiply is a little slower as it takes four uint64_t multiplies
that are summed, and divide is expensive but accurate, done bitwise taking
up to 32 iterations involving uint64_t div and mod.
This commit is contained in:
Andy Green 2022-01-04 04:53:20 +00:00
parent 3070709f18
commit aadcd3c44a
4 changed files with 672 additions and 0 deletions

View file

@ -0,0 +1,87 @@
# lws_fixed3232 Fixed point arithmetic
Lws provides reasonably fast fixed-point 32:32 arithmetic functions so code
can be designed to work without floating-point support.
The underlying type is
```
typedef struct lws_fixed3232 {
int32_t whole; /* signed 32-bit int */
int32_t frac; /* proportion from 0 to (100M - 1) */
} lws_fx_t;
```
Either or both of whole or frac may be negative, indicating that the
combined scalar is negative. This is to deal with numbers less than
0 but greater than -1 not being able to use whole to indicating
signedness, since it's zero. This scheme allows .whole to be used
as a signed `int32_t` naturally.
## Fractional representation
The fractional part counts parts per 100M and is restricted to the range
0 .. 99999999. For convenience a constant `LWS_FX_FRACTION_MSD` is
defined with the value 100M.
It's possible to declare constants naturally, but leading zeroes are not
valid on the fractional part, since C parses a leading 0 as indicating
the number is octal.
For the case of negative values less than 1, the fractional part bears the
sign.
Eg to declare 12.5, 6.0, -6.0, 0.1 and -0.1
```
static const lws_fx_t x[2] = { { 12,50000000 }, { 6,0 },
{ -6, 0 }, { 0, 10000000 }, { 0, -10000000 } };
```
There are some helpers
|Helper|Function|
|---|---|
|`lws_neg(a)`|nonzero if a is negative in whole or fractional part|
|`lws_fx_set(a,w,f)`|Convenience to set `lws_fx_t` a in code, notices if w is negative and also marks f the same|
## API style
The APIs are given the storage for the result along with the const args.
The result pointer is also returned from the operation to make operation
chaining more natural.
## Available operations
```
const lws_fx_t *
lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
const lws_fx_t *
lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
const lws_fx_t *
lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
const lws_fx_t *
lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
const lws_fx_t *
lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a);
int /* -1 if a < b, 1 if a > b, 0 if exactly equal */
lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b);
int /* return whole, or whole + 1 if frac is nonzero */
lws_fx_roundup(const lws_fx_t *a);
int /* return whole */
lws_fx_rounddown(const lws_fx_t *a);
const char * /* format an lws_fx_t into a buffer */
lws_fx_string(const lws_fx_t *a, char *buf, size_t size
```
div and sqrt operations are iterative, up to 64 loops. Multiply is relatively cheap
since it devolves to four integer multiply-adds. Add and Sub are trivially cheap.

View file

@ -612,6 +612,50 @@ struct lws_tokens;
struct lws_vhost;
struct lws;
typedef struct lws_fixed3232 {
int32_t whole; /* signed 32-bit int */
int32_t frac; /* signed frac proportion from 0 to (100M - 1) */
} lws_fx_t;
#define LWS_FX_FRACTION_MSD 100000000
#define lws_neg(a) (a->whole < 0 || a->frac < 0)
#define lws_fx_set(a, x, y) { a.whole = x; a.frac = x < 0 ? -y : y; }
#define lws_fix64(a) (((int64_t)a->whole << 32) + \
(((1ll << 32) * (a->frac < 0 ? -a->frac : a->frac)) / LWS_FX_FRACTION_MSD))
#define lws_fix64_abs(a) \
((((int64_t)(a->whole < 0 ? (-a->whole) : a->whole) << 32) + \
(((1ll << 32) * (a->frac < 0 ? -a->frac : a->frac)) / LWS_FX_FRACTION_MSD)))
#define lws_fix3232(a, a64) { a->whole = (int32_t)(a64 >> 32); \
a->frac = (int32_t)((100000000 * (a64 & 0xffffffff)) >> 32); }
LWS_VISIBLE LWS_EXTERN const lws_fx_t *
lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
LWS_VISIBLE LWS_EXTERN const lws_fx_t *
lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
LWS_VISIBLE LWS_EXTERN const lws_fx_t *
lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
LWS_VISIBLE LWS_EXTERN const lws_fx_t *
lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b);
LWS_VISIBLE LWS_EXTERN const lws_fx_t *
lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a);
LWS_VISIBLE LWS_EXTERN int
lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b);
LWS_VISIBLE LWS_EXTERN int
lws_fx_roundup(const lws_fx_t *a);
LWS_VISIBLE LWS_EXTERN int
lws_fx_rounddown(const lws_fx_t *a);
LWS_VISIBLE LWS_EXTERN const char *
lws_fx_string(const lws_fx_t *a, char *buf, size_t size);
#include <libwebsockets/lws-dll2.h>
#include <libwebsockets/lws-map.h>

View file

@ -1728,3 +1728,217 @@ lws_sigbits(uintptr_t u)
return n;
}
const lws_fx_t *
lws_fx_add(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b)
{
int32_t w, sf;
w = a->whole + b->whole;
sf = a->frac + b->frac;
if (sf >= 100000000) {
w++;
r->frac = sf - 100000000;
} else if (sf < -100000000) {
w--;
r->frac = sf + 100000000;
} else
r->frac = sf;
r->whole = w;
return r;
}
const lws_fx_t *
lws_fx_sub(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b)
{
int32_t w;
if (a->whole >= b->whole) {
w = a->whole - b->whole;
if (a->frac >= b->frac)
r->frac = a->frac - b->frac;
else {
w--;
r->frac = (100000000 + a->frac) - b->frac;
}
} else {
w = -(b->whole - a->whole);
if (b->frac >= a->frac)
r->frac = b->frac - a->frac;
else {
w++;
r->frac = (100000000 + b->frac) - a->frac;
}
}
r->whole = w;
return r;
}
const lws_fx_t *
lws_fx_mul(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b)
{
int64_t _c1, _c2;
int32_t w, t;
char neg = 0;
assert(a->frac < LWS_FX_FRACTION_MSD);
assert(b->frac < LWS_FX_FRACTION_MSD);
/* we can't use r as a temp, because it may alias on to a, b */
w = a->whole * b->whole;
if (!lws_neg(a) && !lws_neg(b)) {
_c2 = (((int64_t)((int64_t)a->frac) * (int64_t)b->frac) /
LWS_FX_FRACTION_MSD);
_c1 = ((int64_t)a->frac * ((int64_t)b->whole)) +
(((int64_t)a->whole) * (int64_t)b->frac) + _c2;
w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD);
} else
if (lws_neg(a) && !lws_neg(b)) {
_c2 = (((int64_t)((int64_t)-a->frac) * (int64_t)b->frac) /
LWS_FX_FRACTION_MSD);
_c1 = ((int64_t)-a->frac * (-(int64_t)b->whole)) +
(((int64_t)a->whole) * (int64_t)b->frac) - _c2;
w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD);
neg = 1;
} else
if (!lws_neg(a) && lws_neg(b)) {
_c2 = (((int64_t)((int64_t)a->frac) * (int64_t)-b->frac) /
LWS_FX_FRACTION_MSD);
_c1 = ((int64_t)a->frac * ((int64_t)b->whole)) -
(((int64_t)a->whole) * (int64_t)-b->frac) - _c2;
w += (int32_t)(_c1 / LWS_FX_FRACTION_MSD);
neg = 1;
} else {
_c2 = (((int64_t)((int64_t)-a->frac) * (int64_t)-b->frac) /
LWS_FX_FRACTION_MSD);
_c1 = ((int64_t)-a->frac * ((int64_t)b->whole)) +
(((int64_t)a->whole) * (int64_t)-b->frac) - _c2;
w -= (int32_t)(_c1 / LWS_FX_FRACTION_MSD);
}
t = (int32_t)(_c1 % LWS_FX_FRACTION_MSD);
r->whole = w; /* don't need a,b any further... now we can write to r */
if (neg ^ !!(t < 0))
r->frac = -t;
else
r->frac = t;
return r;
}
const lws_fx_t *
lws_fx_div(lws_fx_t *r, const lws_fx_t *a, const lws_fx_t *b)
{
int64_t _a = lws_fix64_abs(a), _b = lws_fix64_abs(b), q = 0, d, m;
if (!_b)
_a = 0;
else {
int c = 64 / 2 + 1;
while (_a && c >= 0) {
d = _a / _b;
m = (_a % _b);
if (m < 0)
m = -m;
_a = m << 1;
q += d << (c--);
}
_a = q >> 1;
}
if (lws_neg(a) ^ lws_neg(b)) {
r->whole = -(int32_t)(_a >> 32);
r->frac = -(int32_t)((100000000 * (_a & 0xffffffff)) >> 32);
} else {
r->whole = (int32_t)(_a >> 32);
r->frac = (int32_t)((100000000 * (_a & 0xffffffff)) >> 32);
}
return r;
}
const lws_fx_t *
lws_fx_sqrt(lws_fx_t *r, const lws_fx_t *a)
{
uint64_t t, q = 0, b = 1ull << 62, v = ((uint64_t)a->whole << 32) +
(((uint64_t)a->frac << 32) / LWS_FX_FRACTION_MSD);
while (b > 0x40) {
t = q + b;
if (v >= t) {
v -= t;
q = t + b;
}
v <<= 1;
b >>= 1;
}
r->whole = (int32_t)(q >> 48);
r->frac = (int32_t)((((q >> 16) & 0xffffffff) *
LWS_FX_FRACTION_MSD) >> 32);
return r;
}
/* returns < 0 if a < b, >0 if a > b, or 0 if exactly equal */
int
lws_fx_comp(const lws_fx_t *a, const lws_fx_t *b)
{
if (a->whole < b->whole)
return -1;
if (a->whole > b->whole)
return 1;
if (a->frac < b->frac)
return -1;
if (a->frac > b->frac)
return 1;
return 0;
}
int
lws_fx_roundup(const lws_fx_t *a)
{
if (!a->frac)
return a->whole;
if (lws_neg(a))
return a->whole - 1;
return a->whole + 1;
}
LWS_VISIBLE LWS_EXTERN int
lws_fx_rounddown(const lws_fx_t *a)
{
return a->whole;
}
LWS_VISIBLE LWS_EXTERN const char *
lws_fx_string(const lws_fx_t *a, char *buf, size_t size)
{
int n, m = 7;
if (lws_neg(a))
n = lws_snprintf(buf, size - 1, "-%d.%08d", (int)-a->whole,
(int)(a->frac < 0 ? -a->frac : a->frac));
else
n = lws_snprintf(buf, size - 1, "%d.%08d", (int)a->whole,
(int)a->frac);
while (m-- && buf[n - 1] == '0')
n--;
buf[n] = '\0';
return buf;
}

View file

@ -327,6 +327,7 @@ int main(int argc, const char **argv)
struct lws_tokenize ts;
lws_tokenize_elem e;
const char *p;
char b1[22];
int n, logs = LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE
/* for LLL_ verbosity above NOTICE to be built into lws,
* lws must have been configured and built with
@ -379,6 +380,332 @@ int main(int argc, const char **argv)
memcert[info.client_ssl_ca_mem_len++] = '\0';
}
#endif
{
/* lws_fx_t */
lws_fx_t r, a, b;
a.whole = 1;
a.frac = 0;
b.whole = 5;
b.frac = 50000000;
lws_fx_add(&r, &a, &b);
if (r.whole != 6 || r.frac != 50000000) {
lwsl_err("%s: fixed3232: test1 fail\n", __func__);
return 1;
}
lws_fx_sub(&r, &b, &a);
if (r.whole != 4 || r.frac != 50000000) {
lwsl_err("%s: fixed3232: test2 fail\n", __func__);
return 1;
}
a.whole = -1;
a.frac = 0;
b.whole = 5;
b.frac = 50000000;
lws_fx_add(&r, &a, &b);
if (r.whole != 4 || r.frac != 50000000) {
lwsl_err("%s: fixed3232: test3 fail\n", __func__);
return 1;
}
a.whole = -1;
a.frac = 0;
b.whole = -1;
b.frac = 50000000;
lws_fx_add(&r, &a, &b);
if (r.whole != -2 || r.frac != 50000000) {
lwsl_err("%s: fixed3232: test4 fail\n", __func__);
return 1;
}
lws_fx_set(a, -1, 70000000);
lws_fx_set(b, -1, 50000000);
lws_fx_add(&r, &a, &b);
if (r.whole != -3 || r.frac != -20000000) {
lwsl_err("%s: fixed3232: test4a fail: %s\n", __func__,
lws_fx_string(&r, b1, sizeof(b1)));
return 1;
}
lws_fx_set(a, 1, 70000000);
lws_fx_set(b, -1, 50000000);
lws_fx_add(&r, &a, &b);
if (r.whole != 0 || r.frac != 20000000) {
lwsl_err("%s: fixed3232: test4b fail: %s\n", __func__,
lws_fx_string(&r, b1, sizeof(b1)));
return 1;
}
lws_fx_set(a, -1, 70000000);
lws_fx_set(b, 1, 50000000);
lws_fx_add(&r, &a, &b);
if (r.whole != 0 || r.frac != -20000000) {
lwsl_err("%s: fixed3232: test4b1 fail: %s\n", __func__,
lws_fx_string(&r, b1, sizeof(b1)));
return 1;
}
lws_fx_set(a, 3, 0);
lws_fx_set(b, 5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 15 || r.frac != 30000000) {
lwsl_err("%s: fixed3232: test5 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -3, 0);
lws_fx_set(b, 5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != -15 || r.frac != -30000000) {
lwsl_err("%s: fixed3232: test6 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 3, 0);
lws_fx_set(b, -5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != -15 || r.frac != -30000000) {
lwsl_err("%s: fixed3232: test7 fail: %d.%08d\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -3, 0);
lws_fx_set(b, -5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 15 || r.frac != 30000000) {
lwsl_err("%s: fixed3232: test8 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 3, 50000000);
lws_fx_set(b, 5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 17 || r.frac != 85000000) {
lwsl_err("%s: fixed3232: test9 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -3, 50000000);
lws_fx_set(b, 5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != -17 || r.frac != -85000000) {
lwsl_err("%s: fixed3232: test10 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 1, 50000000);
lws_fx_set(b, 0, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 0 || r.frac != 15000000) {
lwsl_err("%s: fixed3232: test10a fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -1, 50000000);
lws_fx_set(b, 0, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 0 || r.frac != -15000000) {
lwsl_err("%s: fixed3232: test10b fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 1, 50000000);
lws_fx_set(b, 0, -10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 0 || r.frac != -15000000) {
lwsl_err("%s: fixed3232: test10c fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 3, 50000000);
lws_fx_set(b, -5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != -17 || r.frac != -85000000) {
lwsl_err("%s: fixed3232: test11 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -3, 50000000);
lws_fx_set(b, -5, 10000000);
lws_fx_mul(&r, &a, &b);
if (r.whole != 17 || r.frac != 85000000) {
lwsl_err("%s: fixed3232: test12 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 27, 0);
lws_fx_set(b, 9, 0);
lws_fx_div(&r, &a, &b);
if (r.whole != 3 || r.frac != 0) {
lwsl_err("%s: fixed3232: test13 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 33, 33333333);
lws_fx_set(b, 11, 0);
lws_fx_div(&r, &a, &b);
if (r.whole != 3 || r.frac != 3030302) {
lwsl_err("%s: fixed3232: test14 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, 33, 33333333);
lws_fx_set(b, 1, 10000000);
lws_fx_div(&r, &a, &b);
if (r.whole != 30 || r.frac != 30303030) {
lwsl_err("%s: fixed3232: test15 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
lws_fx_set(a, -33, 33333333);
lws_fx_set(b, 1, 10000000);
lws_fx_div(&r, &a, &b);
if (r.whole != -30 || r.frac != -30303030) {
lwsl_err("%s: fixed3232: test16 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 33;
a.frac = 33333333;
b.whole = -1;
b.frac = 10000000;
lws_fx_div(&r, &a, &b);
if (r.whole != -30 || r.frac != -30303030) {
lwsl_err("%s: fixed3232: test17 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = -33;
a.frac = 33333333;
b.whole = -1;
b.frac = 10000000;
lws_fx_div(&r, &a, &b);
if (r.whole != 30 || r.frac != 30303030) {
lwsl_err("%s: fixed3232: test18 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 64;
a.frac = 0;
lws_fx_sqrt(&r, &a);
if (r.whole != 8 || r.frac != 0) {
lwsl_err("%s: fixed3232: test19 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 16512;
a.frac = 25000000;
lws_fx_sqrt(&r, &a);
if (r.whole != 128 || r.frac != 50000000) {
lwsl_err("%s: fixed3232: test20 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 22;
a.frac = 80000000;
b.whole = 18;
b.frac = 0;
lws_fx_sub(&r, &a, &b);
if (r.whole != 4 || r.frac != 80000000) {
lwsl_err("%s: fixed3232: test21 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 2;
a.frac = 80000000;
b.whole = 18;
b.frac = 0;
lws_fx_sub(&r, &a, &b);
if (r.whole != -15 || r.frac != 20000000) {
lwsl_err("%s: fixed3232: test22 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
a.whole = 24;
a.frac = 20000000;
b.whole = 24;
b.frac = 0;
lws_fx_sub(&r, &a, &b);
if (r.whole != 0 || r.frac != 20000000) {
lwsl_err("%s: fixed3232: test23 fail: %d.%08u\n", __func__, r.whole, r.frac);
return 1;
}
}
cx = lws_create_context(&info);
/* lws_strexp */