diff --git a/include/libwebsockets/lws-dsh.h b/include/libwebsockets/lws-dsh.h index 09c950c05..afd265405 100644 --- a/include/libwebsockets/lws-dsh.h +++ b/include/libwebsockets/lws-dsh.h @@ -42,12 +42,18 @@ * All management structures exist inside the allocated buffer. */ +enum { + LWS_DSHFLAG_ENABLE_COALESCE = (1u << 24), + LWS_DSHFLAG_ENABLE_SPLIT = (1u << 25), +}; + /** * lws_dsh_create() - Allocate a DSH buffer * * \param owner: the owning list this dsh belongs on, or NULL if standalone * \param buffer_size: the allocation in bytes - * \param count_kinds: how many separately-tracked fifos use the buffer + * \param count_kinds: how many separately-tracked fifos use the buffer, or-ed + * with optional LWS_DSHFLAGs * * This makes a single heap allocation that includes internal tracking objects * in the buffer. Sub-allocated objects are bound to a "kind" index and @@ -115,6 +121,18 @@ lws_dsh_alloc_tail(struct lws_dsh *dsh, int kind, const void *src1, LWS_VISIBLE LWS_EXTERN void lws_dsh_free(void **obj); +/** + * lws_dsh_consume() - partially consume a dsh + * + * \param dsh: the dsh + * \param kind: the kind of allocation (0 +) + * \param len: length to consume + * + * Consume part of a dsh object. + */ +LWS_VISIBLE LWS_EXTERN void +lws_dsh_consume(struct lws_dsh *dsh, int kind, size_t len); + LWS_VISIBLE LWS_EXTERN size_t lws_dsh_get_size(struct lws_dsh *dsh, int kind); diff --git a/lib/core-net/lws-dsh.c b/lib/core-net/lws-dsh.c index fc332ae09..b2eb6e135 100644 --- a/lib/core-net/lws-dsh.c +++ b/lib/core-net/lws-dsh.c @@ -26,12 +26,17 @@ struct lws_dsh_search { size_t required; + ssize_t natural_required; int kind; lws_dsh_obj_t *best; lws_dsh_t *dsh; + lws_dsh_obj_t *tail_obj; + void *natural; /* coalesce address against last tail */ lws_dsh_t *already_checked; lws_dsh_t *this_dsh; + + char coalesce; }; static int @@ -50,13 +55,16 @@ lws_dsh_align(size_t length) } lws_dsh_t * -lws_dsh_create(lws_dll2_owner_t *owner, size_t buf_len, int count_kinds) +lws_dsh_create(lws_dll2_owner_t *owner, size_t buf_len, int _count_kinds) { - size_t oha_len = sizeof(lws_dsh_obj_head_t) * (unsigned int)(++count_kinds); + int count_kinds = _count_kinds & 0xff; lws_dsh_obj_t *obj; lws_dsh_t *dsh; + size_t oha_len; int n; + oha_len = sizeof(lws_dsh_obj_head_t) * (unsigned int)(++count_kinds); + assert(buf_len); assert(count_kinds > 1); assert(buf_len > sizeof(lws_dsh_t) + oha_len); @@ -73,6 +81,8 @@ lws_dsh_create(lws_dll2_owner_t *owner, size_t buf_len, int count_kinds) dsh->count_kinds = count_kinds; dsh->buffer_size = buf_len; dsh->being_destroyed = 0; + dsh->splitat = 0; + dsh->flags = (unsigned int)_count_kinds & 0xff000000u; /* clear down the obj heads array */ @@ -102,14 +112,43 @@ lws_dsh_create(lws_dll2_owner_t *owner, size_t buf_len, int count_kinds) return dsh; } +/* + * We're flicking through the hole list... if we find a suitable hole starting + * right after the current tail, it means we can coalesce against the current + * tail, that overrides all other considerations + */ + static int search_best_free(struct lws_dll2 *d, void *user) { struct lws_dsh_search *s = (struct lws_dsh_search *)user; lws_dsh_obj_t *obj = lws_container_of(d, lws_dsh_obj_t, list); - lwsl_debug("%s: obj %p, asize %zu (req %zu)\n", __func__, obj, - obj->asize, s->required); +// lwsl_debug("%s: obj %p, asize %zu (req %zu)\n", __func__, obj, +// obj->asize, s->required); + +// if (s->tail_obj) +// lwsl_notice("%s: tail est %d, splitat %d\n", __func__, +// (int)(s->tail_obj->asize + (size_t)s->natural_required), (int)s->dsh->splitat); + + + if (s->dsh->flags & LWS_DSHFLAG_ENABLE_COALESCE) { + if (obj == s->natural && s->tail_obj && + (int)obj->asize >= s->natural_required + && + (!s->dsh->splitat || + (size_t)(s->tail_obj->asize + + (size_t)s->natural_required) <= s->dsh->splitat) + ) { + // lwsl_user("%s: found natural\n", __func__); + s->dsh = s->this_dsh; + s->best = obj; + s->coalesce = 1; + } + + if (s->coalesce) + return 0; + } if (obj->asize >= s->required && (!s->best || obj->asize < s->best->asize)) { @@ -120,6 +159,12 @@ search_best_free(struct lws_dll2 *d, void *user) return 0; } +static int +buf_compare(const lws_dll2_t *d, const lws_dll2_t *i) +{ + return (int)lws_ptr_diff(d, i); +} + void lws_dsh_destroy(lws_dsh_t **pdsh) { @@ -161,25 +206,126 @@ _lws_dsh_alloc_tail(lws_dsh_t *dsh, int kind, const void *src1, size_t size1, * Search our free list looking for the smallest guy who will fit * what we want to allocate */ - s.required = asize; - s.kind = kind; - s.best = NULL; - s.already_checked = NULL; - s.this_dsh = dsh; + s.dsh = dsh; + s.required = asize; + s.kind = kind; + s.best = NULL; + s.already_checked = NULL; + s.this_dsh = dsh; + s.natural = NULL; + s.coalesce = 0; + s.natural_required = 0; + /* list is at the very start, so we can cast */ + s.tail_obj = (lws_dsh_obj_t *)dsh->oha[kind].owner.tail; + + if (s.tail_obj) { + + assert(s.tail_obj->kind == kind); + + /* + * there's a tail... precompute where a natural hole would + * have to start to be coalescable + */ + s.natural = (uint8_t *)s.tail_obj + s.tail_obj->asize; + /* + * ... and precompute the needed hole extent (including its + * obj part we would no longer need if we coalesced, and + * accounting for any unused / alignment part in the tail + */ + s.natural_required = (ssize_t)(lws_dsh_align(s.tail_obj->size + size1 + size2) - + s.tail_obj->asize + sizeof(lws_dsh_obj_t)); + +// lwsl_notice("%s: natural %p, tail len %d, nreq %d, splitat %d\n", __func__, s.natural, +// (int)s.tail_obj->size, (int)s.natural_required, (int)dsh->splitat); + } if (dsh && !dsh->being_destroyed) lws_dll2_foreach_safe(&dsh->oha[0].owner, &s, search_best_free); if (!s.best) { - lwsl_notice("%s: no buffer has space\n", __func__); + lwsl_notice("%s: no buffer has space for %lu\n", + __func__, (unsigned long)asize); return 1; } + if (s.coalesce) { + uint8_t *nf = (uint8_t *)&s.tail_obj[1] + s.tail_obj->size, + *e = (uint8_t *)s.best + s.best->asize, *ce; + lws_dsh_obj_t *rh; + size_t le; + + // lwsl_notice("%s: coalescing\n", __func__); + + /* + * logically remove the free list entry we're taking over the + * memory footprint of + */ + lws_dll2_remove(&s.best->list); + s.dsh->locally_free -= s.best->asize; + if (s.dsh->oha[kind].total_size < s.tail_obj->asize) { + lwsl_err("%s: total_size %d, asize %d, hdr size %d\n", __func__, + (int)s.dsh->oha[kind].total_size, + (int)s.tail_obj->asize, (int)sizeof(lws_dsh_obj_t)); + + assert(0); + } + s.dsh->oha[kind].total_size -= s.tail_obj->asize; + s.dsh->locally_in_use -= s.tail_obj->asize; + + memcpy(nf, src1, size1); + nf += size1; + if (size2) { + memcpy(nf, src2, size2); + nf += size2; + } + + /* + * adjust the tail guy's sizes to account for the coalesced + * data and alignment for the end point + */ + + s.tail_obj->size = s.tail_obj->size + size1 + size2; + s.tail_obj->asize = sizeof(lws_dsh_obj_t) + + lws_dsh_align(s.tail_obj->size); + + ce = (uint8_t *)s.tail_obj + s.tail_obj->asize; + assert(ce <= e); + le = lws_ptr_diff_size_t(e, ce); + + /* + * Now we have to decide what to do with any leftovers... + */ + + if (le < 64) + /* + * just absorb it into the coalesced guy as spare, and + * no need for a replacement hole + */ + s.tail_obj->asize += le; + else { + + rh = (lws_dsh_obj_t *)ce; + + memset(rh, 0, sizeof(*rh)); + rh->asize = le; + lws_dll2_add_sorted(&rh->list, &s.dsh->oha[0].owner, + buf_compare); + s.dsh->locally_free += rh->asize; + } + + s.dsh->oha[kind].total_size += s.tail_obj->asize; + s.dsh->locally_in_use += s.tail_obj->asize; + + return 0; + } + /* anything coming out of here must be aligned */ assert(!(((unsigned long)s.best) & (sizeof(int *) - 1))); if (s.best->asize < asize + (2 * sizeof(*s.best))) { + + // lwsl_notice("%s: exact\n", __func__); /* * Exact fit, or close enough we can't / don't want to have to * track the little bit of free area that would be left. @@ -205,8 +351,10 @@ _lws_dsh_alloc_tail(lws_dsh_t *dsh, int kind, const void *src1, size_t size1, replace->next->prev = &s.best->list; } else if (dsh) { - assert(!(((unsigned long)(intptr_t)(s.best)) & (sizeof(int *) - 1))); - lws_dll2_add_tail(&s.best->list, &dsh->oha[kind].owner); + assert(!(((unsigned long)(intptr_t)(s.best)) & + (sizeof(int *) - 1))); + lws_dll2_add_tail(&s.best->list, + &dsh->oha[kind].owner); } assert(s.dsh->locally_free >= s.best->asize); @@ -215,33 +363,56 @@ _lws_dsh_alloc_tail(lws_dsh_t *dsh, int kind, const void *src1, size_t size1, dsh->oha[kind].total_size += s.best->asize; assert(s.dsh->locally_in_use <= s.dsh->buffer_size); } else { - lws_dsh_obj_t *obj; - + lws_dsh_obj_t *nf; +#if defined(_DEBUG) + uint8_t *e = ((uint8_t *)s.best) + s.best->asize; +#endif /* * Free area was oversize enough that we need to split it. * - * Leave the first part of the free area where it is and - * reduce its extent by our asize. Use the latter part of - * the original free area as the allocation. + * Unlink the free area and move its header forward to account + * for our usage of its start area. It's like this so that we + * can coalesce sequential objects. */ - lwsl_debug("%s: splitting... free reduce %zu -> %zu\n", - __func__, s.best->asize, s.best->asize - asize); + //lwsl_notice("%s: splitting... free reduce %zu -> %zu\n", + // __func__, s.best->asize, s.best->asize - asize); - s.best->asize -= asize; + assert(s.best->asize >= asize); - /* latter part becomes new object */ + /* unlink the entire original hole object at s.best */ + lws_dll2_remove(&s.best->list); + s.dsh->locally_free -= s.best->asize; + s.dsh->locally_in_use += asize; - obj = (lws_dsh_obj_t *)(((uint8_t *)s.best) + lws_dsh_align(s.best->asize)); + /* latter part becomes new hole object */ - lws_dll2_clear(&obj->list); - obj->dsh = s.dsh; - obj->kind = kind; - obj->size = size1 + size2; - obj->asize = asize; + nf = (lws_dsh_obj_t *)(((uint8_t *)s.best) + asize); - memcpy(&obj[1], src1, size1); + assert((uint8_t *)nf < e); + + memset(nf, 0, sizeof(*nf)); + nf->asize = s.best->asize - asize; /* rump free part only */ + + assert(((uint8_t *)nf) + nf->asize <= e); + + lws_dll2_add_sorted(&nf->list, &s.dsh->oha[0].owner, buf_compare); + s.dsh->locally_free += s.best->asize; + + /* take over s.best as the new allocated object, fill it in */ + + s.best->dsh = s.dsh; + s.best->kind = kind; + s.best->size = size1 + size2; + s.best->asize = asize; + + // lwsl_notice("%s: split off kind %d\n", __func__, kind); + + assert((uint8_t *)s.best + s.best->asize < e); + assert((uint8_t *)s.best + s.best->asize <= (uint8_t *)nf); + + memcpy(&s.best[1], src1, size1); if (src2) - memcpy((uint8_t *)&obj[1] + size1, src2, size2); + memcpy((uint8_t *)&s.best[1] + size1, src2, size2); if (replace) { s.best->list.prev = replace->prev; @@ -253,13 +424,13 @@ _lws_dsh_alloc_tail(lws_dsh_t *dsh, int kind, const void *src1, size_t size1, replace->next->prev = &s.best->list; } else if (dsh) { - assert(!(((unsigned long)(intptr_t)(obj)) & (sizeof(int *) - 1))); - lws_dll2_add_tail(&obj->list, &dsh->oha[kind].owner); + assert(!(((unsigned long)(intptr_t)(s.best)) & + (sizeof(int *) - 1))); + lws_dll2_add_tail(&s.best->list, + &dsh->oha[kind].owner); } assert(s.dsh->locally_free >= asize); - s.dsh->locally_free -= asize; - s.dsh->locally_in_use += asize; dsh->oha[kind].total_size += asize; assert(s.dsh->locally_in_use <= s.dsh->buffer_size); } @@ -273,13 +444,50 @@ int lws_dsh_alloc_tail(lws_dsh_t *dsh, int kind, const void *src1, size_t size1, const void *src2, size_t size2) { - return _lws_dsh_alloc_tail(dsh, kind, src1, size1, src2, size2, NULL); + int r; + + do { + size_t s1 = size1, s2 = size2; + + if (!dsh->splitat || !(dsh->flags & LWS_DSHFLAG_ENABLE_SPLIT)) { + s1 = size1; + s2 = size2; + } else + if (s1 > dsh->splitat) { + s1 = dsh->splitat; + s2 = 0; + } else { + if (s1 + s2 > dsh->splitat) + s2 = dsh->splitat - s1; + } + r = _lws_dsh_alloc_tail(dsh, kind, src1, s1, src2, s2, NULL); + if (r) + return r; + src1 += s1; + src2 += s2; + size1 -= s1; + size2 -= s2; + } while (size1 + size2); + + return 0; } -static int -buf_compare(const lws_dll2_t *d, const lws_dll2_t *i) +void +lws_dsh_consume(struct lws_dsh *dsh, int kind, size_t len) { - return (int)lws_ptr_diff(d, i); + lws_dsh_obj_t *h = (lws_dsh_obj_t *)dsh->oha[kind + 1].owner.head; + + assert(len <= h->size); + assert(h->pos + len <= h->size); + + if (len == h->size || h->pos + len == h->size) { + lws_dsh_free((void **)&h); + return; + } + + assert(0); + + h->pos += len; } void @@ -303,6 +511,7 @@ lws_dsh_free(void **pobj) assert(dsh->locally_in_use >= _o->asize); dsh->locally_free += _o->asize; dsh->locally_in_use -= _o->asize; + assert(dsh->oha[_o->kind].total_size >= _o->asize); dsh->oha[_o->kind].total_size -= _o->asize; /* account for usage by kind */ assert(dsh->locally_in_use <= dsh->buffer_size); @@ -387,7 +596,7 @@ describe_kind(struct lws_dll2 *d, void *user) { lws_dsh_obj_t *obj = lws_container_of(d, lws_dsh_obj_t, list); - lwsl_info(" _obj %p - %p, dsh %p, size %zu, asize %zu\n", + lwsl_notice(" _obj %p - %p, dsh %p, size %zu, asize %zu\n", obj, (uint8_t *)obj + obj->asize, obj->dsh, obj->size, obj->asize); @@ -399,12 +608,12 @@ lws_dsh_describe(lws_dsh_t *dsh, const char *desc) { int n = 0; - lwsl_info("%s: dsh %p, bufsize %zu, kinds %d, lf: %zu, liu: %zu, %s\n", + lwsl_notice("%s: dsh %p, bufsize %zu, kinds %d, lf: %zu, liu: %zu, %s\n", __func__, dsh, dsh->buffer_size, dsh->count_kinds, dsh->locally_free, dsh->locally_in_use, desc); for (n = 0; n < dsh->count_kinds; n++) { - lwsl_info(" Kind %d:\n", n); + lwsl_notice(" Kind %d:\n", n); lws_dll2_foreach_safe(&dsh->oha[n].owner, dsh, describe_kind); } } diff --git a/lib/core/private-lib-core.h b/lib/core/private-lib-core.h index 8ba08520d..c867ebacd 100644 --- a/lib/core/private-lib-core.h +++ b/lib/core/private-lib-core.h @@ -153,6 +153,7 @@ typedef struct lws_dsh_obj { lws_dll2_t list; /* must be first */ struct lws_dsh *dsh; /* invalid when on free list */ size_t size; /* invalid when on free list */ + size_t pos; /* invalid when on free list */ size_t asize; int kind; /* so we can account at free */ } lws_dsh_obj_t; @@ -161,10 +162,12 @@ typedef struct lws_dsh { lws_dll2_t list; uint8_t *buf; lws_dsh_obj_head_t *oha; /* array of object heads/kind */ + size_t splitat; size_t buffer_size; size_t locally_in_use; size_t locally_free; int count_kinds; + uint32_t flags; uint8_t being_destroyed; /* * Overallocations at create: diff --git a/minimal-examples/api-tests/api-test-lws_dsh/main.c b/minimal-examples/api-tests/api-test-lws_dsh/main.c index befd490ea..3e938b2e1 100644 --- a/minimal-examples/api-tests/api-test-lws_dsh/main.c +++ b/minimal-examples/api-tests/api-test-lws_dsh/main.c @@ -196,7 +196,7 @@ test4(void) goto bail; } if (size != 4000) { - lwsl_err("%s: test 1 mismatch\n", __func__); + lwsl_err("%s: test 1 mismatch %d\n", __func__, (int)size); goto bail; } @@ -215,30 +215,20 @@ test4(void) } if (lws_dsh_get_head(dsh, 0, &a1, &size)) { - lwsl_err("%s: no head 1\n", __func__); + lwsl_err("%s: no head 4\n", __func__); goto bail; } if (size != 17 || memcmp(a1, "some other string", 17)) { - lwsl_err("%s: test 1 mismatch\n", __func__); + /* it may (should...) coalesce */ + if (size != 28 || memcmp(a1, "some other stringhello again", 28)) { + lwsl_err("%s: test 4 mismatch %d\n", __func__, (int)size); - goto bail; + goto bail; + } } lws_dsh_free(&a1); - if (lws_dsh_get_head(dsh, 0, &a1, &size)) { - lwsl_err("%s: no head 2\n", __func__); - - goto bail; - } - if (size != 11 || memcmp(a1, "hello again", 11)) { - lwsl_err("%s: test 2 mismatch (%zu)\n", __func__, size); - - goto bail; - } - - lws_dsh_free(&a1); - lws_dsh_destroy(&dsh); return 0; @@ -336,6 +326,90 @@ test5(void) return 0; } +int +test6(void) +{ + struct lws_dsh *dsh; + size_t size; + void *a1; + + /* + * test 6: confirm sequential coalescing + */ + + dsh = lws_dsh_create(NULL, 16384, 2); + if (!dsh) { + lwsl_err("%s: Failed to create dsh\n", __func__); + + return 1; + } + + if (lws_dsh_alloc_tail(dsh, 0, "hello", 5, NULL, 0)) { + lwsl_err("%s: Failed to alloc 1\n", __func__); + + goto bail; + } + + if (lws_dsh_alloc_tail(dsh, 1, "some other string", 17, NULL, 0)) { + lwsl_err("%s: Failed to alloc 2\n", __func__); + + goto bail; + } + + if (lws_dsh_alloc_tail(dsh, 0, "hello again", 11, NULL, 0)) { + lwsl_err("%s: Failed to alloc 3\n", __func__); + + goto bail; + } + + if (lws_dsh_get_head(dsh, 1, &a1, &size)) { + lwsl_err("%s: no head 1\n", __func__); + + goto bail; + } + if (size != 17 || memcmp(a1, "some other string", 17)) { + lwsl_err("%s: test 1 mismatch\n", __func__); + + goto bail; + } + lws_dsh_free(&a1); + + if (lws_dsh_get_head(dsh, 0, &a1, &size)) { + lwsl_err("%s: no head 2\n", __func__); + + goto bail; + } + if (size != 5 || memcmp(a1, "hello", 5)) { + lwsl_err("%s: test 2 mismatch\n", __func__); + + goto bail; + } + lws_dsh_free(&a1); + + if (lws_dsh_get_head(dsh, 0, &a1, &size)) { + lwsl_err("%s: no head 3\n", __func__); + + goto bail; + } + if (size != 11 || memcmp(a1, "hello again", 11)) { + lwsl_err("%s: test 3 mismatch\n", __func__); + + goto bail; + } + lws_dsh_free(&a1); + + lws_dsh_destroy(&dsh); + + return 0; +bail: +#if defined(_DEBUG) + lws_dsh_describe(dsh, "test6 fail"); +#endif + lws_dsh_destroy(&dsh); + + return 1; +} + int main(int argc, const char **argv) { int logs = LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE; @@ -351,19 +425,32 @@ int main(int argc, const char **argv) n = test1(); lwsl_user("%s: test1: %d\n", __func__, n); ret |= n; + if (ret) + goto bail; n = test3(); lwsl_user("%s: test3: %d\n", __func__, n); ret |= n; + if (ret) + goto bail; n = test4(); lwsl_user("%s: test4: %d\n", __func__, n); ret |= n; + if (ret) + goto bail; n = test5(); lwsl_user("%s: test5: %d\n", __func__, n); ret |= n; + if (ret) + goto bail; + n = test6(); + lwsl_user("%s: test6: %d\n", __func__, n); + ret |= n; + +bail: lwsl_user("Completed: %s\n", ret ? "FAIL" : "PASS"); return ret;