diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ca269957..53eb5b44e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -329,6 +329,16 @@ else() set(LWS_WITH_NETLINK 0) endif() +# +# Compressed backtraces +# +option(LWS_WITH_COMPRESSED_BACKTRACES "Build with support for compressed backtraces" OFF) +set(LWS_COMPRESSED_BACKTRACES_SNIP_PRE 2 CACHE STRING "Amount of callstack to snip from top") +set(LWS_COMPRESSED_BACKTRACES_SNIP_POST 1 CACHE STRING "Amount of callstack to snip from bottom") +option(LWS_WITH_ALLOC_METADATA_LWS "Build lws_*alloc() with compressed backtraces (requires WITH_COMPRESSED_BACKTRACES)" OFF) + + + if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS") # its openssl has md5 deprecated set(LWS_SUPPRESS_DEPRECATED_API_WARNINGS 1) diff --git a/READMEs/README.lws_backtrace.md b/READMEs/README.lws_backtrace.md new file mode 100644 index 000000000..f38893741 --- /dev/null +++ b/READMEs/README.lws_backtrace.md @@ -0,0 +1,181 @@ +# lws_backtrace and lws_alloc_metadata + +|Area|Definition| +|---|---| +|Cmake|`LWS_WITH_COMPRESSED_BACKTRACES` default OFF| +|API|`./include/libwebsockets/lws-backtrace.h`| +|README|./READMEs/README.lws_backtrace.md| + +## lws_backtrace + +The `lws_backtrace` apis provide a way to collect backtrace addresses into a +struct, and an efficient domain-specific compressor to reduce the number of +bytes needed to express the backtrace stack. + +This information is particularly useful in RTOS type systems to understand heap +usage. The information would typically be sent off the embedded device, in logs +or in its own stream, and decompressed and processed off the embedded device, +converted to source information via addr2line or similar. + +It only works with gcc and probably clang at the moment (patches welcome).
+ +## lws_alloc_metadata apis + +This provides helpers on top of `lws_backtrace` that are suitable for adapting +your heap allocator to create compressed metadata such as the call stack at +allocation time + + - optionally report allocation and free events with this information + synchronously to a user supplied callback + + - optionally conceal the additional metadata behind allocations transparently + +The extra metadata contains information on allocation size, and the backtrace of +the code path that originally performed the allocation. Live allocations are +also listed on one or more lws_dll2_owner_t that can be walked to dump active +allocations along with the responsible code path. + +## Tuning the call stack + +Entries at both ends of the call stack may be invariant and therefore just +bloat to store. At the top end of the call stack, the backtrace will show the +path through lws_backtrace apis and perhaps other apis. At the bottom end, +depending on your system, the backtrace may detail call sequences from the +loader that started your application. + +For those reasons, the cmake variables `LWS_COMPRESSED_BACKTRACES_SNIP_PRE` +and `LWS_COMPRESSED_BACKTRACES_SNIP_POST` (defaulting to 2 and 1 respectively) +may be set to remove invariant, uninteresting call stack information from the +top and bottom of the call stack. + +## LWS_WITH_ALLOC_METADATA_LWS + +An optional, off-by-default implementation is provided for the lws_*alloc() +apis, using the alloc_metadata apis to instrument all allocations via +lws_*alloc(). This is not so useful as instrumenting the system allocator with +alloc_metadata apis, since it only shows lws allocations, but it is a complete +example to show how to do it. 
+ +## Allocator instrumentation and thread-safety + +Unless your application is totally singlethreaded, when instrumenting a real +allocator, care must be taken with + + - `_lws_alloc_metadata_adjust()` + - `_lws_alloc_metadata_trim()` + - `_lws_alloc_metadata_dump()` + +apis which deal with the hidden overallocation and listing allocations, that +they are called from a locked critical section that disallows reentry, either +an existing one that the allocator already uses, or add a new mutex. + +## Dumping entire active instrumented heap allocations + +Calling `_lws_alloc_metadata_dump()` allows you to walk the current list of +allocations from a heap and dump the backtrace responsible for its allocation. +You can define your own iterator callback, or use a helper callback that is +provided, `lws_alloc_metadata_dump_stdout`, which issues the heap metadata in +the lws convention base64 format described below. + +## Convention for emission of compressed backtraces + +To simplify triggering dumps, a convention is defined with a 3-character +lead-in identifying lines as dumps or backtraces. This kind of approach makes +it easy to emit the metadata into logs and fish them out with grep or similar. + +|lead-in|signifies|Example| +|---|---|---| +|~m#|Compressed allocator backtrace, eg, emitted into logs|~m#IF0BmagugNDWgCnkhdAYpQa6wAAV| +|~b#|Decoded, uncompressed backtrace line suitable for `addr2line`|~b#size: 7520, 0x406651 0x406852 0x406c1b 0x406294| + +Both examples are complete representations of the same 4-level, 64-bit compressed +backtrace. + +## Compressed backtrace decode tool + +The `lws-api-test-backtrace` example (requires `LWS_WITH_COMPRESSED_BACKTRACES` +to build) decodes the base64 representations with or without the 3-character +lead-in, to textual output suitable for `addr2line`. 
Eg + +``` +$ echo -n "~m#IF0BmagugNDWgCnkhdAYpQa6wAAV" | lws-api-test-backtrace +~b#size: 7520, 0x406651 0x406852 0x406c1b 0x406294 +``` + +You can use it with `addr2line` in this kind of way (you probably want to give +`-f -p` to `addr2line` as well) + +``` +addr2line -e myapplication `echo -n "~m#IF0BmUQugNCkgCnkhdAYpQa6wAAV" | ./bin/lws-api-test-backtrace 2>/dev/null | grep '~b#' | cut -d',' -f2-` +/projects/libwebsockets/lib/core/alloc.c:124 +/projects/libwebsockets/lib/core/alloc.c:213 +/projects/libwebsockets/lib/core/context.c:600 +/projects/myapplication/main.c:55 +``` + +There is a shell script `./contrib/heapmap.sh` which takes a screenscrape of +a dump's `~m#` log lines and processes them into an allocation size, backtrace, +and function names (especially in RELEASE mode, either a function name hint or +the source coordinates are available). + +## lws_backtrace compression + +The compressed blob has an outer structure designed for prepending, where the +information available at recovery is a pointer to the end of it. + +![overview](../doc-assets/backtrace.png) + +### Outer compressed blob layout in memory + +This goes behind the reported allocation, the actual allocation is increased +to allow for it and we report what the caller asked for by pointing at the end +of this. It means eg at free() time, we are told the address just past the end +of this and work backwards to find the start of the compressed blob (which is +further aligned backwards to ptr boundary to recover the true allocation start). 
+ +|data|bits|meaning| +|---|---|---| +|compressed blob|variable, padded to byte boundary|Backtrace and extra info| +|compressed length|fixed, 16|MSB-first 16-bit byte count of compressed blob, including the 16-bit length itself| +|lws_dll2_t|fixed, 3 x pointers|linked-list for tracking| + +### Bitwise structure inside the compressed blob + +|data|bits|meaning| +|---|---|---| +|stack depth|5|Number of backtrace callstack levels present| +|Call stack items, one per stack depth|variable|Compressed Instruction Pointer value| +|alloc size bits|6|Number of bits in alloc size| +|alloc size literal|variable|Allocation size| + +### Call stack item domain-specific compression + +The goal is to compress 32- or 64-bit backtraces efficiently. + +The Call stack items are compressed one of two ways and start with a bit +indicating which method was used for this Call stack item. + + - 0 = literal value, 1 = delta against a previous reference value + +The literals issue a bit count and then the significant bits + + - a 6-bit bit count + - the significant bits of the literal + +The delta from a previous Call stack item looks like this: + + - a 3-bit index (from -1 to -8) says how far back from the +current stack item the reference value can be found from the call stack + - a 1-bit sign where 0 == add the delta and 1 == subtract the delta + - a 6-bit bit count for the delta + - the significant bits of the delta + +The delta is decoded, and added or subtracted from the earlier reference result +to arrive at the correct reconstruction. + +The first Call stack item is always a literal. + +## Note for esp-idf + +Backtrace generation in esp-idf requires `CONFIG_COMPILER_CXX_EXCEPTIONS` set +in sdkconfig. 
diff --git a/cmake/lws_config.h.in b/cmake/lws_config.h.in index 90f94ec8a..e87b18725 100644 --- a/cmake/lws_config.h.in +++ b/cmake/lws_config.h.in @@ -145,10 +145,14 @@ #cmakedefine LWS_WITH_ABSTRACT #cmakedefine LWS_WITH_ACCESS_LOG #cmakedefine LWS_WITH_ACME +#cmakedefine LWS_WITH_ALLOC_METADATA_LWS #cmakedefine LWS_WITH_ALSA #cmakedefine LWS_WITH_SYS_ASYNC_DNS #cmakedefine LWS_WITH_BORINGSSL #cmakedefine LWS_WITH_CGI +#cmakedefine LWS_WITH_COMPRESSED_BACKTRACES +#define LWS_COMPRESSED_BACKTRACES_SNIP_PRE ${LWS_COMPRESSED_BACKTRACES_SNIP_PRE} +#define LWS_COMPRESSED_BACKTRACES_SNIP_POST ${LWS_COMPRESSED_BACKTRACES_SNIP_POST} #cmakedefine LWS_WITH_CONMON #cmakedefine LWS_WITH_COSE #cmakedefine LWS_WITH_CUSTOM_HEADERS diff --git a/contrib/heapmap.sh b/contrib/heapmap.sh new file mode 100755 index 000000000..a8f933df1 --- /dev/null +++ b/contrib/heapmap.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# Pass the scraped compressed alloc metadata on stdin. +# +# $1 is the path to the elf file with the debugging info.
+# $2 is the path to lws-api-test-backtrace, may be omitted if it's on the path +# +# Eg, +# +# cat /tmp/mydump | ../../../../../contrib/heapmap.sh build/myapp.elf ../../../../../build/bin/ + +echo -n 0 > /tmp/_total_size + +while read line ; do + X=`echo -n $line | "$2"lws-api-test-backtrace 2>/dev/null` + if [ "$X" != "" ] ; then + S=`echo -n $X | cut -d' ' -f2 | sed "s/\,//g"` + T=`cat /tmp/_total_size` + echo -n $(( $T + $S )) > /tmp/_total_size + echo "$S" + addr2line -f -p -e $1 `echo $X | cut -d',' -f2-` + echo + fi +done + +T=`cat /tmp/_total_size` + +echo +echo "# Total instrumented allocation $T" diff --git a/doc-assets/backtrace.png b/doc-assets/backtrace.png new file mode 100644 index 000000000..48525b4e2 Binary files /dev/null and b/doc-assets/backtrace.png differ diff --git a/include/libwebsockets.h b/include/libwebsockets.h index 6e0b10d77..3387021d3 100644 --- a/include/libwebsockets.h +++ b/include/libwebsockets.h @@ -609,6 +609,7 @@ struct lws; #include #include +#include #include #include #if defined(LWS_WITH_SYS_SMD) diff --git a/include/libwebsockets/lws-backtrace.h b/include/libwebsockets/lws-backtrace.h new file mode 100644 index 000000000..d4fb9fc10 --- /dev/null +++ b/include/libwebsockets/lws-backtrace.h @@ -0,0 +1,280 @@ +/* + * libwebsockets - small server side websockets and web server implementation + * + * Copyright (C) 2010 - 2022 Andy Green + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** \defgroup lws_backtrace generic and compressed backtrace acquisition + * ##Backtrace apis + * \ingroup lwsbacktrace + * + * lws_backtrace + * + * These apis abstract acquisition and optional compression of binary back- + * traces, effectively build-specific signatures for where in the code you are + * and how you got there. + */ +//@{ + +typedef struct { + uintptr_t st[32]; /* collected instruction pointers, in the order the unwinder reported them */ + uintptr_t asize; /* NOTE(review): presumably the allocation size carried with the backtrace - confirm */ + + uint8_t sp; /* number of valid entries in st[] */ + uint8_t pre; /* leading entries still to be skipped while collecting */ + uint8_t post; /* trailing entries to drop once collection ends */ +} lws_backtrace_info_t; + +typedef struct { + uint8_t *comp; /* caller's buffer holding the compressed bitstream */ + size_t pos; /* current position in comp, counted in bits */ + size_t len; /* size of comp in bytes */ +} lws_backtrace_comp_t; + +/* + * lws_backtrace() - init and fill a backtrace struct + * + * \param si: the backtrace struct to populate + * \param pre: the number of call levels to snip from the top + * \param post: the number of call levels to snip from the bottom + * + * This describes the call stack into \p si. \p si doesn't need preparing + * before the call. \p pre levels of the call stack at the top will be snipped, + * this will usually want to be 1 or 2 to conceal the helpers that are making + * the call stack, such as lws_backtrace itself. + * + * \p post levels of the call stack at the bottom will be snipped, this is to + * conceal loaders or other machinery that was used to start your application, + * otherwise those entries will bloat all call stacks results on that platform. + * + * Returns 0 for success.
+ */ +LWS_VISIBLE LWS_EXTERN int +lws_backtrace(lws_backtrace_info_t *si, uint8_t pre, uint8_t post); + +/* + * lws_backtrace_compression_stream_init() - init a backtrace compression context + * + * \param c: the backtrace compression struct + * \param comp: the buffer to take the compressed bytes + * \param comp_len: the number of bytes available at \p comp + * + * This initializes the caller's lws_backtrace_comp_t. Because it's expected + * the caller will want to put his own compressed data after the compressed + * backtrace, he is responsible for the compression context. + */ +LWS_VISIBLE LWS_EXTERN void +lws_backtrace_compression_stream_init(lws_backtrace_comp_t *c, + uint8_t *comp, size_t comp_len); + +/* + * lws_backtrace_compression_stream() - add bitfields to compression stream + * + * \param c: the backtrace compression context struct + * \param v: the bitfield to add to the stream + * \param bits: the number of bits of v to add + * + * This inserts bits from the LSB end of v to the compression stream. + * + * This is used by the backtrace compression, user code can use this to add + * its own bitfields into the compression stream after the compressed backtrace. + * + * User data should be added after, so that the backtrace can be processed even + * if the additional data is not understood by the processing script. + * + * Returns 0 for success or nonzero if ran out of compression output buffer. + */ +LWS_VISIBLE LWS_EXTERN int +lws_backtrace_compression_stream(lws_backtrace_comp_t *c, uintptr_t v, + unsigned int bits); + +/* + * lws_backtrace_compression_destream() - read bitfields from compression stream + * + * \param c: the backtrace compression context struct + * \param _v: pointer to take the bitfield result + * \param bits: the number of bits to bring out into _v + * + * This reads the compression stream and creates a bitfield from it in \p _v.
+ * + * Returns 0 for success (with \p _v set to the value), or nonzero if ran out + * of compression output buffer. + */ +LWS_VISIBLE LWS_EXTERN int +lws_backtrace_compression_destream(lws_backtrace_comp_t *c, uintptr_t *_v, + unsigned int bits); + +/* + * lws_backtrace_compress_backtrace() - compress backtrace si into c + * + * \param si: the backtrace struct to compress + * \param c: the backtrace compression context struct + * + * This compresses backtrace information acquired in \p si into the compression + * context \p c. It compresses first the call stack length and then each IP + * address in turn. + * + * Returns 0 for success. + */ +LWS_VISIBLE LWS_EXTERN int +lws_backtrace_compress_backtrace(lws_backtrace_info_t *si, + lws_backtrace_comp_t *c); + +//@} + +/** \defgroup lws_alloc_metadata helpers for allocator instrumentation + * ##Alloc Metadata APIs + * \ingroup lwsallocmetadata + * + * lws_alloc_metadata + * + * These helpers let you rapidly instrument your libc or platform memory + * allocator so that you can later dump details, including a backtrace of where + * the allocation was made, for every live heap allocation. + * + * You would use it at peak memory usage, to audit who is using what at that + * time. + * + * Effective compression is used to keep the metadata overhead to ~48 bytes + * per active allocation on 32-bit systems. + */ +//@{ + +/** + * lws_alloc_metadata_gen() - generate metadata blob (with compressed backtrace) + * + * \param size: the allocation size + * \param comp: buffer for compressed backtrace + * \param comp_len: number of bytes available in the compressed backtrace + * \param adj: takes the count of additional bytes needed for metadata behind + * the allocation we tell the user about + * \param cl: takes the count of bytes used in comp + * + * This helper creates the compressed part of the alloc metadata blob and + * calculates the total overallocation that is needed in \p adj. + * + * This doesn't need any locking. 
+ * + * If \p comp_len is too small for the whole result, or it was not possible to + * get the backtrace information, the compressed part is set to empty (total + * length 2 to carry the 00 00 length). + * + * 6 or 10 (64-bit) bytes per backtrace IP allowed (currently 16) should always + * be enough, typically the compression reduces this very significantly. + */ +LWS_VISIBLE LWS_EXTERN void +lws_alloc_metadata_gen(size_t size, uint8_t *comp, size_t comp_len, size_t *adj, + size_t *cl); + +/** + * _lws_alloc_metadata_adjust() - helper to inject metadata and list as active + * + * \param active: the allocation owner + * \param v: Original, true allocation pointer, adjusted on exit + * \param adj: Total size of metadata overallocation + * \param comp: The compressed metadata + * \param cl: takes the count of bytes used in comp + * + * THIS MUST BE LOCKED BY THE CALLER IF YOUR ALLOCATOR MAY BE CALLED BY OTHER + * THREADS. You can call it from an existing mutex or similar -protected + * critical section in your allocator if there is one already, or you will have + * to protect the caller of it with your own mutex so it cannot reenter. + * + * This is a helper that adjusts the allocation past the metadata part so the + * caller of the allocator using this sees what he asked for. The deallocator + * must call _lws_alloc_metadata_trim() to balance this before actual + * deallocation. + */ +LWS_VISIBLE LWS_EXTERN void +_lws_alloc_metadata_adjust(lws_dll2_owner_t *active, void **v, size_t adj, uint8_t *comp, unsigned int cl); + +/** + * _lws_alloc_metadata_trim() - helper to trim metadata and remove from active + * + * \param ptr: Adjusted allocation pointer on entry, true allocation ptr on exit + * \param comp: NULL, or set on exit to point to start of compressed area + * \param complen: NULL, or set on exit to length of compressed area in bytes + * + * THIS MUST BE LOCKED BY THE CALLER IF YOUR DEALLOCATOR MAY BE CALLED BY OTHER + * THREADS. 
You can call it from an existing mutex or similar -protected + * critical section in your deallocator if there is one already, or you will + * have to protect the caller of it with your own mutex so it cannot reenter. + */ +LWS_VISIBLE LWS_EXTERN void +_lws_alloc_metadata_trim(void **ptr, uint8_t **comp, uint16_t *complen); + +/** + * lws_alloc_metadata_parse() - parse compressed metadata into struct + * + * \param si: Struct to take the backtrace results from decompression + * \param adjusted_alloc: pointer to adjusted, user allocation start + * + * This api parses and decompresses the blob behind the \p adjusted_alloc + * address into \p si. + * + * Returns 0 for success. + */ +LWS_VISIBLE LWS_EXTERN int +lws_alloc_metadata_parse(lws_backtrace_info_t *si, const uint8_t *adjusted_alloc); + +/** + * lws_alloc_metadata_dump_stdout() - helper to print base64 blob on stdout + * + * \param d: the current list item + * \param user: the optional arg given to the dump api (ignored) + * + * Generic helper that can be given to _lws_alloc_metadata_dump() as the + * callback that will emit a standardized base64 blob for the alloc metadata + */ +LWS_VISIBLE LWS_EXTERN int +lws_alloc_metadata_dump_stdout(struct lws_dll2 *d, void *user); + +/** + * _lws_alloc_metadata_dump() - dump all live allocs in instrumented heap + * + * \param active: the owner of the active allocation list for this heap + * \param cb: the callback to receive information + * \param arg: optional arg delivered to the callback + * + * THIS MUST BE LOCKED BY THE CALLER IF YOUR ALLOCATOR MAY BE CALLED BY OTHER + * THREADS. You can call it from an existing mutex or similar -protected + * critical section in your allocator if there is one already, or you will have + * to protect the caller of it with your own mutex so it cannot reenter. + * + * Iterates through the list of instrumented allocations calling the given + * callback for each one.
+ */ +LWS_VISIBLE LWS_EXTERN void +_lws_alloc_metadata_dump(lws_dll2_owner_t *active, lws_dll2_foreach_cb_t cb, + void *arg); + +#if defined(LWS_WITH_ALLOC_METADATA_LWS) +/* + * Wrapper for _lws_alloc_metadata_dump() that uses the list owner that tracks + * + */ +LWS_VISIBLE LWS_EXTERN void +_lws_alloc_metadata_dump_lws(lws_dll2_foreach_cb_t cb, void *arg); +#else +#define _lws_alloc_metadata_dump_lws(_a, _b) +#endif + +//@} diff --git a/include/libwebsockets/lws-misc.h b/include/libwebsockets/lws-misc.h index 233cb3456..52aff1a5c 100644 --- a/include/libwebsockets/lws-misc.h +++ b/include/libwebsockets/lws-misc.h @@ -1221,3 +1221,11 @@ lws_fsmount_unmount(struct lws_fsmount *fsm); LWS_VISIBLE LWS_EXTERN int lws_minilex_parse(const uint8_t *lex, int16_t *ps, const uint8_t c, int *match); + +/* + * Reports the number of significant bits (from the left) that is needed to + * represent u. So if u is 0x80, result is 8. + */ + +LWS_VISIBLE LWS_EXTERN unsigned int +lws_sigbits(uintptr_t u); diff --git a/lib/core/alloc.c b/lib/core/alloc.c index b9ca8f0e8..3619ff244 100644 --- a/lib/core/alloc.c +++ b/lib/core/alloc.c @@ -1,7 +1,7 @@ /* * libwebsockets - small server side websockets and web server implementation * - * Copyright (C) 2010 - 2020 Andy Green + * Copyright (C) 2010 - 2022 Andy Green * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -32,6 +32,10 @@ static size_t allocated; #endif +#if defined(LWS_WITH_ALLOC_METADATA_LWS) +static lws_dll2_owner_t active; +#endif + #if defined(LWS_PLAT_OPTEE) #define TEE_USER_MEM_HINT_NO_FILL_ZERO 0x80000000 @@ -107,9 +111,19 @@ void lws_set_allocator(void *(*cb)(void *ptr, size_t size, const char *reason)) static void * _realloc(void *ptr, size_t size, const char *reason) { +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + uint8_t comp[16 * LWS_ARRAY_SIZE(((lws_backtrace_info_t *)NULL)->st)]; + size_t complen; + size_t adj = 0; 
+#endif void *v; if (size) { +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + lws_alloc_metadata_gen(size, comp, sizeof(comp), &adj, &complen); + /* size is grown by adj once only, just before the realloc below */ +#endif + #if defined(LWS_PLAT_FREERTOS) lwsl_debug("%s: size %lu: %s (free heap %d)\n", __func__, #if defined(LWS_AMAZON_RTOS) @@ -127,17 +141,39 @@ _realloc(void *ptr, size_t size, const char *reason) allocated -= malloc_usable_size(ptr); #endif +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + size += adj; +#endif + #if defined(LWS_PLAT_OPTEE) v = (void *)TEE_Realloc(ptr, size); #else v = (void *)realloc(ptr, size); #endif + + if (!v) + return v; + #if defined(LWS_HAVE_MALLOC_USABLE_SIZE) allocated += malloc_usable_size(v); #endif + +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + _lws_alloc_metadata_adjust(&active, &v, adj, comp, (unsigned int)complen); +#endif + return v; } + + /* + * We are freeing it then... + */ + if (ptr) { +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + _lws_alloc_metadata_trim(&ptr, NULL, NULL); +#endif + #if defined(LWS_HAVE_MALLOC_USABLE_SIZE) allocated -= malloc_usable_size(ptr); #endif @@ -147,6 +183,15 @@ _realloc(void *ptr, size_t size, const char *reason) return NULL; } +#if defined(LWS_WITH_ALLOC_METADATA_LWS) +void +_lws_alloc_metadata_dump_lws(lws_dll2_foreach_cb_t cb, void *arg) +{ + lwsl_err("%s\n", __func__); + _lws_alloc_metadata_dump(&active, cb, arg); +} +#endif + void *(*_lws_realloc)(void *ptr, size_t size, const char *reason) = _realloc; void *lws_realloc(void *ptr, size_t size, const char *reason) diff --git a/lib/core/libwebsockets.c b/lib/core/libwebsockets.c index 665051a6f..865292d29 100644 --- a/lib/core/libwebsockets.c +++ b/lib/core/libwebsockets.c @@ -1703,3 +1703,28 @@ nope: return LWS_MINILEX_FAIL; } + +unsigned int +lws_sigbits(uintptr_t u) +{ + uintptr_t mask = (uintptr_t)(0xffllu << ((sizeof(u) - 1) * 8)), + m1 = (uintptr_t)(0x80llu << ((sizeof(u) - 1) * 8)); + unsigned int n; + + for (n = sizeof(u) * 8; n > 0; n -= 8) { + if (u & mask) + break; + mask >>= 8; + m1 >>=
8; + } + + if (!n) + return 1; /* no bits are set, we need at least 1 to represent */ + + while (!(u & m1)) { + n--; + m1 >>= 1; + } + + return n; +} diff --git a/lib/misc/CMakeLists.txt b/lib/misc/CMakeLists.txt index 7a1293ae1..1b16acd1d 100644 --- a/lib/misc/CMakeLists.txt +++ b/lib/misc/CMakeLists.txt @@ -49,6 +49,11 @@ if (LWS_WITH_NETWORK) endif() +if (LWS_WITH_COMPRESSED_BACKTRACES) + list(APPEND SOURCES + misc/backtrace.c) +endif() + if (LWS_WITH_FTS) list(APPEND SOURCES misc/fts/trie.c diff --git a/lib/misc/backtrace.c b/lib/misc/backtrace.c new file mode 100644 index 000000000..3794a03ba --- /dev/null +++ b/lib/misc/backtrace.c @@ -0,0 +1,392 @@ +/* + * libwebsockets - small server side websockets and web server implementation + * + * Copyright (C) 2010 - 2022 Andy Green + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ */ + +#include "private-lib-core.h" + +#define _GNU_SOURCE +#include <unwind.h> + +static _Unwind_Reason_Code +uwcb(struct _Unwind_Context* uctx, void *arg) +{ + lws_backtrace_info_t *si = (lws_backtrace_info_t *)arg; + + if (si->sp == LWS_ARRAY_SIZE(si->st)) + return _URC_END_OF_STACK; + + if (!si->pre) { + if (_Unwind_GetIP(uctx)) + si->st[si->sp++] = _Unwind_GetIP(uctx); + } else + si->pre--; + + return _URC_NO_REASON; +} + +int +lws_backtrace(lws_backtrace_info_t *si, uint8_t pre, uint8_t post) +{ + _Unwind_Reason_Code r; + + si->sp = 0; + si->pre = pre; /* skip the top couple of backtrace results */ + si->post = post; + + r = _Unwind_Backtrace(uwcb, si); + + if (si->sp > si->post) + si->sp -= si->post; + + return r != _URC_END_OF_STACK; +} + +int +lws_backtrace_compression_stream(lws_backtrace_comp_t *c, uintptr_t v, + unsigned int bits) +{ /* appends the low `bits` bits of v MSB-first at c->pos; returns 1 once the buffer is full */ + int nbits = (int)bits; + + while (nbits-- >= 0) { /* `bits` value bits, then one zero pad bit (nbits == -1) the reader also consumes */ + if (!(c->pos & 7)) + c->comp[c->pos >> 3] = 0; + if (nbits >= 0 && (v & ((uintptr_t)1 << nbits))) /* full-width shift, and never a shift by -1 */ + c->comp[c->pos >> 3] |= (uint8_t)(1 << (7 - (c->pos & 7))); + + c->pos++; + + if ((c->pos >> 3) == c->len) { + lwsl_err("%s: overrun %u\n", __func__, (unsigned int)c->len); + return 1; + } + } + + return 0; +} + +int +lws_backtrace_compression_destream(lws_backtrace_comp_t *c, uintptr_t *_v, + unsigned int bits) +{ /* reads `bits` bits MSB-first from c->pos into *_v; returns 1 if the buffer runs out first */ + int nbits = (int)bits; + uintptr_t v = 0; + + while (nbits-- >= 0) { /* mirrors the writer: `bits` value bits, then skip the pad bit */ + if ((c->pos >> 3) == c->len) + return 1; + if (nbits >= 0 && (c->comp[c->pos >> 3] & (1 << (7 - (c->pos & 7))))) + v |= ((uintptr_t)1 << nbits); /* full-width shift so bits above 31 survive */ + c->pos++; + } + + *_v = v; + + return 0; +} + +void +lws_backtrace_compression_stream_init(lws_backtrace_comp_t *c, + uint8_t *comp, size_t comp_len) +{ + *comp = 0; + c->pos = 0; + c->comp = comp; + c->len = comp_len; +} + +int +lws_backtrace_compress_backtrace(lws_backtrace_info_t *si, + lws_backtrace_comp_t *c) +{ + int n; + + lws_backtrace_compression_stream(c, si->sp, 5); + + for (n = 0; n < si->sp; n++) { /* go through each in turn */ + uintptr_t delta = (uintptr_t)~0ll, d1; + char hit = -1, sign, _sign;
unsigned int q, ql; + int m; + + if (n > 8) + m = n - 8; + else + m = 0; + + /* we can look for 1 to 8 back */ + for (; m < n; m++) { + if (si->st[n] > si->st[m]) { + d1 = si->st[n] - si->st[m]; + _sign = 0; + } else { + d1 = si->st[m] - si->st[n]; + _sign = 1; + } + if (d1 < delta) { + delta = d1; + hit = (char)m; + sign = _sign; + } + } + + q = lws_sigbits(delta); + ql = lws_sigbits(si->st[n]); + + /* + * Bitwise compression: + * + * 0: zzzzzz literal (number of bits following) + * 1: xxx: y: zzzzzz delta (base index is (xxx + 1) back + * from this index) + * y == 1 == subtract from base, + * zzzzzz delta bits follow + */ + + if (n && hit && q + 11 < ql + 7) { + /* shorter to issue a delta froma previous address */ + lws_backtrace_compression_stream(c, 1, 1); + lws_backtrace_compression_stream(c, (uintptr_t)((n - hit) - 1), 3); + lws_backtrace_compression_stream(c, (uintptr_t)sign, 1); + lws_backtrace_compression_stream(c, q, 6); + + if (lws_backtrace_compression_stream(c, delta, q)) + return 1; + } else { + /* shorter to issue a literal */ + lws_backtrace_compression_stream(c, 0, 1); + lws_backtrace_compression_stream(c, ql, 6); + + if (lws_backtrace_compression_stream(c, si->st[n], ql)) + return 1; + } + } + + return 0; +} + + +void +lws_alloc_metadata_gen(size_t size, uint8_t *comp, size_t comp_len, + size_t *adj, size_t *cl) +{ + lws_backtrace_info_t si; + lws_backtrace_comp_t c; + unsigned int q, ql; + + /**< We need enough here to take the compressed results of however many + * callstack Instruction Pointers are allowed, currently 16. + */ + + lws_backtrace_compression_stream_init(&c, comp, comp_len); + + lws_backtrace(&si, LWS_COMPRESSED_BACKTRACES_SNIP_PRE, + LWS_COMPRESSED_BACKTRACES_SNIP_POST); + + /* + * We have the result stack, let's compress it + * + * - (implicit alignment) + * - call stack len (5b) / call stack literal [ { literal | delta } ... 
] + * - bitcount(6), alloc size literal + * + * - 2 bytes MSB-first at end on byte boundary, total compressed length + * behind it. + * - lws_dll2_t + */ + + if (!lws_backtrace_compress_backtrace(&si, &c)) { + + lws_backtrace_compression_stream(&c, lws_sigbits(size), 6); + lws_backtrace_compression_stream(&c, size, lws_sigbits(size)); + + q = (unsigned int)(c.pos >> 3); + if (c.pos & 7) + q++; + + if (q + 2 >= c.len) { + lwsl_err("ovf\n"); + goto nope; + } + + ql = q + 2; + c.comp[q++] = (uint8_t)((ql >> 8) & 0xff); + c.comp[q++] = (uint8_t)(ql & 0xff); + + /* + * So we have it compressed along with our additional data. + */ + + /* pointer-aligned total overallocation */ + *adj = sizeof(lws_dll2_t) + + ((q + sizeof(void *) - 1) / sizeof(void *)) * + sizeof(void *); + /* compression buf contents amount */ + *cl = q; + } else { + /* put an explicit zero-length prepend for want of anything else */ +nope: + c.comp[0] = 0; + c.comp[1] = 0; + c.pos = 16; /* bits */ + *cl = 2; + *adj = sizeof(lws_dll2_t) + sizeof(void *); + } +} + +/* incoming *v is the true allocation */ + +void +_lws_alloc_metadata_adjust(lws_dll2_owner_t *active, void **v, size_t adj, + uint8_t *comp, unsigned int cl) +{ + /* + * Lie about the alloc start in order to conceal our metadata behind + * what was asked for. 
Incoming v is the real + * + * True alloc /Comp Reported alloc + * V V + * <16-bit MSB len to comp> lws_dll2_t + */ + + *v = (void *)((uint8_t *)(*v) + adj - sizeof(lws_dll2_t)); + memcpy((uint8_t *)(*v) - cl, comp, cl); + lws_dll2_clear((*v)); + lws_dll2_add_tail((*v), active); + *v = (void *)((uint8_t *)(*v) + sizeof(lws_dll2_t)); +} + +void +_lws_alloc_metadata_trim(void **ptr, uint8_t **comp, uint16_t *complen) +{ + const uint8_t *p = ((const uint8_t *)*ptr) - sizeof(lws_dll2_t); + uint16_t cofs = p[-1] | (p[-2] << 8); + size_t adj = ((sizeof(lws_dll2_t) + cofs + sizeof(void *) - 1) / + sizeof(void *)) * sizeof(void *); + + //lwsl_hexdump_notice((uint8_t *)(*ptr) - adj, adj); + + if (comp) + *comp = (uint8_t *)p - cofs; /* start of compressed area */ + if (complen) + *complen = cofs - 2; + + lws_dll2_remove((lws_dll2_t *)p); + *ptr = (void *)((uint8_t *)*ptr - adj); /* original alloc point */ +} + +/* past_len: after the 16-bit len, pointing at the lws_dll2_t at the end */ + +int +lws_alloc_metadata_parse(lws_backtrace_info_t *si, const uint8_t *past_len) +{ + const uint8_t *p = (const uint8_t *)past_len; + uintptr_t n, entries, ri, sign, field; + uint16_t cofs = p[-1] | (p[-2] << 8); + lws_backtrace_comp_t c; + + c.comp = (uint8_t *)p - cofs; + c.pos = 0; + c.len = cofs - 2; + si->sp = 0; + + /* 5-bit bitfield contains callstack depth */ + if (lws_backtrace_compression_destream(&c, &entries, 5)) + return 1; + + while (si->sp != entries) { + + if (lws_backtrace_compression_destream(&c, &n, 1)) + return 1; + + if (n) { /* delta: 3-bit refidx, 1-bit delta sign, 6-bit fieldlen, field */ + + assert(si->sp); /* first must be literal */ + + if (lws_backtrace_compression_destream(&c, &ri, 3)) + return 1; + if (lws_backtrace_compression_destream(&c, &sign, 1)) + return 1; + if (lws_backtrace_compression_destream(&c, &n, 6)) + return 1; + if (lws_backtrace_compression_destream(&c, &field, (unsigned int)n)) + return 1; + + if (si->sp < si->sp - ri - 1 ) { + lwsl_err("ref 
err\n"); + return 1; + } + + if (sign) /* backwards from ref */ + si->st[si->sp] = si->st[si->sp - (ri + 1)] - field; + else /* forwards from ref */ + si->st[si->sp] = si->st[si->sp - (ri + 1)] + field; + + } else { /* literal */ + if (lws_backtrace_compression_destream(&c, &n, 6)) + return 1; + if (lws_backtrace_compression_destream(&c, &field, (unsigned int)n)) + return 1; + + si->st[si->sp] = field; + } + + si->sp++; + } + + /* 6-bit bitlength, then allocated size */ + if (lws_backtrace_compression_destream(&c, &n, 6)) + return 1; + if (lws_backtrace_compression_destream(&c, &si->asize, (unsigned int)n)) + return 1; + + return 0; +} + +int +lws_alloc_metadata_dump_stdout(struct lws_dll2 *d, void *user) +{ + char ab[192]; + + const uint8_t *p = (const uint8_t *)d; + uint16_t cofs = p[-1] | (p[-2] << 8); + + p = (uint8_t *)p - cofs; + + ab[0] = '~'; + ab[1] = 'm'; + ab[2] = '#'; + lws_b64_encode_string((const char *)p, (int)cofs, + ab + 3, (int)sizeof(ab) - 4); + + puts(ab); + + return 0; +} + +void +_lws_alloc_metadata_dump(lws_dll2_owner_t *active, lws_dll2_foreach_cb_t cb, + void *arg) +{ + lws_dll2_foreach_safe(active, arg, cb); +} + diff --git a/lib/roles/h2/ops-h2.c b/lib/roles/h2/ops-h2.c index 989cb5b95..c4e2665e1 100644 --- a/lib/roles/h2/ops-h2.c +++ b/lib/roles/h2/ops-h2.c @@ -722,7 +722,7 @@ rops_close_kill_connection_h2(struct lws *wsi, enum lws_close_status reason) while (w) { w1 = w->next; - free(w); + lws_free(w); w = w1; } wsi->h2.h2n->pps = NULL; diff --git a/minimal-examples-lowlevel/api-tests/api-test-backtrace/CMakeLists.txt b/minimal-examples-lowlevel/api-tests/api-test-backtrace/CMakeLists.txt new file mode 100644 index 000000000..34a6faa1a --- /dev/null +++ b/minimal-examples-lowlevel/api-tests/api-test-backtrace/CMakeLists.txt @@ -0,0 +1,24 @@ +project(lws-api-test-backtrace C) +cmake_minimum_required(VERSION 2.8.12) +find_package(libwebsockets CONFIG REQUIRED) +list(APPEND CMAKE_MODULE_PATH ${LWS_CMAKE_DIR}) +include(CheckCSourceCompiles) 
+include(LwsCheckRequirements) + +set(SRCS main.c) + +set(requirements 1) +require_lws_config(LWS_WITH_COMPRESSED_BACKTRACES 1 requirements) + +if (requirements) + + add_executable(${PROJECT_NAME} ${SRCS}) + + if (websockets_shared) + target_link_libraries(${PROJECT_NAME} websockets_shared ${LIBWEBSOCKETS_DEP_LIBS}) + add_dependencies(${PROJECT_NAME} websockets_shared) + else() + target_link_libraries(${PROJECT_NAME} websockets ${LIBWEBSOCKETS_DEP_LIBS}) + endif() +endif() + diff --git a/minimal-examples-lowlevel/api-tests/api-test-backtrace/README.md b/minimal-examples-lowlevel/api-tests/api-test-backtrace/README.md new file mode 100644 index 000000000..17a1af0b9 --- /dev/null +++ b/minimal-examples-lowlevel/api-tests/api-test-backtrace/README.md @@ -0,0 +1,20 @@ +# lws api test Compressed Backtraces + +Tool to decompress the `lws_backtrace` compressed backtraces + +## build + +``` + $ cmake . && make +``` + +## usage + +Commandline option|Meaning +---|--- +-d |Debug verbosity in decimal, eg, -d15 + +``` + $ echo -n "~m#ghawu9ICDldHWP9xuFCTFrDOOUzlHOLYIbqO1C3eYbrpcC3NoQo41CtHWBxkZcnU4BA1VCoANw==" | ./lws-api-test-backtrace +``` + diff --git a/minimal-examples-lowlevel/api-tests/api-test-backtrace/main.c b/minimal-examples-lowlevel/api-tests/api-test-backtrace/main.c new file mode 100644 index 000000000..b7155987e --- /dev/null +++ b/minimal-examples-lowlevel/api-tests/api-test-backtrace/main.c @@ -0,0 +1,145 @@ +/* + * lws-api-test-backtrace + * + * Written in 2010-2022 by Andy Green + * + * This file is made available under the Creative Commons CC0 1.0 + * Universal Public Domain Dedication. 
+ */ + +#include +#include +#include +#include +#include + +int fdin = 0; + +int +main(int argc, const char **argv) +{ + struct lws_context_creation_info info; + struct lws_context *context; + const char *p; + int result = 1, logs = LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE, n; + uint8_t ib[2048], ob[1536], *eib = ib; + lws_backtrace_info_t si; + unsigned int m; + uintptr_t uipt; + ssize_t s = 0; + size_t l = 0; + uint16_t san; + + if ((p = lws_cmdline_option(argc, argv, "-d"))) + logs = atoi(p); + + lws_set_log_level(logs, NULL); + + if ((p = lws_cmdline_option(argc, argv, "--stdin"))) { + fdin = open(p, LWS_O_RDONLY, 0); + if (fdin < 0) { + result = 1; + lwsl_err("%s: unable to open stdin file\n", __func__); + goto bail; + } + } + + lwsl_user("LWS Compressed Backtrace Decoder\n"); + + memset(&info, 0, sizeof info); /* otherwise uninitialized garbage */ +#if defined(LWS_WITH_NETWORK) + info.port = CONTEXT_PORT_NO_LISTEN; +#endif + info.options = 0; + + context = lws_create_context(&info); + if (!context) { + lwsl_err("lws init failed\n"); + return 1; + } + + /* confirm operation of lws_sigbits */ + + uipt = 0x8000000000000000ull; + for (n = 64; n; n--) { + m = lws_sigbits(uipt); + if (n != (int)m) { + lwsl_err("a: %d %d\n", n, m); + goto bail; + } + uipt >>= 1; + } + +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + _lws_alloc_metadata_dump_lws(lws_alloc_metadata_dump_stdout, NULL); +#endif + + if (!fdin) { + struct timeval timeout; + fd_set fds; + + FD_ZERO(&fds); + FD_SET(0, &fds); + + timeout.tv_sec = 0; + timeout.tv_usec = 1000; + + if (select(fdin + 1, &fds, NULL, NULL, &timeout) < 0 || + !FD_ISSET(0, &fds)) { + result = 1; + lwsl_err("%s: pass Compressed Backtrace line " + "on stdin or use --stdin\n", __func__); + goto bail; + } + } + + while (l != sizeof(ib)) { + s = read(fdin, ib + l, sizeof(ib) - l); + if (s <= 0) + break; + l = l + (size_t)s; + } + + if (l < 4) + goto bail; + + if (ib[0] == '~' && ib[2] == '#') { + eib += 3; + l -= 3; + } + + n = 
lws_b64_decode_string_len((char *)eib, (int)l, (char *)ob, (int)sizeof(ob)); + if (n <= 0) { + lwsl_err("%s: invalid base64\n", __func__); + goto bail; + } + + lwsl_hexdump_notice(ob, (size_t)n); + + san = (ob[n - 2] << 8) | ob[n - 1]; + if (san != (unsigned int)n) { + lwsl_err("%s: compressed length wrong\n", __func__); + goto bail; + } + + if (lws_alloc_metadata_parse(&si, ob + n)) { + lwsl_err("%s: compressed parse failed\n", __func__); + goto bail; + } + + printf("~b#size: %llu, ", (unsigned long long)si.asize); + + for (n = 0; n < si.sp; n++) + printf("0x%llx ", (unsigned long long)si.st[n]); + printf("\n"); + + result = 0; + +bail: + lwsl_user("Completed: %s\n", result ? "FAIL" : "PASS"); + + lws_context_destroy(context); + + return result; +} + diff --git a/minimal-examples-lowlevel/http-client/minimal-http-client/minimal-http-client.c b/minimal-examples-lowlevel/http-client/minimal-http-client/minimal-http-client.c index ac9aa32fb..5f1f1aa13 100644 --- a/minimal-examples-lowlevel/http-client/minimal-http-client/minimal-http-client.c +++ b/minimal-examples-lowlevel/http-client/minimal-http-client/minimal-http-client.c @@ -94,6 +94,10 @@ callback_http(struct lws *wsi, enum lws_callback_reasons reason, lws_get_peer_simple(wsi, buf, sizeof(buf)); status = (int)lws_http_client_http_response(wsi); +#if defined(LWS_WITH_ALLOC_METADATA_LWS) + _lws_alloc_metadata_dump_lws(lws_alloc_metadata_dump_stdout, NULL); +#endif + lwsl_user("Connected to %s, http response: %d\n", buf, status); }