diff --git a/CMakeLists.txt b/CMakeLists.txt index d9e4cb40f..9a97b7968 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -242,6 +242,7 @@ option(LWS_WITH_SPAWN "Spawn subprocesses with piped stdin/out/stderr" OFF) option(LWS_WITH_FSMOUNT "Overlayfs and fallback mounting apis" OFF) option(LWS_WITH_FANALYZER "Enable gcc -fanalyzer if compiler supports" OFF) option(LWS_HTTP_HEADERS_ALL "Override header reduction optimization and include all like older lws versions" OFF) +option(LWS_WITH_SUL_DEBUGGING "Enable zombie lws_sul checking on object deletion" OFF) # # to use miniz, enable both LWS_WITH_ZLIB and LWS_WITH_MINIZ diff --git a/include/libwebsockets/lws-timeout-timer.h b/include/libwebsockets/lws-timeout-timer.h index 5eb975988..a02ba3485 100644 --- a/include/libwebsockets/lws-timeout-timer.h +++ b/include/libwebsockets/lws-timeout-timer.h @@ -302,6 +302,35 @@ lws_sul_earliest_wakeable_event(struct lws_context *ctx, lws_usec_t *pearliest); lws_sul2_schedule(ctx, tsi, LWSSULLI_WAKE_IF_SUSPENDED, sul); \ }} +#if defined(LWS_WITH_SUL_DEBUGGING) +/** + * lws_sul_debug_zombies() - assert there are no scheduled sul in a given object + * + * \param ctx: lws_context + * \param po: pointer to the object that is about to be destroyed + * \param len: length of the object that is about to be destroyed + * \param destroy_description: string clue what any failure is related to + * + * This is an optional debugging helper that walks the sul scheduler lists + * confirming that there are no suls scheduled that live inside the object + * footprint described by po and len. When internal objects are about to be + * destroyed, like wsi / user_data or secure stream handles, if + * LWS_WITH_SUL_DEBUGGING is enabled the scheduler is checked for anything + * in the object being destroyed. If something found, an error is printed and + * an assert fired. + * + * Internal sul like timeouts should always be cleaned up correctly, but user + * suls in, eg, wsi user_data area, or in secure stream user allocation, may be + * the cause of difficult to find bugs if valgrind not available and the user + * code left a sul in the scheduler after destroying the object the sul was + * living in. + */ +LWS_VISIBLE LWS_EXTERN void +lws_sul_debug_zombies(struct lws_context *ctx, void *po, size_t len, + const char *destroy_description); +#else +#define lws_sul_debug_zombies(_a, _b, _c, _d) +#endif /* * lws_validity_confirmed() - reset the validity timer for a network connection diff --git a/lib/core-net/close.c b/lib/core-net/close.c index a92f258fe..c078aa0f0 100644 --- a/lib/core-net/close.c +++ b/lib/core-net/close.c @@ -79,8 +79,12 @@ __lws_reset_wsi(struct lws *wsi) * or by specified the user. We should only free what we allocated. */ if (wsi->protocol && wsi->protocol->per_session_data_size && - wsi->user_space && !wsi->user_space_externally_allocated) + wsi->user_space && !wsi->user_space_externally_allocated) { + /* confirm no sul left scheduled in user data itself */ + lws_sul_debug_zombies(wsi->context, wsi->user_space, + wsi->protocol->per_session_data_size, __func__); lws_free_set_NULL(wsi->user_space); + } /* * Don't let buflist content or state from the wsi's previous life @@ -91,7 +95,12 @@ __lws_reset_wsi(struct lws *wsi) lws_dll2_remove(&wsi->dll_buflist); lws_buflist_destroy_all_segments(&wsi->buflist_out); #if defined(LWS_WITH_UDP) - lws_free_set_NULL(wsi->udp); + if (wsi->udp) { + /* confirm no sul left scheduled in wsi->udp itself */ + lws_sul_debug_zombies(wsi->context, wsi->udp, + sizeof(*wsi->udp), "close udp wsi"); + lws_free_set_NULL(wsi->udp); + } #endif wsi->retry = 0; @@ -161,6 +170,9 @@ __lws_free_wsi(struct lws *wsi) wsi->context->count_wsi_allocated, wsi->context->pt[(int)wsi->tsi].fds_count); + /* confirm no sul left scheduled in wsi itself */ + lws_sul_debug_zombies(wsi->context, wsi, sizeof(wsi), __func__); + lws_free(wsi); } @@ -434,8 +446,13 @@ just_kill_connection: lws_buflist_destroy_all_segments(&wsi->http.buflist_post_body); #endif #if defined(LWS_WITH_UDP) - if (wsi->udp) + if (wsi->udp) { + /* confirm no sul left scheduled in wsi->udp itself */ + lws_sul_debug_zombies(wsi->context, wsi->udp, + sizeof(*wsi->udp), "close udp wsi"); + lws_free_set_NULL(wsi->udp); + } #endif if (wsi->role_ops->close_kill_connection) diff --git a/lib/core-net/sorted-usec-list.c b/lib/core-net/sorted-usec-list.c index 895865c56..e00de7b0a 100644 --- a/lib/core-net/sorted-usec-list.c +++ b/lib/core-net/sorted-usec-list.c @@ -259,3 +259,63 @@ lws_sul_earliest_wakeable_event(struct lws_context *ctx, lws_usec_t *pearliest) return 0; } + +#if defined(LWS_WITH_SUL_DEBUGGING) + +/* + * Sanity checker for any sul left scheduled when its containing object is + * freed... code scheduling suls must take care to cancel them when destroying + * their object. This optional debugging helper checks that when an object is + * being destroyed, there is no live sul scheduled from inside the object. + */ + +void +lws_sul_debug_zombies(struct lws_context *ctx, void *po, size_t len, + const char *destroy_description) +{ + struct lws_context_per_thread *pt; + int n, m; + + for (n = 0; n < ctx->count_threads; n++) { + pt = &ctx->pt[n]; + + lws_pt_lock(pt, __func__); + + for (m = 0; m < LWS_COUNT_PT_SUL_OWNERS; m++) { + + lws_start_foreach_dll(struct lws_dll2 *, p, + lws_dll2_get_head(&pt->pt_sul_owner[m])) { + lws_sorted_usec_list_t *sul = + lws_container_of(p, + lws_sorted_usec_list_t, list); + + /* + * Is the sul resident inside the object that is + * indicated as being deleted? + */ + + if (sul >= po && lws_ptr_diff(sul, po) < len) { + lwsl_err("%s: ERROR: Zombie Sul " + "(on list %d) %s\n", __func__, + m, destroy_description); + /* + * This assert fires if you have left + * a sul scheduled to fire later, but + * are about to destroy the object the + * sul lives in. You must take care to + * do lws_sul_cancel(&sul) on any suls + * that may be scheduled before + * destroying the object the sul lives + * inside. + */ + assert(0); + } + + } lws_end_foreach_dll(p); + } + + lws_pt_unlock(pt); + } +} + +#endif diff --git a/lib/secure-streams/secure-streams.c b/lib/secure-streams/secure-streams.c index d0097e98f..4373023d2 100644 --- a/lib/secure-streams/secure-streams.c +++ b/lib/secure-streams/secure-streams.c @@ -742,6 +742,10 @@ lws_ss_destroy(lws_ss_handle_t **ppss) lws_sul_cancel(&h->sul); + /* confirm no sul left scheduled in handle or user allocation object */ + lws_sul_debug_zombies(h->context, h, sizeof(*h) + h->info.user_alloc, + __func__); + lws_free_set_NULL(h); }