From a57d13cb1ba874e4808c70eaea89721499af3942 Mon Sep 17 00:00:00 2001 From: Andy Green Date: Sat, 23 Feb 2019 05:41:30 +0800 Subject: [PATCH] smp: adopt: deal with load balancing init window With SMP as soon as we add the new sockfd to the fds table, in the case we load-balanced the fd on to a different pt, service on it becomes live immediately and concurrently. This can lead to the unexpected situation that while we think we're still initing the new wsi from our thread, it can have lived out its whole life concurrently from another service thread. Add a volatile flag to inform the owning pt that if it wants to service the wsi during this init window, it must wait and retry next time around the event loop. --- lib/core-net/adopt.c | 21 +++++++++++++++++++++ lib/core-net/private.h | 4 ++++ lib/core-net/service.c | 9 +++++++++ lib/roles/listen/ops-listen.c | 3 ++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/core-net/adopt.c b/lib/core-net/adopt.c index 03bad8393..a9d2d6d39 100644 --- a/lib/core-net/adopt.c +++ b/lib/core-net/adopt.c @@ -132,6 +132,12 @@ lws_adopt_descriptor_vhost(struct lws_vhost *vh, lws_adoption_type type, } #endif + /* + * Notice that in SMP case, the wsi may be being created on an + * entirely different pt / tsi for load balancing. In that case as + * we initialize it, it may become "live" concurrently unexpectedly... + */ + n = -1; if (parent) n = parent->tsi; @@ -194,6 +200,15 @@ lws_adopt_descriptor_vhost(struct lws_vhost *vh, lws_adoption_type type, if (context->event_loop_ops->accept(new_wsi)) goto fail; +#if LWS_MAX_SMP > 1 + /* + * Caution: after this point the wsi is live on its service thread + * which may be concurrent to this. We mark the wsi as still undergoing + * init in another pt so the assigned pt leaves it alone. 
+ */ + new_wsi->undergoing_init_from_other_pt = 1; +#endif + if (!(type & LWS_ADOPT_ALLOW_SSL)) { lws_pt_lock(pt, __func__); if (__insert_wsi_socket_into_fds(context, new_wsi)) { @@ -224,6 +239,12 @@ lws_adopt_descriptor_vhost(struct lws_vhost *vh, lws_adoption_type type, lws_role_call_adoption_bind(new_wsi, type | _LWS_ADOPT_FINISH, vh_prot_name); +#if LWS_MAX_SMP > 1 + /* its actual pt can service it now */ + + new_wsi->undergoing_init_from_other_pt = 0; +#endif + lws_cancel_service_pt(new_wsi); return new_wsi; diff --git a/lib/core-net/private.h b/lib/core-net/private.h index 3d4b4c420..6048e3b40 100644 --- a/lib/core-net/private.h +++ b/lib/core-net/private.h @@ -623,6 +623,10 @@ struct lws { /* volatile to make sure code is aware other thread can change */ volatile char handling_pollout; volatile char leave_pollout_active; +#if LWS_MAX_SMP > 1 + volatile char undergoing_init_from_other_pt; +#endif + }; #define lws_is_flowcontrolled(w) (!!(wsi->rxflow_bitmap)) diff --git a/lib/core-net/service.c b/lib/core-net/service.c index f4014a733..481141534 100644 --- a/lib/core-net/service.c +++ b/lib/core-net/service.c @@ -960,6 +960,15 @@ lws_service_fd_tsi(struct lws_context *context, struct lws_pollfd *pollfd, /* not lws connection ... leave revents alone and return */ return 0; +#if LWS_MAX_SMP > 1 + if (wsi->undergoing_init_from_other_pt) + /* + * Temporary situation that other service thread is initializing + * this wsi right now for use on our service thread. 
+ */ + return 0; +#endif + /* * so that caller can tell we handled, past here we need to * zero down pollfd->revents after handling diff --git a/lib/roles/listen/ops-listen.c b/lib/roles/listen/ops-listen.c index a1661cabb..662434509 100644 --- a/lib/roles/listen/ops-listen.c +++ b/lib/roles/listen/ops-listen.c @@ -136,7 +136,7 @@ rops_handle_POLLIN_listen(struct lws_context_per_thread *pt, struct lws *wsi, /* already closed cleanly as necessary */ return LWS_HPI_RET_WSI_ALREADY_DIED; } - +/* if (lws_server_socket_service_ssl(cwsi, accept_fd)) { lws_close_free_wsi(cwsi, LWS_CLOSE_STATUS_NOSTATUS, "listen svc fail"); @@ -145,6 +145,7 @@ rops_handle_POLLIN_listen(struct lws_context_per_thread *pt, struct lws *wsi, lwsl_info("%s: new wsi %p: wsistate 0x%x, role_ops %s\n", __func__, cwsi, cwsi->wsistate, cwsi->role_ops->name); +*/ } while (pt->fds_count < context->fd_limit_per_thread - 1 && wsi->position_in_fds_table != LWS_NO_FDS_POS &&