cgi lifecycle logging

Signed-off-by: Andy Green <andy@warmcat.com>
This commit is contained in:
Andy Green 2016-04-20 06:10:56 +08:00
parent d02028bf25
commit 748a2210bd
4 changed files with 236 additions and 52 deletions

View file

@ -187,6 +187,17 @@ lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason)
}
if (wsi->cgi) {
struct lws_cgi **pcgi = &pt->cgi_list;
/* remove us from the cgi list */
lwsl_notice("%s: remove cgi %p from list\n", __func__, wsi->cgi);
while (*pcgi) {
if (*pcgi == wsi->cgi) {
/* drop us from the pt cgi list */
*pcgi = (*pcgi)->cgi_list;
break;
}
pcgi = &(*pcgi)->cgi_list;
}
/* we have a cgi going, we must kill it */
wsi->cgi->being_closed = 1;
lws_cgi_kill(wsi);
@ -535,10 +546,9 @@ lws_close_free_wsi_final(struct lws *wsi)
#ifdef LWS_WITH_CGI
if (wsi->cgi) {
for (n = 0; n < 6; n++)
if (wsi->cgi->pipe_fds[n / 2][n & 1] >= 0) {
lwsl_notice(" closing %d\n", wsi->cgi->pipe_fds[n / 2][n & 1]);
if (wsi->cgi->pipe_fds[n / 2][n & 1] >= 0)
close(wsi->cgi->pipe_fds[n / 2][n & 1]);
}
lws_free(wsi->cgi);
}
#endif
@ -1752,8 +1762,8 @@ lws_cgi(struct lws *wsi, const char * const *exec_array, int script_uri_path_len
cgi->stdwsi[n]->cgi_channel = n;
cgi->stdwsi[n]->vhost = wsi->vhost;
lwsl_err("%s: cgi %p: pipe fd %d -> fd %d / %d\n", __func__, wsi, n,
cgi->pipe_fds[n][!!(n == 0)], cgi->pipe_fds[n][!(n == 0)]);
// lwsl_err("%s: cgi %p: pipe fd %d -> fd %d / %d\n", __func__, wsi, n,
// cgi->pipe_fds[n][!!(n == 0)], cgi->pipe_fds[n][!(n == 0)]);
/* read side is 0, stdin we want the write side, others read */
cgi->stdwsi[n]->sock = cgi->pipe_fds[n][!!(n == 0)];
@ -1783,6 +1793,7 @@ lws_cgi(struct lws *wsi, const char * const *exec_array, int script_uri_path_len
wsi->hdr_state = LCHS_HEADER;
/* add us to the pt list of active cgis */
lwsl_notice("%s: adding cgi %p to list\n", __func__, wsi->cgi);
cgi->cgi_list = pt->cgi_list;
pt->cgi_list = cgi;
@ -1890,11 +1901,18 @@ lws_cgi(struct lws *wsi, const char * const *exec_array, int script_uri_path_len
env_array[n++] = "PATH=/bin:/usr/bin:/usr/local/bin:/var/www/cgi-bin";
env_array[n] = NULL;
#if 1
#if 0
for (m = 0; m < n; m++)
lwsl_err(" %s\n", env_array[m]);
#endif
/*
* Actually having made the env, as a cgi we don't need the ah
* any more
*/
if (wsi->u.hdr.ah->rxpos == wsi->u.hdr.ah->rxlen)
lws_header_table_detach(wsi, 0);
/* we are ready with the redirection pipes... run the thing */
#if !defined(LWS_HAVE_VFORK) || !defined(LWS_HAVE_EXECVPE)
cgi->pid = fork();
@ -1906,9 +1924,17 @@ lws_cgi(struct lws *wsi, const char * const *exec_array, int script_uri_path_len
goto bail3;
}
if (cgi->pid)
#if defined(__linux__)
prctl(PR_SET_PDEATHSIG, SIGTERM);
#endif
setpgrp();
if (cgi->pid) {
/* we are the parent process */
wsi->context->count_cgi_spawned++;
lwsl_notice("%s: cgi %p spawned PID %d\n", __func__, cgi, cgi->pid);
return 0;
}
/* somewhere we can at least read things and enter it */
if (chdir("/tmp"))
@ -2112,26 +2138,58 @@ lws_cgi_write_split_stdout_headers(struct lws *wsi)
LWS_VISIBLE LWS_EXTERN int
lws_cgi_kill(struct lws *wsi)
{
struct lws_context_per_thread *pt = &wsi->context->pt[(int)wsi->tsi];
struct lws_cgi **pcgi = &pt->cgi_list;
struct lws_cgi_args args;
int n, status, do_close = 0;
int status, n;
lwsl_debug("!!!!! %s: %p\n", __func__, wsi);
lwsl_debug("%s: %p\n", __func__, wsi);
if (!wsi->cgi)
return 0;
if (wsi->cgi->pid > 0) {
/* kill the process */
n = kill(wsi->cgi->pid, SIGTERM);
if (n < 0) {
lwsl_err("%s: failed\n", __func__);
return 1;
n = waitpid(wsi->cgi->pid, &status, WNOHANG);
if (n > 0) {
lwsl_notice("%s: PID %d reaped\n", __func__,
wsi->cgi->pid);
goto handled;
}
/* kill the process group */
n = kill(-wsi->cgi->pid, SIGTERM);
lwsl_notice("%s: SIGTERM child PID %d says %d (errno %d)\n", __func__,
wsi->cgi->pid, n, errno);
if (n < 0) {
/*
* hum seen errno=3 when process is listed in ps,
* it seems we don't always retain process grouping
*
* Direct these fallback attempt to the exact child
*/
n = kill(wsi->cgi->pid, SIGTERM);
if (n < 0) {
n = kill(wsi->cgi->pid, SIGPIPE);
if (n < 0) {
n = kill(wsi->cgi->pid, SIGKILL);
if (n < 0)
lwsl_err("%s: SIGKILL PID %d failed errno %d (maybe zombie)\n",
__func__, wsi->cgi->pid, errno);
}
}
}
/* He could be unkillable because he's a zombie */
n = 1;
while (n > 0) {
n = waitpid(-wsi->cgi->pid, &status, WNOHANG);
if (n > 0)
lwsl_notice("%s: reaped PID %d\n", __func__, n);
if (n <= 0) {
n = waitpid(wsi->cgi->pid, &status, WNOHANG);
if (n > 0)
lwsl_notice("%s: reaped PID %d\n", __func__, n);
}
}
waitpid(wsi->cgi->pid, &status, 0); /* !!! may hang !!! */
}
handled:
args.stdwsi = &wsi->cgi->stdwsi[0];
if (wsi->cgi->pid != -1 && user_callback_handle_rxflow(
@ -2140,22 +2198,8 @@ lws_cgi_kill(struct lws *wsi)
wsi->user_space,
(void *)&args, 0)) {
wsi->cgi->pid = -1;
do_close = !wsi->cgi->being_closed;
}
/* remove us from the cgi list */
while (*pcgi) {
if (*pcgi == wsi->cgi) {
/* drop us from the pt cgi list */
*pcgi = (*pcgi)->cgi_list;
break;
}
pcgi = &(*pcgi)->cgi_list;
}
if (do_close) {
lwsl_debug("!!!!! %s: do_close\n", __func__);
lws_close_free_wsi(wsi, 0);
if (!wsi->cgi->being_closed)
lws_close_free_wsi(wsi, 0);
}
return 0;
@ -2164,12 +2208,68 @@ lws_cgi_kill(struct lws *wsi)
LWS_EXTERN int
lws_cgi_kill_terminated(struct lws_context_per_thread *pt)
{
struct lws_cgi **pcgi = &pt->cgi_list, *cgi;
int status;
struct lws_cgi **pcgi, *cgi = NULL;
int status, n = 1;
/* check all the subprocesses on the cgi list for termination */
while (n > 0) {
/* find finished guys but don't reap yet */
n = waitpid(-1, &status, WNOHANG | WNOWAIT);
if (n <= 0)
continue;
lwsl_notice("%s: observed PID %d terminated\n", __func__, n);
pcgi = &pt->cgi_list;
/* check all the subprocesses on the cgi list */
while (*pcgi) {
/* get the next one first as list may change */
cgi = *pcgi;
pcgi = &(*pcgi)->cgi_list;
if (cgi->pid <= 0)
continue;
/* wait for stdout to be drained */
if (cgi->content_length > cgi->content_length_seen)
continue;
if (cgi->content_length)
lwsl_notice("%s: wsi %p: expected content length seen: %ld\n",
__func__, cgi->wsi, cgi->content_length_seen);
/* reap it */
waitpid(n, &status, WNOHANG);
/*
* he's already terminated so no need for kill()
* but we should do the terminated cgi callback
* and close him if he's not already closing
*/
if (n == cgi->pid) {
lwsl_notice("%s: found PID %d on cgi list\n",
__func__, n);
/* defeat kill() */
cgi->pid = 0;
lws_cgi_kill(cgi->wsi);
break;
}
cgi = NULL;
}
/* if not found on the cgi list, as he's one of ours, reap */
if (!cgi) {
lwsl_notice("%s: reading PID %d although no cgi match\n",
__func__, n);
waitpid(n, &status, WNOHANG);
}
}
/* disable this to confirm timeout cgi cleanup flow */
#if 1
pcgi = &pt->cgi_list;
/* check all the subprocesses on the cgi list */
while (*pcgi) {
/* get the next one because we may close current one next */
/* get the next one first as list may change */
cgi = *pcgi;
pcgi = &(*pcgi)->cgi_list;
@ -2180,13 +2280,23 @@ lws_cgi_kill_terminated(struct lws_context_per_thread *pt)
if (cgi->content_length > cgi->content_length_seen)
continue;
if (cgi->content_length)
lwsl_notice("%s: wsi %p: expected content length seen: %ld\n",
__func__, cgi->wsi, cgi->content_length_seen);
/* reap it */
if (waitpid(cgi->pid, &status, WNOHANG) > 0) {
lwsl_notice("content length seen: %ld\n", cgi->content_length_seen);
lwsl_notice("%s: found PID %d on cgi list\n",
__func__, cgi->pid);
/* defeat kill() */
cgi->pid = 0;
lws_cgi_kill(cgi->wsi);
pcgi = &pt->cgi_list;
break;
}
}
#endif
return 0;
}
@ -2330,26 +2440,75 @@ LWS_EXTERN LWS_VISIBLE int
lws_json_dump_context(const struct lws_context *context, char *buf, int len)
{
char *orig = buf, *end = buf + len - 1, first = 1;
struct lws_vhost *vh = context->vhost_list;
const struct lws_vhost *vh = context->vhost_list;
#ifdef LWS_WITH_CGI
struct lws_cgi * const *pcgi;
#endif
const struct lws_context_per_thread *pt;
time_t t = time(NULL);
int listening = 0, cgi_count = 0, n;
buf += snprintf(buf, end - buf, "{ "
"\"uptime\":\"%ld\",\n"
"\"wsi_alive\":\"%d\",\n"
"\"vhosts\":[\n ",
"\"cgi_spawned\":\"%d\",\n"
"\"pt_fd_max\":\"%d\",\n"
"\"ah_pool_max\":\"%d\",\n"
"\"wsi_alive\":\"%d\",\n",
(unsigned long)(t - context->time_up),
context->count_cgi_spawned,
context->fd_limit_per_thread,
context->max_http_header_pool,
context->count_wsi_allocated);
buf += snprintf(buf, end - buf, "\"pt\":[\n ");
for (n = 0; n < context->count_threads; n++) {
pt = &context->pt[n];
if (n)
buf += snprintf(buf, end - buf, ",");
buf += snprintf(buf, end - buf,
"\n {\n"
" \"fds_count\":\"%d\",\n"
" \"ah_pool_inuse\":\"%d\",\n"
" \"ah_wait_list\":\"%d\"\n"
" }",
pt->fds_count,
pt->ah_count_in_use,
pt->ah_wait_list_length);
}
buf += snprintf(buf, end - buf, "], \"vhosts\":[\n ");
while (vh) {
if (!first)
if(buf != end)
*buf++ = ',';
buf += lws_json_dump_vhost(vh, buf, end - buf);
first = 0;
if (vh->lserv_wsi)
listening++;
vh = vh->vhost_next;
}
buf += snprintf(buf, end - buf, "]}\n ");
buf += snprintf(buf, end - buf, "],\n\"listen_wsi\":\"%d\"",
listening);
#ifdef LWS_WITH_CGI
for (n = 0; n < context->count_threads; n++) {
pt = &context->pt[n];
pcgi = &pt->cgi_list;
while (*pcgi) {
pcgi = &(*pcgi)->cgi_list;
cgi_count++;
}
}
#endif
buf += snprintf(buf, end - buf, ",\n \"cgi_alive\":\"%d\"\n ",
cgi_count);
buf += snprintf(buf, end - buf, "}\n ");
return buf - orig;
}

View file

@ -748,6 +748,7 @@ struct lws_context {
#define lws_ssl_anybody_has_buffered_read_tsi(ctx, t) (0)
#endif
int count_wsi_allocated;
int count_cgi_spawned;
unsigned int options;
unsigned int fd_limit_per_thread;
unsigned int timeout_secs;

View file

@ -1112,7 +1112,9 @@ lws_http_transaction_completed(struct lws *wsi)
wsi->mode = LWSCM_HTTP_SERVING;
wsi->u.http.content_length = 0;
wsi->hdr_parsing_completed = 0;
#ifdef LWS_WITH_ACCESS_LOG
wsi->access_log.sent = 0;
#endif
if (wsi->vhost->keepalive_timeout)
n = PENDING_TIMEOUT_HTTP_KEEPALIVE_IDLE;

View file

@ -171,23 +171,45 @@ function get_appropriate_ws_url()
jso = JSON.parse(msg.data);
s="<table><tr><td class=\"c\">" +
"Context</td><td>Uptime: " + san(jso.uptime) + "<br>" +
"Current wsi alive: " + san(jso.wsi_alive) +
"Context</td><td>Uptime: " + san(jso.uptime) + ", " +
"Listening wsi: " + san(jso.listen_wsi) + ", " +
"Current wsi alive: " + (parseInt(san(jso.wsi_alive)) -
parseInt(san(jso.listen_wsi))) + "<br>" +
"Current cgi alive: " + san(jso.cgi_alive) + ", " +
"Total CGI spawned: " + san(jso.cgi_spawned) +
"</td></tr>";
var n;
for (n = 0; n < jso.pt.length; n++) {
s = s + "<tr><td class=\"l\">service thread " + (n + 1) +
"</td><td>" +
"fds: " + san(jso.pt[n].fds_count) + " / " +
san(jso.pt_fd_max) + ", ";
s = s + "ah pool: " + san(jso.pt[n].ah_pool_inuse) + " / " +
san(jso.ah_pool_max) + ", " +
"ah waiting list: " + san(jso.pt[n].ah_wait_list);
s = s + "</td></tr>";
}
for (n = 0; n < jso.vhosts.length; n++) {
s = s + "<tr><td class=\"l\">vhost " + (n + 1) +
"</td><td><b>" + san(jso.vhosts[n].name) + ":" +
san(jso.vhosts[n].port) +
"</b><br>" +
"ssl " + san(jso.vhosts[n].use_ssl) + ", " +
"sts " + san(jso.vhosts[n].sts) + "<br>" +
"</td><td><b>";
if (jso.vhosts[n].use_ssl == '1')
s = s + "https://";
else
s = s + "http://";
s = s + san(jso.vhosts[n].name) + ":" +
san(jso.vhosts[n].port) + "</b>";
if (jso.vhosts[n].sts == '1')
s = s + " (STS)";
s = s +"<br>" +
"rx " + san(jso.vhosts[n].rx) + ", " +
"tx " + san(jso.vhosts[n].tx) + "<br>" +
"total connections " + san(jso.vhosts[n].conn) + ", " +
"total http transactions " + san(jso.vhosts[n].trans) + "<br>" +
"Upgrades to ws " + san(jso.vhosts[n].ws_upg) + ", " +
"to http2 " + san(jso.vhosts[n].http2_upg) + "<br>" +
"Upgrades to ws: " + san(jso.vhosts[n].ws_upg) + ", " +
"to http/2: " + san(jso.vhosts[n].http2_upg) + "<br>" +
"<table><tr><td class=t colspan=2>Mounts</td></tr>";
var m;