From 27cc9e33e03657f0146ec79b8e34e70a9ef55eab Mon Sep 17 00:00:00 2001 From: Andy Green Date: Wed, 10 Mar 2021 12:58:46 +0000 Subject: [PATCH] client: DNS failure should retry while waiting for connect timeout If the DNS lookup fails, we just sit out the remaining connect time. The adapts it to reuse the wsi->sul_connect_timeout to schedule DNS lookup retries until we're out of time. Eventually we want to try other things as well, this is aligned with that. Found with fault injection. --- lib/core-net/client/connect3.c | 33 +++++++++++++++++-- lib/system/async-dns/async-dns-parse.c | 4 +-- lib/system/async-dns/async-dns.c | 2 +- .../api-tests/api-test-async-dns/main.c | 4 +-- .../CMakeLists.txt | 4 +-- .../minimal-secure-streams-testsfail.c | 25 +++++++------- 6 files changed, 49 insertions(+), 23 deletions(-) diff --git a/lib/core-net/client/connect3.c b/lib/core-net/client/connect3.c index ec1b250c3..ee08ffc54 100644 --- a/lib/core-net/client/connect3.c +++ b/lib/core-net/client/connect3.c @@ -39,6 +39,23 @@ lws_client_conn_wait_timeout(lws_sorted_usec_list_t *sul) lws_client_connect_3_connect(wsi, NULL, NULL, 0, NULL); } +void +lws_client_dns_retry_timeout(lws_sorted_usec_list_t *sul) +{ + struct lws *wsi = lws_container_of(sul, struct lws, + sul_connect_timeout); + + /* + * This limits the amount of dns lookups we will try before + * giving up and failing... it reuses sul_connect_timeout, which + * isn't officially used until we connected somewhere. + */ + + lwsl_info("%s: dns retry\n", __func__); + if (!lws_client_connect_2_dnsreq(wsi)) + lwsl_notice("%s: DNS lookup failed\n", __func__); +} + /* * Figure out if an ongoing connect() has arrived at a final disposition or not * @@ -146,6 +163,7 @@ lws_client_connect_3_connect(struct lws *wsi, const char *ads, */ if (result) { + lws_sul_cancel(&wsi->sul_connect_timeout); lws_sort_dns(wsi, result); #if defined(LWS_WITH_SYS_ASYNC_DNS) lws_async_dns_freeaddrinfo(&result); @@ -174,8 +192,19 @@ lws_client_connect_3_connect(struct lws *wsi, const char *ads, !wsi->speculative_connect_owner.count /* no spec attempt */ ) { lwsl_notice("%s: dns lookup failed %d\n", __func__, n); - cce = "dns lookup failed"; - goto oom4; + /* + * DNS lookup itself failed... let's try again until we + * timeout + */ + + lwsi_set_state(wsi, LRS_UNCONNECTED); + lws_sul_schedule(wsi->a.context, 0, &wsi->sul_connect_timeout, + lws_client_dns_retry_timeout, + LWS_USEC_PER_SEC); + return wsi; + +// cce = "dns lookup failed"; +// goto oom4; } /* diff --git a/lib/system/async-dns/async-dns-parse.c b/lib/system/async-dns/async-dns-parse.c index 001455550..cc9693ab6 100644 --- a/lib/system/async-dns/async-dns-parse.c +++ b/lib/system/async-dns/async-dns-parse.c @@ -263,8 +263,8 @@ start: if (n < 1 || n != m || strncmp(stack[0].name, stack[stp].name, (unsigned int)n)) { - lwsl_notice("%s: skipping %s vs %s\n", __func__, - stack[0].name, stack[stp].name); + //lwsl_notice("%s: skipping %s vs %s\n", __func__, + // stack[0].name, stack[stp].name); goto skip; } diff --git a/lib/system/async-dns/async-dns.c b/lib/system/async-dns/async-dns.c index ed13fd740..ba1552980 100644 --- a/lib/system/async-dns/async-dns.c +++ b/lib/system/async-dns/async-dns.c @@ -398,7 +398,7 @@ lws_adns_get_cache(lws_async_dns_t *dns, const char *name) lws_dll2_get_head(&dns->cached)) { c = lws_container_of(d, lws_adns_cache_t, list); - lwsl_notice("%s vs %s (inc %d)\n", name, c->name, c->incomplete); + // lwsl_notice("%s vs %s (inc %d)\n", name, c->name, c->incomplete); if (!c->incomplete && !strcasecmp(name, c->name)) { /* Keep sorted by LRU: move to the head */ diff --git a/minimal-examples/api-tests/api-test-async-dns/main.c b/minimal-examples/api-tests/api-test-async-dns/main.c index ae0f5e0c7..4af79d329 100644 --- a/minimal-examples/api-tests/api-test-async-dns/main.c +++ b/minimal-examples/api-tests/api-test-async-dns/main.c @@ -112,8 +112,8 @@ next_test_cb(lws_sorted_usec_list_t *sul) adt[dtest].dns_name, (adns_query_type_t)adt[dtest].recordtype, cb1, NULL, context); - if (m != LADNS_RET_CONTINUING && m != LADNS_RET_FOUND) { - lwsl_err("%s: adns 1 failed: %d\n", __func__, m); + if (m != LADNS_RET_CONTINUING && m != LADNS_RET_FOUND && m != LADNS_RET_FAILED_WSI_CLOSED) { + lwsl_err("%s: adns 1: %s failed: %d\n", __func__, adt[dtest].dns_name, m); interrupted = 1; } } diff --git a/minimal-examples/secure-streams/minimal-secure-streams-testsfail/CMakeLists.txt b/minimal-examples/secure-streams/minimal-secure-streams-testsfail/CMakeLists.txt index 28b8c156a..824449434 100644 --- a/minimal-examples/secure-streams/minimal-secure-streams-testsfail/CMakeLists.txt +++ b/minimal-examples/secure-streams/minimal-secure-streams-testsfail/CMakeLists.txt @@ -31,7 +31,7 @@ if (requirements) set_tests_properties(ss-tf PROPERTIES WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/minimal-examples/secure-streams/minimal-secure-streams-testsfail - TIMEOUT 240) + TIMEOUT 440) if (HAS_LWS_WITH_SECURE_STREAMS_PROXY_API OR LWS_WITH_SECURE_STREAMS_PROXY_API) @@ -74,7 +74,7 @@ if (requirements) set_tests_properties(sspc-minimaltf PROPERTIES WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/minimal-examples/secure-streams/minimal-secure-streams-testsfail FIXTURES_REQUIRED "sstfproxy" - TIMEOUT 240) + TIMEOUT 440) endif() diff --git a/minimal-examples/secure-streams/minimal-secure-streams-testsfail/minimal-secure-streams-testsfail.c b/minimal-examples/secure-streams/minimal-secure-streams-testsfail/minimal-secure-streams-testsfail.c index e5339e147..db076e1d1 100644 --- a/minimal-examples/secure-streams/minimal-secure-streams-testsfail/minimal-secure-streams-testsfail.c +++ b/minimal-examples/secure-streams/minimal-secure-streams-testsfail/minimal-secure-streams-testsfail.c @@ -48,9 +48,9 @@ static const char * const default_ss_policy = "\"retry\": [" /* named backoff / retry strategies */ "{\"default\": {" - "\"backoff\": [ 1000, 1000, 1000, 1000, 1000" + "\"backoff\": [ 1000, 1000, 1000, 1000" "]," - "\"conceal\":" "5," + "\"conceal\":" "4," "\"jitterpc\":" "20," "\"svalidping\":" "30," "\"svalidhup\":" "35" @@ -504,19 +504,19 @@ struct tests_seq { { "h1:80 NXDOMAIN exhaust retries", - "nxd_h1", 35 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + "nxd_h1", 65 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, (1 << LWSSSCS_QOS_ACK_REMOTE) | (1 << LWSSSCS_QOS_NACK_REMOTE) | (1 << LWSSSCS_TIMEOUT) }, { "h1:443 NXDOMAIN exhaust retries", - "nxd_h1_tls", 35 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + "nxd_h1_tls", 65 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, (1 << LWSSSCS_QOS_ACK_REMOTE) | (1 << LWSSSCS_QOS_NACK_REMOTE) | (1 << LWSSSCS_TIMEOUT) }, { "h2:443 NXDOMAIN exhaust retries", - "nxd_h2_tls", 35 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + "nxd_h2_tls", 65 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, (1 << LWSSSCS_QOS_ACK_REMOTE) | (1 << LWSSSCS_QOS_NACK_REMOTE) | (1 << LWSSSCS_TIMEOUT) }, @@ -553,21 +553,18 @@ struct tests_seq { { "h1:badcert_hostname", - "badcert_hostname", 5 * LWS_US_PER_SEC, LWSSSCS_TIMEOUT, - (1 << LWSSSCS_QOS_NACK_REMOTE) | - (1 << LWSSSCS_ALL_RETRIES_FAILED) + "badcert_hostname", 6 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + (1 << LWSSSCS_QOS_NACK_REMOTE) }, { "h1:badcert_expired", - "badcert_expired", 5 * LWS_US_PER_SEC, LWSSSCS_TIMEOUT, - (1 << LWSSSCS_QOS_NACK_REMOTE) | - (1 << LWSSSCS_ALL_RETRIES_FAILED) + "badcert_expired", 6 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + (1 << LWSSSCS_QOS_NACK_REMOTE) }, { "h1:badcert_selfsigned", - "badcert_selfsigned", 5 * LWS_US_PER_SEC, LWSSSCS_TIMEOUT, - (1 << LWSSSCS_QOS_NACK_REMOTE) | - (1 << LWSSSCS_ALL_RETRIES_FAILED) + "badcert_selfsigned", 6 * LWS_US_PER_SEC, LWSSSCS_ALL_RETRIES_FAILED, + (1 << LWSSSCS_QOS_NACK_REMOTE) }, };