diff --git a/CMakeLists-implied-options.txt b/CMakeLists-implied-options.txt index 5a5d426ff..3ebbe62b0 100644 --- a/CMakeLists-implied-options.txt +++ b/CMakeLists-implied-options.txt @@ -375,6 +375,12 @@ if (LWS_WITH_UPNG) set(LWS_WITH_GZINFLATE 1) endif() +if (LWS_WITH_OTA) + set(LWS_WITH_JOSE 1) + set(LWS_WITH_GENCRYPTO 1) + set(LWS_WITH_GZINFLATE 1) +endif() + # using any abstract protocol enables LWS_WITH_ABSTRACT #if (LWS_WITH_SMTP) diff --git a/CMakeLists.txt b/CMakeLists.txt index bdda6ddb4..4283a2ccd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,8 @@ if (ESP_PLATFORM) $ENV{IDF_PATH}/components/soc/include/ $ENV{IDF_PATH}/components/esp_hw_support/include $ENV{IDF_PATH}/components/hal/${CONFIG_IDF_TARGET}/include/ + $ENV{IDF_PATH}/components/app_update/include/ + $ENV{IDF_PATH}/components/bootloader_support/include ) if (CONFIG_IDF_TARGET_ARCH_RISCV) @@ -342,6 +344,12 @@ set(LWS_COMPRESSED_BACKTRACES_SNIP_PRE 2 CACHE STRING "Amount of callstack to sn set(LWS_COMPRESSED_BACKTRACES_SNIP_POST 1 CACHE STRING "Amount of callstack to snip from bottom") option(LWS_WITH_ALLOC_METADATA_LWS "Build lws_*alloc() with compressed backtraces (requires WITH_COMPRESSED_BACKTRACES)" OFF) +# +# Over The Air updates +# +option(LWS_WITH_OTA "Build with support for Over The Air update download and validation" OFF) +set(LWS_OTA_VARIANT "set-LWS_OTA_VARIANT" CACHE STRING "Build Variant ID for OTA filtering") +set(LWS_OTA_PUBLIC_JWK "set-LWS_OTA_PUBLIC_JWK" CACHE STRING "Filepath to public JWK used for OTA validation") if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS") diff --git a/READMEs/README.lws_ota.md b/READMEs/README.lws_ota.md new file mode 100644 index 000000000..40541a322 --- /dev/null +++ b/READMEs/README.lws_ota.md @@ -0,0 +1,302 @@ +# lws_ota Over The Air updates + +|Area|Definition| +|---|---| +|Cmake|`LWS_WITH_OTA` off by default| +|API|`./include/libwebsockets/lws-ota.h`| +|README|./READMEs/README.lws_ota.md| + +![overview](../doc-assets/lws_ota.png) + 
+`lws_ota` apis provide a standardized way to securely distribute gzipped +firmware update blobs over the internet, using a detached signed manifest that +describes the latest update for a particular variant and the corresponding +compressed firmware update blob. + +It provides generic client infrastructure to fetch and check the related manifest on an +http update server to discover new firmware at boot and periodically, download +the related firmware via Secure Streams, decompress on-the-fly, validate the +manifest signature against your issuer public key, and validate the decompressed +download against the manifest's signed hash, then if valid, interact with +platform-specific firmware update procedures such as erase and flash as +implemented in user code via lws_system, asynchronously. + +This gives you a way to have standardized production, identification, discovery +and security on firmware updates leveraging the lws arrangements for all the +generic work no matter the SoC involved, while still remaining compatible with +SoC-specific OTA procedures flexibly. + +`lws_ota` signing is an extra layer on top ensuring that only the firmware +issuer with the appropriate key can sign the update manifest for the firmware +blobs that will be accepted by the client as valid. SoC-specific firmware +signing (eg, with bootloader key) is done separately before the `lws_ota` layer; +SoC-specific signatures should already have been applied on the blob before +`lws_ota` hashes it and signs the hash in the manifest. + +## Separation of generic OTA and platform operations + +lws_ota handles generic OTA steps such as checking for new firmware, starting +a new OTA action, downloading the image, checking its integrity and deciding if +it should be finalized. + +For all platform-specific steps such as selection of OTA slot or the actual +flashing, there is an lws_system ops struct `lws_ota_ops_t` that lets the user +code define how the operations are actually done, outside of lws itself. 
+lws_ota calls these user-defined ops struct members to get platform-specific +stuff done without needing to understand the details. + +These operation apis are given a completion callback and opaque completion +context pointer to call when their async operation completes; if the platform +implementation is synchronous, it's also possible to block and call the +completion callback before returning. + +## Structure of firmware repo + +The lws_ota network code uses "ota" streamtype from the policy, this sets the +endpoint address of the firmware repo and the first part of the URL path to use +statically, reflecting where the generic firmware update infrastructure lives. + +On top of that, at build-time, cmake var `LWS_OTA_VARIANT` can be set to a +URL-compatible string identifying the build variant, this is appended to the +URL path using metadata so a given variant can only see firmware related to +its specific kind of device. + +For example, the ota streamtype sets the endpoint (server address) and the first +part of the repo path, like this for lws examples + +``` + "}},{\"ota\": {" + "\"endpoint\":" "\"libwebsockets.org\"," + "\"port\":" "443," + "\"protocol\":" "\"h2\"," + "\"http_method\":" "\"GET\"," + "\"http_url\":" "\"firmware/examples/${ota_variant}/${file}\"," + "\"metadata\": [{\n" + "\"ota_variant\":" "\"\",\n" + "\"file\":" "\"\"\n" + "}],\n" + ... +``` + +`ota_variant` metadata is set to the application's unique variant name, and +`file` metadata is set first to "manifest.jws" to find out if anything newer is +available, and if there is, to whatever filepath is told in the manifest as the +latest. + +## Update discovery approach + +lws_ota autonomously checks for updates at boot, 30 seconds plus a random jitter of up to +15 seconds after reaching OPERATIONAL, and thereafter at intervals set in the +lws_ota_ops_t struct, by default every 24h. 
+ +To minimize network load from potentially large fleets checking for updates, +each variant has at least two files in its directory in the repo. + + - a signed detached JWS containing JSON manifest information about the latest + update for the variant it relates to, with the fixed name "manifest.jws", an + example looks like this + +``` +eyJhbGciOiJFUzUxMiJ9.ewoJInZhcmlhbnQiOgkieHl6LXYxIiwKCSJwYXRoIjoJCSJ4eXotdjEtM +jAyMi0wMi0yMi0wMS5sd3NvdGEiLAoJInNpemUiOgkJMTIzNDU2NywKCSJ1bml4dGltZSI6CTE2NDU +3NjgxMDEsCgkic2hhNTEyIjoJIjk5MjYwMzNkY2UwZDE4NmM0ZTNkMzViMDM4MjU2NTYwMzFlZTQzN +jA4NDFhNGI5ZGM2ZGY5YzdkNDZhZGRlMDM3NmJhZWQ0ODk5NDhkYjEwMmQ3ZjFmMWJkODVmYTJkNDc +zOTNhZjg0YTMzZGQyNmZlZDQ5ZDZmNDBjNTJlMGQ2IiwKCSJyZXNldCI6CXRydWUKfQoK.AKbYKDcG +cV5LwKSs9_c8T3qusD_PMrC2zCQjbNvxmcvstAE6DDs6NwP6PaaW9aLO7uQ2uZtXPC_01VRFiasteX +55AXp7-flJdWOOS-_K0BJMwbb-qO62QWDV3-7rr60JUr8IQ8FTmXjJkFOzYXG1iAVevOeo4kyCNcF2 +CKsJgVnrqwFn +``` + +... the payload part above (between the two dots) decodes to... + +```json +{ + "variant": "xyz-v1", + "path": "xyz-v1-2022-02-22-01.lwsota", + "size": 1234567, + "unixtime": 1645768101, + "sha512": "9926033dce0d186c4e3d35b03825656031ee4360841a4b9dc6df9c7d46adde0376baed489948db102d7f1f1bd85fa2d47393af84a33dd26fed49d6f40c52e0d6", + "reset": true +} +``` + +... this describes the valid update image including its sha512 hash and +expected size, and the filename on the server to fetch it from. + + - the matching, unmodified update image specified in the manifest, with + whatever signatures the SoC update or boot process requires to see applied. In + this example the update image is at the same url but file "xyz-v1-2022-02-22-01.lwsota" + +There is a separate signed, detached JWS manifest in each variant directory +(named `manifest.jws`) that describes the latest update available in the +same server directory. 
+ +By using JSON and standardized, agile signing and validation, using currently +secure crypto like SHA512 and EC P521, the information in the JWS is easily and +safely extensible. + +## Discovery of potential update vs updating + +If the downloaded manifest JWS describes an update that we want, lws tries to +advance the system state to `LWS_SYSTATE_MODAL_UPDATING`. User code can +register a notifier for system state changes that can either hook the transition +to this to indicate that the device is attempting to update, or countermand the +transition and defeat the update. If it is not allowed to reach the required +state, the update is ignored and will be retried at the next periodic check. + +## Using variant names as update epochs + +In the case that updates change data stored on devices and must be applied in +some order, for example because on-device state formats have changed and must be +upgraded, updates after the change can use a different variant name (and so a +different directory path on the update repo and variant name stored in the +firmware). + +Older devices can then update at any time, and will only see the older update +with the old variant name that brings their data to the new format, and has the +new variant name in itself, after rebooting into that and doing the local data +uplevel, it will have the new variant name and be able to see the latest updates +for that. + +One-time updates in user code should be written to occur at +`LWS_SYSTATE_ONE_TIME_UPDATES` lws_system notification state, before +OPERATIONAL, which triggers the boot update check that might otherwise occur +first. + +This method allows older devices to connect much later and still rejoin the +current updates cleanly, without needing all future updates to carry around the +code handling the data upgrade. 
+ +## Interface to platform flash operations + +An ops struct for lws_ota operations is defined by lws, along with an enum for +the async operations it is requested to do + +```C +typedef enum { + LWS_OTA_ASYNC_START = 1, + LWS_OTA_ASYNC_WRITE, + LWS_OTA_ASYNC_ABORT, + LWS_OTA_ASYNC_FINALIZE +} lws_ota_async_t; + +typedef struct { + + /* asynchronous (completions via lws_cancel_service) */ + + int (*ota_start)(struct lws_ota *g); + /**< Creates the ota task and queues LWS_OTA_ASYNC_START on it. */ + + void (*ota_queue)(struct lws_ota *g, lws_ota_async_t a); + /**< Queue next command to OTA task (args are in g) */ + + /* synchronous */ + + int (*ota_report_current)(struct lws_ota *g, int bad); + /**< Report information to the platform code about how we feel about the + * current boot... if we can check the OTA then we report it seems in + * good shape (bad = 0), if we can identify it's brain-damaged then + * (bad = 1). What action the platform takes about these reports is up + * to the platform code */ + + int (*ota_progress)(lws_ota_ret_t state, int percent); + /**< Gets called so the platform can represent OTA progress, give + * platform a chance to choose what to do about an available update */ + + int (*ota_get_last_fw_unixtime)(uint64_t *fw_unixtime); + /**< tries to recover the newest firmware unixtime that had been + * OTA'd into fw_unixtime, updates from same or earlier unixtime are + * ignored for update purposes. */ + + int ota_periodic_check_secs; + /**< Check after this many seconds for a new update */ +} lws_ota_ops_t; +``` + +If the platform being built has a specified OTA methodology, for example as with +esp-idf, lws may provide platform implementations for + + - `lws_plat_ota_start()` + - `lws_plat_ota_queue()` + - `lws_plat_ota_report_current()` + - `lws_plat_ota_get_last_fw_unixtime()` + +that are suitable for use as the correspondingly-named `lws_ota_ops_t` callbacks, either +directly or by being called through to by user implementations. 
+ +This means platform implementations for the flashing and OTA management part +only need to be done once per platform and can be reused easily by applications. + +OTA operations on the platform are typically done in their own thread, created +at `lws_plat_ota_start()` and terminated when the `ABORT` or `FINALIZE` operations +are queued. Such an implementation can be found for esp32 in +`./lib/plat/freertos/esp32/esp32-lws_ota.c`. + +The generic lws_ota code queues async operations on the thread using the +`(*ota_queue)` op and on completion, the thread calls `lws_cancel_service()` to +synchronize the result back with the generic lws_ota code in the lws event loop +thread. So there is a clean separation between generic OTA check, download and +validation flow, and platform-specific OTA actual flashing and slot selection +etc. + +## Storing firmware state + +The firmware itself contains a public `const char *lws_ota_variant`, which is +set via cmake symbol `LWS_OTA_VARIANT`. By convention (used for the signing +and upload script) the leaf directory of the cwd is the variant string used for +the build. + +This information is used as part of the url path when checking for updates, so +only updates appropriate for the currently installed build variant can be seen. + +Part of the information signed in the manifest is the unixtime of the firmware +blob file, the last installed firmware is stored by the platform-specific OTA +op in whatever manner suits the platform, for esp32 it used `lws_settings` apis +to store them in an esp-idf key-value store in a SPI flash partition. + +Subsequently when checking for updates, the new manifest's unixtime is compared +to the last installed update's unixtime, and ignored if older or same. + +## Creation of Signing and Verification keys + +The manifest needs some unique crypto keys to be signed with, and verified by. +The JWS lws uses needs Json Web Keys or JWKs. 
+ +Build lws with `-DLWS_WITH_JOSE=1`, make and make install, this creates some +`lws-crypto-*` examples for working with JOSE / JWK / JWS on your path. + +Produce a new P-521 (521-bit) EC JWK in both private and public-only forms like this: + +```bash +$ lws-crypto-jwk -t EC --curve P-521 \ + --kid="my.org lws_ota firmware signing key v1" \ + --use="sig" \ + --key-ops='sign verify' \ + --public my-lws-ota-v1.public.jwk >my-lws-ota-v1.private.jwk +``` + +You should place both your public and private JWKs in your build user's home +directory `~/.lws_ota/`, so they are available but secure to your build user. + +You should point cmake option `-DLWS_OTA_PUBLIC_JWK_FILE="$ENV{HOME}/.lws_ota/name-of-public.jwk"` to +the public JWK, so it can be imported into your build and made available to +lws_ota so it can validate the manifest JWS with it. + +## Creating the signed manifest and uploading to the repo + +Lws includes a script to process and upload your firmware image in one step, +`./contrib/lws_ota-manifest.sh`. + +The script takes the variant name from the last part of the cwd it is executed +from. + +The script takes three arguments, the firmware image, the path to the private +JWK for signing, and the host:path to ssh the files to. 
 Eg + +``` + $ ../../../../../contrib/lws_ota-manifest.sh \ + build/myapp.bin \ + ~/.lws_ota/my-lws-ota-v1.private.jwk \ + "libwebsockets.org:/var/www/libwebsockets.org/firmware/examples/" +``` + diff --git a/READMEs/README.lws_system.md b/READMEs/README.lws_system.md index e1a91eea4..4b0035663 100644 --- a/READMEs/README.lws_system.md +++ b/READMEs/README.lws_system.md @@ -18,6 +18,10 @@ typedef struct lws_system_ops { int (*attach)(struct lws_context *context, int tsi, lws_attach_cb_t cb, lws_system_states_t state, void *opaque, struct lws_attach_item **get); + int (*jit_trust_query)(struct lws_context *cx, const uint8_t *skid, + size_t skid_len, void *got_opaque); + lws_ota_ops_t ota_ops; + uint32_t wake_latency_us; } lws_system_ops_t; ``` @@ -26,6 +30,8 @@ typedef struct lws_system_ops { |`(*reboot)()`|Reboot the system| |`(*set_clock)()`|Set the system clock| |`(*attach)()`|Request an event loop callback from another thread context| +|`(*jit_trust_query)()`|Method for providing a trusted X.509 cert by ID (see JIT_TRUST)| +|`ota_ops`|Set of OTA-related operation implementations for platform| ### `reboot` @@ -42,6 +48,18 @@ example, for foreign threads to set up their event loop activity in their callback, and eg, exit once it is done, with their event loop activity able to continue wholly from the lws event loop thread and stack context. +### `jit_trust_query` + +JIT_TRUST handles most of the generic work in lws, but how the platform stores +and retrieves its trusted CA certs is platform-specific, and handled by the +user code for this. + +### `ota_ops` + +Device-specific operations to perform OTA flashing. + +See README.lws_ota.md / include/libwebsockets/lws-ota.h + ## Foreign thread `attach` architecture When lws is started, it should define an `lws_system_ops_t` at context creation @@ -188,6 +206,8 @@ for various steps leading up to normal operation. 
By default it acts in a backwards-compatible way and directly reaches the OPERATIONAL state just after the context is created. +![overview](../doc-assets/lws_system_states.png) + However other pieces of lws, and user, code may define notification handlers that get called back when the state changes incrementally, and may veto or delay the changes until work necessary for the new state has completed asynchronously. @@ -205,9 +225,12 @@ The generic states defined are: |`LWS_SYSTATE_REGISTERED`|The device has a registered identity| |`LWS_SYSTATE_AUTH1`|The device identity has produced a time-limited access token| |`LWS_SYSTATE_AUTH2`|Optional second access token for different services| +|`LWS_SYSTATE_ONE_TIME_UPDATES`|If firmware updates need to do one-time operations on data, they should do it at this point before OPERATIONAL| |`LWS_SYSTATE_OPERATIONAL`|The system is ready for user code to work normally| |`LWS_SYSTATE_POLICY_INVALID`|All connections are being dropped because policy information is changing. 
It will transition back to `LWS_SYSTATE_INITIALIZED` and onward to `OPERATIONAL` again afterwards with the new policy| |`LWS_SYSTATE_CONTEXT_DESTROYING`|Context is going down and smd with it| +|`LWS_SYSTATE_AWAITING_MODAL_UPDATING`|We are trying to get agreement to enter MODAL_UPDATING state| +|`LWS_SYSTATE_MODAL_UPDATING`|We are in modal update state| ### Inserting a notifier diff --git a/cmake/lws_config.h.in b/cmake/lws_config.h.in index 2bfe405aa..20992de2d 100644 --- a/cmake/lws_config.h.in +++ b/cmake/lws_config.h.in @@ -123,6 +123,8 @@ #cmakedefine LWS_ONLY_SSPC #cmakedefine LWS_OPENSSL_CLIENT_CERTS "${LWS_OPENSSL_CLIENT_CERTS}" #cmakedefine LWS_OPENSSL_SUPPORT +#cmakedefine LWS_OTA_PUBLIC_JWK "${LWS_OTA_PUBLIC_JWK}" +#cmakedefine LWS_OTA_VARIANT "${LWS_OTA_VARIANT}" #cmakedefine LWS_PLAT_OPTEE #cmakedefine LWS_PLAT_UNIX #cmakedefine LWS_PLAT_FREERTOS @@ -197,6 +199,7 @@ #cmakedefine LWS_WITH_NETLINK #cmakedefine LWS_WITH_NETWORK #cmakedefine LWS_WITH_NO_LOGS +#cmakedefine LWS_WITH_OTA #cmakedefine LWS_WITH_CACHE_NSCOOKIEJAR #cmakedefine LWS_WITH_CLIENT #cmakedefine LWS_WITHOUT_EXTENSIONS diff --git a/contrib/lws_ota-manifest.sh b/contrib/lws_ota-manifest.sh new file mode 100755 index 000000000..acca444e3 --- /dev/null +++ b/contrib/lws_ota-manifest.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +# Usage: +# +# lws-ota-manifest.sh +# +# We take various measurements of the binary update into a JSON manifest, sign the +# manifest, then gzip the image and upload both to an http server. 
+ +# repo server base address for ssh (host:path/); all three arguments are required +REPO="${3:?usage: lws_ota-manifest.sh IMAGE PRIVATE_JWK HOST:PATH}" +JWK_PRIVKEY_PATH="$2" + +# the leaf part of the build dir path is the variant name +VAR=$(basename "$(pwd)") +UT=`date +%s` +size=`stat -c %s "$1"` +unixtime=`stat -c %Y "$1"` +gzimg="$VAR-$UT.img.gz" + +printf '%s' "{ \"variant\": \"$VAR\", \"path\": \"$gzimg\", \"size\": $size, \"unixtime\": $unixtime, \"sha512\": \"$(sha512sum "$1" | cut -d' ' -f1)\", \"reset\": true }" | lws-crypto-jws -s "ES512" -k "$JWK_PRIVKEY_PATH" 2>/dev/null > .manifest.jws.1 || { echo "lws_ota-manifest.sh: signing the manifest failed" >&2; exit 1; } + +cp "$1" .image || exit 1 +rm -f .image.gz +gzip .image || exit 1 +scp .image.gz "root@$REPO$VAR/$gzimg" || exit 1 +scp .manifest.jws.1 "root@$REPO$VAR/manifest.jws" + diff --git a/doc-assets/lws_ota.png b/doc-assets/lws_ota.png new file mode 100644 index 000000000..aa9517b1c Binary files /dev/null and b/doc-assets/lws_ota.png differ diff --git a/doc-assets/lws_system_states.png b/doc-assets/lws_system_states.png new file mode 100644 index 000000000..30e85fa7f Binary files /dev/null and b/doc-assets/lws_system_states.png differ diff --git a/include/libwebsockets.h b/include/libwebsockets.h index 8923035bb..49721cab2 100644 --- a/include/libwebsockets.h +++ b/include/libwebsockets.h @@ -687,6 +687,7 @@ lws_fx_string(const lws_fx_t *a, char *buf, size_t size); #include #endif +#include #include #if defined(LWS_WITH_NETWORK) #include diff --git a/include/libwebsockets/lws-ota.h b/include/libwebsockets/lws-ota.h new file mode 100644 index 000000000..1dd6e9654 --- /dev/null +++ b/include/libwebsockets/lws-ota.h @@ -0,0 +1,122 @@ +/* + * lws OTA updates + * + * Copyright (C) 2019 - 2022 Andy Green + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * This is the platform interface that lws_ota uses to flash new firmware. + * The platform implementation for these ops is set via lws_system and consists + * of user code. + * + * All the update-related calls have async interfaces with a callback and opaque + * callback context that is called on completion. This allows us to, eg, + * download the next buffer while flashing the previous one. + * + * If the platform implementation is actually synchronous, then just call the + * callback before returning. + * + * If it is async, because eg, erase is slow, in the platform ota op + * implementation spawn a thread to do the platform operation, return + * immediately with LWSOTARET_ONGOING, and call the callback from the spawned + * thread context with the real return before terminating the thread. 
+ */ + +typedef void * lws_ota_process_t; + +typedef enum { + LWSOTARET_OK, + LWSOTARET_ONGOING, /* result not ready to read yet */ + LWSOTARET_REJECTED, + LWSOTARET_NOSLOT, + + LWSOTARET_UPDATE_AVAILABLE, + LWSOTARET_PROGRESS, + LWSOTARET_FAILED, + LWSOTARET_COMPLETED +} lws_ota_ret_t; + +typedef enum { + LWS_OTA_ASYNC_START = 1, + LWS_OTA_ASYNC_WRITE, + LWS_OTA_ASYNC_ABORT, + LWS_OTA_ASYNC_FINALIZE +} lws_ota_async_t; + +struct lws_ota; + +typedef void (*lws_ota_cb_t)(void *ctx, lws_ota_ret_t r); + +typedef struct { + + /* asynchronous (completions via lws_cancel_service) */ + + int (*ota_start)(struct lws_ota *g); + /**< Creates the ota task and queues LWS_OTA_ASYNC_START on it. */ + + void (*ota_queue)(struct lws_ota *g, lws_ota_async_t a); + /**< Queue next command to OTA task (args are in g) */ + + /* synchronous */ + + int (*ota_report_current)(struct lws_ota *g, int bad); + /**< Report information to the platform code about how we feel about the + * current boot... if we can check the OTA then we report it seems in + * good shape (bad = 0), if we can identify it's brain-damaged then + * (bad = 1). What action the platform takes about these reports is up + * to the platform code */ + + int (*ota_progress)(lws_ota_ret_t state, int percent); + /**< Gets called so the platform can represent OTA progress, give + * platform a chance to choose what to do about an available update */ + + int (*ota_get_last_fw_unixtime)(uint64_t *fw_unixtime); + /**< tries to recover the newest firmware unixtime that had been + * OTA'd into fw_unixtime, updates from same or earlier unixtime are + * ignored for update purposes. */ + + int ota_periodic_check_secs; + /**< Check after this many seconds for a new update */ +} lws_ota_ops_t; + +/** + * lws_ota_variant_name() - returns the build variant name + * + * Returns a string that uniquely identifies the kind of firmware build this + * device is running. 
+ */ + +LWS_VISIBLE LWS_EXTERN const char * +lws_ota_variant_name(void); + +LWS_VISIBLE LWS_EXTERN int +lws_plat_ota_start(struct lws_ota *g); + + +#define LWSOTAFIN_OK 0 +#define LWSOTAFIN_BAD 1 + +LWS_VISIBLE LWS_EXTERN void +lws_plat_ota_queue(struct lws_ota *g, lws_ota_async_t a); + +LWS_VISIBLE LWS_EXTERN int +lws_plat_ota_report_current(struct lws_ota *g, int bad); + +LWS_VISIBLE LWS_EXTERN int +lws_plat_ota_get_last_fw_unixtime(uint64_t *fw_unixtime); diff --git a/include/libwebsockets/lws-system.h b/include/libwebsockets/lws-system.h index ec9432311..5739b7da3 100644 --- a/include/libwebsockets/lws-system.h +++ b/include/libwebsockets/lws-system.h @@ -133,6 +133,11 @@ typedef enum { /* keep system_state_names[] in sync in context.c */ LWS_SYSTATE_AUTH1, /* identity used for main auth token */ LWS_SYSTATE_AUTH2, /* identity used for optional auth */ + LWS_SYSTATE_ONE_TIME_UPDATES, /* pre-OPERATIONAL one-time updates, + * when a firmware needs to perform + * one-time upgrades to state before + * OPERATIONAL */ + LWS_SYSTATE_OPERATIONAL, /* user code can operate normally */ LWS_SYSTATE_POLICY_INVALID, /* user code is changing its policies @@ -140,6 +145,9 @@ typedef enum { /* keep system_state_names[] in sync in context.c */ * policy, switch to new then enter * LWS_SYSTATE_POLICY_VALID */ LWS_SYSTATE_CONTEXT_DESTROYING, /* Context is being destroyed */ + LWS_SYSTATE_AWAITING_MODAL_UPDATING, /* We're negotiating with the + * user code for update mode */ + LWS_SYSTATE_MODAL_UPDATING, /* We're updating the firmware */ } lws_system_states_t; /* Captive Portal Detect -related */ @@ -202,7 +210,12 @@ typedef struct lws_system_ops { * returning. The DER should be destroyed if in heap before returning. 
*/ - uint32_t wake_latency_us; +#if defined(LWS_WITH_OTA) + lws_ota_ops_t ota_ops; + /**< Platform OTA interface to lws_ota, see lws-ota.h */ +#endif + + uint32_t wake_latency_us; /**< time taken for this device to wake from suspend, in us */ } lws_system_ops_t; diff --git a/lib/core-net/private-lib-core-net.h b/lib/core-net/private-lib-core-net.h index f6b3b73e0..fa0a503df 100644 --- a/lib/core-net/private-lib-core-net.h +++ b/lib/core-net/private-lib-core-net.h @@ -949,6 +949,69 @@ lws_spawn_reap(struct lws_spawn_piped *lsp); #endif +#if defined(LWS_WITH_OTA) + +typedef enum { + LWSOS_IDLE, + LWSOS_CHECKING, /* we are looking at the manifest, if any */ + LWSOS_AWAITING_MODAL, /* we would like to fetch the update, but we have + * to wait for the user code to agree it's entered + * an update "mode" where it's not using the heap + * for anything else */ + LWSOS_FETCHING, /* if we did enter the lws_system MODAL state, we + * can proceed with fetching the update we like */ + LWSOS_FETCHING_INITED_GZ, + LWSOS_FETCHING_INITED_GZ_HASH, + LWSOS_STARTED, + LWSOS_WRITING, + LWSOS_FINALIZING, + LWSOS_REPORTED, + LWSOS_FAILED +} lws_ota_state_t; + +typedef struct lws_ota { + char buf[2048]; + struct lws_ss_handle *ss; + void *opaque_data; + char file[128]; + uint8_t sha512[64]; + + lws_flow_t flow; + + lws_sorted_usec_list_t sul_drain; + + lws_ota_state_t state; + lws_ota_process_t op; + + struct lws_genhash_ctx ctx; + struct inflator_ctx *inflate; + const uint8_t *outring; + struct lws_context *cx; + + uint64_t unixtime; + + lws_ota_async_t async_last; + lws_ota_ret_t async_r; + + size_t pos; + size_t expected_size; + size_t seen; + size_t written; + size_t buf_len; + + size_t outringlen; + size_t *opl; + size_t old_op; + size_t *cl; + + uint8_t last_pc; + uint8_t ota_start_done; + + + uint8_t async_completed; +} lws_ota_t; +#endif + void lws_service_do_ripe_rxflow(struct lws_context_per_thread *pt); diff --git a/lib/core/context.c b/lib/core/context.c index 
202a57ac6..c140be305 100644 --- a/lib/core/context.c +++ b/lib/core/context.c @@ -74,9 +74,12 @@ static const char * system_state_names[] = { "REGISTERED", "AUTH1", "AUTH2", + "ONE_TIME_UPDATES", "OPERATIONAL", "POLICY_INVALID", - "DESTROYING" + "DESTROYING", + "AWAITING_MODAL_UPDATING", + "MODAL_UPDATING" }; @@ -126,6 +129,23 @@ lws_state_notify_protocol_init(struct lws_state_manager *mgr, } #endif +#if defined(LWS_WITH_OTA) + if (target == LWS_SYSTATE_OPERATIONAL) { + uint16_t b; + + /* + * We add jitter, so possibly large numbers of devices don't + * all wake up and check for updates at the same moment after a + * power outage + */ + + lws_get_random(context, &b, 2); + lws_sul_schedule(context, 0, &context->sul_ota_periodic, + lws_ota_periodic_cb, (/* 30 + */ (b % 1000) * + LWS_US_PER_MS)); + } +#endif + #if defined(LWS_WITH_NETLINK) /* * If we're going to use netlink routing data for DNS, we have to diff --git a/lib/core/private-lib-core.h b/lib/core/private-lib-core.h index 45edd4959..6c59d7d9c 100644 --- a/lib/core/private-lib-core.h +++ b/lib/core/private-lib-core.h @@ -626,6 +626,11 @@ struct lws_context { #endif +#if defined(LWS_WITH_OTA) + lws_sorted_usec_list_t sul_ota_periodic; + lws_ss_handle_t * ota_ss; /* opaque to platform */ +#endif + /* * <====== LWS_WITH_NETWORK end */ @@ -938,6 +943,7 @@ typedef struct inflator_ctx { size_t bp; size_t inpos; size_t inlen; + size_t archive_pos; size_t outpos; size_t outpos_linear; size_t consumed_linear; @@ -1039,6 +1045,9 @@ void lwsl_emit_stderr(int level, const char *line); #define lws_pt_stats_unlock(_a) (void)(_a) #endif +void +lws_ota_periodic_cb(lws_sorted_usec_list_t *sul); + int LWS_WARN_UNUSED_RESULT lws_ssl_capable_read_no_ssl(struct lws *wsi, unsigned char *buf, size_t len); @@ -1166,7 +1175,6 @@ lws_transport_mux_get_channel(lws_transport_mux_t *tm, lws_mux_ch_idx_t i); int lws_transport_mux_next_free(lws_transport_mux_t *tm, lws_mux_ch_idx_t *result); - void sul_ping_cb(lws_sorted_usec_list_t *sul); 
diff --git a/lib/plat/freertos/CMakeLists.txt b/lib/plat/freertos/CMakeLists.txt index c0d92e941..508c03432 100644 --- a/lib/plat/freertos/CMakeLists.txt +++ b/lib/plat/freertos/CMakeLists.txt @@ -53,6 +53,11 @@ if (LWS_WITH_SYS_ASYNC_DNS OR LWS_WITH_SYS_NTPCLIENT) list(APPEND SOURCES plat/freertos/freertos-resolv.c) endif() +if (LWS_ESP_PLATFORM AND LWS_WITH_OTA) + list(APPEND SOURCES plat/freertos/esp32/esp32-lws_ota.c) +endif() + + # # Keep explicit parent scope exports at end # diff --git a/lib/plat/freertos/esp32/esp32-lws_ota.c b/lib/plat/freertos/esp32/esp32-lws_ota.c new file mode 100644 index 000000000..3152c8267 --- /dev/null +++ b/lib/plat/freertos/esp32/esp32-lws_ota.c @@ -0,0 +1,201 @@ +/* + * libwebsockets - small server side websockets and web server implementation + * + * Copyright (C) 2010 - 2022 Andy Green + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * lws_ota platform implementation for esp-idf + * + * The whole platform OTA implementation runs in its own task context, which + * is created in lws_plat_ota_start() and deletes itself after handling ABORT + * or FINALIZE. Async completions are passed back to the main code by lws_cancel_service(). + */ + +#include "private-lib-core.h" +#include "esp_ota_ops.h" + +extern lws_settings_instance_t *si; + +/* + * Our platform-specific single OTA process object, it knows the esp-idf OTA + * handle too after ota_start succeeds. + */ + +typedef struct { + lws_ota_t *g; + + esp_ota_handle_t ota; /* opaque platform ota handle */ + TaskHandle_t th; + SemaphoreHandle_t sem; + const esp_partition_t *ep; +} _lws_ota_process_t; + +static _lws_ota_process_t pop; + +static void +ota_task(void *_g) +{ + lws_ota_t *g = (lws_ota_t *)_g; + esp_err_t e; + uint32_t no; + + while (1) { + + xTaskNotifyWaitIndexed(0, 0, ULONG_MAX, &no, portMAX_DELAY); + + /* something to do */ + + g->async_r = LWSOTARET_ONGOING; + + switch (no) { + + case LWS_OTA_ASYNC_START: + pop.ep = esp_ota_get_next_update_partition(NULL); + + g->async_r = LWSOTARET_NOSLOT; + + if (pop.ep) { + e = esp_ota_begin(pop.ep, g->expected_size, + &pop.ota); + if (e == ESP_OK) + g->async_r = LWSOTARET_OK; + else + lwsl_cx_err(g->cx, "esp_ota_begin: %d", (int)e); + } else + lwsl_err("%s: no next update part\n", __func__); + + g->async_completed = 1; + lws_cancel_service(g->cx); + break; + + case LWS_OTA_ASYNC_WRITE: + /* + * g->flow has compressed data we can use when we + * need it + */ + + g->async_r = LWSOTARET_FAILED; + e = esp_ota_write(pop.ota, g->buf, g->buf_len); + if (e == ESP_OK) + g->async_r = LWSOTARET_OK; + else + lwsl_cx_err(g->cx, "esp_ota_write: %d", (int)e); + + g->async_completed = 1; + lws_cancel_service(g->cx); + break; + + case LWS_OTA_ASYNC_ABORT: + case LWS_OTA_ASYNC_FINALIZE: + + g->async_r = LWSOTARET_FAILED; + if (no == LWS_OTA_ASYNC_ABORT) + e = esp_ota_abort(pop.ota); + else { + e = esp_ota_end(pop.ota); + if (e == ESP_OK) { + 
struct timeval tv; + + /* + * Mark that we want to boot into the + * updated firmware that we just + * installed + */ + + e = esp_ota_set_boot_partition(pop.ep); + + /* + * Set the latest fw unixtime to the new + * guy. Set the time we updated. Only if the boot slot really changed, so a failed update is retried. + */ + if (e == ESP_OK) + lws_settings_plat_printf(si, + "ota.fw_unixtime", "%llu", + (unsigned long long)g->unixtime); + + if (e == ESP_OK && !gettimeofday(&tv, NULL)) + lws_settings_plat_printf(si, + "ota.upd_unixtime", "%llu", + (unsigned long long)tv.tv_sec); + } + } + if (e == ESP_OK) + g->async_r = LWSOTARET_OK; + else + lwsl_cx_err(g->cx, "esp_ota_end: %d", (int)e); + + g->async_completed = 1; + lws_cancel_service(g->cx); + + pop.th = NULL; + vTaskDelete(0); + + return; + } + } +} + +void +lws_plat_ota_queue(lws_ota_t *g, lws_ota_async_t a) +{ + g->async_last = a; + xTaskNotify(pop.th, a, eSetValueWithOverwrite); +} + +int +lws_plat_ota_start(lws_ota_t *g) +{ + g->op = (lws_ota_process_t)&pop; + + xTaskCreate(ota_task, "ota", 3072, g, tskIDLE_PRIORITY, &pop.th); + if (!pop.th) + return 1; + + lws_plat_ota_queue(g, LWS_OTA_ASYNC_START); + + return 0; +} + +int +lws_plat_ota_report_current(lws_ota_t *g, int bad) +{ + if (bad) + esp_ota_mark_app_invalid_rollback_and_reboot(); + else + esp_ota_mark_app_valid_cancel_rollback(); + + return LWSOTARET_OK; +} + +int +lws_plat_ota_get_last_fw_unixtime(uint64_t *fw_unixtime) +{ + uint8_t buf[24] = { 0 }; /* UINT64_MAX is 20 digits; zero-filled, last byte never offered to the getter, so always NUL-terminated */ + size_t l = sizeof(buf) - 1; /* NOTE(review): assumes the getter copies at most *l bytes - confirm against lws_settings */ + + if (lws_settings_plat_get(si, "ota.fw_unixtime", buf, &l)) { + lwsl_notice("%s: not in settings\n", __func__); + return 1; + } + + *fw_unixtime = (uint64_t)strtoull((const char *)buf, NULL, 10); /* stored with %llu, so parse as unsigned */ + + return 0; +} diff --git a/lib/system/CMakeLists.txt b/lib/system/CMakeLists.txt index 0a1d93837..ce480f41c 100644 --- a/lib/system/CMakeLists.txt +++ b/lib/system/CMakeLists.txt @@ -55,6 +55,12 @@ if (LWS_WITH_NETWORK) system/dhcpclient/dhcpc4.c) endif() + if (LWS_WITH_OTA) + list(APPEND SOURCES + system/ota/ota.c) + endif() + + if (LWS_WITH_SYS_SMD) add_subdir_include_dirs(smd) endif() diff --git 
a/lib/system/ota/ota.c b/lib/system/ota/ota.c
new file mode 100644
index 000000000..9353150d2
--- /dev/null
+++ b/lib/system/ota/ota.c
@@ -0,0 +1,813 @@
+/*
+ * libwebsockets - small server side websockets and web server implementation
+ *
+ * Copyright (C) 2010 - 2022 Andy Green
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Secure Streams / OTA
+ *
+ * In the interests of minimizing heap usage, OTA SS is only existing during
+ * update checks, update bulk data download, and OTA storage.  Checks are
+ * initiated by cx->sul_ota_periodic which is triggered at OPERATIONAL and then
+ * periodically as set in system_ops->ota_ops->ota_periodic_check_secs.
+ */
+
+#include "private-lib-core.h"
+
+static const char * const ota_pub_jwk = LWS_OTA_PUBLIC_JWK;
+/* This is a string that is unique to the build type / application... we use
+ * it to make sure that we are updating to the same kind of build... */
+const char *lws_ota_variant = LWS_OTA_VARIANT;
+
+/*
+ * sul_drain callback, it does one of two jobs according to g->state.
+ *
+ * In LWSOS_AWAITING_MODAL it tries to move the system to MODAL_UPDATING,
+ * rescheduling itself at 1s intervals while that is refused; once granted it
+ * sets up the inflator and the SHA512 hash and asks the platform to start the
+ * OTA.
+ *
+ * Otherwise, unless a platform async operation is still outstanding, it
+ * stages as much inflated data as it can into g->buf, hashes it and queues a
+ * platform WRITE; after the last chunk the hash must match the manifest.
+ */
+
+static void
+ota_write_sul_cb(lws_sorted_usec_list_t *sul)
+{
+	lws_ota_t *g = lws_container_of(sul, lws_ota_t, sul_drain);
+
+	/* we use this to retry entering modal */
+
+	if (g->state == LWSOS_AWAITING_MODAL) {
+		const lws_ota_ops_t *ota_ops = &g->cx->system_ops->ota_ops;
+
+		/*
+		 * Ask the user code to move to AWAITING_MODAL_UPDATING which it
+		 * should agree to... and then MODAL_UPDATING where it may choose
+		 * to indicate it can't stop what it's doing right now.
+		 */
+
+		lws_state_transition(&g->cx->mgr_system,
+				     LWS_SYSTATE_AWAITING_MODAL_UPDATING);
+		lws_state_transition(&g->cx->mgr_system,
+				     LWS_SYSTATE_MODAL_UPDATING);
+
+		if (g->cx->mgr_system.state != LWS_SYSTATE_MODAL_UPDATING) {
+
+			/*
+			 * Something decided we can't do the update right now, eg,
+			 * he's busy rendering something that would exhaust the heap
+			 * if we also tried to get on with the update.
+			 *
+			 * Let's try again in 1s, up to a timeout.
+			 */
+
+			lwsl_ss_warn(g->ss, "Scheduling update mode retry");
+
+			lws_sul_schedule(g->cx, 0, &g->sul_drain,
+					 ota_write_sul_cb, LWS_US_PER_SEC);
+			return;
+		}
+
+		/* we can go ahead now, the system is in the update mode */
+
+		g->state = LWSOS_FETCHING;
+
+		/* prep the gzip stream decompression */
+
+		g->inflate = lws_upng_inflator_create(&g->outring,
+				&g->outringlen, &g->opl, &g->cl);
+		if (!g->inflate) {
+			lwsl_err("%s: zlib init failed\n", __func__);
+			goto update_impossible;
+		}
+
+		g->state = LWSOS_FETCHING_INITED_GZ;
+
+		/* prep the hash computation of the decompressed data */
+
+		if (lws_genhash_init(&g->ctx, LWS_GENHASH_TYPE_SHA512)) {
+			lwsl_err("%s: hash init failed\n", __func__);
+			goto update_impossible;
+		}
+
+		g->state = LWSOS_FETCHING_INITED_GZ_HASH;
+
+		/* we don't want to create a dupe of ourselves while
+		 * we're busy doing the OTA */
+		lws_sul_cancel(&g->cx->sul_ota_periodic);
+
+		lwsl_warn("%s: platform ota start\n", __func__);
+		/* continues asynchronously */
+		if (ota_ops->ota_start(g)) {
+			lwsl_err("%s: ota_start failed\n", __func__);
+			goto update_impossible;
+		}
+
+		return;
+
+update_impossible:
+		g->state = LWSOS_FAILED;
+		lws_ss_start_timeout(g->ss, 1);
+
+		return;
+	}
+
+	if (*((volatile lws_ota_async_t *)&g->async_last)) {
+		/*
+		 * The task is busy, we can't start anything atm.  When it
+		 * is finished, the write completion will come back here.
+		 */
+		// lwsl_notice("%s: async_last busy\n", __func__);
+		return;
+	}
+
+	/*
+	 * We have a chance to write the next chunk... let's stage g->buf with
+	 * as much inflated data as we can with what we have to hand, and set it
+	 * writing
+	 */
+
+	g->buf_len = 0;
+	while (g->buf_len < sizeof(g->buf) - 8 &&
+	       g->seen + g->buf_len < g->expected_size) {
+		lws_stateful_ret_t sr = 0;
+		size_t os, part;
+
+		/* inflator pauses for WANT_OUTPUT after this many bytes out */
+		g->inflate->bypl = sizeof(g->buf) - g->buf_len - 1;
+
+		if (*g->opl == *g->cl) {
+
+			/* No output pending.. do we have unused input left? */
+
+			if (g->flow.len) {
+
+				/*
+				 * There's some input already available,
+				 * let's process that and see if it helped
+				 */
+
+				sr = lws_upng_inflate_data(g->inflate, NULL, 0);
+				if (sr & LWS_SRET_FATAL) {
+					lwsl_ss_err(g->ss, "inflate error 1");
+
+					goto fail;
+				}
+				g->flow.len = g->inflate->inlen - (g->inflate->bp >> 3);
+			}
+
+			if (*g->opl == *g->cl) {
+
+				/*
+				 * Still no output available... let's
+				 * attempt to move to the next
+				 */
+
+				lws_flow_req(&g->flow);
+				if (!g->flow.len)
+					break;
+
+				sr = lws_upng_inflate_data(g->inflate,
+						g->flow.data, g->flow.len);
+
+				g->flow.len = g->inflate->inlen -
+						(g->inflate->bp >> 3);
+			}
+		} /* there is already output pending */
+
+		if (sr & LWS_SRET_FATAL) {
+			lwsl_ss_err(g->ss, "inflate error %d", sr & 0xff);
+
+			goto fail;
+		}
+
+		os = ((*g->opl - g->old_op) % g->outringlen);
+		if (os > sizeof(g->buf) - g->buf_len)
+			os = sizeof(g->buf) - g->buf_len;
+
+		if (!os) {
+			lwsl_err("%s: Nothing to compose in\n", __func__);
+			break;
+		}
+
+		part = os;
+		if (*g->opl % g->outringlen < g->old_op)
+			part = g->outringlen - g->old_op;
+
+		memcpy(g->buf + g->buf_len, g->outring + g->old_op, part);
+		g->buf_len += part;
+		if (part != os) {
+			memcpy(g->buf + g->buf_len, g->outring, os - part);
+			g->buf_len += os - part;
+		}
+
+		g->old_op = *g->opl % g->outringlen;
+		*g->cl += os;
+
+	} /* while try to fill the staging buffer */
+
+	if (!g->buf_len)
+		/* no ammo to work with... we will come back next time we
+		 * get some rx */
+		return;
+
+	g->seen += g->buf_len;
+	if (g->seen > g->expected_size) {
+		lwsl_ss_err(g->ss, "oversize payload");
+
+		goto fail;
+	}
+
+	/* let's track the hash as we get it */
+
+	if (lws_genhash_update(&g->ctx, g->buf, g->buf_len)) {
+		lwsl_ss_err(g->ss, "hash update failed");
+
+		goto fail;
+	}
+
+	if (g->seen == g->expected_size) {
+		char temp[64];
+
+		lws_upng_inflator_destroy(&g->inflate);
+
+		if (lws_genhash_destroy(&g->ctx, temp) ||
+		    memcmp(temp, g->sha512, sizeof(temp))) {
+			lwsl_err("%s: payload hash differs\n", __func__);
+
+			goto fail;
+		}
+	}
+
+	g->cx->system_ops->ota_ops.ota_queue(g, LWS_OTA_ASYNC_WRITE);
+
+	return;
+
+fail:
+	g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
+	lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
+						LWS_OTA_ASYNC_ABORT);
+}
+
+/*
+ * The platform START completed.  If it worked, reconnect the SS (by now its
+ * "file" metadata names the firmware image) and schedule the drain sul,
+ * otherwise queue a platform ABORT.
+ */
+
+static void
+ota_completion_start(lws_ota_t *g)
+{
+	if (g->async_r != LWSOTARET_OK) {
+		lwsl_ss_err(g->ss, "OTA START FAILED r %d", g->async_r);
+
+		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
+		lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
+							LWS_OTA_ASYNC_ABORT);
+		return;
+	}
+
+	/* we can start writing now */
+	g->ota_start_done = 1;
+	g->state = LWSOS_STARTED;
+
+	if (lws_ss_client_connect(lws_ss_from_user(g)))
+		lwsl_ss_warn(g->ss, "reconn failed");
+
+	lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);
+}
+
+/*
+ * The platform WRITE completed.  Account for what was written and report
+ * progress when the percentage changed, then either schedule staging the next
+ * chunk or, if the whole image is written, queue the platform FINALIZE.
+ */
+
+static void
+ota_completion_write(lws_ota_t *g)
+{
+	const lws_ota_ops_t *ota_ops = &g->cx->system_ops->ota_ops;
+	uint8_t pc;
+
+	if (g->async_r != LWSOTARET_OK) {
+		lwsl_ss_err(g->ss, "r %d", g->async_r);
+
+		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
+		lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
+							LWS_OTA_ASYNC_ABORT);
+		return;
+	}
+
+	g->written += g->buf_len;
+
+	pc = (uint8_t)(((uint64_t)g->written * 100) / g->expected_size);
+	if (pc != g->last_pc) {
+		g->last_pc = pc;
+		lwsl_notice("%s: %u%%\n", __func__, pc);
+		if (ota_ops->ota_progress)
+			ota_ops->ota_progress(LWSOTARET_PROGRESS, pc);
+	}
+
+	if (g->written != g->expected_size) {
+		lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);
+
+		return;
+	}
+
+	/* We have completed writing the last part */
+
+	lwsl_warn("%s: finalizing good ota\n", __func__);
+
+	g->cx->system_ops->ota_ops.ota_queue(g, LWS_OTA_ASYNC_FINALIZE);
+}
+
+/*
+ * The platform FINALIZE completed.  If it worked, reboot into the new firmware
+ * using the lws_system reboot op.
+ */
+
+static void
+ota_completion_finalize(lws_ota_t *g)
+{
+	lwsl_notice("%s: %d\n", __func__, g->async_r);
+
+	if (g->async_r)
+		return;
+
+	if (!g->cx->system_ops->reboot) {
+		lwsl_err("%s: no reboot op\n", __func__);
+		return;
+	}
+
+	g->cx->system_ops->reboot();
+}
+
+/*
+ * The platform ABORT completed.  Return from the modal update state, let the
+ * SS time out shortly and restart the periodic update check.
+ */
+
+static void
+ota_completion_abort(lws_ota_t *g)
+{
+	int secs = 0;
+
+	if (g->cx->system_ops && g->cx->system_ops->ota_ops.ota_periodic_check_secs)
+		secs = g->cx->system_ops->ota_ops.ota_periodic_check_secs;
+
+	/* return from modal update state */
+	lws_state_transition(&g->cx->mgr_system, LWS_SYSTATE_OPERATIONAL);
+
+	/* we've had it */
+	lws_ss_start_timeout(g->ss, 1);
+
+	lws_sul_schedule(g->cx, 0, &g->cx->sul_ota_periodic, lws_ota_periodic_cb,
+			 secs ? secs * LWS_US_PER_SEC : 24 * 3600 * LWS_US_PER_SEC);
+}
+
+
+/*
+ * SS rx handler.  Before LWSOS_FETCHING the payload is the JWS manifest: it
+ * is collected in g->buf and at EOM its signature, alg and variant are
+ * checked and the image path, size, unixtime and sha512 are extracted.  From
+ * LWSOS_FETCHING on, the payload is the compressed firmware image, which is
+ * appended to the g->flow buflist for the drain sul to consume.
+ */
+
+static lws_ss_state_return_t
+ota_rx(void *userobj, const uint8_t *in, size_t len, int flags)
+{
+	lws_ss_state_return_t r = LWSSSSRET_DISCONNECT_ME;
+	lws_ota_t *g = (lws_ota_t *)userobj;
+	const lws_ota_ops_t *ota_ops = &lws_ss_cx_from_user(g)->system_ops->ota_ops;
+	struct lws_jws_map map;
+	struct lws_jwk jwk;
+	uint64_t fw_last;
+	char temp[1024];
+	int temp_len = sizeof(temp);
+	const char *p;
+	size_t alen;
+	int n;
+
+	if (g->state >= LWSOS_FETCHING) {
+
+		lwsl_info("%s: fetching %u, fl 0x%02X\n", __func__, (unsigned int)len, flags);
+
+		/*
+		 * We are decompressing, checking and flashing the image.
+		 *
+		 * g->flow and its buflist is managing COMPRESSED data from the
+		 * network according to g->flow.window limit.  Rx events are
+		 * triggered by tx credit manipulation from, and coming to
+		 * service g->flow / buflist state ONLY and do not know or care
+		 * about direct inflator state (it makes itself felt by using
+		 * g->flow data in the write completion).
+		 *
+		 * The inflator may not need any g->flow data to produce output,
+		 * or it may need all of it and more before it can produce
+		 * output, or somewhere in the middle.  At the output side, we
+		 * have a fixed-size staging buffer so we may need to come back
+		 * to issue more inflated data without any network event
+		 * triggering it.
+		 */
+
+		if (flags & LWSSS_FLAG_SOM) {
+			g->state = LWSOS_WRITING;
+			g->flow.state = LWSDLOFLOW_STATE_READ;
+			g->flow.h = g->ss;
+			g->flow.window = 4096;
+			if (ota_ops->ota_progress)
+				ota_ops->ota_progress(LWSOTARET_PROGRESS, 0);
+		}
+
+		if (len &&
+		    lws_buflist_append_segment(&g->flow.bl, in, len) < 0) {
+			lwsl_ss_err(g->ss, "OOM");
+
+			goto fetch_fail;
+		}
+
+		lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);
+
+		if (flags & LWSSS_FLAG_EOM)
+			/*
+			 * This was the last part, so there is no more new data
+			 * in flight
+			 */
+			g->flow.state = (uint8_t)LWSDLOFLOW_STATE_READ_COMPLETED;
+
+		return LWSSSSRET_OK;
+
+fetch_fail:
+		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
+
+		return LWSSSSRET_DISCONNECT_ME;
+	}
+
+	/* we are collecting the manifest... */
+
+	if (g->pos + len > sizeof(g->buf))
+		return LWSSSSRET_DISCONNECT_ME;
+
+	memcpy(g->buf + g->pos, in, len);
+	g->pos += len;
+
+	if ((flags & LWSSS_FLAG_EOM) != LWSSS_FLAG_EOM)
+		return LWSSSSRET_OK;
+
+	/* we want to validate the JWS manifest against our public JWK */
+
+	if (lws_jwk_import(&jwk, NULL, NULL, ota_pub_jwk, strlen(ota_pub_jwk))) {
+		lwsl_err("%s: unable to import jwk\n", __func__);
+		return LWSSSSRET_DISCONNECT_ME;
+	}
+
+	/* Step 1... is the JWS signed by the required key? */
+
+	if (lws_jws_sig_confirm_compact_b64(g->buf, g->pos, &map, &jwk,
+					    lws_ss_cx_from_user(g), temp,
+					    &temp_len)) {
+		lwsl_err("%s: manifest failed sig check\n", __func__);
+		goto bail;
+	}
+
+	/* finished with the jwk */
+	lws_jwk_destroy(&jwk);
+
+	/* Step 2... the JOSE and payload sections are there, right? */
+
+	if (!map.buf[LJWS_JOSE] || !map.buf[LJWS_PYLD]) {
+		lwsl_err("%s: no JOSE block\n", __func__);
+		goto bail1;
+	}
+
+	/* Step 3... do we agree the signing alg is secure enough? */
+
+	p = lws_json_simple_find(map.buf[LJWS_JOSE], map.len[LJWS_JOSE],
+				 "\"alg\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no alg\n", __func__);
+		goto bail1;
+	}
+
+	/* the whole value must match, not just a prefix of "ES512" */
+
+	if (alen != strlen("ES512") || strncmp("ES512", p, alen)) {
+		lwsl_err("%s: bad alg %.*s %d\n", __func__, (int)alen, p, (int)alen);
+		goto bail1;
+	}
+
+	/*
+	 * We trust that the manifest was robustly signed by the key we like,
+	 * let's parse out the pieces we care about and validate the firmware is
+	 * the same variant build as we're currently running, and, eg, we're not
+	 * being given a validly-signed real firmware from the wrong variant,
+	 * that will brick us.
+	 */
+
+	lwsl_hexdump_notice(map.buf[LJWS_PYLD], map.len[LJWS_PYLD]);
+
+	lwsl_notice("%s: JWS validated okay\n", __func__);
+
+	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
+				 "\"variant\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no variant\n", __func__);
+		goto bail1;
+	}
+
+	/* again, the whole name must match, not just a prefix of it */
+
+	if (alen != strlen(lws_ota_variant) ||
+	    strncmp(lws_ota_variant, p, alen)) {
+		lwsl_err("%s: wrong variant %.*s\n", __func__, (int)alen, p);
+		goto bail1;
+	}
+
+	/*
+	 * We liked the manifest, prepare to go again targeting the payload
+	 * that the manifest described to us.
+	 */
+
+	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
+				 "\"path\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no path\n", __func__);
+		goto bail1;
+	}
+
+	if (alen >= sizeof(g->file)) {
+		lwsl_err("%s: path too long\n", __func__);
+		goto bail1;
+	}
+
+	lws_strnncpy(g->file, p, alen, sizeof(g->file));
+	if (lws_ss_set_metadata(lws_ss_from_user(g), "file", g->file, alen)) {
+		lwsl_err("%s: failed to set firmware file %s\n", __func__,
+				g->file);
+		goto bail1;
+	}
+
+	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
+				 "\"size\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no size\n", __func__);
+		goto bail1;
+	}
+	g->expected_size = (size_t)atoll(p);
+	if (!g->expected_size) {
+		lwsl_err("%s: bad size\n", __func__);
+		goto bail1;
+	}
+
+	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
+				 "\"unixtime\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no unixtime\n", __func__);
+		goto bail1;
+	}
+	g->unixtime = (uint64_t)atoll(p);
+
+	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
+				 "\"sha512\":", &alen);
+	if (!p) {
+		lwsl_err("%s: no hash\n", __func__);
+		goto bail1;
+	}
+	n = lws_hex_len_to_byte_array(p, alen, g->sha512, sizeof(g->sha512));
+	if (n != sizeof(g->sha512)) {
+		lwsl_err("%s: bad hash %d %u %s\n", __func__, n, (unsigned int)alen, p);
+		goto bail1;
+	}
+
+	/*
+	 * So... is it newer?
+	 */
+
+	if (!ota_ops->ota_get_last_fw_unixtime(&fw_last) &&
+	    g->unixtime <= fw_last) {
+
+		/*
+		 * We don't actually want this...
+		 */
+
+		lwsl_ss_warn(g->ss, "Latest update is not newer");
+
+		return LWSSSSRET_DISCONNECT_ME;
+	}
+
+	/* ... this is something that we like the look of... schedule trying
+	 * to enter LWS_SYSTATE_MODAL_UPDATING state after this, and retry if
+	 * we don't get there immediately */
+
+	g->state = LWSOS_AWAITING_MODAL;
+	lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);
+	/* on the other hand, don't let it keep trying forever */
+	lws_ss_start_timeout(g->ss, 30000);
+
+	/*
+	 * We will DISCONNECT shortly, we won't proceed to the update image
+	 * download unless we can agree with the user code to enter MODAL_
+	 * UPDATING within a timeout.  Otherwise we will give up and retry
+	 * after 24h or whatever.
+	 */
+
+	return LWSSSSRET_OK;
+
+bail:
+	lws_jwk_destroy(&jwk);
+
+bail1:
+	return r;
+}
+
+/*
+ * SS state handler.  It starts the manifest fetch when the SS is created,
+ * decides from g->state whether a disconnect ends this OTA attempt, cleans up
+ * on destruction, and dispatches platform async completions, which arrive as
+ * LWSSSCS_EVENT_WAIT_CANCELLED after the platform calls lws_cancel_service().
+ */
+
+static lws_ss_state_return_t
+ota_state(void *userobj, void *h_src, lws_ss_constate_t state,
+	  lws_ss_tx_ordinal_t ack)
+{
+	lws_ota_t *g = (lws_ota_t *)userobj;
+	int n;
+
+	switch ((int)state) {
+	case LWSSSCS_CREATING: /* start the transaction as soon as we exist */
+
+		g->cx = lws_ss_cx_from_user(g);
+		g->cx->ota_ss = g->ss;
+		g->state = LWSOS_CHECKING;
+
+		if (lws_ss_set_metadata(lws_ss_from_user(g),
+					"ota_variant", LWS_OTA_VARIANT,
+					strlen(LWS_OTA_VARIANT))) {
+			lwsl_err("%s: failed to set ota_variant %s\n", __func__,
+					LWS_OTA_VARIANT);
+			return LWSSSSRET_DISCONNECT_ME;
+		}
+
+		if (lws_ss_set_metadata(lws_ss_from_user(g),
+					"file", "manifest.jws", 12)) {
+			lwsl_err("%s: failed to set file %s\n", __func__,
+					"manifest.jws");
+			return LWSSSSRET_DISCONNECT_ME;
+		}
+
+		return lws_ss_client_connect(lws_ss_from_user(g));
+
+	case LWSSSCS_DISCONNECTED:
+
+		/*
+		 * We have two kinds of connection that may disconnect, the
+		 * manifest fetch, and the firmware fetch.
+		 */
+
+		switch (g->state) {
+		case LWSOS_FETCHING_INITED_GZ_HASH:
+		case LWSOS_FETCHING:
+			return LWSSSSRET_OK;
+
+		case LWSOS_WRITING:
+			/*
+			 * The network part of fetching the update image is
+			 * over.  If it didn't fail, we need to stick around and
+			 * let it either finish writing and finalizing, or
+			 * timeout.
+			 */
+			lwsl_notice("%s: draining\n", __func__);
+
+			lws_ss_start_timeout(g->ss, 45000);
+
+			return LWSSSSRET_OK;
+
+		case LWSOS_AWAITING_MODAL:
+			/*
+			 * We might have to wait a bit to find a good moment to
+			 * enter the update mode.  If we disconnect
+			 * in the meantime, it's OK.
+			 */
+			return LWSSSSRET_OK;
+
+		default:
+			lwsl_notice("%s: state %d, DESTROYING\n", __func__, g->state);
+
+			return LWSSSSRET_DESTROY_ME;
+		}
+
+	case LWSSSCS_DESTROYING:
+
+		/* we only live for one ota check / fetch */
+		lws_ss_cx_from_user(g)->ota_ss = NULL;
+		lws_buflist_destroy_all_segments(&g->flow.bl);
+		lws_sul_cancel(&g->sul_drain);
+		if (g->state == LWSOS_FETCHING_INITED_GZ_HASH)
+			lws_genhash_destroy(&g->ctx, NULL);
+		if (g->state >= LWSOS_FETCHING_INITED_GZ &&
+		    g->state < LWSOS_FINALIZING)
+			lws_upng_inflator_destroy(&g->inflate);
+
+		return LWSSSSRET_OK;
+
+	case LWSSSCS_TIMEOUT:
+		lwsl_err("%s: timeout\n", __func__);
+
+		return LWSSSSRET_DESTROY_ME;
+
+	case LWSSSCS_EVENT_WAIT_CANCELLED:
+		/* We may have a completion */
+		if (g->async_completed) {
+			g->async_completed = 0;
+			n = g->async_last;
+			*((volatile lws_ota_async_t *)&g->async_last) = 0;
+
+			switch (n) {
+			case LWS_OTA_ASYNC_START:
+				ota_completion_start(g);
+				break;
+			case LWS_OTA_ASYNC_WRITE:
+				ota_completion_write(g);
+				break;
+
+			/* EVENT_WAIT_CANCELLED doesn't deal with returns */
+
+			case LWS_OTA_ASYNC_ABORT:
+				/* let's forget about it then */
+				lws_ss_start_timeout(g->ss, 1);
+				ota_completion_abort(g);
+				break;
+
+			case LWS_OTA_ASYNC_FINALIZE:
+				lws_ss_start_timeout(g->ss, 5000);
+				ota_completion_finalize(g);
+				break;
+			}
+		}
+		break;
+	}
+
+	return LWSSSSRET_OK;
+}
+
+static LWS_SS_INFO("ota", lws_ota_t)
+	.rx				= ota_rx,
+	.state				= ota_state,
+	.manual_initial_tx_credit	= sizeof(((lws_ota_t *)NULL)->buf),
+};
+
+/*
+ * Creates the SS and kicks off the manifest check
+ */
+
+void
+lws_ota_periodic_cb(lws_sorted_usec_list_t *sul)
+{
+	struct lws_context *cx = lws_container_of(sul, struct lws_context,
+						  sul_ota_periodic);
+	int secs = 0;
+
+	if (cx->system_ops && cx->system_ops->ota_ops.ota_periodic_check_secs)
+		secs = cx->system_ops->ota_ops.ota_periodic_check_secs;
+
+	lwsl_notice("%s\n", __func__);
+
+	if (lws_ss_create(cx, 0, &ssi_lws_ota_t, NULL, NULL, NULL, NULL))
+		lwsl_cx_warn(cx, "failed to create ota SS");
+
+	/* set up coming back again at (usually long) periods */
+
+	lws_sul_schedule(cx, 0, sul, lws_ota_periodic_cb,
+			 secs ? secs * LWS_US_PER_SEC : 24 * 3600 * LWS_US_PER_SEC);
+}
+
+/*
+ * Returns the build variant name that manifests must match
+ */
+
+const char *
+lws_ota_variant_name(void)
+{
+	return lws_ota_variant;
+}