From 28ccfd43a254399945d3337fac2ac40466014287 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Tue, 12 Nov 2024 06:05:31 +0100 Subject: [PATCH 01/21] confd: add validation of min/max interface name length A sane interface name is at least two characters long, and in Linux the interface name (using ip link) is at most 15 characters long. Signed-off-by: Joachim Wiberg --- src/confd/yang/confd.inc | 2 +- src/confd/yang/containers.inc | 2 +- src/confd/yang/infix-interfaces.yang | 13 +++++++++++++ ...-10-28.yang => infix-interfaces@2024-11-12.yang} | 0 4 files changed, 15 insertions(+), 2 deletions(-) rename src/confd/yang/{infix-interfaces@2024-10-28.yang => infix-interfaces@2024-11-12.yang} (100%) diff --git a/src/confd/yang/confd.inc b/src/confd/yang/confd.inc index ea4fe93b5..cb7eee3da 100644 --- a/src/confd/yang/confd.inc +++ b/src/confd/yang/confd.inc @@ -37,7 +37,7 @@ MODULES=( "ieee802-ethernet-interface@2019-06-21.yang" "infix-ethernet-interface@2024-02-27.yang" "infix-factory-default@2023-06-28.yang" - "infix-interfaces@2024-10-28.yang -e vlan-filtering" + "infix-interfaces@2024-11-12.yang -e vlan-filtering" # from rousette "ietf-restconf@2017-01-26.yang" diff --git a/src/confd/yang/containers.inc b/src/confd/yang/containers.inc index af72dc83f..c143090d9 100644 --- a/src/confd/yang/containers.inc +++ b/src/confd/yang/containers.inc @@ -1,6 +1,6 @@ # -*- sh -*- # REMEMBER TO UPDATE infix-interfaces ALSO IN confd.inc MODULES=( - "infix-interfaces@2024-10-28.yang -e vlan-filtering -e containers" + "infix-interfaces@2024-11-12.yang -e vlan-filtering -e containers" "infix-containers@2024-10-14.yang" ) diff --git a/src/confd/yang/infix-interfaces.yang b/src/confd/yang/infix-interfaces.yang index b5c0bbd60..1afb2721c 100644 --- a/src/confd/yang/infix-interfaces.yang +++ b/src/confd/yang/infix-interfaces.yang @@ -23,6 +23,11 @@ module infix-interfaces { contact "kernelkit@googlegroups.com"; description "Linux bridge and lag extensions for ietf-interfaces."; + 
revision 2024-11-12 { + description "Limit name 2-15 chars, Linux limitation."; + reference "internal"; + } + revision 2024-10-28 { description "Limit description to 64 chars, matching IF-MIB max."; reference "internal"; @@ -89,6 +94,14 @@ module infix-interfaces { } } + deviation "/if:interfaces/if:interface/if:name" { + deviate replace { + type string { + length "2..15"; + } + } + } + deviation "/if:interfaces/if:interface/if:description" { deviate replace { type string { diff --git a/src/confd/yang/infix-interfaces@2024-10-28.yang b/src/confd/yang/infix-interfaces@2024-11-12.yang similarity index 100% rename from src/confd/yang/infix-interfaces@2024-10-28.yang rename to src/confd/yang/infix-interfaces@2024-11-12.yang From 20927ce573d94ef64d9cbd565b52635a6b055fd8 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Wed, 13 Nov 2024 13:34:00 +0100 Subject: [PATCH 02/21] board/common: ensure containers are handled as sysv scripts Signed-off-by: Joachim Wiberg --- board/common/rootfs/etc/finit.d/available/container@.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/board/common/rootfs/etc/finit.d/available/container@.conf b/board/common/rootfs/etc/finit.d/available/container@.conf index 20c3570a1..81fd0cf61 100644 --- a/board/common/rootfs/etc/finit.d/available/container@.conf +++ b/board/common/rootfs/etc/finit.d/available/container@.conf @@ -1,4 +1,4 @@ service :%i pid:!/run/k8s-logger-%i.pid \ [2345] k8s-logger -cni %i -f local1 /run/containers/%i.fifo -- Logger for container %i -sysv :%i pid:!/run/container:%i.pid log kill:10 \ +sysv :%i pid:!/run/container:%i.pid log kill:10 \ [2345] container -n %i -- container %i From 282ed50a5475e9876e83b791083a2e60c33bf615 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Thu, 14 Nov 2024 19:59:58 +0100 Subject: [PATCH 03/21] confd: major behavior change, run container --read-only always Container support in Infix was released with v24.02, so this change may unfortunately break a few use-cases out 
there. Regrettable as this is, the default behavior, including how containers are started after boot, breaks other use-cases that were considered more important. As of this commit: - all containers in Infix run in read-only mode, use volumes and mounts for persistence across reboot/stop/start/upgrade - all containers are now "recreated" at boot or related config changes, this ensures an OCI image embedded in the Infix image, /lib/oci/, is always used as the base for a running container Fixes #823 Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 13 ++--- package/execd/execd.conf | 2 +- package/execd/tmpfiles.conf | 2 +- src/confd/src/infix-containers.c | 50 ++++++------------- src/confd/yang/infix-containers.yang | 6 +++ ....yang => infix-containers@2024-11-12.yang} | 0 6 files changed, 28 insertions(+), 45 deletions(-) rename src/confd/yang/{infix-containers@2024-10-14.yang => infix-containers@2024-11-12.yang} (100%) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index e17a4f537..8fa8f6ebf 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -108,9 +108,9 @@ create() logging="--log-driver k8s-file --log-opt path=/run/containers/$name.fifo" fi - args="$args --replace --quiet --cgroup-parent=containers $caps" + args="$args --read-only --replace --quiet --cgroup-parent=containers $caps" args="$args --restart=$restart --systemd=false --tz=local $privileged" - args="$args $ro $vol $mount $hostname $entrypoint $env $port $logging" + args="$args $vol $mount $hostname $entrypoint $env $port $logging" pidfn=/run/container:${name}.pid [ -n "$quiet" ] || log "---------------------------------------" @@ -136,8 +136,7 @@ create() # shellcheck disable=SC2048 log "Calling podman create --name $name --conmon-pidfile=$pidfn $args $image $*" if podman create --name "$name" --conmon-pidfile="$pidfn" $args "$image" $*; then - [ -n "$quiet" ] || log "Successfully 
created container $name from $image" - rm -f "/run/containers/env/${name}.env" + [ -n "$quiet" ] || log "Successfully created container $name from $image" [ -n "$manual" ] || start "$name" exit 0 fi @@ -267,7 +266,6 @@ options: Syntax: [[ip:][hostPort]:]containerPort[/protocol] -q, --quiet Quiet operation, called from confd -r, --restart POLICY One of "no", "always", or "on-failure:NUM" - --read-only Do not create a writable layer -s, --simple Show output in simplified format -v, --volume NAME:PATH Create named volume mounted inside container on PATH @@ -386,9 +384,6 @@ while [ "$1" != "" ]; do shift restart=$1 ;; - --read-only) - ro="--read-only=true" - ;; -s | --simple) simple=true ;; @@ -627,7 +622,7 @@ case $cmd in ;; upgrade) # Start script used to initially create container - script=/var/lib/containers/active/S01-${1}.sh + script=/run/containers/active/S01-${1}.sh # Find container image img=$(podman inspect "$1" | jq -r .[].ImageName) diff --git a/package/execd/execd.conf b/package/execd/execd.conf index 54978b51b..1c4b5625a 100644 --- a/package/execd/execd.conf +++ b/package/execd/execd.conf @@ -1,2 +1,2 @@ service log:prio:local1.err,tag:container \ - [2345] execd /run/containers/queue /var/lib/containers/active -- Container job runner + [2345] execd /run/containers/queue /run/containers/active -- Container job runner diff --git a/package/execd/tmpfiles.conf b/package/execd/tmpfiles.conf index f281f81ed..6ee94af22 100644 --- a/package/execd/tmpfiles.conf +++ b/package/execd/tmpfiles.conf @@ -1,8 +1,8 @@ d /run/containers/args 0700 - - d /run/containers/files 0700 - - -d /var/lib/containers/active 0700 - - d /var/lib/containers/oci 0755 - - d /run/containers/inbox 0700 - - d /run/containers/queue 0700 - - +d /run/containers/active 0700 - - d /run/cni 0755 - - L+ /var/lib/cni - - - - /run/cni diff --git a/src/confd/src/infix-containers.c b/src/confd/src/infix-containers.c index 492e7220a..d6b856923 100644 --- a/src/confd/src/infix-containers.c +++ 
b/src/confd/src/infix-containers.c @@ -18,7 +18,7 @@ #define CFG_XPATH "/infix-containers:containers" #define INBOX_QUEUE "/run/containers/inbox" #define JOB_QUEUE "/run/containers/queue" -#define ACTIVE_QUEUE "/var/lib/containers/active" +#define ACTIVE_QUEUE "/run/containers/active" #define LOGGER "logger -t container -p local1.notice" @@ -56,9 +56,6 @@ static int add(const char *name, struct lyd_node *cif) fprintf(fp, " --hostname %s", buf); } - if (lydx_is_enabled(cif, "read-only")) - fprintf(fp, " --read-only"); - if (lydx_is_enabled(cif, "privileged")) fprintf(fp, " --privileged"); @@ -392,15 +389,7 @@ static void cleanup(sr_session_ctx_t *session, struct confd *confd) /* * Containers depend on a lot of other system resources being properly * set up, e.g., networking, which is run by dagger. So we need to wait - * for all that before we can launch new, or modified, containers. The - * latter is the tricky part. - * - * By default, containers get a writable layer which is preserved across - * restarts/reboots of container or host -- provided we don't recreate - * them on a reboot. Hence the cmp magic below: we check if the command - * to create a container is the same as what is already activated, if it - * is already activated we know 'podman create' has done its thing and - * we can safely start the container. + * for all that before we can launch new, or modified, containers. 
*/ void infix_containers_post_hook(sr_session_ctx_t *session, struct confd *confd) { @@ -416,33 +405,26 @@ void infix_containers_post_hook(sr_session_ctx_t *session, struct confd *confd) } while ((d = readdir(dir))) { - char curr[strlen(ACTIVE_QUEUE) + strlen(d->d_name) + 2]; char next[strlen(INBOX_QUEUE) + strlen(d->d_name) + 2]; + char name[strlen(d->d_name) + 1]; + char *ptr; if (d->d_name[0] == '.') continue; - snprintf(curr, sizeof(curr), "%s/%s", ACTIVE_QUEUE, d->d_name); snprintf(next, sizeof(next), "%s/%s", INBOX_QUEUE, d->d_name); - if (!systemf("cmp %s %s >/dev/null 2>&1", curr, next)) { - char name[strlen(d->d_name) + 1]; - char *ptr; - - strlcpy(name, d->d_name, sizeof(name)); - ptr = strstr(name, ".sh"); - if (ptr) { - char *nm = NULL; - - *ptr = 0; - if (!strncmp(name, "S01-", 4)) - nm = &name[4]; - - /* New job is already active, no changes, skipping ... */ - if (nm && !is_manual(session, nm)) - systemf("initctl -bnq cond set container:%s", nm); - } - remove(next); - continue; + + strlcpy(name, d->d_name, sizeof(name)); + ptr = strstr(name, ".sh"); + if (ptr) { + char *nm = NULL; + + *ptr = 0; + if (!strncmp(name, "S01-", 4)) + nm = &name[4]; + + if (nm && !is_manual(session, nm)) + systemf("initctl -bnq cond set container:%s", nm); } if (movefile(next, JOB_QUEUE)) diff --git a/src/confd/yang/infix-containers.yang b/src/confd/yang/infix-containers.yang index beb866017..31b3eb5ee 100644 --- a/src/confd/yang/infix-containers.yang +++ b/src/confd/yang/infix-containers.yang @@ -26,6 +26,11 @@ module infix-containers { prefix infix-sys; } + revision 2024-11-12 { + description "Deprecate read-only, it is now always true."; + reference "internal"; + } + revision 2024-10-14 { description "Two major changes: - Allow changing name of host interfaces inside container @@ -256,6 +261,7 @@ module infix-containers { } leaf read-only { + status deprecated; // This is now the default, setting kept only to not break configs description "Create a read-only container. 
Use volumes for writable directories."; type boolean; } diff --git a/src/confd/yang/infix-containers@2024-10-14.yang b/src/confd/yang/infix-containers@2024-11-12.yang similarity index 100% rename from src/confd/yang/infix-containers@2024-10-14.yang rename to src/confd/yang/infix-containers@2024-11-12.yang From 4a4f7e6171fae6d3621ecdd3c2a0370d2efadc9a Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 09:31:28 +0100 Subject: [PATCH 04/21] board/common: clean up stale files in /var/tmp/frr on boot On unclean shutdowns Frr leaves a lot of per-thread message buffers in /var/tmp/frr/[-]./* See https://docs.frrouting.org/en/latest/setup.html Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/lib/tmpfiles.d/frr.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/board/common/rootfs/usr/lib/tmpfiles.d/frr.conf b/board/common/rootfs/usr/lib/tmpfiles.d/frr.conf index b7dc8851d..bad32a39a 100644 --- a/board/common/rootfs/usr/lib/tmpfiles.d/frr.conf +++ b/board/common/rootfs/usr/lib/tmpfiles.d/frr.conf @@ -1 +1,2 @@ d /var/run/frr 0755 frr frr - +R /var/tmp/frr - - - - From 06520afd309c5b097c8b191602dfba735fd7d06f Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 09:36:55 +0100 Subject: [PATCH 05/21] board/common: revert parts of 8353963 Running 'shred' on files stored on eMMC is pointless since the writes are spread out over other sectors rather than overwriting the content of the files as it was supposed to on old rotating media. 
Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/libexec/infix/mnt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/board/common/rootfs/usr/libexec/infix/mnt b/board/common/rootfs/usr/libexec/infix/mnt index c0409e145..aafee06fb 100755 --- a/board/common/rootfs/usr/libexec/infix/mnt +++ b/board/common/rootfs/usr/libexec/infix/mnt @@ -45,11 +45,6 @@ factory_reset() find /sys/class/leds/ -type l -exec sh -c 'echo 100 > $0/brightness' {} \; logger $opt -p user.crit -t "$nm" "Resetting to factory defaults." - # Shred all files to prevent restoring contents - find /mnt/cfg -type f -exec shred -zu {} \; - find /mnt/var -type f -exec shred -zu {} \; - - # Remove any lingering directories and symlinks as well rm -rf /mnt/cfg/* /mnt/var/* logger $opt -p user.crit -t "$nm" "Factory reset complete." From 6d1bf545fe3590a2d9992445c90e79b205b04550 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 15 Nov 2024 06:29:02 +0100 Subject: [PATCH 06/21] package/execd: clean up any lingering containers at boot To be able to handle container restarts, incl. restart policy, at runtime, most of the container data lives in /var/lib/containers, which on most systems is backed by a persistent store. As of issue #823 we no longer keep a writable layer for containers, nor should we cache container state across reboots, all containers are recreated at boot. This task cleans up any lingering state. 
Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 5 +++++ package/execd/execd.conf | 2 ++ 2 files changed, 7 insertions(+) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index 8fa8f6ebf..bd3eebb23 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -273,6 +273,7 @@ commands: create NAME IMAGE NET Create container NAME using IMAGE with networks NET delete [network] NAME Remove container NAME or network NAME from all containers exec NAME CMD Run a command inside a container + flush Clean up lingering containers and associated anonymous volumes find [ifname PID] Find PID of container where '--net IFNAME' currently lives or, find the name of our IFNAME inside the container @PID help Show this help text @@ -425,6 +426,10 @@ case $cmd in exec) podman exec -it "$@" ;; + flush) + echo "Cleaning up any lingering containers"; + podman rm -av + ;; find) cmd=$1 pid=$2 diff --git a/package/execd/execd.conf b/package/execd/execd.conf index 1c4b5625a..1a8da3b82 100644 --- a/package/execd/execd.conf +++ b/package/execd/execd.conf @@ -1,2 +1,4 @@ +task log:prio:local1.notice,tag:container-flush \ + [S] container flush -- Cleaning up lingering containers service log:prio:local1.err,tag:container \ [2345] execd /run/containers/queue /run/containers/active -- Container job runner From 7be17d452b0327c45524ba8fdadf872dea43625f Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 15 Nov 2024 08:51:48 +0100 Subject: [PATCH 07/21] confd: refactor and refine container creation - Reduce the amount of queues: 3 -> 1 - Simplify post hook - Refine execd The resulting simplification of infix_containers_post_hook(), and touching execd, also ensure container environment variable changes are propagated. 
Fixes #822 Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 2 +- package/execd/execd.conf | 2 +- package/execd/tmpfiles.conf | 1 - src/confd/src/infix-containers.c | 171 +++++++++---------------- src/execd/execd.c | 60 ++++----- 5 files changed, 84 insertions(+), 152 deletions(-) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index bd3eebb23..79e505045 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -627,7 +627,7 @@ case $cmd in ;; upgrade) # Start script used to initially create container - script=/run/containers/active/S01-${1}.sh + script=/run/containers/${1}.sh # Find container image img=$(podman inspect "$1" | jq -r .[].ImageName) diff --git a/package/execd/execd.conf b/package/execd/execd.conf index 1a8da3b82..63f1b501b 100644 --- a/package/execd/execd.conf +++ b/package/execd/execd.conf @@ -1,4 +1,4 @@ task log:prio:local1.notice,tag:container-flush \ [S] container flush -- Cleaning up lingering containers service log:prio:local1.err,tag:container \ - [2345] execd /run/containers/queue /run/containers/active -- Container job runner + [2345] execd -l debug /run/containers/queue -- Container job runner diff --git a/package/execd/tmpfiles.conf b/package/execd/tmpfiles.conf index 6ee94af22..0d97bce62 100644 --- a/package/execd/tmpfiles.conf +++ b/package/execd/tmpfiles.conf @@ -3,6 +3,5 @@ d /run/containers/files 0700 - - d /var/lib/containers/oci 0755 - - d /run/containers/inbox 0700 - - d /run/containers/queue 0700 - - -d /run/containers/active 0700 - - d /run/cni 0755 - - L+ /var/lib/cni - - - - /run/cni diff --git a/src/confd/src/infix-containers.c b/src/confd/src/infix-containers.c index d6b856923..5db409fd1 100644 --- a/src/confd/src/infix-containers.c +++ b/src/confd/src/infix-containers.c @@ -13,13 +13,16 @@ #include #include "core.h" + #define ARPING_MSEC 1000 +#define LOGGER "logger -t container -p local1.notice" + #define MODULE 
"infix-containers" #define CFG_XPATH "/infix-containers:containers" -#define INBOX_QUEUE "/run/containers/inbox" -#define JOB_QUEUE "/run/containers/queue" -#define ACTIVE_QUEUE "/run/containers/active" -#define LOGGER "logger -t container -p local1.notice" + +#define _PATH_CONT "/run/containers" +#define _PATH_INBOX _PATH_CONT "/INBOX" +#define _PATH_QUEUE _PATH_CONT "/queue" static int add(const char *name, struct lyd_node *cif) @@ -27,11 +30,13 @@ static int add(const char *name, struct lyd_node *cif) const char *image = lydx_get_cattr(cif, "image"); const char *restart_policy, *string; struct lyd_node *node, *nets, *caps; + char script[strlen(name) + 5]; FILE *fp, *ap; - fp = fopenf("w", "%s/S01-%s.sh", INBOX_QUEUE, name); + snprintf(script, sizeof(script), "%s.sh", name); + fp = fopenf("w", "%s/%s", _PATH_CONT, script); if (!fp) { - ERRNO("Failed adding job S01-%s.sh to job queue" INBOX_QUEUE, name); + ERRNO("Failed creating container script %s/%s", _PATH_CONT, script); return SR_ERR_SYS; } @@ -176,37 +181,45 @@ static int add(const char *name, struct lyd_node *cif) fprintf(fp, " %s", string); fprintf(fp, "\n"); + + if (lydx_is_enabled(cif, "manual")) + fprintf(fp, "initctl -bnq cond set container:%s\n", name); + fchmod(fileno(fp), 0700); fclose(fp); systemf("initctl -bnq enable container@%s.conf", name); + /* + * All start scripts must wait for the rest of confd to complete + * before being enqueued to execd, so we postpone it using this + * "inbox" to the post hook. 
+ */ + writesf(script, "a", "%s", _PATH_INBOX); + return 0; } static int del(const char *name) { - const char *queue[] = { - JOB_QUEUE, - INBOX_QUEUE, - ACTIVE_QUEUE, - }; + char fn[strlen(_PATH_QUEUE) + strlen(name) + 10]; FILE *fp; /* Remove any pending download/create job first */ - for (size_t i = 0; i < NELEMS(queue); i++) { - char fn[strlen(queue[i]) + strlen(name) + 5]; + snprintf(fn, sizeof(fn), "%s/S01-%s.sh", _PATH_QUEUE, name); + erase(fn); - snprintf(fn, sizeof(fn), "%s/%s.sh", queue[i], name); - erase(fn); - } + /* Remove container script itself */ + snprintf(fn, sizeof(fn), "%s/%s.sh", _PATH_CONT, name); + erase(fn); /* Disable service and schedule for deletion. */ systemf("initctl -bnq disable container@%s.conf", name); - fp = fopenf("w", "%s/K01-%s.sh", INBOX_QUEUE, name); + snprintf(fn, sizeof(fn), "%s/K01-%s.sh", _PATH_CONT, name); + fp = fopen(fn, "w"); if (!fp) { - ERRNO("Failed adding job 00-delete-%s.sh to job queue" INBOX_QUEUE, name); + ERRNO("Failed creating container stop script %s", fn); return SR_ERR_SYS; } @@ -215,6 +228,9 @@ static int del(const char *name) fchmod(fileno(fp), 0700); fclose(fp); + /* Enqueue kill job immediately on execd */ + movefile(fn, _PATH_QUEUE); + return SR_ERR_OK; } @@ -330,109 +346,42 @@ static int oci_load(sr_session_ctx_t *session, uint32_t sub_id, const char *xpat return SR_ERR_OK; } -static int is_active(sr_session_ctx_t *session, const char *name) -{ - return srx_enabled(session, CFG_XPATH "/container[name='%s']/enabled", name); -} - -static int is_manual(sr_session_ctx_t *session, const char *name) -{ - return srx_enabled(session, CFG_XPATH "/container[name='%s']/manual", name); -} - -/* - * When container configurations are not saved to startup-config and the - * user reboot the system (or lose power) we will have lingering active - * containers cached on persistent storage. 
- * - * This function runs every time a configuration is applied to clean up - * any lingering active jobs to prevent false matches in the cmp magic - * in the below post-hook. - */ -static void cleanup(sr_session_ctx_t *session, struct confd *confd) -{ - struct dirent *d; - DIR *dir; - - dir = opendir(ACTIVE_QUEUE); - if (!dir) - return; - - while ((d = readdir(dir))) { - char name[strlen(ACTIVE_QUEUE) + strlen(d->d_name) + 2]; - char *ptr; - - if (d->d_name[0] == '.') - continue; - - strlcpy(name, d->d_name, sizeof(name)); - ptr = strstr(name, ".sh"); - if (!ptr) - continue; /* odd, non-script file? */ - *ptr = 0; - - if (strncmp(name, "S01-", 4)) - continue; /* odd, not start script? */ - - if (is_active(session, &name[4])) - continue; - - /* Not found in running-config, remove stale cache. */ - snprintf(name, sizeof(name), "%s/%s", ACTIVE_QUEUE, d->d_name); - if (erase(name)) - ERRNO("Failed removing stale container job %s", name); - } - - closedir(dir); -} - /* * Containers depend on a lot of other system resources being properly * set up, e.g., networking, which is run by dagger. So we need to wait - * for all that before we can launch new, or modified, containers. + * for all that before we can launch new, or modified, containers. This + * post hook runs as (one of) the last actions on a config change/boot. 
*/ void infix_containers_post_hook(sr_session_ctx_t *session, struct confd *confd) { - struct dirent *d; - DIR *dir; - - cleanup(session, confd); + char script[256]; + FILE *fp; - dir = opendir(INBOX_QUEUE); - if (!dir) { - ERROR("Cannot open %s to launch scripts.", INBOX_QUEUE); - return; + fp = fopen(_PATH_INBOX, "r"); + if (!fp) + return; /* nothing to do today */ + + while (fgets(script, sizeof(script), fp)) { + char link[strlen(_PATH_QUEUE) + strlen(script) + 10]; + char path[strlen(script) + 10]; + + chomp(script); + + /* + * Enqueue start job on execd, use a symlink since we + * want to be able to reuse the script for manual image + * uprgade (and debugging) purposes. + */ + snprintf(link, sizeof(link), "%s/S01-%s", _PATH_QUEUE, script); + snprintf(path, sizeof(path), "../%s", script); + if (symlink(path, link) && errno != EEXIST) + ERRNO("Creating symlink %s -> %s", link, path); } - while ((d = readdir(dir))) { - char next[strlen(INBOX_QUEUE) + strlen(d->d_name) + 2]; - char name[strlen(d->d_name) + 1]; - char *ptr; - - if (d->d_name[0] == '.') - continue; - - snprintf(next, sizeof(next), "%s/%s", INBOX_QUEUE, d->d_name); - - strlcpy(name, d->d_name, sizeof(name)); - ptr = strstr(name, ".sh"); - if (ptr) { - char *nm = NULL; - - *ptr = 0; - if (!strncmp(name, "S01-", 4)) - nm = &name[4]; - - if (nm && !is_manual(session, nm)) - systemf("initctl -bnq cond set container:%s", nm); - } - - if (movefile(next, JOB_QUEUE)) - ERRNO("Failed moving %s to job queue %s", next, JOB_QUEUE); - } + fclose(fp); + erase(_PATH_INBOX); - closedir(dir); - systemf("container volume prune -f >/dev/null 2>&1"); + systemf("initctl -bnq touch execd"); } int infix_containers_init(struct confd *confd) diff --git a/src/execd/execd.c b/src/execd/execd.c index 03a23f36e..1c7c03a95 100644 --- a/src/execd/execd.c +++ b/src/execd/execd.c @@ -30,9 +30,8 @@ static int logmask = LOG_UPTO(LOG_NOTICE); static char buffer[BUFSIZ]; -static char *done; -static void run_job(const char *path, char 
*file, int archive) +static void run_job(const char *path, char *file) { char cmd[strlen(path) + strlen(file) + 2]; int rc; @@ -57,11 +56,7 @@ static void run_job(const char *path, char *file, int archive) return; } - dbg("job %s in %s done %p, archive: %d", file, path, done, archive); - if (done && archive) - movefile(cmd, done); - else - erase(cmd); + erase(cmd); } /* @@ -69,7 +64,7 @@ static void run_job(const char *path, char *file, int archive) * a type '*' just to figure out if a job should be archived in * the done directory. */ -static int should_run(const char *name, int type, int *archive) +static int should_run(const char *name, int type) { if (!name || strlen(name) < 3) return 0; @@ -78,32 +73,25 @@ static int should_run(const char *name, int type, int *archive) if (type == '*') { switch (name[0]) { case 'K': - *archive = 0; - return 1; case 'S': - *archive = 1; return 1; default: - errx("unsupported '%s', scripts must start with S or K", name); - return 0; + goto done; } } switch (type) { case 'K': - *archive = 0; - break; case 'S': - *archive = 1; break; default: return 0; } - dbg("name:%s type:'%c' archive:%d => run:%d", name, type, *archive, type == name[0]); + dbg("name:%s type:'%c' => run:%d", name, type, type == name[0]); return type == name[0]; } - +done: errx("unsupported script %s, must follow pattern SNN/KNN", name); return 0; } @@ -111,7 +99,6 @@ static int should_run(const char *name, int type, int *archive) static void run_dir(const char *path, int type) { struct dirent **namelist; - int archive = 0; int n, i; n = scandir(path, &namelist, NULL, alphasort); @@ -126,8 +113,8 @@ static void run_dir(const char *path, int type) if (d->d_type == DT_DIR) continue; - if (should_run(d->d_name, type, &archive)) - run_job(path, d->d_name, archive); + if (should_run(d->d_name, type)) + run_job(path, d->d_name); free(d); } @@ -139,7 +126,7 @@ static void run_dir(const char *path, int type) * Call stop/cleanup jobs first, may use same container name or 
* resources as replacement container start scripts use. */ -static void run_queue(char *path) +static void run_queue(const char *path) { run_dir(path, 'K'); run_dir(path, 'S'); @@ -174,14 +161,13 @@ static void inotify_cb(uev_t *w, void *arg, int _) for (char *p = buffer; p < buffer + bytes;) { struct inotify_event *event = (struct inotify_event *)p; char *name = event->name; - int archive; if (event->mask & (IN_CLOSE_WRITE | IN_ATTRIB | IN_MOVED_TO)) { dbg("Got inotify event %s 0x%04x", name, event->mask); - if (!should_run(name, '*', &archive)) + if (!should_run(name, '*')) continue; - run_job(arg, name, archive); + run_job(arg, name); } p += sizeof(struct inotify_event) + event->len; @@ -225,13 +211,13 @@ int logmask_from_str(const char *str) static int usage(char *arg0, int rc) { printf("Usage:\n" - " %s [-dh] [-l LVL] JOBDIR\n" + " %s [-dh] [-l LVL] QUEUE\n" "Options:\n" " -d Log to stderr as well\n" " -h This help text\n" " -l LVL Set log level: none, err, warn, notice*, info, debug\n" "\n" - "Runs jobs from JOBDIR, re-runs failing jobs on route changes or SIGHUP.\n" + "Runs jobs from QUEUE, re-runs failing jobs on route changes or SIGHUP.\n" "Use SIGUSR1 to toggle debug messages at runtime.\n", arg0); return rc; @@ -246,7 +232,7 @@ int main(int argc, char *argv[]) uev_t sighup_watcher; int logopt = LOG_PID; int wd, sd, fd, c; - char *jobdir; + char *queue; uev_ctx_t ctx; int rc = 0; @@ -272,13 +258,11 @@ int main(int argc, char *argv[]) if (optind >= argc) return usage(argv[0], 1); - jobdir = argv[optind++]; - if (optind < argc) - done = argv[optind]; + queue = argv[optind]; - if (access(jobdir, X_OK)) { + if (access(queue, X_OK)) { fprintf(stderr, "Cannot find job directory %s, errno %d: %s\n", - jobdir, errno, strerror(errno)); + queue, errno, strerror(errno)); return 1; } @@ -299,7 +283,7 @@ int main(int argc, char *argv[]) return 1; } - wd = inotify_add_watch(fd, jobdir, IN_CLOSE_WRITE | IN_ATTRIB | IN_MOVED_TO); + wd = inotify_add_watch(fd, queue, 
IN_CLOSE_WRITE | IN_ATTRIB | IN_MOVED_TO); if (wd == -1) { err("inotify_add_watch"); close(fd); @@ -323,7 +307,7 @@ int main(int argc, char *argv[]) } uev_init(&ctx); - if (uev_signal_init(&ctx, &sighup_watcher, signal_cb, jobdir, SIGHUP) == -1) { + if (uev_signal_init(&ctx, &sighup_watcher, signal_cb, queue, SIGHUP) == -1) { err("uev_signal_init (sighup)"); rc = 1; goto done; @@ -334,19 +318,19 @@ int main(int argc, char *argv[]) goto done; } - if (uev_io_init(&ctx, &inotify_watcher, inotify_cb, jobdir, fd, UEV_READ) == -1) { + if (uev_io_init(&ctx, &inotify_watcher, inotify_cb, queue, fd, UEV_READ) == -1) { err("uev_io_init (inotify)"); rc = 1; goto done; } - if (uev_io_init(&ctx, &netlink_watcher, netlink_cb, jobdir, sd, UEV_READ) == -1) { + if (uev_io_init(&ctx, &netlink_watcher, netlink_cb, queue, sd, UEV_READ) == -1) { err("uev_io_init (netlink)"); rc = 1; goto done; } - run_queue(jobdir); + run_queue(queue); if (uev_run(&ctx, 0) == -1) { err("uev_run"); rc = 1; From 35f57ee6bc36971182b86a2e899d769dcf7811bc Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 15 Nov 2024 11:29:04 +0100 Subject: [PATCH 08/21] confd: add support for fetching container images over ftp/http/https - Anonymous FTP, or URL encoded ftp://user:password@addr/oci.tar.gz - HTTP/HTTPS fetched with curl, optional credentials support - Verify download against an optional sha256 checksum Ensure the unpacked directory name does not contain a ':', it is a restricted character and cannot be part of the file name. If this syntax is used we retain it as the name and retag it after load. 
Fix #801 Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 115 +++++++++++++++--- src/confd/src/infix-containers.c | 3 + src/confd/yang/containers.inc | 2 +- src/confd/yang/infix-containers.yang | 24 +++- ....yang => infix-containers@2024-11-15.yang} | 0 src/klish-plugin-infix/xml/containers.xml | 6 +- 6 files changed, 127 insertions(+), 23 deletions(-) rename src/confd/yang/{infix-containers@2024-11-12.yang => infix-containers@2024-11-15.yang} (100%) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index 79e505045..8aaa53205 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -1,6 +1,8 @@ #!/bin/sh - +DOWNLOADS=/var/lib/containers/oci +BUILTIN=/lib/oci all="" +sha="" env="" port="" force= @@ -10,6 +12,72 @@ log() logger -I $PPID -t container -p local1.notice -- "$*" } +check() +{ + file=$1 + + if [ -z "$sha" ]; then + log "no checksum to verify $file against, continuing." + return 0 + fi + + if echo "${sha} ${file}" | sha256sum -c -s; then + log "$file checksum verified OK." + return 0 + fi + + got=$(sha256sum "${file}" | awk '{print $1}') + log "$file checksum mismatch, got $got, expected $sha, removing file." + rm -f "$file" + + return 1 +} + +# Fetch an OCI image over ftp/http/https. Use wget for FTP, which curl +# empirically does not work well with. Log progress+ & error to syslog. +fetch() +{ + url=$1 + file=$(basename "$url") + dst="$DOWNLOADS/$file" + + cd "$DOWNLOADS" || return + if [ -e "$file" ]; then + log "$file already available." + if check "$file"; then + echo "$dst" + return 0 + fi + fi + + log "Fetching $url" + + if echo "$url" | grep -qE "^ftp://"; then + cmd="wget -q $url" + elif echo "$url" | grep -qE "^https?://"; then + cmd="curl $creds -sSL --fail -o \"$file\" $url" + else + log "Unsupported URL scheme: $url" + return 1 + fi + + if out=$(eval "$cmd" 2>&1); then + log "$file downloaded successfully." 
+ if check "$file"; then + echo "$dst" + return 0 + fi + fi + + # log error message from backend + while IFS= read -r line; do + log "$line" + done </dev/null @@ -73,10 +154,6 @@ unpack_archive() name=$dir fi - if [ "$remove" = "true" ]; then - rm -rf "$file" - fi - echo "$name" } @@ -102,7 +179,9 @@ create() # Unpack and load docker-archive/oci/oci-archive, returning image # name, or return docker:// URL for download. - image=$(unpack_archive "$image") + if ! image=$(unpack_archive "$image"); then + exit 1 + fi if [ -z "$logging" ]; then logging="--log-driver k8s-file --log-opt path=/run/containers/$name.fifo" @@ -247,6 +326,7 @@ options: --dns-search LIST Set host lookup search list when creating container --cap-add CAP Add capability to unprivileged container --cap-drop CAP Drop capability, for privileged containter + --checksum SHA Use sha256sym to verify images from ftp/http/https -c, --creds USR[:PWD] Credentials to pass to curl -u for remote ops -d, --detach Detach a container started with 'run IMG [CMD]' -e, --env FILE Environment variables when creating container @@ -306,6 +386,10 @@ while [ "$1" != "" ]; do shift caps="$caps --cap-drop=$1" ;; + --checksum) + shift + sha="$1" + ;; -c | --creds) shift creds="-u $1" @@ -350,7 +434,6 @@ while [ "$1" != "" ]; do --log-path) shift logging="$logging --log-opt path=$1" - log_path="$1" ;; -m | --mount) shift @@ -457,10 +540,10 @@ case $cmd in load) url=$1 name=$2 + # shellcheck disable=SC2086 - if echo "$url" | grep -q "://"; then - file=$(basename "$url") - curl -k $creds -Lo "$file" "$url" + if echo "$url" | grep -qE "^(ftp|http|https)://"; then + file=$(fetch "$url") else file="$url" fi @@ -500,7 +583,7 @@ case $cmd in podman images $all --format "{{.Repository}}:{{.Tag}}" ;; oci) - find /lib/oci /var/lib/containers/oci -type f 2>/dev/null + find $BUILTIN $DOWNLOADS -type f 2>/dev/null ;; *) podman ps $all --format "{{.Names}}" diff --git a/src/confd/src/infix-containers.c b/src/confd/src/infix-containers.c 
index 5db409fd1..85c22b414 100644 --- a/src/confd/src/infix-containers.c +++ b/src/confd/src/infix-containers.c @@ -175,6 +175,9 @@ static int add(const char *name, struct lyd_node *cif) if (lydx_is_enabled(cif, "manual")) fprintf(fp, " --manual"); + if ((string = lydx_get_cattr(cif, "checksum"))) + fprintf(fp, " --checksum %s", string); + fprintf(fp, " create %s %s", name, image); if ((string = lydx_get_cattr(cif, "command"))) diff --git a/src/confd/yang/containers.inc b/src/confd/yang/containers.inc index c143090d9..8d69b6625 100644 --- a/src/confd/yang/containers.inc +++ b/src/confd/yang/containers.inc @@ -2,5 +2,5 @@ # REMEMBER TO UPDATE infix-interfaces ALSO IN confd.inc MODULES=( "infix-interfaces@2024-11-12.yang -e vlan-filtering -e containers" - "infix-containers@2024-10-14.yang" + "infix-containers@2024-11-15.yang" ) diff --git a/src/confd/yang/infix-containers.yang b/src/confd/yang/infix-containers.yang index 31b3eb5ee..87673d8b8 100644 --- a/src/confd/yang/infix-containers.yang +++ b/src/confd/yang/infix-containers.yang @@ -26,24 +26,26 @@ module infix-containers { prefix infix-sys; } - revision 2024-11-12 { - description "Deprecate read-only, it is now always true."; + revision 2024-11-15 { + description "Two major changes: + - Add support for ftp/http/https images with checksum + - Deprecate read-only, it is now always true"; reference "internal"; } - revision 2024-10-14 { + revision 2024-10-14 { description "Two major changes: - Allow changing name of host interfaces inside container - Support hostname format specifiers, like ietf-system"; reference "internal"; } - revision 2024-03-27 { + revision 2024-03-27 { description "Add support for capabilities."; reference "internal"; } - revision 2024-02-01 { + revision 2024-02-01 { description "Initial revision"; reference "internal"; } @@ -138,6 +140,13 @@ module infix-containers { oci-archive:/lib/oci/archive -- Use archive:latest from OCI archive May be in .tar or .tar.gz format + Additionally, the 
following URIs are also supported for setups + that do not use a HUB or similar. Recommend using 'checksum'! + + ftp://addr/path/to/archive -- Downloaded using wget + http://addr/path/to/archive -- Downloaded using curl + https://addr/path/to/archive -- Downloaded using curl + Note: if a remote repository cannot be reached, the creation of the container will be put on a queue that retries pull every time there is a route change in the host's system."; @@ -145,6 +154,11 @@ module infix-containers { type string; } + leaf checksum { + description "Checksum for ftp/http/https OCI archives (sha256sum)"; + type string; + } + leaf image-id { description "Docker image ID, exact hash used."; config false; diff --git a/src/confd/yang/infix-containers@2024-11-12.yang b/src/confd/yang/infix-containers@2024-11-15.yang similarity index 100% rename from src/confd/yang/infix-containers@2024-11-12.yang rename to src/confd/yang/infix-containers@2024-11-15.yang diff --git a/src/klish-plugin-infix/xml/containers.xml b/src/klish-plugin-infix/xml/containers.xml index 45ffcbb2d..76fefe6d8 100644 --- a/src/klish-plugin-infix/xml/containers.xml +++ b/src/klish-plugin-infix/xml/containers.xml @@ -75,11 +75,15 @@ + + + creds=${KLISH_PARAM_creds:+-c $KLISH_PARAM_creds} + sha=${KLISH_PARAM_sha:+--checksum $KLISH_PARAM_sha} cd /var/lib/containers/oci - doas container $creds load $KLISH_PARAM_url $KLISH_PARAM_name + doas container $creds $sha load $KLISH_PARAM_url $KLISH_PARAM_name From d7756f88e27c693a240d1f1de7d0be80e2337abb Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 15 Nov 2024 12:03:26 +0100 Subject: [PATCH 09/21] confd: fix pyang linter errors Issue #815 details issues found running the Clixon Controller and Cisco Yangsuite.
The errors and warnings listed are very similar to pyang, which the undersigned has; the following changes fix the pyang errors: - relocate 'feature containers' to submodule - drop already deviated ospf:database deviations - drop unused imports Signed-off-by: Joachim Wiberg --- src/confd/yang/confd.inc | 4 +-- src/confd/yang/containers.inc | 2 +- src/confd/yang/infix-containers.yang | 4 --- src/confd/yang/infix-if-bridge.yang | 10 +++--- ...6.yang => infix-if-bridge@2024-11-15.yang} | 0 src/confd/yang/infix-if-container.yang | 26 +++++++++++---- ...ang => infix-if-container@2024-11-15.yang} | 0 src/confd/yang/infix-interfaces.yang | 18 ++++------- ....yang => infix-interfaces@2024-11-15.yang} | 0 src/confd/yang/infix-routing.yang | 32 ++++--------------- ...-01.yang => infix-routing@2024-11-15.yang} | 0 11 files changed, 38 insertions(+), 58 deletions(-) rename src/confd/yang/{infix-if-bridge@2024-08-26.yang => infix-if-bridge@2024-11-15.yang} (100%) rename src/confd/yang/{infix-if-container@2024-10-29.yang => infix-if-container@2024-11-15.yang} (100%) rename src/confd/yang/{infix-interfaces@2024-11-12.yang => infix-interfaces@2024-11-15.yang} (100%) rename src/confd/yang/{infix-routing@2024-10-01.yang => infix-routing@2024-11-15.yang} (100%) diff --git a/src/confd/yang/confd.inc b/src/confd/yang/confd.inc index cb7eee3da..84ee4ae89 100644 --- a/src/confd/yang/confd.inc +++ b/src/confd/yang/confd.inc @@ -27,7 +27,7 @@ MODULES=( "ieee802-dot1q-types@2022-10-29.yang" "infix-ip@2024-09-16.yang" "infix-if-type@2024-10-13.yang" - "infix-routing@2024-10-01.yang" + "infix-routing@2024-11-15.yang" "ieee802-dot1ab-lldp@2022-03-15.yang" "infix-lldp@2023-08-23.yang" "infix-dhcp-client@2024-09-20.yang" @@ -37,7 +37,7 @@ MODULES=( "ieee802-ethernet-interface@2019-06-21.yang" "infix-ethernet-interface@2024-02-27.yang" "infix-factory-default@2023-06-28.yang" - "infix-interfaces@2024-11-12.yang -e vlan-filtering" + "infix-interfaces@2024-11-15.yang -e vlan-filtering" # from
rousette "ietf-restconf@2017-01-26.yang" diff --git a/src/confd/yang/containers.inc b/src/confd/yang/containers.inc index 8d69b6625..0a5d891dc 100644 --- a/src/confd/yang/containers.inc +++ b/src/confd/yang/containers.inc @@ -1,6 +1,6 @@ # -*- sh -*- # REMEMBER TO UPDATE infix-interfaces ALSO IN confd.inc MODULES=( - "infix-interfaces@2024-11-12.yang -e vlan-filtering -e containers" + "infix-interfaces@2024-11-15.yang -e vlan-filtering -e containers" "infix-containers@2024-11-15.yang" ) diff --git a/src/confd/yang/infix-containers.yang b/src/confd/yang/infix-containers.yang index 87673d8b8..4d46b08d6 100644 --- a/src/confd/yang/infix-containers.yang +++ b/src/confd/yang/infix-containers.yang @@ -6,10 +6,6 @@ module infix-containers { namespace "urn:ietf:params:xml:ns:yang:infix-containers"; prefix infix-cont; - import ietf-yang-types { - prefix yang; - } - import ietf-interfaces { prefix if; } diff --git a/src/confd/yang/infix-if-bridge.yang b/src/confd/yang/infix-if-bridge.yang index 74806d3fc..c85d41952 100644 --- a/src/confd/yang/infix-if-bridge.yang +++ b/src/confd/yang/infix-if-bridge.yang @@ -3,9 +3,6 @@ submodule infix-if-bridge { belongs-to infix-interfaces { prefix infix-if; } - import ietf-yang-types { - prefix yang; - } import iana-if-type { prefix ianaift; } @@ -15,9 +12,6 @@ submodule infix-if-bridge { import ietf-interfaces { prefix if; } - import ietf-inet-types { - prefix inet; - } import ietf-ip { prefix ip; } @@ -32,6 +26,10 @@ submodule infix-if-bridge { contact "kernelkit@googlegroups.com"; description "Linux bridge extension for ietf-interfaces."; + revision 2024-11-15 { + description "Fix pyang linter warnings, drop unused imports."; + reference "internal"; + } revision 2024-08-26 { description "Improve must expressions for multicast. 
diff --git a/src/confd/yang/infix-if-bridge@2024-08-26.yang b/src/confd/yang/infix-if-bridge@2024-11-15.yang similarity index 100% rename from src/confd/yang/infix-if-bridge@2024-08-26.yang rename to src/confd/yang/infix-if-bridge@2024-11-15.yang diff --git a/src/confd/yang/infix-if-container.yang b/src/confd/yang/infix-if-container.yang index 425156736..f496aa4ff 100644 --- a/src/confd/yang/infix-if-container.yang +++ b/src/confd/yang/infix-if-container.yang @@ -13,9 +13,6 @@ submodule infix-if-container { import ietf-ip { prefix ip; } - import infix-if-type { - prefix infixift; - } organization "KernelKit"; contact "kernelkit@googlegroups.com"; @@ -23,6 +20,13 @@ submodule infix-if-container { Ensures a container interface can never be a bridge port, or LAG member, at the same time."; + revision 2024-11-15 { + description "Two changes: + - Relocate 'feature containers' declaration to here + - Fix pyang linter errors and warnings, dropping unused + imports and dropping the default value for subnet"; + reference "internal"; + } revision 2024-10-29 { description "Add read only container list to container-network"; reference "internal"; @@ -32,6 +36,14 @@ submodule infix-if-container { reference "internal"; } + /* + * Features + */ + + feature containers { + description "Containers is an optional build-time feature in Infix."; + } + /* * Identities */ @@ -76,6 +88,9 @@ submodule infix-if-container { list subnet { description "Static IP ranges to hand out addresses to containers from. + If no subnet is provided, a default 172.17.0.0/16 one is + set up. This is the default in a standard Docker setup. + A container bridge forwards DNS, NTP, and SSH by default to the host interfaces."; when "../type = 'infix-if:bridge'"; @@ -83,10 +98,7 @@ submodule infix-if-container { leaf subnet { type inet:ip-prefix; - description "Subnet to assign addresses from, round-robin assignment. 
- - The default is from a standard Docker setup."; - default "172.17.0.0/16"; + description "Subnet to assign addresses from, round-robin assignment."; } leaf gateway { diff --git a/src/confd/yang/infix-if-container@2024-10-29.yang b/src/confd/yang/infix-if-container@2024-11-15.yang similarity index 100% rename from src/confd/yang/infix-if-container@2024-10-29.yang rename to src/confd/yang/infix-if-container@2024-11-15.yang diff --git a/src/confd/yang/infix-interfaces.yang b/src/confd/yang/infix-interfaces.yang index 1afb2721c..a550fdf16 100644 --- a/src/confd/yang/infix-interfaces.yang +++ b/src/confd/yang/infix-interfaces.yang @@ -4,7 +4,7 @@ module infix-interfaces { prefix infix-if; import infix-if-type { - prefix infixift; + prefix infix-ift; } import ietf-interfaces { prefix if; @@ -23,8 +23,10 @@ module infix-interfaces { contact "kernelkit@googlegroups.com"; description "Linux bridge and lag extensions for ietf-interfaces."; - revision 2024-11-12 { - description "Limit name 2-15 chars, Linux limitation."; + revision 2024-11-15 { + description "Two changes: + - Limit name 2-15 chars, Linux limitation + - Relocate 'feature containers' to submodule"; reference "internal"; } @@ -74,14 +76,6 @@ module infix-interfaces { reference "internal"; } - /* - * Features - */ - - feature containers { - description "Containers is an optional build-time feature in Infix."; - } - /* * Data Nodes */ @@ -89,7 +83,7 @@ module infix-interfaces { deviation "/if:interfaces/if:interface/if:type" { deviate replace { type identityref { - base infixift:infix-interface-type; + base infix-ift:infix-interface-type; } } } diff --git a/src/confd/yang/infix-interfaces@2024-11-12.yang b/src/confd/yang/infix-interfaces@2024-11-15.yang similarity index 100% rename from src/confd/yang/infix-interfaces@2024-11-12.yang rename to src/confd/yang/infix-interfaces@2024-11-15.yang diff --git a/src/confd/yang/infix-routing.yang b/src/confd/yang/infix-routing.yang index 59d7bcb69..fa17daeb3 100644 --- 
a/src/confd/yang/infix-routing.yang +++ b/src/confd/yang/infix-routing.yang @@ -15,17 +15,17 @@ module infix-routing { import ietf-ospf { prefix ospf; } - import ietf-interfaces { - prefix if; - } - import ietf-routing-types { - prefix rt-types; - } organization "KernelKit"; contact "kernelkit@googlegroups.com"; description "Deviations and augments for ietf-routing and ietf-ospf."; + revision 2024-11-15 { + description "Fix pyang linter errors and warnings: + - Drop OSPF database deviations, already deviated earlier + - Drop unused imports"; + reference "internal"; + } revision 2024-10-01 { description "Remove possibility to have loopack in multiple areas."; reference "internal"; @@ -457,26 +457,6 @@ module infix-routing { deviate not-supported; } - /* OSPF database */ - deviation "/rt:routing/rt:control-plane-protocols/rt:control-plane-protocol/ospf:ospf/ospf:database/ospf:as-scope-lsa-type/ospf:as-scope-lsas/ospf:as-scope-lsa/ospf:version/ospf:ospfv3" { - deviate not-supported; - } - deviation "/rt:routing/rt:control-plane-protocols/rt:control-plane-protocol/ospf:ospf/ospf:database/ospf:as-scope-lsa-type/ospf:as-scope-lsas/ospf:as-scope-lsa/ospf:version/ospf:ospfv2/ospf:ospfv2/ospf:body/ospf:opaque" { - deviate not-supported; - } - deviation "/rt:routing/rt:control-plane-protocols/rt:control-plane-protocol/ospf:ospf/ospf:database/ospf:as-scope-lsa-type/ospf:as-scope-lsas/ospf:as-scope-lsa/ospf:version/ospf:ospfv2/ospf:ospfv2/ospf:body/ospf:external" { - deviate not-supported; - } - deviation "/rt:routing/rt:control-plane-protocols/rt:control-plane-protocol/ospf:ospf/ospf:database/ospf:as-scope-lsa-type/ospf:as-scope-lsas/ospf:as-scope-lsa/ospf:version/ospf:ospfv2/ospf:ospfv2/ospf:body/ospf:summary" { - deviate not-supported; - } - deviation "/rt:routing/rt:control-plane-protocols/rt:control-plane-protocol/ospf:ospf/ospf:database/ospf:as-scope-lsa-type/ospf:as-scope-lsas/ospf:as-scope-lsa/ospf:version/ospf:ospfv2/ospf:ospfv2/ospf:body/ospf:router" { - deviate 
not-supported; - } - /* OSPF RPCs */ deviation "/ospf:clear-neighbor" { deviate not-supported; diff --git a/src/confd/yang/infix-routing@2024-10-01.yang b/src/confd/yang/infix-routing@2024-11-15.yang similarity index 100% rename from src/confd/yang/infix-routing@2024-10-01.yang rename to src/confd/yang/infix-routing@2024-11-15.yang From c9d5dd525904b078cc6b377f3d0595e83378ace9 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 14:24:36 +0100 Subject: [PATCH 10/21] confd: quiet container script log messages Should be inverted to a --verbose or --debug flag instead. After this change we still see the full 'podman create ...' command, with all the options and arguments. Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 2 +- src/confd/src/infix-containers.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index 8aaa53205..15bde625a 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -213,7 +213,7 @@ create() fi # shellcheck disable=SC2048 - log "Calling podman create --name $name --conmon-pidfile=$pidfn $args $image $*" + log "podman create --name $name --conmon-pidfile=$pidfn $args $image $*" if podman create --name "$name" --conmon-pidfile="$pidfn" $args "$image" $*; then [ -n "$quiet" ] || log "Successfully created container $name from $image" [ -n "$manual" ] || start "$name" diff --git a/src/confd/src/infix-containers.c b/src/confd/src/infix-containers.c index 85c22b414..3d592f28b 100644 --- a/src/confd/src/infix-containers.c +++ b/src/confd/src/infix-containers.c @@ -42,9 +42,9 @@ static int add(const char *name, struct lyd_node
*cif) /* Stop any running container gracefully so it releases its IP addresses. */ fprintf(fp, "#!/bin/sh\n" - "container stop %s >/dev/null\n" /* Silence "not running" on upgrade */ - "container delete %s >/dev/null\n" /* Silence any hashes when deleting */ - "container", name, name); + "container --quiet stop %s >/dev/null\n" /* Silence "not running" on upgrade */ + "container --quiet delete %s >/dev/null\n" /* Silence any hashes when deleting */ + "container --quiet", name, name); LYX_LIST_FOR_EACH(lyd_child(cif), node, "dns") fprintf(fp, " --dns %s", lyd_get_value(node)); From e5323ecf092fd25849aba35357446243df46c1bc Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 14:26:41 +0100 Subject: [PATCH 11/21] execd: add retry timer for pending jobs When an Infix device is connected to a LAN where the gateway has yet to connect to the Internet, the container script will fail pulling images from any remote server. Nov 16 12:48:13 infix container[3490]: Error: initializing source docker://ghcr.io/kernelkit/curios:edge: pinging container registry ghcr.io: Get "https://ghcr.io/v2/": dial tcp: lookup ghcr.io on 127.0.0.1:53: read udp 127.0.0.1:55422->127.0.0.1:53: i/o timeout Nov 16 12:48:13 infix container[3641]: Error: failed creating container fw, please check the configuration. Nov 16 12:48:13 infix execd[3490]: /run/containers/queue/S01-fw.sh failed, exit code: 1 Since execd until now only retried on netlink/inotify events, or a manual SIGHUP, jobs would get stuck even though Internet connectivity had been established. This patch fixes that with the addition of a retry timer which runs while there are pending jobs in the queue.
Signed-off-by: Joachim Wiberg --- src/execd/Makefile.am | 2 +- src/execd/execd.c | 161 +++++++++++++++++------------------------- src/execd/execd.h | 76 ++++++++++++++++++++ 3 files changed, 140 insertions(+), 99 deletions(-) create mode 100644 src/execd/execd.h diff --git a/src/execd/Makefile.am b/src/execd/Makefile.am index 102b04d10..ae10145cc 100644 --- a/src/execd/Makefile.am +++ b/src/execd/Makefile.am @@ -2,7 +2,7 @@ DISTCLEANFILES = *~ *.d ACLOCAL_AMFLAGS = -I m4 sbin_PROGRAMS = execd -execd_SOURCES = execd.c +execd_SOURCES = execd.c execd.h execd_CPPFLAGS = -D_GNU_SOURCE execd_CFLAGS = -W -Wall -Wextra -Wno-unused execd_CFLAGS = $(libuev_CFLAGS) $(libite_CFLAGS) diff --git a/src/execd/execd.c b/src/execd/execd.c index 1c7c03a95..caad924ad 100644 --- a/src/execd/execd.c +++ b/src/execd/execd.c @@ -1,37 +1,15 @@ /* SPDX-License-Identifier: ISC */ -#include -#include -#include -#include -#include -#include -#include -#include -#define SYSLOG_NAMES -#include -#include - -#include -#include - -#include -#include - -#include -#include - -#define err(fmt, args...) syslog(LOG_ERR, fmt ": %s", ##args, strerror(errno)) -#define errx(fmt, args...) syslog(LOG_ERR, fmt, ##args) -#define warn(fmt, args...) syslog(LOG_WARNING, fmt, ": %s", ##args, strerror(errno)) -#define warnx(fmt, args...) syslog(LOG_WARNING, fmt, ##args) -#define log(fmt, args...) syslog(LOG_NOTICE, fmt, ##args) -#define dbg(fmt, args...) 
syslog(LOG_DEBUG, fmt, ##args) +#include "execd.h" +#define RETRY_TIMER 60 + +static uev_t retry_watcher; +static int retry = RETRY_TIMER; static int logmask = LOG_UPTO(LOG_NOTICE); static char buffer[BUFSIZ]; -static void run_job(const char *path, char *file) +static int run_job(const char *path, const char *file) { char cmd[strlen(path) + strlen(file) + 2]; int rc; @@ -46,65 +24,28 @@ static void run_job(const char *path, char *file) snprintf(cmd, sizeof(cmd), "%s/%s", path, file); if (access(cmd, X_OK)) { - errx("skipping %s, not executable", cmd); - return; + errx("%s skipping, not executable.", cmd); + return -1; } dbg("running job %s", cmd); if ((rc = systemf("%s", cmd))) { - errx("failed %s: rc %d", cmd, rc); - return; + errx("%s failed, exit code: %d", cmd, rc); + return -1; } - erase(cmd); -} - -/* - * Allow SNN and KNN style jobs, for inotyify_cb() we also allow - * a type '*' just to figure out if a job should be archived in - * the done directory. - */ -static int should_run(const char *name, int type) -{ - if (!name || strlen(name) < 3) - return 0; - - if (isdigit(name[1]) && isdigit(name[2])) { - if (type == '*') { - switch (name[0]) { - case 'K': - case 'S': - return 1; - default: - goto done; - } - } - - switch (type) { - case 'K': - case 'S': - break; - default: - return 0; - } - - dbg("name:%s type:'%c' => run:%d", name, type, type == name[0]); - return type == name[0]; - } -done: - errx("unsupported script %s, must follow pattern SNN/KNN", name); - return 0; + return erase(cmd); } -static void run_dir(const char *path, int type) +static int run_dir(const char *path, int type) { struct dirent **namelist; - int n, i; + int n, i, num = 0; n = scandir(path, &namelist, NULL, alphasort); if (n < 0) { err("scandir %s", path); - return; + return 0; } for (i = 0; i < n; i++) { @@ -114,12 +55,14 @@ static void run_dir(const char *path, int type) continue; if (should_run(d->d_name, type)) - run_job(path, d->d_name); + num += !!run_job(path, d->d_name); 
free(d); } free(namelist); + + return num; } /* @@ -128,13 +71,24 @@ static void run_dir(const char *path, int type) */ static void run_queue(const char *path) { - run_dir(path, 'K'); - run_dir(path, 'S'); + int num; + + num = run_dir(path, 'K'); + num += run_dir(path, 'S'); + + if (num) + uev_timer_set(&retry_watcher, retry, 0); } -static void signal_cb(uev_t *w, void *arg, int _) +static void signal_cb(uev_t *w, void *arg, int signo) { - dbg("Got signal, calling job queue"); + dbg("signal %d, calling job queue", signo); + run_queue(arg); +} + +static void timer_cb(uev_t *w, void *arg, int _) +{ + warnx("timer, retry job queue"); run_queue(arg); } @@ -151,6 +105,7 @@ static void toggle_debug(uev_t *w, void *arg, int _) static void inotify_cb(uev_t *w, void *arg, int _) { ssize_t bytes; + int num = 0; bytes = read(w->fd, buffer, sizeof(buffer)); if (bytes == -1) { @@ -167,11 +122,14 @@ static void inotify_cb(uev_t *w, void *arg, int _) if (!should_run(name, '*')) continue; - run_job(arg, name); + num += !!run_job(arg, name); } p += sizeof(struct inotify_event) + event->len; } + + if (num) + uev_timer_set(&retry_watcher, retry, 0); } static void netlink_cb(uev_t *w, void *arg, int _) @@ -197,28 +155,20 @@ static void netlink_cb(uev_t *w, void *arg, int _) run_queue(arg); } -int logmask_from_str(const char *str) -{ - const CODE *code; - - for (code = prioritynames; code->c_name; code++) - if (!strcmp(str, code->c_name)) - return LOG_UPTO(code->c_val); - - return -1; -} - -static int usage(char *arg0, int rc) +static int usage(const char *arg0, int rc) { printf("Usage:\n" - " %s [-dh] [-l LVL] QUEUE\n" + " %s [-dh] [-l LVL] [-t SEC] QUEUE\n" "Options:\n" - " -d Log to stderr as well\n" - " -h This help text\n" + " -d Log to stderr as well\n" + " -h This help text\n" " -l LVL Set log level: none, err, warn, notice*, info, debug\n" + " -t SEC Retry timer in seconds [10, 604800], default: %d\n" "\n" - "Runs jobs from QUEUE, re-runs failing jobs on route changes or 
SIGHUP.\n" - "Use SIGUSR1 to toggle debug messages at runtime.\n", arg0); + "Run jobs from QUEUE. Triggers on inotify of new jobs, route changes, and\n" + "retries failing jobs every minute until the queue has been emtied.\n" + "Use SIGHUP to trigger a manual retry.\n" + "Use SIGUSR1 to toggle debug messages at runtime.\n", arg0, RETRY_TIMER); return rc; } @@ -236,7 +186,7 @@ int main(int argc, char *argv[]) uev_ctx_t ctx; int rc = 0; - while ((c = getopt(argc, argv, "dhl:")) != EOF) { + while ((c = getopt(argc, argv, "dhl:t:")) != EOF) { switch (c) { case 'd': logopt |= LOG_PERROR; @@ -250,6 +200,13 @@ int main(int argc, char *argv[]) return usage(argv[0], 1); } break; + case 't': + retry = atoi(optarg); + if (retry < 10 || retry > 604800) { + fprintf(stderr, "Invalid value %d, accepted [10, 604800]", retry); + return 1; + } + break; default: return usage(argv[0], 1); } @@ -259,6 +216,7 @@ int main(int argc, char *argv[]) return usage(argv[0], 1); queue = argv[optind]; + retry *= 1000; if (access(queue, X_OK)) { fprintf(stderr, "Cannot find job directory %s, errno %d: %s\n", @@ -312,6 +270,14 @@ int main(int argc, char *argv[]) rc = 1; goto done; } + + /* Initial delay of 1 sec, lots of other events happening at boot. 
*/ + if (uev_timer_init(&ctx, &retry_watcher, timer_cb, queue, 1000, 0) == -1) { + err("uev_timer_init (1000, %d)", retry); + rc = 1; + goto done; + } + if (uev_signal_init(&ctx, &sigusr1_watcher, toggle_debug, NULL, SIGUSR1) == -1) { err("uev_signal_init (sigusr1)"); rc = 1; @@ -330,7 +296,6 @@ int main(int argc, char *argv[]) goto done; } - run_queue(queue); if (uev_run(&ctx, 0) == -1) { err("uev_run"); rc = 1; diff --git a/src/execd/execd.h b/src/execd/execd.h new file mode 100644 index 000000000..06ba51bb7 --- /dev/null +++ b/src/execd/execd.h @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#define SYSLOG_NAMES +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#define err(fmt, args...) syslog(LOG_ERR, fmt ": %s", ##args, strerror(errno)) +#define errx(fmt, args...) syslog(LOG_ERR, fmt, ##args) +#define warn(fmt, args...) syslog(LOG_WARNING, fmt, ": %s", ##args, strerror(errno)) +#define warnx(fmt, args...) syslog(LOG_WARNING, fmt, ##args) +#define log(fmt, args...) syslog(LOG_NOTICE, fmt, ##args) +#define dbg(fmt, args...) syslog(LOG_DEBUG, fmt, ##args) + +/* + * Allow SNN and KNN style jobs, for inotyify_cb() we also allow + * a type '*' just to figure out if a job should be archived in + * the done directory. 
+ */ +static inline int should_run(const char *name, int type) +{ + if (!name || strlen(name) < 3) + return 0; + + if (isdigit(name[1]) && isdigit(name[2])) { + if (type == '*') { + switch (name[0]) { + case 'K': + case 'S': + return 1; + default: + goto done; + } + } + + switch (type) { + case 'K': + case 'S': + break; + default: + return 0; + } + + dbg("name:%s type:'%c' => run:%d", name, type, type == name[0]); + return type == name[0]; + } +done: + errx("unsupported script %s, must follow pattern SNN/KNN", name); + return 0; +} + +static inline int logmask_from_str(const char *str) +{ + const CODE *code; + + for (code = prioritynames; code->c_name; code++) + if (!strcmp(str, code->c_name)) + return LOG_UPTO(code->c_val); + + return -1; +} + From 888bece76eb99af176c5fcd2044128ed5c5c635b Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 14:21:25 +0100 Subject: [PATCH 12/21] board/common: workaround missing --retry=NUM flag to podman Disable the default "podman pull" retry value. We use execd to retry "podman create" on failure. Without this change, a single container can block start of other containers by 3 * 20 seconds. Now we only block max 20 seconds before we try starting the next container. Modern versions of podman (>= 5.0) have this --retry option, but it does not have CNI, so this is a temporary workaround.
Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 4 ++++ .../4.5.0/0001-disable-pull-retry.patch | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 patches/podman/4.5.0/0001-disable-pull-retry.patch diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index 15bde625a..dccf3e733 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -187,6 +187,10 @@ create() logging="--log-driver k8s-file --log-opt path=/run/containers/$name.fifo" fi + # Pull quietly and don't retry on failure, we use execd for this, + # or user retry manually when run interactively, we may have other + # containers waiting to start that have an image locally already. + # Use --retry=0 with Podman 5.0 or later. args="$args --read-only --replace --quiet --cgroup-parent=containers $caps" args="$args --restart=$restart --systemd=false --tz=local $privileged" args="$args $vol $mount $hostname $entrypoint $env $port $logging" diff --git a/patches/podman/4.5.0/0001-disable-pull-retry.patch b/patches/podman/4.5.0/0001-disable-pull-retry.patch new file mode 100644 index 000000000..96131d36b --- /dev/null +++ b/patches/podman/4.5.0/0001-disable-pull-retry.patch @@ -0,0 +1,21 @@ +This patch disables the default "podman pull" retry value, which otherwise +blocks execd from trying other jobs in its queue. + +As of podman v5.0.0 a --retry=NUM has been added to the podman create, run, +and pull commands. However, CNI is no longer supported, and a lot of other +breaking changes have been made, e.g., output of podman inspect. So there's +a lot of work upgrading. 
+ + -- Joachim + +--- a/vendor/github.com/containers/common/libimage/copier.go 2023-04-14 15:28:20.000000000 +0200 ++++ b/vendor/github.com/containers/common/libimage/copier.go 2024-11-16 13:05:42.207641898 +0100 +@@ -24,7 +24,7 @@ + ) + + const ( +- defaultMaxRetries = 3 ++ defaultMaxRetries = 0 + defaultRetryDelay = time.Second + ) + From f73867ccd8286831a1fffd6270167322ff2c54a5 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 07:13:25 +0100 Subject: [PATCH 13/21] board/common: extract OCI archives in /var/tmp Instead of using $HOME, which may be a ramdisk, use /var/tmp which podman also uses by default. Also, make sure to clean up after ourselves. Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index dccf3e733..e1a0ccbec 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -1,6 +1,7 @@ #!/bin/sh DOWNLOADS=/var/lib/containers/oci BUILTIN=/lib/oci +TMPDIR=/var/tmp all="" sha="" env="" @@ -123,6 +124,8 @@ unpack_archive() exit 1 fi else + cd "$TMPDIR" || log "Error: failed cd $TMPDIR, wiill use $(pwd) for OCI archive extraction." + index=$(tar tf "$file" |grep index.json) if [ -z "$index" ]; then log "Error: invalid OCI archive, cannot find index.json in $file" @@ -131,6 +134,7 @@ unpack_archive() [ -n "$quiet" ] || log "Extracting OCI archive $file ..." tar xf "$file" || (log "Error: failed unpacking $file in $(pwd)"; exit 1) + extracted=true fi dir=$(dirname "$index") @@ -146,6 +150,12 @@ unpack_archive() [ -n "$quiet" ] || log "Loading OCI image $dir ..." 
podman load -qi "$dir" >/dev/null + # Clean up after ourselves + if [ -n "$extracted" ]; then + log "Cleaning up extracted $dir" + rm -rf "$dir" + fi + # Rename image from podman default $dir:latest if [ -n "$name" ]; then podman tag "$dir" "$name" >/dev/null From efdf6bc055609e42c3333db7fda70c39df9bcc7b Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 11:24:56 +0100 Subject: [PATCH 14/21] board/common: enable/disable Finit service for container This patch allows running the container script manually to create and delete containers. The normal flow via confd has additional handling to ensure containers are started/stopped on initctl reload. Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/sbin/container | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/board/common/rootfs/usr/sbin/container b/board/common/rootfs/usr/sbin/container index e1a0ccbec..dcd7f4628 100755 --- a/board/common/rootfs/usr/sbin/container +++ b/board/common/rootfs/usr/sbin/container @@ -1,4 +1,12 @@ #!/bin/sh +# This script can be used to start, stop, create, and delete containers. +# It is primarily used by confd to create jobs for execd to run from its +# /run/containers/queue, but it can also be used manually. +# +# NOTE: when creating/deleting containers, remember 'initctl reload' to +# activate the changes! When called by confd, via execd, this is +# already handled.
+# DOWNLOADS=/var/lib/containers/oci BUILTIN=/lib/oci TMPDIR=/var/tmp @@ -231,6 +239,8 @@ create() if podman create --name "$name" --conmon-pidfile="$pidfn" $args "$image" $*; then [ -n "$quiet" ] || log "Successfully created container $name from $image" [ -n "$manual" ] || start "$name" + # Should already be enabled by confd (this is for manual use) + initctl -bnq enable "container@${name}.conf" exit 0 fi @@ -249,6 +259,9 @@ delete() exit 1 fi + # Should already be disabled (and stopped) by confd (this is for manual use) + initctl -bnq disable "container@${name}.conf" + podman rm -vif "$name" >/dev/null 2>&1 [ -n "$quiet" ] || log "Container $name has been removed." } From 9e7b9d6be374097f0b8289a6dd9e10f69174c695 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 14:28:56 +0100 Subject: [PATCH 15/21] board/common: new shell tool 'text-editor' Aptly named to match the same command in the CLI used to edit binary YANG leaves, and also an example of how to script these types of settings. Signed-off-by: Joachim Wiberg --- board/common/rootfs/usr/bin/text-editor | 23 +++++++++ doc/scripting.md | 64 +++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100755 board/common/rootfs/usr/bin/text-editor diff --git a/board/common/rootfs/usr/bin/text-editor b/board/common/rootfs/usr/bin/text-editor new file mode 100755 index 000000000..31d3d74c1 --- /dev/null +++ b/board/common/rootfs/usr/bin/text-editor @@ -0,0 +1,23 @@ +#!/bin/sh +# Edit YANG binary types using sysrepo, base64, and duct tape. 
+ +xpath=$1 +if [ -z "$xpath" ]; then + echo "Usage: text-editor \"/full/xpath/to/binary/leaf\"" + exit 1 +fi + +if tmp=$(sysrepocfg -G "$xpath"); then + file=$(mktemp) + + echo "$tmp" | base64 -d > "$file" + if edit "$file"; then + tmp=$(base64 -w0 < "$file") + sysrepocfg -S "$xpath" -u "$tmp" + fi + + rm -f "$file" +else + echo "Failed to retrieve value for $xpath" + exit 1 +fi diff --git a/doc/scripting.md b/doc/scripting.md index e41794f0b..caade55d6 100644 --- a/doc/scripting.md +++ b/doc/scripting.md @@ -653,6 +653,70 @@ on interface *e0*. ~$ ``` +### Change a Binary Setting + +A YANG `binary` type setting is Base64 encoded and requires a few +more tricks. We take the opportunity to showcase a shell script helper: +`/usr/bin/text-editor`, which works just like the `text-editor` command +in the CLI, but this one takes an XPath argument to the binary leaf to +edit. + +Stripped down, it looks something like this: + +```bash +if tmp=$(sysrepocfg -G "$xpath"); then + file=$(mktemp) + + echo "$tmp" | base64 -d > "$file" + if edit "$file"; then + tmp=$(base64 -w0 < "$file") + sysrepocfg -S "$xpath" -u "$tmp" + fi + + rm -f "$file" +else + echo "Failed to retrieve value for $xpath" + exit 1 +fi +``` + +An example container configuration, with an embedded file that is +mounted to `/var/www/index.html` can look like this: + +```json + "infix-containers:containers": { + "container": [ + { + "name": "web", + "image": "oci-archive:/lib/oci/curios-httpd-latest.tar.gz", + "hostname": "web", + "network": { + "interface": [ + { + "name": "veth-sys0" + } + ] + }, + "mount": [ + { + "name": "index.html", + "content": "PCFET0NUWVBFIGh0bWwjibberish.shortened.down==", + "target": "/var/www/index.html" + } + ] + } + ] + } +``` + +The command to edit this file, and restart the container with the new +contents, looks like this: + +``` +admin@infix:~$ text-editor "/infix-containers:containers/container[name='web']/mount[name='index.html']/content" +``` + + ### Backup
Configuration Using sysrepocfg And scp Displaying running or startup configuration is possible with From 684efe797b693ccf30378025fa455c7c6ae9fe72 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 07:53:05 +0100 Subject: [PATCH 16/21] package/curios-*: add latest symlinks for easy upgrades This patch adds latest symlinks to the curiOS containers to make system upgrades easier. I.e., a user can now reference the bundled image with: set image oci-archive:/lib/oci/curios-httpd-latest.tar.gz So that when they upgrade to the latest Infix, which might include an update of curiOS httpd, they will get a seamless upgrade also of the container(s) running. Signed-off-by: Joachim Wiberg --- package/curios-httpd/curios-httpd.mk | 4 +++- package/curios-nftables/curios-nftables.mk | 4 +++- test/infamy/container.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/package/curios-httpd/curios-httpd.mk b/package/curios-httpd/curios-httpd.mk index 05946c033..f360ef100 100644 --- a/package/curios-httpd/curios-httpd.mk +++ b/package/curios-httpd/curios-httpd.mk @@ -13,7 +13,9 @@ CURIOS_HTTPD_LICENSE_FILES = COPYING define CURIOS_HTTPD_INSTALL_TARGET_CMDS mkdir -p $(TARGET_DIR)/lib/oci cp $(CURIOS_HTTPD_DL_DIR)/$(CURIOS_HTTPD_SOURCE) \ - $(TARGET_DIR)/lib/oci/$(notdir $(@D)).tar.gz + $(TARGET_DIR)/lib/oci/$(CURIOS_HTTPD_NAME)-$(CURIOS_HTTPD_VERSION).tar.gz + ln -sf $(CURIOS_HTTPD_NAME)-$(CURIOS_HTTPD_VERSION).tar.gz \ + $(TARGET_DIR)/lib/oci/$(CURIOS_HTTPD_NAME)-latest.tar.gz endef $(eval $(generic-package)) diff --git a/package/curios-nftables/curios-nftables.mk b/package/curios-nftables/curios-nftables.mk index 43b9745d5..39fda9c1d 100644 --- a/package/curios-nftables/curios-nftables.mk +++ b/package/curios-nftables/curios-nftables.mk @@ -14,7 +14,9 @@ CURIOS_NFTABLES_INSTALL_TARGET = YES define CURIOS_NFTABLES_INSTALL_TARGET_CMDS mkdir -p $(TARGET_DIR)/lib/oci cp $(CURIOS_NFTABLES_DL_DIR)/$(CURIOS_NFTABLES_SOURCE) \ - 
$(TARGET_DIR)/lib/oci/$(notdir $(@D)).tar.gz + $(TARGET_DIR)/lib/oci/$(CURIOS_NFTABLES_NAME)-$(CURIOS_NFTABLES_VERSION).tar.gz + ln -sf $(CURIOS_NFTABLES_NAME)-$(CURIOS_NFTABLES_VERSION).tar.gz \ + $(TARGET_DIR)/lib/oci/$(CURIOS_NFTABLES_NAME)-latest.tar.gz endef $(eval $(generic-package)) diff --git a/test/infamy/container.py b/test/infamy/container.py index 9989cf38e..7b0392e56 100644 --- a/test/infamy/container.py +++ b/test/infamy/container.py @@ -5,8 +5,8 @@ class Container: """Helper methods""" - HTTPD_IMAGE = "curios-httpd-v24.05.0.tar.gz" - NFTABLES_IMAGE = "curios-nftables-v24.05.0.tar.gz" + HTTPD_IMAGE = "curios-httpd-latest.tar.gz" + NFTABLES_IMAGE = "curios-nftables-latest.tar.gz" def __init__(self, target): self.system = target From a0b0904d1f18e4ed4c53421e5abb77acc7c8a383 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 09:21:43 +0100 Subject: [PATCH 17/21] package/curios-httpd: bump to v24.11.0 Signed-off-by: Joachim Wiberg --- package/curios-httpd/curios-httpd.hash | 4 ++-- package/curios-httpd/curios-httpd.mk | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package/curios-httpd/curios-httpd.hash b/package/curios-httpd/curios-httpd.hash index bbb8a3bda..193fbeee3 100644 --- a/package/curios-httpd/curios-httpd.hash +++ b/package/curios-httpd/curios-httpd.hash @@ -1,4 +1,4 @@ # Locally computed sha256 ab15fd526bd8dd18a9e77ebc139656bf4d33e97fc7238cd11bf60e2b9b8666c6 COPYING -sha256 988a523bf4be543cb4ea1d3472ce4720e0b92511546817028b90150a3f054e9f curios-httpd-oci-arm64-v24.05.0.tar.gz -sha256 25f94a7c44cdbbceca3f89382b8bea0dcd8d178032c97a9113bb69bbc1770528 curios-httpd-oci-amd64-v24.05.0.tar.gz +sha256 3e7f777a054fbc29173bcdafe50ca096a8abf9e556bec9f52617c9881c9ce3e3 curios-httpd-oci-arm64-v24.11.0.tar.gz +sha256 bfa52f712301427a21b89cfee31ca315e1404973affaf34b23978a70a3ec4f63 curios-httpd-oci-amd64-v24.11.0.tar.gz diff --git a/package/curios-httpd/curios-httpd.mk b/package/curios-httpd/curios-httpd.mk index 
f360ef100..1ed0afbac 100644 --- a/package/curios-httpd/curios-httpd.mk +++ b/package/curios-httpd/curios-httpd.mk @@ -4,7 +4,7 @@ # ################################################################################ -CURIOS_HTTPD_VERSION = v24.05.0 +CURIOS_HTTPD_VERSION = v24.11.0 CURIOS_HTTPD_SOURCE = curios-httpd-oci-$(GO_GOARCH)-$(CURIOS_HTTPD_VERSION).tar.gz CURIOS_HTTPD_SITE = https://github.com/kernelkit/curiOS/releases/download/$(CURIOS_HTTPD_VERSION) CURIOS_HTTPD_LICENSE = GPL From f5a425e2fa19ce123e0a08f65994b2771f419f71 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 18 Nov 2024 09:22:00 +0100 Subject: [PATCH 18/21] package/curios-nftables: bump to v24.11.0 Signed-off-by: Joachim Wiberg --- package/curios-nftables/curios-nftables.hash | 4 ++-- package/curios-nftables/curios-nftables.mk | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package/curios-nftables/curios-nftables.hash b/package/curios-nftables/curios-nftables.hash index 230c02513..d21e63601 100644 --- a/package/curios-nftables/curios-nftables.hash +++ b/package/curios-nftables/curios-nftables.hash @@ -1,4 +1,4 @@ # Locally computed sha256 ab15fd526bd8dd18a9e77ebc139656bf4d33e97fc7238cd11bf60e2b9b8666c6 COPYING -sha256 f25651505bf4fc635dc8d391f377b1116837e99dee522ce0691c15b090cee818 curios-nftables-oci-arm64-v24.05.0.tar.gz -sha256 025c2a5d6c71c62ebbe6f96d8b9ffa3235d6812e87b51645f6e89357d9762669 curios-nftables-oci-amd64-v24.05.0.tar.gz +sha256 95b3625587738cb0a8cc6e9eb41d10a0b2f77ae99dbb1d9b213148d35268eb75 curios-nftables-oci-arm64-v24.11.0.tar.gz +sha256 4f61ccef90721b8f95a5c7b77b69ccccb5dbd215b9c38986d20e5b245244e902 curios-nftables-oci-amd64-v24.11.0.tar.gz diff --git a/package/curios-nftables/curios-nftables.mk b/package/curios-nftables/curios-nftables.mk index 39fda9c1d..bec570fbc 100644 --- a/package/curios-nftables/curios-nftables.mk +++ b/package/curios-nftables/curios-nftables.mk @@ -4,7 +4,7 @@ # 
################################################################################ -CURIOS_NFTABLES_VERSION = v24.05.0 +CURIOS_NFTABLES_VERSION = v24.11.0 CURIOS_NFTABLES_SOURCE = curios-nftables-oci-$(GO_GOARCH)-$(CURIOS_NFTABLES_VERSION).tar.gz CURIOS_NFTABLES_SITE = https://github.com/kernelkit/curiOS/releases/download/$(CURIOS_NFTABLES_VERSION) CURIOS_NFTABLES_LICENSE = GPL From 4768ff3c7b87d3c6e1f2e6d772d7a6b0ff0eb3d5 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 19:44:49 +0100 Subject: [PATCH 19/21] doc: update container doucmentation - No more default writable layer - Don't mention read-only (deprecated, and always on now) - Use ghfm note highlights Signed-off-by: Joachim Wiberg --- doc/container.md | 244 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 171 insertions(+), 73 deletions(-) diff --git a/doc/container.md b/doc/container.md index 022ef70ed..b71180ca8 100644 --- a/doc/container.md +++ b/doc/container.md @@ -20,6 +20,9 @@ Containers in Infix * [Application Container: ntpd](#application-container-ntpd) * [Advanced](#advanced) * [Running Host Commands From Container](#running-host-commands-from-container) +* [Container Requirements](#container-requirements) + * [Advanced Users](#advanced-users) + Introduction ------------ @@ -36,10 +39,11 @@ All network specific settings are done using the IETF interfaces YANG model, with augments for containers to ensure smooth integration with container networking in podman. -> **Note:** even though the `podman` command can be used directly from a -> shell prompt, we strongly recommend using the CLI commands instead. -> They employ the services of a wrapper `container` script which handles -> the integration of containers in the system. +> [!IMPORTANT] +> Even though the `podman` command can be used directly from a shell +> prompt, we strongly recommend using the CLI commands instead. 
They +> employ the services of a wrapper `container` script which handles the +> integration of containers in the system. Caution @@ -83,26 +87,22 @@ The former is useful mostly for testing, or running single commands in an image. It is a wrapper for `podman run -it --rm ...`, while the latter is a wrapper and adaptation of `podman create ...`. -The second create a container with a semi-persistent writable layer that -survives container restarts and host system restarts. However, if you -change the container configuration or upgrade the image (see below), the -container will be recreated and the writable layer is lost. This is why -it is recommended to set up a named volume for directories, or use file -[Content Mounts](#content-mounts), in your container if you want truly -persistent content. +The second creates a read-only container that is automatically started +at every boot. It can be extended with writable volumes that survive +both container and host system restarts, and upgrades. Another option +is [Content Mounts](#content-mounts), for truly persistent content. -In fact, in many cases the best way is to create a `read-only` container -and use file mounts and volumes only for the critical parts. Podman -ensures (using tmpfs) `read-only` containers still have writable -directories for certain critical file system paths: `/dev`, `/dev/shm`, -`/run`, `/tmp`, and `/var/tmp`. Meaning, what you most often need is -writable volumes for `/var/lib` and `/etc`, or only file mounts for a -few files in `/etc`. The actual needs depend on the container image and -application to run. +Podman ensures (using tmpfs) all containers have writable directories +for certain critical file system paths: `/dev`, `/dev/shm`, `/run`, +`/tmp`, and `/var/tmp`. Meaning, what you most often need is writable +volumes for `/var/lib` and `/etc`, or only file mounts for a few files +in `/etc`. 
The [actual requirements](#container-requirements) depend on +your container image and application to run. -> **Note:** when running containers from public registries, double-check -> that they support the CPU architecture of your host system. Remember, -> unlike virtualization, containers reuse the host's CPU and kernel. +> [!IMPORTANT] +> When running containers from public registries, double-check that they +> support the CPU architecture of your host system. Remember, unlike +> virtualization, containers reuse the host's CPU and kernel. Hello World @@ -168,13 +168,20 @@ The CLI help shows: oci-archive:/lib/oci/archive -- Use archive:latest from OCI archive May be in .tar or .tar.gz format + Additionally, the following URIs are also supported for setups + that do not use a HUB or similar. Recommend using 'checksum'! + + ftp://addr/path/to/archive -- Downloaded using wget + http://addr/path/to/archive -- Downloaded using curl + https://addr/path/to/archive -- Downloaded using curl + Note: if a remote repository cannot be reached, the creation of the container will be put on a queue that retries pull every time there is a route change in the host's system. -> **Note::** the built-in help system in the CLI is generated from the -> YANG model, so the same information is also available for remote -> NETCONF users. +> [!TIP] +> The built-in help system in the CLI is generated from the YANG model, +> so the same information is also available for remote NETCONF users. The two most common variants are `docker://` and `oci-archive:/`. @@ -217,15 +224,16 @@ mind. -rw-r--r-- 1 root root 7261785 Mar 27 14:22 curios-oci-amd64.tar.gz drwx------ 6 frr frr 4096 Mar 27 11:57 frr/ -Importing the image into podman can be done either from the CLI +Importing the image into Podman can be done either from the CLI admin-exec context ... 
admin@example:/var/tmp$ cli admin@example:/> container load /var/tmp/curios-oci-amd64.tar.gz name curios:edge -> The `name curios:edge` is the tag you give the imported -> (raw) archive which you can then reference in your container image -> configuration: `set image curios:edge`. +> [!TIP] +> The `name curios:edge` is the tag you give the imported (raw) archive +> which you can then reference in your container image configuration: +> `set image curios:edge`. ... or by giving the container configuration the full path to the OCI archive, which helps greatly with container upgrades (see below): @@ -240,15 +248,12 @@ Upgrading a Container Image The applications in your container are an active part of the system as a whole, so make it a routine to keep your container images up-to-date! -> **Note:** the default writable layer is lost when upgrading the image. -> Use named volumes for content that you want to persist across upgrades. +Containers are locked to the image hash at the time of creation. This +applies to all images, not just those tagged `:edge` or `:latest`. To +upgrade a versioned image is obvious: update the `running-config` to use +the new `image:tag`, the upgrade will be performed in the background. -All container configurations are locked to the image hash at the time of -first download, not just ones that use an `:edge` or `:latest` tag. An -upgrade of containers using versioned images is more obvious -- update -the configuration to use the new `image:tag` -- the latter is a bit -trickier. Either remove the configuration and recreate it (leave/apply -the changes between), or use the admin-exec level command: +For "unversioned" images, e.g., `:latest`, use the CLI command: admin@example:/> container upgrade NAME @@ -279,12 +284,27 @@ the upgrade command as Upgrading container system with local archive: oci-archive:/var/tmp/curios-oci-amd64.tar.gz ... 
7ab4a07ee0c6039837419b7afda4da1527a70f0c60c0f0ac21cafee05ba24b52 +> [!TIP] +> Containers running from OCI images embedded in the operating system, +> e.g., `/lib/oci/mycontainer.tar.gz`, always run from the version in +> the operating system. To upgrade, install the new container image at +> build time, after system upgrade the container is also upgraded. The +> system unpacks and loads the OCI images into Podman every boot, which +> ensures the running container always has known starting state. +> +> **Example:** default builds of Infix include a couple of OCI images +> for reference, one is `/lib/oci/curios-nftables-v24.11.0.tar.gz`, but +> there is also a symlink called `curios-nftables-latest.tar.gz` in the +> same directory, which is what the Infix regression tests use in the +> image configuration of the container. This is what enables easy +> upgrades of the container along with the system itself. + Capabilities ------------- An unprivileged container works for almost all use-cases, but there are -occasions where they are too restricted and users being looking for the +occasions where they are too restricted and users start looking for the `privileged` flag. Capabilities offers a middle ground. For example, a system container from which `ping` does not work: @@ -300,9 +320,9 @@ For example, a system container from which `ping` does not work: ... Infix supports a subset of all [capabilities][6] that are relevant for -containers. Please note, that this is and advanced topic and will -require time and analysis of your container application to figure out -which capabilities you need. +containers. Please note, that this is an advanced topic that require +time and analysis of your container application to figure out which +capabilities you need. Networking and Containers @@ -312,9 +332,10 @@ By default, unlike other systems, persistent[^1] containers have no networking enabled. All network access has to be set up explicitly. 
Currently two types of of container networks are supported: - - `host`: one end of a VETH pair, or a physical Ethernet port + - `host`: one end of a VETH pair, or a physical Ethernet interface - `bridge`: an IP masquerading bridge +> [!TIP] > For more information on VETH pairs, see the [Networking Guide][0]. @@ -347,10 +368,11 @@ have to set manually: admin@example:/config/interface/docker0/> set type bridge admin@example:/config/interface/docker0/> set container-network type bridge -> **Note:** when doing the same operation over NETCONF there is no -> inference, so all the "magic" settings need to be defined. This -> makes the CLI very useful for first setup and then extracting the -> resulting XML from the shell using the `cfg -X` command. +> [!IMPORTANT] +> When doing the same operation over NETCONF there is no inference, so +> all the "magic" settings need to be defined. This makes the CLI very +> useful for first setup and then extracting the resulting XML from the +> shell using the `cfg -X` command. We have to declare the interface as a container network, ensuring the interface cannot be used by the system for any other purpose. E.g., a @@ -419,8 +441,12 @@ example. The network `option` setting is available also for this case, but only the `interface_name=foo0` option works. Which is still very useful. To -change the MAC address, you need to use the `custom-phys-address` in the -general network settings. +set: + + - IP address, use IPv4/IPv6 settings in the interface settings + - MAC address, use the `custom-phys-address` in the interface settings + +For an example of both, see the next section. [^3]: Something which the container bridge network type does behind the scenes with one end of an automatically created VETH pair.
@@ -447,6 +473,7 @@ line where we declare the `ntpd` end as a container network interface: admin@example:/config/interface/ntpd/> set custom-phys-address static 00:c0:ff:ee:00:01 admin@example:/config/interface/ntpd/> set container-network +> [!TIP] > Notice how you can also set a custom MAC address at the same time. Adding the interface to the container is the same as before, but since @@ -456,6 +483,7 @@ can take a bit of a shortcut. admin@example:/config/container/ntpd/> set network interface ntpd admin@example:/config/container/ntpd/> leave +> [!TIP] > Use the `set network interface ntpd option interface_name=foo0` to set > the name of the interface inside the container to `foo0`. @@ -478,6 +506,7 @@ We start by adding the second VETH pair: admin@example:/config/interface/veth1a/> set veth peer veth1 admin@example:/config/interface/veth1a/> set ipv4 address 192.168.1.2 prefix-length 24 +> [!NOTE] > The LAN bridge (br1) in this example has IP address 192.168.1.1. When a container has multiple host interfaces it can often be useful to @@ -529,29 +558,33 @@ file system: admin@example:/config/container/system/mount/leds> end admin@example:/config/container/system/> -Sometimes *volumes* are a better fit. A volume is an automatically +Any type of file can be *bind mounted* into the container. So depending +on the container, and indeed your overall setup, a local file on the +host can be very useful, and other times a *Content Mount*, which when +changed also triggers a container restart to activate, is more useful. + +Other times *volumes* are a better fit. A volume is an automatically created read-writable entity that follows the life of your container. admin@example:/config/container/ntpd/> set volume varlib target /var/lib -Volumes survive reboots and upgrading of the base image, unlike the -persistent writable layer you get by default, which does not survive -upgrades.
The volume is created by podman when the container first -starts up, unlike a regular bind mount it synchronizes with the contents -of the underlying container image's path on the first start. I.e., -"bind-mount, if empty: then rsync". +Volumes survive both reboots and upgrades of the base image. They are +created by Podman when the container first starts up. Unlike a regular +bind mount, a volume synchronizes with the contents of the underlying container +image's path at first use. I.e., "bind-mount, if empty: then rsync". +> [!NOTE] > Infix support named volumes (only), and it is not possible to share a > volume between containers. All the tricks possible with volumes may > be added in a later release. ### Content Mounts -Content mount is a special type of where the file contents for the -container is stored alongside the container configuration. This can be -very useful when deploying similar systems at multiple sites. When the -host loads its `startup-config` (or even `factory-config`) a temporary -file is created using the decoded base64 data from the `content` node. +Content mounts are a special type of file mount where the file contents +are stored with the container configuration. This can be very useful +when deploying similar systems at multiple sites. When the host loads +its `startup-config` (or even `factory-config`) a temporary file is +created using the decoded base64 data from the `content` node. admin@example:/config/container/ntpd/> edit mount ntpd.conf admin@example:/config/container/ntpd/mount/ntpd.conf> text-editor content @@ -564,9 +597,10 @@ The editor is a small [Emacs clone called Mg][2], see the built-in help text, or press Ctrl-x Ctrl-c to exit and save. When the editor exits the contents are base64 encoded and stored in the candidate datastore.
-> **Note:** since these files are always recreated when the host is -> restarted, changes made by the container are not preserved, or saved -> back to the host's startup-config even if the read-only option is off. +> [!NOTE] +> Since these files are always recreated when the host is restarted, +> changes made by the container are not preserved, or saved back to the +> host's startup-config. Infix has three different text editors available. For more information, see [CLI Text Editor](cli/text-editor.md). @@ -588,10 +622,11 @@ we created previously: admin@example:/config/container/system/> set publish 222:22 admin@example:/config/container/system/> leave -> **Note:** ensure you have a network connection to the registry. -> If the image cannot be pulled, creation of the container will be -> put in a queue and be retried every time there is a change in the -> routing table, e.g., default route is added. +> [!NOTE] +> Ensure you have a network connection to the registry. If the image +> cannot be pulled, creation of the container will be put in a queue and +> be retried every time there is a change in the routing table, e.g., +> default route is added, and every 60 seconds. Provided the image is downloaded successfully, a new `system` container now runs behind the docker0 interface, forwarding container port 22 to @@ -707,12 +742,13 @@ Another *insecure* approach is to access the host system directly, bypassing the namespaces that make up the boundary between host and container. -> **Security:** Please note, this completely demolishes the isolation -> barrier between container and host operating system. It is only -> suitable in situations where the container serves more as a unit of -> distribution rather than as a separate component of the system. -> *Strongly recommended* to use this only in trusted setups! Consider -> also limiting the time frame in which this is active! 
+> [!CAUTION] +> Please note, this completely demolishes the isolation barrier between +> container and host operating system. It is only suitable in +> situations where the container serves more as a unit of distribution +> rather than as a separate component of the system. *Strongly +> recommended* to use this only in trusted setups! Consider also +> limiting the time frame in which this is active! First, enable *Privileged* mode, this unlocks the door and allows the container to manage resources on the host system. An example is the @@ -747,6 +783,58 @@ control an Infix system this way, see [Scripting Infix](scripting.md). it may not be enabled by default in BusyBox. +Container Requirements +---------------------- + +In addition to general [*best practices*][7] for container images, there +are a few more things to consider when targeting embedded systems: + + - Ensure the image targets the CPU architecture of the target system, + learn more about [Multi-platform Builds][8] + - Follow [best practices for naming and tagging][10], e.g., `:latest` vs `:1.0` + - Follow [OCI recommendations and layout][9], + learn more about [OCI and Docker Exporters][5] + +If the [Docker documentation][11] is not enough, there are plenty of +[guides online][12] with examples on how to create your own container +image. For the more advanced, please see the next section. + + +### Advanced Users + +Most people prefer their system containers small, often based on Alpine +Linux, or similar, with only a few small applications, including their +own, and an SSH server perhaps. For some developers, even this is too +big, so they roll their own from source. This section is for you. + +Depending on your needs, here is a checklist: + + - you need something that can forward signals, e.g., + - [tini][] + - [Bash only][13], or + - BusyBox init, a classic most embedded developers know, but read on ...
+ - a system container only needs the bare necessities of a system bringup + - E.g., BusyBox's init, [but not everything][15] + - Some of the networking is set up by Podman and CNI for you, but + you may want to run a DHCP client? + - Do *not* rename interfaces inside the container, use the dedicated + `interface_name` option in the configuration instead + - Remember, Podman provides a `tmpfs` for all critical system paths: + `/dev`, `/dev/shm`, `/run`, `/tmp`, and `/var/tmp`, so you don't + need to clean or set up any of these mount points + +Examples using `tini` and BusyBox init are available from the KernelKit +[curiOS project][14]. It is a small Buildroot based container image +builder that generates OCI compatible image tarballs without any tools +from Docker or Podman -- ready-made images exist for testing on both +AMD64 and ARM64 targets, as well as `docker pull` images and OCI +tarballs with SHA256 checksums for integrity checking. + +Finally, if you build your own version of Infix, and embed OCI tarballs +in the system image, then see the tip at the end of [Upgrading a +Container Image](#upgrading-a-container-image) (above). + + [0]: networking.md [1]: https://github.com/kernelkit/infix/blob/main/src/confd/yang/infix-containers.yang [2]: https://github.com/troglobit/mg @@ -754,4 +842,14 @@ control an Infix system this way, see [Scripting Infix](scripting.md).
[4]: system.md#ssh-authorized-key [5]: https://docs.docker.com/build/exporters/oci-docker/ [6]: https://man7.org/linux/man-pages/man7/capabilities.7.html +[7]: https://docs.docker.com/build/building/best-practices/ +[8]: https://docs.docker.com/build/building/multi-platform/ +[9]: https://github.com/opencontainers/image-spec/blob/main/image-layout.md +[10]: https://docs.docker.com/get-started/docker-concepts/building-images/build-tag-and-publish-an-image/#tagging-images +[11]: https://www.docker.com/blog/multi-arch-images/ +[12]: https://lemariva.com/blog/2018/05/tutorial-docker-on-embedded-systems-raspberry-pi-beagleboard +[13]: https://sirikon.me/posts/0009-pid-1-bash-script-docker-container.html +[14]: https://github.com/kernelkit/curiOS/ +[15]: https://github.com/kernelkit/curiOS/blob/2e4748f65e356b2c117f586cd9420d7ba66f79d5/board/system/rootfs/etc/inittab +[tini]: https://github.com/krallin/tini [podman]: https://podman.io From 4ea7d6d0dbffe36668b1a7bdb1e51dc941237f1a Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 22:37:18 +0100 Subject: [PATCH 20/21] doc: add ghfm note highlights Signed-off-by: Joachim Wiberg --- doc/networking.md | 102 ++++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/doc/networking.md b/doc/networking.md index efc48a50f..146048f9a 100644 --- a/doc/networking.md +++ b/doc/networking.md @@ -7,9 +7,10 @@ model forms the base, extended with [ietf-ip.yang][2] and other layer-3 IETF models. The layer-2 bridge and aggregate models are defined by Infix to exploit the unique features not available in IEEE models. -> **Note:** when issuing `leave` to activate your changes, remember to -> also save your settings, `copy running-config startup-config`. See -> the [CLI Introduction](cli/introduction.md) for a background. +> [!IMPORTANT] +> When issuing `leave` to activate your changes, remember to also save +> your settings, `copy running-config startup-config`. 
See the [CLI +> Introduction](cli/introduction.md) for a background. ## Interface LEGO® @@ -103,10 +104,11 @@ The `description` is saved as Linux `ifalias` on an interface. It is a free-form string, useful for describing purpose or just adding comments for remote debugging, e.g., using the operational datastore. -> **Note:** there is no validation or safety checks performed by the -> system when using `custom-phys-address`. In particular the `offset` -> variant can be dangerous to use -- pay attention to the meaning of -> bits in the upper-most octet: local bit, multicast/group, etc. +> [!CAUTION] +> There is no validation or safety checks performed by the system when +> using `custom-phys-address`. In particular the `offset` variant can +> be dangerous to use -- pay attention to the meaning of bits in the +> upper-most octet: local bit, multicast/group, etc. #### Fixed custom MAC @@ -167,11 +169,14 @@ admin@example:/config/> leave Here we add two ports to bridge `br0`: `eth0` and `eth1`. -> **Note:** Infix has many built-in helpers controlled by convention. -> E.g., if you name your bridge `brN`, where `N` is a number, Infix sets -> the interface type automatically and unlocks all bridge features. -> Other "magic" names are `ethN.M` for VLAN M on top of `ethN`, or -> `dockerN` to create an IP masquerading container bridge. +> [!TIP] +> Infix has many built-in helpers controlled by convention. Example, +> naming your bridge `brN`, where `N` is a number, hints Infix to set +> interface type automatically and unlocks all bridge features. Other +> "magic" names are `vethNA`, where `N` is a number and `A` is a letter +> ('a' for access port and 'b' for bridge side is common), and `ethN.M` +> for VLAN M on top of `ethN`, or `dockerN` to create an IP masquerading +> container bridge. 
![A MAC bridge with two ports](img/mac-bridge.svg) @@ -214,6 +219,7 @@ admin@example:/config/interface/br0/> set bridge vlans vlan 10 tagged br0 admin@example:/config/interface/br0/> set bridge vlans vlan 20 tagged br0 ``` +> [!NOTE] > To route or to manage via a VLAN, a VLAN interface needs to be created > on top of the bridge, see section [VLAN Interfaces](#vlan-interfaces) > below for more on this topic. @@ -227,10 +233,11 @@ also supports "snooping", i.e., IGMP and MLD, to automatically reduce the broadcast effects of multicast. See the next section for a summary of the [terminology used](#terminology--abbreviations). -> **Note:** currently there is no way to just enable multicast filtering -> without also enabling snooping. This may change in the future, in -> which case a `filtering` enabled setting will be made available along -> with the existing `snooping` setting. +> [!IMPORTANT] +> Currently there is no way to just enable multicast filtering without +> also enabling snooping. This may change in the future, in which case +> a `filtering` enabled setting will be made available along with the +> existing `snooping` setting. When creating your bridge you must decide if you need a VLAN filtering bridge or a plain bridge (see previous section). Multicast filtering is @@ -370,16 +377,23 @@ an IGMP/MLD fast-leave port. tables shown above, a *None* timeout is declared when the current device is the active querier -> **Note:** the reason why multicast flooding is enabled by default is -> to ensure safe co-existence with MAC multicast, which is very common -> in industrial networks. It also allows end devices that do not know -> of IGMP/MLD to communicate over multicast as long as the group they -> have chosen is not used by other IGMP/MLD aware devices on the LAN. +> [!TIP] +> The reason why multicast flooding is enabled by default is to ensure +> safe co-existence with MAC multicast, which is common in industrial +> networks. 
It also allows end devices that do not know of IGMP/MLD to +> communicate over multicast as long as the group they have chosen is +> not used by other IGMP/MLD aware devices on the LAN. > > As soon as an IGMP/MLD membership report to "join" a group is received -> the group is added to the MDB and forwarding to other ports stop. The -> only exception to this rule is multicast router ports. +> the group is added to the kernel MDB and forwarding to other ports +> stops. The only exception to this rule is multicast router ports. +> +> If your MAC multicast forwarding is not working properly, it may be +> because an IP multicast group maps to the same MAC address. Please +> see [RFC 1112][RFC1112] for details. Use static multicast router +> ports, or static multicast MAC filters, to mitigate. +[RFC1112]: https://www.rfc-editor.org/rfc/rfc1112.html [RFC3376]: https://www.rfc-editor.org/rfc/rfc3376.html [RFC3810]: https://www.rfc-editor.org/rfc/rfc3810.html @@ -510,9 +524,10 @@ Auto-negotiation of speed/duplex mode is desired in almost all use-cases, but it is possible to disable auto-negotiation and specify a fixed speed and duplex mode. -> If setting a fixed speed and duplex mode, ensure both sides of the -> link have matching configuration. If speed does not match, the link -> will not come up. If duplex mode does not match, the result is +> [!IMPORTANT] +> When setting a fixed speed and duplex mode, ensure both sides of the +> link have matching configuration. If speed does not match, the link +> will not come up. If duplex mode does not match, the result is > reported collisions and/or bad throughput. The example below configures port eth3 to fixed speed 100 Mbit/s @@ -605,9 +620,10 @@ interfaces { admin@example:/config/> ``` -> **Note:** this is another example of the automatic inference of the -> interface type from the name. Any name can be used, but then you have -> to set the interface type to `veth` manually.
+> [!TIP] +> This is another example of the automatic inference of the interface +> type from the name. Any name can be used, but then you have to set +> the interface type to `veth` manually. ## Management Plane @@ -629,7 +645,8 @@ Multiple address assignment methods are available: | link-local | infix-ip | Auto-assignment of IPv4 address in 169.254.x.x/16 range | | dhcp | infix-dhcp-client | Assignment of IPv4 address by DHCP server, e.g., *10.0.1.1/24* | -> **Note:** DHCP address method is only available for *LAN* interfaces +> [!NOTE] +> The DHCP address method is only available for *LAN* interfaces > (Ethernet, virtual Ethernet (veth), bridge, link aggregates, etc.) Supported DHCP (request) options, configurability (Cfg) and defaults, @@ -663,9 +680,10 @@ client is not enabled, any NTP servers provided by the DHCP server will be ignored. For details on how to enable the NTP client, see the [NTP Client Configuration](system.md#ntp-client-configuration) section. -> **Note:** as per [RFC3442][4], if the DHCP server returns both a -> Classless Static Routes option (121) and Router option (3), the -> DHCP client *must* ignore the latter. +> [!IMPORTANT] +> Per [RFC3442][4], if the DHCP server returns both a Classless Static +> Routes option (121) and Router option (3), the DHCP client *must* +> ignore the latter. ### IPv6 Address Assignment @@ -932,9 +950,10 @@ The base model, ietf-routing, is where all the other models hook in. It is used to set configuration and read operational status (RIB tables) in the other models. -> **Note:** the standard IETF routing models allows multiple instances, -> but Infix currently *only support one instance* per routing protocol! -> In the examples presented here, the instance name `default` is used. +> [!NOTE] +> The standard IETF routing models allow multiple instances, but Infix +> currently *only supports one instance* per routing protocol! In the +> examples presented here, the instance name `default` is used.
### IPv4 Static routes @@ -960,6 +979,7 @@ router 192.168.1.1, using the highest possible distance: admin@example:/config/routing/control-plane-protocol/static/name/default/> leave admin@example:/> +> [!TIP] > Remember to enable [IPv4 forwarding](#IPv4-forwarding) for the > interfaces you want to route between. @@ -983,6 +1003,7 @@ enable OSPF and set one active interface in area 0: admin@example:/config/routing/control-plane-protocol/ospfv2/name/default/> leave admin@example:/> +> [!TIP] > Remember to enable [IPv4 forwarding](#IPv4-forwarding) for all the > interfaces you want to route between. @@ -1171,10 +1192,11 @@ different next-hop, learned from a DHCP server wins over an OSPF route. The distance used for static routes and DHCP routes can be changed by setting a different *routing preference* value. -> **Note:** the kernel metric is an unsigned 32-bit value, which is read -> by Frr as (upper) 8 bits distance and 24 bits metric. But it does not -> write it back to the kernel FIB this way, only selected routes are -> candidates to be installed in the FIB by Frr. +> [!NOTE] +> The kernel metric is an unsigned 32-bit value, which is read by Frr as +> (upper) 8 bits distance and 24 bits metric. But it does not write it +> back to the kernel FIB this way, only selected routes are candidates +> to be installed in the FIB by Frr. #### Source protocol From 1014d1ce2994945a30d45c5876275e0b5df1a7a6 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Sat, 16 Nov 2024 22:37:49 +0100 Subject: [PATCH 21/21] doc: update ChangeLog for v24.11 Signed-off-by: Joachim Wiberg --- doc/ChangeLog.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/doc/ChangeLog.md b/doc/ChangeLog.md index 148f69e6b..f615feceb 100644 --- a/doc/ChangeLog.md +++ b/doc/ChangeLog.md @@ -4,6 +4,49 @@ Change Log All notable changes to the project are documented in this file. 
+[v24.11.0][UNRELEASED] +------------------------- + +> [!CAUTION] +> This release contains breaking changes for container users! As of +> v24.11.0, all persistent[^1] containers always run in `read-only` mode +> and the setting itself is deprecated (kept only for compatibility +> reasons). The main reason for this change is to better serve users +> with embedded container images in their builds of Infix. I.e., they +> can now upgrade the OCI image in their build and rely on the container +> being automatically upgraded when Infix is upgraded, issue #823. For +> other users, the benefit is that *all* container configuration changes +> take effect when activated, issue #822, without having to perform any tricks. + +### Changes + + - Add validation of interface name lengths, (2..15), Linux limit + - Add support for ftp/http/https URI:s in container image, with a new + `checksum` setting for SHA256 verification, issue #801 + - Add a retry timer to the background container create service. This + will ensure failing `docker pull` operations from remote images are + retried after 60 seconds, or quicker + - CLI base component, `klish`, has been updated with better support for + raw terminal mode and alternate quotes (' in addition to ") + - Log silenced from container activation messages, only the very bare + necessities are now logged, e.g., `podman create` command + status + +### Fixes + + - Fix #659: paged output in CLI accessed via console port sometimes + causes lost lines, e.g. missing interfaces. With updated `klish` + and the terminal in raw mode, the pager (less) can now control both + the horizontal and vertical + - Fix #822: adding, or changing, an environment variable to a running + container does not take effect without the `container upgrade NAME` trick + - Fix #823: with an OCI image embedded in the Infix image, an existing + container in the configuration is not upgraded to the new OCI image + with the Infix upgrade.
+ +[^1]: I.e., set up in the configuration, as opposed to temporary ones + started with `container run` from the CLI admin-exec context. + + [v24.10.2][] - 2024-11-08 ------------------------- @@ -36,6 +79,7 @@ All notable changes to the project are documented in this file. - Styx: override iitod (LED daemon) with a product specific LED script ### Fixes + - Fix #685: DSA conduit interface not always detected, randomly causing major issues configuring systems with multiple switch cores - Fix #778: reactivate OpenSSL backend for libssh/libssh2 for NanoPI R2S. @@ -52,11 +96,13 @@ All notable changes to the project are documented in this file. ------------------------- ### Changes + - Add support for interface description, sometimes referred to as "ifAlias". Saved as an Linux interface alias (not `altname`), e.g., `/sys/class/interfaces/veth0a/ifalias`, includes operational support ### Fixes + - Fix #735: `copy` and `erase` commands missing from CLI, regression in Infix v24.10.0 defconfigs, now added as dep. in klish package @@ -71,7 +117,9 @@ Also, heads-up to all downstream users of Infix. YANG models have been renamed to ease maintenance, more info below. ### Changes + - Software control of port LEDs on the Styx platform has been disabled. + Default driver behavior, green link and green traffic blink, is kept as-is, which should mitigate issues reported in #670 - Correcting documentation on QoS. For packets containing both a VLAN @@ -107,6 +155,7 @@ renamed to ease maintenance, more info below. see ### Fixes + - Fix #499: add an NACM rule to factory-config, which by default deny everyone to read user password hash(es) - Fix #663: internal Ethernet interfaces shown in CLI tab completion @@ -155,6 +204,7 @@ also been added to facilitate site specific adaptations. Please see the documentation for details. ### Known Issues + - The CLI command `show interfaces` may for some terminal resolutions not display all interfaces (on systems with >20 interfaces). 
This problem is limited to the console port and only occurs for smaller @@ -163,6 +213,7 @@ documentation for details. using the CLI from an SSH session, is not affected. Issue #659 ### Changes + - Upgrade Buildroot to 2024.02.6 (LTS) - Upgrade Linux kernel to 6.6.52 (LTS) - Upgrade libyang to 3.4.2 @@ -182,6 +233,7 @@ documentation for details. by `mctl` reporting no multicast filtering enabled on bridge ### Fixes + - Fix #357: EUI-64 based IPv6 autoconf address on bridges seem to be randomized. Problem caused by kernel setting a random MAC before any bridge port is added. Fixed by using the device's base MAC address on @@ -244,6 +296,7 @@ Finally, the following consumer boards are now fully supported: - StarFive VisionFive2 (RISC-V) ### Changes + - Upgrade Buildroot to 2024.02.5 (LTS) - Upgrade Linux kernel to 6.6.46 (LTS) - Issue #158: enhance security of factory reset. All file content @@ -295,6 +348,7 @@ Finally, the following consumer boards are now fully supported: log messages. See `/var/log/debug` for *all* log messages ### Fixes + - Fix #274: add missing link/traffic LEDs on NanoPi R2S LAN port - Fix #489: ensure all patches are versioned, including Linux kernel - Fix #531: creating a new VLAN interface named `vlanN` should not set @@ -324,6 +378,7 @@ Finally, the following consumer boards are now fully supported: > upgrade, but before reboot, a factory reset is required! 
### Changes + - Upgrade Buildroot to 2024.02.3 (LTS) - Upgrade Linux kernel to 6.6.34 (LTS) - Upgrade bundled curiOS httpd container to v24.05.0 @@ -390,6 +445,7 @@ Finally, the following consumer boards are now fully supported: [yescrypt]: https://en.wikipedia.org/wiki/Yescrypt ### Fixes + - Fix #424: regression, root user can log in without password - Fix build regressions in `cn9130_crb_boot_defconfig` caused by upgrade to Buildroot v2024.02 and recent multi-key support in RAUC and U-Boot @@ -421,11 +477,13 @@ Finally, the following consumer boards are now fully supported: ------------------------- ### Changes + - Add small delay in U-Boot to allow stopping boot on reference boards - Document how to provision the bootloader and Infix on a blank board - Use initial hostname from `/etc/os-release` as configuration fallback ### Fixes + - Fix build regressions in `cn9130_crb_boot_defconfig` caused by upgrade to Buildroot v2024.02 and recent multi-key support in RAUC and U-Boot - Fix provisioning script after changes to make GRUB loading more robust @@ -440,6 +498,7 @@ Finally, the following consumer boards are now fully supported: ------------------------- ### Changes + - Default web landing page refactored into a Buildroot package to make it possible to overload from customer repos. - Enable DCB support in aarch64 kernel (for EtherType prio override) @@ -450,6 +509,7 @@ Finally, the following consumer boards are now fully supported: - Issue #374: add timestamps to dagger .log files ### Fixes + - Add missing LICENSE hash for factory reset tool - Fix #424: regression, root user can log in without password @@ -471,6 +531,7 @@ idea is to generate supported features from the models and include in future releases. ### Changes + - Bump the base Buildroot version to v2024.02 LTS - Bump the base Linux kernel version to 6.6 LTS - Drop Classic variant to reduce overhead, simplify build & release @@ -551,6 +612,7 @@ future releases. 
named 'default' ### Fixes + - confd: Fix memory leak when operating on candidate configuration - probe: Fix crash on systems without USB - Reduced syslog errors for accesses no non-existing xpaths @@ -1294,6 +1356,7 @@ Supported YANG models in addition to those used by sysrepo and netopeer: [buildroot]: https://buildroot.org/ [UNRELEASED]: https://github.com/kernelkit/infix/compare/v24.10.1...HEAD +[v24.11.0]: https://github.com/kernelkit/infix/compare/v24.10.2...v24.11.0 [v24.10.2]: https://github.com/kernelkit/infix/compare/v24.10.1...v24.10.2 [v24.10.1]: https://github.com/kernelkit/infix/compare/v24.10.0...v24.10.1 [v24.10.0]: https://github.com/kernelkit/infix/compare/v24.09.0...v24.10.0