diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch new file mode 100644 index 00000000000..7171955041f --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch @@ -0,0 +1,32 @@ +From 98cbd0a4576464478f0f9fcd2066efc08bef9491 Mon Sep 17 00:00:00 2001 +From: David Michael +Date: Tue, 16 Apr 2019 02:44:51 +0000 +Subject: [PATCH 1/8] wait-online: set --any by default + +The systemd-networkd-wait-online command would normally continue +waiting after a network interface is usable if other interfaces are +still configuring. There is a new flag --any to change this. + +Preserve previous Container Linux behavior for compatibility by +setting the --any flag by default. See patches from v241 (or +earlier) for the original implementation. +--- + src/network/wait-online/wait-online.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c +index 5328bba2d8..95294df607 100644 +--- a/src/network/wait-online/wait-online.c ++++ b/src/network/wait-online/wait-online.c +@@ -21,7 +21,7 @@ static Hashmap *arg_interfaces = NULL; + static char **arg_ignore = NULL; + static LinkOperationalStateRange arg_required_operstate = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID }; + static AddressFamily arg_required_family = ADDRESS_FAMILY_NO; +-static bool arg_any = false; ++static bool arg_any = true; + + STATIC_DESTRUCTOR_REGISTER(arg_interfaces, hashmap_free_free_freep); + STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep); +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch 
b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch new file mode 100644 index 00000000000..de0955b8018 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch @@ -0,0 +1,24 @@ +From e3fd50ec704b5d48e9d756c1cc5c40e72b7d1fa4 Mon Sep 17 00:00:00 2001 +From: Nick Owens +Date: Tue, 2 Jun 2015 18:22:32 -0700 +Subject: [PATCH 2/8] networkd: default to "kernel" IPForwarding setting + +--- + src/network/networkd-network.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c +index dcd3e5ae12..2ae481d1ec 100644 +--- a/src/network/networkd-network.c ++++ b/src/network/networkd-network.c +@@ -461,6 +461,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi + .link_local = _ADDRESS_FAMILY_INVALID, + .ipv6ll_address_gen_mode = _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_INVALID, + ++ .ip_forward = _ADDRESS_FAMILY_INVALID, + .ipv4_accept_local = -1, + .ipv4_route_localnet = -1, + .ipv6_privacy_extensions = _IPV6_PRIVACY_EXTENSIONS_INVALID, +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch new file mode 100644 index 00000000000..400cb96e05d --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch @@ -0,0 +1,58 @@ +From 0be1b5367c24427e3285d33fb87aa4acdf3c4dce Mon Sep 17 00:00:00 2001 +From: Alex Crawford +Date: Wed, 2 Mar 2016 10:46:33 -0800 +Subject: [PATCH 3/8] needs-update: don't require strictly newer usr + +Updates should be triggered whenever usr changes, not only when it is newer. 
+--- + man/systemd-update-done.service.xml | 2 +- + src/shared/condition.c | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/man/systemd-update-done.service.xml b/man/systemd-update-done.service.xml +index 3393010ff6..5478baca25 100644 +--- a/man/systemd-update-done.service.xml ++++ b/man/systemd-update-done.service.xml +@@ -50,7 +50,7 @@ + ConditionNeedsUpdate= (see + systemd.unit5) + condition to make sure to run when /etc/ or +- /var/ are older than /usr/ ++ /var/ aren't the same age as /usr/ + according to the modification times of the files described above. + This requires that updates to /usr/ are always + followed by an update of the modification time of +diff --git a/src/shared/condition.c b/src/shared/condition.c +index d3446e8a9d..3f7cc9ea58 100644 +--- a/src/shared/condition.c ++++ b/src/shared/condition.c +@@ -793,7 +793,7 @@ static int condition_test_needs_update(Condition *c, char **env) { + * First, compare seconds as they are always accurate... + */ + if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec) +- return usr.st_mtim.tv_sec > other.st_mtim.tv_sec; ++ return true; + + /* + * ...then compare nanoseconds. +@@ -804,7 +804,7 @@ static int condition_test_needs_update(Condition *c, char **env) { + * (otherwise the filesystem supports nsec timestamps, see stat(2)). 
+ */ + if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0) +- return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec; ++ return usr.st_mtim.tv_nsec != other.st_mtim.tv_nsec; + + _cleanup_free_ char *timestamp_str = NULL; + r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str); +@@ -824,7 +824,7 @@ static int condition_test_needs_update(Condition *c, char **env) { + return true; + } + +- return timespec_load_nsec(&usr.st_mtim) > timestamp; ++ return timespec_load_nsec(&usr.st_mtim) != timestamp; + } + + static bool in_first_boot(void) { +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch new file mode 100644 index 00000000000..a25e8f17ac3 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch @@ -0,0 +1,64 @@ +From d21ebfcf17ffc1dba635389193f10d2b93eba730 Mon Sep 17 00:00:00 2001 +From: Adrian Vladu +Date: Fri, 16 Feb 2024 11:22:08 +0000 +Subject: [PATCH 4/8] core: use max for DefaultTasksMax + +Since systemd v228, systemd has a DefaultTasksMax which defaulted +to 512, later 15% of the system's maximum number of PIDs. This +limit is low and a change in behavior that people running services +in containers will hit frequently, so revert to previous behavior. + +Though later the TasksMax was changed into a dynamic property to +accommodate stale values. + +This change is built on previous patch by David Michael(dm0-). 
+ +Signed-off-by: Adrian Vladu +--- + man/systemd-system.conf.xml | 2 +- + src/core/manager.c | 2 +- + src/core/system.conf.in | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml +index 3c06b65f93..71f38692b6 100644 +--- a/man/systemd-system.conf.xml ++++ b/man/systemd-system.conf.xml +@@ -501,7 +501,7 @@ + Configure the default value for the per-unit TasksMax= setting. See + systemd.resource-control5 + for details. This setting applies to all unit types that support resource control settings, with the exception +- of slice units. Defaults to 15% of the minimum of kernel.pid_max=, kernel.threads-max= ++ of slice units. Defaults to 100% of the minimum of kernel.pid_max=, kernel.threads-max= + and root cgroup pids.max. + Kernel has a default value for kernel.pid_max= and an algorithm of counting in case of more than 32 cores. + For example, with the default kernel.pid_max=, DefaultTasksMax= defaults to 4915, +diff --git a/src/core/manager.c b/src/core/manager.c +index 88eebfc626..8992c8c3e3 100644 +--- a/src/core/manager.c ++++ b/src/core/manager.c +@@ -114,7 +114,7 @@ + /* How many units and jobs to process of the bus queue before returning to the event loop. 
*/ + #define MANAGER_BUS_MESSAGE_BUDGET 100U + +-#define DEFAULT_TASKS_MAX ((CGroupTasksMax) { 15U, 100U }) /* 15% */ ++#define DEFAULT_TASKS_MAX ((CGroupTasksMax) { 100U, 100U }) /* 100% */ + + static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); + static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +diff --git a/src/core/system.conf.in b/src/core/system.conf.in +index 05eb681270..94d0365244 100644 +--- a/src/core/system.conf.in ++++ b/src/core/system.conf.in +@@ -58,7 +58,7 @@ + #DefaultIPAccounting=no + #DefaultMemoryAccounting={{ 'yes' if MEMORY_ACCOUNTING_DEFAULT else 'no' }} + #DefaultTasksAccounting=yes +-#DefaultTasksMax=15% ++#DefaultTasksMax=100% + #DefaultLimitCPU= + #DefaultLimitFSIZE= + #DefaultLimitDATA= +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch new file mode 100644 index 00000000000..648e3fa3082 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch @@ -0,0 +1,29 @@ +From 374cca5b2f9aea1c506352cf58b09db5c216a0d3 Mon Sep 17 00:00:00 2001 +From: Matthew Garrett +Date: Tue, 20 Dec 2016 16:43:22 +0000 +Subject: [PATCH 5/8] systemd: Disable SELinux permissions checks + +We don't care about the interaction between systemd and SELinux policy, so +let's just disable these checks rather than having to incorporate policy +support. This has no impact on our SELinux use-case, which is purely intended +to limit containers and not anything running directly on the host. 
+--- + src/core/selinux-access.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c +index 62181a6309..448f9211d6 100644 +--- a/src/core/selinux-access.c ++++ b/src/core/selinux-access.c +@@ -2,7 +2,7 @@ + + #include "selinux-access.h" + +-#if HAVE_SELINUX ++#if 0 + + #include + #include +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch new file mode 100644 index 00000000000..7baa379d346 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch @@ -0,0 +1,95 @@ +From bffb2a48796a2736d7fb7328d2a88b1cbb812b12 Mon Sep 17 00:00:00 2001 +From: Sayan Chowdhury +Date: Fri, 16 Dec 2022 16:28:26 +0530 +Subject: [PATCH 6/8] Revert "getty: Pass tty to use by agetty via stdin" + +This reverts commit b4bf9007cbee7dc0b1356897344ae2a7890df84c. + +This is to work around a SELinux denial that happens when setting up standard +input for serial consoles (which is used for SSH connections). + +Signed-off-by: Sayan Chowdhury +--- + units/console-getty.service.in | 4 +--- + units/container-getty@.service.in | 4 +--- + units/getty@.service.in | 4 +--- + units/serial-getty@.service.in | 4 +--- + 4 files changed, 4 insertions(+), 12 deletions(-) + +diff --git a/units/console-getty.service.in b/units/console-getty.service.in +index d64112be5e..b908708d8c 100644 +--- a/units/console-getty.service.in ++++ b/units/console-getty.service.in +@@ -22,12 +22,10 @@ ConditionPathExists=/dev/console + [Service] + # The '-o' option value tells agetty to replace 'login' arguments with an option to preserve environment (-p), + # followed by '--' for safety, and then the entered username. 
+-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud - 115200,38400,9600 $TERM ++ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud console 115200,38400,9600 $TERM + Type=idle + Restart=always + UtmpIdentifier=cons +-StandardInput=tty +-StandardOutput=tty + TTYPath=/dev/console + TTYReset=yes + TTYVHangup=yes +diff --git a/units/container-getty@.service.in b/units/container-getty@.service.in +index 8847d735fb..8be25663f5 100644 +--- a/units/container-getty@.service.in ++++ b/units/container-getty@.service.in +@@ -27,13 +27,11 @@ Before=rescue.service + [Service] + # The '-o' option value tells agetty to replace 'login' arguments with an option to preserve environment (-p), + # followed by '--' for safety, and then the entered username. +-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear - $TERM ++ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud pts/%I 115200,38400,9600 $TERM + Type=idle + Restart=always + RestartSec=0 + UtmpIdentifier=pts/%I +-StandardInput=tty +-StandardOutput=tty + TTYPath=/dev/pts/%I + TTYReset=yes + TTYVHangup=yes +diff --git a/units/getty@.service.in b/units/getty@.service.in +index 80b8f3e922..b57666c123 100644 +--- a/units/getty@.service.in ++++ b/units/getty@.service.in +@@ -38,13 +38,11 @@ ConditionPathExists=/dev/tty0 + # The '-o' option value tells agetty to replace 'login' arguments with an + # option to preserve environment (-p), followed by '--' for safety, and then + # the entered username. 
+-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear - $TERM ++ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear %I $TERM + Type=idle + Restart=always + RestartSec=0 + UtmpIdentifier=%I +-StandardInput=tty +-StandardOutput=tty + TTYPath=/dev/%I + TTYReset=yes + TTYVHangup=yes +diff --git a/units/serial-getty@.service.in b/units/serial-getty@.service.in +index 6bf101eac9..479b8759a9 100644 +--- a/units/serial-getty@.service.in ++++ b/units/serial-getty@.service.in +@@ -33,12 +33,10 @@ Before=rescue.service + # The '-o' option value tells agetty to replace 'login' arguments with an + # option to preserve environment (-p), followed by '--' for safety, and then + # the entered username. +-ExecStart=-/sbin/agetty -o '-p -- \\u' --keep-baud 115200,57600,38400,9600 - $TERM ++ExecStart=-/sbin/agetty -o '-p -- \\u' --keep-baud 115200,57600,38400,9600 %I $TERM + Type=idle + Restart=always + UtmpIdentifier=%I +-StandardInput=tty +-StandardOutput=tty + TTYPath=/dev/%I + TTYReset=yes + TTYVHangup=yes +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch new file mode 100644 index 00000000000..8471991893d --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch @@ -0,0 +1,42 @@ +From 6a4c6f97742afc9ca5de40335b2d041095990aa2 Mon Sep 17 00:00:00 2001 +From: Adrian Vladu +Date: Fri, 16 Feb 2024 11:29:04 +0000 +Subject: [PATCH 7/8] units: Keep using old journal file format + +Systemd 252 made an incompatible change in journal file format. Temporarily +force journald to use the old journal format to give logging containers more +time to adapt to the new format. 
+ +Signed-off-by: Adrian Vladu +--- + units/systemd-journald.service.in | 1 + + units/systemd-journald@.service.in | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in +index 37eeabc510..e5030a81bd 100644 +--- a/units/systemd-journald.service.in ++++ b/units/systemd-journald.service.in +@@ -27,6 +27,7 @@ IgnoreOnIsolate=yes + + [Service] + DeviceAllow=char-* rw ++Environment=SYSTEMD_JOURNAL_COMPACT=0 + ExecStart={{LIBEXECDIR}}/systemd-journald + FileDescriptorStoreMax=4224 + IPAddressDeny=any +diff --git a/units/systemd-journald@.service.in b/units/systemd-journald@.service.in +index c3bcb08533..8780783cf6 100644 +--- a/units/systemd-journald@.service.in ++++ b/units/systemd-journald@.service.in +@@ -21,6 +21,7 @@ Conflicts=soft-reboot.target + [Service] + CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID CAP_MAC_OVERRIDE + DevicePolicy=closed ++Environment=SYSTEMD_JOURNAL_COMPACT=0 + ExecStart={{LIBEXECDIR}}/systemd-journald %i + FileDescriptorStoreMax=4224 + Group=systemd-journal +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch new file mode 100644 index 00000000000..f6fb957cd2f --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch @@ -0,0 +1,1310 @@ +From b3e3257bfa07ae9ff63f5a139a1f7b72353a456b Mon Sep 17 00:00:00 2001 +From: Krzesimir Nowak +Date: Mon, 22 Apr 2024 16:43:38 +0200 +Subject: [PATCH 8/8] sysext: Mutable overlays + +--- + src/basic/path-util.c | 12 + + src/basic/path-util.h | 3 +- + src/shared/mount-util.c | 10 + + src/shared/mount-util.h | 1 + + src/sysext/sysext.c | 954 +++++++++++++++++++++++++++++++++++++--- + 5 files changed, 913 
insertions(+), 67 deletions(-) + +diff --git a/src/basic/path-util.c b/src/basic/path-util.c +index 6810bf66aa..b21c7b66a3 100644 +--- a/src/basic/path-util.c ++++ b/src/basic/path-util.c +@@ -525,6 +525,18 @@ int path_compare_filename(const char *a, const char *b) { + return strcmp(fa, fb); + } + ++int path_equal_or_inode_same_full(const char *a, const char *b, int flags) { ++ /* Returns true if paths are of the same entry, false if not, <0 on error. */ ++ ++ if (path_equal(a, b)) ++ return 1; ++ ++ if (!a || !b) ++ return 0; ++ ++ return inode_same(a, b, flags); ++} ++ + char* path_extend_internal(char **x, ...) { + size_t sz, old_sz; + char *q, *nx; +diff --git a/src/basic/path-util.h b/src/basic/path-util.h +index 6d943e967f..19d42c56bc 100644 +--- a/src/basic/path-util.h ++++ b/src/basic/path-util.h +@@ -68,8 +68,9 @@ static inline bool path_equal_filename(const char *a, const char *b) { + return path_compare_filename(a, b) == 0; + } + ++int path_equal_or_inode_same_full(const char *a, const char *b, int flags); + static inline bool path_equal_or_inode_same(const char *a, const char *b, int flags) { +- return path_equal(a, b) || inode_same(a, b, flags) > 0; ++ return path_equal_or_inode_same_full(a, b, flags) > 0; + } + + char* path_extend_internal(char **x, ...); +diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c +index 4f2acce513..dd9a995fb6 100644 +--- a/src/shared/mount-util.c ++++ b/src/shared/mount-util.c +@@ -453,6 +453,16 @@ int bind_remount_one_with_mountinfo( + return 0; + } + ++int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask) { ++ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; ++ ++ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); ++ if (!proc_self_mountinfo) ++ return log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); ++ ++ return bind_remount_one_with_mountinfo(path, new_flags, flags_mask, proc_self_mountinfo); ++} ++ + static int mount_switch_root_pivot(int 
fd_newroot, const char *path) { + assert(fd_newroot >= 0); + assert(path); +diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h +index ef31104900..679c94c950 100644 +--- a/src/shared/mount-util.h ++++ b/src/shared/mount-util.h +@@ -26,6 +26,7 @@ static inline int bind_remount_recursive(const char *prefix, unsigned long new_f + } + + int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo); ++int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask); + + int mount_switch_root_full(const char *path, unsigned long mount_propagation_flag, bool force_ms_move); + static inline int mount_switch_root(const char *path, unsigned long mount_propagation_flag) { +diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c +index 8dc515e4d5..7c364e5bc7 100644 +--- a/src/sysext/sysext.c ++++ b/src/sysext/sysext.c +@@ -39,15 +39,41 @@ + #include "pager.h" + #include "parse-argument.h" + #include "parse-util.h" ++#include "path-util.h" + #include "pretty-print.h" + #include "process-util.h" ++#include "rm-rf.h" + #include "sort-util.h" ++#include "string-table.h" ++#include "string-util.h" + #include "terminal-util.h" + #include "user-util.h" + #include "varlink.h" + #include "varlink-io.systemd.sysext.h" + #include "verbs.h" + ++typedef enum MutableMode { ++ MUTABLE_NO, ++ MUTABLE_YES, ++ MUTABLE_AUTO, ++ MUTABLE_IMPORT, ++ MUTABLE_EPHEMERAL, ++ MUTABLE_EPHEMERAL_IMPORT, ++ _MUTABLE_MAX, ++ _MUTABLE_INVALID = -EINVAL, ++} MutableMode; ++ ++static const char* const mutable_mode_table[_MUTABLE_MAX] = { ++ [MUTABLE_NO] = "no", ++ [MUTABLE_YES] = "yes", ++ [MUTABLE_AUTO] = "auto", ++ [MUTABLE_IMPORT] = "import", ++ [MUTABLE_EPHEMERAL] = "ephemeral", ++ [MUTABLE_EPHEMERAL_IMPORT] = "ephemeral-import", ++}; ++ ++DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(mutable_mode, MutableMode, MUTABLE_YES); ++ + static char **arg_hierarchies = NULL; /* "/usr" + "/opt" by 
default for sysext and /etc by default for confext */ + static char *arg_root = NULL; + static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF; +@@ -58,10 +84,13 @@ static bool arg_no_reload = false; + static int arg_noexec = -1; + static ImagePolicy *arg_image_policy = NULL; + static bool arg_varlink = false; ++static MutableMode arg_mutable = MUTABLE_NO; + + /* Is set to IMAGE_CONFEXT when systemd is called with the confext functionality instead of the default */ + static ImageClass arg_image_class = IMAGE_SYSEXT; + ++#define MUTABLE_EXTENSIONS_BASE_DIR "/var/lib/extensions.mutable" ++ + STATIC_DESTRUCTOR_REGISTER(arg_hierarchies, strv_freep); + STATIC_DESTRUCTOR_REGISTER(arg_root, freep); + STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep); +@@ -75,6 +104,7 @@ static const struct { + const char *level_env; + const char *scope_env; + const char *name_env; ++ const char *mode_env; + const ImagePolicy *default_image_policy; + unsigned long default_mount_flags; + } image_class_info[_IMAGE_CLASS_MAX] = { +@@ -86,6 +116,7 @@ static const struct { + .level_env = "SYSEXT_LEVEL", + .scope_env = "SYSEXT_SCOPE", + .name_env = "SYSTEMD_SYSEXT_HIERARCHIES", ++ .mode_env = "SYSTEMD_SYSEXT_MUTABLE_MODE", + .default_image_policy = &image_policy_sysext, + .default_mount_flags = MS_RDONLY|MS_NODEV, + }, +@@ -97,11 +128,16 @@ static const struct { + .level_env = "CONFEXT_LEVEL", + .scope_env = "CONFEXT_SCOPE", + .name_env = "SYSTEMD_CONFEXT_HIERARCHIES", ++ .mode_env = "SYSTEMD_CONFEXT_MUTABLE_MODE", + .default_image_policy = &image_policy_confext, + .default_mount_flags = MS_RDONLY|MS_NODEV|MS_NOSUID|MS_NOEXEC, + } + }; + ++static int parse_mutable_mode(const char *p) { ++ return mutable_mode_from_string(p); ++} ++ + static int is_our_mount_point( + ImageClass image_class, + const char *p) { +@@ -150,7 +186,7 @@ static int is_our_mount_point( + return log_error_errno(r, "Failed to parse device major/minor stored in '%s/dev' file on '%s': %m", 
image_class_info[image_class].dot_directory_name, p); + + if (lstat(p, &st) < 0) +- return log_error_errno(r, "Failed to stat %s: %m", p); ++ return log_error_errno(errno, "Failed to stat %s: %m", p); + + if (st.st_dev != dev) { + log_debug("Hierarchy '%s' reports a different device major/minor than what we are seeing, assuming offline copy.", p); +@@ -248,11 +284,22 @@ static int unmerge_hierarchy( + ImageClass image_class, + const char *p) { + ++ _cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL; + int r; + + assert(p); + ++ dot_dir = path_join(p, image_class_info[image_class].dot_directory_name); ++ if (!dot_dir) ++ return log_oom(); ++ ++ work_dir_info_file = path_join(dot_dir, "work_dir"); ++ if (!work_dir_info_file) ++ return log_oom(); ++ + for (;;) { ++ _cleanup_free_ char *escaped_work_dir_in_root = NULL, *work_dir = NULL; ++ + /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break + * systems where /usr/ is a mount point of its own already. */ + +@@ -262,9 +309,40 @@ static int unmerge_hierarchy( + if (r == 0) + break; + ++ r = read_one_line_file(work_dir_info_file, &escaped_work_dir_in_root); ++ if (r < 0) { ++ if (r != -ENOENT) ++ return log_error_errno(r, "Failed to read '%s': %m", work_dir_info_file); ++ } else { ++ _cleanup_free_ char *work_dir_in_root = NULL; ++ ssize_t l; ++ ++ l = cunescape_length(escaped_work_dir_in_root, r, 0, &work_dir_in_root); ++ if (l < 0) ++ return log_error_errno(l, "Failed to unescape work directory path: %m"); ++ work_dir = path_join(arg_root, work_dir_in_root); ++ if (!work_dir) ++ return log_oom(); ++ } ++ ++ r = umount_verbose(LOG_DEBUG, dot_dir, MNT_DETACH|UMOUNT_NOFOLLOW); ++ if (r < 0) { ++ /* EINVAL is possibly "not a mount point". Let it slide as it's expected to occur if ++ * the whole hierarchy was read-only, so the dot directory inside it was not ++ * bind-mounted as read-only. 
*/ ++ if (r != -EINVAL) ++ return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir); ++ } ++ + r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW); + if (r < 0) +- return log_error_errno(r, "Failed to unmount file system '%s': %m", p); ++ return r; ++ ++ if (work_dir) { ++ r = rm_rf(work_dir, REMOVE_ROOT | REMOVE_MISSING_OK | REMOVE_PHYSICAL); ++ if (r < 0) ++ return log_error_errno(r, "Failed to remove '%s': %m", work_dir); ++ } + + log_info("Unmerged '%s'.", p); + } +@@ -448,7 +526,7 @@ static int verb_status(int argc, char **argv, void *userdata) { + return log_oom(); + + if (stat(*p, &st) < 0) +- return log_error_errno(r, "Failed to stat() '%s': %m", *p); ++ return log_error_errno(errno, "Failed to stat() '%s': %m", *p); + + r = table_add_many( + t, +@@ -474,11 +552,38 @@ static int verb_status(int argc, char **argv, void *userdata) { + return ret; + } + ++static int append_overlayfs_path_option( ++ char **options, ++ const char *separator, ++ const char *option, ++ const char *path) { ++ ++ _cleanup_free_ char *escaped = NULL; ++ ++ assert(options); ++ assert(separator); ++ assert(path); ++ ++ escaped = shell_escape(path, ",:"); ++ if (!escaped) ++ return log_oom(); ++ ++ if (option) { ++ if (!strextend(options, separator, option, "=", escaped)) ++ return log_oom(); ++ } else if (!strextend(options, separator, escaped)) ++ return log_oom(); ++ ++ return 0; ++} ++ + static int mount_overlayfs( + ImageClass image_class, + int noexec, + const char *where, +- char **layers) { ++ char **layers, ++ const char *upper_dir, ++ const char *work_dir) { + + _cleanup_free_ char *options = NULL; + bool separator = false; +@@ -486,20 +591,16 @@ static int mount_overlayfs( + int r; + + assert(where); ++ assert((upper_dir && work_dir) || (!upper_dir && !work_dir)); + + options = strdup("lowerdir="); + if (!options) + return log_oom(); + + STRV_FOREACH(l, layers) { +- _cleanup_free_ char *escaped = NULL; +- +- escaped = shell_escape(*l, ",:"); +- if (!escaped) +- 
return log_oom(); +- +- if (!strextend(&options, separator ? ":" : "", escaped)) +- return log_oom(); ++ r = append_overlayfs_path_option(&options, separator ? ":" : "", NULL, *l); ++ if (r < 0) ++ return r; + + separator = true; + } +@@ -508,6 +609,22 @@ static int mount_overlayfs( + if (noexec >= 0) + SET_FLAG(flags, MS_NOEXEC, noexec); + ++ if (upper_dir && work_dir) { ++ r = append_overlayfs_path_option(&options, ",", "upperdir", upper_dir); ++ if (r < 0) ++ return r; ++ ++ flags &= ~MS_RDONLY; ++ ++ r = append_overlayfs_path_option(&options, ",", "workdir", work_dir); ++ if (r < 0) ++ return r; ++ /* redirect_dir=on and noatime prevent unnecessary upcopies, metacopy=off prevents broken ++ * files from partial upcopies after umount. */ ++ if (!strextend(&options, ",redirect_dir=on,noatime,metacopy=off")) ++ return log_oom(); ++ } ++ + /* Now mount the actual overlayfs */ + r = mount_nofollow_verbose(LOG_ERR, image_class_info[image_class].short_identifier, where, "overlay", flags, options); + if (r < 0) +@@ -516,62 +633,277 @@ static int mount_overlayfs( + return 0; + } + +-static int merge_hierarchy( +- ImageClass image_class, ++static char *hierarchy_as_single_path_component(const char *hierarchy) { ++ /* We normally expect hierarchy to be /usr, /opt or /etc, but for debugging purposes the hierarchy ++ * could very well be like /foo/bar/baz/. So for a given hierarchy we generate a directory name by ++ * stripping the leading and trailing separators and replacing the rest of separators with dots. This ++ * makes the generated name to be the same for /foo/bar/baz and for /foo/bar.baz, but, again, ++ * specifying a different hierarchy is a debugging feature, so non-unique mapping should not be an ++ * issue in general case. 
*/ ++ const char *stripped = hierarchy; ++ _cleanup_free_ char *dir_name = NULL; ++ ++ assert(hierarchy); ++ ++ stripped += strspn(stripped, "/"); ++ ++ dir_name = strdup(stripped); ++ if (!dir_name) ++ return NULL; ++ delete_trailing_chars(dir_name, "/"); ++ string_replace_char(dir_name, '/', '.'); ++ return TAKE_PTR(dir_name); ++} ++ ++static int paths_on_same_fs(const char *path1, const char *path2) { ++ struct stat st1, st2; ++ ++ assert(path1); ++ assert(path2); ++ ++ if (stat(path1, &st1) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", path1); ++ ++ if (stat(path2, &st2) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", path2); ++ ++ return st1.st_dev == st2.st_dev; ++} ++ ++static int work_dir_for_hierarchy( + const char *hierarchy, +- int noexec, +- char **extensions, +- char **paths, +- const char *meta_path, +- const char *overlay_path) { ++ const char *resolved_upper_dir, ++ char **ret_work_dir) { ++ ++ _cleanup_free_ char *parent = NULL; ++ int r; ++ ++ assert(hierarchy); ++ assert(resolved_upper_dir); ++ assert(ret_work_dir); ++ ++ r = path_extract_directory(resolved_upper_dir, &parent); ++ if (r < 0) ++ return log_error_errno(r, "Failed to get parent directory of upperdir '%s': %m", resolved_upper_dir); ++ ++ /* TODO: paths_in_same_superblock? partition? device? 
*/ ++ r = paths_on_same_fs(resolved_upper_dir, parent); ++ if (r < 0) ++ return r; ++ if (!r) ++ return log_error_errno(SYNTHETIC_ERRNO(EXDEV), "Unable to find a suitable workdir location for upperdir '%s' for host hierarchy '%s' - parent directory of the upperdir is in a different filesystem", resolved_upper_dir, hierarchy); ++ ++ _cleanup_free_ char *f = NULL, *dir_name = NULL; ++ ++ f = hierarchy_as_single_path_component(hierarchy); ++ if (!f) ++ return log_oom(); ++ dir_name = strjoin(".systemd-", f, "-workdir"); ++ if (!dir_name) ++ return log_oom(); ++ ++ free(f); ++ f = path_join(parent, dir_name); ++ if (!f) ++ return log_oom(); ++ ++ *ret_work_dir = TAKE_PTR(f); ++ return 0; ++} ++ ++typedef struct OverlayFSPaths { ++ char *hierarchy; ++ mode_t hierarchy_mode; ++ char *resolved_hierarchy; ++ char *resolved_mutable_directory; ++ ++ /* NULL if merged fs is read-only */ ++ char *upper_dir; ++ /* NULL if merged fs is read-only */ ++ char *work_dir; ++ /* lowest index is top lowerdir, highest index is bottom lowerdir */ ++ char **lower_dirs; ++} OverlayFSPaths; ++ ++static OverlayFSPaths *overlayfs_paths_free(OverlayFSPaths *op) { ++ if (!op) ++ return NULL; ++ ++ free(op->hierarchy); ++ free(op->resolved_hierarchy); ++ free(op->resolved_mutable_directory); ++ ++ free(op->upper_dir); ++ free(op->work_dir); ++ strv_free(op->lower_dirs); ++ ++ free(op); ++ return NULL; ++} ++DEFINE_TRIVIAL_CLEANUP_FUNC(OverlayFSPaths *, overlayfs_paths_free); + +- _cleanup_free_ char *resolved_hierarchy = NULL, *f = NULL, *buf = NULL; +- _cleanup_strv_free_ char **layers = NULL; ++static int resolve_hierarchy(const char *hierarchy, char **ret_resolved_hierarchy) { ++ _cleanup_free_ char *resolved_path = NULL; ++ int r; ++ ++ assert(hierarchy); ++ assert(ret_resolved_hierarchy); ++ ++ r = chase(hierarchy, arg_root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r < 0 && r != -ENOENT) ++ return log_error_errno(r, "Failed to resolve hierarchy '%s': %m", hierarchy); ++ ++ 
*ret_resolved_hierarchy = TAKE_PTR(resolved_path); ++ return 0; ++} ++ ++static int mutable_directory_mode_matches_hierarchy( ++ const char *root_or_null, ++ const char *path, ++ mode_t hierarchy_mode) { ++ ++ _cleanup_free_ char *path_in_root = NULL; + struct stat st; ++ mode_t actual_mode; ++ ++ assert(path); ++ ++ path_in_root = path_join(root_or_null, path); ++ if (!path_in_root) ++ return log_oom(); ++ ++ if (stat(path_in_root, &st) < 0) { ++ if (errno == ENOENT) ++ return 0; ++ return log_error_errno(errno, "Failed to stat mutable directory '%s': %m", path_in_root); ++ } ++ ++ actual_mode = st.st_mode & 0777; ++ if (actual_mode != hierarchy_mode) ++ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mutable directory '%s' has mode %04o, ought to have mode %04o", path_in_root, actual_mode, hierarchy_mode); ++ ++ return 0; ++} ++ ++static int resolve_mutable_directory( ++ const char *hierarchy, ++ mode_t hierarchy_mode, ++ const char *workspace, ++ char **ret_resolved_mutable_directory) { ++ ++ _cleanup_free_ char *path = NULL, *resolved_path = NULL, *dir_name = NULL; ++ const char *root = arg_root, *base = MUTABLE_EXTENSIONS_BASE_DIR; + int r; + + assert(hierarchy); +- assert(meta_path); +- assert(overlay_path); ++ assert(ret_resolved_mutable_directory); + +- /* Resolve the path of the host's version of the hierarchy, i.e. what we want to use as lowest layer +- * in the overlayfs stack. 
*/ +- r = chase(hierarchy, arg_root, CHASE_PREFIX_ROOT, &resolved_hierarchy, NULL); +- if (r == -ENOENT) +- log_debug_errno(r, "Hierarchy '%s' on host doesn't exist, not merging.", hierarchy); +- else if (r < 0) +- return log_error_errno(r, "Failed to resolve host hierarchy '%s': %m", hierarchy); +- else { +- r = dir_is_empty(resolved_hierarchy, /* ignore_hidden_or_backup= */ false); +- if (r < 0) +- return log_error_errno(r, "Failed to check if host hierarchy '%s' is empty: %m", resolved_hierarchy); +- if (r > 0) { +- log_debug("Host hierarchy '%s' is empty, not merging.", resolved_hierarchy); +- resolved_hierarchy = mfree(resolved_hierarchy); +- } ++ if (arg_mutable == MUTABLE_NO) { ++ log_debug("Mutability for hierarchy '%s' is disabled, not resolving mutable directory.", hierarchy); ++ *ret_resolved_mutable_directory = NULL; ++ return 0; + } + +- /* Let's generate a metadata file that lists all extensions we took into account for this +- * hierarchy. We include this in the final fs, to make things nicely discoverable and +- * recognizable. */ +- f = path_join(meta_path, image_class_info[image_class].dot_directory_name, image_class_info[image_class].short_identifier_plural); +- if (!f) ++ if (IN_SET(arg_mutable, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) { ++ /* We create mutable directory inside the temporary tmpfs workspace, which is a fixed ++ * location that ignores arg_root. */ ++ root = NULL; ++ base = workspace; ++ } ++ ++ dir_name = hierarchy_as_single_path_component(hierarchy); ++ if (!dir_name) + return log_oom(); + +- buf = strv_join(extensions, "\n"); +- if (!buf) ++ path = path_join(base, dir_name); ++ if (!path) + return log_oom(); + +- r = write_string_file(f, buf, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755); ++ if (IN_SET(arg_mutable, MUTABLE_YES, MUTABLE_AUTO)) { ++ /* If there already is a mutable directory, check if its mode matches hierarchy. 
Merged ++ * hierarchy will have the same mode as the mutable directory, so we want no surprising mode ++ * changes here. */ ++ r = mutable_directory_mode_matches_hierarchy(root, path, hierarchy_mode); ++ if (r < 0) ++ return r; ++ } ++ ++ if (IN_SET(arg_mutable, MUTABLE_YES, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) { ++ _cleanup_free_ char *path_in_root = NULL; ++ ++ path_in_root = path_join(root, path); ++ if (!path_in_root) ++ return log_oom(); ++ ++ r = mkdir_p(path_in_root, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to create a directory '%s': %m", path_in_root); ++ } ++ ++ r = chase(path, root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r < 0 && r != -ENOENT) ++ return log_error_errno(r, "Failed to resolve mutable directory '%s': %m", path); ++ ++ *ret_resolved_mutable_directory = TAKE_PTR(resolved_path); ++ return 0; ++} ++ ++static int overlayfs_paths_new(const char *hierarchy, const char *workspace_path, OverlayFSPaths **ret_op) { ++ _cleanup_free_ char *hierarchy_copy = NULL, *resolved_hierarchy = NULL, *resolved_mutable_directory = NULL; ++ mode_t hierarchy_mode; ++ ++ int r; ++ ++ assert (hierarchy); ++ assert (ret_op); ++ ++ hierarchy_copy = strdup(hierarchy); ++ if (!hierarchy_copy) ++ return log_oom(); ++ ++ r = resolve_hierarchy(hierarchy, &resolved_hierarchy); + if (r < 0) +- return log_error_errno(r, "Failed to write extension meta file '%s': %m", f); ++ return r; + +- /* Put the meta path (i.e. 
our synthesized stuff) at the top of the layer stack */ +- layers = strv_new(meta_path); +- if (!layers) ++ if (resolved_hierarchy) { ++ struct stat st; ++ ++ if (stat(resolved_hierarchy, &st) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", resolved_hierarchy); ++ hierarchy_mode = st.st_mode & 0777; ++ } else ++ hierarchy_mode = 0755; ++ ++ r = resolve_mutable_directory(hierarchy, hierarchy_mode, workspace_path, &resolved_mutable_directory); ++ if (r < 0) ++ return r; ++ ++ OverlayFSPaths *op; ++ op = new(OverlayFSPaths, 1); ++ if (!op) + return log_oom(); + +- /* Put the extensions in the middle */ ++ *op = (OverlayFSPaths) { ++ .hierarchy = TAKE_PTR(hierarchy_copy), ++ .hierarchy_mode = hierarchy_mode, ++ .resolved_hierarchy = TAKE_PTR(resolved_hierarchy), ++ .resolved_mutable_directory = TAKE_PTR(resolved_mutable_directory), ++ }; ++ ++ *ret_op = TAKE_PTR(op); ++ return 0; ++} ++ ++static int determine_used_extensions(const char *hierarchy, char **paths, char ***ret_used_paths, size_t *ret_extensions_used) { ++ _cleanup_strv_free_ char **used_paths = NULL; ++ size_t n = 0; ++ int r; ++ ++ assert(hierarchy); ++ assert(paths); ++ assert(ret_used_paths); ++ assert(ret_extensions_used); ++ + STRV_FOREACH(p, paths) { + _cleanup_free_ char *resolved = NULL; + +@@ -591,54 +923,514 @@ static int merge_hierarchy( + continue; + } + +- r = strv_consume(&layers, TAKE_PTR(resolved)); ++ r = strv_consume_with_size (&used_paths, &n, TAKE_PTR(resolved)); + if (r < 0) + return log_oom(); + } + +- if (!layers[1]) /* No extension with files in this hierarchy? Then don't do anything. 
*/ ++ *ret_used_paths = TAKE_PTR(used_paths); ++ *ret_extensions_used = n; ++ return 0; ++} ++ ++static int maybe_import_mutable_directory(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ ++ /* If importing mutable layer and it actually exists and is not a hierarchy itself, add it just below ++ * the meta path */ ++ ++ if (arg_mutable != MUTABLE_IMPORT || !op->resolved_mutable_directory) + return 0; + +- if (resolved_hierarchy) { +- /* Add the host hierarchy as last (lowest) layer in the stack */ +- r = strv_consume(&layers, TAKE_PTR(resolved_hierarchy)); ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, op->resolved_mutable_directory, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(ELOOP), "Not importing mutable directory for hierarchy %s as a lower dir, because it points to the hierarchy itself", op->hierarchy); ++ ++ r = strv_extend(&op->lower_dirs, op->resolved_mutable_directory); ++ if (r < 0) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int maybe_import_ignored_mutable_directory(OverlayFSPaths *op) { ++ _cleanup_free_ char *dir_name = NULL, *path = NULL, *resolved_path = NULL; ++ int r; ++ ++ assert(op); ++ ++ /* If importing the ignored mutable layer and it actually exists and is not a hierarchy itself, add ++ * it just below the meta path */ ++ if (arg_mutable != MUTABLE_EPHEMERAL_IMPORT) ++ return 0; ++ ++ dir_name = hierarchy_as_single_path_component(op->hierarchy); ++ if (!dir_name) ++ return log_oom(); ++ ++ path = path_join(MUTABLE_EXTENSIONS_BASE_DIR, dir_name); ++ if (!path) ++ return log_oom(); ++ ++ r = chase(path, arg_root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r == -ENOENT) { ++ log_debug("Mutable directory for %s does not exist, not importing", op->hierarchy); ++ return 0; ++ } ++ if (r < 0) ++ return log_error_errno(r, 
"Failed to resolve mutable directory '%s': %m", path); ++ ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, resolved_path, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, resolved_path); ++ ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(ELOOP), "Not importing mutable directory for hierarchy %s as a lower dir, because it points to the hierarchy itself", op->hierarchy); ++ ++ r = strv_consume(&op->lower_dirs, TAKE_PTR(resolved_path)); ++ if (r < 0) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int determine_top_lower_dirs(OverlayFSPaths *op, const char *meta_path) { ++ int r; ++ ++ assert(op); ++ assert(meta_path); ++ ++ /* Put the meta path (i.e. our synthesized stuff) at the top of the layer stack */ ++ r = strv_extend(&op->lower_dirs, meta_path); ++ if (r < 0) ++ return log_oom(); ++ ++ r = maybe_import_mutable_directory(op); ++ if (r < 0) ++ return r; ++ ++ r = maybe_import_ignored_mutable_directory(op); ++ if (r < 0) ++ return r; ++ ++ return 0; ++} ++ ++static int determine_middle_lower_dirs(OverlayFSPaths *op, char **paths) { ++ int r; ++ ++ assert(op); ++ assert(paths); ++ ++ /* The paths were already determined in determine_used_extensions, so we just take them as is. 
*/ ++ r = strv_extend_strv(&op->lower_dirs, paths, false); ++ if (r < 0) ++ return log_oom (); ++ ++ return 0; ++} ++ ++static int hierarchy_as_lower_dir(OverlayFSPaths *op) { ++ int r; ++ ++ /* return 0 if hierarchy should be used as lower dir, >0, if not */ ++ ++ assert(op); ++ ++ if (!op->resolved_hierarchy) { ++ log_debug("Host hierarchy '%s' does not exist, will not be used as lowerdir", op->hierarchy); ++ return 1; ++ } ++ ++ r = dir_is_empty(op->resolved_hierarchy, /* ignore_hidden_or_backup= */ false); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check if host hierarchy '%s' is empty: %m", op->resolved_hierarchy); ++ if (r > 0) { ++ log_debug("Host hierarchy '%s' is empty, will not be used as lower dir.", op->resolved_hierarchy); ++ return 1; ++ } ++ ++ if (arg_mutable == MUTABLE_IMPORT) { ++ log_debug("Mutability for host hierarchy '%s' is disabled, so host hierarchy will be a lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ if (arg_mutable == MUTABLE_EPHEMERAL_IMPORT) { ++ log_debug("Mutability for host hierarchy '%s' is ephemeral, so host hierarchy will be a lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ if (!op->resolved_mutable_directory) { ++ log_debug("No mutable directory found, so host hierarchy '%s' will be used as lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, op->resolved_mutable_directory, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); ++ if (r > 0) { ++ log_debug("Host hierarchy '%s' will serve as upperdir.", op->resolved_hierarchy); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int determine_bottom_lower_dirs(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ ++ r = hierarchy_as_lower_dir(op); ++ if (r < 0) ++ return r; ++ if (!r) { ++ r = strv_extend(&op->lower_dirs, op->resolved_hierarchy); + if (r < 
0) + return log_oom(); + } + ++ return 0; ++} ++ ++static int determine_lower_dirs( ++ OverlayFSPaths *op, ++ char **paths, ++ const char *meta_path) { ++ ++ int r; ++ ++ assert(op); ++ assert(paths); ++ assert(meta_path); ++ ++ r = determine_top_lower_dirs(op, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = determine_middle_lower_dirs(op, paths); ++ if (r < 0) ++ return r; ++ ++ r = determine_bottom_lower_dirs(op); ++ if (r < 0) ++ return r; ++ ++ return 0; ++} ++ ++static int determine_upper_dir(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ assert(!op->upper_dir); ++ ++ if (arg_mutable == MUTABLE_IMPORT) { ++ log_debug("Mutability is disabled, there will be no upperdir for host hierarchy '%s'", op->hierarchy); ++ return 0; ++ } ++ ++ if (!op->resolved_mutable_directory) { ++ log_debug("No mutable directory found for host hierarchy '%s', there will be no upperdir", op->hierarchy); ++ return 0; ++ } ++ ++ /* Require upper dir to be on writable filesystem if it's going to be used as an actual overlayfs ++ * upperdir, instead of a lowerdir as an imported path. 
*/ ++ r = path_is_read_only_fs(op->resolved_mutable_directory); ++ if (r < 0) ++ return log_error_errno(r, "Failed to determine if mutable directory '%s' is on read-only filesystem: %m", op->resolved_mutable_directory); ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Can't use '%s' as an upperdir as it is read-only.", op->resolved_mutable_directory); ++ ++ op->upper_dir = strdup(op->resolved_mutable_directory); ++ if (!op->upper_dir) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int determine_work_dir(OverlayFSPaths *op) { ++ _cleanup_free_ char *work_dir = NULL; ++ int r; ++ ++ assert(op); ++ assert(!op->work_dir); ++ ++ if (!op->upper_dir) ++ return 0; ++ ++ if (arg_mutable == MUTABLE_IMPORT) ++ return 0; ++ ++ r = work_dir_for_hierarchy(op->hierarchy, op->upper_dir, &work_dir); ++ if (r < 0) ++ return r; ++ ++ op->work_dir = TAKE_PTR(work_dir); ++ return 0; ++} ++ ++static int mount_overlayfs_with_op( ++ OverlayFSPaths *op, ++ ImageClass image_class, ++ int noexec, ++ const char *overlay_path, ++ const char *meta_path) { ++ ++ int r; ++ const char *top_layer = NULL; ++ ++ assert(op); ++ assert(overlay_path); ++ + r = mkdir_p(overlay_path, 0700); + if (r < 0) + return log_error_errno(r, "Failed to make directory '%s': %m", overlay_path); + +- r = mount_overlayfs(image_class, noexec, overlay_path, layers); ++ r = mkdir_p(meta_path, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make directory '%s': %m", meta_path); ++ ++ if (op->upper_dir && op->work_dir) { ++ r = mkdir_p(op->work_dir, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make directory '%s': %m", op->work_dir); ++ top_layer = op->upper_dir; ++ } else { ++ assert(!strv_isempty(op->lower_dirs)); ++ top_layer = op->lower_dirs[0]; ++ } ++ ++ /* Overlayfs merged directory has the same mode as the top layer (either first lowerdir in options in ++ * read-only case, or upperdir for mutable case). 
Set up top overlayfs layer to the same mode as the ++ * unmerged hierarchy, otherwise we might end up with merged hierarchy owned by root and with mode ++ * being 0700. */ ++ if (chmod(top_layer, op->hierarchy_mode) < 0) ++ return log_error_errno(errno, "Failed to set permissions of '%s' to %04o: %m", top_layer, op->hierarchy_mode); ++ ++ r = mount_overlayfs(image_class, noexec, overlay_path, op->lower_dirs, op->upper_dir, op->work_dir); + if (r < 0) + return r; + +- /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. Extra turbo safety 😎 */ +- r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); ++ return 0; ++} ++ ++static int write_extensions_file(ImageClass image_class, char **extensions, const char *meta_path) { ++ _cleanup_free_ char *f = NULL, *buf = NULL; ++ int r; ++ ++ assert(extensions); ++ assert(meta_path); ++ ++ /* Let's generate a metadata file that lists all extensions we took into account for this ++ * hierarchy. We include this in the final fs, to make things nicely discoverable and ++ * recognizable. */ ++ f = path_join(meta_path, image_class_info[image_class].dot_directory_name, image_class_info[image_class].short_identifier_plural); ++ if (!f) ++ return log_oom(); ++ ++ buf = strv_join(extensions, "\n"); ++ if (!buf) ++ return log_oom(); ++ ++ r = write_string_file(f, buf, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) +- return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); ++ return log_error_errno(r, "Failed to write extension meta file '%s': %m", f); ++ ++ return 0; ++} ++ ++static int write_dev_file(ImageClass image_class, const char *meta_path, const char *overlay_path) { ++ _cleanup_free_ char *f = NULL; ++ struct stat st; ++ int r; ++ ++ assert(meta_path); ++ assert(overlay_path); + + /* Now we have mounted the new file system. Let's now figure out its .st_dev field, and make that + * available in the metadata directory. 
This is useful to detect whether the metadata dir actually + * belongs to the fs it is found on: if .st_dev of the top-level mount matches it, it's pretty likely + * we are looking at a live tree, and not an unpacked tar or so of one. */ + if (stat(overlay_path, &st) < 0) +- return log_error_errno(r, "Failed to stat mount '%s': %m", overlay_path); ++ return log_error_errno(errno, "Failed to stat mount '%s': %m", overlay_path); + +- free(f); + f = path_join(meta_path, image_class_info[image_class].dot_directory_name, "dev"); + if (!f) + return log_oom(); + ++ /* Modifying the underlying layers while the overlayfs is mounted is technically undefined, but at ++ * least it won't crash or deadlock, as per the kernel docs about overlayfs: ++ * https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html#changes-to-underlying-filesystems */ + r = write_string_file(f, FORMAT_DEVNUM(st.st_dev), WRITE_STRING_FILE_CREATE); + if (r < 0) + return log_error_errno(r, "Failed to write '%s': %m", f); + ++ return 0; ++} ++ ++static int write_work_dir_file(ImageClass image_class, const char *meta_path, const char *work_dir) { ++ _cleanup_free_ char *escaped_work_dir_in_root = NULL, *f = NULL; ++ char *work_dir_in_root = NULL; ++ int r; ++ ++ assert(meta_path); ++ ++ if (!work_dir) ++ return 0; ++ ++ /* Do not store work dir path for ephemeral mode, it will be gone once this process is done. */ ++ if (IN_SET(arg_mutable, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) ++ return 0; ++ ++ work_dir_in_root = path_startswith(work_dir, empty_to_root(arg_root)); ++ if (!work_dir_in_root) ++ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Workdir '%s' must not be outside root '%s'", work_dir, empty_to_root(arg_root)); ++ ++ f = path_join(meta_path, image_class_info[image_class].dot_directory_name, "work_dir"); ++ if (!f) ++ return log_oom(); ++ ++ /* Paths can have newlines for whatever reason, so better escape them to really get a single ++ * line file. 
*/ ++ escaped_work_dir_in_root = cescape(work_dir_in_root); ++ if (!escaped_work_dir_in_root) ++ return log_oom(); ++ r = write_string_file(f, escaped_work_dir_in_root, WRITE_STRING_FILE_CREATE); ++ if (r < 0) ++ return log_error_errno(r, "Failed to write '%s': %m", f); ++ ++ return 0; ++} ++ ++static int store_info_in_meta( ++ ImageClass image_class, ++ char **extensions, ++ const char *meta_path, ++ const char *overlay_path, ++ const char *work_dir) { ++ ++ int r; ++ ++ assert(extensions); ++ assert(meta_path); ++ assert(overlay_path); ++ /* work_dir may be NULL */ ++ ++ r = write_extensions_file(image_class, extensions, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = write_dev_file(image_class, meta_path, overlay_path); ++ if (r < 0) ++ return r; ++ ++ r = write_work_dir_file(image_class, meta_path, work_dir); ++ if (r < 0) ++ return r; ++ + /* Make sure the top-level dir has an mtime marking the point we established the merge */ + if (utimensat(AT_FDCWD, meta_path, NULL, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(r, "Failed fix mtime of '%s': %m", meta_path); + ++ return 0; ++} ++ ++static int make_mounts_read_only(ImageClass image_class, const char *overlay_path, bool mutable) { ++ int r; ++ ++ assert(overlay_path); ++ ++ if (mutable) { ++ /* Bind mount the meta path as read-only on mutable overlays to avoid accidental ++ * modifications of the contents of meta directory, which could lead to systemd thinking that ++ * this hierarchy is not our mount. */ ++ _cleanup_free_ char *f = NULL; ++ ++ f = path_join(overlay_path, image_class_info[image_class].dot_directory_name); ++ if (!f) ++ return log_oom(); ++ ++ r = mount_nofollow_verbose(LOG_ERR, f, f, NULL, MS_BIND, NULL); ++ if (r < 0) ++ return r; ++ ++ r = bind_remount_one(f, MS_RDONLY, MS_RDONLY); ++ if (r < 0) ++ return log_error_errno(r, "Failed to remount '%s' as read-only: %m", f); ++ } else { ++ /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. 
Extra ++ * turbo safety 😎 */ ++ r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); ++ } ++ ++ return 0; ++} ++ ++static int merge_hierarchy( ++ ImageClass image_class, ++ const char *hierarchy, ++ int noexec, ++ char **extensions, ++ char **paths, ++ const char *meta_path, ++ const char *overlay_path, ++ const char *workspace_path) { ++ ++ _cleanup_(overlayfs_paths_freep) OverlayFSPaths *op = NULL; ++ _cleanup_strv_free_ char **used_paths = NULL; ++ size_t extensions_used = 0; ++ int r; ++ ++ assert(hierarchy); ++ assert(extensions); ++ assert(paths); ++ assert(meta_path); ++ assert(overlay_path); ++ assert(workspace_path); ++ ++ r = determine_used_extensions(hierarchy, paths, &used_paths, &extensions_used); ++ if (r < 0) ++ return r; ++ ++ if (extensions_used == 0) /* No extension with files in this hierarchy? Then don't do anything. */ ++ return 0; ++ ++ r = overlayfs_paths_new(hierarchy, workspace_path, &op); ++ if (r < 0) ++ return r; ++ ++ r = determine_lower_dirs(op, used_paths, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = determine_upper_dir(op); ++ if (r < 0) ++ return r; ++ ++ r = determine_work_dir(op); ++ if (r < 0) ++ return r; ++ ++ r = mount_overlayfs_with_op(op, image_class, noexec, overlay_path, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = store_info_in_meta(image_class, extensions, meta_path, overlay_path, op->work_dir); ++ if (r < 0) ++ return r; ++ ++ r = make_mounts_read_only(image_class, overlay_path, op->upper_dir && op->work_dir); ++ if (r < 0) ++ return r; ++ + return 1; + } + +@@ -908,7 +1700,7 @@ static int merge_subprocess( + + /* Create overlayfs mounts for all hierarchies */ + STRV_FOREACH(h, hierarchies) { +- _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL; ++ _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL, *merge_hierarchy_workspace = NULL; + + meta_path = path_join(workspace, 
"meta", *h); /* The place where to store metadata about this instance */ + if (!meta_path) +@@ -918,6 +1710,11 @@ static int merge_subprocess( + if (!overlay_path) + return log_oom(); + ++ /* Temporary directory for merge_hierarchy needs, like ephemeral directories. */ ++ merge_hierarchy_workspace = path_join(workspace, "mh_workspace", *h); ++ if (!merge_hierarchy_workspace) ++ return log_oom(); ++ + r = merge_hierarchy( + image_class, + *h, +@@ -925,7 +1722,8 @@ static int merge_subprocess( + extensions, + paths, + meta_path, +- overlay_path); ++ overlay_path, ++ merge_hierarchy_workspace); + if (r < 0) + return r; + } +@@ -954,7 +1752,8 @@ static int merge_subprocess( + if (r < 0) + return log_error_errno(r, "Failed to create hierarchy mount point '%s': %m", resolved); + +- r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND, NULL); ++ /* Using MS_REC to potentially bring in our read-only bind mount of metadata. */ ++ r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + +@@ -992,9 +1791,10 @@ static int merge(ImageClass image_class, + r = wait_for_terminate_and_check("(sd-merge)", pid, WAIT_LOG_ABNORMAL); + if (r < 0) + return r; +- + if (r == 123) /* exit code 123 means: didn't do anything */ + return 0; ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to merge hierarchies"); + + r = need_reload(image_class, hierarchies, no_reload); + if (r < 0) +@@ -1373,6 +2173,8 @@ static int verb_help(int argc, char **argv, void *userdata) { + " -h --help Show this help\n" + " --version Show package version\n" + "\n%3$sOptions:%4$s\n" ++ " --mutable=yes|no|auto|import|ephemeral|ephemeral-import\n" ++ " Specify a mutability mode of the merged hierarchy\n" + " --no-pager Do not pipe output into a pager\n" + " --no-legend Do not show the headers and footers\n" + " --root=PATH Operate relative to root path\n" +@@ -1406,6 +2208,7 @@ static int parse_argv(int argc, char *argv[]) { + 
ARG_IMAGE_POLICY, + ARG_NOEXEC, + ARG_NO_RELOAD, ++ ARG_MUTABLE, + }; + + static const struct option options[] = { +@@ -1419,6 +2222,7 @@ static int parse_argv(int argc, char *argv[]) { + { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, + { "noexec", required_argument, NULL, ARG_NOEXEC }, + { "no-reload", no_argument, NULL, ARG_NO_RELOAD }, ++ { "mutable", required_argument, NULL, ARG_MUTABLE }, + {} + }; + +@@ -1482,6 +2286,13 @@ static int parse_argv(int argc, char *argv[]) { + arg_no_reload = true; + break; + ++ case ARG_MUTABLE: ++ r = parse_mutable_mode(optarg); ++ if (r < 0) ++ return log_error_errno(r, "Failed to parse argument to --mutable=: %s", optarg); ++ arg_mutable = r; ++ break; ++ + case '?': + return -EINVAL; + +@@ -1514,12 +2325,23 @@ static int sysext_main(int argc, char *argv[]) { + } + + static int run(int argc, char *argv[]) { ++ const char *env_var; + int r; + + log_setup(); + + arg_image_class = invoked_as(argv, "systemd-confext") ? IMAGE_CONFEXT : IMAGE_SYSEXT; + ++ env_var = getenv(image_class_info[arg_image_class].mode_env); ++ if (env_var) { ++ r = parse_mutable_mode(env_var); ++ if (r < 0) ++ log_warning("Failed to parse %s environment variable value '%s'. Ignoring.", ++ image_class_info[arg_image_class].mode_env, env_var); ++ else ++ arg_mutable = r; ++ } ++ + r = parse_argv(argc, argv); + if (r <= 0) + return r; +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/99-default.preset b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/99-default.preset new file mode 100644 index 00000000000..d2545d5d1d1 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/99-default.preset @@ -0,0 +1,2 @@ +# Do not enable any services if /etc is detected as empty. 
+disable * diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/nsswitch.conf b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/nsswitch.conf deleted file mode 100644 index 91dbe757f9f..00000000000 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/nsswitch.conf +++ /dev/null @@ -1,27 +0,0 @@ -# Sample nss configuration for systemd - -# systemd-specific modules -# See the manual pages fore further information. -# nss-myhostname - host resolution for the local hostname -# nss-mymachines - host, user, group resolution for containers -# nss-resolve - host resolution using resolved -# nss-systemd - dynamic user/group resolution (DynamicUser in unit files) - -passwd: files mymachines systemd -shadow: files -group: files mymachines systemd -gshadow: files - -hosts: files mymachines resolve [!UNAVAIL=return] dns myhostname -networks: files - -services: db files -protocols: db files -rpc: db files -ethers: db files -netmasks: files -netgroup: files -bootparams: files - -automount: files -aliases: files diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-flatcar.conf b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-flatcar.conf new file mode 100644 index 00000000000..2b72383153a --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-flatcar.conf @@ -0,0 +1,10 @@ +# The list of directories is taken from Gentoo ebuild, where they use +# keepdir. The list isn't sorted, but tries to preserve the order of +# keepdir lines from Gentoo ebuild for easier comparisons. We skip the +# directories in /usr, though. +d /var/lib/systemd - - - - - +d /var/log/journal - - - - - +d /etc/sysctl.d - - - - - + +# This seems to be our own addition. 
+d /var/log/journal/remote - systemd-journal-remote systemd-journal-remote - - diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-resolv.conf b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-resolv.conf new file mode 100644 index 00000000000..32b7e9d2144 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-resolv.conf @@ -0,0 +1,2 @@ +d /run/systemd/network - - - - - +L /run/systemd/network/resolv.conf - - - - ../resolve/resolv.conf diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-user.pam b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-user.pam deleted file mode 100644 index 38ae3211f8d..00000000000 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-user.pam +++ /dev/null @@ -1,5 +0,0 @@ -account include system-auth - -session required pam_loginuid.so -session include system-auth -session optional pam_systemd.so diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.8.ebuild b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.8.ebuild index c4ed9c5dda6..22a1978bb06 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.8.ebuild +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.8.ebuild @@ -25,12 +25,13 @@ else SRC_URI="https://github.com/systemd/${MY_PN}/archive/v${MY_PV}/${MY_P}.tar.gz" if [[ ${PV} != *rc* ]] ; then - KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86" + # Flatcar: mark as stable + KEYWORDS="~alpha amd64 ~arm arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86" fi fi inherit bash-completion-r1 linux-info meson-multilib optfeature pam python-single-r1 -inherit secureboot systemd toolchain-funcs udev +inherit secureboot systemd tmpfiles toolchain-funcs udev 
DESCRIPTION="System and service manager for Linux" HOMEPAGE="http://systemd.io/" @@ -101,6 +102,14 @@ DEPEND="${COMMON_DEPEND} PEFILE_DEPEND='dev-python/pefile[${PYTHON_USEDEP}]' # baselayout-2.2 has /run +# +# Flatcar: Drop sec-policy/selinux-ntp from deps (under selinux use +# flag). The image stage fails with "Failed to resolve +# typeattributeset statement at +# /var/lib/selinux/mcs/tmp/modules/400/ntp/cil:120" +# +# Flatcar: Added a dep on sys-apps/kbd. It provides a loadkeys binary +# needed by dracut's systemd-vconsole-setup module. RDEPEND="${COMMON_DEPEND} >=acct-group/adm-0-r1 >=acct-group/wheel-0-r1 @@ -129,13 +138,13 @@ RDEPEND="${COMMON_DEPEND} >=acct-user/systemd-resolve-0-r1 >=acct-user/systemd-timesync-0-r1 >=sys-apps/baselayout-2.2 + sys-apps/kbd ukify? ( ${PYTHON_DEPS} $(python_gen_cond_dep "${PEFILE_DEPEND}") ) selinux? ( sec-policy/selinux-base-policy[systemd] - sec-policy/selinux-ntp ) sysv-utils? ( !sys-apps/openrc[sysv-utils(-)] @@ -186,11 +195,12 @@ QA_FLAGS_IGNORED="usr/lib/systemd/boot/efi/.*" QA_EXECSTACK="usr/lib/systemd/boot/efi/*" pkg_pretend() { - if use split-usr; then - eerror "Please complete the migration to merged-usr." - eerror "https://wiki.gentoo.org/wiki/Merge-usr" - die "systemd no longer supports split-usr" - fi + # Flatcar: We keep using split-usr for SDK. + # if use split-usr; then + # eerror "Please complete the migration to merged-usr." + # eerror "https://wiki.gentoo.org/wiki/Merge-usr" + # die "systemd no longer supports split-usr" + # fi if [[ ${MERGE_TYPE} != buildonly ]]; then local CONFIG_CHECK="~BLK_DEV_BSG ~CGROUPS ~CGROUP_BPF ~DEVTMPFS ~EPOLL ~FANOTIFY ~FHANDLE @@ -246,6 +256,17 @@ src_unpack() { src_prepare() { local PATCHES=( "${FILESDIR}/systemd-test-process-util.patch" + "${FILESDIR}"/255-install-format-overflow.patch + # Flatcar: Adding our own patches here. 
+ "${FILESDIR}/0001-wait-online-set-any-by-default.patch" + "${FILESDIR}/0002-networkd-default-to-kernel-IPForwarding-setting.patch" + "${FILESDIR}/0003-needs-update-don-t-require-strictly-newer-usr.patch" + "${FILESDIR}/0004-core-use-max-for-DefaultTasksMax.patch" + "${FILESDIR}/0005-systemd-Disable-SELinux-permissions-checks.patch" + "${FILESDIR}/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch" + "${FILESDIR}/0007-units-Keep-using-old-journal-file-format.patch" + # Flatcar: This can be dropped when updating to 256. + "${FILESDIR}/0008-sysext-Mutable-overlays.patch" ) if ! use vanilla; then @@ -255,6 +276,23 @@ src_prepare() { ) fi + # Fails with split-usr. + sed -i -e '2i exit 77' test/test-rpm-macros.sh || die + + # Flatcar: The Kubelet takes /etc/resolv.conf for, e.g., + # CoreDNS which has dnsPolicy "default", but unless the + # kubelet --resolv-conf flag is set to point to + # /run/systemd/resolve/resolv.conf this won't work with + # /etc/resolv.conf pointing to + # /run/systemd/resolve/stub-resolv.conf which configures + # 127.0.0.53. See + # https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues + # This means that users who need split DNS to work should + # point /etc/resolv.conf back to + # /run/systemd/resolve/stub-resolv.conf (and if using K8s + # configure the kubelet resolvConf variable/--resolv-conf flag + # to /run/systemd/resolve/resolv.conf). + sed -i -e 's,/run/systemd/resolve/stub-resolv.conf,/run/systemd/resolve/resolv.conf,' tmpfiles.d/systemd-resolve.conf || die default } @@ -267,16 +305,34 @@ src_configure() { multilib-minimal_src_configure } +# Flatcar: Our helper, used in several places below. Prints "${EPREFIX:-/}" when USE=split-usr is enabled and "${EPREFIX}/usr" otherwise. +get_rootprefix() { + usex split-usr "${EPREFIX:-/}" "${EPREFIX}/usr" +} + multilib_src_configure() { local myconf=( --localstatedir="${EPREFIX}/var" # default is developer, bug 918671 -Dmode=release - -Dsupport-url="https://gentoo.org/support/" + # Flatcar: Point to our user mailing list.
+ -Dsupport-url="https://groups.google.com/forum/#!forum/flatcar-linux-user" -Dpamlibdir="$(getpam_mod_dir)" # avoid bash-completion dep -Dbashcompletiondir="$(get_bashcompdir)" - -Dsplit-bin=false + # Flatcar: We keep using split-usr in SDK. + $(meson_use split-usr) + # Flatcar: Always set split-bin to true, we always + # have separate bin and sbin directories + -Dsplit-bin=true + # Flatcar: Use get_rootprefix. No functional change + # from upstream, just refactoring the common code used + # in some places. + # + # TODO: Drop -Drootprefix and -Drootlibdir once we get + # rid of split-usr in SDK. + -Drootprefix="$(get_rootprefix)" + -Drootlibdir="${EPREFIX}/usr/$(get_libdir)" # Disable compatibility with sysvinit -Dsysvinit-path= -Dsysvrcnd-path= @@ -326,9 +382,11 @@ multilib_src_configure() { $(meson_native_use_bool test dbus) $(meson_native_use_bool ukify) $(meson_native_use_bool xkb xkbcommon) - -Dntp-servers="0.gentoo.pool.ntp.org 1.gentoo.pool.ntp.org 2.gentoo.pool.ntp.org 3.gentoo.pool.ntp.org" + # Flatcar: Use our ntp servers. + -Dntp-servers="0.flatcar.pool.ntp.org 1.flatcar.pool.ntp.org 2.flatcar.pool.ntp.org 3.flatcar.pool.ntp.org" # Breaks screen, tmux, etc. -Ddefault-kill-user-processes=false + # Flatcar: TODO: Investigate if we want this. -Dcreate-log-dirs=false # multilib options @@ -352,6 +410,42 @@ multilib_src_configure() { $(meson_native_true tmpfiles) $(meson_native_true vconsole) $(meson_native_enabled vmspawn) + # Flatcar: Specify this, or meson breaks due to no + # /etc/login.defs. + -Dsystem-gid-max=999 + -Dsystem-uid-max=999 + + # Flatcar: DBus paths. + -Ddbussessionservicedir="${EPREFIX}/usr/share/dbus-1/services" + -Ddbussystemservicedir="${EPREFIX}/usr/share/dbus-1/system-services" + + # Flatcar: PAM config directory. + -Dpamconfdir=/usr/share/pam.d + + # Flatcar: The CoreOS epoch, Mon Jul 1 00:00:00 UTC + # 2013. Used by timesyncd as a sanity check for the + # minimum acceptable time. Explicitly set to avoid + # using the current build time.
+ -Dtime-epoch=1372636800 + + # Flatcar: No default name servers. + -Ddns-servers= + + # Flatcar: Disable the "First Boot Wizard", it isn't + # very applicable to us. + -Dfirstboot=false + + # Flatcar: Set latest network interface naming scheme + # for https://github.com/flatcar/Flatcar/issues/36 + -Ddefault-net-naming-scheme=latest + + # Flatcar: Combined log format: name plus description + -Dstatus-unit-format-default=combined + + # Flatcar: Unported options, still needed? + -Dquotaon-path=/usr/sbin/quotaon + -Dquotacheck-path=/usr/sbin/quotacheck + -Ddefault-mdns=no ) meson_src_configure "${myconf[@]}" @@ -374,7 +468,9 @@ multilib_src_install_all() { mv "${ED}"/usr/share/doc/{systemd,${PF}} || die einstalldocs - dodoc "${FILESDIR}"/nsswitch.conf + # Flatcar: Do not install sample nsswitch.conf, we don't + # provide it. + # dodoc "${FILESDIR}"/nsswitch.conf insinto /usr/lib/tmpfiles.d doins "${FILESDIR}"/legacy.conf @@ -392,6 +488,8 @@ multilib_src_install_all() { # https://bugs.gentoo.org/761763 rm -r "${ED}"/usr/lib/sysusers.d || die + # Flatcar: Upstream uses keepdir commands to keep some empty + # directories. We use tmpfiles. 
# Preserve empty dirs in /etc & /var, bug #437008 keepdir /etc/{binfmt.d,modules-load.d,tmpfiles.d} keepdir /etc/kernel/install.d @@ -400,25 +498,131 @@ multilib_src_install_all() { keepdir /etc/udev/hwdb.d - keepdir /usr/lib/systemd/{system-sleep,system-shutdown} - keepdir /usr/lib/{binfmt.d,modules-load.d} - keepdir /usr/lib/systemd/user-generators - keepdir /var/lib/systemd - keepdir /var/log/journal + # keepdir /usr/lib/systemd/{system-sleep,system-shutdown} + # keepdir /usr/lib/{binfmt.d,modules-load.d} + # keepdir /usr/lib/systemd/user-generators + # keepdir /var/lib/systemd + # keepdir /var/log/journal - if use pam; then - newpamd "${FILESDIR}"/systemd-user.pam systemd-user - fi + # if use pam; then + # newpamd "${FILESDIR}"/systemd-user.pam systemd-user + # fi if use kernel-install; then # Dummy config, remove to make room for sys-kernel/installkernel rm "${ED}/usr/lib/kernel/install.conf" || die fi + # Flatcar: Ensure journal directory has correct ownership/mode + # in initial image. This is fixed by systemd-tmpfiles *but* + # journald starts before that and will create the journal if + # the filesystem is already read-write. Conveniently the + # systemd Makefile sets this up completely wrong. + # + # Flatcar: TODO: Is this still a problem? + dodir /var/log/journal + fowners root:systemd-journal /var/log/journal + fperms 2755 /var/log/journal + + # Flatcar: Don't prune systemd dirs. + dotmpfiles "${FILESDIR}"/systemd-flatcar.conf + # Flatcar: Add tmpfiles rule for resolv.conf. This path has + # changed after v213 so it must be handled here instead of + # baselayout now. + dotmpfiles "${FILESDIR}"/systemd-resolv.conf + + # Flatcar: Don't default to graphical.target. + local unitdir=$(builddir_systemd_get_systemunitdir) + dosym multi-user.target "${unitdir}"/default.target + + # Flatcar: Don't set any extra environment variables by default.
+ rm "${ED}/usr/lib/environment.d/99-environment.conf" || die + + # Flatcar: These lines more or less follow the systemd's + # preset file (90-systemd.preset). We do it that way, to avoid + # putting symlinks in /etc. Please keep the lines in the same + # order as the "enable" lines appear in the preset file. For a + # single enable line in preset, there may be more lines if the + # unit file had Also: clause which has units we enable here + # too. + + # Flatcar: enable remote-fs.target + builddir_systemd_enable_service multi-user.target remote-fs.target + # Flatcar: enable remote-cryptsetup.target + if use cryptsetup; then + builddir_systemd_enable_service multi-user.target remote-cryptsetup.target + fi + # Flatcar: enable machines.target + builddir_systemd_enable_service multi-user.target machines.target + # Flatcar: enable getty@.service + dodir "${unitdir}/getty.target.wants" + dosym ../getty@.service "${unitdir}/getty.target.wants/getty@tty1.service" + # Flatcar: enable systemd-timesyncd.service + builddir_systemd_enable_service sysinit.target systemd-timesyncd.service + # Flatcar: enable systemd-networkd.service (Also: systemd-networkd.socket, systemd-networkd-wait-online.service) + builddir_systemd_enable_service multi-user.target systemd-networkd.service + builddir_systemd_enable_service sockets.target systemd-networkd.socket + builddir_systemd_enable_service network-online.target systemd-networkd-wait-online.service + # Flatcar: enable systemd-network-generator.service + builddir_systemd_enable_service sysinit.target systemd-network-generator.service + # Flatcar: enable systemd-resolved.service + builddir_systemd_enable_service multi-user.target systemd-resolved.service + # Flatcar: enable systemd-homed.service (Also: systemd-userdbd.service [not enabled - has no WantedBy entry]) + if use homed; then + builddir_systemd_enable_service multi-user.target systemd-homed.service + fi + # Flatcar: enable systemd-userdbd.socket + builddir_systemd_enable_service
sockets.target systemd-userdbd.socket + # Flatcar: enable systemd-pstore.service + builddir_systemd_enable_service sysinit.target systemd-pstore.service + # Flatcar: enable systemd-boot-update.service + if use boot; then + builddir_systemd_enable_service sysinit.target systemd-boot-update.service + fi + # Flatcar: enable reboot.target (not enabled - has no WantedBy + # entry) + + # Flatcar: enable systemd-sysext.service by default + builddir_systemd_enable_service sysinit.target systemd-sysext.service + + # Flatcar: Use an empty preset file, because systemctl + # preset-all puts symlinks in /etc, not in /usr. We don't use + # /etc, because it is not autoupdated. We do the "preset" above. + rm "${ED}/usr/lib/systemd/system-preset/90-systemd.preset" || die + insinto /usr/lib/systemd/system-preset + doins "${FILESDIR}"/99-default.preset + + # Flatcar: Do not ship distro-specific files (nsswitch.conf + # pam.d). This conflicts with our own configuration provided + # by baselayout. + rm -rf "${ED}"/usr/share/factory + sed -i "${ED}"/usr/lib/tmpfiles.d/etc.conf \ + -e '/^C!* \/etc\/nsswitch\.conf/d' \ + -e '/^C!* \/etc\/pam\.d/d' \ + -e '/^C!* \/etc\/issue/d' || die use ukify && python_fix_shebang "${ED}" use boot && secureboot_auto_sign } +# Flatcar: Our own version of systemd_get_systemunitdir, that returns +# the unit directory below $(get_rootprefix), not one in /etc. +builddir_systemd_get_systemunitdir() { + echo "$(get_rootprefix)/lib/systemd/system" +} + +# Flatcar: Our own version of systemd_enable_service, that creates +# the <target>.wants symlink below builddir_systemd_get_systemunitdir +# instead of /etc. +# $1: target that pulls the unit in, $2: unit to enable.
+builddir_systemd_enable_service() { + local target=${1} + local service=${2} + local ud=$(builddir_systemd_get_systemunitdir) + local destname=${service##*/} + + dodir "${ud}"/"${target}".wants && \ + dosym ../"${service}" "${ud}"/"${target}".wants/"${destname}" +} migrate_locale() { local envd_locale_def="${EROOT}/etc/env.d/02locale" local envd_locale=( "${EROOT}"/etc/env.d/??locale ) @@ -469,6 +673,23 @@ pkg_preinst() { dosym ../../../etc/sysctl.conf /usr/lib/sysctl.d/99-sysctl.conf fi + # Flatcar: This used to be in upstream ebuild, but now it's + # gone. We should drop it once we get rid of split-usr in SDK. + if ! use split-usr; then + local dir + # Flatcar: We still use separate bin and sbin, so drop usr/sbin from the list. + for dir in bin sbin lib; do + if [[ ! -L ${EROOT}/${dir} ]]; then + eerror "'${EROOT}/${dir}' is not a symbolic link." + FAIL=1 + fi + done + if [[ ${FAIL} ]]; then + eerror "Migration to system layout with merged directories must be performed before" + eerror "installing ${CATEGORY}/${PN} with USE=\"-split-usr\" to avoid run-time breakage." + die "System layout with split directories still used" + fi + fi if ! use boot && has_version "sys-apps/systemd[gnuefi(-)]"; then ewarn "The 'gnuefi' USE flag has been renamed to 'boot'." ewarn "Make sure to enable the 'boot' USE flag if you use systemd-boot." @@ -488,13 +709,15 @@ pkg_postinst() { # between OpenRC & systemd migrate_locale - if [[ -z ${REPLACING_VERSIONS} ]]; then - if type systemctl &>/dev/null; then - systemctl --root="${ROOT:-/}" enable getty@.service remote-fs.target || FAIL=1 - fi - elog "To enable a useful set of services, run the following:" - elog " systemctl preset-all --preset-mode=enable-only" - fi + # Flatcar: We enable getty and remote-fs targets in /usr + # ourselves above.
+ # if [[ -z ${REPLACING_VERSIONS} ]]; then + # if type systemctl &>/dev/null; then + # systemctl --root="${ROOT:-/}" enable getty@.service remote-fs.target || FAIL=1 + # fi + # elog "To enable a useful set of services, run the following:" + # elog " systemctl preset-all --preset-mode=enable-only" + # fi if [[ -L ${EROOT}/var/lib/systemd/timesync ]]; then rm "${EROOT}/var/lib/systemd/timesync"