diff --git a/Makefile-boot.am b/Makefile-boot.am index e02961e5..3ca7879b 100644 --- a/Makefile-boot.am +++ b/Makefile-boot.am @@ -38,7 +38,6 @@ endif if BUILDOPT_SYSTEMD systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \ src/boot/ostree-remount.service \ - src/boot/ostree-shutdown.service \ src/boot/ostree-boot-complete.service \ src/boot/ostree-finalize-staged.service \ src/boot/ostree-finalize-staged-hold.service \ @@ -70,7 +69,6 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \ src/boot/ostree-boot-complete.service \ src/boot/ostree-prepare-root.service \ src/boot/ostree-remount.service \ - src/boot/ostree-shutdown.service \ src/boot/ostree-finalize-staged.service \ src/boot/ostree-finalize-staged-hold.service \ src/boot/ostree-state-overlay@.service \ diff --git a/src/boot/ostree-shutdown.service b/src/boot/ostree-shutdown.service deleted file mode 100644 index 24919ec5..00000000 --- a/src/boot/ostree-shutdown.service +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.0+ - -[Unit] -Description=OSTree Shutdown -Documentation=man:ostree(1) -DefaultDependencies=no -# Only enabled via generator, but for good measure -ConditionKernelCommandLine=ostree -# Run after core mounts -RequiresMountsFor=/etc /sysroot -# However, we want to only shut down after `/var` has been umounted. 
-# Since this runs via ExecStop, this Before= is actually After= at shutdown -Before=var.mount -Conflicts=umount.target -Before=umount.target - -[Service] -Type=oneshot -RemainAfterExit=yes -ExecStop=/usr/lib/ostree/ostree-remount --shutdown - -# No [Install] section - we're only enabled via generator diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index 65f07fd5..f0116251 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -108,10 +108,6 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha "local-fs.target.requires/ostree-remount.service") < 0) return glnx_throw_errno_prefix (error, "symlinkat"); - if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-shutdown.service", normal_dir_dfd, - "local-fs.target.wants/ostree-shutdown.service") - < 0) - return glnx_throw_errno_prefix (error, "symlinkat"); if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "multi-user.target.wants", 0755, cancellable, error)) return FALSE; diff --git a/src/libostree/ostree-soft-reboot.c b/src/libostree/ostree-soft-reboot.c index 809b7991..a015cb4d 100644 --- a/src/libostree/ostree-soft-reboot.c +++ b/src/libostree/ostree-soft-reboot.c @@ -63,6 +63,22 @@ _ostree_prepare_soft_reboot (GError **error) if (!glnx_shutil_mkdir_p_at (AT_FDCWD, OTCORE_RUN_NEXTROOT, 0755, NULL, error)) return FALSE; + /* Bind-mount /sysroot on itself. + * The composefs mount at /run/nextroot is going to use /sysroot, + * causing systemd to fail to umount sysroot.mount during soft-reboot. 
+ * Create a temporary bind-mount, and MNT_DETACH it when we are done + */ + if (mount (sysroot_path, sysroot_path, NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to MS_BIND '%s'", sysroot_path); + + /* Our current working directory is in the old /sysroot, + * ie we are under the bind mount, so run 'cd $PWD' + * to move to the new /sysroot + */ + g_autofree char *cwd = g_get_current_dir (); + if (chdir (cwd) < 0) + err (EXIT_FAILURE, "failed to chdir to '%s'", cwd); + // Tracks if we did successfully enable it at runtime bool using_composefs = false; if (!otcore_mount_rootfs (rootfs_config, &metadata_builder, sysroot_path, target_deployment, @@ -78,6 +94,14 @@ _ostree_prepare_soft_reboot (GError **error) if (!otcore_mount_etc (config, &metadata_builder, OTCORE_RUN_NEXTROOT, error)) return FALSE; + // detach the temporary /sysroot bind-mount + if (umount2 (sysroot_path, MNT_DETACH) < 0) + err (EXIT_FAILURE, "failed to MS_DETACH '%s'", sysroot_path); + + // run 'cd $PWD' again to go back to the old /sysroot + if (chdir (cwd) < 0) + err (EXIT_FAILURE, "failed to chdir to '%s'", cwd); + // And set up /sysroot. Here since we hardcode composefs, we also hardcode // having a read-only /sysroot. g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_SYSROOT_RO, diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 7743eff0..97880059 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -325,6 +325,25 @@ main (int argc, char *argv[]) err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr"); } + /* Prepare /sysroot. + * The future / (currently at /sysroot.tmp) is an overlayfs or composefs that uses + * the physical root (currently at /sysroot), and we want to mount the physical root + * on top of the future / (at /sysroot.tmp/sysroot). 
+ * If we MS_MOVE /sysroot to /sysroot.tmp/sysroot, we end up with a mount cycle, + * and systemd fails to unmount sysroot.mount. + * To avoid the mount cycle, bind-mount the physical root and then detach it. + */ + if (mount (root_mountpoint, TMP_SYSROOT "/sysroot", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to MS_BIND '%s' to 'sysroot'", root_mountpoint); + + if (umount2 (root_mountpoint, MNT_DETACH) < 0) + err (EXIT_FAILURE, "failed to MS_DETACH '%s'", root_mountpoint); + + /* Resolve deploy path again so we can use paths relative to the physical root bind-mount */ + g_autofree char *deploy_path2 = resolve_deploy_path (kernel_cmdline, TMP_SYSROOT "/sysroot"); + if (chdir (deploy_path2) < 0) + err (EXIT_FAILURE, "failed to chdir to deploy_path2"); + /* Prepare /var. * When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself) * so that the stateroot location stays writable. */ @@ -377,20 +396,12 @@ main (int argc, char *argv[]) errx (EXIT_FAILURE, "Writing %s: %s", OTCORE_RUN_BOOTED, error->message); } - if (chdir (TMP_SYSROOT) < 0) - err (EXIT_FAILURE, "failed to chdir to " TMP_SYSROOT); - - /* Now we have our ready made-up up root at - * /sysroot.tmp and the physical root at /sysroot (root_mountpoint). - * We want to end up with our deploy root at /sysroot/ and the physical - * root under /sysroot/sysroot as systemd will be responsible for - * moving /sysroot to /. + /* Now we have our ready made-up deploy root at /sysroot.tmp, + * we just need to move it to /sysroot (root_mountpoint). + * systemd will be responsible for moving /sysroot to /. 
*/ - if (mount (root_mountpoint, "sysroot", NULL, MS_MOVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to MS_MOVE '%s' to 'sysroot'", root_mountpoint); - - if (mount (".", root_mountpoint, NULL, MS_MOVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to MS_MOVE %s to %s", ".", root_mountpoint); + if (mount (TMP_SYSROOT, root_mountpoint, NULL, MS_MOVE | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to MS_MOVE %s to %s", TMP_SYSROOT, root_mountpoint); if (chdir (root_mountpoint) < 0) err (EXIT_FAILURE, "failed to chdir to %s", root_mountpoint); diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index b1c829ea..f0a4b3d9 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -42,12 +42,6 @@ #include "ostree-mount-util.h" #include "otcore.h" -static gboolean opt_shutdown; - -static GOptionEntry options[] = { { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &opt_shutdown, - "Perform shutdown unmounting", NULL }, - { NULL } }; - static void do_remount (const char *target, bool writable) { @@ -139,61 +133,10 @@ relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean i #endif } -// ostree-prepare-root sets things up so that /sysroot points to the "physical" (real) root in the -// initramfs, and then with composefs `/` is an overlay+EROFS that holds references to content in -// that physical filesystem. -// -// In a typical mutable system where the OS is in a mutable `/` (or `/usr), systemd explicitly -// skips unmounting both `/` and `/usr`. It will remount them read-only though - and that's -// the semantic we want to match here. -static void -do_shutdown (void) -{ - const char *sysroot = "/sysroot"; - if (mount (sysroot, sysroot, NULL, MS_REMOUNT | MS_SILENT | MS_RDONLY, NULL) < 0) - { - // Hopefully at this point nothing has any write references, but if they - // do we still want to continue. 
- perror ("Remounting /sysroot read-only"); - } - // And fully detach it from the mountns because otherwise systemd thinks - // it can be unmounted, but it can't - it's required by `/` (and in a - // composefs setup `/etc`) and possibly `/var`. Again, we only really - // care that it got mounted read-only and hence outstanding data flushed. - // A better fix in the future would be to teach systemd to honor `-.mount` - // having a `Requires=sysroot.mount` meaning we can't unmount the latter. - if (umount2 (sysroot, MNT_DETACH) < 0) - err (EXIT_FAILURE, "umount(/sysroot)"); - - // And finally: /etc - // NOTE! This one is intentionally last in that we want to try to make - // this read-only, but if it fails, systemd-shutdown will have another - // attempt after a process killing spree. If anything happens to be - // holding a writable fd at this point, conceptually it would have - // created race conditions vs ostree-finalize-staged.service, and so - // having this service fail will be a signal that those things need - // to be fixed. - do_remount ("/etc", false); - // Don't add anything else after this. -} - int main (int argc, char *argv[]) { g_autoptr (GError) error = NULL; - g_autoptr (GOptionContext) context = g_option_context_new (""); - g_option_context_add_main_entries (context, options, NULL); - if (!g_option_context_parse (context, &argc, &argv, &error)) - errx (EXIT_FAILURE, "Error parsing options: %s", error->message); - - // Handle the shutdown option - if (opt_shutdown) - { - do_shutdown (); - return 0; - } - // Otherwise fall through to the default startup - g_autoptr (GVariant) ostree_run_metadata_v = NULL; { glnx_autofd int fd = open (OTCORE_RUN_BOOTED, O_RDONLY | O_CLOEXEC);