[bug#77638,3/8] linux-container: Support having a read-only root file system.

Message ID 1a625b6062f6dbb5b73ae42eef277e21ee05f0bf.1744114408.git.ludo@gnu.org
State New
Headers
Series Harden 'call-with-container' |

Commit Message

Ludovic Courtès April 8, 2025, 12:24 p.m. UTC
  Until now, the read-only file system set up by ‘call-with-container’
would always be writable.  With this change, it can be made read-only.
With this patch, only ‘least-authority-wrapper’ switches to a read-only
root file system.

* gnu/build/linux-container.scm (remount-read-only): New procedure.
(mount-file-systems): Add #:writable-root? and #:populate-file-system
and honor them.
(run-container): Likewise.
(call-with-container): Likewise.
* gnu/system/linux-container.scm (container-script): Pass #:writable-root?
to ‘call-with-container’.
(eval/container): Add #:populate-file-system and #:writable-root? and
honor them.
* guix/scripts/environment.scm (launch-environment/container):
Pass #:writable-root? to ‘call-with-container’.
* guix/scripts/home.scm (spawn-home-container): Likewise.
* tests/containers.scm ("call-with-container, mnt namespace, read-only root")
("call-with-container, mnt namespace, writable root"): New tests.

Change-Id: I603e2fd08851338b737bb16c8af3f765e2538906
---
 gnu/build/linux-container.scm  | 38 +++++++++++++++++++++++++++++-----
 gnu/system/linux-container.scm |  5 +++++
 guix/scripts/environment.scm   |  1 +
 guix/scripts/home.scm          |  1 +
 tests/containers.scm           | 26 +++++++++++++++++++++++
 5 files changed, 66 insertions(+), 5 deletions(-)
  

Patch

diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm
index a5c5d8962e..4dcdaa8f33 100644
--- a/gnu/build/linux-container.scm
+++ b/gnu/build/linux-container.scm
@@ -75,10 +75,16 @@  (define (purify-environment)
             (match (get-environment-variables)
               (((names . _) ...) names))))
 
+(define (remount-read-only mount-point)
+  (mount mount-point mount-point "none"
+         (logior MS_BIND MS_REMOUNT MS_RDONLY)))
+
 ;; The container setup procedure closely resembles that of the Docker
 ;; specification:
 ;; https://raw.githubusercontent.com/docker/libcontainer/master/SPEC.md
-(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?)
+(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?
+                             (populate-file-system (const #t))
+                             writable-root?)
   "Mount the essential file systems and the those in MOUNTS, a list of
 <file-system> objects, relative to ROOT; then make ROOT the new root directory
 for the process."
@@ -177,7 +183,10 @@  (define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?)
     (chdir "/")
     (umount "real-root" MNT_DETACH)
     (rmdir "real-root")
-    (chmod "/" #o755)))
+    (populate-file-system)
+    (chmod "/" #o755)
+    (unless writable-root?
+      (remount-read-only "/"))))
 
 (define* (initialize-user-namespace pid host-uids
                                     #:key (guest-uid 0) (guest-gid 0))
@@ -226,13 +235,19 @@  (define (namespaces->bit-mask namespaces)
               namespaces)))
 
 (define* (run-container root mounts namespaces host-uids thunk
-                        #:key (guest-uid 0) (guest-gid 0))
+                        #:key (guest-uid 0) (guest-gid 0)
+                        (populate-file-system (const #t))
+                        writable-root?)
   "Run THUNK in a new container process and return its PID.  ROOT specifies
 the root directory for the container.  MOUNTS is a list of <file-system>
 objects that specify file systems to mount inside the container.  NAMESPACES
 is a list of symbols that correspond to the possible Linux namespaces: mnt,
 ipc, uts, user, and net.
 
+When WRITABLE-ROOT? is false, remount the container's root as read-only before
+calling THUNK.  Call POPULATE-FILE-SYSTEM before the root is (potentially)
+made read-only.
+
 HOST-UIDS specifies the number of host user identifiers to map into the user
 namespace.  GUEST-UID and GUEST-GID specify the first UID (respectively GID)
 that host UIDs (respectively GIDs) map to in the namespace."
@@ -258,7 +273,12 @@  (define* (run-container root mounts namespaces host-uids thunk
                       (mount-file-systems root mounts
                                           #:mount-/proc? (memq 'pid namespaces)
                                           #:mount-/sys?  (memq 'net
-                                                               namespaces)))
+                                                               namespaces)
+                                          #:populate-file-system
+                                          populate-file-system
+                                          #:writable-root?
+                                          (or writable-root?
+                                              (not (memq 'mnt namespaces)))))
                     (lambda args
                       ;; Forward the exception to the parent process.
                       ;; FIXME: SRFI-35 conditions and non-trivial objects
@@ -329,6 +349,8 @@  (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
                               (host-uids 1) (guest-uid 0) (guest-gid 0)
                               (relayed-signals (list SIGINT SIGTERM))
                               (child-is-pid1? #t)
+                              (populate-file-system (const #t))
+                              writable-root?
                               (process-spawned-hook (const #t)))
   "Run THUNK in a new container process and return its exit status; call
 PROCESS-SPAWNED-HOOK with the PID of the new process that has been spawned.
@@ -349,6 +371,10 @@  (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
 RELAYED-SIGNALS is the list of signals that are \"relayed\" to the container
 process when caught by its parent.
 
+When WRITABLE-ROOT? is false, remount the container's root as read-only before
+calling THUNK.  Call POPULATE-FILE-SYSTEM before the root is (potentially)
+made read-only.
+
 When CHILD-IS-PID1? is true, and if NAMESPACES contains 'pid', then the child
 process runs directly as PID 1.  As such, it is responsible for (1) installing
 signal handlers and (2) reaping terminated processes by calling 'waitpid'.
@@ -402,7 +428,9 @@  (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
    (lambda (root)
      (let ((pid (run-container root mounts namespaces host-uids thunk*
                                #:guest-uid guest-uid
-                               #:guest-gid guest-gid)))
+                               #:guest-gid guest-gid
+                               #:populate-file-system populate-file-system
+                               #:writable-root? writable-root?)))
        (install-signal-handlers pid)
        (process-spawned-hook pid)
        (match (waitpid pid)
diff --git a/gnu/system/linux-container.scm b/gnu/system/linux-container.scm
index 3622328500..e7cb90d091 100644
--- a/gnu/system/linux-container.scm
+++ b/gnu/system/linux-container.scm
@@ -312,12 +312,15 @@  (define* (container-script os #:key (mappings '()) shared-network?)
                 #:namespaces (if #$shared-network?
                                  (delq 'net %namespaces)
                                  %namespaces)
+                #:writable-root? #t
                 #:process-spawned-hook explain)))))
 
     (gexp->script "run-container" script)))
 
 (define* (eval/container exp
                          #:key
+                         (populate-file-system (const #t))
+                         writable-root?
                          (mappings '())
                          (mounts '())
                          (namespaces %namespaces)
@@ -367,6 +370,8 @@  (define* (eval/container exp
                                    (list "-c"
                                          (object->string
                                           (lowered-gexp-sexp lowered))))))
+                  #:writable-root? writable-root?
+                  #:populate-file-system populate-file-system
                   #:namespaces namespaces
                   #:guest-uid guest-uid
                   #:guest-gid guest-gid))))))
diff --git a/guix/scripts/environment.scm b/guix/scripts/environment.scm
index 648a497743..4be9807163 100644
--- a/guix/scripts/environment.scm
+++ b/guix/scripts/environment.scm
@@ -959,6 +959,7 @@  (define* (launch-environment/container #:key command bash user user-mappings
                                  #:emulate-fhs? emulate-fhs?)))
           #:guest-uid uid
           #:guest-gid gid
+          #:writable-root? #t                     ;for backward compatibility
           #:namespaces (if network?
                            (delq 'net %namespaces) ; share host network
                            %namespaces)))))))
diff --git a/guix/scripts/home.scm b/guix/scripts/home.scm
index 56a4b7c7d4..7ce6217324 100644
--- a/guix/scripts/home.scm
+++ b/guix/scripts/home.scm
@@ -375,6 +375,7 @@  (define* (spawn-home-container home
                     (type "tmpfs")
                     (check? #f)))
    #:mappings (append network-mappings mappings)
+   #:writable-root? #t
    #:guest-uid uid
    #:guest-gid gid))
 
diff --git a/tests/containers.scm b/tests/containers.scm
index 70d5ba2d30..1e915d517e 100644
--- a/tests/containers.scm
+++ b/tests/containers.scm
@@ -142,6 +142,32 @@  (define (skip-if-unsupported)
        (assert-exit (= #o755 (stat:perms (lstat "/")))))
      #:namespaces '(user mnt))))
 
+(skip-if-unsupported)
+(test-assert "call-with-container, mnt namespace, read-only root"
+  (zero?
+   (call-with-container '()
+     (lambda ()
+       (assert-exit (and (file-is-directory? "/witness")
+                         (catch 'system-error
+                           (lambda ()
+                             (mkdir "/whatever")
+                             #f)
+                           (lambda args
+                             (= (system-error-errno args) EROFS))))))
+     #:populate-file-system (lambda ()
+                              (mkdir "/witness"))
+     #:namespaces '(user mnt))))
+
+(skip-if-unsupported)
+(test-assert "call-with-container, mnt namespace, writable root"
+  (zero?
+   (call-with-container '()
+     (lambda ()
+       (mkdir "whatever")
+       (assert-exit (file-is-directory? "/whatever")))
+     #:writable-root? #t
+     #:namespaces '(user mnt))))
+
 (skip-if-unsupported)
 (test-assert "container-excursion"
   (call-with-temporary-directory