[bug#77638,3/8] linux-container: Support having a read-only root file system.
Commit Message
Until now, the read-only file system set up by ‘call-with-container’
would always be writable. With this change, it can be made read-only.
With this patch, only ‘least-authority-wrapper’ switches to a read-only
root file system.
* gnu/build/linux-container.scm (remount-read-only): New procedure.
(mount-file-systems): Add #:writable-root? and #:populate-file-system
and honor them.
(run-container): Likewise.
(call-with-container): Likewise.
* gnu/system/linux-container.scm (container-script): Pass #:writable-root?
to ‘call-with-container’.
(eval/container): Add #:populate-file-system and #:writable-root? and
honor them.
* guix/scripts/environment.scm (launch-environment/container):
Pass #:writable-root? to ‘call-with-container’.
* guix/scripts/home.scm (spawn-home-container): Likewise.
* tests/containers.scm ("call-with-container, mnt namespace, read-only root")
("call-with-container, mnt namespace, writable root"): New tests.
Change-Id: I603e2fd08851338b737bb16c8af3f765e2538906
---
gnu/build/linux-container.scm | 38 +++++++++++++++++++++++++++++-----
gnu/system/linux-container.scm | 5 +++++
guix/scripts/environment.scm | 1 +
guix/scripts/home.scm | 1 +
tests/containers.scm | 26 +++++++++++++++++++++++
5 files changed, 66 insertions(+), 5 deletions(-)
@@ -75,10 +75,16 @@ (define (purify-environment)
(match (get-environment-variables)
(((names . _) ...) names))))
+(define (remount-read-only mount-point)
+ (mount mount-point mount-point "none"
+ (logior MS_BIND MS_REMOUNT MS_RDONLY)))
+
;; The container setup procedure closely resembles that of the Docker
;; specification:
;; https://raw.githubusercontent.com/docker/libcontainer/master/SPEC.md
-(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?)
+(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?
+ (populate-file-system (const #t))
+ writable-root?)
"Mount the essential file systems and the those in MOUNTS, a list of
<file-system> objects, relative to ROOT; then make ROOT the new root directory
for the process."
@@ -177,7 +183,10 @@ (define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?)
(chdir "/")
(umount "real-root" MNT_DETACH)
(rmdir "real-root")
- (chmod "/" #o755)))
+ (populate-file-system)
+ (chmod "/" #o755)
+ (unless writable-root?
+ (remount-read-only "/"))))
(define* (initialize-user-namespace pid host-uids
#:key (guest-uid 0) (guest-gid 0))
@@ -226,13 +235,19 @@ (define (namespaces->bit-mask namespaces)
namespaces)))
(define* (run-container root mounts namespaces host-uids thunk
- #:key (guest-uid 0) (guest-gid 0))
+ #:key (guest-uid 0) (guest-gid 0)
+ (populate-file-system (const #t))
+ writable-root?)
"Run THUNK in a new container process and return its PID. ROOT specifies
the root directory for the container. MOUNTS is a list of <file-system>
objects that specify file systems to mount inside the container. NAMESPACES
is a list of symbols that correspond to the possible Linux namespaces: mnt,
ipc, uts, user, and net.
+When WRITABLE-ROOT? is false, remount the container's root as read-only before
+calling THUNK. Call POPULATE-FILE-SYSTEM before the root is (potentially)
+made read-only.
+
HOST-UIDS specifies the number of host user identifiers to map into the user
namespace. GUEST-UID and GUEST-GID specify the first UID (respectively GID)
that host UIDs (respectively GIDs) map to in the namespace."
@@ -258,7 +273,12 @@ (define* (run-container root mounts namespaces host-uids thunk
(mount-file-systems root mounts
#:mount-/proc? (memq 'pid namespaces)
#:mount-/sys? (memq 'net
- namespaces)))
+ namespaces)
+ #:populate-file-system
+ populate-file-system
+ #:writable-root?
+ (or writable-root?
+ (not (memq 'mnt namespaces)))))
(lambda args
;; Forward the exception to the parent process.
;; FIXME: SRFI-35 conditions and non-trivial objects
@@ -329,6 +349,8 @@ (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
(host-uids 1) (guest-uid 0) (guest-gid 0)
(relayed-signals (list SIGINT SIGTERM))
(child-is-pid1? #t)
+ (populate-file-system (const #t))
+ writable-root?
(process-spawned-hook (const #t)))
"Run THUNK in a new container process and return its exit status; call
PROCESS-SPAWNED-HOOK with the PID of the new process that has been spawned.
@@ -349,6 +371,10 @@ (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
RELAYED-SIGNALS is the list of signals that are \"relayed\" to the container
process when caught by its parent.
+When WRITABLE-ROOT? is false, remount the container's root as read-only before
+calling THUNK. Call POPULATE-FILE-SYSTEM before the root is (potentially)
+made read-only.
+
When CHILD-IS-PID1? is true, and if NAMESPACES contains 'pid', then the child
process runs directly as PID 1. As such, it is responsible for (1) installing
signal handlers and (2) reaping terminated processes by calling 'waitpid'.
@@ -402,7 +428,9 @@ (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
(lambda (root)
(let ((pid (run-container root mounts namespaces host-uids thunk*
#:guest-uid guest-uid
- #:guest-gid guest-gid)))
+ #:guest-gid guest-gid
+ #:populate-file-system populate-file-system
+ #:writable-root? writable-root?)))
(install-signal-handlers pid)
(process-spawned-hook pid)
(match (waitpid pid)
@@ -312,12 +312,15 @@ (define* (container-script os #:key (mappings '()) shared-network?)
#:namespaces (if #$shared-network?
(delq 'net %namespaces)
%namespaces)
+ #:writable-root? #t
#:process-spawned-hook explain)))))
(gexp->script "run-container" script)))
(define* (eval/container exp
#:key
+ (populate-file-system (const #t))
+ writable-root?
(mappings '())
(mounts '())
(namespaces %namespaces)
@@ -367,6 +370,8 @@ (define* (eval/container exp
(list "-c"
(object->string
(lowered-gexp-sexp lowered))))))
+ #:writable-root? writable-root?
+ #:populate-file-system populate-file-system
#:namespaces namespaces
#:guest-uid guest-uid
#:guest-gid guest-gid))))))
@@ -959,6 +959,7 @@ (define* (launch-environment/container #:key command bash user user-mappings
#:emulate-fhs? emulate-fhs?)))
#:guest-uid uid
#:guest-gid gid
+ #:writable-root? #t ;for backward compatibility
#:namespaces (if network?
(delq 'net %namespaces) ; share host network
%namespaces)))))))
@@ -375,6 +375,7 @@ (define* (spawn-home-container home
(type "tmpfs")
(check? #f)))
#:mappings (append network-mappings mappings)
+ #:writable-root? #t
#:guest-uid uid
#:guest-gid gid))
@@ -142,6 +142,32 @@ (define (skip-if-unsupported)
(assert-exit (= #o755 (stat:perms (lstat "/")))))
#:namespaces '(user mnt))))
+(skip-if-unsupported)
+(test-assert "call-with-container, mnt namespace, read-only root"
+ (zero?
+ (call-with-container '()
+ (lambda ()
+ (assert-exit (and (file-is-directory? "/witness")
+ (catch 'system-error
+ (lambda ()
+ (mkdir "/whatever")
+ #f)
+ (lambda args
+ (= (system-error-errno args) EROFS))))))
+ #:populate-file-system (lambda ()
+ (mkdir "/witness"))
+ #:namespaces '(user mnt))))
+
+(skip-if-unsupported)
+(test-assert "call-with-container, mnt namespace, writable root"
+ (zero?
+ (call-with-container '()
+ (lambda ()
+ (mkdir "whatever")
+ (assert-exit (file-is-directory? "/whatever")))
+ #:writable-root? #t
+ #:namespaces '(user mnt))))
+
(skip-if-unsupported)
(test-assert "container-excursion"
(call-with-temporary-directory