@@ -1,6 +1,6 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 David Thompson <davet@gnu.org>
-;;; Copyright © 2017, 2018, 2019 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2017, 2018, 2019, 2022 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -21,6 +21,7 @@ (define-module (gnu build linux-container)
#:use-module (ice-9 format)
#:use-module (ice-9 match)
#:use-module (ice-9 rdelim)
+ #:use-module (srfi srfi-1)
#:use-module (srfi srfi-98)
#:use-module (guix build utils)
#:use-module (guix build syscalls)
@@ -33,7 +34,8 @@ (define-module (gnu build linux-container)
run-container
call-with-container
container-excursion
- container-excursion*))
+ container-excursion*
+ self-sever))
(define (user-namespace-supported?)
"Return #t if user namespaces are supported on this system."
@@ -174,50 +176,53 @@ (define* (mount* source target type #:optional (flags 0) options
(chmod "/" #o755)))
(define* (initialize-user-namespace pid host-uids
- #:key (guest-uid 0) (guest-gid 0))
+ #:key (guest-uid 0) (guest-gid 0)
+ (uid (getuid)) (gid (getgid)))
"Configure the user namespace for PID. HOST-UIDS specifies the number of
host user identifiers to map into the user namespace. GUEST-UID and GUEST-GID
specify the first UID (respectively GID) that host UIDs (respectively GIDs)
map to in the namespace."
(define proc-dir
- (string-append "/proc/" (number->string pid)))
+ (string-append "/proc/"
+ (match pid
+ ('self "self")
+ (_ (number->string pid)))))
(define (scope file)
(string-append proc-dir file))
- (let ((uid (getuid))
- (gid (getgid)))
-
- ;; Only root can write to the gid map without first disabling the
- ;; setgroups syscall.
- (unless (and (zero? uid) (zero? gid))
- (call-with-output-file (scope "/setgroups")
- (lambda (port)
- (display "deny" port))))
-
- ;; Map the user/group that created the container to the root user
- ;; within the container.
- (call-with-output-file (scope "/uid_map")
+ ;; Only root can write to the gid map without first disabling the
+ ;; setgroups syscall.
+ (unless (and (zero? uid) (zero? gid))
+ (call-with-output-file (scope "/setgroups")
(lambda (port)
- (format port "~d ~d ~d" guest-uid uid host-uids)))
- (call-with-output-file (scope "/gid_map")
- (lambda (port)
- (format port "~d ~d ~d" guest-gid gid host-uids)))))
+ (display "deny" port))))
+
+ ;; Map the user/group that created the container to the root user
+ ;; within the container.
+ (call-with-output-file (scope "/uid_map")
+ (lambda (port)
+ (format port "~d ~d ~d" guest-uid uid host-uids)))
+ (call-with-output-file (scope "/gid_map")
+ (lambda (port)
+ (format port "~d ~d ~d" guest-gid gid host-uids))))
(define (namespaces->bit-mask namespaces)
"Return the number suitable for the 'flags' argument of 'clone' that
corresponds to the symbols in NAMESPACES."
;; Use the same flags as fork(3) in addition to the namespace flags.
- (apply logior SIGCHLD
- (map (match-lambda
- ('cgroup CLONE_NEWCGROUP)
- ('mnt CLONE_NEWNS)
- ('uts CLONE_NEWUTS)
- ('ipc CLONE_NEWIPC)
- ('user CLONE_NEWUSER)
- ('pid CLONE_NEWPID)
- ('net CLONE_NEWNET))
- namespaces)))
+ (fold (lambda (namespace flags)
+ (logior flags
+ (match namespace
+ ('cgroup CLONE_NEWCGROUP)
+ ('mnt CLONE_NEWNS)
+ ('uts CLONE_NEWUTS)
+ ('ipc CLONE_NEWIPC)
+ ('user CLONE_NEWUSER)
+ ('pid CLONE_NEWPID)
+ ('net CLONE_NEWNET))))
+ 0
+ namespaces))
(define* (run-container root mounts namespaces host-uids thunk
#:key (guest-uid 0) (guest-gid 0))
@@ -236,7 +241,7 @@ (define* (run-container root mounts namespaces host-uids thunk
(match (socketpair PF_UNIX SOCK_STREAM 0)
((child . parent)
(let ((flags (namespaces->bit-mask namespaces)))
- (match (clone flags)
+ (match (clone (logior SIGCHLD flags))
(0
(call-with-clean-exit
(lambda ()
@@ -392,3 +397,23 @@ (define (container-excursion* pid thunk)
(close-port out)
(close-port in)
#f)))))
+
+(define* (self-sever mounts
+ #:key (namespaces %namespaces) (host-uids 1)
+ (guest-uid 0) (guest-gid 0))
+ (let ((uid (getuid))
+ (gid (getgid)))
+ (unshare (namespaces->bit-mask namespaces))
+
+ (initialize-user-namespace 'self host-uids
+ #:uid uid #:gid gid
+ #:guest-uid uid
+ #:guest-gid guest-gid)
+
+ (when (memq 'mnt namespaces)
+ ;; (mount "none" "/" #f (logior MS_REC MS_PRIVATE))
+ (call-with-temporary-directory
+ (lambda (root)
+ (mount-file-systems root mounts
+ #:mount-/proc? (memq 'pid namespaces)
+ #:mount-/sys? (memq 'net namespaces)))))))
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014-2022 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2015 David Thompson <davet@gnu.org>
;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
@@ -49,6 +49,11 @@ (define-module (guix build syscalls)
MS_RELATIME
MS_BIND
MS_MOVE
+ MS_REC
+ MS_SILENT
+ MS_POSIXACL
+ MS_UNBINDABLE
+ MS_PRIVATE
MS_LAZYTIME
MNT_FORCE
MNT_DETACH
@@ -140,6 +145,7 @@ (define-module (guix build syscalls)
CLONE_NEWPID
CLONE_NEWNET
clone
+ unshare
setns
PF_PACKET
@@ -537,6 +543,11 @@ (define MS_REMOUNT 32)
(define MS_NOATIME 1024)
(define MS_BIND 4096)
(define MS_MOVE 8192)
+(define MS_REC 16384)
+(define MS_SILENT 32768)
+(define MS_POSIXACL 65536)
+(define MS_UNBINDABLE 131072)
+(define MS_PRIVATE 262144)
(define MS_RELATIME 2097152)
(define MS_STRICTATIME 16777216)
(define MS_LAZYTIME 33554432)
@@ -1101,6 +1112,23 @@ (define clone
(list err))
ret)))))
+(define unshare
+ (let ((proc (syscall->procedure int "unshare" (list int))))
+ (lambda (flags)
+ "Disassociate the current process from parts of its execution context
+according to FLAGS, which must be a logical or of CLONE_NEW* constants.
+
+Note that CLONE_NEWUSER requires that the calling process be single-threaded,
+which is possible if and only if libgc is running a single marker thread; this
+can be achieved by setting the GC_MARKERS environment variable to 1. If the
+calling process is multi-threaded, this throws to 'system-error' with EINVAL."
+ (let-values (((ret err)
+ (without-automatic-finalization (proc flags))))
+ (unless (zero? ret)
+ (throw 'system-error "unshare" "~a: ~A"
+ (list flags (strerror err))
+ err))))))
+
(define setns
;; Some systems may be using an old (pre-2.14) version of glibc where there
;; is no 'setns' function available.