[bug#75810,v3,05/11] daemon: Allow running as non-root with unprivileged user namespaces.

Message ID 1f4adc1c09dde70b193e1571b250e6152f0b4ca2.1740142328.git.ludo@gnu.org
State New
Headers
Series Rootless guix-daemon |

Commit Message

Ludovic Courtès Feb. 21, 2025, 1:05 p.m. UTC
  From: Ludovic Courtès <ludovic.courtes@inria.fr>

* nix/libstore/build.cc (guestUID, guestGID): New variables.
(DerivationGoal)[readiness]: New field.
(initializeUserNamespace): New function.
(DerivationGoal::runChild): When ‘readiness.readSide’ is positive, read
from it.
(DerivationGoal::startBuilder): Call ‘chown’
only when ‘buildUser.enabled()’ is true.  Pass CLONE_NEWUSER to ‘clone’
when ‘buildUser.enabled()’ is false or not running as root.  Retry
‘clone’ without CLONE_NEWUSER upon EPERM.
(DerivationGoal::registerOutputs): Make ‘actualPath’ writable before
‘rename’.
(DerivationGoal::deleteTmpDir): Catch ‘SysError’ around ‘_chown’ call.
* nix/libstore/local-store.cc (LocalStore::createUser): Do nothing if
‘dirs’ already exists.  Warn instead of failing when failing to chown
‘dir’.
* guix/substitutes.scm (%narinfo-cache-directory): Check for
‘_NIX_OPTIONS’ rather than getuid() == 0 to determine the cache
location.

Change-Id: I38fbe01f80fb45a99cd8a391e55a39a54d64fcb7
---
 guix/substitutes.scm        |   4 +-
 nix/libstore/build.cc       | 149 ++++++++++++++++++++++++++++--------
 nix/libstore/local-store.cc |  22 ++++--
 3 files changed, 135 insertions(+), 40 deletions(-)
  

Patch

diff --git a/guix/substitutes.scm b/guix/substitutes.scm
index e31b3940203..2761a3dafb4 100644
--- a/guix/substitutes.scm
+++ b/guix/substitutes.scm
@@ -1,5 +1,5 @@ 
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2013-2021, 2023-2024 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2013-2021, 2023-2025 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2014 Nikita Karetnikov <nikita@karetnikov.org>
 ;;; Copyright © 2018 Kyle Meyer <kyle@kyleam.com>
 ;;; Copyright © 2020 Christopher Baines <mail@cbaines.net>
@@ -76,7 +76,7 @@  (define %narinfo-cache-directory
   ;; time, 'guix substitute' is called by guix-daemon as root and stores its
   ;; cached data in /var/guix/….  However, when invoked from 'guix challenge'
   ;; as a user, it stores its cache in ~/.cache.
-  (if (zero? (getuid))
+  (if (getenv "_NIX_OPTIONS")                     ;invoked by guix-daemon
       (or (and=> (getenv "XDG_CACHE_HOME")
                  (cut string-append <> "/guix/substitute"))
           (string-append %state-directory "/substitute/cache"))
diff --git a/nix/libstore/build.cc b/nix/libstore/build.cc
index c87f4f767c5..107ffcfea06 100644
--- a/nix/libstore/build.cc
+++ b/nix/libstore/build.cc
@@ -747,6 +747,10 @@  private:
 
     friend int childEntry(void *);
 
+    /* Pipe to notify readiness to the child process when using unprivileged
+       user namespaces.  */
+    Pipe readiness;
+
     /* Check that the derivation outputs all exist and register them
        as valid. */
     void registerOutputs();
@@ -1622,6 +1626,25 @@  int childEntry(void * arg)
 }
 
 
+/* UID and GID of the build user inside its own user namespace.  */
+static const uid_t guestUID = 30001;
+static const gid_t guestGID = 30000;
+
+/* Initialize the user namespace of CHILD.  */
+static void initializeUserNamespace(pid_t child)
+{
+    auto hostUID = getuid();
+    auto hostGID = getgid();
+
+    writeFile("/proc/" + std::to_string(child) + "/uid_map",
+	      (format("%d %d 1") % guestUID % hostUID).str());
+
+    writeFile("/proc/" + std::to_string(child) + "/setgroups", "deny");
+
+    writeFile("/proc/" + std::to_string(child) + "/gid_map",
+	      (format("%d %d 1") % guestGID % hostGID).str());
+}
+
 void DerivationGoal::startBuilder()
 {
     auto f = format(
@@ -1685,7 +1708,7 @@  void DerivationGoal::startBuilder()
 	   then an attacker could create in it a hardlink to a root-owned file
 	   such as /etc/shadow.  If 'keepFailed' is true, the daemon would
 	   then chown that hardlink to the user, giving them write access to
-	   that file.  */
+	   that file.  See CVE-2021-27851.  */
 	tmpDir += "/top";
 	if (mkdir(tmpDir.c_str(), 0700) == 1)
 	    throw SysError("creating top-level build directory");
@@ -1802,7 +1825,7 @@  void DerivationGoal::startBuilder()
         if (mkdir(chrootRootDir.c_str(), 0750) == -1)
             throw SysError(format("cannot create ‘%1%’") % chrootRootDir);
 
-        if (chown(chrootRootDir.c_str(), 0, buildUser.getGID()) == -1)
+        if (buildUser.enabled() && chown(chrootRootDir.c_str(), 0, buildUser.getGID()) == -1)
             throw SysError(format("cannot change ownership of ‘%1%’") % chrootRootDir);
 
         /* Create a writable /tmp in the chroot.  Many builders need
@@ -1821,8 +1844,8 @@  void DerivationGoal::startBuilder()
             (format(
                 "nixbld:x:%1%:%2%:Nix build user:/:/noshell\n"
                 "nobody:x:65534:65534:Nobody:/:/noshell\n")
-                % (buildUser.enabled() ? buildUser.getUID() : getuid())
-                % (buildUser.enabled() ? buildUser.getGID() : getgid())).str());
+                % (buildUser.enabled() ? buildUser.getUID() : guestUID)
+                % (buildUser.enabled() ? buildUser.getGID() : guestGID)).str());
 
         /* Declare the build user's group so that programs get a consistent
            view of the system (e.g., "id -gn"). */
@@ -1857,7 +1880,7 @@  void DerivationGoal::startBuilder()
         createDirs(chrootStoreDir);
         chmod_(chrootStoreDir, 01775);
 
-        if (chown(chrootStoreDir.c_str(), 0, buildUser.getGID()) == -1)
+        if (buildUser.enabled() && chown(chrootStoreDir.c_str(), 0, buildUser.getGID()) == -1)
             throw SysError(format("cannot change ownership of ‘%1%’") % chrootStoreDir);
 
         foreach (PathSet::iterator, i, inputPaths) {
@@ -1948,14 +1971,34 @@  void DerivationGoal::startBuilder()
     if (useChroot) {
 	char stack[32 * 1024];
 	int flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD;
-	if (!fixedOutput) flags |= CLONE_NEWNET;
+	if (!fixedOutput) {
+	    flags |= CLONE_NEWNET;
+	}
+	if (!buildUser.enabled() || getuid() != 0) {
+	    flags |= CLONE_NEWUSER;
+	    readiness.create();
+	}
+
 	/* Ensure proper alignment on the stack.  On aarch64, it has to be 16
 	   bytes.  */
-	pid = clone(childEntry,
+ 	pid = clone(childEntry,
 		    (char *)(((uintptr_t)stack + sizeof(stack) - 8) & ~(uintptr_t)0xf),
 		    flags, this);
-	if (pid == -1)
-	    throw SysError("cloning builder process");
+	if (pid == -1) {
+	    if ((flags & CLONE_NEWUSER) != 0 && getuid() != 0)
+		/* 'clone' fails with EPERM on distros where unprivileged user
+		   namespaces are disabled.  Error out instead of giving up on
+		   isolation.  */
+		throw SysError("cannot create process in unprivileged user namespace");
+	    else
+		throw SysError("cloning builder process");
+	}
+
+	if ((flags & CLONE_NEWUSER) != 0) {
+	     /* Initialize the UID/GID mapping of the child process.  */
+	     initializeUserNamespace(pid);
+	     writeFull(readiness.writeSide, (unsigned char*)"go\n", 3);
+	}
     } else
 #endif
     {
@@ -2001,23 +2044,34 @@  void DerivationGoal::runChild()
 
         _writeToStderr = 0;
 
+	if (readiness.readSide > 0) {
+	     /* Wait for the parent process to initialize the UID/GID mapping
+		of our user namespace.  */
+	     char str[20] = { '\0' };
+	     readFull(readiness.readSide, (unsigned char*)str, 3);
+	     if (strcmp(str, "go\n") != 0)
+		  throw Error("failed to initialize process in unprivileged user namespace");
+	}
+
         restoreAffinity();
 
         commonChildInit(builderOut);
 
 #if CHROOT_ENABLED
         if (useChroot) {
-            /* Initialise the loopback interface. */
-            AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP));
-            if (fd == -1) throw SysError("cannot open IP socket");
+	    if (!fixedOutput) {
+		/* Initialise the loopback interface. */
+		AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP));
+		if (fd == -1) throw SysError("cannot open IP socket");
 
-            struct ifreq ifr;
-            strcpy(ifr.ifr_name, "lo");
-            ifr.ifr_flags = IFF_UP | IFF_LOOPBACK | IFF_RUNNING;
-            if (ioctl(fd, SIOCSIFFLAGS, &ifr) == -1)
-                throw SysError("cannot set loopback interface flags");
+		struct ifreq ifr;
+		strcpy(ifr.ifr_name, "lo");
+		ifr.ifr_flags = IFF_UP | IFF_LOOPBACK | IFF_RUNNING;
+		if (ioctl(fd, SIOCSIFFLAGS, &ifr) == -1)
+		    throw SysError("cannot set loopback interface flags");
 
-            fd.close();
+		fd.close();
+	    }
 
             /* Set the hostname etc. to fixed values. */
             char hostname[] = "localhost";
@@ -2463,8 +2517,16 @@  void DerivationGoal::registerOutputs()
             if (buildMode == bmRepair)
               replaceValidPath(path, actualPath);
             else
-              if (buildMode != bmCheck && rename(actualPath.c_str(), path.c_str()) == -1)
-                throw SysError(format("moving build output `%1%' from the chroot to the store") % path);
+		if (buildMode != bmCheck) {
+		    if (S_ISDIR(st.st_mode))
+			/* Change mode on the directory to allow for
+			   rename(2).  */
+			chmod(actualPath.c_str(), st.st_mode | 0700);
+		    if (rename(actualPath.c_str(), path.c_str()) == -1)
+			throw SysError(format("moving build output `%1%' from the chroot to the store") % path);
+		    if (S_ISDIR(st.st_mode) && chmod(path.c_str(), st.st_mode) == -1)
+			throw SysError(format("restoring permissions on directory `%1%'") % actualPath);
+		}
           }
           if (buildMode != bmCheck) actualPath = path;
         }
@@ -2723,17 +2785,42 @@  void DerivationGoal::deleteTmpDir(bool force)
             // Change the ownership if clientUid is set. Never change the
             // ownership or the group to "root" for security reasons.
             if (settings.clientUid != (uid_t) -1 && settings.clientUid != 0) {
-                _chown(tmpDir, settings.clientUid,
-                       settings.clientGid != 0 ? settings.clientGid : -1);
-
-		if (top != tmpDir) {
-		    // Rename tmpDir to its parent, with an intermediate step.
-		    string pivot = top + ".pivot";
-		    if (rename(top.c_str(), pivot.c_str()) == -1)
-			throw SysError("pivoting failed build tree");
-		    if (rename((pivot + "/top").c_str(), top.c_str()) == -1)
-			throw SysError("renaming failed build tree");
-		    rmdir(pivot.c_str());
+		uid_t uid = settings.clientUid;
+		gid_t gid = settings.clientGid != 0 ? settings.clientGid : -1;
+		try {
+		    _chown(tmpDir, uid, gid);
+
+		    if (getuid() != 0) {
+			/* If, without being root, the '_chown' call above
+			   succeeded, then it means we have CAP_CHOWN.  Retake
+			   ownership of tmpDir itself so it can be renamed
+			   below.  */
+			chown(tmpDir.c_str(), getuid(), getgid());
+		    }
+
+		    if (top != tmpDir) {
+			 /* Rename 'tmpDir' to its parent with an intermediate
+			    step.  Skip that if the '_chown' call above fails
+			    since in that case the setuid bits are not
+			    removed.  */
+			 string pivot = top + ".pivot";
+			 if (rename(top.c_str(), pivot.c_str()) == -1)
+			      throw SysError("pivoting failed build tree");
+			 if (rename((pivot + "/top").c_str(), top.c_str()) == -1)
+			      throw SysError("renaming failed build tree");
+
+			 if (getuid() != 0)
+			      /* Running unprivileged but with CAP_CHOWN.  */
+			      chown(top.c_str(), uid, gid);
+
+			 rmdir(pivot.c_str());
+		    }
+		} catch (SysError & e) {
+		    /* When running as an unprivileged user and without
+		       CAP_CHOWN, we cannot chown the build tree.  Print a
+		       message and keep going.  */
+		    printMsg(lvlInfo, format("cannot change ownership of build directory '%1%': %2%")
+			     % tmpDir % strerror(e.errNo));
 		}
             }
         }
diff --git a/nix/libstore/local-store.cc b/nix/libstore/local-store.cc
index 0883a4bbcee..4308264a4f3 100644
--- a/nix/libstore/local-store.cc
+++ b/nix/libstore/local-store.cc
@@ -306,14 +306,14 @@  void LocalStore::openDB(bool create)
 void LocalStore::makeStoreWritable()
 {
 #if HAVE_UNSHARE && HAVE_STATVFS && HAVE_SYS_MOUNT_H && defined(MS_BIND) && defined(MS_REMOUNT)
-    if (getuid() != 0) return;
     /* Check if /nix/store is on a read-only mount. */
     struct statvfs stat;
     if (statvfs(settings.nixStore.c_str(), &stat) != 0)
         throw SysError("getting info about the store mount point");
 
     if (stat.f_flag & ST_RDONLY) {
-        if (unshare(CLONE_NEWNS) == -1)
+	int flags = CLONE_NEWNS | (getpid() == 0 ? 0 : CLONE_NEWUSER);
+        if (unshare(flags) == -1)
             throw SysError("setting up a private mount namespace");
 
         if (mount(0, settings.nixStore.c_str(), "none", MS_REMOUNT | MS_BIND, 0) == -1)
@@ -1614,11 +1614,19 @@  void LocalStore::createUser(const std::string & userName, uid_t userId)
 {
     auto dir = settings.nixStateDir + "/profiles/per-user/" + userName;
 
-    createDirs(dir);
-    if (chmod(dir.c_str(), 0755) == -1)
-	throw SysError(format("changing permissions of directory '%s'") % dir);
-    if (chown(dir.c_str(), userId, -1) == -1)
-	throw SysError(format("changing owner of directory '%s'") % dir);
+    auto created = createDirs(dir);
+    if (!created.empty()) {
+	if (chmod(dir.c_str(), 0755) == -1)
+	    throw SysError(format("changing permissions of directory '%s'") % dir);
+
+	/* The following operation requires CAP_CHOWN or can be handled
+	   manually by a user with CAP_CHOWN.  */
+	if (chown(dir.c_str(), userId, -1) == -1) {
+	    rmdir(dir.c_str());
+	    string message = strerror(errno);
+	    printMsg(lvlInfo, format("failed to change owner of directory '%1%' to %2%: %3%") % dir % userId % message);
+	}
+    }
 }