diff mbox series

[bug#72137,2/2] cache: Avoid cache cleanup storms from concurrent processes.

Message ID 26f82a475d4ccd776c64c0a14628597dd65d8908.1721120884.git.ludo@gnu.org
State New
Headers show
Series Avoid cache cleanup storms | expand

Commit Message

Ludovic Courtès July 16, 2024, 9:15 a.m. UTC
Reported by Christopher Baines <guix@cbaines.net>.

* guix/cache.scm (maybe-remove-expired-cache-entries): Define
‘expiry-port’; create it with ‘lock-file’.  Change ‘last-expiry-date’
accordingly.  Write timestamp straight to ‘expiry-port’.
* tests/cache.scm ("maybe-remove-expired-cache-entries, cleanup needed
but lock taken"): New test.

Change-Id: I22441d9d2c4a339d3d3878de131864db5a0ae826
---
 guix/cache.scm  | 27 ++++++++++++++++++---------
 tests/cache.scm | 30 +++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/guix/cache.scm b/guix/cache.scm
index 6a91c7d3ef..8b12312c77 100644
--- a/guix/cache.scm
+++ b/guix/cache.scm
@@ -1,5 +1,5 @@ 
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2013-2017, 2020-2021, 2023 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2013-2017, 2020-2021, 2023-2024 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2022 Simon Tournier <zimon.toutoune@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
@@ -19,6 +19,7 @@ 
 
 (define-module (guix cache)
   #:use-module ((guix utils) #:select (with-atomic-file-output))
+  #:autoload   (guix build syscalls) (lock-file unlock-file)
   #:use-module (srfi srfi-19)
   #:use-module (srfi srfi-26)
   #:use-module (ice-9 match)
@@ -93,13 +94,19 @@  (define* (maybe-remove-expired-cache-entries cache
   (define expiry-file
     (string-append cache "/last-expiry-cleanup"))
 
+  (define expiry-port
+    ;; Get exclusive access to EXPIRY-FILE to avoid "cleanup storms" where
+    ;; several processes would concurrently decide that time has come to clean
+    ;; up the same cache.  'lock-file' might throw to 'system-error' or to
+    ;; 'flock-error'; in either case, assume that we lost the race.
+    (false-if-exception
+     (lock-file expiry-file "a+0" #:wait? #f)))
+
   (define last-expiry-date
-    (catch 'system-error
-      (lambda ()
-        (or (string->number
-             (call-with-input-file expiry-file get-string-all))
-            0))
-      (const 0)))
+    (if expiry-port
+        (or (string->number (get-string-all expiry-port))
+            0)
+        +inf.0))
 
   (when (obsolete? last-expiry-date now cleanup-period)
     (remove-expired-cache-entries (cache-entries cache)
@@ -108,8 +115,10 @@  (define* (maybe-remove-expired-cache-entries cache
                                   #:delete-entry delete-entry)
     (catch 'system-error
       (lambda ()
-        (with-atomic-file-output expiry-file
-          (cute write (time-second now) <>)))
+        (seek expiry-port 0 SEEK_SET)
+        (truncate-file expiry-port 0)
+        (write (time-second now) expiry-port)
+        (unlock-file expiry-port))
       (lambda args
         ;; ENOENT means CACHE does not exist.
         (unless (= ENOENT (system-error-errno args))
diff --git a/tests/cache.scm b/tests/cache.scm
index d495ace2bd..e8ad083d40 100644
--- a/tests/cache.scm
+++ b/tests/cache.scm
@@ -1,5 +1,5 @@ 
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2017, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2017, 2020, 2024 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2022 Simon Tournier <zimon.toutoune@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
@@ -22,7 +22,9 @@  (define-module (test-cache)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-19)
   #:use-module (srfi srfi-64)
+  #:use-module ((guix build syscalls) #:select (lock-file))
   #:use-module ((guix utils) #:select (call-with-temporary-directory))
+  #:use-module ((rnrs io ports) #:select (get-string-all))
   #:use-module (ice-9 match))
 
 (test-begin "cache")
@@ -75,6 +77,32 @@  (define-syntax-rule (test-cache-cleanup cache exp ...)
       (lambda (port)
         (display 0 port)))))
 
+(let ((pid #f))                          ;PID of the lock-holding child, set by the parent
+  (test-equal "maybe-remove-expired-cache-entries, cleanup needed but lock taken"
+    '()                                  ;presumably "nothing removed"; see 'test-cache-cleanup'
+    (test-cache-cleanup cache
+      (let ((in+out (pipe)))             ;(read-end . write-end), used for child->parent sync
+        (match (primitive-fork)
+          (0 (dynamic-wind               ;child: grab the lock and sit on it
+               (const #t)
+               (lambda ()
+                 (close-port (car in+out)) ;the child only writes to the pipe
+                 (let ((port (lock-file    ;lock the file the cleanup code wants to lock
+                              (string-append cache "/last-expiry-cleanup"))))
+                   (display 0 port)
+                   (display "done!\n" (cdr in+out)) ;tell the parent the lock is taken
+                   (close-port (cdr in+out))        ;EOF lets the parent's read return
+                   (sleep 100)))                    ;hold the lock until the parent kills us
+               (lambda ()
+                 (primitive-exit 0))))   ;never let the child return into the test suite
+          (n                             ;parent: N is the child's PID
+           (set! pid n)
+           (close-port (cdr in+out))     ;drop our write end so EOF can be observed
+           (get-string-all (car in+out)) ;block until the child has closed its end
+           (close-port (car in+out)))))))
+
+  (when pid (kill pid SIGKILL) (waitpid pid))) ;stop the child and reap it (no zombie)
+
 (test-equal "maybe-remove-expired-cache-entries, empty cache"
   '("a" "b" "c")
   (test-cache-cleanup cache