From: Ludovic Courtès <ludo@gnu.org>
* src/cuirass/base.scm (%remote-server-socket-file-name): New variable.
(remote-builder-listener): New procedure.
(remote-builder): Add ‘socket’ parameter. Call
‘remote-builder-listener’. Handle ‘build-status-change’ messages.
(spawn-remote-builder): Add call to ‘open-unix-listening-socket’ and
pass it to ‘remote-builder’.
* src/cuirass/scripts/remote-server.scm (%notification-socket-pool): New
variable.
(send-build-status-change-notification, update-build-status): New
procedures.
(run-fetch, serve-build-requests): Call ‘update-build-status’ instead of
‘db-update-build-status!’ and ‘set-build-successful!’.
(open-build-notification-socket): New procedure.
(cuirass-remote-server): Use it and parameterize ‘%notification-socket-pool’.
* tests/remote.scm (notification-server): New variable.
(terminate-process): New procedure.
(stop-worker, stop-server): Use it.
(start-notification-server, stop-notification-server): New procedures.
("remote-server"): Call ‘start-notification-server’.
("clean-up"): Call ‘stop-notification-server’.
---
src/cuirass/base.scm | 53 +++++++++++++--
src/cuirass/scripts/remote-server.scm | 96 ++++++++++++++++++---------
tests/remote.scm | 44 ++++++++++--
3 files changed, 151 insertions(+), 42 deletions(-)
base-commit: b6c2e340b24c4ea9701eb687669a091027bd361e
@@ -90,6 +90,7 @@
;; Parameters.
%bridge-socket-file-name
open-bridge-connection
+ %remote-server-socket-file-name
%package-cachedir
%fallback?))
@@ -166,6 +167,8 @@
(string-append (%cuirass-state-directory) "/run"))))
(define %bridge-socket-file-name
+ ;; Socket 'cuirass register' listens to, allowing 'cuirass web' to connect
+ ;; to it.
(make-parameter (string-append (%cuirass-run-state-directory)
"/cuirass/bridge")))
@@ -178,6 +181,12 @@ socket."
(connect sock AF_UNIX file)
sock))
+(define %remote-server-socket-file-name
+ ;; Socket 'cuirass register' listens to, allowing 'cuirass remote-server' to
+ ;; connect to it.
+ (make-parameter (string-append (%cuirass-run-state-directory)
+ "/cuirass/remote-builds")))
+
;;;
;;; Read parameters.
@@ -456,22 +465,56 @@ by handing them to the local build daemon."
(spawn-fiber (local-builder channel))
channel))
-(define (remote-builder channel)
+(define (remote-builder-listener socket channel)
+ "Spawn a server that accepts connections on SOCKET and forwards messages it
+reads to CHANNEL."
+ (define (serve-client client)
+ (let loop ()
+ (match (read client)
+ ((? eof-object?)
+ (log-info "terminating remote server client connection on EOF"))
+ (message
+ (put-message channel message)
+ (loop)))))
+
+ (lambda ()
+ (let loop ()
+ (match (accept socket (logior SOCK_NONBLOCK SOCK_CLOEXEC))
+ ((connection . peer)
+ (spawn-fiber (lambda ()
+ (log-info "remote builder accepted connection: ~s"
+ connection)
+ (serve-client connection)))
+ (loop))))))
+
+(define (remote-builder channel socket)
+ "Spawn a remote builder that accepts messages on CHANNEL and receives
+notifications from 'cuirass remote-server' over SOCKET."
(lambda ()
(log-info "builds will be delegated to 'cuirass remote-server'")
+ (spawn-fiber (remote-builder-listener socket channel))
+
(let loop ()
(match (get-message channel)
(`(build ,derivations)
;; Currently there's nothing to do here: 'cuirass remote-server'
;; periodically calls 'db-get-pending-build'.
;; TODO: Push notifications to 'remote-server' instead.
- (log-info "~a pending derivation builds" (length derivations))))
+ (log-info "~a pending derivation builds" (length derivations)))
+ (`(build-status-change ,derivation ,status)
+ ;; TODO: Handle database operations, notifications, etc. from here.
+ (log-info "status of '~a' changed to ~a" derivation status)))
(loop))))
(define (spawn-remote-builder)
- "Spawn a build actor that performs builds using \"remote workers\"."
- (let ((channel (make-channel)))
- (spawn-fiber (remote-builder channel))
+ "Spawn a build actor that performs builds using \"remote workers\". Return
+once ready to listen for incoming connections from 'cuirass remote-server'."
+ (log-info "listening for 'cuirass remote-server' notifications on '~a'"
+ (%remote-server-socket-file-name))
+ (let ((channel (make-channel))
+ (socket (open-unix-listening-socket
+ (%remote-server-socket-file-name))))
+ (spawn-fiber (remote-builder channel socket))
channel))
@@ -1,6 +1,6 @@
;;; remote-server.scm -- Remote build server.
;;; Copyright © 2020, 2021 Mathieu Othacehe <othacehe@gnu.org>
-;;; Copyright © 2023, 2024 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2023-2025 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
@@ -20,7 +20,8 @@
(define-module (cuirass scripts remote-server)
#:autoload (cuirass base) (read-parameters
set-build-successful!
- spawn-build-maintainer)
+ spawn-build-maintainer
+ %remote-server-socket-file-name)
#:use-module (cuirass config)
#:use-module (cuirass database)
#:use-module (cuirass logging)
@@ -88,6 +89,10 @@
(define %publish-port
(make-parameter #f))
+(define %notification-socket-pool
+ ;; Pool of sockets connected to 'cuirass register'.
+ (make-parameter #f))
+
(define service-name
"Cuirass remote server")
@@ -288,6 +293,22 @@ signing key for URL is not authorized."
#t))))
outputs))))
+(define (send-build-status-change-notification drv status)
+ "Send a notification to 'cuirass register' that the status of DRV changed to
+STATUS."
+ (with-resource-from-pool (%notification-socket-pool) sock
+ (write `(build-status-change ,drv ,status) sock)
+ (newline sock)))
+
+(define* (update-build-status drv status
+ #:key log-file)
+ "Change the status of DRV to STATUS, both in the database and by sending a
+notification to 'cuirass register'."
+ (if (= (build-status succeeded) status)
+ (set-build-successful! drv)
+ (db-update-build-status! drv status #:log-file log-file))
+ (send-build-status-change-notification drv status))
+
(define* (run-fetch message)
"Read MESSAGE and download the corresponding build outputs. If
%CACHE-DIRECTORY is set, download the matching NAR and NARINFO files in this
@@ -311,15 +332,15 @@ directory."
(add-to-store drv outputs url))
(begin
(log-info "build succeeded: '~a'" drv)
- (set-build-successful! drv))
+ (update-build-status drv (build-status succeeded)))
(begin
(log-error "failed to retrieve output of \
successful build '~a'; rescheduling"
drv)
- (db-update-build-status! drv (build-status scheduled))))))
+ (update-build-status drv (build-status scheduled))))))
(('build-failed ('drv drv) ('url url) _ ...)
(log-info "build failed: '~a'" drv)
- (db-update-build-status! drv (build-status failed)))))
+ (update-build-status drv (build-status failed)))))
(define (fetch-worker channel max-parallel-downloads)
(define queue-size
@@ -529,7 +550,7 @@ Use WORKER-DIRECTORY to maintain the list of active workers."
('dependency dependency) _ ...)
(log-info "build failed: dependency '~a' of '~a'"
dependency drv)
- (db-update-build-status! drv (build-status failed-dependency))
+ (update-build-status drv (build-status failed-dependency))
(put-message build-maintainer 'failed-dependency) ;mark 'failed-dependency' builds
(let ((parent (db-get-build drv)))
(when parent
@@ -567,7 +588,7 @@ Use WORKER-DIRECTORY to maintain the list of active workers."
(build-id build)
derivation (build-system build)))
(db-update-build-worker! derivation name)
- (db-update-build-status! derivation (build-status submitted))
+ (update-build-status derivation (build-status submitted))
(catch 'zmq-error
(lambda ()
(reply-worker
@@ -583,7 +604,7 @@ Use WORKER-DIRECTORY to maintain the list of active workers."
(worker-name worker)
(worker-address worker)
message)
- (db-update-build-status! derivation
+ (update-build-status derivation
(build-status scheduled)))))
(begin
(when worker
@@ -605,8 +626,8 @@ Use WORKER-DIRECTORY to maintain the list of active workers."
(worker-name worker)
drv))
(db-update-build-worker! drv name)
- (db-update-build-status! drv (build-status started)
- #:log-file log-file)))
+ (update-build-status drv (build-status started)
+ #:log-file log-file)))
(`(build-rejected (drv ,drv) (worker ,name))
;; Worker rejected the build, which might be either because the
;; derivation is unavailable or because of a transient error. In
@@ -619,11 +640,11 @@ Use WORKER-DIRECTORY to maintain the list of active workers."
(begin
(log-warning "~a: build rejected: ~a; rescheduling"
name drv)
- (db-update-build-status! drv (build-status scheduled)))
+ (update-build-status drv (build-status scheduled)))
(begin
(log-warning "~a: build rejected: ~a; canceling"
name drv)
- (db-update-build-status! drv (build-status canceled)))))
+ (update-build-status drv (build-status canceled)))))
(_
(log-warning "ignoring unrecognized message: ~s" command)))))
@@ -699,6 +720,15 @@ exiting."
(terminate-helper-processes)
(primitive-exit 1))))
+(define (open-build-notification-socket)
+ "Return a socket connected to the 'cuirass register' process, used to send
+status updates."
+ (let ((sock (socket AF_UNIX
+ (logior SOCK_STREAM SOCK_CLOEXEC SOCK_NONBLOCK)
+ 0)))
+ (connect sock AF_UNIX (%remote-server-socket-file-name))
+ sock))
+
(define (cuirass-remote-server args)
(signal-handler)
(with-error-handling
@@ -793,24 +823,30 @@ exiting."
(run-fibers
(lambda ()
- (with-database
- (receive-logs log-port (%cache-directory))
- (spawn-notification-fiber)
- (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
+ (parameterize ((%notification-socket-pool
+ (make-resource-pool
+ (map (lambda (i)
+ (open-build-notification-socket))
+ (iota 8))
+ 'notification-socket)))
+ (with-database
+ (receive-logs log-port (%cache-directory))
+ (spawn-notification-fiber)
+ (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
- (let ((fetch-worker (spawn-fetch-worker))
- (worker-directory (spawn-worker-directory)))
- (catch 'zmq-error
- (lambda ()
- (serve-build-requests backend-port
- fetch-worker
- worker-directory
- #:build-maintainer
- (spawn-build-maintainer)))
- (lambda (key errno message . _)
- (log-error (G_ "ZeroMQ error in build server: ~a")
- message)
- (terminate-helper-processes)
- (primitive-exit 1))))))
+ (let ((fetch-worker (spawn-fetch-worker))
+ (worker-directory (spawn-worker-directory)))
+ (catch 'zmq-error
+ (lambda ()
+ (serve-build-requests backend-port
+ fetch-worker
+ worker-directory
+ #:build-maintainer
+ (spawn-build-maintainer)))
+ (lambda (key errno message . _)
+ (log-error (G_ "ZeroMQ error in build server: ~a")
+ message)
+ (terminate-helper-processes)
+ (primitive-exit 1)))))))
#:hz 0
#:parallelism (min 8 (current-processor-count)))))))
@@ -29,6 +29,7 @@
symbol))))
(cuirass specification)
((cuirass remote) #:select (worker-systems))
+ ((cuirass base) #:select (spawn-remote-builder))
(gnu packages base)
(guix build utils)
(guix channels)
@@ -39,6 +40,7 @@
((guix store) #:hide (build))
((guix utils) #:select (%current-system))
(tests common)
+ (fibers)
(squee)
(simple-zmq)
((zlib) #:select (call-with-gzip-input-port))
@@ -57,6 +59,9 @@
(define worker
(make-parameter #f))
+(define notification-server
+ (make-parameter #f))
+
(define (start-worker)
(setenv "REQUEST_PERIOD" "1")
(setenv "CUIRASS_LOGGING_LEVEL" "debug")
@@ -67,10 +72,12 @@
"--public-key=tests/signing-key.pub")
#:search-path? #t)))
+(define (terminate-process pid)
+ (kill pid SIGINT)
+ (waitpid pid))
+
(define (stop-worker)
- (let ((worker (worker)))
- (kill worker SIGINT)
- (waitpid worker)))
+ (terminate-process (worker)))
(define (start-server)
(mkdir-p "tests/cache")
@@ -85,9 +92,30 @@
#:search-path? #t)))
(define (stop-server)
- (let ((server (server)))
- (kill server SIGINT)
- (waitpid server)))
+ (terminate-process (server)))
+
+(define (start-notification-server)
+ ;; Spawn the notification server that normally runs as part of 'cuirass
+ ;; register', and which 'cuirass remote-server' connects to. Do so in a
+ ;; separate process because 'run-fibers' installs suspendable ports, which
+ ;; this process may not be able to deal with.
+ (notification-server
+ (spawn "guile"
+ (list "guile" "-c"
+ (object->string
+ '(begin
+ (use-modules (cuirass base) (fibers))
+
+ (setvbuf (current-output-port) 'none)
+ (setvbuf (current-error-port) 'none)
+ (run-fibers
+ (lambda ()
+ (spawn-remote-builder)
+ (sleep 120)) ;wait
+ #:drain? #t)))))))
+
+(define (stop-notification-server)
+ (terminate-process (notification-server)))
(define* (dummy-drv #:optional sleep #:key (name "foo") dependency message)
(let ((dependency (and=> dependency read-derivation-from-file)))
@@ -185,6 +213,7 @@
(test-assert "remote-server"
(begin
+ (start-notification-server)
(start-server)
#t))
@@ -384,4 +413,5 @@ Failing dependency ~s.\n"
(test-assert "clean-up"
(begin
(stop-worker)
- (stop-server))))
+ (stop-server)
+ (stop-notification-server))))