Message ID | a3fbd0db890da836e1646d1f4ff2da4af0024cda.1691052372.git.mail@cbaines.net |
---|---|
State | New |
Headers | show |
Series | [bug#65033] guix: read-derivation-from-file: Use less open files. | expand |
Hey Chris, Christopher Baines <mail@cbaines.net> skribis: > The Guix derivation graph isn't that deep, so I don't think this generally > opens lots of files, but I think it's still unnecessary to keep more files > than needed open. > > * guix/derivations.scm (read-derivation-from-file): Read each derivation to a > string, which is passed as a port to read-derivation. [...] > - (let ((drv (call-with-input-file file read-derivation))) > + (let ((drv > + ;; read-derivation can call read-derivation-from-file, so to > + ;; avoid having multiple open files when reading a derivation > + ;; with inputs, read it in to a string first. > + (call-with-input-string > + (call-with-input-file file > + get-string-all) > + read-derivation))) How real is the risk of having too many open files due to a ‘read-derivation’ call? (Where too many is >= 100.) You might think it’s likely because: --8<---------------cut here---------------start------------->8--- $ guix gc -R $(guix build -d --no-grafts emacs) |grep drv$ | wc -l 2234 --8<---------------cut here---------------end--------------->8--- But in fact, due to the shape of the graph + memoization (I suppose¹), there are at most 27 open file descriptors in this case: --8<---------------cut here---------------start------------->8--- $ strace -o /tmp/log.strace -e openat guile -c "(use-modules (guix)) (read-derivation-from-file \"$(guix build -d --no-grafts emacs)\")" $ cut -d '=' -f 2- < /tmp/log.strace |sort -un | tail 18 19 20 21 22 23 24 25 26 27 --8<---------------cut here---------------end--------------->8--- So to me the status quo is probably okay. The reason I’m paying attention to this is that allocating a string port plus a string for the whole contents every time would put pressure on memory, which is worth avoiding if we can. WDYT? Thanks, Ludo’. ¹ Here’s an excerpt of the log of open/close calls: --8<---------------cut here---------------start------------->8--- openat(AT_FDCWD, "/gnu/store/0i35jrxk605w30kg5rbi8llrmn0p5d3z-rust-1.58.1.drv", O_RDONLY) = 20 openat(AT_FDCWD, "/gnu/store/hxb2q20kqni626v1qnnp48qp9c31xk26-rustc-1.58.1-src.tar.xz.drv", O_RDONLY) = 21 openat(AT_FDCWD, "/gnu/store/99j3sbiv8yrgvw7nlyykfk92wr6s2ckn-rustc-1.58.1-src.tar.gz.drv", O_RDONLY) = 22 close(22) = 0 close(21) = 0 openat(AT_FDCWD, "/gnu/store/0p87fpbbjgjy0mj9z3frinml75yyhkwj-rust-1.57.0.drv", O_RDONLY) = 21 openat(AT_FDCWD, "/gnu/store/dshfgf4i6jigvr7plf79bz256gq6rnvs-rustc-1.57.0-src.tar.xz.drv", O_RDONLY) = 22 openat(AT_FDCWD, "/gnu/store/casv57vivd9zp90sqp7ky8kk0zzs9di5-rustc-1.57.0-src.tar.gz.drv", O_RDONLY) = 23 close(23) = 0 close(22) = 0 openat(AT_FDCWD, "/gnu/store/bhw4sm7wdsxdl4kjr9jbxg7yh26naarh-rust-1.56.1.drv", O_RDONLY) = 22 openat(AT_FDCWD, "/gnu/store/f5x8avmf1llkvhiprjimbnzps5g3a9zs-rust-1.55.0.drv", O_RDONLY) = 23 openat(AT_FDCWD, "/gnu/store/skaj2s179s6irdz92fsb0pyfd1y0z8cr-rust-1.54.0.drv", O_RDONLY) = 24 openat(AT_FDCWD, "/gnu/store/j1h5zijffi9z03q9ixvxgzvrx8bh06hy-mrustc-0.10-2.597593a-checkout.drv", O_RDONLY) = 25 openat(AT_FDCWD, "/gnu/store/x5yag0vxmj8kyl9b7a1x8pksyvxh4bx9-mrustc-0.10-2.597593a-checkout.drv", O_RDONLY) = 26 close(26) = 0 close(25) = 0 openat(AT_FDCWD, "/gnu/store/i4ilq3iirdyyk9pga22dhldr5khm2v27-openssl-1.1.1q.drv", O_RDONLY) = 25 openat(AT_FDCWD, "/gnu/store/d8s5zhx5lhlfngshbzbszyqm4nbnczzn-openssl-1.1.1q.tar.xz.drv", O_RDONLY) = 26 openat(AT_FDCWD, "/gnu/store/hgkr5j4y9y8kgsriyc94drs1mjfmdq7b-openssl-1.1.1q.tar.gz.drv", O_RDONLY) = 27 close(27) = 0 close(26) = 0 close(25) = 0 openat(AT_FDCWD, "/gnu/store/0c5f6lqiqspbf9vhxil0zwxgw0dw9qfr-rustc-1.54.0-src.tar.xz.drv", O_RDONLY) = 25 openat(AT_FDCWD, "/gnu/store/jpr1x3p9vv8cvjvw3bhbwljfpwcp5drv-rustc-1.54.0-src.tar.gz.drv", O_RDONLY) = 26 close(26) = 0 close(25) = 0 close(24) = 0 openat(AT_FDCWD, "/gnu/store/bsllkwnaj5ssad7lmwc5y82vaw7rxn2z-rustc-1.55.0-src.tar.xz.drv", O_RDONLY) = 24 openat(AT_FDCWD, "/gnu/store/wb8yxyj5i741h07y7mrvmqx3zgqjjdps-rustc-1.55.0-src.tar.gz.drv", O_RDONLY) = 25 close(25) = 0 close(24) = 0 close(23) = 0 openat(AT_FDCWD, "/gnu/store/86hdkc84zp1xjzqp9nsfhqzqpi7wxr8m-rustc-1.56.1-src.tar.xz.drv", O_RDONLY) = 23 openat(AT_FDCWD, "/gnu/store/8y3kwcvsw72rdbari8irvhfwh872y4gd-rustc-1.56.1-src.tar.gz.drv", O_RDONLY) = 24 close(24) = 0 close(23) = 0 close(22) = 0 close(21) = 0 close(20) = 0 close(19) = 0 close(18) = 0 close(17) = 0 openat(AT_FDCWD, "/gnu/store/vc4vah8yz98f1vq0fdf57b9r0v2255y9-rustc-1.62.1-src.tar.xz.drv", O_RDONLY) = 17 openat(AT_FDCWD, "/gnu/store/xxklgspj41zwb0n3kd33bilq9n08pp58-rustc-1.62.1-src.tar.gz.drv", O_RDONLY) = 18 close(18) = 0 close(17) = 0 close(16) = 0 close(15) = 0 close(14) = 0 close(13) = 0 close(12) = 0 --8<---------------cut here---------------end--------------->8---
Ludovic Courtès <ludo@gnu.org> writes: > Hey Chris, > > Christopher Baines <mail@cbaines.net> skribis: > >> The Guix derivation graph isn't that deep, so I don't think this generally >> opens lots of files, but I think it's still unnecessary to keep more files >> than needed open. >> >> * guix/derivations.scm (read-derivation-from-file): Read each derivation to a >> string, which is passed as a port to read-derivation. > > [...] > >> - (let ((drv (call-with-input-file file read-derivation))) >> + (let ((drv >> + ;; read-derivation can call read-derivation-from-file, so to >> + ;; avoid having multiple open files when reading a derivation >> + ;; with inputs, read it in to a string first. >> + (call-with-input-string >> + (call-with-input-file file >> + get-string-all) >> + read-derivation))) > > How real is the risk of having too many open files due to a > ‘read-derivation’ call? (Where too many is >= 100.) > > You might think it’s likely because: > > $ guix gc -R $(guix build -d --no-grafts emacs) |grep drv$ | wc -l > 2234 > > > But in fact, due to the shape of the graph + memoization (I suppose¹), > there are at most 27 open file descriptors in this case: > > $ strace -o /tmp/log.strace -e openat guile -c "(use-modules (guix)) (read-derivation-from-file \"$(guix build -d --no-grafts emacs)\")" > $ cut -d '=' -f 2- < /tmp/log.strace |sort -un | tail > 18 > 19 > 20 > 21 > 22 > 23 > 24 > 25 > 26 > 27 > > > So to me the status quo is probably okay. > > The reason I’m paying attention to this is that allocating a string port > plus a string for the whole contents every time would put pressure on > memory, which is worth avoiding if we can. > > WDYT? I guess there should be a way of arranging the code so that it doesn't keep unnecessary ports, but also doesn't use strings, but that will require some rearranging. I think I just got thinking about this as the build coordinator was using excessive file descriptors, but this isn't the cause.
diff --git a/guix/derivations.scm b/guix/derivations.scm index 9fec7f4f0b..2154bd76f6 100644 --- a/guix/derivations.scm +++ b/guix/derivations.scm @@ -31,6 +31,7 @@ (define-module (guix derivations) #:use-module (ice-9 match) #:use-module (ice-9 rdelim) #:use-module (ice-9 vlist) + #:use-module (ice-9 textual-ports) #:use-module (guix store) #:use-module (guix utils) #:use-module (guix base16) @@ -556,7 +557,14 @@ (define (read-derivation-from-file file) ;; and because the same argument is read more than 15 times on average ;; during something like (package-derivation s gdb). (or (and file (hash-ref %derivation-cache file)) - (let ((drv (call-with-input-file file read-derivation))) + (let ((drv + ;; read-derivation can call read-derivation-from-file, so to + ;; avoid having multiple open files when reading a derivation + ;; with inputs, read it in to a string first. + (call-with-input-string + (call-with-input-file file + get-string-all) + read-derivation))) (hash-set! %derivation-cache file drv) drv)))