Message ID | befeaef0fa91ae49ea8f141121de4d790dd52a2a.1704484373.git.maxim.cournoyer@gmail.com |
---|---|
State | New |
Headers | show |
Series | Compress man pages using zstd | expand |
Maxim Cournoyer <maxim.cournoyer@gmail.com> skribis: > The aim is to improve the efficiency of computing the man pages database, > which must decompress the man pages. Zstd is faster than gzip, especially for > decompression, and has a similar compression ratio. > > * gnu/packages/commencement.scm (%final-inputs): Add zstd. > * guix/build/gnu-build-system.scm > (compress-documentation) Update doc. > <info-compressor, info-compressor-flags, man-compressor, man-compressor-flags> > <man-compressor-file-extension>: New arguments. > <compressed-documentation-extension>: Rename argument to... > <info-compressor-file-extension>: ... this. Add an 'extension' argument to > the retarget-symlink nested procedure. Use new arguments in nested > 'maybe-compress' procedure. > > Change-Id: Ibaad4658f8e5151633714d263d9198f56d255020 That’s a great idea, LGTM! Do you have figures on the space savings of a package with many man pages such as gnutls:doc or openssl:doc? Thanks, Ludo’.
Hi Ludovic! Ludovic Courtès <ludo@gnu.org> writes: > Maxim Cournoyer <maxim.cournoyer@gmail.com> skribis: > >> The aim is to improve the efficiency of computing the man pages database, >> which must decompress the man pages. Zstd is faster than gzip, especially for >> decompression, and has a similar compression ratio. >> >> * gnu/packages/commencement.scm (%final-inputs): Add zstd. >> * guix/build/gnu-build-system.scm >> (compress-documentation) Update doc. >> <info-compressor, info-compressor-flags, man-compressor, man-compressor-flags> >> <man-compressor-file-extension>: New arguments. >> <compressed-documentation-extension>: Rename argument to... >> <info-compressor-file-extension>: ... this. Add an 'extension' argument to >> the retarget-symlink nested procedure. Use new arguments in nested >> 'maybe-compress' procedure. >> >> Change-Id: Ibaad4658f8e5151633714d263d9198f56d255020 > > That’s a great idea, LGTM! Thank you for the review! > Do you have figures on the space savings of a package with many man > pages such as gnutls:doc or openssl:doc? Surprisingly, all of the ones I've checked weighed the same. Here's gnutls:doc from my local (master) Guix: --8<---------------cut here---------------start------------->8--- $ du -sh /gnu/store/8i3bas6lhziqi2n5wg6qzzhlddkb502c-gnutls-3.7.7-doc 4,9M /gnu/store/8i3bas6lhziqi2n5wg6qzzhlddkb502c-gnutls-3.7.7-doc --8<---------------cut here---------------end--------------->8--- Compared to core-updates with these changes: --8<---------------cut here---------------start------------->8--- $ du -sh /gnu/store/h3lbj1g64lkn9rd9xp86dphqnblxqkl6-gnutls-3.8.1-doc 4.9M /gnu/store/h3lbj1g64lkn9rd9xp86dphqnblxqkl6-gnutls-3.8.1-doc --8<---------------cut here---------------end--------------->8--- That's because all the compressed man pages appear to fit in the minimal 4 KiB that a single file occupies on disk, whether they are compressed with gzip or with zstd. 
Both man-pages packages weigh 11 MiB, but we can get an idea of the compression ratio using: With my local Guix: --8<---------------cut here---------------start------------->8--- $ find $(guix build man-pages) -name '*.gz' | xargs -n1 du | sort -rn | head -n20 64 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man5/proc.5.gz 44 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/bpf-helpers.7.gz 32 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/perf_event_open.2.gz 28 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/ptrace.2.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/tcp.7.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/cgroups.7.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/seccomp_unotify.2.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/prctl.2.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/open.2.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/futex.2.gz 20 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/fcntl.2.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/user_namespaces.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/socket.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/man-pages.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/ip.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/cpuset.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man7/capabilities.7.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man5/elf.5.gz 16 /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/seccomp.2.gz 16 
/gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02/share/man/man2/keyctl.2.gz --8<---------------cut here---------------end--------------->8--- On core-updates with these changes: --8<---------------cut here---------------start------------->8--- $ find /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02 -name '*.zst' | xargs -n1 du | sort -rn | head -n20 56 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man5/proc.5.zst 36 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/bpf-helpers.7.zst 28 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/perf_event_open.2.zst 24 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/ptrace.2.zst 20 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/tcp.7.zst 20 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/seccomp_unotify.2.zst 20 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/prctl.2.zst 20 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/futex.2.zst 20 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/fcntl.2.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/user_namespaces.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/man-pages.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/ip.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/cpuset.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/cgroups.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man7/capabilities.7.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man5/elf.5.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/seccomp.2.zst 16 
/gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/open.2.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/keyctl.2.zst 16 /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02/share/man/man2/clone.2.zst --8<---------------cut here---------------end--------------->8--- So for larger man pages, it seems we're talking about a roughly 10% size improvement. That's not much, but the decompression is more efficient: Compare gzipped man-pages decompression: --8<---------------cut here---------------start------------->8--- $ find /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02 -name '*.gz' | sh -c 'time xargs gunzip -ck > /dev/null' real 0m0.137s user 0m0.106s sys 0m0.032s $ find /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02 -name '*.gz' | sh -c 'time xargs gunzip -ck > /dev/null' real 0m0.137s user 0m0.104s sys 0m0.035s $ find /gnu/store/93fjc9hv5canvs2lpya0qsbcm44hq7hh-man-pages-6.02 -name '*.gz' | sh -c 'time xargs gunzip -ck > /dev/null' real 0m0.138s user 0m0.103s sys 0m0.036s --8<---------------cut here---------------end--------------->8--- With zstd-compressed man-pages decompression: --8<---------------cut here---------------start------------->8--- $ find /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02 -name '*.zst' | sh -c 'time xargs zstd -dkc > /dev/null' real 0m0.091s user 0m0.033s sys 0m0.059s $ find /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02 -name '*.zst' | sh -c 'time xargs zstd -dkc > /dev/null' real 0m0.091s user 0m0.035s sys 0m0.058s $ find /gnu/store/nqp5mmi1kb4xp7nkqsybrp5i18lygsl2-man-pages-6.02 -name '*.zst' | sh -c 'time xargs zstd -dkc > /dev/null' real 0m0.090s user 0m0.027s sys 0m0.063s --8<---------------cut here---------------end--------------->8--- Assuming guile-zstd fares as well as zstd itself, we're looking at about 1.5x faster decompression. 
Past measurements, though, had suggested that decompression was not the limiting factor in speeding up the computation of the man pages database; rather, the bottleneck had to do with building the database with Guile (sorry, I can't find a reference to it anymore).
diff --git a/gnu/packages/commencement.scm b/gnu/packages/commencement.scm index ae1c91f0d0..51c26339ef 100644 --- a/gnu/packages/commencement.scm +++ b/gnu/packages/commencement.scm @@ -3492,7 +3492,8 @@ (define-public %final-inputs (native-inputs (list (if (target-hurd?) glibc-utf8-locales-final/hurd - glibc-utf8-locales-final))))))) + glibc-utf8-locales-final))))) + ("zstd" ,zstd))) ("sed" ,sed-final) ("grep" ,grep-final) ("xz" ,xz-final) diff --git a/guix/build/gnu-build-system.scm b/guix/build/gnu-build-system.scm index 51b8f9acbf..2f0ffe36fc 100644 --- a/guix/build/gnu-build-system.scm +++ b/guix/build/gnu-build-system.scm @@ -2,7 +2,7 @@ ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2018 Mark H Weaver <mhw@netris.org> ;;; Copyright © 2020 Brendan Tildesley <mail@brendan.scot> -;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com> +;;; Copyright © 2021, 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -644,21 +644,36 @@ (define* (reset-gzip-timestamps #:key outputs #:allow-other-keys) (((names . directories) ...) (for-each process-directory directories)))) -(define* (compress-documentation #:key outputs +(define* (compress-documentation #:key + outputs (compress-documentation? #t) - (documentation-compressor "gzip") - (documentation-compressor-flags + (info-compressor "gzip") + (info-compressor-flags '("--best" "--no-name")) - (compressed-documentation-extension ".gz") + (info-compressor-file-extension ".gz") + (man-compressor (if (which "zstd") + "zstd" + info-compressor)) + (man-compressor-flags + (if (which "zstd") + (list "-19" "--rm" + "--threads" (number->string + (parallel-job-count))) + info-compressor-flags)) + (man-compressor-file-extension + (if (which "zstd") + ".zst" + info-compressor-file-extension)) #:allow-other-keys) - "When COMPRESS-DOCUMENTATION? 
is true, compress man pages and Info files -found in OUTPUTS using DOCUMENTATION-COMPRESSOR, called with -DOCUMENTATION-COMPRESSOR-FLAGS." - (define (retarget-symlink link) + "When COMPRESS-INFO-MANUALS? is true, compress Info files found in OUTPUTS +using INFO-COMPRESSOR, called with INFO-COMPRESSOR-FLAGS. Similarly, when +COMPRESS-MAN-PAGES? is true, compress man pages files found in OUTPUTS using +MAN-COMPRESSOR, using MAN-COMPRESSOR-FLAGS." + (define (retarget-symlink link extension) (let ((target (readlink link))) (delete-file link) - (symlink (string-append target compressed-documentation-extension) - (string-append link compressed-documentation-extension)))) + (symlink (string-append target extension) + (string-append link extension)))) (define (has-links? file) ;; Return #t if FILE has hard links. @@ -676,23 +691,23 @@ (define* (compress-documentation #:key outputs (symbolic-link? target-absolute)) (lambda args (if (= ENOENT (system-error-errno args)) - (begin - (format (current-error-port) - "The symbolic link '~a' target is missing: '~a'\n" - symlink target-absolute) - #f) + (format (current-error-port) + "The symbolic link '~a' target is missing: '~a'\n" + symlink target-absolute) (apply throw args)))))) - (define (maybe-compress-directory directory regexp) + (define (maybe-compress-directory directory regexp + compressor + compressor-flags + compressor-extension) (when (directory-exists? directory) (match (find-files directory regexp) - (() ;nothing to compress + (() ;nothing to compress #t) - ((files ...) ;one or more files + ((files ...) ;one or more files (format #t "compressing documentation in '~a' with ~s and flags ~s~%" - directory documentation-compressor - documentation-compressor-flags) + directory compressor compressor-flags) (call-with-values (lambda () (partition symbolic-link? files)) @@ -702,20 +717,26 @@ (define* (compress-documentation #:key outputs ;; unchanged ('gzip' would refuse to compress them anyway.) 
;; Also, do not retarget symbolic links pointing to other ;; symbolic links, since these are not compressed. - (for-each retarget-symlink + (for-each (cut retarget-symlink <> compressor-extension) (filter (lambda (symlink) (and (not (points-to-symlink? symlink)) (string-match regexp symlink))) symlinks)) - (apply invoke documentation-compressor - (append documentation-compressor-flags + (apply invoke compressor + (append compressor-flags (remove has-links? regular-files))))))))) (define (maybe-compress output) (maybe-compress-directory (string-append output "/share/man") - "\\.[0-9]+$") + "\\.[0-9]+$" + man-compressor + man-compressor-flags + man-compressor-file-extension) (maybe-compress-directory (string-append output "/share/info") - "\\.info(-[0-9]+)?$")) + "\\.info(-[0-9]+)?$" + info-compressor + info-compressor-flags + info-compressor-file-extension)) (if compress-documentation? (match outputs