diff mbox series

[bug#68405,v4] guix: download: Add support for git repositories.

Message ID 20240119102417.17155-1-romain.garbage@inria.fr
State New
Headers show
Series [bug#68405,v4] guix: download: Add support for git repositories. | expand

Commit Message

Romain GARBAGE Jan. 19, 2024, 10:19 a.m. UTC
* guix/scripts/download.scm (git-download-to-store*): Add new variable.
(copy-recursively-without-dot-git): New variable.
(git-download-to-file): Add new variable.
(show-help): Add 'git', 'commit', 'branch' and 'recursive'options
help message.
(%default-options): Add default value for 'git-reference' and
'recursive' options.
(%options): Add 'git', 'commit', 'branch' and 'recursive' command
line options.
(guix-download) [hash]: Compute hash with 'file-hash*' instead of
'port-hash' from (gcrypt hash) module. This allows us to compute
hashes for directories.
* doc/guix.texi (Invoking guix-download): Add @item entries for
`git', `commit', `branch' and `recursive' options. Add a paragraph in
the introduction.
* tests/guix-download.sh: New tests. Move variables and trap definition
to the top of the file.

Change-Id: Ic2c428dca4cfcb0d4714ed361a4c46609339140a
---
 doc/guix.texi             |  23 ++++++
 guix/scripts/download.scm | 154 +++++++++++++++++++++++++++++++++++---
 tests/guix-download.sh    |  45 ++++++++++-
 3 files changed, 208 insertions(+), 14 deletions(-)

Changes from v3->v4
* Capitalized `git' in help message
* Replaced underscore with dash in help message
* Fixed url shadowing
* Wrapped long lines
* Added missing punctuation
* Moved variables and trap call definition to the top of the test file
* Renamed some variables in the test file to be more descriptive
* Removed unnecessary call to `chmod' in the test file

Comments

Maxim Cournoyer Jan. 20, 2024, 3:23 a.m. UTC | #1
Hello!

Romain GARBAGE <romain.garbage@inria.fr> writes:

> * guix/scripts/download.scm (git-download-to-store*): Add new variable.
> (copy-recursively-without-dot-git): New variable.
> (git-download-to-file): Add new variable.
> (show-help): Add 'git', 'commit', 'branch' and 'recursive'options
> help message.
> (%default-options): Add default value for 'git-reference' and
> 'recursive' options.
> (%options): Add 'git', 'commit', 'branch' and 'recursive' command
> line options.
> (guix-download) [hash]: Compute hash with 'file-hash*' instead of
> 'port-hash' from (gcrypt hash) module. This allows us to compute
> hashes for directories.
> * doc/guix.texi (Invoking guix-download): Add @item entries for
> `git', `commit', `branch' and `recursive' options. Add a paragraph in
> the introduction.
> * tests/guix-download.sh: New tests. Move variables and trap definition
> to the top of the file.
>
> Change-Id: Ic2c428dca4cfcb0d4714ed361a4c46609339140a

[...]

> +(define* (git-download-to-store* url
> +                                 reference
> +                                 recursive?
> +                                 #:key (verify-certificate? #t))
> +  "Download the git repository at URL to the store, checked out at REFERENCE.
> +URL must specify a protocol (i.e https:// or file://), REFERENCE must be a
> +pair argument as understood by 'latest-repository-commit'."
> +  ;; Ensure the URL string is properly formatted  when using the 'file'
> +  ;; protocol: URL is generated using 'uri->string', which returns
> +  ;; "file:/path/to/file" instead of "file:///path/to/file", which in turn
> +  ;; makes 'git-download-to-store' fail.
> +  (let* ((file? (string-prefix? "file:" url))
> +         (url (if (and file?
> +                        (not (string-prefix? "file:///" url)))
> +                   (string-append "file://" (string-drop url (string-length "file:")))
> +                   url)))
> +    (with-store store
> +      ;; TODO: Verify certificate support and deactivation.
> +      (with-git-error-handling
> +       (latest-repository-commit store url #:recursive? recursive? #:ref reference)))))

The above contains too long lines still :-).

> +
>  (define %default-options
>    ;; Alist of default option values.
>    `((format . ,bytevector->nix-base32-string)
>      (hash-algorithm . ,(hash-algorithm sha256))
>      (verify-certificate? . #t)
> +    (git-reference . #f)
> +    (recursive? . #f)
>      (download-proc . ,download-to-store*)))
>  
>  (define (show-help)
> @@ -97,6 +180,19 @@ (define (show-help)
>                           do not validate the certificate of HTTPS servers "))
>    (format #t (G_ "
>    -o, --output=FILE      download to FILE"))
> +  (format #t (G_ "
> +  -g, --git              download the default branch's latest commit of the
> +                         Git repository at URL"))
> +  (format #t (G_ "
> +      --commit=COMMIT-OR-TAG
> +                         download the given commit or tag of the Git
> +                         repository at URL"))
> +  (format #t (G_ "
> +      --branch=BRANCH    download the given branch of the Git repository
> +                         at URL"))
> +  (format #t (G_ "
> +  -r, --recursive        download a Git repository recursively"))
> +
>    (newline)
>    (display (G_ "
>    -h, --help             display this help and exit"))
> @@ -105,6 +201,13 @@ (define (show-help)
>    (newline)
>    (show-bug-report-information))
>  
> +(define (add-git-download-option result)
> +  (alist-cons 'download-proc
> +              ;; XXX: #:verify-certificate? currently ignored.
> +              (lambda* (url #:key verify-certificate? ref recursive?)
> +                (git-download-to-store* url ref recursive?))
> +              (alist-delete 'download result)))
> +
>  (define %options
>    ;; Specifications of the command-line options.
>    (list (option '(#\f "format") #t #f
> @@ -136,11 +239,36 @@ (define fmt-proc
>                    (alist-cons 'verify-certificate? #f result)))
>          (option '(#\o "output") #t #f
>                  (lambda (opt name arg result)
> -                  (alist-cons 'download-proc
> -                              (lambda* (url #:key verify-certificate?)
> -                                (download-to-file url arg))
> -                              (alist-delete 'download result))))
> -
> +                  (let* ((git
> +                          (assoc-ref result 'git-reference)))
> +                    (if git
> +                        (alist-cons 'download-proc
> +                                    (lambda* (url #:key verify-certificate? ref recursive?)
> +                                      (git-download-to-file url arg (assoc-ref result 'git-reference) recursive?))
> +                                    (alist-delete 'download result))
> +                        (alist-cons 'download-proc
> +                                    (lambda* (url #:key verify-certificate? #:allow-other-keys)
> +                                      (download-to-file url arg))
> +                                    (alist-delete 'download result))))))

Here as well.

Otherwise, I've tested it with:

--8<---------------cut here---------------start------------->8---
./pre-inst-env guix download -gr https://git.jami.net/savoirfairelinux/jami-client-qt -o /tmp/jami
--8<---------------cut here---------------end--------------->8---

and it worked as advertised; very nice!

Some idea for the future: the --recurse option could take an optional
argument that'd be a comma-separated list of submodules to fetch, e.g.

--8<---------------cut here---------------start------------->8---
./pre-inst-env guix download --git \
  --recurse=daemon,3rdparty/SortFilterProxyModel \
  https://git.jami.net/savoirfairelinux/jami-client-qt -o /tmp/jami
--8<---------------cut here---------------end--------------->8---

and it'd recurse *only* the listed submodules.  This would be useful as
some projects contain submodules for windows or other platforms we do
not care about and they may be very large (heavy) to download.

The same idea could be implemented for our git-reference, where
recursive? could accept a git submodule names list.

But back to the current scope:

Reviewed-by: Maxim Cournoyer <maxim.cournoyer@gmail>
Romain GARBAGE Jan. 22, 2024, 10:39 a.m. UTC | #2
Hello,

----- Mail original -----
> De: "Maxim Cournoyer" <maxim.cournoyer@gmail.com>
> À: "Romain Garbage" <romain.garbage@inria.fr>
> Cc: "68405" <68405@debbugs.gnu.org>, "Ludovic Courtès" <ludo@gnu.org>
> Envoyé: Samedi 20 Janvier 2024 03:23:01
> Objet: Re: [PATCH v4] guix: download: Add support for git repositories.

[...]

>> +    (with-store store
>> +      ;; TODO: Verify certificate support and deactivation.
>> +      (with-git-error-handling
>> +       (latest-repository-commit store url #:recursive? recursive? #:ref
>> reference)))))
> 
> The above contains too long lines still :-).

Sorry, I missed them, I sent a v5 hopefully wrapping all the lines longer than 80 characters :)

[...]
 
> Otherwise, I've tested it with:
> 
> --8<---------------cut here---------------start------------->8---
> ./pre-inst-env guix download -gr
> https://git.jami.net/savoirfairelinux/jami-client-qt -o /tmp/jami
> --8<---------------cut here---------------end--------------->8---
> 
> and it worked as advertised; very nice!
> 
> Some idea for the future: the --recurse option could take an optional
> argument that'd be a comma-separated list of submodules to fetch, e.g.
> 
> --8<---------------cut here---------------start------------->8---
> ./pre-inst-env guix download --git \
>  --recurse=daemon,3rdparty/SortFilterProxyModel \
>  https://git.jami.net/savoirfairelinux/jami-client-qt -o /tmp/jami
> --8<---------------cut here---------------end--------------->8---
> 
> and it'd recurse *only* the listed submodules.  This would be useful as
> some projects contain submodules for windows or other platforms we do
> not care about and they may be very large (heavy) to download.
> 
> The same idea could be implemented for our git-reference, where
> recursive? could accept a git submodule names list.

That's actually a nice idea. I will see if I can find some time to add it to a later patch.

> But back to the current scope:
> 
> Reviewed-by: Maxim Cournoyer <maxim.cournoyer@gmail>

Thank you for your review !
diff mbox series

Patch

diff --git a/doc/guix.texi b/doc/guix.texi
index a66005ee9d..6e5f801a1e 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -14020,6 +14020,9 @@  the certificates of X.509 authorities from the directory pointed to by
 the @env{SSL_CERT_DIR} environment variable (@pxref{X.509
 Certificates}), unless @option{--no-check-certificate} is used.
 
+Alternatively, @command{guix download} can also retrieve a Git
+repository, possibly a specific commit, tag, or branch.
+
 The following options are available:
 
 @table @code
@@ -14044,6 +14047,26 @@  URL, which makes you vulnerable to ``man-in-the-middle'' attacks.
 @itemx -o @var{file}
 Save the downloaded file to @var{file} instead of adding it to the
 store.
+
+@item --git
+@itemx -g
+Checkout the Git repository at the latest commit on the default branch.
+
+@item --commit=@var{commit-or-tag}
+Checkout the Git repository at @var{commit-or-tag}.
+
+@var{commit-or-tag} can be either a tag or a commit defined in the Git
+repository.
+
+@item --branch=@var{branch}
+Checkout the Git repository at @var{branch}.
+
+The repository will be checked out at the latest commit of @var{branch},
+which must be a valid branch of the Git repository.
+
+@item --recursive
+@itemx -r
+Recursively clone the Git repository.
 @end table
 
 @node Invoking guix hash
diff --git a/guix/scripts/download.scm b/guix/scripts/download.scm
index 19052d5652..ce2ed68248 100644
--- a/guix/scripts/download.scm
+++ b/guix/scripts/download.scm
@@ -22,17 +22,24 @@  (define-module (guix scripts download)
   #:use-module (guix scripts)
   #:use-module (guix store)
   #:use-module (gcrypt hash)
+  #:use-module (guix hash)
   #:use-module (guix base16)
   #:use-module (guix base32)
   #:autoload   (guix base64) (base64-encode)
   #:use-module ((guix download) #:hide (url-fetch))
+  #:use-module ((guix git)
+                #:select (latest-repository-commit
+                          update-cached-checkout
+                          with-git-error-handling))
   #:use-module ((guix build download)
                 #:select (url-fetch))
+  #:use-module (guix build utils)
   #:use-module ((guix progress)
                 #:select (current-terminal-columns))
   #:use-module ((guix build syscalls)
                 #:select (terminal-columns))
   #:use-module (web uri)
+  #:use-module (ice-9 ftw)
   #:use-module (ice-9 match)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-26)
@@ -54,6 +61,57 @@  (define (download-to-file url file)
        (url-fetch url file #:mirrors %mirrors)))
     file))
 
+;; This is a simplified version of 'copy-recursively'.
+;; It allows us to filter out the ".git" subfolder.
+;; TODO: Remove when 'copy-recursively' supports '#:select?'.
+(define (copy-recursively-without-dot-git source destination)
+  (define strip-source
+    (let ((len (string-length source)))
+      (lambda (file)
+        (substring file len))))
+
+  (file-system-fold (lambda (file stat result) ; enter?
+                      (not (string-suffix? "/.git" file)))
+                    (lambda (file stat result) ; leaf
+                      (let ((dest (string-append destination
+                                                 (strip-source file))))
+                        (case (stat:type stat)
+                          ((symlink)
+                           (let ((target (readlink file)))
+                             (symlink target dest)))
+                          (else
+                           (copy-file file dest)))))
+                    (lambda (dir stat result) ; down
+                      (let ((target (string-append destination
+                                                   (strip-source dir))))
+                        (mkdir-p target)))
+                    (const #t)          ; up
+                    (const #t)          ; skip
+                    (lambda (file stat errno result)
+                      (format (current-error-port) "i/o error: ~a: ~a~%"
+                              file (strerror errno))
+                      #f)
+                    #t
+                    source))
+
+(define (git-download-to-file url file reference recursive?)
+  "Download the git repo at URL to file, checked out at REFERENCE.
+REFERENCE must be a pair argument as understood by 'latest-repository-commit'.
+Return FILE."
+  ;; 'libgit2' doesn't support the URL format generated by 'uri->string' so
+  ;; we have to do a little fixup. Dropping completely the 'file:' protocol
+  ;; part gives better performance.
+  (let ((url (cond ((string-prefix? "file://" url)
+                     (string-drop url (string-length "file://")))
+                    ((string-prefix? "file:" url)
+                     (string-drop url (string-length "file:")))
+                    (else url))))
+    (copy-recursively-without-dot-git
+     (with-git-error-handling
+      (update-cached-checkout url #:ref reference #:recursive? recursive?))
+     file))
+  file)
+
 (define (ensure-valid-store-file-name name)
   "Replace any character not allowed in a store name by an underscore."
 
@@ -67,17 +125,42 @@  (define valid
               name))
 
 
-(define* (download-to-store* url #:key (verify-certificate? #t))
+(define* (download-to-store* url
+                             #:key (verify-certificate? #t)
+                             #:allow-other-keys)
   (with-store store
     (download-to-store store url
                        (ensure-valid-store-file-name (basename url))
                        #:verify-certificate? verify-certificate?)))
 
+(define* (git-download-to-store* url
+                                 reference
+                                 recursive?
+                                 #:key (verify-certificate? #t))
+  "Download the git repository at URL to the store, checked out at REFERENCE.
+URL must specify a protocol (i.e https:// or file://), REFERENCE must be a
+pair argument as understood by 'latest-repository-commit'."
+  ;; Ensure the URL string is properly formatted  when using the 'file'
+  ;; protocol: URL is generated using 'uri->string', which returns
+  ;; "file:/path/to/file" instead of "file:///path/to/file", which in turn
+  ;; makes 'git-download-to-store' fail.
+  (let* ((file? (string-prefix? "file:" url))
+         (url (if (and file?
+                        (not (string-prefix? "file:///" url)))
+                   (string-append "file://" (string-drop url (string-length "file:")))
+                   url)))
+    (with-store store
+      ;; TODO: Verify certificate support and deactivation.
+      (with-git-error-handling
+       (latest-repository-commit store url #:recursive? recursive? #:ref reference)))))
+
 (define %default-options
   ;; Alist of default option values.
   `((format . ,bytevector->nix-base32-string)
     (hash-algorithm . ,(hash-algorithm sha256))
     (verify-certificate? . #t)
+    (git-reference . #f)
+    (recursive? . #f)
     (download-proc . ,download-to-store*)))
 
 (define (show-help)
@@ -97,6 +180,19 @@  (define (show-help)
                          do not validate the certificate of HTTPS servers "))
   (format #t (G_ "
   -o, --output=FILE      download to FILE"))
+  (format #t (G_ "
+  -g, --git              download the default branch's latest commit of the
+                         Git repository at URL"))
+  (format #t (G_ "
+      --commit=COMMIT-OR-TAG
+                         download the given commit or tag of the Git
+                         repository at URL"))
+  (format #t (G_ "
+      --branch=BRANCH    download the given branch of the Git repository
+                         at URL"))
+  (format #t (G_ "
+  -r, --recursive        download a Git repository recursively"))
+
   (newline)
   (display (G_ "
   -h, --help             display this help and exit"))
@@ -105,6 +201,13 @@  (define (show-help)
   (newline)
   (show-bug-report-information))
 
+(define (add-git-download-option result)
+  (alist-cons 'download-proc
+              ;; XXX: #:verify-certificate? currently ignored.
+              (lambda* (url #:key verify-certificate? ref recursive?)
+                (git-download-to-store* url ref recursive?))
+              (alist-delete 'download result)))
+
 (define %options
   ;; Specifications of the command-line options.
   (list (option '(#\f "format") #t #f
@@ -136,11 +239,36 @@  (define fmt-proc
                   (alist-cons 'verify-certificate? #f result)))
         (option '(#\o "output") #t #f
                 (lambda (opt name arg result)
-                  (alist-cons 'download-proc
-                              (lambda* (url #:key verify-certificate?)
-                                (download-to-file url arg))
-                              (alist-delete 'download result))))
-
+                  (let* ((git
+                          (assoc-ref result 'git-reference)))
+                    (if git
+                        (alist-cons 'download-proc
+                                    (lambda* (url #:key verify-certificate? ref recursive?)
+                                      (git-download-to-file url arg (assoc-ref result 'git-reference) recursive?))
+                                    (alist-delete 'download result))
+                        (alist-cons 'download-proc
+                                    (lambda* (url #:key verify-certificate? #:allow-other-keys)
+                                      (download-to-file url arg))
+                                    (alist-delete 'download result))))))
+        (option '(#\g "git") #f #f
+                (lambda (opt name arg result)
+                  ;; Ignore this option if 'commit' or 'branch' has
+                  ;; already been provided
+                  (if (assoc-ref result 'git-reference)
+                      result
+                      (alist-cons 'git-reference '()
+                                  (add-git-download-option result)))))
+        (option '("commit") #t #f
+                (lambda (opt name arg result)
+                  (alist-cons 'git-reference `(tag-or-commit . ,arg)
+                              (add-git-download-option result))))
+        (option '("branch") #t #f
+                (lambda (opt name arg result)
+                  (alist-cons 'git-reference `(branch . ,arg)
+                              (add-git-download-option result))))
+        (option '(#\r "recursive") #f #f
+                (lambda (opt name arg result)
+                  (alist-cons 'recursive? #t result)))
         (option '(#\h "help") #f #f
                 (lambda args
                   (leave-on-EPIPE (show-help))
@@ -183,12 +311,14 @@  (define (parse-options)
                                   (terminal-columns)))
                     (fetch (uri->string uri)
                            #:verify-certificate?
-                           (assq-ref opts 'verify-certificate?))))
-           (hash  (call-with-input-file
-                      (or path
-                          (leave (G_ "~a: download failed~%")
-                                 arg))
-                    (cute port-hash (assoc-ref opts 'hash-algorithm) <>)))
+                           (assq-ref opts 'verify-certificate?)
+                           #:ref (assq-ref opts 'git-reference)
+                           #:recursive? (assq-ref opts 'recursive?))))
+           (hash  (let* ((path* (or path
+                                  (leave (G_ "~a: download failed~%")
+                                         arg))))
+                   (file-hash* path*
+                               #:algorithm (assoc-ref opts 'hash-algorithm))))
            (fmt   (assq-ref opts 'format)))
       (format #t "~a~%~a~%" path (fmt hash))
       #t)))
diff --git a/tests/guix-download.sh b/tests/guix-download.sh
index f4cb335eef..d4cd2ea6b9 100644
--- a/tests/guix-download.sh
+++ b/tests/guix-download.sh
@@ -16,6 +16,12 @@ 
 # You should have received a copy of the GNU General Public License
 # along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
+# Define some files/folders needed for the tests.
+output="t-download-$$"
+test_git_repo="$(mktemp -d)"
+output_dir="t-archive-dir-$$"
+trap 'rm -rf "$test_git_repo" ; rm -f "$output" ; rm -rf "$output_dir"' EXIT
+
 #
 # Test the `guix download' command-line utility.
 #
@@ -36,8 +42,6 @@  guix download "file://$abs_top_srcdir/README"
 guix download "$abs_top_srcdir/README"
 
 # This one too, even if it cannot talk to the daemon.
-output="t-download-$$"
-trap 'rm -f "$output"' EXIT
 GUIX_DAEMON_SOCKET="/nowhere" guix download -o "$output" \
 		  "file://$abs_top_srcdir/README"
 cmp "$output" "$abs_top_srcdir/README"
@@ -45,4 +49,41 @@  cmp "$output" "$abs_top_srcdir/README"
 # This one should fail.
 guix download "file:///does-not-exist" "file://$abs_top_srcdir/README" && false
 
+# Test git support with local repository.
+# First, create a dummy git repo in the temporary directory.
+(
+    cd $test_git_repo
+    git init
+    touch test
+    git config user.name "User"
+    git config user.email "user@domain"
+    git add test
+    git commit -m "Commit"
+    git tag -a -m "v1" v1
+)
+
+# Extract commit number.
+commit=$((cd $test_git_repo && git log) | head -n 1 | cut -f2 -d' ')
+
+# We expect that guix hash is working properly or at least that the output of
+# 'guix download' is consistent with 'guix hash'.
+expected_hash=$(guix hash -rx $test_git_repo)
+
+# Test the different options
+for option in "" "--commit=$commit" "--commit=v1" "--branch=master"
+do
+    command_output="$(guix download --git $option "file://$test_git_repo")"
+    computed_hash="$(echo $command_output | cut -f2 -d' ')"
+    store_path="$(echo $command_output | cut -f1 -d' ')"
+    [ "$expected_hash" = "$computed_hash" ]
+    diff -r -x ".git" $test_git_repo $store_path
+done
+
+# Should fail.
+guix download --git --branch=non_existent "file://$test_git_repo" && false
+
+# Same but download to file instead of store.
+guix download --git "file://$test_git_repo" -o $output_dir
+diff -r -x ".git" $test_git_repo $output_dir
+
 exit 0