diff mbox series

[bug#39547] website: Provide JSON sources list used by Software Heritage.

Message ID 20200210170418.32076-1-zimon.toutoune@gmail.com
State Accepted
Headers show
Series [bug#39547] website: Provide JSON sources list used by Software Heritage. | expand

Checks

Context Check Description
cbaines/applying patch fail View Laminar job

Commit Message

Simon Tournier Feb. 10, 2020, 5:04 p.m. UTC
Format discussed here <https://forge.softwareheritage.org/D2025#51269>.

* website/apps/packages/builder.scm (sources-json-builder): New procedure.
---
 website/apps/packages/builder.scm | 62 +++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

Comments

Ludovic Courtès Feb. 14, 2020, 8:40 a.m. UTC | #1
Hello,

zimoun <zimon.toutoune@gmail.com> skribis:

> Format discussed here <https://forge.softwareheritage.org/D2025#51269>.
>
> * website/apps/packages/builder.scm (sources-json-builder): New procedure.

[...]

> +(define (sources-json-builder)
> +  "Return a JSON page listing all the sources."

Please add a reference to
<https://forge.softwareheritage.org/D2025#51269> here.

> +  (define (origin->json origin)
> +    (define method
> +      (origin-method origin))
> +
> +    (define uri                         ;represented as string
> +      (origin-uri origin))
> +
> +    (define (mirror->url uri)
> +      (uri->string (car (maybe-expand-mirrors uri %mirrors))))
> +
> +    (define (resolve urls)
> +      (let* ((url (car urls))
> +             (uri (string->uri url))
> +             (rest (cdr urls)))
> +        (case (uri-scheme uri)
> +          ((mirror) (mirror->url uri))
> +          ((http) url)
> +          ((https) url)
> +          (else
> +           (if (null? rest)
> +               url
> +               (resolve rest))))))
> +
> +    `((type . ,(cond ((eq? url-fetch method) 'url)
> +                     ((eq? git-fetch method) 'git)
> +                     ((eq? svn-fetch method) 'svn)
> +                     (else                   #nil)))
> +      ,@(cond ((eq? url-fetch method)
> +               `(("url" . ,(match uri
> +				  ((? string? url) (mirror->url (string->uri url)))
> +				  ((urls ...) (resolve urls))))))
> +              ((eq? git-fetch method)
> +               `(("git_url" . ,(git-reference-url uri))))
> +              ((eq? svn-fetch method)
> +               `(("svn_url" . ,(svn-reference-url uri))))
> +              (else '()))
> +      ,@(if (eq? method git-fetch)
> +            `(("git_ref" . ,(git-reference-commit uri)))
> +            '())
> +      ,@(if (eq? method svn-fetch)
> +            `(("svn_revision" . ,(svn-reference-revision
> +                                  uri)))
> +            '())))

Could you, in a first patch, move ‘origin->json’ out of
‘packages-json-builder’, and in a second patch, add mirror-expansion
feature?

For mirror:// expansion, I think you can just write something like:

  (define uris
    (append-map (cut maybe-expand-mirrors <> %mirrors)
                (match url
                  ((_ ...) (map string->uri url))
                  (_       (list (string->uri url))))))

and then pick the first element of the list.

In parallel, I’d recommend suggesting a format change or addition that
would allow us to provide all the URLs.  :-)

With those changes in place, I think we’ll be ready to go!

Thanks for working on it, Simon!

Ludo’.
Simon Tournier Feb. 14, 2020, 9:04 a.m. UTC | #2
Hi Ludo,

Thank you for the review.


On Fri, 14 Feb 2020 at 09:40, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:
> zimoun <zimon.toutoune@gmail.com> skribis:

> > Format discussed here <https://forge.softwareheritage.org/D2025#51269>.
> >
> > * website/apps/packages/builder.scm (sources-json-builder): New procedure.
>
> [...]
>
> > +(define (sources-json-builder)
> > +  "Return a JSON page listing all the sources."
>
> Please add a reference to
> <https://forge.softwareheritage.org/D2025#51269> here.

ok


> > +  (define (origin->json origin)
> > +    (define method
> > +      (origin-method origin))
> > +
> > +    (define uri                         ;represented as string
> > +      (origin-uri origin))
> > +
> > +    (define (mirror->url uri)
> > +      (uri->string (car (maybe-expand-mirrors uri %mirrors))))
> > +
> > +    (define (resolve urls)
> > +      (let* ((url (car urls))
> > +             (uri (string->uri url))
> > +             (rest (cdr urls)))
> > +        (case (uri-scheme uri)
> > +          ((mirror) (mirror->url uri))
> > +          ((http) url)
> > +          ((https) url)
> > +          (else
> > +           (if (null? rest)
> > +               url
> > +               (resolve rest))))))
> > +
> > +    `((type . ,(cond ((eq? url-fetch method) 'url)
> > +                     ((eq? git-fetch method) 'git)
> > +                     ((eq? svn-fetch method) 'svn)
> > +                     (else                   #nil)))
> > +      ,@(cond ((eq? url-fetch method)
> > +               `(("url" . ,(match uri
> > +                               ((? string? url) (mirror->url (string->uri url)))
> > +                               ((urls ...) (resolve urls))))))
> > +              ((eq? git-fetch method)
> > +               `(("git_url" . ,(git-reference-url uri))))
> > +              ((eq? svn-fetch method)
> > +               `(("svn_url" . ,(svn-reference-url uri))))
> > +              (else '()))
> > +      ,@(if (eq? method git-fetch)
> > +            `(("git_ref" . ,(git-reference-commit uri)))
> > +            '())
> > +      ,@(if (eq? method svn-fetch)
> > +            `(("svn_revision" . ,(svn-reference-revision
> > +                                  uri)))
> > +            '())))
>
> Could you, in a first patch, move ‘origin->json’ out of
> ‘packages-json-builder’, and in a second patch, add mirror-expansion
> feature?

Yes, I will try but I am a bit lost. There is:
 - packages-json-builder that I did not modified
 - sources-json-builder which the "adaptation" of the former to output
to the sources.json format.

Well, do you want a refactor of 'origin->json' shared by the 2
"{sources,packages}-json-builder"?


> For mirror:// expansion, I think you can just write something like:
>
>   (define uris
>     (append-map (cut maybe-expand-mirrors <> %mirrors)
>                 (match url
>                   ((_ ...) (map string->uri url))
>                   (_       (list (string->uri url))))))
>
> and then pick the first element of the list.

Yes, it is better. :-)


> In parallel, I’d recommend suggesting a format change or addition that
> would allow us to provide all the URLs.  :-)

Ok :-)


> With those changes in place, I think we’ll be ready to go!

Working on updating the package Julia, I have seen that some patches
are 'origin' and live for example upstream
(https://blablab/project.git/patches/fancy-name.patch) and the current
patch will not list them. Therefore, if upstream disappears and/or
change in-place the patches, Guix would not be able to re-build in the
future (time-machine).

I am trying to implement a recursive exporter.
Well, I will try to make that the v2 contains your suggestions and all
the patches.



Cheers,
simon
Ludovic Courtès Feb. 14, 2020, 10:20 a.m. UTC | #3
Hi,

zimoun <zimon.toutoune@gmail.com> skribis:

>> Could you, in a first patch, move ‘origin->json’ out of
>> ‘packages-json-builder’, and in a second patch, add mirror-expansion
>> feature?
>
> Yes, I will try but I am a bit lost. There is:
>  - packages-json-builder that I did not modified
>  - sources-json-builder which the "adaptation" of the former to output
> to the sources.json format.
>
> Well, do you want a refactor of 'origin->json' shared by the 2
> "{sources,packages}-json-builder"?

Yes, exactly.  No reason to have two copies of that code.

> Working on updating the package Julia, I have seen that some patches
> are 'origin' and live for example upstream
> (https://blablab/project.git/patches/fancy-name.patch) and the current
> patch will not list them. Therefore, if upstream disappears and/or
> change in-place the patches, Guix would not be able to re-build in the
> future (time-machine).

Right.  I guess you can implement that in the next version.  What you
propose is already nice to have.

Ludo’.
Simon Tournier Feb. 17, 2020, 5:59 p.m. UTC | #4
Hi Ludo,

On Fri, 14 Feb 2020 at 11:20, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:
> zimoun <zimon.toutoune@gmail.com> skribis:

> >> Could you, in a first patch, move ‘origin->json’ out of
> >> ‘packages-json-builder’, and in a second patch, add mirror-expansion
> >> feature?
> >
> > Yes, I will try but I am a bit lost. There is:
> >  - packages-json-builder that I did not modified
> >  - sources-json-builder which the "adaptation" of the former to output
> > to the sources.json format.
> >
> > Well, do you want a refactor of 'origin->json' shared by the 2
> > "{sources,packages}-json-builder"?
>
> Yes, exactly.  No reason to have two copies of that code.

The minor "issue" is the 'match' piece of 'origin->json':

--8<---------------cut here---------------start------------->8---
               `(("url" . ,(match uri
                             ((? string? url) (vector url))
                             ((urls ...) (list->vector urls))))))
--8<---------------cut here---------------end--------------->8---

Because in the 'sources.json' it is always one unique string.
However, in 'packages.json' it is always a vector of one or several
elements. And in the previous patch, the job of 'resolve' was to
uniquify these elements for 'sources.json'.

For example, see the package 'adns' in 'packages.json'

--8<---------------cut here---------------start------------->8---
{
    "name": "adns",
    "version": "1.5.1",
    "source": {
      "type": "url",
      "url": [
        "mirror://gnu/adns/adns-1.5.1.tar.gz",
        "http://www.chiark.greenend.org.uk/~ian/adns/ftp/adns-1.5.1.tar.gz"
      ]
    },
    "synopsis": "Asynchronous DNS client library and utilities",
    "homepage": "https://www.gnu.org/software/adns/",
    "location": "gnu/packages/adns.scm:32"
  },
--8<---------------cut here---------------end--------------->8---



Just to be on the same wavelength, does 'packages.json' stay as it is
(vector for the source url field)?
Or is 'packages.json' modified to record only one url for the source
url field with mirror:// expanded?


Cheers,
simon
Ludovic Courtès Feb. 18, 2020, 8:24 a.m. UTC | #5
Hi!

zimoun <zimon.toutoune@gmail.com> skribis:

> On Fri, 14 Feb 2020 at 11:20, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:
>> zimoun <zimon.toutoune@gmail.com> skribis:
>
>> >> Could you, in a first patch, move ‘origin->json’ out of
>> >> ‘packages-json-builder’, and in a second patch, add mirror-expansion
>> >> feature?
>> >
>> > Yes, I will try but I am a bit lost. There is:
>> >  - packages-json-builder that I did not modified
>> >  - sources-json-builder which the "adaptation" of the former to output
>> > to the sources.json format.
>> >
>> > Well, do you want a refactor of 'origin->json' shared by the 2
>> > "{sources,packages}-json-builder"?
>>
>> Yes, exactly.  No reason to have two copies of that code.
>
> The minor "issue" is the 'match' piece of 'origin->json':
>
>                `(("url" . ,(match uri
>                              ((? string? url) (vector url))
>                              ((urls ...) (list->vector urls))))))
>
>
> Because in the 'sources.json' it is always one unique string.
> However, in 'packages.json' it is always a vector of one or several
> elements. And in the previous patch, the job of 'resolve' was to
> uniquify these elements for 'sources.json'.

Ah, good point.  What about adding a keyword parameter to ‘origin->json’
that would tell whether to return a single URL or a list thereof?

> For example, see the package 'adns' in 'packages.json'
>
> {
>     "name": "adns",
>     "version": "1.5.1",
>     "source": {
>       "type": "url",
>       "url": [
>         "mirror://gnu/adns/adns-1.5.1.tar.gz",
>         "http://www.chiark.greenend.org.uk/~ian/adns/ftp/adns-1.5.1.tar.gz"
>       ]
>     },
>     "synopsis": "Asynchronous DNS client library and utilities",
>     "homepage": "https://www.gnu.org/software/adns/",
>     "location": "gnu/packages/adns.scm:32"
>   },
>
>
>
> Just to be on the same wavelength, does 'packages.json' stay as it is
> (vector for the source url field)?

I think ‘packages.json’ should still return a list of URLs.

I’ve also added a comment asking whether ‘sources.json’ could be changed
to contain a list of URLs:

  https://forge.softwareheritage.org/D2025#64037

> Or is 'packages.json' modified to record only one url for the source
> url field with mirror:// expanded?

Yeah, I think we can expand mirror URLs.

Thanks,
Ludo’.
Simon Tournier Feb. 18, 2020, 8:38 a.m. UTC | #6
Hi Ludo,

On Tue, 18 Feb 2020 at 09:24, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:
> zimoun <zimon.toutoune@gmail.com> skribis:
> > On Fri, 14 Feb 2020 at 11:20, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:
> >> zimoun <zimon.toutoune@gmail.com> skribis:

> >> >> Could you, in a first patch, move ‘origin->json’ out of
> >> >> ‘packages-json-builder’, and in a second patch, add mirror-expansion
> >> >> feature?
> >> >
> >> > Yes, I will try but I am a bit lost. There is:
> >> >  - packages-json-builder that I did not modified
> >> >  - sources-json-builder which the "adaptation" of the former to output
> >> > to the sources.json format.
> >> >
> >> > Well, do you want a refactor of 'origin->json' shared by the 2
> >> > "{sources,packages}-json-builder"?
> >>
> >> Yes, exactly.  No reason to have two copies of that code.
> >
> > The minor "issue" is the 'match' piece of 'origin->json':
> >
> >                `(("url" . ,(match uri
> >                              ((? string? url) (vector url))
> >                              ((urls ...) (list->vector urls))))))
> >
> >
> > Because in the 'sources.json' it is always one unique string.
> > However, in 'packages.json' it is always a vector of one or several
> > elements. And in the previous patch, the job of 'resolve' was to
> > uniquify these elements for 'sources.json'.
>
> Ah, good point.  What about adding a keyword parameter to ‘origin->json’
> that would tell whether to return a single URL or a list thereof?

Sharing the same code.

 - packages.json: as it is now.
 - sources.json: expand mirror and resolve to display only URL.

I am going for that.


> > Just to be on the same wavelength, does 'packages.json' stay as it is
> > (vector for the source url field)?
>
> I think ‘packages.json’ should still return a list of URLs.

Ok.


> I’ve also added a comment asking whether ‘sources.json’ could be changed
> to contain a list of URLs:
>
>   https://forge.softwareheritage.org/D2025#64037

I have seen. :-)
Even, all the mirrors list should be expanded, not only the first one.


> > Or is 'packages.json' modified to record only one url for the source
> > url field with mirror:// expanded?
>
> Yeah, I think we can expand mirror URLs.

So, 'mirror://' will become a list (from '%mirrors') of URLs, right?


Cheers,
simon
Ludovic Courtès Feb. 18, 2020, 8:43 a.m. UTC | #7
Hi,

zimoun <zimon.toutoune@gmail.com> skribis:

> On Tue, 18 Feb 2020 at 09:24, Ludovic Courtès <ludovic.courtes@inria.fr> wrote:

[...]

>> > Or is 'packages.json' modified to record only one url for the source
>> > url field with mirror:// expanded?
>>
>> Yeah, I think we can expand mirror URLs.
>
> So, 'mirror://' will become a list (from '%mirrors') of URLs, right?

Yup!

Thanks,
Ludo’.
Simon Tournier March 3, 2020, 6 p.m. UTC | #8
Hi,

Some upstream sources could disappear between the moment when the
package enters in Guix and the moment when SWH ingests it. (As an edge
case, let consider the recent harfbuzz one.)

Now the format 'sources.json' should accept an array. Does it make
sense to put in this array the content addressed mirrors
(ci.guix.gnu.org and tarball.nixos.org)?

What do you think?


Cheers,
simon
Ludovic Courtès March 5, 2020, 4:19 p.m. UTC | #9
Hello!

zimoun <zimon.toutoune@gmail.com> skribis:

> Some upstream sources could disappear between the moment when the
> package enters in Guix and the moment when SWH ingests it. (As an edge
> case, let consider the recent harfbuzz one.)
>
> Now the format 'sources.json' should accept an array. Does it make
> sense to put in this array the content addressed mirrors
> (ci.guix.gnu.org and tarball.nixos.org)?

Yes, I think ‘sources.json’ should contain, for each item, a list of
URLs rather than a single URL, and mirror:// URLs should be expanded.

However, ‘sources.json’ is currently specified to contain a single URL,
so we’re stuck with a single URL for now.

Is that what you meant?

Thanks,
Ludo’.
Simon Tournier March 6, 2020, 9:26 a.m. UTC | #10
Hi Ludo,

On Thu, 5 Mar 2020 at 17:19, Ludovic Courtès <ludo@gnu.org> wrote:
> zimoun <zimon.toutoune@gmail.com> skribis:
>
> > Some upstream sources could disappear between the moment when the
> > package enters in Guix and the moment when SWH ingests it. (As an edge
> > case, let consider the recent harfbuzz one.)
> >
> > Now the format 'sources.json' should accept an array. Does it make
> > sense to put in this array the content addressed mirrors
> > (ci.guix.gnu.org and tarball.nixos.org)?
>
> Yes, I think ‘sources.json’ should contain, for each item, a list of
> URLs rather than a single URL, and mirror:// URLs should be expanded.

Yes, now it is accepted [1] and it becomes the format of the
'sources.json' file.

[1] https://forge.softwareheritage.org/D2025#65063


The patch set v2 supports the first discussion of the format (single
url); which is not relevant anymore.
The patch set attached in v3 [2] supports the array of URLs.

[2] https://debbugs.gnu.org/cgi/bugreport.cgi?bug=39547#35



> However, ‘sources.json’ is currently specified to contain a single URL,
> so we’re stuck with a single URL for now.
>
> Is that what you meant?

What I mean is: append %content-addressed-mirrors for each package in
the list of URLs.



Cheers,
simon
Ludovic Courtès March 6, 2020, 10:57 a.m. UTC | #11
Hi Simon!

zimoun <zimon.toutoune@gmail.com> skribis:

> On Thu, 5 Mar 2020 at 17:19, Ludovic Courtès <ludo@gnu.org> wrote:

[...]

>> Yes, I think ‘sources.json’ should contain, for each item, a list of
>> URLs rather than a single URL, and mirror:// URLs should be expanded.
>
> Yes, now it is accepted [1] and it becomes the format of the
> 'sources.json' file.
>
> [1] https://forge.softwareheritage.org/D2025#65063

Excellent!

> The patch set v2 supports the first discussion of the format (single
> url); which is not relevant anymore.
> The patch set attached in v3 [2] supports the array of URLs.
>
> [2] https://debbugs.gnu.org/cgi/bugreport.cgi?bug=39547#35

Oh sorry, I hadn’t seen it.

>> However, ‘sources.json’ is currently specified to contain a single URL,
>> so we’re stuck with a single URL for now.
>>
>> Is that what you meant?
>
> What I mean is: append %content-addressed-mirrors for each package in
> the list of URLs.

Now I understand.  :-)  So yes, sure, the array of URLs in
‘sources.json’ could include a ci.guix.gnu.org URL and maybe the other
ones in there too.  For a subsequent patch I guess?

Thanks!

Ludo’.
diff mbox series

Patch

diff --git a/website/apps/packages/builder.scm b/website/apps/packages/builder.scm
index 9dc44c9..5279096 100644
--- a/website/apps/packages/builder.scm
+++ b/website/apps/packages/builder.scm
@@ -2,6 +2,7 @@ 
 ;;; Copyright © 2017 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
 ;;; Copyright © 2019 Nicolò Balzarotti <nicolo@nixo.xyz>
+;;; Copyright © 2020 Simon Tournier <zimon.toutoune@gmail.com>
 ;;;
 ;;; Initially written by sirgazil
 ;;; who waives all copyright interest on this file.
@@ -37,6 +38,8 @@ 
   #:use-module (haunt page)
   #:use-module (haunt utils)
   #:use-module (srfi srfi-1)
+  #:use-module ((web uri) #:select (string->uri uri->string uri-scheme))
+  #:use-module ((guix build download) #:select (maybe-expand-mirrors))
   #:use-module (guix packages)
   #:use-module (guix download)
   #:use-module (guix git-download)
@@ -70,6 +73,7 @@ 
   (flatten
    (list
     (index-builder)
+    (sources-json-builder)
     (packages-json-builder)
     (packages-builder)
     (package-list-builder))))
@@ -84,6 +88,64 @@ 
   ;; Maximum number of packages shown on /packages.
   30)
 
+(define (sources-json-builder)
+  "Return a JSON page listing all the sources."
+  (define (origin->json origin)
+    (define method
+      (origin-method origin))
+
+    (define uri                         ;represented as string
+      (origin-uri origin))
+
+    (define (mirror->url uri)
+      (uri->string (car (maybe-expand-mirrors uri %mirrors))))
+
+    (define (resolve urls)
+      (let* ((url (car urls))
+             (uri (string->uri url))
+             (rest (cdr urls)))
+        (case (uri-scheme uri)
+          ((mirror) (mirror->url uri))
+          ((http) url)
+          ((https) url)
+          (else
+           (if (null? rest)
+               url
+               (resolve rest))))))
+
+    `((type . ,(cond ((eq? url-fetch method) 'url)
+                     ((eq? git-fetch method) 'git)
+                     ((eq? svn-fetch method) 'svn)
+                     (else                   #nil)))
+      ,@(cond ((eq? url-fetch method)
+               `(("url" . ,(match uri
+				  ((? string? url) (mirror->url (string->uri url)))
+				  ((urls ...) (resolve urls))))))
+              ((eq? git-fetch method)
+               `(("git_url" . ,(git-reference-url uri))))
+              ((eq? svn-fetch method)
+               `(("svn_url" . ,(svn-reference-url uri))))
+              (else '()))
+      ,@(if (eq? method git-fetch)
+            `(("git_ref" . ,(git-reference-commit uri)))
+            '())
+      ,@(if (eq? method svn-fetch)
+            `(("svn_revision" . ,(svn-reference-revision
+                                  uri)))
+            '())))
+
+  (define (package->json package)
+    `(,@(if (origin? (package-source package))
+            (origin->json (package-source package))
+            `(("type" . "no-origin")
+              ("name" . ,(package-name package))))))
+
+  (make-page "sources.json"
+             `(("sources" . ,(list->vector (map package->json (all-packages))))
+               ("version" . "1"))
+             scm->json))
+
+
 (define (packages-json-builder)
   "Return a JSON page listing all packages."
   (define (origin->json origin)