diff mbox series

[bug#50620,1/2] guix: packages: Document 'computed-origin-method'.

Message ID 20210916114734.2686426-1-zimon.toutoune@gmail.com
State Accepted
Headers show
Series Unify 'computed-origin-method' (linux, icecat) | expand

Checks

Context Check Description
cbaines/applying patch fail View Laminar job
cbaines/issue success View issue

Commit Message

Simon Tournier Sept. 16, 2021, 11:47 a.m. UTC
The 'computed-origin-method' had been introduced as a workaround limitations
in the 'snippet' mechanism.  The procedure is duplicated which makes hard to
automatically detects packages using it.

* guix/packages.scm (computed-origin-method): Move procedure from...
* gnu/packages/gnuzilla.scm: ...here and...
* gnu/packages/gnuzilla.scm: ...there.
---
 gnu/packages/gnuzilla.scm | 14 ++------------
 gnu/packages/linux.scm    | 14 ++------------
 guix/packages.scm         | 23 ++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 25 deletions(-)

Comments

Liliana Marie Prikler Sept. 16, 2021, 3:53 p.m. UTC | #1
Hi,

Am Donnerstag, den 16.09.2021, 13:47 +0200 schrieb zimoun:
> The 'computed-origin-method' had been introduced as a workaround
> limitations in the 'snippet' mechanism.  The procedure is duplicated
> which makes hard to automatically detects packages using it.
> 
> * guix/packages.scm (computed-origin-method): Move procedure from...
> * gnu/packages/gnuzilla.scm: ...here and...
> * gnu/packages/gnuzilla.scm: ...there.
> ---
>  gnu/packages/gnuzilla.scm | 14 ++------------
>  gnu/packages/linux.scm    | 14 ++------------
>  guix/packages.scm         | 23 ++++++++++++++++++++++-
>  3 files changed, 26 insertions(+), 25 deletions(-)
> 
> diff --git a/gnu/packages/gnuzilla.scm b/gnu/packages/gnuzilla.scm
> index 431b487fd0..9f6e1f24e1 100644
> --- a/gnu/packages/gnuzilla.scm
> +++ b/gnu/packages/gnuzilla.scm
> @@ -682,18 +682,8 @@ in C/C++.")
>     ("1j6l66v1xw27z8w78mpsnmqgv8m277mf4r0hgqcrb4zx7xc2vqyy"
> "527e5e090608" "zh-CN")
>     ("1frwx35klpyz3sdwrkz7945ivb2dwaawhhyfnz4092h9hn7rc4ky"
> "6cd366ad2947" "zh-TW")))
>  
> -(define* (computed-origin-method gexp-promise hash-algo hash
> -                                 #:optional (name "source")
> -                                 #:key (system (%current-system))
> -                                 (guile (default-guile)))
> -  "Return a derivation that executes the G-expression that results
> -from forcing GEXP-PROMISE."
> -  (mlet %store-monad ((guile (package->derivation guile system)))
> -    (gexp->derivation (or name "computed-origin")
> -                      (force gexp-promise)
> -                      #:graft? #f       ;nothing to graft
> -                      #:system system
> -                      #:guile-for-build guile)))
> +;; XXXX: Workaround 'snippet' limitations.
> +(define computed-origin-method (@@ (guix packages) computed-origin-
> method))
>  
>  (define %icecat-version "78.14.0-guix0-preview1")
>  (define %icecat-build-id "20210907000000") ;must be of the form
> YYYYMMDDhhmmss
> diff --git a/gnu/packages/linux.scm b/gnu/packages/linux.scm
> index 285eb132f4..eb792be9a3 100644
> --- a/gnu/packages/linux.scm
> +++ b/gnu/packages/linux.scm
> @@ -216,18 +216,8 @@ defconfig.  Return the appropriate make target
> if applicable, otherwise return
>            (file-name (string-append "linux-libre-deblob-check-"
> version "-" gnu-revision))
>            (sha256 deblob-check-hash))))
>  
> -(define* (computed-origin-method gexp-promise hash-algo hash
> -                                 #:optional (name "source")
> -                                 #:key (system (%current-system))
> -                                 (guile (default-guile)))
> -  "Return a derivation that executes the G-expression that results
> -from forcing GEXP-PROMISE."
> -  (mlet %store-monad ((guile (package->derivation guile system)))
> -    (gexp->derivation (or name "computed-origin")
> -                      (force gexp-promise)
> -                      #:graft? #f       ;nothing to graft
> -                      #:system system
> -                      #:guile-for-build guile)))
> +;; XXXX: Workaround 'snippet' limitations
> +(define computed-origin-method (@@ (guix packages) computed-origin-
> method))
>  
>  (define (make-linux-libre-source version
>                                   upstream-source
> diff --git a/guix/packages.scm b/guix/packages.scm
> index ad7937b4fb..8c3a0b0b7b 100644
> --- a/guix/packages.scm
> +++ b/guix/packages.scm
> @@ -1,6 +1,6 @@
>  ;;; GNU Guix --- Functional package management for GNU
>  ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
> 2020, 2021 Ludovic Courtès <ludo@gnu.org>
> -;;; Copyright © 2014, 2015, 2017, 2018 Mark H Weaver <mhw@netris.org
> >
> +;;; Copyright © 2014, 2015, 2017, 2018, 2019 Mark H Weaver <
> mhw@netris.org>
>  ;;; Copyright © 2015 Eric Bavier <bavier@member.fsf.org>
>  ;;; Copyright © 2016 Alex Kost <alezost@gmail.com>
>  ;;; Copyright © 2017, 2019, 2020 Efraim Flashner <
> efraim@flashner.co.il>
> @@ -344,6 +344,27 @@ name of its URI."
>           ;; git, svn, cvs, etc. reference
>           #f))))
>  
> +;; Work around limitations in the 'snippet' mechanism.  It is not
> possible for
> +;; a 'snippet' to produce a tarball with a different base name than
> the
> +;; original downloaded source.  Moreover, cherry picking dozens of
> upsteam
> +;; patches and applying them suddenly is often impractical;
> especially when a
> +;; comprehensive code reformatting is done upstream.  Mainly
> designed for
> +;; Linux and IceCat packages.
> +;; XXXX: do not make part of public API (export) such radical
> capability
> +;; before a detailed review process.
> +(define* (computed-origin-method gexp-promise hash-algo hash
> +                                 #:optional (name "source")
> +                                 #:key (system (%current-system))
> +                                 (guile (default-guile)))
> +  "Return a derivation that executes the G-expression that results
> +from forcing GEXP-PROMISE."
> +  (mlet %store-monad ((guile (package->derivation guile system)))
> +    (gexp->derivation (or name "computed-origin")
> +                      (force gexp-promise)
> +                      #:graft? #f       ;nothing to graft
> +                      #:system system
> +                      #:guile-for-build guile)))
> +
>  
>  (define %supported-systems
>    ;; This is the list of system types that are supported.  By
> default, we
I think that rather than putting this into (guix packages) itself, we
might want to put it into its own file like (guix computed-origins) and
choose a method name that is actually a verb, similar to git-fetch or
svn-fetch.  Perhaps simply call it compute-origin?

If done this way, there'd be the benefit that modules with packages
using this thing would have to explicitly request the presence of the
symbol through their use-modules clauses.  WDYT?
Mark H Weaver Sept. 16, 2021, 11:38 p.m. UTC | #2
Hi Liliana,

Liliana Marie Prikler <liliana.prikler@gmail.com> wrote:
> I think that rather than putting this into (guix packages) itself, we
> might want to put it into its own file like (guix computed-origins) and
> choose a method name that is actually a verb, similar to git-fetch or
> svn-fetch.  Perhaps simply call it compute-origin?

These suggestions sound fine to me, although I don't have a strong
opinion either way.  I'm happy to leave these details to others to
decide.

> If done this way, there'd be the benefit that modules with packages
> using this thing would have to explicitly request the presence of the
> symbol through their use-modules clauses.

Actually, for better or worse, Guile's '@@' form does not require the
named module to be imported using 'use-modules', so I don't think this
benefit strictly exists as stated above.  However, I agree that it's
good practice to list all imported modules in the '#:use-module' clauses
at the top of the file wherever possible [*], and that there may be some
benefit in declaring the use of 'computed-origins' at the top of each
file.

      Thanks,
        Mark

[*] It's not always possible in the presence of cyclic module dependencies.
Simon Tournier Sept. 17, 2021, 8:41 a.m. UTC | #3
Hi Liliana and Mark,

Thanks for the comments.

On Fri, 17 Sept 2021 at 01:40, Mark H Weaver <mhw@netris.org> wrote:
> Liliana Marie Prikler <liliana.prikler@gmail.com> wrote:

> > I think that rather than putting this into (guix packages) itself, we
> > might want to put it into its own file like (guix computed-origins) and
> > choose a method name that is actually a verb, similar to git-fetch or
> > svn-fetch.  Perhaps simply call it compute-origin?
>
> These suggestions sound fine to me, although I don't have a strong
> opinion either way.  I'm happy to leave these details to others to
> decide.

I do not have a strong opinion either.  If no one comes with a better
name, I will pick the suggested one. :-)

        There are only two hard things in Computer Science: cache
        invalidation and naming things.
                                                      -- Internet


> > If done this way, there'd be the benefit that modules with packages
> > using this thing would have to explicitly request the presence of the
> > symbol through their use-modules clauses.
>
> Actually, for better or worse, Guile's '@@' form does not require the
> named module to be imported using 'use-modules', so I don't think this
> benefit strictly exists as stated above.  However, I agree that it's
> good practice to list all imported modules in the '#:use-module' clauses
> at the top of the file wherever possible [*], and that there may be some
> benefit in declaring the use of 'computed-origins' at the top of each
> file.

I am not deeply familiar with Guile module.

I chose to put this in (guix packages) instead of its own module
because the module would contain only one function and nothing
exported.  The aim for now, as discussed, is to not make this 'method'
part of the public API.

Then if the function is not exported by the module, the '#:use-module'
does not have an effect, right?

In this case, does it make sense to put this in its own module?

Initially, I put the '@@' right after the '#:use-module's but then I
changed my mind; I do not remember why.  Anyway, yeah it is better at
the top.


Cheers,
simon
Mark H Weaver Sept. 28, 2021, 9:36 a.m. UTC | #4
Hi Simon,

zimoun <zimon.toutoune@gmail.com> writes:
> On Fri, 17 Sept 2021 at 01:40, Mark H Weaver <mhw@netris.org> wrote:
>> Liliana Marie Prikler <liliana.prikler@gmail.com> wrote:
[...]
>> > If done this way, there'd be the benefit that modules with packages
>> > using this thing would have to explicitly request the presence of the
>> > symbol through their use-modules clauses.
>>
>> Actually, for better or worse, Guile's '@@' form does not require the
>> named module to be imported using 'use-modules', so I don't think this
>> benefit strictly exists as stated above.  However, I agree that it's
>> good practice to list all imported modules in the '#:use-module' clauses
>> at the top of the file wherever possible [*], and that there may be some
>> benefit in declaring the use of 'computed-origins' at the top of each
>> file.
>
> I am not deeply familiar with Guile module.
>
> I chose to put this in (guix packages) instead of its own module
> because the module would contain only one function and nothing
> exported.  The aim for now, as discussed, is to not make this 'method'
> part of the public API.
>
> Then if the function is not exported by the module, the '#:use-module'
> does not have an effect, right?

It's true that it would have no effect on the set of available bindings,
and that's an excellent point.  Perhaps Liliana could clarify what she
had in mind, or better yet, propose a patch.

Please don't let me be a blocker on this thread.  I contributed a few
thoughts, but I don't have time right now to shepherd this issue, sorry.

      Regards,
        Mark
Ludovic Courtès Sept. 29, 2021, 1:16 p.m. UTC | #5
Hi there!

I’d rather go with zimoun’s original patch, which is focused and does
nothing more than what was originally intended, which is to factorize
the procedure.  I’ll go ahead and apply it shortly if there are no
objections.


As Mark wrote, ‘computed-origin-method’ remains a hack, so we can
discuss about the best way to improve on it, but that’s a separate
discussion.  What you propose, Liliana, is one possible way to improve
on the situation, but only on the surface: the hack remains, it just
gets its own module.

A better solution IMO would be to improve the ‘snippet’ mechanism in the
first place.  ‘computed-origin-method’ improves on it in two ways: (1)
lazy evaluation of the gexp, and (2) allows the use of a different base
name.

I would think #2 is addressed by the ‘file-name’ field (isn’t it?).

As for #1, it can be addressed by making the ‘snippet’ field delayed or
thunked.  It’s a one line change; the only thing we need is to measure,
or attempt to measure, the impact it has on module load time.

Thoughts?

Ludo’.
Liliana Marie Prikler Sept. 29, 2021, 3:34 p.m. UTC | #6
Hi there,

Am Mittwoch, den 29.09.2021, 15:16 +0200 schrieb Ludovic Courtès:
> Hi there!
> 
> I’d rather go with zimoun’s original patch, which is focused and does
> nothing more than what was originally intended, which is to factorize
> the procedure.  I’ll go ahead and apply it shortly if there are no
> objections.
I have trouble understanding this paragraph.  What exactly is "this
patch" and what do you mean by "factorizing"?  If it means moving
computed-origin-method elsewhere, then yes, for a short-time solution
only moving it is a wise choice in my opinion, but zimoun and I still
disagree on the target.  zimoun says (guix packages) for reasons
unknown to me, whereas I say (gnu packages), because it's closer to
where it's used and doesn't imply that this is going to be a part of
the (guix) download schemes anytime soon.

> As Mark wrote, ‘computed-origin-method’ remains a hack, so we can
> discuss about the best way to improve on it, but that’s a separate
> discussion.  What you propose, Liliana, is one possible way to
> improve on the situation, but only on the surface: the hack remains,
> it just gets its own module.
I don't necessarily perceive computed-origin-method as a hack, though,
at least not in concept.  The current implementation may be a hack
indeed, but I don't think the very concept of expressing an input as a
function is.  We are functional programmers after all :)

> A better solution IMO would be to improve the ‘snippet’ mechanism in
> the first place.  ‘computed-origin-method’ improves on it in two
> ways: (1) lazy evaluation of the gexp, and (2) allows the use of a
> different base name.
> 
> I would think #2 is addressed by the ‘file-name’ field (isn’t it?).
> 
> As for #1, it can be addressed by making the ‘snippet’ field delayed
> or thunked.  It’s a one line change; the only thing we need is to
> measure, or attempt to measure, the impact it has on module load
> time.
> 
> Thoughts?
This would work for packages, whose source are some base source with
patches or snippets applied, as is indeed the case for linux and
icecat.  However, there are also other potential uses for computed
origins.  Grepping for the string "'unpack 'unpack", which indicates
that more sources are unpacked at build-time returns more than fifty
instances in a number of locations.  This is potentially dangerous, as
for one, we will probably want to phase out the "origins as inputs"
idiom once we have short-form inputs and also it could be argued, that
`guix build -S` returns something wrong in those packages.

I think that some version of `computed-origin-method' will eventually
need to become public API as such packages may not always be best
described as "a base package with a snippet".  If we had recursive
origins – i.e. origins, that can take origins as inputs – we might be
able to do some of that, but I don't think it would necessarily work
for linux-libre or icecat, as with those you don't want the tainted
versions to be kept around.  Perhaps this could be worked around by not
interning the intermediate origins, but only using their file-names
inside the temporary directory in which the snippet is applied?

Another thing is that the final act of the linux-libre promise is not
the packing of the tarball, but the deblob-check.  Guix currently lacks
a way of modeling such checks in their origin, but I'd argue it would
need one if we wanted to do computed origins via snippets.  This is not
required by icecat and so one "simplification" could be that computed-
origin-method would not require the user to create a tarball, but
instead simply provide a name for the tarball and a directory to create
it from (via a promise again).

A combination of the above might make computed origins obsolete for
good, but the question remains whether that is a better design.  What
do y'all think?
Mark H Weaver Sept. 29, 2021, 8:42 p.m. UTC | #7
Hi Ludovic,

Ludovic Courtès <ludo@gnu.org> writes:

> A better solution IMO would be to improve the ‘snippet’ mechanism in the
> first place.  ‘computed-origin-method’ improves on it in two ways: (1)
> lazy evaluation of the gexp, and (2) allows the use of a different base
> name.
>
> I would think #2 is addressed by the ‘file-name’ field (isn’t it?).

Using the 'file-name' field would not satisfy my requirements.  It has
two problems:

(1) It would cause the unmodified upstream Firefox source tarball to be
    named "icecat-…" in the store.

(2) Although the resulting tarball would be named "icecat-…", the
    toplevel directory name within that tarball would still be named
    "firefox-…".

I consider each of these flaws to be unacceptable.

What do you think?

      Thanks,
        Mark
Ludovic Courtès Sept. 29, 2021, 9:34 p.m. UTC | #8
Hi Mark,

Mark H Weaver <mhw@netris.org> skribis:

> Ludovic Courtès <ludo@gnu.org> writes:
>
>> A better solution IMO would be to improve the ‘snippet’ mechanism in the
>> first place.  ‘computed-origin-method’ improves on it in two ways: (1)
>> lazy evaluation of the gexp, and (2) allows the use of a different base
>> name.
>>
>> I would think #2 is addressed by the ‘file-name’ field (isn’t it?).
>
> Using the 'file-name' field would not satisfy my requirements.  It has
> two problems:
>
> (1) It would cause the unmodified upstream Firefox source tarball to be
>     named "icecat-…" in the store.
>
> (2) Although the resulting tarball would be named "icecat-…", the
>     toplevel directory name within that tarball would still be named
>     "firefox-…".
>
> I consider each of these flaws to be unacceptable.

Oh I see.  I agree it’d be nice to have a way to fix those.

Perhaps by allowing for custom pack-and-repack procedures?

Thanks,
Ludo’.
Ludovic Courtès Sept. 29, 2021, 9:47 p.m. UTC | #9
Hi Liliana,

Liliana Marie Prikler <liliana.prikler@gmail.com> skribis:

> Am Mittwoch, den 29.09.2021, 15:16 +0200 schrieb Ludovic Courtès:
>> Hi there!
>> 
>> I’d rather go with zimoun’s original patch, which is focused and does
>> nothing more than what was originally intended, which is to factorize
>> the procedure.  I’ll go ahead and apply it shortly if there are no
>> objections.
> I have trouble understanding this paragraph.  What exactly is "this
> patch" and what do you mean by "factorizing"?  If it means moving
> computed-origin-method elsewhere, then yes, for a short-time solution
> only moving it is a wise choice in my opinion,

OK, I agree too.

> but zimoun and I still disagree on the target.  zimoun says (guix
> packages) for reasons unknown to me, whereas I say (gnu packages),
> because it's closer to where it's used and doesn't imply that this is
> going to be a part of the (guix) download schemes anytime soon.

(gnu packages) is higher-level: it’s part of the distro and includes CLI
helpers such as ‘specification->package’.  So I think (guix …) is
somewhat more appropriate.

(That said, what matters more to me is how we’re going to replace it
with a proper solution.)

[...]

>> A better solution IMO would be to improve the ‘snippet’ mechanism in
>> the first place.  ‘computed-origin-method’ improves on it in two
>> ways: (1) lazy evaluation of the gexp, and (2) allows the use of a
>> different base name.
>> 
>> I would think #2 is addressed by the ‘file-name’ field (isn’t it?).
>> 
>> As for #1, it can be addressed by making the ‘snippet’ field delayed
>> or thunked.  It’s a one line change; the only thing we need is to
>> measure, or attempt to measure, the impact it has on module load
>> time.
>> 
>> Thoughts?
> This would work for packages, whose source are some base source with
> patches or snippets applied, as is indeed the case for linux and
> icecat.  However, there are also other potential uses for computed
> origins.

It’s hard for me to talk about potential uses in the abstract.  :-)

There might be cases where an origin simply isn’t the right tool and one
would prefer ‘computed-file’ or something else.  It really depends on
the context.

[...]

> I think that some version of `computed-origin-method' will eventually
> need to become public API as such packages may not always be best
> described as "a base package with a snippet".  If we had recursive
> origins – i.e. origins, that can take origins as inputs – we might be
> able to do some of that, but I don't think it would necessarily work
> for linux-libre or icecat, as with those you don't want the tainted
> versions to be kept around.  Perhaps this could be worked around by not
> interning the intermediate origins, but only using their file-names
> inside the temporary directory in which the snippet is applied?

“Recursive origins” are a bit of a stretch as a concept IMO; what you
describe is a case where I’d probably use ‘computed-file’ instead.

> Another thing is that the final act of the linux-libre promise is not
> the packing of the tarball, but the deblob-check.  Guix currently lacks
> a way of modeling such checks in their origin, but I'd argue it would
> need one if we wanted to do computed origins via snippets.  This is not
> required by icecat and so one "simplification" could be that computed-
> origin-method would not require the user to create a tarball, but
> instead simply provide a name for the tarball and a directory to create
> it from (via a promise again).

Ah, I had overlooked that ‘deblob-check’ bit.  It could be that allowing
for custom pack-and-repack procedures would be enough to address it.

> A combination of the above might make computed origins obsolete for
> good, but the question remains whether that is a better design.  What
> do y'all think?

The design goal is to have clearly identified types: <package>,
<origin>, <operating-system>.  For each of these, we want some
flexibility: build system, origin method, etc.  However, beyond some
level of stretching, it may be clearer to just use the catch-all
‘computed-file’ or to devise a new type.  After all, that’s how <origin>
came to be (we could have used <package> instead with a suitable build
system).

There’s a tension between “purely declarative” and “flexible”, and it’s
about striking a balance, subjectively.

Hope that makes sense!

Ludo’.
Liliana Marie Prikler Sept. 29, 2021, 11:44 p.m. UTC | #10
Hi Ludo,

Am Mittwoch, den 29.09.2021, 23:47 +0200 schrieb Ludovic Courtès:
> [...]
> 
> > but zimoun and I still disagree on the target.  zimoun says (guix
> > packages) for reasons unknown to me, whereas I say (gnu packages),
> > because it's closer to where it's used and doesn't imply that this
> > is
> > going to be a part of the (guix) download schemes anytime soon.
> 
> (gnu packages) is higher-level: it’s part of the distro and includes
> CLI helpers such as ‘specification->package’.  So I think (guix …) is
> somewhat more appropriate.
> 
> (That said, what matters more to me is how we’re going to replace it
> with a proper solution.)
(gnu packages) being high-level is part of the reason I want it there. 
Stuff that's hidden quite deep inside (guix something) will be slower
to change and replace with the proper solution.  When you pull on a
lever, the outside moves faster :)

> > > A better solution IMO would be to improve the ‘snippet’ mechanism
> > > in the first place.  ‘computed-origin-method’ improves on it in
> > > two ways: (1) lazy evaluation of the gexp, and (2) allows the use
> > > of a different base name.
> > > 
> > > I would think #2 is addressed by the ‘file-name’ field (isn’t
> > > it?).
> > > 
> > > As for #1, it can be addressed by making the ‘snippet’ field
> > > delayed or thunked.  It’s a one line change; the only thing we
> > > need is to measure, or attempt to measure, the impact it has on
> > > module load time.
> > > 
> > > Thoughts?
> > This would work for packages, whose source are some base source
> > with patches or snippets applied, as is indeed the case for linux
> > and icecat.  However, there are also other potential uses for
> > computed origins.
> 
> It’s hard for me to talk about potential uses in the abstract.  :-)
> 
> There might be cases where an origin simply isn’t the right tool and
> one would prefer ‘computed-file’ or something else.  It really
> depends on the context.
> 
> [...]
> 
> > I think that some version of `computed-origin-method' will
> > eventually need to become public API as such packages may not
> > always be best described as "a base package with a snippet".  If we
> > had recursive origins – i.e. origins, that can take origins as
> > inputs – we might be able to do some of that, but I don't think it
> > would necessarily work for linux-libre or icecat, as with those you
> > don't want the tainted versions to be kept around.  Perhaps this
> > could be worked around by not interning the intermediate origins,
> > but only using their file-names inside the temporary directory in
> > which the snippet is applied?
> 
> “Recursive origins” are a bit of a stretch as a concept IMO; what you
> describe is a case where I’d probably use ‘computed-file’ instead.
In other words, we could/should use computed-file for linux-libre and
icecat?  If we reasonably can, would it make sense to use that in lieu
of computed-origin-method to actually advertise the existence of
computed-file to Guix users/packagers?

> > Another thing is that the final act of the linux-libre promise is
> > not the packing of the tarball, but the deblob-check.  Guix
> > currently lacks a way of modeling such checks in their origin, but
> > I'd argue it would need one if we wanted to do computed origins via
> > snippets.  This is not required by icecat and so one
> > "simplification" could be that computed-origin-method would not
> > require the user to create a tarball, but instead simply provide a
> > name for the tarball and a directory to create it from (via a
> > promise again).
> 
> Ah, I had overlooked that ‘deblob-check’ bit.  It could be that
> allowing for custom pack-and-repack procedures would be enough to
> address it.
I think asking users to supply their own implementation of a 200 line
long function to be a bit much to only do part of the job.  On the
other hand, the promise for linux-libre takes 400 lines and for icecat
more than 600, but I think there are some things we ought to factor
out.  Particularly, looking up tools like tar or gzip and even the
actual packing are always the same.

What we can't currently control is the top directory name and the
output name.  Both of that could be customized by supplying a "repack-
name" field, which is used as basis for the directory name and the
tarball name.
Another thing we can't easily control are extraneous inputs to the
patches, although the patch-inputs field *does* exist.

> > A combination of the above might make computed origins obsolete for
> > good, but the question remains whether that is a better
> > design.  What do y'all think?
> 
> The design goal is to have clearly identified types: <package>,
> <origin>, <operating-system>.  For each of these, we want some
> flexibility: build system, origin method, etc.  However, beyond some
> level of stretching, it may be clearer to just use the catch-all
> ‘computed-file’ or to devise a new type.  After all, that’s how
> <origin> came to be (we could have used <package> instead with a
> suitable build system).
> 
> There’s a tension between “purely declarative” and “flexible”, and
> it’s about striking a balance, subjectively.
To be fair, I did think that "computed-tarball" might be a good
abstraction in some sense, but on another hand origins are computed
tarballs with a record interface.

On a somewhat related note, origins have this weird situation going on
where some things like git or svn checkouts need to be defined through
them, whereas others may pass unhindered.  I feel that this contributes
to the equation of source = origin, that might have caused "computed-
origin-method" to exist in the first place.

What do you think?

Liliana
Ludovic Courtès Sept. 30, 2021, 8:28 a.m. UTC | #11
Hi!

Liliana Marie Prikler <liliana.prikler@gmail.com> skribis:

> I think asking users to supply their own implementation of a 200 line
> long function to be a bit much to only do part of the job.  On the
> other hand, the promise for linux-libre takes 400 lines and for icecat
> more than 600, but I think there are some things we ought to factor
> out.  Particularly, looking up tools like tar or gzip and even the
> actual packing are always the same.

True, there’s a lot going on there, though that’s partly because it’s
generic.

> What we can't currently control is the top directory name and the
> output name.  Both of that could be customized by supplying a "repack-
> name" field, which is used as basis for the directory name and the
> tarball name.
> Another thing we can't easily control are extraneous inputs to the
> patches, although the patch-inputs field *does* exist.

It’s possible to use a gexp as the snippet, where you can refer to
additional things in there (though in practice this is currently
impractical due to snippets not being thunks/promises.)

>> > A combination of the above might make computed origins obsolete for
>> > good, but the question remains whether that is a better
>> > design.  What do y'all think?
>> 
>> The design goal is to have clearly identified types: <package>,
>> <origin>, <operating-system>.  For each of these, we want some
>> flexibility: build system, origin method, etc.  However, beyond some
>> level of stretching, it may be clearer to just use the catch-all
>> ‘computed-file’ or to devise a new type.  After all, that’s how
>> <origin> came to be (we could have used <package> instead with a
>> suitable build system).
>> 
>> There’s a tension between “purely declarative” and “flexible”, and
>> it’s about striking a balance, subjectively.
> To be fair, I did think that "computed-tarball" might be a good
> abstraction in some sense, but on another hand origins are computed
> tarballs with a record interface.
>
> On a somewhat related note, origins have this weird situation going on
> where some things like git or svn checkouts need to be defined through
> them, whereas others may pass unhindered.  I feel that this contributes
> to the equation of source = origin, that might have caused "computed-
> origin-method" to exist in the first place.

I’m not sure what you mean by “others may pass unhindered”?  You mean
other VCS checkouts?

> What do you think?

I think the situation of IceCat and Linux-libre is unusual: 2 packages
out of 18K.  That probably explains why we have a hard time figuring out
how to generalize the issues that ‘computed-origin-method’ addresses.

What you propose (IIUC) sounds interesting: we’d provide a
<computed-tarball> data type, which would make the source URL manifest
(something that’s useful for <https://issues.guix.gnu.org/50515>, for
instance), but the lowering step would be entirely custom, similar to
what it already looks like:

  (define-record-type* <computed-tarball> computed-tarball make-computed-tarball
    computed-tarball?
    this-computed-tarball
    (url      computed-tarball-url)  ;or could be an <origin>
    (builder  computer-tarball-builder (thunked)) ;gexp
    (location computed-tarball-location (innate) (default (current-source-location))))

Is this what you had in mind?

Thanks,
Ludo’.
Liliana Marie Prikler Sept. 30, 2021, 2:17 p.m. UTC | #12
Hi,

Am Donnerstag, den 30.09.2021, 10:28 +0200 schrieb Ludovic Courtès:
> [...]
> > What we can't currently control is the top directory name and the
> > output name.  Both of that could be customized by supplying a
> > "repack-name" field, which is used as basis for the directory name
> > and the tarball name.
> > Another thing we can't easily control are extraneous inputs to the
> > patches, although the patch-inputs field *does* exist.
> 
> It’s possible to use a gexp as the snippet, where you can refer to
> additional things in there (though in practice this is currently
> impractical due to snippets not being thunks/promises.)
Which is a practical issue because it'd mean that the tarball gets
built as soon as the source is interpreted?

> > > > A combination of the above might make computed origins obsolete
> > > > for
> > > > good, but the question remains whether that is a better
> > > > design.  What do y'all think?
> > > 
> > > The design goal is to have clearly identified types: <package>,
> > > <origin>, <operating-system>.  For each of these, we want some
> > > flexibility: build system, origin method, etc.  However, beyond
> > > some
> > > level of stretching, it may be clearer to just use the catch-all
> > > ‘computed-file’ or to devise a new type.  After all, that’s how
> > > <origin> came to be (we could have used <package> instead with a
> > > suitable build system).
> > > 
> > > There’s a tension between “purely declarative” and “flexible”,
> > > and
> > > it’s about striking a balance, subjectively.
> > To be fair, I did think that "computed-tarball" might be a good
> > abstraction in some sense, but on another hand origins are computed
> > tarballs with a record interface.
> > 
> > On a somewhat related note, origins have this weird situation going
> > on where some things like git or svn checkouts need to be defined
> > through them, whereas others may pass unhindered.  I feel that this
> > contributes to the equation of source = origin, that might have
> > caused "computed-origin-method" to exist in the first place.
> 
> I’m not sure what you mean by “others may pass unhindered”?  You mean
> other VCS checkouts?
I mean that we don't need to wrap local-file inside an origin for
example whereas we do need to wrap e.g. svn-fetch instead of having an
svn-checkout constructor at the top.  It's not really that noticable
normally, but weird once you start thinking a little too hard about it.

> > What do you think?
> 
> I think the situation of IceCat and Linux-libre is unusual: 2
> packages out of 18K.  That probably explains why we have a hard time
> figuring out how to generalize the issues that ‘computed-origin-
> method’ addresses.
> 
> What you propose (IIUC) sounds interesting: we’d provide a
> <computed-tarball> data type, which would make the source URL
> manifest (something that’s useful for <
> https://issues.guix.gnu.org/50515>;,
> for instance), but the lowering step would be entirely custom,
> similar to what it already looks like:
> 
>   (define-record-type* <computed-tarball> computed-tarball make-
> computed-tarball
>     computed-tarball?
>     this-computed-tarball
>     (url      computed-tarball-url)  ;or could be an <origin>
>     (builder  computer-tarball-builder (thunked)) ;gexp
>     (location computed-tarball-location (innate) (default (current-
> source-location))))
> 
> Is this what you had in mind?
Slightly similar, but I don't think I'd want a singular source url. 
Instead

  (define-record-type* <computed-tarball> computed-tarball 
    make-computed-tarball
    computed-tarball?
    this-computed-tarball
    (sources  computed-tarball-sources)  ; list of origins, local 
                                         ; files or other things
    (builder  computer-tarball-builder (thunked)) ; gexp
    (name     computed-tarball-name) ; perhaps?
    (location computed-tarball-location (innate) 
              (default (current-source-location))))

At the start of BUILDER, SOURCES are already unpacked to the current
working directory under their stripped file names.  After builder
returns, we either package the contents of the current working
directory up into a tarball (variant A) or we have builder return a
list of files to pack up (variant B) which we then post-process maybe.
 
WDYT?
Ludovic Courtès Sept. 30, 2021, 8:09 p.m. UTC | #13
Liliana Marie Prikler <liliana.prikler@gmail.com> skribis:

> Am Donnerstag, den 30.09.2021, 10:28 +0200 schrieb Ludovic Courtès:
>> [...]
>> > What we can't currently control is the top directory name and the
>> > output name.  Both of that could be customized by supplying a
>> > "repack-name" field, which is used as basis for the directory name
>> > and the tarball name.
>> > Another thing we can't easily control are extraneous inputs to the
>> > patches, although the patch-inputs field *does* exist.
>> 
>> It’s possible to use a gexp as the snippet, where you can refer to
>> additional things in there (though in practice this is currently
>> impractical due to snippets not being thunks/promises.)
> Which is a practical issue because it'd mean that the tarball gets
> built as soon as the source is interpreted?

It’s impractical because typical usage introduces top-level circular
references (e.g., if you write #$gzip).

> I mean that we don't need to wrap local-file inside an origin for
> example whereas we do need to wrap e.g. svn-fetch instead of having an
> svn-checkout constructor at the top.  It's not really that noticable
> normally, but weird once you start thinking a little too hard about it.

Hmm yeah, I must not be thinking hard enough.  :-)

> Slightly similar, but I don't think I'd want a singular source url. 
> Instead
>
>   (define-record-type* <computed-tarball> computed-tarball 
>     make-computed-tarball
>     computed-tarball?
>     this-computed-tarball
>     (sources  computed-tarball-sources)  ; list of origins, local 
>                                          ; files or other things
>     (builder  computer-tarball-builder (thunked)) ; gexp
>     (name     computed-tarball-name) ; perhaps?
>     (location computed-tarball-location (innate) 
>               (default (current-source-location))))
>
> At the start of BUILDER, SOURCES are already unpacked to the current
> working directory under their stripped file names.  After builder
> returns, we either package the contents of the current working
> directory up into a tarball (variant A) or we have builder return a
> list of files to pack up (variant B) which we then post-process maybe.
>  
> WDYT?

Overall LGTM!  IWBN to see if there are other potential users in the
tree (I can’t think of any), but for IceCat and Linux-libre, it could
already improve the situation.

Thanks,
Ludo’.
Liliana Marie Prikler Sept. 30, 2021, 9:49 p.m. UTC | #14
Hi,

Am Donnerstag, den 30.09.2021, 22:09 +0200 schrieb Ludovic Courtès:
> Liliana Marie Prikler <liliana.prikler@gmail.com> skribis:
> 
> > [...]
> > Which is a practical issue because it'd mean that the tarball gets
> > built as soon as the source is interpreted?
> 
> It’s impractical because typical usage introduces top-level circular
> references (e.g., if you write #$gzip).
Ah, right.

> > I mean that we don't need to wrap local-file inside an origin for
> > example whereas we do need to wrap e.g. svn-fetch instead of having
> > an svn-checkout constructor at the top.  It's not really that
> > noticable normally, but weird once you start thinking a little too
> > hard about it.
> 
> Hmm yeah, I must not be thinking hard enough.  :-)
Perhaps it's just me who finds it weird tho.  ¯\_(ツ)_/¯

> > Slightly similar, but I don't think I'd want a singular source
> > url. Instead
> > 
> >   (define-record-type* <computed-tarball> computed-tarball 
> >     make-computed-tarball
> >     computed-tarball?
> >     this-computed-tarball
> >     (sources  computed-tarball-sources)  ; list of origins, local 
> >                                          ; files or other things
> >     (builder  computer-tarball-builder (thunked)) ; gexp
> >     (name     computed-tarball-name) ; perhaps?
> >     (location computed-tarball-location (innate) 
> >               (default (current-source-location))))
> > 
> > At the start of BUILDER, SOURCES are already unpacked to the
> > current working directory under their stripped file names.  After
> > builder returns, we either package the contents of the current
> > working directory up into a tarball (variant A) or we have builder
> > return a list of files to pack up (variant B) which we then post-
> > process maybe.
> >  
> > WDYT?
> 
> Overall LGTM!  IWBN to see if there are other potential users in the
> tree (I can’t think of any), but for IceCat and Linux-libre, it could
> already improve the situation.
Concrete examples which currently use "unpack more after unpack":

- chez-scheme with nanopass and stex
- xen's mini-os
- lbzip2's gnulib (and probably gnulib in other locations)
- similarly libgd in gedit and gnome-recipes (same origin for both)

The builder for those would always be a simple (series of) directory
rename(s) as well :)

This list might not be complete, at least I haven't checked whether it
is.  Also, packages which have  (package-source some-other-package) as
input somewhere don't count here, as the missing sources can trivially
be found and inserted imo.

Regards,
Liliana
Ludovic Courtès Sept. 30, 2021, 10:17 p.m. UTC | #15
zimoun <zimon.toutoune@gmail.com> skribis:

> The 'computed-origin-method' had been introduced as a workaround limitations
> in the 'snippet' mechanism.  The procedure is duplicated which makes hard to
> automatically detects packages using it.
>
> * guix/packages.scm (computed-origin-method): Move procedure from...
> * gnu/packages/gnuzilla.scm: ...here and...
> * gnu/packages/gnuzilla.scm: ...there.

I tweaked the commit log and applied, thanks!

Hopefully we’ll grow a “proper” solution thanks to the discussions about
<computed-tarball> & co. that we’ve had.

Thanks everyone,
Ludo’.
diff mbox series

Patch

diff --git a/gnu/packages/gnuzilla.scm b/gnu/packages/gnuzilla.scm
index 431b487fd0..9f6e1f24e1 100644
--- a/gnu/packages/gnuzilla.scm
+++ b/gnu/packages/gnuzilla.scm
@@ -682,18 +682,8 @@  in C/C++.")
    ("1j6l66v1xw27z8w78mpsnmqgv8m277mf4r0hgqcrb4zx7xc2vqyy" "527e5e090608" "zh-CN")
    ("1frwx35klpyz3sdwrkz7945ivb2dwaawhhyfnz4092h9hn7rc4ky" "6cd366ad2947" "zh-TW")))
 
-(define* (computed-origin-method gexp-promise hash-algo hash
-                                 #:optional (name "source")
-                                 #:key (system (%current-system))
-                                 (guile (default-guile)))
-  "Return a derivation that executes the G-expression that results
-from forcing GEXP-PROMISE."
-  (mlet %store-monad ((guile (package->derivation guile system)))
-    (gexp->derivation (or name "computed-origin")
-                      (force gexp-promise)
-                      #:graft? #f       ;nothing to graft
-                      #:system system
-                      #:guile-for-build guile)))
+;; XXXX: Workaround 'snippet' limitations.
+(define computed-origin-method (@@ (guix packages) computed-origin-method))
 
 (define %icecat-version "78.14.0-guix0-preview1")
 (define %icecat-build-id "20210907000000") ;must be of the form YYYYMMDDhhmmss
diff --git a/gnu/packages/linux.scm b/gnu/packages/linux.scm
index 285eb132f4..eb792be9a3 100644
--- a/gnu/packages/linux.scm
+++ b/gnu/packages/linux.scm
@@ -216,18 +216,8 @@  defconfig.  Return the appropriate make target if applicable, otherwise return
           (file-name (string-append "linux-libre-deblob-check-" version "-" gnu-revision))
           (sha256 deblob-check-hash))))
 
-(define* (computed-origin-method gexp-promise hash-algo hash
-                                 #:optional (name "source")
-                                 #:key (system (%current-system))
-                                 (guile (default-guile)))
-  "Return a derivation that executes the G-expression that results
-from forcing GEXP-PROMISE."
-  (mlet %store-monad ((guile (package->derivation guile system)))
-    (gexp->derivation (or name "computed-origin")
-                      (force gexp-promise)
-                      #:graft? #f       ;nothing to graft
-                      #:system system
-                      #:guile-for-build guile)))
+;; XXXX: Workaround 'snippet' limitations
+(define computed-origin-method (@@ (guix packages) computed-origin-method))
 
 (define (make-linux-libre-source version
                                  upstream-source
diff --git a/guix/packages.scm b/guix/packages.scm
index ad7937b4fb..8c3a0b0b7b 100644
--- a/guix/packages.scm
+++ b/guix/packages.scm
@@ -1,6 +1,6 @@ 
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
-;;; Copyright © 2014, 2015, 2017, 2018 Mark H Weaver <mhw@netris.org>
+;;; Copyright © 2014, 2015, 2017, 2018, 2019 Mark H Weaver <mhw@netris.org>
 ;;; Copyright © 2015 Eric Bavier <bavier@member.fsf.org>
 ;;; Copyright © 2016 Alex Kost <alezost@gmail.com>
 ;;; Copyright © 2017, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
@@ -344,6 +344,27 @@  name of its URI."
          ;; git, svn, cvs, etc. reference
          #f))))
 
+;; Work around limitations in the 'snippet' mechanism.  It is not possible for
+;; a 'snippet' to produce a tarball with a different base name than the
+;; original downloaded source.  Moreover, cherry picking dozens of upsteam
+;; patches and applying them suddenly is often impractical; especially when a
+;; comprehensive code reformatting is done upstream.  Mainly designed for
+;; Linux and IceCat packages.
+;; XXXX: do not make part of public API (export) such radical capability
+;; before a detailed review process.
+(define* (computed-origin-method gexp-promise hash-algo hash
+                                 #:optional (name "source")
+                                 #:key (system (%current-system))
+                                 (guile (default-guile)))
+  "Return a derivation that executes the G-expression that results
+from forcing GEXP-PROMISE."
+  (mlet %store-monad ((guile (package->derivation guile system)))
+    (gexp->derivation (or name "computed-origin")
+                      (force gexp-promise)
+                      #:graft? #f       ;nothing to graft
+                      #:system system
+                      #:guile-for-build guile)))
+
 
 (define %supported-systems
   ;; This is the list of system types that are supported.  By default, we