[bug#34882] Update to Pandas, enable Excel writer support

Message ID 87k1gzqy2c.fsf@gmail.com
State Accepted
Headers show
Series [bug#34882] Update to Pandas, enable Excel writer support | expand

Checks

Context Check Description
cbaines/applying patch fail Apply failed
cbaines/applying patch fail Apply failed
cbaines/applying patch fail Apply failed
cbaines/applying patch fail Apply failed

Commit Message

Maxim Cournoyer March 16, 2019, 3:48 a.m. UTC

Comments

Marius Bakke March 17, 2019, 7:45 p.m. UTC | #1
Hello Maxim,

Overall LGTM, some comments inline.

[...]

> +(define-public python-et-xmlfile
> +  (package
> +    (name "python-et-xmlfile")
> +    (version "1.0.1")
> +    (source
> +      (origin
> +        (method url-fetch)
> +        (uri (pypi-uri "et_xmlfile" version))
> +        (sha256
> +          (base32
> +            "0nrkhcb6jdrlb6pwkvd4rycw34y3s931hjf409ij9xkjsli9fkb1"))))
> +    (build-system python-build-system)
> +    (arguments
> +     `(#:phases (modify-phases %standard-phases
> +                  (replace 'check
> +                    (lambda _
> +                      (invoke "pytest"))))))
> +    (native-inputs
> +     `(("python-pytest" ,python-pytest)
> +       ("python-lxml" ,python-lxml)))

Should python-lxml be a propagated-input?

> +    (home-page
> +      "https://bitbucket.org/openpyxl/et_xmlfile")
> +    (synopsis
> +      "Low memory implementation of @code{lxml.xmlfile}")

Please remove the extra newlines in these patches.

> +    (description
> +      "This Python library is based upon the @code{xmlfile} module
> +from @code{lxml}.  It aims to provide a low memory, compatible implementation
> +of @code{xmlfile}.")
> +    (license license:expat)))

[...]
  
> +(define-public python-openpyxl
> +  (package
> +    (name "python-openpyxl")
> +    (version "2.6.0")
> +    (source
> +     (origin
> +       (method hg-fetch)
> +       (uri (hg-reference
> +             (url "https://bitbucket.org/openpyxl/openpyxl")
> +             (changeset version)))
> +       (file-name (string-append name "-" version "-checkout"))
> +       (sha256
> +        (base32
> +         "1x47ngn7ybaqdbvg90c8h2x0j6yfdfj25gjfinp2w5rf62gsany7"))))

Can you leave a comment about why we take it from this repository
instead of PyPI?

> +    (native-inputs
> +     `(("python-lxml" ,python-lxml)

Why is python-lxml a native-input?

> +       ;; For the test suite.
> +       ("python-pillow" ,python-pillow)
> +       ("python-pytest" ,python-pytest)))
> +    (propagated-inputs
> +     `(("python-et-xmlfile" ,python-et-xmlfile)
> +       ("python-jdcal" ,python-jdcal)))
> +    (home-page "https://openpyxl.readthedocs.io")
> +    (synopsis
> +     "Python library to read/write Excel 2010 XLSX/XLSM files")
> +    (description
> +     "This Python library allows reading and writing to the Excel XLSX, XLSM,
> +XLTX and XLTM file formats that are defined by the Office Open XML (OOXML)
> +standard.")
> +    (license license:expat)))

[...]

> From ad1f0efe4a5c3d28ee9d7e2e5da275721af9e172 Mon Sep 17 00:00:00 2001
> From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
> Date: Sat, 9 Feb 2019 00:25:51 -0500
> Subject: [PATCH 5/5] gnu: python-pandas: Update to 0.24.2.
>
> * gnu/packages/python-xyz.scm (python-pandas): Update to 0.24.2.
> [phases]{patch-which}: Add phase.
> [inputs]: Add WHICH.
> ---
>  gnu/packages/python-xyz.scm | 65 ++++++++++++++++++++++---------------
>  1 file changed, 38 insertions(+), 27 deletions(-)
>
> diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm
> index 321c881f4d..bbf1403758 100644
> --- a/gnu/packages/python-xyz.scm
> +++ b/gnu/packages/python-xyz.scm
> @@ -1014,56 +1014,67 @@ human-friendly syntax.")
>  (define-public python-pandas
>    (package
>      (name "python-pandas")
> -    (version "0.23.4")
> +    (version "0.24.2")
>      (source
>       (origin
>         (method url-fetch)
>         (uri (pypi-uri "pandas" version))
>         (sha256
> -        (base32 "1x54pd7hr3y7qahx6b5bf2wzj54xvl8r3s1h4pl254pnmi3wl92v"))))
> +        (base32 "18imlm8xbhcbwy4wa957a1fkamrcb0z988z006jpfda3ki09z4ag"))))
>      (build-system python-build-system)
>      (arguments
>       `(#:modules ((guix build utils)
>                    (guix build python-build-system)
>                    (ice-9 ftw)
>                    (srfi srfi-26))
> -       #:phases (modify-phases %standard-phases
> -                  (replace 'check
> -                    (lambda _
> -                      (let ((build-directory
> -                             (string-append
> -                              (getcwd) "/build/"
> -                              (car (scandir "build"
> -                                            (cut string-prefix? "lib." <>))))))
> -                        ;; Disable the "strict data files" option which causes
> -                        ;; the build to error out if required data files are not
> -                        ;; available (as is the case with PyPI archives).
> -                        (substitute* "setup.cfg"
> -                          (("addopts = --strict-data-files") "addopts = "))
> -                        (with-directory-excursion build-directory
> -                          ;; Delete tests that require "moto" which is not yet in Guix.
> -                          (for-each delete-file
> -                                    '("pandas/tests/io/conftest.py"
> -                                      "pandas/tests/io/json/test_compression.py"
> -                                      "pandas/tests/io/parser/test_network.py"
> -                                      "pandas/tests/io/test_parquet.py"))
> -                          (invoke "pytest" "-vv" "pandas" "--skip-slow"
> -                                  "--skip-network" "-k"
> -                                  ;; XXX: Due to the deleted tests above.
> -                                  "not test_read_s3_jsonl"))))))))
> +       #:phases
> +       (modify-phases %standard-phases
> +         (add-after 'unpack 'patch-which
> +           (lambda* (#:key inputs #:allow-other-keys)
> +             (let ((which (assoc-ref inputs "which")))
> +               (substitute* "pandas/io/clipboard/__init__.py"
> +                 (("^CHECK_CMD = .*")
> +                  (string-append "CHECK_CMD = \"" which "\"\n"))))
> +             #t))
> +         (replace 'check
> +           (lambda _
> +             (let ((build-directory
> +                    (string-append
> +                     (getcwd) "/build/"
> +                     (car (scandir "build"
> +                                   (cut string-prefix? "lib." <>))))))
> +               ;; Disable the "strict data files" option which causes
> +               ;; the build to error out if required data files are not
> +               ;; available (as is the case with PyPI archives).
> +               (substitute* "setup.cfg"
> +                 (("addopts = --strict-data-files") "addopts = "))
> +               (with-directory-excursion build-directory
> +                 ;; Delete tests that require "moto" which is not yet in Guix.
> +                 (for-each delete-file
> +                           '("pandas/tests/io/conftest.py"
> +                             "pandas/tests/io/json/test_compression.py"
> +                             "pandas/tests/io/parser/test_network.py"
> +                             "pandas/tests/io/test_parquet.py"))
> +                 (invoke "pytest" "-vv" "pandas" "--skip-slow"
> +                         "--skip-network" "-k"
> +                         ;; XXX: Due to the deleted tests above.
> +                         "not test_read_s3_jsonl"))))))))

LGTM, although I'd prefer not to reindent the phases section.  It makes
the patch harder to read, and I prefer the "deep" indentation for
logically separate chunks of code anyway (though I am probably in the
minority here..).  YMMV!

Thanks!

>      (propagated-inputs
>       `(("python-numpy" ,python-numpy)
>         ("python-openpyxl" ,python-openpyxl)
>         ("python-pytz" ,python-pytz)
>         ("python-dateutil" ,python-dateutil)
>         ("python-xlrd" ,python-xlrd)))
> +    (inputs
> +     `(("which" ,which)))
>      (native-inputs
>       `(("python-cython" ,python-cython)
>         ("python-beautifulsoup4" ,python-beautifulsoup4)
>         ("python-lxml" ,python-lxml)
>         ("python-html5lib" ,python-html5lib)
>         ("python-nose" ,python-nose)
> -       ("python-pytest" ,python-pytest)))
> +       ("python-pytest" ,python-pytest)
> +       ("python-pytest-mock" ,python-pytest-mock)))
>      (home-page "https://pandas.pydata.org")
>      (synopsis "Data structures for data analysis, time series, and statistics")
>      (description
> -- 
> 2.20.1
Maxim Cournoyer March 18, 2019, 1:18 p.m. UTC | #2
Hi Marius, and thanks for having a look!

Marius Bakke <mbakke@fastmail.com> writes:

> Hello Maxim,
>
> Overall LGTM, some comments inline.
>
> [...]
>
>> +(define-public python-et-xmlfile
>> +  (package
>> +    (name "python-et-xmlfile")
>> +    (version "1.0.1")
>> +    (source
>> +      (origin
>> +        (method url-fetch)
>> +        (uri (pypi-uri "et_xmlfile" version))
>> +        (sha256
>> +          (base32
>> +            "0nrkhcb6jdrlb6pwkvd4rycw34y3s931hjf409ij9xkjsli9fkb1"))))
>> +    (build-system python-build-system)
>> +    (arguments
>> +     `(#:phases (modify-phases %standard-phases
>> +                  (replace 'check
>> +                    (lambda _
>> +                      (invoke "pytest"))))))
>> +    (native-inputs
>> +     `(("python-pytest" ,python-pytest)
>> +       ("python-lxml" ,python-lxml)))
>
> Should python-lxml be a propagated-input?

No, otherwise this package would be pretty pointless, as it aims to be
a "low memory implementation of a component of lxml" :-). The lxml
dependency is used in the test suite (I'm guessing to validate that both
implementations' behaviors match).

>
>> +    (home-page
>> +      "https://bitbucket.org/openpyxl/et_xmlfile")
>> +    (synopsis
>> +      "Low memory implementation of @code{lxml.xmlfile}")
>
> Please remove the extra newlines in these patches.

Done.

>> +    (description
>> +      "This Python library is based upon the @code{xmlfile} module
>> +from @code{lxml}.  It aims to provide a low memory, compatible implementation
>> +of @code{xmlfile}.")
>> +    (license license:expat)))
>
> [...]
>
>> +(define-public python-openpyxl
>> +  (package
>> +    (name "python-openpyxl")
>> +    (version "2.6.0")
>> +    (source
>> +     (origin
>> +       (method hg-fetch)
>> +       (uri (hg-reference
>> +             (url "https://bitbucket.org/openpyxl/openpyxl")
>> +             (changeset version)))
>> +       (file-name (string-append name "-" version "-checkout"))
>> +       (sha256
>> +        (base32
>> +         "1x47ngn7ybaqdbvg90c8h2x0j6yfdfj25gjfinp2w5rf62gsany7"))))
>
> Can you leave a comment about why we take it from this repository
> instead of PyPI?

Done. The reason is that the tests are missing from the PyPI
release.

>> +    (native-inputs
>> +     `(("python-lxml" ,python-lxml)
>
> Why is python-lxml a native-input?

Here also it is a test dependency. lxml is an optional backend. I've
moved the existing comment ("     ;; For the test suite.") above this
native-input as well.

>> +       ;; For the test suite.
>> +       ("python-pillow" ,python-pillow)
>> +       ("python-pytest" ,python-pytest)))
>> +    (propagated-inputs
>> +     `(("python-et-xmlfile" ,python-et-xmlfile)
>> +       ("python-jdcal" ,python-jdcal)))
>> +    (home-page "https://openpyxl.readthedocs.io")
>> +    (synopsis
>> +     "Python library to read/write Excel 2010 XLSX/XLSM files")
>> +    (description
>> +     "This Python library allows reading and writing to the Excel XLSX, XLSM,
>> +XLTX and XLTM file formats that are defined by the Office Open XML (OOXML)
>> +standard.")
>> +    (license license:expat)))
>
> [...]
>
>> From ad1f0efe4a5c3d28ee9d7e2e5da275721af9e172 Mon Sep 17 00:00:00 2001
>> From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
>> Date: Sat, 9 Feb 2019 00:25:51 -0500
>> Subject: [PATCH 5/5] gnu: python-pandas: Update to 0.24.2.
>>
>> * gnu/packages/python-xyz.scm (python-pandas): Update to 0.24.2.
>> [phases]{patch-which}: Add phase.
>> [inputs]: Add WHICH.
>> ---
>>  gnu/packages/python-xyz.scm | 65 ++++++++++++++++++++++---------------
>>  1 file changed, 38 insertions(+), 27 deletions(-)
>>
>> diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm
>> index 321c881f4d..bbf1403758 100644
>> --- a/gnu/packages/python-xyz.scm
>> +++ b/gnu/packages/python-xyz.scm
>> @@ -1014,56 +1014,67 @@ human-friendly syntax.")
>>  (define-public python-pandas
>>    (package
>>      (name "python-pandas")
>> -    (version "0.23.4")
>> +    (version "0.24.2")
>>      (source
>>       (origin
>>         (method url-fetch)
>>         (uri (pypi-uri "pandas" version))
>>         (sha256
>> -        (base32 "1x54pd7hr3y7qahx6b5bf2wzj54xvl8r3s1h4pl254pnmi3wl92v"))))
>> +        (base32 "18imlm8xbhcbwy4wa957a1fkamrcb0z988z006jpfda3ki09z4ag"))))
>>      (build-system python-build-system)
>>      (arguments
>>       `(#:modules ((guix build utils)
>>                    (guix build python-build-system)
>>                    (ice-9 ftw)
>>                    (srfi srfi-26))
>> -       #:phases (modify-phases %standard-phases
>> -                  (replace 'check
>> -                    (lambda _
>> -                      (let ((build-directory
>> -                             (string-append
>> -                              (getcwd) "/build/"
>> -                              (car (scandir "build"
>> -                                            (cut string-prefix? "lib." <>))))))
>> -                        ;; Disable the "strict data files" option which causes
>> -                        ;; the build to error out if required data files are not
>> -                        ;; available (as is the case with PyPI archives).
>> -                        (substitute* "setup.cfg"
>> -                          (("addopts = --strict-data-files") "addopts = "))
>> -                        (with-directory-excursion build-directory
>> -                          ;; Delete tests that require "moto" which is not yet in Guix.
>> -                          (for-each delete-file
>> -                                    '("pandas/tests/io/conftest.py"
>> -                                      "pandas/tests/io/json/test_compression.py"
>> -                                      "pandas/tests/io/parser/test_network.py"
>> -                                      "pandas/tests/io/test_parquet.py"))
>> -                          (invoke "pytest" "-vv" "pandas" "--skip-slow"
>> -                                  "--skip-network" "-k"
>> -                                  ;; XXX: Due to the deleted tests above.
>> -                                  "not test_read_s3_jsonl"))))))))
>> +       #:phases
>> +       (modify-phases %standard-phases
>> +         (add-after 'unpack 'patch-which
>> +           (lambda* (#:key inputs #:allow-other-keys)
>> +             (let ((which (assoc-ref inputs "which")))
>> +               (substitute* "pandas/io/clipboard/__init__.py"
>> +                 (("^CHECK_CMD = .*")
>> +                  (string-append "CHECK_CMD = \"" which "\"\n"))))
>> +             #t))
>> +         (replace 'check
>> +           (lambda _
>> +             (let ((build-directory
>> +                    (string-append
>> +                     (getcwd) "/build/"
>> +                     (car (scandir "build"
>> +                                   (cut string-prefix? "lib." <>))))))
>> +               ;; Disable the "strict data files" option which causes
>> +               ;; the build to error out if required data files are not
>> +               ;; available (as is the case with PyPI archives).
>> +               (substitute* "setup.cfg"
>> +                 (("addopts = --strict-data-files") "addopts = "))
>> +               (with-directory-excursion build-directory
>> +                 ;; Delete tests that require "moto" which is not yet in Guix.
>> +                 (for-each delete-file
>> +                           '("pandas/tests/io/conftest.py"
>> +                             "pandas/tests/io/json/test_compression.py"
>> +                             "pandas/tests/io/parser/test_network.py"
>> +                             "pandas/tests/io/test_parquet.py"))
>> +                 (invoke "pytest" "-vv" "pandas" "--skip-slow"
>> +                         "--skip-network" "-k"
>> +                         ;; XXX: Due to the deleted tests above.
>> +                         "not test_read_s3_jsonl"))))))))
>
> LGTM, although I'd prefer not to reindent the phases section.  It makes
> the patch harder to read, and I prefer the "deep" indentation for
> logically separate chunks of code anyway (though I am probably in the
> minority here..).  YMMV!

While I loathe any "deep" indentation, I've reverted my indentation
change here as it was a bit gratuitous (I needn't struggle to fit into
the 80 chars guideline).

> Thanks!

I pushed this change as c0d43f6223 with modifications based on your feedback.

Thank you!

Maxim
Ricardo Wurmus March 18, 2019, 1:50 p.m. UTC | #3
Maxim Cournoyer <maxim.cournoyer@gmail.com> writes:

> From ad1f0efe4a5c3d28ee9d7e2e5da275721af9e172 Mon Sep 17 00:00:00 2001
> From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
> Date: Sat, 9 Feb 2019 00:25:51 -0500
> Subject: [PATCH 5/5] gnu: python-pandas: Update to 0.24.2.
>
> * gnu/packages/python-xyz.scm (python-pandas): Update to 0.24.2.
> [phases]{patch-which}: Add phase.
> [inputs]: Add WHICH.

I have no objections to updating Pandas, but please make sure that this
version of Pandas works well with the other scientific Python packages
like numpy, scipy, sklearn, numba, etc.

These packages usually have rather strict interdependencies and need to
be updated together to avoid breakage.
Ricardo Wurmus March 18, 2019, 5:28 p.m. UTC | #4
Hi Maxim,

> I pushed this change as c0d43f6223 with modifications based on your feedback.

Have you checked if this version of Pandas is known to be compatible
with our versions of the scientific Python stack, including numpy,
scipy, statsmodels, matplotlib, sklearn, etc?

--
Ricardo
Maxim Cournoyer March 18, 2019, 9:04 p.m. UTC | #5
Hello Ricardo,

Ricardo Wurmus <rekado@elephly.net> writes:

> Maxim Cournoyer <maxim.cournoyer@gmail.com> writes:
>
>> From ad1f0efe4a5c3d28ee9d7e2e5da275721af9e172 Mon Sep 17 00:00:00 2001
>> From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
>> Date: Sat, 9 Feb 2019 00:25:51 -0500
>> Subject: [PATCH 5/5] gnu: python-pandas: Update to 0.24.2.
>>
>> * gnu/packages/python-xyz.scm (python-pandas): Update to 0.24.2.
>> [phases]{patch-which}: Add phase.
>> [inputs]: Add WHICH.
>
> I have no objections to updating Pandas, but please make sure that this
> version of Pandas works well with the other scientific Python packages
> like numpy, scipy, sklearn, numba, etc.
>
> These packages usually have rather strict interdependencies and need to
> be updated together to avoid breakage.

I've already gone ahead and merged those changes, but I retested the
following (on master) to make sure:

--8<---------------cut here---------------start------------->8---
for o in $(./pre-inst-env guix refresh -l python-pandas | cut -d':' -f2); do ./pre-inst-env guix build --check --no-grafts "$o" && echo "$o OK" >> build.results || echo "$o NOK" >>  build.results; done
--8<---------------cut here---------------end--------------->8---

And then:
--8<---------------cut here---------------start------------->8---
$ cat build.results
cnvkit@0.9.5 OK
deeptools@3.1.3 NOK
nanopolish@0.10.2-1.50e8b5c NOK
pigx@0.0.3 NOK
python-biom-format@2.1.7 NOK
python-feather-format@0.4.0 NOK
python-hic2cool@0.4.2 OK
python-plastid@0.4.8 OK
python-pybedtools@0.8.0 OK
python-pygenometracks@2.0 OK
python-scanpy@1.2.2 OK
python-scikit-image@0.14.2 OK
python-velocyto@0.17.17 OK
--8<---------------cut here---------------end--------------->8---

So nanopolish, deeptools, python-feather-format, python-biom-format, and
pigx are currently broken, but...

When using master on commit g8c72f13fd4
# (and re-running the same script as earlier)

--8<---------------cut here---------------start------------->8---
cat build.results.g8c72f13fd4 
cnvkit@0.9.5 OK
deeptools@3.1.3 NOK
nanopolish@0.10.2-1.50e8b5c NOK
pigx@0.0.3 NOK
python-biom-format@2.1.7 NOK
python-feather-format@0.4.0 NOK
python-hic2cool@0.4.2 OK
python-plastid@0.4.8 OK
python-pybedtools@0.8.0 NOK
python-pygenometracks@2.0 OK
python-scanpy@1.2.2 OK
python-scikit-image@0.14.2 OK
python-velocyto@0.17.17 OK
--8<---------------cut here---------------end--------------->8---

they already were!

I've also found out while testing that Pandas was not reproducible (this
was true also before my changes).

I will create tickets for all of these problems.

Apart from that, I have run some script which uses Pandas successfully
(and the Pandas test suite passes).

Are these verifications sufficient? And why does 'guix refresh -l' seem
to miss some packages which depend on python-pandas, e.g. python-seaborn?

Thanks,

Maxim
Ricardo Wurmus March 18, 2019, 10:34 p.m. UTC | #6
Hi Maxim,

> deeptools@3.1.3 NOK

I can’t reproduce this.  I get a substitute for deeptools.

> nanopolish@0.10.2-1.50e8b5c NOK

I can’t reproduce this.  I get a substitute for nanopolish.

> pigx@0.0.3 NOK

This is broken since the upgrade to python-loompy.  The authors are
working on fixing it.

> python-biom-format@2.1.7 NOK

I just fixed this.

> python-feather-format@0.4.0 NOK

This is broken because apache-arrow is broken.  I’m trying to fix this
now.  I just updated arrow to 0.10.0 (couldn’t build 0.12.0).

> python-pybedtools@0.8.0 NOK

I can’t reproduce this.  I get a substitute.

--
Ricardo
Maxim Cournoyer March 19, 2019, 1:15 a.m. UTC | #7
Hello Ricardo,

Ricardo Wurmus <rekado@elephly.net> writes:

> Hi Maxim,
>
>> deeptools@3.1.3 NOK
>
> I can’t reproduce this.  I get a substitute for deeptools.

It builds, but isn't reproducible. Try with --check and --no-grafts, it
should give you something like: guix build: error: derivation
`/gnu/store/7a80qjk898f7lhh46bjvv6mbbsrgaq5i-deeptools-3.1.3.drv' may
not be deterministic: output
`/gnu/store/f3z6fczw70j6692ddy467pbagbjck009-deeptools-3.1.3' differs

>> nanopolish@0.10.2-1.50e8b5c NOK
>
> I can’t reproduce this.  I get a substitute for nanopolish.

It builds, but isn't reproducible.

>> pigx@0.0.3 NOK
>
> This is broken since the upgrade to python-loompy.  The authors are
> working on fixing it.

OK

>> python-biom-format@2.1.7 NOK
>
> I just fixed this.

Cool!

>> python-feather-format@0.4.0 NOK
>
> This is broken because apache-arrow is broken.  I’m trying to fix this
> now.  I just updated arrow to 0.10.0 (couldn’t build 0.12.0).

This builds fine here now! :-)

>> python-pybedtools@0.8.0 NOK
>
> I can’t reproduce this.  I get a substitute.

Yeah, this builds fine on master, but on the older commit it seems it had
trouble. Nothing to worry about!

Thanks for the follow-up! Should we create tickets for the
reproducibility issues?

Maxim
Ricardo Wurmus March 19, 2019, 8:41 a.m. UTC | #8
Maxim Cournoyer <maxim.cournoyer@gmail.com> writes:

>>> deeptools@3.1.3 NOK
>>
>> I can’t reproduce this.  I get a substitute for deeptools.
>
> It builds, but isn't reproducible. Try with --check and --no-grafts, it
> should give you something like: guix build: error: derivation
> `/gnu/store/7a80qjk898f7lhh46bjvv6mbbsrgaq5i-deeptools-3.1.3.drv' may
> not be deterministic: output
> `/gnu/store/f3z6fczw70j6692ddy467pbagbjck009-deeptools-3.1.3' differs

Indeed.

“lib/python3.7/site-packages/deeptoolsintervals/tree.cpython-37m-x86_64-linux-gnu.so”
differs, but looking at the diffoscope output I can’t figure out why.

>>> nanopolish@0.10.2-1.50e8b5c NOK
>>
>> I can’t reproduce this.  I get a substitute for nanopolish.
>
> It builds, but isn't reproducible.

Yes, here it’s “bin/nanopolish” that differs.  I’ll investigate.

[…]
> Thanks for the follow-up! Should we create tickets for the
> reproducibility issues?

Sure, thanks!

--
Ricardo
Maxim Cournoyer March 21, 2019, 3:14 a.m. UTC | #9
I created the issues #34934 and #34935 to track the reproducibility
problems of deeptools and nanopolish, respectively.

Patch

From ad1f0efe4a5c3d28ee9d7e2e5da275721af9e172 Mon Sep 17 00:00:00 2001
From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
Date: Sat, 9 Feb 2019 00:25:51 -0500
Subject: [PATCH 5/5] gnu: python-pandas: Update to 0.24.2.

* gnu/packages/python-xyz.scm (python-pandas): Update to 0.24.2.
[phases]{patch-which}: Add phase.
[inputs]: Add WHICH.
---
 gnu/packages/python-xyz.scm | 65 ++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm
index 321c881f4d..bbf1403758 100644
--- a/gnu/packages/python-xyz.scm
+++ b/gnu/packages/python-xyz.scm
@@ -1014,56 +1014,67 @@  human-friendly syntax.")
 (define-public python-pandas
   (package
     (name "python-pandas")
-    (version "0.23.4")
+    (version "0.24.2")
     (source
      (origin
        (method url-fetch)
        (uri (pypi-uri "pandas" version))
        (sha256
-        (base32 "1x54pd7hr3y7qahx6b5bf2wzj54xvl8r3s1h4pl254pnmi3wl92v"))))
+        (base32 "18imlm8xbhcbwy4wa957a1fkamrcb0z988z006jpfda3ki09z4ag"))))
     (build-system python-build-system)
     (arguments
      `(#:modules ((guix build utils)
                   (guix build python-build-system)
                   (ice-9 ftw)
                   (srfi srfi-26))
-       #:phases (modify-phases %standard-phases
-                  (replace 'check
-                    (lambda _
-                      (let ((build-directory
-                             (string-append
-                              (getcwd) "/build/"
-                              (car (scandir "build"
-                                            (cut string-prefix? "lib." <>))))))
-                        ;; Disable the "strict data files" option which causes
-                        ;; the build to error out if required data files are not
-                        ;; available (as is the case with PyPI archives).
-                        (substitute* "setup.cfg"
-                          (("addopts = --strict-data-files") "addopts = "))
-                        (with-directory-excursion build-directory
-                          ;; Delete tests that require "moto" which is not yet in Guix.
-                          (for-each delete-file
-                                    '("pandas/tests/io/conftest.py"
-                                      "pandas/tests/io/json/test_compression.py"
-                                      "pandas/tests/io/parser/test_network.py"
-                                      "pandas/tests/io/test_parquet.py"))
-                          (invoke "pytest" "-vv" "pandas" "--skip-slow"
-                                  "--skip-network" "-k"
-                                  ;; XXX: Due to the deleted tests above.
-                                  "not test_read_s3_jsonl"))))))))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'patch-which
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((which (assoc-ref inputs "which")))
+               (substitute* "pandas/io/clipboard/__init__.py"
+                 (("^CHECK_CMD = .*")
+                  (string-append "CHECK_CMD = \"" which "\"\n"))))
+             #t))
+         (replace 'check
+           (lambda _
+             (let ((build-directory
+                    (string-append
+                     (getcwd) "/build/"
+                     (car (scandir "build"
+                                   (cut string-prefix? "lib." <>))))))
+               ;; Disable the "strict data files" option which causes
+               ;; the build to error out if required data files are not
+               ;; available (as is the case with PyPI archives).
+               (substitute* "setup.cfg"
+                 (("addopts = --strict-data-files") "addopts = "))
+               (with-directory-excursion build-directory
+                 ;; Delete tests that require "moto" which is not yet in Guix.
+                 (for-each delete-file
+                           '("pandas/tests/io/conftest.py"
+                             "pandas/tests/io/json/test_compression.py"
+                             "pandas/tests/io/parser/test_network.py"
+                             "pandas/tests/io/test_parquet.py"))
+                 (invoke "pytest" "-vv" "pandas" "--skip-slow"
+                         "--skip-network" "-k"
+                         ;; XXX: Due to the deleted tests above.
+                         "not test_read_s3_jsonl"))))))))
     (propagated-inputs
      `(("python-numpy" ,python-numpy)
        ("python-openpyxl" ,python-openpyxl)
        ("python-pytz" ,python-pytz)
        ("python-dateutil" ,python-dateutil)
        ("python-xlrd" ,python-xlrd)))
+    (inputs
+     `(("which" ,which)))
     (native-inputs
      `(("python-cython" ,python-cython)
        ("python-beautifulsoup4" ,python-beautifulsoup4)
        ("python-lxml" ,python-lxml)
        ("python-html5lib" ,python-html5lib)
        ("python-nose" ,python-nose)
-       ("python-pytest" ,python-pytest)))
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-mock" ,python-pytest-mock)))
     (home-page "https://pandas.pydata.org")
     (synopsis "Data structures for data analysis, time series, and statistics")
     (description
-- 
2.20.1