[bug#77541,23/38] gnu: python-bed-reader: Fix build and enable tests.

Message ID 20250404191717.32747-23-ngraves@ngraves.fr
State New
Headers
Series [bug#77541,01/38] gnu: Add clipper2. |

Commit Message

Nicolas Graves April 4, 2025, 7:16 p.m. UTC
  * gnu/packages/bioinformatics.scm (python-bed-reader): Fix build and enable tests.
[source]{snippet}: Delete bundled website-related javascript.
{patches}: Use store-cached samples instead of pooch-cached ones.
[arguments]{tests?}: Enable them.
{cargo-test-flags}: Skip doc tests. Skip failing tests.
{cargo-inputs}: Improve style.
{cargo-development-inputs}: Improve style.
{phases}: Add phases 'set-data-path, 'patch-data-path to use
store-cached samples for library and tests.  Rewrite phase
'prepare-python-module to rely more on the existing info in
pyproject.toml.  Rewrite phase 'check-python entirely, and
marginally rewrite phase 'install-python-library to match 'check-python
phase style.
{modules}: Adapt accordingly.
{propagated-inputs}: Remove python-pooch. Add python-scipy.

(bed-sample-files): Add origin, used in python-bed-reader.

* gnu/packages/patches/python-bed-reader-use-store-samples.patch: Add
patch.
* gnu/local.mk: Record patch.
---
 gnu/local.mk                                  |   1 +
 gnu/packages/bioinformatics.scm               | 195 ++++++++++++------
 .../python-bed-reader-use-store-samples.patch | 147 +++++++++++++
 3 files changed, 284 insertions(+), 59 deletions(-)
 create mode 100644 gnu/packages/patches/python-bed-reader-use-store-samples.patch
  

Patch

diff --git a/gnu/local.mk b/gnu/local.mk
index 1bf8f8deba..6c2466114d 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -2008,6 +2008,7 @@  dist_patch_DATA =						\
   %D%/packages/patches/python-3.12-fix-tests.patch		\
   %D%/packages/patches/python-accupy-use-matplotx.patch		\
   %D%/packages/patches/python-accupy-fix-use-of-perfplot.patch	\
+  %D%/packages/patches/python-bed-reader-use-store-samples.patch	\
   %D%/packages/patches/python-chai-drop-python2.patch		\
   %D%/packages/patches/python-clarabel-blas.patch		\
   %D%/packages/patches/python-docrepr-fix-tests.patch		\
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 4dbedc09f9..c5fe295f6c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -2167,6 +2167,20 @@  (define-public python-bcbio-gff/biopython-1.73
       (modify-inputs (package-propagated-inputs python-bcbio-gff)
         (replace "python-biopython" python-biopython-1.73))))))
 
+(define bed-sample-files
+  (let* ((name "bed-sample-files")
+         (commit "a06dc0450e484090f15656ffd5d317813a5e1e01")
+         (revision "0")
+         (version (git-version "0.0.0" revision commit)))
+    (origin
+      (method git-fetch)
+      (uri (git-reference
+            (url "https://github.com/fastlmm/bed-sample-files")
+            (commit commit)))
+      (file-name (git-file-name name version))
+      (sha256
+       (base32 "1ldr2lvgbcykxa9i2s2298mhfh0sz96aaxs5dx217aipa9vsrjwk")))))
+
 (define-public python-bed-reader
   (package
     (name "python-bed-reader")
@@ -2176,80 +2190,124 @@  (define-public python-bed-reader
        (method url-fetch)
        (uri (pypi-uri "bed_reader" version))
        (sha256
-        (base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n"))))
+        (base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n"))
+       (modules '((guix build utils)))
+       ;; Bundled unused javascript & co.
+       (snippet #~(delete-file-recursively "_static"))
+       (patches
+        (search-patches "python-bed-reader-use-store-samples.patch"))))
     (build-system cargo-build-system)
     (arguments
      (list
-      ;; Many of the tests (both the Rust tests and the Python tests) require
-      ;; Internet access to fetch samples.
-      #:tests? #false
       #:install-source? #false
       #:features '(list "extension-module")
-      #:cargo-test-flags '(list "--features=extension-module")
+      #:cargo-test-flags
+      '(list "--features=extension-module"
+             ;; Skip doc tests.
+             "--lib" "--bins" "--tests" "--"
+             ;; This test is the only one not matched by our regexp.
+             "--skip=http_one"
+             ;; These tests require an 84 GB file.
+             "--skip=http_two"
+             "--skip=http_cloud_urls_md_3")
       #:cargo-inputs
-      `(("rust-anyinput" ,rust-anyinput-0.1)
-        ("rust-bytecount" ,rust-bytecount-0.6)
-        ("rust-byteorder" ,rust-byteorder-1)
-        ("rust-bytes" ,rust-bytes-1)
-        ("rust-cloud-file" ,rust-cloud-file-0.2)
-        ("rust-derive-builder" ,rust-derive-builder-0.20)
-        ("rust-dpc-pariter" ,rust-dpc-pariter-0.4)
-        ("rust-fetch-data" ,rust-fetch-data-0.2)
-        ("rust-futures-util" ,rust-futures-util-0.3)
-        ("rust-itertools" ,rust-itertools-0.13)
-        ("rust-ndarray" ,rust-ndarray-0.16)
-        ("rust-ndarray-npy" ,rust-ndarray-npy-0.9)
-        ("rust-num-traits" ,rust-num-traits-0.2)
-        ("rust-numpy" ,rust-numpy-0.22)
-        ("rust-pyo3" ,rust-pyo3-0.22)
-        ("rust-pyo3-build-config" ,rust-pyo3-build-config-0.22)
-        ("rust-rayon" ,rust-rayon-1)
-        ("rust-statrs" ,rust-statrs-0.17)
-        ("rust-thiserror" ,rust-thiserror-1)
-        ("rust-tokio" ,rust-tokio-1))
+      (list rust-anyinput-0.1
+            rust-bytecount-0.6
+            rust-byteorder-1
+            rust-bytes-1
+            rust-cloud-file-0.2
+            rust-derive-builder-0.20
+            rust-dpc-pariter-0.4
+            rust-fetch-data-0.2
+            rust-futures-util-0.3
+            rust-itertools-0.13
+            rust-ndarray-0.16
+            rust-ndarray-npy-0.9
+            rust-num-traits-0.2
+            rust-numpy-0.22
+            rust-pyo3-0.22
+            rust-pyo3-build-config-0.22
+            rust-rayon-1
+            rust-statrs-0.17
+            rust-thiserror-1
+            rust-tokio-1)
       #:cargo-development-inputs
-      `(("rust-anyhow" ,rust-anyhow-1)
-        ("rust-ndarray-rand" ,rust-ndarray-rand-0.15)
-        ("rust-rusoto-credential" ,rust-rusoto-credential-0.48)
-        ("rust-temp-testdir" ,rust-temp-testdir-0.2)
-        ("rust-thousands" ,rust-thousands-0.2))
+      (list rust-anyhow-1
+            rust-ndarray-rand-0.15
+            rust-rusoto-credential-0.48
+            rust-temp-testdir-0.2
+            rust-thousands-0.2)
       #:imported-modules
       (append %cargo-build-system-modules
               %pyproject-build-system-modules)
       #:modules
       '((guix build cargo-build-system)
         ((guix build pyproject-build-system) #:prefix py:)
-        (guix build utils))
+        (guix build utils)
+        (ice-9 match)
+        (ice-9 rdelim))
       #:phases
       #~(modify-phases %standard-phases
+          (add-after 'configure 'set-data-path
+            (lambda _
+              ;; This var is still necessary despite the patch-data-path phase.
+              ;; Otherwise more tests fail with a read-only filesystem error.
+              (setenv "BED_READER_DATA_DIR" #+bed-sample-files)))
+          (add-after 'unpack 'patch-data-path
+            (lambda _
+              ;; If BED_READER_DATA_DIR is unset, default to bed-sample-files.
+              (substitute* "bed_reader/_sample_data.py"
+                (("os\\.environ\\.get\\(\"BED_READER_DATA_DIR\"" all)
+                 (format #f "~a, ~s" all #+bed-sample-files)))
+              ;; XXX: More work is necessary to use another
+              ;; version of the sample files via BED_READER_DATA_DIR.
+              ;; Currently, only the hardcoded Guix version works.
+              (substitute* '("bed_reader/tests/test_open_bed_cloud.py"
+                             "src/bed_cloud.rs"
+                             "src/lib.rs"
+                             "src/supplemental_documents/cloud_urls_etc.md"
+                             "tests/tests_api_cloud.rs")
+                (("\
+https://raw\\.githubusercontent\\.com/fastlmm/bed-sample-files/main")
+                 (string-append "file://" #+bed-sample-files)))
+              (substitute* "src/tests.rs"
+                (("bed_reader/tests/data")
+                 #+bed-sample-files))))
           (add-after 'install 'prepare-python-module
             (lambda _
-              ;; We don't use maturin.
-              (delete-file "pyproject.toml")
-              (call-with-output-file "pyproject.toml"
-                (lambda (port)
-                  (format port "\
+              ;; We don't use maturin. Conveniently, what we want to drop
+              ;; from pyproject.toml is at the end of the file.
+              (rename-file "pyproject.toml" "pyproject.toml.bak")
+              (call-with-input-file "pyproject.toml.bak"
+                (lambda (in)
+                  (call-with-output-file "pyproject.toml"
+                    (lambda (out)
+                      (let loop ()
+                        (match (read-line in)
+                          ((? eof-object? eof)
+                           eof)
+                          ("[build-system]"
+                           (and (format out "\
 [build-system]
 build-backend = 'setuptools.build_meta'
 requires = ['setuptools']
+
+[tool.setuptools.packages.find]
+where = [\".\"]
+exclude = [\"src\", \"docs\", \"tests\", \"Cargo.toml\"]
 ")))
-              (call-with-output-file "setup.cfg"
-                (lambda (port)
-                  (format port "\
-[metadata]
-name = bed-reader
-version = ~a
-
-[options]
-packages = find:
-
-[options.packages.find]
-exclude =
-  src
-  docs
-  tests
-  Cargo.toml
-" #$version)))))
+                          ("samples = [\"pooch>=1.5.0\"]"
+                           (and (format out "samples = []~%")
+                                (loop)))
+                          ("[project]"
+                           (and (format out "\
+[project]
+version = ~s
+" #$version)
+                                (loop)))
+                          (line
+                           (and (format out "~a~%" line)
+                                (loop)))))))))))
           (add-after 'prepare-python-module 'enable-bytecode-determinism
             (assoc-ref py:%standard-phases 'enable-bytecode-determinism))
           (add-after 'enable-bytecode-determinism 'build-python-module
@@ -2261,16 +2319,35 @@  (define-public python-bed-reader
               (let ((site (string-append #$output "/lib/python"
                                          #$(version-major+minor
                                             (package-version python))
-                                         "/site-packages")))
-                (mkdir-p site)
+                                         "/site-packages/")))
+                (mkdir-p (string-append site "bed_reader"))
                 (copy-file "target/release/libbed_reader.so"
-                           (string-append site "/bed_reader/bed_reader.so")))))
+                           (string-append site "bed_reader/bed_reader.so")))))
           (add-after 'install-python-library 'add-install-to-pythonpath
             (assoc-ref py:%standard-phases 'add-install-to-pythonpath))
           (add-after 'add-install-to-pythonpath 'check-python
-            (lambda* (#:key tests? test-flags #:allow-other-keys)
+            (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
-                (apply invoke "pytest" "-v" #$output test-flags)))))))
+                (let ((site (string-append #$output "/lib/python"
+                                           #$(version-major+minor
+                                              (package-version python))
+                                           "/site-packages/"))
+                      (data-dir "bed_reader/tests/data"))
+                  (symlink (canonicalize-path data-dir)
+                           (string-append site data-dir))
+                  (invoke "pytest" "-v" #$output
+                          ;; These tests require an 84 GB file.
+                          "-k" (string-join
+                                (list "not test_http_two"
+                                      "test_http_cloud_urls_rst_3"
+                                      "test_http_cloud_urls_rst_4"
+                                      ;; XXX: python-pooch dependency removed
+                                      "test_optional_dependencies")
+                                " and not "))
+                  (delete-file-recursively
+                   (string-append site "bed_reader/tests"))
+                  (delete-file-recursively
+                   (string-append #$output "/.pytest_cache")))))))))
     (native-inputs (list python-pytest
                          python-pytest-cov
                          python-pytest-datadir
@@ -2278,7 +2355,7 @@  (define-public python-bed-reader
                          python-recommonmark
                          python-sphinx))
     (inputs (list python-wrapper))
-    (propagated-inputs (list python-numpy python-pandas python-pooch))
+    (propagated-inputs (list python-numpy python-pandas python-scipy))
     (home-page "https://fastlmm.github.io/")
     (synopsis "Read and write the PLINK BED format, simply and efficiently")
     (description
diff --git a/gnu/packages/patches/python-bed-reader-use-store-samples.patch b/gnu/packages/patches/python-bed-reader-use-store-samples.patch
new file mode 100644
index 0000000000..813f155225
--- /dev/null
+++ b/gnu/packages/patches/python-bed-reader-use-store-samples.patch
@@ -0,0 +1,147 @@ 
+From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001
+Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr>
+From: Nicolas Graves <ngraves@ngraves.fr>
+Date: Thu, 3 Apr 2025 11:33:58 +0200
+Subject: [PATCH] samples: Use deterministic samples in Guix.
+
+---
+ bed_reader/_sample_data.py | 86 +++++++++-----------------------------
+ 1 file changed, 19 insertions(+), 67 deletions(-)
+
+diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py
+index 6ca4cc0..6a1146e 100644
+--- a/bed_reader/_sample_data.py
++++ b/bed_reader/_sample_data.py
+@@ -1,33 +1,8 @@
++import os
+ import tempfile
+ from pathlib import Path, PurePath
+ from typing import Union
+ 
+-try:
+-    import pooch
+-
+-    """
+-    Load sample data.
+-    """
+-
+-    POOCH = pooch.create(
+-        # Use the default cache folder for the OS
+-        path=pooch.os_cache("bed_reader"),
+-        # The remote data is on Github
+-        base_url="https://raw.githubusercontent.com/"
+-        + "fastlmm/bed-sample-files/main/",
+-        # If this is a development version, get the data from the master branch
+-        version_dev="main",
+-        # The registry specifies the files that can be fetched
+-        env="BED_READER_DATA_DIR",
+-    )
+-
+-    # Get registry file from package_data
+-    registry_file = Path(__file__).parent / "tests/registry.txt"
+-    # Load this registry file
+-    POOCH.load_registry(registry_file)
+-except ImportError:
+-    pooch = None
+-
+ 
+ def sample_file(filepath: Union[str, Path]) -> str:
+     """Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
+@@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str:
+     Returns
+     -------
+     str
+-        Local name of sample .bed file.
+-
+-
+-    .. note::
+-        This function requires the :mod:`pooch` package. Install `pooch` with:
+-
+-        .. code-block:: bash
+-
+-            pip install --upgrade bed-reader[samples]
+-
+-
+-    By default this function puts files under the user's cache directory.
+-    Override this by setting
+-    the `BED_READER_DATA_DIR` environment variable.
++        Local path of sample .bed file.
+ 
+     Example
+     --------
+ 
+     .. doctest::
+ 
+-        >>> # pip install bed-reader[samples]  # if needed
+         >>> from bed_reader import sample_file
+         >>>
+         >>> file_name = sample_file("small.bed")
+         >>> print(f"The local file name is '{file_name}'")
+         The local file name is '...small.bed'
+-
+     """
+-    if pooch is None:
+-        raise ImportError(
+-            "The function sample_file() requires pooch. "
+-            + "Install it with 'pip install --upgrade bed-reader[samples]'.",
++    filepath = Path(filepath)
++    sample_dir = os.environ.get("BED_READER_DATA_DIR")
++    if sample_dir is None:
++        raise EnvironmentError(
++            "BED_READER_DATA_DIR environment variable is not set. "
++            "This should point to the directory containing the sample files."
+         )
+ 
+-    filepath = Path(filepath)
+-    file_string = str(filepath)
+-    if file_string.lower().endswith(".bed"):
+-        POOCH.fetch(file_string[:-4] + ".fam")
+-        POOCH.fetch(file_string[:-4] + ".bim")
+-    return POOCH.fetch(file_string)
++    file_path = Path(sample_dir) / filepath
++
++    # Check if file exists
++    if not file_path.exists():
++        raise FileNotFoundError(
++            f"Sample file '{filepath}' not found in {sample_dir}. "
++            f"Make sure you're using the latest samples in BED_READER_DATA_DIR."
++        )
++
++    return str(file_path)
+ 
+ 
+ def sample_url(filepath: Union[str, Path]) -> str:
+-    """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files).
++    """Retrieve a URL to a sample .bed file.
+ 
+     Parameters
+     ----------
+@@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str:
+     str
+         URL to sample .bed file.
+ 
+-
+-    .. note::
+-        This function requires the :mod:`pooch` package. Install `pooch` with:
+-
+-        .. code-block:: bash
+-
+-            pip install --upgrade bed-reader[samples]
+-
+-
+-    By default this function puts files under the user's cache directory.
+-    Override this by setting
+-    the `BED_READER_DATA_DIR` environment variable.
+-
+     Example
+     --------
+ 
+     .. doctest::
+ 
+-        >>> # pip install bed-reader[samples]  # if needed
+         >>> from bed_reader import sample_url
+         >>>
+         >>> url = sample_url("small.bed")
+-- 
+2.49.0
+