diff mbox series

[bug#68721,2/2] gnu: Add python-fastparquet.

Message ID 87il3hgjt9.fsf@troyfigiel.com
State New
Headers show
Series gnu: Add python-fastparquet. | expand

Commit Message

Troy Figiel Jan. 25, 2024, 8:45 p.m. UTC
* gnu/packages/databases.scm (python-fastparquet): New variable.
---
 gnu/packages/databases.scm | 67 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
diff mbox series

Patch

diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index b56767d311..66f7f7d951 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -62,6 +62,7 @@ 
 ;;; Copyright © 2023 Felix Gruber <felgru@posteo.net>
 ;;; Copyright © 2023 Munyoki Kilyungi <me@bonfacemunyoki.com>
 ;;; Copyright © 2023 Giacomo Leidi <goodoldpaul@autistici.org>
+;;; Copyright © 2024 Troy Figiel <troy@troyfigiel.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -142,6 +143,7 @@  (define-module (gnu packages databases)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-check)
+  #:use-module (gnu packages python-compression)
   #:use-module (gnu packages python-crypto)
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
@@ -4896,6 +4898,71 @@  (define-public python-pyarrow-0.16
 other traditional Python scientific computing packages.")
     (license license:asl2.0)))
 
+(define-public python-fastparquet
+  (package
+    (name "python-fastparquet")
+    (version "2023.10.1")
+    (source
+     (origin
+       ;; Fastparquet uses setuptools-scm to find the current version. This
+       ;; only works when we use the PyPI tarball, which does not contain
+       ;; tests. Instead, we use the git-fetch method and add the version back
+       ;; ourselves.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dask/fastparquet")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0msc2n4sjbq3h5pq6l94rfx27v0aqrk5cxbpg3yssr74gwx26h4r"))
+       (modules '((guix build utils)))
+       (snippet '(substitute* "setup.py"
+                   ;; Remove dependencies on git and setuptools-scm.
+                   (("^.*\"git\", \"status\".*$")
+                    "")
+                   (("'setuptools-scm>1.5.4',")
+                    "")
+                   ;; Guix is only compatible with a single version of numpy
+                   ;; at a time. We can safely remove this dependency.
+                   (("'oldest-supported-numpy'")
+                    "")))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases #~(modify-phases %standard-phases
+                   ;; Add back the missing version information by writing
+                   ;; fastparquet/_version.py before the build phase.
+                   (add-after 'unpack 'set-version
+                     (lambda _
+                       (call-with-output-file "fastparquet/_version.py"
+                         (lambda (port)
+                           (format port "__version__ = ~a"
+                                   (string-append "\""
+                                                  #$version "\""))))))
+                   ;; Cython extensions need to be built for the check phase.
+                   (add-before 'check 'build-cython-extensions
+                     (lambda _
+                       (invoke "python" "setup.py" "build_ext" "--inplace"))))))
+    (propagated-inputs (list python-cramjam python-fsspec python-numpy
+                             python-packaging python-pandas))
+    (native-inputs (list python-cython python-pytest-runner))
+    (home-page "https://github.com/dask/fastparquet")
+    (synopsis "Python implementation of the Parquet file format")
+    (description
+     "@code{fastparquet} is a Python implementation of the Parquet file
+format.  @code{fastparquet} is used implicitly by @code{dask}, @code{pandas}
+and @code{intake-parquet}.  It supports the following compression algorithms:
+
+@itemize
+@item Gzip
+@item Snappy
+@item Brotli
+@item LZ4
+@item Zstd
+@item LZO (optionally)
+@end itemize")
+    (license license:asl2.0)))
+
 (define-public python-crate
   (package
     (name "python-crate")