diff mbox series

[bug#68721,2/2] gnu: Add python-fastparquet.

Message ID 87y1b0pit1.fsf@troyfigiel.com
State New
Headers show
Series None | expand

Commit Message

Troy Figiel March 2, 2024, 6:40 p.m. UTC
* gnu/packages/databases.scm (python-fastparquet): New variable.
---
 gnu/packages/databases.scm | 68 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
diff mbox series

Patch

diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index db4fe0b447..5ec0234ef1 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -62,6 +62,7 @@ 
 ;;; Copyright © 2023 Felix Gruber <felgru@posteo.net>
 ;;; Copyright © 2023 Munyoki Kilyungi <me@bonfacemunyoki.com>
 ;;; Copyright © 2023 Giacomo Leidi <goodoldpaul@autistici.org>
+;;; Copyright © 2024 Troy Figiel <troy@troyfigiel.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -143,6 +144,7 @@  (define-module (gnu packages databases)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-check)
+  #:use-module (gnu packages python-compression)
   #:use-module (gnu packages python-crypto)
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
@@ -4984,6 +4986,72 @@  (define-public python-pyarrow-0.16
 other traditional Python scientific computing packages.")
     (license license:asl2.0)))
 
+(define-public python-fastparquet
+  (package
+    (name "python-fastparquet")
+    (version "2024.2.0")
+    (source
+     (origin
+       ;; Fastparquet relies on setuptools-scm to determine its version,
+       ;; which only works with the PyPI tarball; that tarball, however,
+       ;; ships without the tests.  Fetch from git instead and write the
+       ;; version back ourselves in the 'set-version phase.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dask/fastparquet")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0f32dj1xvd11l0siznqd33dpjlhg9siylcjcfkcdlqfcy45jfj3v"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; Add back the version information missing from the git
+          ;; checkout, so that it is in place before the build phase.
+          (add-after 'unpack 'set-version
+            (lambda _
+              (let ((quoted-version (string-append "\"" #$version "\"")))
+                (call-with-output-file "fastparquet/_version.py"
+                  (lambda (port)
+                    (format port "__version__ = ~a" quoted-version))))))
+          (add-after 'unpack 'relax-dependencies
+            (lambda _
+              (substitute* "setup.py"
+                ;; Drop the dependencies on git and setuptools-scm.
+                (("^.*\"git\", \"status\".*$") "")
+                (("'setuptools-scm>1.5.4',") "")
+                ;; Guix only provides a single version of numpy at a
+                ;; time, so this dependency can safely be removed.
+                (("'oldest-supported-numpy'") ""))))
+          ;; The check phase needs the Cython extensions built in place.
+          (add-before 'check 'build-cython-extensions
+            (lambda _
+              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
+    (propagated-inputs
+     (list python-cramjam python-fsspec python-numpy python-packaging
+           python-pandas))
+    (native-inputs
+     (list python-cython
+           python-pytest-runner))
+    (home-page "https://github.com/dask/fastparquet")
+    (synopsis "Python implementation of the Parquet file format")
+    (description
+     "@code{fastparquet} is a Python implementation of the Parquet file
+format.  @code{fastparquet} is used implicitly by @code{dask}, @code{pandas}
+and @code{intake-parquet}.  It supports the following compression algorithms:
+
+@itemize
+@item Gzip
+@item Snappy
+@item Brotli
+@item LZ4
+@item Zstd
+@item LZO (optionally)
+@end itemize")
+    (license license:asl2.0)))
+
 (define-public python-crate
   (package
     (name "python-crate")