@@ -20,6 +20,7 @@
;;; Copyright © 2022 Paul A. Patience <paul@apatience.com>
;;; Copyright © 2022 Wiktor Żelazny <wzelazny@vurv.cz>
;;; Copyright © 2022 Eric Bavier <bavier@posteo.net>
+;;; Copyright © 2022 Kiran Shila <me@kiranshila.com>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -1478,3 +1479,43 @@ (define-public python-opt-einsum
well as potentially any library which conforms to a standard API. See the
documentation for more information.")
(license license:expat)))
+
+(define-public python-hdbscan
+  (package
+    (name "python-hdbscan")
+    (version "0.8.28")
+    (source (origin
+              (method url-fetch)
+              (uri (pypi-uri "hdbscan" version))
+              (sha256
+               (base32
+                "0nn0xm9ji74pkil1lbkrskmyak25jx4av8cdqg2dzgiv5wgzgnpf"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:modules ((ice-9 ftw)
+                  (srfi srfi-1)
+                  (guix build utils)
+                  (guix build python-build-system))
+       #:phases (modify-phases %standard-phases
+                  (replace 'check
+                    (lambda* (#:key tests? #:allow-other-keys)
+                      (when tests?
+                        ;; Run pytest on the built tree under build/lib*.
+                        (let ((lib (find (lambda (entry)
+                                           (string-prefix? "lib" entry))
+                                         (or (scandir "build") '()))))
+                          (unless lib (error "no lib* directory under build/"))
+                          (invoke "pytest" "-vv"
+                                  (string-append (getcwd) "/build/" lib)))))))))
+    (propagated-inputs (list python-cython python-joblib python-numpy
+                             python-scikit-learn python-scipy))
+    (native-inputs (list python-networkx python-nose python-pandas
+                         python-pytest))
+    (home-page "https://github.com/scikit-learn-contrib/hdbscan")
+    (synopsis "Clustering based on density with variable density clusters")
+    (description "HDBSCAN (Hierarchical Density-Based Spatial Clustering of
+Applications with Noise) performs DBSCAN over varying epsilon values and
+integrates the result to find a clustering that gives the best stability over
+epsilon.  This allows it to find clusters of varying densities (unlike
+DBSCAN) and to be more robust to parameter selection.")
+    (license license:bsd-3)))