diff mbox series

[bug#46946,1/2] gnu: Add python-bioframe.

Message ID 20210305141930.28787-1-madalinionel.patrascu@mdc-berlin.de
State New
Headers show
Series [bug#46946,1/2] gnu: Add python-bioframe. | expand

Checks

Context Check Description
cbaines/submitting builds success
cbaines/comparison success View comparison
cbaines/git branch success View Git branch
cbaines/applying patch success View Laminar job
cbaines/issue success View issue

Commit Message

Mădălin Ionel Patrașcu March 5, 2021, 2:19 p.m. UTC
* gnu/packages/bioinformatics.scm (python-bioframe): New variable.
---
 gnu/packages/bioinformatics.scm | 46 +++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

Comments

Ricardo Wurmus April 1, 2021, 3:38 p.m. UTC | #1
Hi Mădălin,

> * gnu/packages/bioinformatics.scm (python-bioframe): New variable.

Thanks for the patch.

Despite all the test inputs the test suite is not actually run:

--8<---------------cut here---------------start------------->8---
starting phase `check'
running "python setup.py" with command "test" and parameters ()
running test
running egg_info
writing bioframe.egg-info/PKG-INFO
writing dependency_links to bioframe.egg-info/dependency_links.txt
writing requirements to bioframe.egg-info/requires.txt
writing top-level names to bioframe.egg-info/top_level.txt
reading manifest file 'bioframe.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no previously-included files matching '__pycache__/*' found anywhere in distribution
warning: no previously-included files matching '*.so' found anywhere in distribution
warning: no previously-included files matching '*.pyd' found anywhere in distribution
warning: no previously-included files matching '*.pyc' found anywhere in distribution
warning: no previously-included files matching '.git*' found anywhere in distribution
warning: no previously-included files matching '.deps/*' found anywhere in distribution
warning: no previously-included files matching '.DS_Store' found anywhere in distribution
writing manifest file 'bioframe.egg-info/SOURCES.txt'
running build_ext
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "reg2bins" failed type inference due to: No conversion from list(int64)<iv=None> to int32 for '$262return_value.1', defined at None

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

During: typing of assignment at /tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py (79)

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

  @numba.jit("int32(int32, int32)")
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "reg2bins" failed type inference due to: cannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>

File "bioframe/io/dask.py", line 59:
def reg2bins(rbeg, rend):
    <source elided>

    k = 9 + (rbeg >> 23)
    ^

  @numba.jit("int32(int32, int32)")
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:177: NumbaWarning: Function "reg2bins" was compiled in object mode without forceobj=True, but has lifted loops.

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaWarning(warn_msg,
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:187: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaDeprecationWarning(msg,

----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK
phase `check' succeeded after 3.1 seconds
--8<---------------cut here---------------end--------------->8---


I made a couple of cosmetic changes:

--8<---------------cut here---------------start------------->8---
;; Guix package definition for bioframe 0.2.0, built with
;; python-build-system from the upstream Git tag "v<version>".
(define-public python-bioframe
  (package
    (name "python-bioframe")
    (version "0.2.0")
    ;; Sources on pypi do not contain requirements.txt
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/open2c/bioframe")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0w5xrb93cf3hx3d1lw48a0l1ranghpj260b05b1rpk58wwwcsqfh"))))
    (build-system python-build-system)
    ;; FIXME: The default `check' phase runs "python setup.py test", which
    ;; reports "Ran 0 tests" for this package, so the test-only inputs below
    ;; are not exercised yet.  The phase presumably needs to be replaced with
    ;; one that invokes the real test runner -- to be confirmed.
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-requests" ,python-requests)))
    (native-inputs
     `(("python-dask" ,python-dask)             ; for tests
       ("python-matplotlib" ,python-matplotlib) ; for tests
       ("python-numba" ,python-numba)           ; for tests
       ("python-pypairix" ,python-pypairix)     ; for tests
       ("python-pytest" ,python-pytest)
       ("python-wheel" ,python-wheel)))
    (home-page "https://github.com/open2c/bioframe")
    (synopsis "Framework for genomic data analysis using Pandas dataframes")
    ;; The description is Texinfo markup; sentences are separated by two
    ;; spaces.
    (description
     "This package is a library to enable flexible and scalable operations on
genomic interval dataframes in Python.  @code{python-bioframe} enables access
to a rich set of dataframe operations.  Working in Python enables rapid
visualization (e.g. matplotlib, seaborn) and iteration of genomic analyses.")
    (license license:expat)))
--8<---------------cut here---------------end--------------->8---


Please note that the lack of requirements.txt on PyPI shouldn’t be an
obstacle to using the sources from PyPI.  It should build just as well.
diff mbox series

Patch

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index d617736e57..fb357b8b6c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -120,6 +120,7 @@ 
   #:use-module (gnu packages popt)
   #:use-module (gnu packages protobuf)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-check)
   #:use-module (gnu packages python-compression)
   #:use-module (gnu packages python-science)
@@ -923,6 +924,51 @@  Python.")
        `(("python2-pathlib" ,python2-pathlib)
          ,@(package-native-inputs pybedtools))))))
 
+(define-public python-bioframe
+  (package
+    (name "python-bioframe")
+    (version "0.2.0")
+    ;; PyPI sources do not contain requirements.txt
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/open2c/bioframe")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0w5xrb93cf3hx3d1lw48a0l1ranghpj260b05b1rpk58wwwcsqfh"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-click" ,python-click)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-requests" ,python-requests)))
+    (native-inputs
+     `(("python-dask" ,python-dask)             ;;for unittest.loader
+       ("python-matplotlib" ,python-matplotlib) ;;for unittest.loader
+       ("python-numba" ,python-numba)           ;;for unittest.loader
+       ("python-pypairix" ,python-pypairix)     ;;for unittest.loader
+       ("python-pytest" ,python-pytest)
+       ("python-wheel" ,python-wheel)))
+    (home-page "https://github.com/open2c/bioframe")
+    (synopsis
+     "Framework for genomic data analysis using Pandas dataframes")
+    (description
+     "This package is a library to enable flexible and scalable operations on
+genomic interval dataframes in python.  @code{python-bioframe} enables access
+to a rich set of dataframe operations.  Working in python enables rapid
+visualization (e.g. matplotlib, seaborn) and iteration of genomic analyses.
+The philosophy underlying @code{python-bioframe} is to enable flexible
+operations: instead of creating a function for every possible use-case, we
+instead encourage users to compose functions to achieve their goals.  As a
+rough rule of thumb, if a function requires three steps and is crucial for
+genomic interval arithmetic we have included it; conversely if it can be
+performed in a single line by composing two of the core functions, we have not
+included it.")
+    (license license:expat)))
+
 (define-public python-biom-format
   (package
     (name "python-biom-format")