diff mbox series

[bug#46946,1/2] gnu: Add python-bioframe.

Message ID 20210305141930.28787-1-madalinionel.patrascu@mdc-berlin.de
State New
Headers show
Series [bug#46946,1/2] gnu: Add python-bioframe. | expand

Checks

Context Check Description
cbaines/submitting builds success
cbaines/comparison success View comparison
cbaines/git branch success View Git branch
cbaines/applying patch success View Laminar job
cbaines/issue success View issue

Commit Message

Mădălin Ionel Patrașcu March 5, 2021, 2:19 p.m. UTC
* gnu/packages/bioinformatics.scm (python-bioframe): New variable.
---
 gnu/packages/bioinformatics.scm | 46 +++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

Comments

Ricardo Wurmus April 1, 2021, 3:38 p.m. UTC | #1
Hi Mădălin,

> * gnu/packages/bioinformatics.scm (python-bioframe): New variable.

Thanks for the patch.

Despite all the test inputs the test suite is not actually run:

--8<---------------cut here---------------start------------->8---
starting phase `check'
running "python setup.py" with command "test" and parameters ()
running test
running egg_info
writing bioframe.egg-info/PKG-INFO
writing dependency_links to bioframe.egg-info/dependency_links.txt
writing requirements to bioframe.egg-info/requires.txt
writing top-level names to bioframe.egg-info/top_level.txt
reading manifest file 'bioframe.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no previously-included files matching '__pycache__/*' found anywhere in distribution
warning: no previously-included files matching '*.so' found anywhere in distribution
warning: no previously-included files matching '*.pyd' found anywhere in distribution
warning: no previously-included files matching '*.pyc' found anywhere in distribution
warning: no previously-included files matching '.git*' found anywhere in distribution
warning: no previously-included files matching '.deps/*' found anywhere in distribution
warning: no previously-included files matching '.DS_Store' found anywhere in distribution
writing manifest file 'bioframe.egg-info/SOURCES.txt'
running build_ext
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "reg2bins" failed type inference due to: No conversion from list(int64)<iv=None> to int32 for '$262return_value.1', defined at None

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

During: typing of assignment at /tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py (79)

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

  @numba.jit("int32(int32, int32)")
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "reg2bins" failed type inference due to: cannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>

File "bioframe/io/dask.py", line 59:
def reg2bins(rbeg, rend):
    <source elided>

    k = 9 + (rbeg >> 23)
    ^

  @numba.jit("int32(int32, int32)")
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:177: NumbaWarning: Function "reg2bins" was compiled in object mode without forceobj=True, but has lifted loops.

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaWarning(warn_msg,
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:187: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaDeprecationWarning(msg,

----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK
phase `check' succeeded after 3.1 seconds
--8<---------------cut here---------------end--------------->8---


I made a couple of cosmetic changes:

--8<---------------cut here---------------start------------->8---
;; Package definition for bioframe 0.2.0, fetched from the upstream git
;; repository and built with the python-build-system.
(define-public python-bioframe
  (package
    (name "python-bioframe")
    (version "0.2.0")
    ;; Sources on pypi do not contain requirements.txt
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/open2c/bioframe")
             ;; The commit reference is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0w5xrb93cf3hx3d1lw48a0l1ranghpj260b05b1rpk58wwwcsqfh"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-requests" ,python-requests)))
    ;; FIXME: With the default `check' phase ("python setup.py test") the
    ;; build log reports "Ran 0 tests", so the inputs marked "for tests"
    ;; are not exercised by any test yet.
    (native-inputs
     `(("python-dask" ,python-dask)             ; for tests
       ("python-matplotlib" ,python-matplotlib) ; for tests
       ("python-numba" ,python-numba)           ; for tests
       ("python-pypairix" ,python-pypairix)     ; for tests
       ("python-pytest" ,python-pytest)
       ("python-wheel" ,python-wheel)))
    (home-page "https://github.com/open2c/bioframe")
    (synopsis "Framework for genomic data analysis using Pandas dataframes")
    (description
     "This package is a library to enable flexible and scalable operations on
genomic interval dataframes in Python.  @code{python-bioframe} enables access
to a rich set of dataframe operations.  Working in Python enables rapid
visualization (e.g. matplotlib, seaborn) and iteration of genomic analyses.")
    (license license:expat)))
--8<---------------cut here---------------end--------------->8---


Please note that the lack of requirements.txt on pypi shouldn’t be an
obstacle to using the sources from pypi.  It should build just as well.
Mădălin Ionel Patrașcu Oct. 14, 2022, 10:49 p.m. UTC | #2
Dear Ricardo,


I am very sorry that I started this and then abandoned it!
After a long time, I remembered this and found some time to finish it and amend my initial patches as you pointed out.
The next two definitions are updated to the latest releases, and I have also resolved the issues you mentioned.

All the best!

Mădălin Ionel Patrașcu

System Administrator
The Berlin Institute for Medical Systems Biology (BIMSB)
Max Delbrück Center (MDC)
Hannoversche Straße 28
House 101, room 1.89
10115 Berlin, Germany


________________________________
From: Ricardo Wurmus <rekado@elephly.net>
Sent: Thursday, April 1, 2021 17:38
To: Patrascu, Madalin Ionel
Cc: 46946@debbugs.gnu.org
Subject: [ext] Re: [PATCH 1/2] gnu: Add python-bioframe.


Hi Mădălin,

> * gnu/packages/bioinformatics.scm (python-bioframe): New variable.

Thanks for the patch.

Despite all the test inputs the test suite is not actually run:

--8<---------------cut here---------------start------------->8---
starting phase `check'
running "python setup.py" with command "test" and parameters ()
running test
running egg_info
writing bioframe.egg-info/PKG-INFO
writing dependency_links to bioframe.egg-info/dependency_links.txt
writing requirements to bioframe.egg-info/requires.txt
writing top-level names to bioframe.egg-info/top_level.txt
reading manifest file 'bioframe.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no previously-included files matching '__pycache__/*' found anywhere in distribution
warning: no previously-included files matching '*.so' found anywhere in distribution
warning: no previously-included files matching '*.pyd' found anywhere in distribution
warning: no previously-included files matching '*.pyc' found anywhere in distribution
warning: no previously-included files matching '.git*' found anywhere in distribution
warning: no previously-included files matching '.deps/*' found anywhere in distribution
warning: no previously-included files matching '.DS_Store' found anywhere in distribution
writing manifest file 'bioframe.egg-info/SOURCES.txt'
running build_ext
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning:
Compilation is falling back to object mode WITH looplifting enabled because Function "reg2bins" failed type inference due to: No conversion from list(int64)<iv=None> to int32 for '$262return_value.1', defined at None

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

During: typing of assignment at /tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py (79)

File "bioframe/io/dask.py", line 79:
def reg2bins(rbeg, rend):
    <source elided>

    return lst
    ^

  @numba.jit("int32(int32, int32)")
/tmp/guix-build-python-bioframe-0.2.0.drv-0/source/bioframe/io/dask.py:47: NumbaWarning:
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "reg2bins" failed type inference due to: cannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>

File "bioframe/io/dask.py", line 59:
def reg2bins(rbeg, rend):
    <source elided>

    k = 9 + (rbeg >> 23)
    ^

  @numba.jit("int32(int32, int32)")
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:177: NumbaWarning: Function "reg2bins" was compiled in object mode without forceobj=True, but has lifted loops.

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaWarning(warn_msg,
/gnu/store/sm6k3ar46q11w77s7cc2q1hv33q8p3v0-python-numba-0.51.2/lib/python3.8/site-packages/numba/core/object_mode_passes.py:187: NumbaDeprecationWarning:
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "bioframe/io/dask.py", line 50:
def reg2bins(rbeg, rend):
    <source elided>

    MAX_BIN = ((1 << 18) - 1) // 7
    ^

  warnings.warn(errors.NumbaDeprecationWarning(msg,

----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK
phase `check' succeeded after 3.1 seconds
--8<---------------cut here---------------end--------------->8---


I made a couple of cosmetic changes:

--8<---------------cut here---------------start------------->8---
;; Package definition for bioframe 0.2.0, fetched from the upstream git
;; repository and built with the python-build-system.
(define-public python-bioframe
  (package
    (name "python-bioframe")
    (version "0.2.0")
    ;; Sources on pypi do not contain requirements.txt
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/open2c/bioframe")
             ;; The commit reference is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0w5xrb93cf3hx3d1lw48a0l1ranghpj260b05b1rpk58wwwcsqfh"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-requests" ,python-requests)))
    ;; FIXME: With the default `check' phase ("python setup.py test") the
    ;; build log reports "Ran 0 tests", so the inputs marked "for tests"
    ;; are not exercised by any test yet.
    (native-inputs
     `(("python-dask" ,python-dask)             ; for tests
       ("python-matplotlib" ,python-matplotlib) ; for tests
       ("python-numba" ,python-numba)           ; for tests
       ("python-pypairix" ,python-pypairix)     ; for tests
       ("python-pytest" ,python-pytest)
       ("python-wheel" ,python-wheel)))
    (home-page "https://github.com/open2c/bioframe")
    (synopsis "Framework for genomic data analysis using Pandas dataframes")
    (description
     "This package is a library to enable flexible and scalable operations on
genomic interval dataframes in Python.  @code{python-bioframe} enables access
to a rich set of dataframe operations.  Working in Python enables rapid
visualization (e.g. matplotlib, seaborn) and iteration of genomic analyses.")
    (license license:expat)))
--8<---------------cut here---------------end--------------->8---


Please note that the lack of requirements.txt on pypi shouldn’t be an
obstacle to using the sources from pypi.  It should build just as well.

--
Ricardo
diff mbox series

Patch

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index d617736e57..fb357b8b6c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -120,6 +120,7 @@ 
   #:use-module (gnu packages popt)
   #:use-module (gnu packages protobuf)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-check)
   #:use-module (gnu packages python-compression)
   #:use-module (gnu packages python-science)
@@ -923,6 +924,51 @@  Python.")
        `(("python2-pathlib" ,python2-pathlib)
          ,@(package-native-inputs pybedtools))))))
 
+(define-public python-bioframe
+  (package
+    (name "python-bioframe")
+    (version "0.2.0")
+    ;; Use git: the sources on PyPI do not contain requirements.txt.
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/open2c/bioframe")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0w5xrb93cf3hx3d1lw48a0l1ranghpj260b05b1rpk58wwwcsqfh"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-click" ,python-click)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-requests" ,python-requests)))
+    (native-inputs
+     `(("python-dask" ,python-dask)             ; for unittest.loader
+       ("python-matplotlib" ,python-matplotlib) ; for unittest.loader
+       ("python-numba" ,python-numba)           ; for unittest.loader
+       ("python-pypairix" ,python-pypairix)     ; for unittest.loader
+       ("python-pytest" ,python-pytest)
+       ("python-wheel" ,python-wheel)))
+    (home-page "https://github.com/open2c/bioframe")
+    (synopsis
+     "Framework for genomic data analysis using Pandas dataframes")
+    (description
+     "This package is a library to enable flexible and scalable operations on
+genomic interval dataframes in python.  @code{python-bioframe} enables access
+to a rich set of dataframe operations.  Working in python enables rapid
+visualization (e.g. matplotlib, seaborn) and iteration of genomic analyses.
+The philosophy underlying @code{python-bioframe} is to enable flexible
+operations: instead of creating a function for every possible use-case, we
+instead encourage users to compose functions to achieve their goals.  As a
+rough rule of thumb, if a function requires three steps and is crucial for
+genomic interval arithmetic we have included it; conversely if it can be
+performed in a single line by composing two of the core functions, we have not
+included it.")
+    (license license:expat)))
+
 (define-public python-biom-format
   (package
     (name "python-biom-format")