diff mbox series

[bug#73109,2/2] gnu: Add python-transformers.

Message ID 20240907165703.23229-2-ngraves@ngraves.fr
State New
Headers show
Series Add python-transformers | expand

Commit Message

Nicolas Graves Sept. 7, 2024, 4:56 p.m. UTC
* gnu/packages/machine-learning.scm (python-transformers): New variable.

Change-Id: Ifd7fa3a0f4611d3298ab76ceb44b3aea1397b824
---
 gnu/packages/machine-learning.scm | 60 +++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
diff mbox series

Patch

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 412499d424..42842d7d61 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -5846,6 +5846,66 @@  (define-public python-tokenizers
 tokenizers, @code{rust-tokenizers}.")
     (license license:asl2.0)))
 
+(define-public python-transformers
+  (package
+    (name "python-transformers")
+    (version "4.44.2")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "transformers" version))
+       (sha256
+        (base32 "09h84wqqk2bgi4vr9d1m3dsliard99l53n96wic405gfjb61gain"))))
+    (build-system pyproject-build-system)
+    ;; The imported package contains ~60 more inputs, but they don't seem
+    ;; necessary to build a minimal version of the package.
+    (propagated-inputs
+     (list python-filelock
+           python-huggingface-hub
+           python-numpy
+           python-pytorch
+           python-pyyaml
+           python-regex
+           python-requests
+           python-safetensors
+           python-tokenizers
+           python-tqdm
+           tensorflow))
+    (home-page "https://github.com/huggingface/transformers")
+    (synopsis "Machine Learning for PyTorch and TensorFlow")
+    (description
+     "This package provides easy download of thousands of pretrained models to
+perform tasks on different modalities such as text, vision, and audio.
+
+These models can be applied on:
+@itemize
+@item Text, for tasks like text classification, information extraction,
+question answering, summarization, translation, and text generation, in over
+100 languages.
+@item Images, for tasks like image classification, object detection, and
+segmentation.
+@item Audio, for tasks like speech recognition and audio classification.
+@end itemize
+
+Transformer models can also perform tasks on several modalities combined, such
+as table question answering, optical character recognition, information
+extraction from scanned documents, video classification, and visual question
+answering.
+
+This package provides APIs to quickly download and use those pretrained models
+on a given text, fine-tune them on your own datasets and then share them with
+the community on the Hugging Face model hub.  At the same time, each Python
+module defining an architecture is fully standalone and can be modified to
+enable quick research experiments.
+
+Transformers is backed by the three most popular deep learning libraries —
+JAX, PyTorch and TensorFlow — with a seamless integration between them.  It's
+straightforward to train your models with one before loading them for
+inference with the other.
+
+Note: This version doesn't support integration with JAX.")
+    (license license:asl2.0)))
+
 (define-public python-hmmlearn
   (package
     (name "python-hmmlearn")