From ee945305828750c4bf6e1ee010444d705eb9334b Mon Sep 17 00:00:00 2001
From: Andy Tai <atai@atai.org>
Date: Sat, 20 Feb 2021 22:58:07 -0800
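Subject: [PATCH] gnu: Add ocrfeeder.
* gnu/packages/gnome.scm (ocrfeeder): New variable.
* gnu/packages/python-xyz.scm (python-sane): New variable.
* gnu/packages/ocr.scm (tesseract-ocr): Adjust whitespace only.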
---
gnu/packages/gnome.scm | 67 ++++++++++++++++++++++++++++++++++++-
gnu/packages/ocr.scm | 32 +++++++++---------
gnu/packages/python-xyz.scm | 24 +++++++++++++
3 files changed, 106 insertions(+), 17 deletions(-)
@@ -56,7 +56,7 @@
;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2020 Michael Rohleder <mike@rohleder.de>
;;; Copyright © 2020 Tim Gesthuizen <tim.gesthuizen@yahoo.de>
-;;; Copyright © 2020 Andy Tai <atai@atai.org>
+;;; Copyright © 2020, 2021 Andy Tai <atai@atai.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -149,6 +149,7 @@
#:use-module (gnu packages ninja)
#:use-module (gnu packages node)
#:use-module (gnu packages nss)
+ #:use-module (gnu packages ocr)
#:use-module (gnu packages openldap)
#:use-module (gnu packages package-management)
#:use-module (gnu packages password-utils)
@@ -12095,3 +12096,67 @@ for the GNOME 3.x platform with many features. It aims to be a very complete
editing environment for translation issues within the GNU gettext/GNOME desktop
world.")
(license license:gpl3+)))
+
+
+(define-public ocrfeeder ; OCRFeeder package, built from the GNOME release tarball
+ (package
+ (name "ocrfeeder")
+ (version "0.8.3")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "mirror://gnome/sources/ocrfeeder/" ; .../<major.minor>/ocrfeeder-<version>.tar.xz
+ (version-major+minor version) "/"
+ "ocrfeeder-" version ".tar.xz"))
+ (sha256
+ (base32
+ "12f5gnq92ffnd5zaj04df7jrnsdz1zn4zcgpbf5p9qnd21i2y529"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after ; extra phase, run once 'install has finished
+ 'install 'wrap-program
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((prog (string-append (assoc-ref outputs "out") ; installed launcher: <out>/bin/ocrfeeder
+ "/bin/" "ocrfeeder"))
+ (pylib (string-append (assoc-ref outputs "out") ; <out>/lib/python<X.Y>/site-packages
+ "/lib/python"
+ ,(version-major+minor ; unquoted: X.Y is computed from the `python' package's version, not at build time
+ (package-version python))
+ "/site-packages")))
+ (wrap-program prog ; NOTE(review): `=' presumably makes the wrapper set each variable outright -- confirm against wrap-program docs
+ `("PYTHONPATH" = (,(getenv "PYTHONPATH") ,pylib)) ; build-time PYTHONPATH plus this package's own site-packages
+ `("GI_TYPELIB_PATH" = (,(getenv "GI_TYPELIB_PATH")))) ; build-time GI_TYPELIB_PATH, passed through unchanged
+ #t)))))) ; the phase returns #t
+ (native-inputs
+ `(("glib:bin" ,glib "bin") ; for glib-compile-resources
+ ("gobject-introspection" ,gobject-introspection)
+ ("gtk+:bin" ,gtk+ "bin") ; for gtk-update-icon-cache
+ ("intltool" ,intltool)
+ ("itstool" ,itstool)
+ ("pkg-config" ,pkg-config)
+ ("xmllint" ,libxml2))) ; label names the tool; the package supplied is libxml2
+ (inputs
+ `(("enchant" ,enchant)
+ ("glib" ,glib)
+ ("goocanvas" ,goocanvas)
+ ("gtk" ,gtk+)
+ ("gtkspell3" ,gtkspell3)
+ ("libjpeg" ,libjpeg-turbo)
+ ("libtiff" ,libtiff)
+ ("libraw" ,libraw)
+ ("ocrad" ,ocrad)
+ ("python" ,python-wrapper) ; NOTE(review): pylib's X.Y above is read from `python', not python-wrapper -- presumably the same version; confirm
+ ("python-pygobject" ,python-pygobject)
+ ("python-odfpy" ,python-odfpy)
+ ("python-pillow" ,python-pillow)
+ ("python-pyenchant" ,python-pyenchant)
+ ("python-reportlab" ,python-reportlab)
+ ("python-sane" ,python-sane)
+ ("sane-backends" ,sane-backends)
+ ("tesseract-ocr" ,tesseract-ocr)))
+ (home-page "https://wiki.gnome.org/Apps/OCRFeeder")
+ (synopsis "Complete OCR Suite")
+ (description "OCRFeeder is a complete Optical Character Recognition and
+Document Analysis and Recognition program.")
+ (license license:gpl3+)))
@@ -67,21 +67,21 @@ it produces text in 8-bit or UTF-8 formats.")
;; There are useful commits beyond the last official stable release.
(let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
(revision "1"))
- (package
- (name "tesseract-ocr")
+ (package
+ (name "tesseract-ocr")
(version (git-version "4.1.1" revision commit))
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/tesseract-ocr/tesseract")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tesseract-ocr/tesseract")
(commit commit)))
(file-name (git-file-name name version))
- (sha256
+ (sha256
(base32
"11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
- (build-system gnu-build-system)
- (inputs
+ (build-system gnu-build-system)
+ (inputs
`(("cairo" ,cairo)
("icu" ,icu4c)
("leptonica" ,leptonica)
@@ -98,9 +98,9 @@ it produces text in 8-bit or UTF-8 formats.")
("libtiff" ,libtiff)
("pkg-config" ,pkg-config)
("xsltproc" ,libxslt)))
- (arguments
+ (arguments
`(#:configure-flags
- (let ((leptonica (assoc-ref %build-inputs "leptonica")))
+ (let ((leptonica (assoc-ref %build-inputs "leptonica")))
(list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
#:tests? #f ; Tests currently result in a segfault
#:phases
@@ -120,10 +120,10 @@ it produces text in 8-bit or UTF-8 formats.")
(add-after 'build-training 'install-training
(lambda _
(invoke "make" "training-install"))))))
- (home-page "https://github.com/tesseract-ocr/tesseract")
- (synopsis "Optical character recognition engine")
- (description
- "Tesseract is an optical character recognition (OCR) engine with very
+ (home-page "https://github.com/tesseract-ocr/tesseract")
+ (synopsis "Optical character recognition engine")
+ (description
+ "Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
@@ -180,6 +180,7 @@
#:use-module (gnu packages readline)
#:use-module (gnu packages sdl)
#:use-module (gnu packages search)
+ #:use-module (gnu packages scanner)
#:use-module (gnu packages shells)
#:use-module (gnu packages sphinx)
#:use-module (gnu packages ssh)
@@ -23906,3 +23907,26 @@ Application Programming Interface based on the Open Inventor 2.1 API.")
Crayons automatically wraps a given string in the foreground color and
restores the original state after the string is printed.")
(license license:expat)))
+
+(define-public python-sane             ; Python bindings for SANE, fetched from PyPI
+  (package
+    (name "python-sane")
+    (version "2.9.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri name version))    ; URI derived from the NAME and VERSION fields above
+       (sha256
+        (base32
+         "1pi597z94n2mkd821ln52fq0g727n2jxfskf280ip3kf7jw8w294"))))
+    (build-system python-build-system)
+    (native-inputs
+     `(("python-pytest" ,python-pytest))) ; presumably needed by the test suite -- confirm
+    (inputs
+     `(("sane-backends" ,sane-backends))) ; comes from (gnu packages scanner)
+    (home-page "https://github.com/python-pillow/Sane")
+    (synopsis "Python interface to the SANE scanner")
+    (description "This package provides a Python interface to the SANE scanner
+and frame grabber interface for Linux.")
+    ;; MIT license; the FSF directory refers to this as the Expat license.
+    (license license:expat)))
--
2.30.0