diff mbox series

[bug#71787,03/12] gnu: Add extract.

Message ID 20240626192717.12818-3-david.elsing@posteo.net
State New
Headers show
Series Update and unbundle ghostscript and mupdf | expand

Commit Message

David Elsing June 26, 2024, 7:26 p.m. UTC
* gnu/packages/ghostscript.scm (extract): New variable.
* gnu/packages/patches/extract-shared-library.patch: New file.
* gnu/local.mk (dist_patch_DATA): Register it.
---
 gnu/local.mk                                  |  1 +
 gnu/packages/ghostscript.scm                  | 47 +++++++++++++++
 .../patches/extract-shared-library.patch      | 59 +++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 gnu/packages/patches/extract-shared-library.patch
diff mbox series

Patch

diff --git a/gnu/local.mk b/gnu/local.mk
index 282cf30f7f..2fc14e68fe 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1178,6 +1178,7 @@  dist_patch_DATA =						\
   %D%/packages/patches/eudev-rules-directory.patch		\
   %D%/packages/patches/exercism-disable-self-update.patch	\
   %D%/packages/patches/extempore-unbundle-external-dependencies.patch	\
+  %D%/packages/patches/extract-shared-library.patch		\
   %D%/packages/patches/extundelete-e2fsprogs-1.44.patch		\
   %D%/packages/patches/fail2ban-0.11.2_CVE-2021-32749.patch	\
   %D%/packages/patches/fail2ban-0.11.2_fix-setuptools-drop-2to3.patch	\
diff --git a/gnu/packages/ghostscript.scm b/gnu/packages/ghostscript.scm
index 5f0e2cf3c4..2e24904fd4 100644
--- a/gnu/packages/ghostscript.scm
+++ b/gnu/packages/ghostscript.scm
@@ -30,6 +30,7 @@ 
 (define-module (gnu packages ghostscript)
   #:use-module (gnu packages)
   #:use-module (gnu packages autotools)
+  #:use-module (gnu packages c)
   #:use-module (gnu packages compression)
   #:use-module (gnu packages cups)
   #:use-module (gnu packages fontutils)
@@ -94,6 +95,52 @@  (define-public lcms2mt
 (GhostScript fork)")
     (home-page "https://www.ghostscript.com/")))
 
+(define-public extract
+  (package
+    (name "extract")
+    (version "10.03.0")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "git://git.ghostscript.com/extract.git")
+                    (commit (string-append "ghostpdl-" version))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "17mb96xpsbr26q2l3kahmi3f1mcqzn7n1q1783f40155lrkk88q9"))
+              (snippet ; unbundle memento; templates presumably regenerated
+               '(for-each
+                 delete-file
+                 '("src/docx_template.c" "src/docx_template.h"
+                   "src/odt_template.c" "src/odt_template.h"
+                   "src/memento.h" "src/memento.c")))
+              (patches (search-patches "extract-shared-library.patch"))))
+    (build-system gnu-build-system)
+    (arguments
+     (list
+      #:test-target "test"
+      #:make-flags
+      #~(list
+         "build=debug-opt"
+         "flags_compile=-MMD -MP -Iinclude -Isrc -fPIC" ; PIC for the .so
+         (string-append "CC=" #$(cc-for-target))
+         (string-append "CXX=" #$(cxx-for-target)))
+      #:phases
+      #~(modify-phases %standard-phases
+          (delete 'configure) ; no configure script
+          (replace 'install ; install only the shared library and headers
+            (lambda _
+              (install-file "libextract.so" (string-append #$output "/lib"))
+              (copy-recursively
+               "include" (string-append #$output "/include")))))))
+    (inputs (list memento zlib))
+    (native-inputs (list python unzip))
+    (home-page "https://git.ghostscript.com/?p=extract.git")
+    (synopsis "Document content extraction library")
+    (description "Extract is a library for extracting docx, odt, html and text
+files from documents.")
+    (license license:agpl3+)))
+
 (define-public libpaper
   (package
     (name "libpaper")
diff --git a/gnu/packages/patches/extract-shared-library.patch b/gnu/packages/patches/extract-shared-library.patch
new file mode 100644
index 0000000000..b2ab37dcc6
--- /dev/null
+++ b/gnu/packages/patches/extract-shared-library.patch
@@ -0,0 +1,59 @@ 
+Adjust the Makefile to build a shared library.
+
+diff --git a/Makefile b/Makefile
+index e8933ea..5cf503c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -130,6 +130,7 @@ endif
+ $(warning gs=$(gs))
+ endif
+ 
++build: libextract.so $(exe_dep) $(exe_buffer_test_dep) $(exe_misc_test_dep) $(exe_ziptest_dep)
+ 
+ # Default target - run all tests.
+ #
+@@ -294,7 +295,7 @@ test/generated/%.pdf.mutool.text.diff: test/generated/%.pdf.mutool.text test/%.p
+ # Main executable.
+ #
+ exe = src/build/extract-$(build).exe
+-exe_src = \
++lib_src = \
+         src/alloc.c \
+         src/astring.c \
+         src/boxer.c \
+@@ -302,10 +303,10 @@ exe_src = \
+         src/document.c \
+         src/docx.c \
+         src/docx_template.c \
+-        src/extract-exe.c \
+         src/extract.c \
+         src/html.c \
+         src/join.c \
++        src/json.c \
+         src/mem.c \
+         src/odt.c \
+         src/odt_template.c \
+@@ -318,16 +319,18 @@ exe_src = \
+ 
+ 
+ ifeq ($(build),memento)
+-    exe_src += src/memento.c
++    lib_src += src/memento.c
+     ifeq ($(uname),Linux)
+         flags_compile += -D HAVE_LIBDL
+         flags_link += -L $(libbacktrace) -l backtrace -l dl
+     endif
+ endif
+-exe_obj := $(exe_src)
+-exe_obj := $(patsubst src/%.c, src/build/%.c-$(build).o, $(exe_obj))
+-exe_obj := $(patsubst src/%.cpp, src/build/%.cpp-$(build).o, $(exe_obj))
+-exe_dep = $(exe_obj:.o=.d)
++lib_obj := $(lib_src)
++lib_obj := $(patsubst src/%.c, src/build/%.c-$(build).o, $(lib_obj))
++lib_obj := $(patsubst src/%.cpp, src/build/%.cpp-$(build).o, $(lib_obj))
++lib_dep = $(lib_obj:.o=.d)
++libextract.so: $(lib_obj)
++	$(CXX) $(flags_link) $^ -lz -lm -shared -o $@
+ exe: $(exe)
+ $(exe): $(exe_obj)
+ 	$(CXX) $(flags_link) -o $@ $^ -lz -lm