@@ -6,6 +6,7 @@
;;; Copyright © 2020 Vincent Legoll <vincent.legoll@gmail.com>
;;; Copyright © 2021 Léo Le Bouter <lle-bout@zaclys.net>
;;; Copyright © 2022 Christopher Baines <mail@cbaines.net>
+;;; Copyright © 2023 Frank Pursel <frank.pursel@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -37,6 +38,84 @@ (define-module (gnu packages java-xml)
#:use-module (guix build-system ant)
#:use-module (guix utils))
+;; Jericho HTML Parser, built with ant from the upstream SourceForge zip.
+;; Bundled jars are deleted from the source and some tests are excluded.
+(define-public java-jericho-html
+  (package
+    (name "java-jericho-html")
+    (version "3.4")
+    (source (origin
+              ;; The paths in `arguments' expect the archive's top-level
+              ;; "jericho-html-VERSION" directory inside the unpacked source.
+              (method url-fetch/zipbomb)
+              (uri (string-append
+                    "https://sourceforge.net/projects/jerichohtml"
+                    "/files/jericho-html/" version
+                    "/jericho-html-" version ".zip/download"))
+              (sha256
+               (base32 "1i7z3b8yamgkm7p3pv9qzv8l2kw15ccxy1aj9mpbf66kzkkds51i"))
+              (modules '((guix build utils)))
+              ;; Delete every jar found in the unpacked tree, logging each one.
+              (snippet '(begin
+                          (format #t "~%~a~%" "Removing sourced jar files.")
+                          (for-each (lambda (jarf)
+                                      (delete-file jarf)
+                                      (format #t "Deleted: ~a~%" jarf))
+                                    (find-files "." "\\.jar$"))))))
+    (build-system ant-build-system)
+    (arguments
+     ;; TOP-DIR is the "jericho-html-VERSION" directory, relative to the
+     ;; phases' working directory; deriving it from VERSION keeps paths in sync.
+     (let ((top-dir (string-append "jericho-html-" version)))
+       `(#:jar-name ,(string-append name ".jar")
+         #:source-dir ,(string-append top-dir "/src/")
+         #:test-dir ,(string-append top-dir "/test/")
+         ;; NOTE(review): the reason these tests are excluded is not recorded.
+         #:test-exclude (list "**/StAXTest.java"
+                              "**/NodeIteratorTest.java"
+                              "**/ScriptTest.java"
+                              "**/SegmentGetStyleURISegmentsTest.java"
+                              "**/SegmentTest.java"
+                              "**/StreamedSourceTest.java"
+                              "**/HTMLSanitiserTest.java")
+         #:phases
+         (modify-phases %standard-phases
+           (add-before 'build 'add-ant-env-options
+             (lambda _
+               ;; Make ant's JVM default to ISO-8859-1; presumably the
+               ;; sources use that encoding -- TODO confirm.
+               (setenv "ANT_OPTS" "-Dfile.encoding=iso-8859-1")))
+           (add-after 'build 'check-prep
+             (lambda _
+               (let* ((cwd (getcwd))
+                      (classes (string-append cwd "/build/test-classes"))
+                      (test-root (string-append cwd "/" ,top-dir "/test"))
+                      (test-src-data (string-append test-root "/src/data")))
+                 (format #t "~%Check Prep dir: ~s\n" cwd)
+                 ;; Make build.xml use ${test.home}/src, not ${test.home}/java.
+                 (substitute* "build.xml"
+                   (("\\$\\{test\\.home\\}/java")
+                    "${test.home}/src"))
+                 ;; Seed build/test-classes from samples/console/classes.
+                 (mkdir-p classes)
+                 (copy-recursively (string-append cwd "/" ,top-dir
+                                                  "/samples/console/classes")
+                                   (string-append classes "/"))
+                 ;; Mirror test/data into test/src/data.
+                 (mkdir-p test-src-data)
+                 (copy-recursively (string-append test-root "/data")
+                                   test-src-data))))))))
+    (native-inputs (list java-junit java-slf4j-api java-log4j-api
+                         java-commons-logging-minimal))
+    (home-page "http://jericho.htmlparser.net/docs/index.html")
+    (synopsis "Java HTML Parser library")
+    (description
+     "Jericho HTML Parser is a Java library allowing analysis and
+manipulation of parts of an HTML document, including server-side tags, while
+reproducing verbatim any unrecognised or invalid HTML.  It also provides
+high-level HTML form manipulation functions.")
+    (license (list license:lgpl2.1+ license:asl2.0 license:epl1.0))))
+
(define-public java-simple-xml
(package
(name "java-simple-xml")