[bug#77387,v2,1/2] man-db: Parse man macro arguments better.
Commit Message
* guix/man-db.scm (man-macro-tokenize): New procedure to parse man
macros.
(man-page->entry): Parse macro line using man-macro-tokenize.
Change-Id: Iea0ffbc65290757df746138e0a6174646b5a3eb8
---
guix/man-db.scm | 55 +++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 46 insertions(+), 9 deletions(-)
base-commit: 621d09a185b106364fe7636923ab39c8bca35141
--
2.49.0
Comments
Hi,
I applied v2, thank you!
I confirmed that this goes from:
--8<---------------cut here---------------start------------->8---
$ guix describe
Generation 342 Apr 06 2025 23:07:09 (current)
shepherd d98d61a
repository URL: https://git.savannah.gnu.org/git/shepherd.git
branch: main
commit: d98d61a8a3f20de46d18ce4a8af05c93fab20b89
guile af96820
repository URL: https://git.savannah.gnu.org/git/guile.git
branch: main
commit: af96820e072d18c49ac03e80c6f3466d568dc77d
guix 6af6806
repository URL: https://git.savannah.gnu.org/git/guix.git
branch: master
commit: 6af680670bf9055b90e6f8b63c4c2ab7b08e7c56
ludo@ribbon ~/src/guix$ guix shell man-db openssh -C -- man -k ssh
ssh (0) - (unknown subject)
ssh-add (0) - (unknown subject)
ssh-agent (0) - (unknown subject)
ssh-copy-id (0) - (unknown subject)
ssh-keygen (0) - (unknown subject)
ssh-keyscan (0) - (unknown subject)
ssh-keysign (0) - (unknown subject)
ssh-pkcs11-helper (0) - (unknown subject)
ssh-sk-helper (0) - (unknown subject)
ssh_config (0) - (unknown subject)
sshd (0) - (unknown subject)
sshd_config (0) - (unknown subject)
--8<---------------cut here---------------end--------------->8---
… to:
--8<---------------cut here---------------start------------->8---
$ ./pre-inst-env guix shell man-db openssh -C -- man -k ssh
ssh (1) - (unknown subject)
ssh-add (1) - (unknown subject)
ssh-agent (1) - (unknown subject)
ssh-copy-id (1) - (unknown subject)
ssh-keygen (1) - (unknown subject)
ssh-keyscan (1) - (unknown subject)
ssh-keysign (8) - (unknown subject)
ssh-pkcs11-helper (8) - (unknown subject)
ssh-sk-helper (8) - (unknown subject)
ssh_config (5) - (unknown subject)
sshd (8) - (unknown subject)
sshd_config (5) - (unknown subject)
--8<---------------cut here---------------end--------------->8---
… which will undoubtedly be more convenient. :-)
Thanks!
Ludo’.
@@ -161,16 +161,51 @@ (define (read-synopsis port)
(line
(loop (cons line lines))))))
+(define (man-macro-tokenize input)
+ "Split INPUT string, a man macro invocation, into a list containing the macro's
+name followed by its arguments."
+ (let loop ((pos 0)
+ (tokens '())
+ (characters '())
+ (in-string? #f))
+ (if (>= pos (string-length input))
+ ;; End of input: emit any pending token (even from an unterminated string).
+ (reverse (if (null? characters)
+ tokens
+ (cons (list->string (reverse characters)) tokens)))
+ (let ((c (string-ref input pos)))
+ (cond
+ ;; Inside a double-quoted argument; whitespace is kept verbatim here.
+ (in-string?
+ (if (char=? c #\")
+ (if (and (< (+ pos 1) (string-length input))
+ (char=? (string-ref input (+ pos 1)) #\"))
+ ;; Two consecutive quotes denote one literal quote; skip both.
+ (loop (+ pos 2) tokens (cons #\" characters) #t)
+ ;; Closing quote: emit the token, even when it is empty.
+ (loop (+ pos 1) (cons (list->string (reverse characters)) tokens) '() #f))
+ ;; Any other character is accumulated into the quoted token.
+ (loop (+ pos 1) tokens (cons c characters) #t)))
+
+ ;; Whitespace outside a string ends the pending token, if there is one.
+ ((char-whitespace? c)
+ (if (null? characters)
+ (loop (+ pos 1) tokens '() #f)
+ (loop (+ pos 1) (cons (list->string (reverse characters)) tokens) '() #f)))
+
+ ;; Opening quote; a pending token is emitted first, then POS is revisited.
+ ((char=? c #\")
+ (if (null? characters)
+ (loop (+ pos 1) tokens '() #t)
+ (loop pos (cons (list->string (reverse characters)) tokens) '() #f)))
+
+ ;; Any other character extends the pending unquoted token.
+ (else
+ (loop (+ pos 1) tokens (cons c characters) #f)))))))
+
(define* (man-page->entry file #:optional (resolve identity))
"Parse FILE, a gzip or zstd compressed man page, and return a <mandb-entry>
for it."
- (define (string->number* str)
- (if (and (string-prefix? "\"" str)
- (> (string-length str) 1)
- (string-suffix? "\"" str))
- (string->number (string-drop (string-drop-right str 1) 1))
- (string->number str)))
-
(define call-with-input-port*
(cond
((gzip-compressed? file) call-with-gzip-input-port)
@@ -189,8 +224,10 @@ (define* (man-page->entry file #:optional (resolve identity))
(if (eof-object? line)
(mandb-entry file name (or section 0) (or synopsis "")
kind)
- (match (string-tokenize line)
- ((".TH" name (= string->number* section) _ ...)
+ ;; man 7 groff groff_mdoc groff_man
+ ;; look for metadata in macro invocations (lines starting with .)
+ (match (and (string-prefix? "." line) (man-macro-tokenize line))
+ ((".TH" name (= string->number section) _ ...)
(loop name section synopsis kind))
((".SH" (or "NAME" "\"NAME\""))
(loop name section (read-synopsis port) kind))