[bug#77387,v2,1/2] man-db: Parse man macro arguments better.

Message ID a0040cd0faf8501a35b71848b33d83dc45d6659b.1744202723.git.sarg@sarg.org.ru
State New
Headers
Series [bug#77387,v2,1/2] man-db: Parse man macro arguments better. |

Commit Message

Sergey Trofimov April 9, 2025, 12:46 p.m. UTC
  * guix/man-db.scm (man-macro-tokenize): New procedure to parse man
macros.
(man-page->entry): Parse macro line using man-macro-tokenize.

Change-Id: Iea0ffbc65290757df746138e0a6174646b5a3eb8
---
 guix/man-db.scm | 55 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 9 deletions(-)


base-commit: 621d09a185b106364fe7636923ab39c8bca35141
--
2.49.0
  

Comments

Ludovic Courtès April 11, 2025, 10:05 a.m. UTC | #1
Hi,

I applied v2, thank you!

I confirmed that this goes from:

--8<---------------cut here---------------start------------->8---
$ guix describe
Generation 342  Apr 06 2025 23:07:09    (current)
  shepherd d98d61a
    repository URL: https://git.savannah.gnu.org/git/shepherd.git
    branch: main
    commit: d98d61a8a3f20de46d18ce4a8af05c93fab20b89
  guile af96820
    repository URL: https://git.savannah.gnu.org/git/guile.git
    branch: main
    commit: af96820e072d18c49ac03e80c6f3466d568dc77d
  guix 6af6806
    repository URL: https://git.savannah.gnu.org/git/guix.git
    branch: master
    commit: 6af680670bf9055b90e6f8b63c4c2ab7b08e7c56
ludo@ribbon ~/src/guix$ guix shell man-db openssh -C -- man -k ssh
ssh (0)              - (unknown subject)
ssh-add (0)          - (unknown subject)
ssh-agent (0)        - (unknown subject)
ssh-copy-id (0)      - (unknown subject)
ssh-keygen (0)       - (unknown subject)
ssh-keyscan (0)      - (unknown subject)
ssh-keysign (0)      - (unknown subject)
ssh-pkcs11-helper (0) - (unknown subject)
ssh-sk-helper (0)    - (unknown subject)
ssh_config (0)       - (unknown subject)
sshd (0)             - (unknown subject)
sshd_config (0)      - (unknown subject)
--8<---------------cut here---------------end--------------->8---

… to:

--8<---------------cut here---------------start------------->8---
$ ./pre-inst-env guix shell man-db openssh -C -- man -k ssh
ssh (1)              - (unknown subject)
ssh-add (1)          - (unknown subject)
ssh-agent (1)        - (unknown subject)
ssh-copy-id (1)      - (unknown subject)
ssh-keygen (1)       - (unknown subject)
ssh-keyscan (1)      - (unknown subject)
ssh-keysign (8)      - (unknown subject)
ssh-pkcs11-helper (8) - (unknown subject)
ssh-sk-helper (8)    - (unknown subject)
ssh_config (5)       - (unknown subject)
sshd (8)             - (unknown subject)
sshd_config (5)      - (unknown subject)
--8<---------------cut here---------------end--------------->8---

… which will undoubtedly be more convenient.  :-)

Thanks!

Ludo’.
  

Patch

diff --git a/guix/man-db.scm b/guix/man-db.scm
index bba90ed473..1259658f52 100644
--- a/guix/man-db.scm
+++ b/guix/man-db.scm
@@ -161,16 +161,51 @@  (define (read-synopsis port)
       (line
        (loop (cons line lines))))))
 
+(define (man-macro-tokenize input)
+  "Split INPUT string, a man macro invocation, into a list containing the macro's
+name followed by its arguments."
+  (let loop ((pos 0)
+             (tokens '())
+             (characters '())
+             (in-string? #f))
+    (if (>= pos (string-length input))
+        ;; End of input: flush any pending characters as the final token.
+        (reverse (if (null? characters)
+                     tokens
+                     (cons (list->string (reverse characters)) tokens)))
+        (let ((c (string-ref input pos)))
+          (cond
+           ;; Inside a double-quoted argument.
+           (in-string?
+            (if (char=? c #\")
+                (if (and (< (+ pos 1) (string-length input))
+                         (char=? (string-ref input (+ pos 1)) #\"))
+                    ;; Two consecutive quotes: keep one literal quote, skip both.
+                    (loop (+ pos 2) tokens (cons #\" characters) #t)
+                    ;; Closing quote: emit the argument (possibly empty) as a token.
+                    (loop (+ pos 1) (cons (list->string (reverse characters)) tokens) '() #f))
+                ;; Any other character, whitespace included, is kept verbatim.
+                (loop (+ pos 1) tokens (cons c characters) #t)))
+
+           ;; Whitespace outside a string ends the current token, if any.
+           ((char-whitespace? c)
+            (if (null? characters)
+                (loop (+ pos 1) tokens '() #f)
+                (loop (+ pos 1) (cons (list->string (reverse characters)) tokens) '() #f)))
+
+           ;; Opening quote: flush a pending token first, then re-read the quote.
+           ((char=? c #\")
+            (if (null? characters)
+                (loop (+ pos 1) tokens '() #t)
+                (loop pos (cons (list->string (reverse characters)) tokens) '() #f)))
+
+           ;; Any other character extends the current unquoted token.
+           (else
+            (loop (+ pos 1) tokens (cons c characters) #f)))))))
+
 (define* (man-page->entry file #:optional (resolve identity))
   "Parse FILE, a gzip or zstd compressed man page, and return a <mandb-entry>
 for it."
-  (define (string->number* str)
-    (if (and (string-prefix? "\"" str)
-             (> (string-length str) 1)
-             (string-suffix? "\"" str))
-        (string->number (string-drop (string-drop-right str 1) 1))
-        (string->number str)))
-
   (define call-with-input-port*
     (cond
      ((gzip-compressed? file) call-with-gzip-input-port)
@@ -189,8 +224,10 @@  (define* (man-page->entry file #:optional (resolve identity))
               (if (eof-object? line)
                   (mandb-entry file name (or section 0) (or synopsis "")
                                kind)
-                  (match (string-tokenize line)
-                    ((".TH" name (= string->number* section) _ ...)
+                  ;; See groff(7), groff_mdoc(7), and groff_man(7).
+                  ;; Look for metadata in macro invocations: lines starting with ".".
+                  (match (and (string-prefix? "." line) (man-macro-tokenize line))
+                    ((".TH" name (= string->number section) _ ...)
                      (loop name section synopsis kind))
                     ((".SH" (or "NAME" "\"NAME\""))
                      (loop name section (read-synopsis port) kind))