[bug#77387,1/2] man-db: Parse man macro arguments better.
Commit Message
* guix/man-db.scm (man-macro-tokenize): New procedure to parse man
macros.
(man-page->entry): Parse macro line using man-macro-tokenize.
Change-Id: Iea0ffbc65290757df746138e0a6174646b5a3eb8
---
guix/man-db.scm | 52 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 43 insertions(+), 9 deletions(-)
Comments
Hi!
Glad you fixed this problem. :-)
Sergey Trofimov <sarg@sarg.org.ru> skribis:
> * guix/man-db.scm (man-macro-tokenize): New procedure to parse man
> macros.
> (man-page->entry): Parse macro line using man-macro-tokenize.
>
> Change-Id: Iea0ffbc65290757df746138e0a6174646b5a3eb8
[...]
> +(define (man-macro-tokenize input)
Could you add a docstring explaining what it takes and what it returns?
> + (let loop ((pos 0)
> + (tokens '())
> + (current '())
Maybe s/current/characters/ ?
> + (in-string? #f))
> + (if (>= pos (string-length input))
> + ;; End of input
> + (unless in-string?
> + (reverse (if (null? current)
> + tokens
> + (cons (list->string (reverse current)) tokens))))
So this procedure can return *unspecified*, right? Sounds fishy.
> @@ -189,8 +223,8 @@ (define* (man-page->entry file #:optional (resolve identity))
> (if (eof-object? line)
> (mandb-entry file name (or section 0) (or synopsis "")
> kind)
> - (match (string-tokenize line)
> - ((".TH" name (= string->number* section) _ ...)
> + (match (if (string-prefix? "." line) (man-macro-tokenize line) #f)
> + ((".TH" name (= string->number section) _ ...)
Please add a comment above ‘match’ explaining what’s happening (why we
call ‘man-macro-tokenize’ etc.).
Also: (and (string-prefix? "." line) (man-macro-tokenize line))
Ludo’.
Hi Ludovic,
I've sent an amended series.
Ludovic Courtès <ludo@gnu.org> writes:
>> + (in-string? #f))
>> + (if (>= pos (string-length input))
>> + ;; End of input
>> + (unless in-string?
>> + (reverse (if (null? current)
>> + tokens
>> + (cons (list->string (reverse current)) tokens))))
>
> So this procedure can return *unspecified*, right? Sounds fishy.
>
Why is it fishy? Is it unconventional? Such return value is handled
correctly by the calling code (`match`).
@@ -161,16 +161,50 @@ (define (read-synopsis port)
(line
(loop (cons line lines))))))
+(define (man-macro-tokenize input)
+ (let loop ((pos 0)
+ (tokens '())
+ (current '())
+ (in-string? #f))
+ (if (>= pos (string-length input))
+ ;; End of input
+ (unless in-string?
+ (reverse (if (null? current)
+ tokens
+ (cons (list->string (reverse current)) tokens))))
+ (let ((c (string-ref input pos)))
+ (cond
+ ;; Inside a string
+ (in-string?
+ (if (char=? c #\")
+ (if (and (< (+ pos 1) (string-length input))
+ (char=? (string-ref input (+ pos 1)) #\"))
+ ;; Double quote inside string
+ (loop (+ pos 2) tokens (cons #\" current) #t)
+ ;; End of string
+ (loop (+ pos 1) (cons (list->string (reverse current)) tokens) '() #f))
+ ;; Regular character in string
+ (loop (+ pos 1) tokens (cons c current) #t)))
+
+ ;; Whitespace outside string
+ ((char-whitespace? c)
+ (if (null? current)
+ (loop (+ pos 1) tokens '() #f)
+ (loop (+ pos 1) (cons (list->string (reverse current)) tokens) '() #f)))
+
+ ;; Start of string
+ ((char=? c #\")
+ (if (null? current)
+ (loop (+ pos 1) tokens '() #t)
+ (loop pos (cons (list->string (reverse current)) tokens) '() #f)))
+
+ ;; Symbol character
+ (else
+ (loop (+ pos 1) tokens (cons c current) #f)))))))
+
(define* (man-page->entry file #:optional (resolve identity))
"Parse FILE, a gzip or zstd compressed man page, and return a <mandb-entry>
for it."
- (define (string->number* str)
- (if (and (string-prefix? "\"" str)
- (> (string-length str) 1)
- (string-suffix? "\"" str))
- (string->number (string-drop (string-drop-right str 1) 1))
- (string->number str)))
-
(define call-with-input-port*
(cond
((gzip-compressed? file) call-with-gzip-input-port)
@@ -189,8 +223,8 @@ (define* (man-page->entry file #:optional (resolve identity))
(if (eof-object? line)
(mandb-entry file name (or section 0) (or synopsis "")
kind)
- (match (string-tokenize line)
- ((".TH" name (= string->number* section) _ ...)
+ (match (if (string-prefix? "." line) (man-macro-tokenize line) #f)
+ ((".TH" name (= string->number section) _ ...)
(loop name section synopsis kind))
((".SH" (or "NAME" "\"NAME\""))
(loop name section (read-synopsis port) kind))