diff mbox series

[bug#39258,3/4] gnu: Generate xapian package search index.

Message ID 20200227204150.30985-4-arunisaac@systemreboot.net
State Work in progress
Headers show
Series Xapian for Guix package search | expand

Checks

Context Check Description
cbaines/comparison success View comparison
cbaines/git branch success View Git branch
cbaines/applying patch success View Laminar job

Commit Message

Arun Isaac Feb. 27, 2020, 8:41 p.m. UTC
* gnu/packages.scm (%package-search-index): New variable.
(generate-package-search-index): New function.
* guix/channels.scm (package-search-index): New function.
(%channel-profile-hooks): Add package-search-index.
---
 gnu/packages.scm  | 29 ++++++++++++++++++++++++++++-
 guix/channels.scm | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 2 deletions(-)

Comments

Pierre Neidhardt Feb. 28, 2020, 8:04 a.m. UTC | #1
Arun Isaac <arunisaac@systemreboot.net> writes:

> +(define (generate-package-search-index directory)
> +  "Generate under DIRECTORY a xapian index of all the available packages."
> +  (define db-path
> +    (string-append directory %package-search-index))
> +
> +  (mkdir-p (dirname db-path))
> +  (call-with-writable-database db-path
> +    (lambda (db)
> +      (fold-packages (lambda (package _)
> +                       (let* ((idterm (string-append "Q" (package-name package)))
> +                              (doc (make-document #:data (package-name package)
> +                                                  #:terms `((,idterm . 0))))
> +                              (term-generator (make-term-generator #:stem (make-stem "en")
> +                                                                   #:document doc)))
> +                         (index-text! term-generator (package-description package))
> +                         (replace-document! db idterm doc)))

I guess these non-functional functions (index-text!, replace-document!)
represent how Xapian works at the C++ level.  Would it be possible to
make more functional bindings nonetheless?
Simon Tournier March 3, 2020, 6:29 p.m. UTC | #2
Hi Arun,

In the commit message, I would capitalize Xapian.


On Thu, 27 Feb 2020 at 21:42, Arun Isaac <arunisaac@systemreboot.net> wrote:
>
> * gnu/packages.scm (%package-search-index): New variable.
> (generate-package-search-index): New function.
> * guix/channels.scm (package-search-index): New function.
> (%channel-profile-hooks): Add package-search-index.
> ---
>  gnu/packages.scm  | 29 ++++++++++++++++++++++++++++-
>  guix/channels.scm | 34 +++++++++++++++++++++++++++++++++-
>  2 files changed, 61 insertions(+), 2 deletions(-)
>
> diff --git a/gnu/packages.scm b/gnu/packages.scm
> index d22c992bb1..e91753e2a8 100644
> --- a/gnu/packages.scm
> +++ b/gnu/packages.scm
> @@ -4,6 +4,7 @@
>  ;;; Copyright © 2014 Eric Bavier <bavier@member.fsf.org>
>  ;;; Copyright © 2016, 2017 Alex Kost <alezost@gmail.com>
>  ;;; Copyright © 2016 Mathieu Lirzin <mthl@gnu.org>
> +;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
>  ;;;
>  ;;; This file is part of GNU Guix.
>  ;;;
> @@ -43,6 +44,7 @@
>    #:use-module (srfi srfi-34)
>    #:use-module (srfi srfi-35)
>    #:use-module (srfi srfi-39)
> +  #:use-module (xapian xapian)
>    #:export (search-patch
>              search-patches
>              search-auxiliary-file
> @@ -64,7 +66,8 @@
>              specification->location
>              specifications->manifest
>
> -            generate-package-cache))
> +            generate-package-cache
> +            generate-package-search-index))
>
>  ;;; Commentary:
>  ;;;
> @@ -426,6 +429,30 @@ reducing the memory footprint."
>                                 #:opts '(#:to-file? #t)))))
>    cache-file)
>
> +(define %package-search-index
> +  ;; Location of the package search-index
> +  "/lib/guix/package-search.index")
> +
> +(define (generate-package-search-index directory)
> +  "Generate under DIRECTORY a xapian index of all the available packages."

"Xapian" should be capitalized in this docstring.


> +  (define db-path
> +    (string-append directory %package-search-index))
> +
> +  (mkdir-p (dirname db-path))
> +  (call-with-writable-database db-path
> +    (lambda (db)
> +      (fold-packages (lambda (package _)
> +                       (let* ((idterm (string-append "Q" (package-name package)))
> +                              (doc (make-document #:data (package-name package)
> +                                                  #:terms `((,idterm . 0))))
> +                              (term-generator (make-term-generator #:stem (make-stem "en")
> +                                                                   #:document doc)))
> +                         (index-text! term-generator (package-description package))

Instead, this:

(index-text! term-generator (string-append (package-synopsis package)
" " (package-description package)))

should index both 'synopsis' and 'description'.


Is (make-stem "en") for the locale?


> +                         (replace-document! db idterm doc)))
> +                     #f)))
> +
> +  db-path)
> +
>
>  (define %sigint-prompt
>    ;; The prompt to jump to upon SIGINT.
> diff --git a/guix/channels.scm b/guix/channels.scm
> index f0261dc2da..c70c70938c 100644
> --- a/guix/channels.scm
> +++ b/guix/channels.scm
> @@ -2,6 +2,7 @@
>  ;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
>  ;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
>  ;;; Copyright © 2019 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
> +;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
>  ;;;
>  ;;; This file is part of GNU Guix.
>  ;;;
> @@ -581,9 +582,40 @@ be used as a profile hook."
>                                                   (hook . package-cache))
>                                    #:local-build? #t)))
>
> +(define (package-search-index manifest)
> +  "Build a package search index for the instance in MANIFEST.  This is meant
> +to be used as a profile hook."
> +  (mlet %store-monad ((profile (profile-derivation manifest
> +                                                   #:hooks '())))
> +
> +    (define build
> +      #~(begin
> +          (use-modules (gnu packages))
> +
> +          (if (defined? 'generate-package-search-index)
> +              (begin
> +                ;; Delegate package search index generation to the inferior.
> +                (format (current-error-port)
> +                        "Generating package search index for '~a'...~%"
> +                        #$profile)
> +                (generate-package-search-index #$output))
> +              (mkdir #$output))))
> +
> +    (gexp->derivation-in-inferior "guix-package-search-index" build
> +                                  profile
> +
> +                                  ;; If the Guix in PROFILE is too old and
> +                                  ;; lacks 'guix repl', don't build the cache
> +                                  ;; instead of failing.
> +                                  #:silent-failure? #t
> +
> +                                  #:properties '((type . profile-hook)
> +                                                 (hook . package-search-index))
> +                                  #:local-build? #t)))
> +

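package-search-index and package-cache-file could be refactored into a
shared helper, because they share almost all of their code: only the
derivation name, the log message, the hook name, and the generator
procedure differ.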


>  (define %channel-profile-hooks
>    ;; The default channel profile hooks.
> -  (cons package-cache-file %default-profile-hooks))
> +  (cons* package-cache-file package-search-index %default-profile-hooks))
>
>  (define (channel-instances->derivation instances)
>    "Return the derivation of the profile containing INSTANCES, a list of
> --
> 2.23.0
>
Arun Isaac March 5, 2020, 8:26 p.m. UTC | #3
>> +      (fold-packages (lambda (package _)
>> +                       (let* ((idterm (string-append "Q" (package-name package)))
>> +                              (doc (make-document #:data (package-name package)
>> +                                                  #:terms `((,idterm . 0))))
>> +                              (term-generator (make-term-generator #:stem (make-stem "en")
>> +                                                                   #:document doc)))
>> +                         (index-text! term-generator (package-description package))
>> +                         (replace-document! db idterm doc)))
>
> I guess these non-functional functions (index-text!, replace-document!)
> represent how Xapian works at the C++ level.  Would it be possible to
> make more functional bindings nonetheless?

I somehow overlooked this particular email and am reading it just
now. Yes, the non-functional bindings are a bit ugly. But, I'm not able
to think of a clean way to make functional bindings without supporting
all features offered by Xapian. Any suggestions you have in this regard
would be useful. Look through xapian/termgenerator.h for more
details. In particular, look at functions increase_termpos,
index_text_without_positions.
diff mbox series

Patch

diff --git a/gnu/packages.scm b/gnu/packages.scm
index d22c992bb1..e91753e2a8 100644
--- a/gnu/packages.scm
+++ b/gnu/packages.scm
@@ -4,6 +4,7 @@ 
 ;;; Copyright © 2014 Eric Bavier <bavier@member.fsf.org>
 ;;; Copyright © 2016, 2017 Alex Kost <alezost@gmail.com>
 ;;; Copyright © 2016 Mathieu Lirzin <mthl@gnu.org>
+;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -43,6 +44,7 @@ 
   #:use-module (srfi srfi-34)
   #:use-module (srfi srfi-35)
   #:use-module (srfi srfi-39)
+  #:use-module (xapian xapian)
   #:export (search-patch
             search-patches
             search-auxiliary-file
@@ -64,7 +66,8 @@ 
             specification->location
             specifications->manifest
 
-            generate-package-cache))
+            generate-package-cache
+            generate-package-search-index))
 
 ;;; Commentary:
 ;;;
@@ -426,6 +429,30 @@  reducing the memory footprint."
                                #:opts '(#:to-file? #t)))))
   cache-file)
 
+(define %package-search-index
+  ;; Location of the package search-index
+  "/lib/guix/package-search.index")
+
+(define (generate-package-search-index directory)
+  "Generate under DIRECTORY a xapian index of all the available packages."
+  (define db-path
+    (string-append directory %package-search-index))
+
+  (mkdir-p (dirname db-path))
+  (call-with-writable-database db-path
+    (lambda (db)
+      (fold-packages (lambda (package _)
+                       (let* ((idterm (string-append "Q" (package-name package)))
+                              (doc (make-document #:data (package-name package)
+                                                  #:terms `((,idterm . 0))))
+                              (term-generator (make-term-generator #:stem (make-stem "en")
+                                                                   #:document doc)))
+                         (index-text! term-generator (package-description package))
+                         (replace-document! db idterm doc)))
+                     #f)))
+
+  db-path)
+
 
 (define %sigint-prompt
   ;; The prompt to jump to upon SIGINT.
diff --git a/guix/channels.scm b/guix/channels.scm
index f0261dc2da..c70c70938c 100644
--- a/guix/channels.scm
+++ b/guix/channels.scm
@@ -2,6 +2,7 @@ 
 ;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
 ;;; Copyright © 2019 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
+;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -581,9 +582,40 @@  be used as a profile hook."
                                                  (hook . package-cache))
                                   #:local-build? #t)))
 
+(define (package-search-index manifest)
+  "Build a package search index for the instance in MANIFEST.  This is meant
+to be used as a profile hook."
+  (mlet %store-monad ((profile (profile-derivation manifest
+                                                   #:hooks '())))
+
+    (define build
+      #~(begin
+          (use-modules (gnu packages))
+
+          (if (defined? 'generate-package-search-index)
+              (begin
+                ;; Delegate package search index generation to the inferior.
+                (format (current-error-port)
+                        "Generating package search index for '~a'...~%"
+                        #$profile)
+                (generate-package-search-index #$output))
+              (mkdir #$output))))
+
+    (gexp->derivation-in-inferior "guix-package-search-index" build
+                                  profile
+
+                                  ;; If the Guix in PROFILE is too old and
+                                  ;; lacks 'guix repl', skip building the
+                                  ;; search index rather than failing.
+                                  #:silent-failure? #t
+
+                                  #:properties '((type . profile-hook)
+                                                 (hook . package-search-index))
+                                  #:local-build? #t)))
+
 (define %channel-profile-hooks
   ;; The default channel profile hooks.
-  (cons package-cache-file %default-profile-hooks))
+  (cons* package-cache-file package-search-index %default-profile-hooks))
 
 (define (channel-instances->derivation instances)
   "Return the derivation of the profile containing INSTANCES, a list of