diff mbox series

[bug#49946,v7,06/32] build-system: Add tree-sitter-build-system.

Message ID 87r0xrcpei.fsf@gmx.com
State New
Headers show
Series gnu: Add tree-sitter for emacs. | expand

Commit Message

Pierre Langlois Nov. 25, 2022, 1:57 a.m. UTC
It appears this email is making it through, so here it is as an
attachment:

Comments

Simon Tournier Feb. 7, 2023, 11:25 a.m. UTC | #1
Hi,

Sorry, I have totally overlooked this tree-sitter story. :-) And I have
not read all the patch versions and thread yet.


On Fri, 25 Nov 2022 at 01:57, Pierre Langlois <pierre.langlois@gmx.com> wrote:

> +(define* (install #:key target grammar-directories outputs #:allow-other-keys)
> +  (let ((lib (string-append (assoc-ref outputs "out")
> +                            "/lib/tree-sitter")))
> +    (mkdir-p lib)
> +    (define (compile-language dir)
> +      (with-directory-excursion dir
> +        (let ((lang (assoc-ref (call-with-input-file "src/grammar.json"
> +                                 read-json)
> +                               "name"))
> +              (source-file (lambda (path)
> +                             (if (file-exists? path)
> +                                 path
> +                                 #f))))
> +          (apply invoke
> +                 `(,(if target
> +                        (string-append target "-g++")
> +                        "g++")

Why is g++ required?  Is gcc not enough?  I thought from Tree-sitter
documentation that it was only using C files and not C++; but…

> +                   "-shared"
> +                   "-fPIC"
> +                   "-fno-exceptions"
> +                   "-O2"
> +                   "-g"
> +                   "-o" ,(string-append lib "/" lang ".so")
> +                   ;; An additional `scanner.{c,cc}' file is sometimes
> +                   ;; provided.
> +                   ,@(cond
> +                      ((source-file "src/scanner.c")
> +                       => (lambda (file) (list "-xc" "-std=c99" file)))
> +                      ((source-file "src/scanner.cc")

…apparently not.


Cheers,
simon
Pierre Langlois Feb. 10, 2023, 4:14 p.m. UTC | #2
Hi!

zimoun <zimon.toutoune@gmail.com> writes:

> Hi,
>
> Sorry, I have totally overlooked this tree-sitter story. :-) And I have
> not read all the patch versions and thread yet.
>
>
> On Fri, 25 Nov 2022 at 01:57, Pierre Langlois <pierre.langlois@gmx.com> wrote:
>
>> +(define* (install #:key target grammar-directories outputs #:allow-other-keys)
>> +  (let ((lib (string-append (assoc-ref outputs "out")
>> +                            "/lib/tree-sitter")))
>> +    (mkdir-p lib)
>> +    (define (compile-language dir)
>> +      (with-directory-excursion dir
>> +        (let ((lang (assoc-ref (call-with-input-file "src/grammar.json"
>> +                                 read-json)
>> +                               "name"))
>> +              (source-file (lambda (path)
>> +                             (if (file-exists? path)
>> +                                 path
>> +                                 #f))))
>> +          (apply invoke
>> +                 `(,(if target
>> +                        (string-append target "-g++")
>> +                        "g++")
>
> Why is g++ required?  Is gcc not enough?  I thought from Tree-sitter
> documentation that it was only using C files and not C++; but…
>
>> +                   "-shared"
>> +                   "-fPIC"
>> +                   "-fno-exceptions"
>> +                   "-O2"
>> +                   "-g"
>> +                   "-o" ,(string-append lib "/" lang ".so")
>> +                   ;; An additional `scanner.{c,cc}' file is sometimes
>> +                   ;; provided.
>> +                   ,@(cond
>> +                      ((source-file "src/scanner.c")
>> +                       => (lambda (file) (list "-xc" "-std=c99" file)))
>> +                      ((source-file "src/scanner.cc")
>
> …apparently not.

Yeah this is a little strange. IIUC, the final code generated from the
grammar.js->grammar.json->parser.c process is plain C, but grammar
packages may also provide some custom hand-written code in the form of a
scanner.{c,cc} file.  A scanner.cc is C++, which is why g++ is used.

Thanks,
Pierre
Andrew Tropin Feb. 12, 2023, 7:42 a.m. UTC | #3
On 2022-11-25 01:57, Pierre Langlois wrote:

> It appears this email is making it through, so here it is as an
> attachment:
>
> From 4a3c1fff8460a03bfb7c1aada9863205cd6f22fd Mon Sep 17 00:00:00 2001
> From: Pierre Langlois <pierre.langlois@gmx.com>
> Date: Tue, 29 Mar 2022 20:13:34 +0100
> Subject: [PATCH v7 06/32] build-system: Add tree-sitter-build-system.
>
> * guix/build-system/tree-sitter.scm: New module.
> * guix/build/tree-sitter-build-system.scm: Likewise.
> * Makefile.am (MODULES): Add them.
> * doc/guix.texi: Document it.
> ---
>  Makefile.am                             |   2 +
>  doc/guix.texi                           |  21 ++-
>  guix/build-system/tree-sitter.scm       | 190 ++++++++++++++++++++++++
>  guix/build/tree-sitter-build-system.scm | 153 +++++++++++++++++++
>  4 files changed, 365 insertions(+), 1 deletion(-)
>  create mode 100644 guix/build-system/tree-sitter.scm
>  create mode 100644 guix/build/tree-sitter-build-system.scm
>
> diff --git a/Makefile.am b/Makefile.am
> index c3af23b68e..a16c4fcd7e 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -178,6 +178,7 @@ MODULES =					\
>    guix/build-system/ruby.scm			\
>    guix/build-system/scons.scm			\
>    guix/build-system/texlive.scm			\
> +  guix/build-system/tree-sitter.scm		\
>    guix/build-system/trivial.scm			\
>    guix/ftp-client.scm				\
>    guix/http-client.scm				\
> @@ -234,6 +235,7 @@ MODULES =					\
>    guix/build/ruby-build-system.scm		\
>    guix/build/scons-build-system.scm		\
>    guix/build/texlive-build-system.scm		\
> +  guix/build/tree-sitter-build-system.scm	\
>    guix/build/waf-build-system.scm		\
>    guix/build/haskell-build-system.scm		\
>    guix/build/julia-build-system.scm		\
> diff --git a/doc/guix.texi b/doc/guix.texi
> index e547d469f4..4e997f7176 100644
> --- a/doc/guix.texi
> +++ b/doc/guix.texi
> @@ -79,7 +79,7 @@ Copyright @copyright{} 2020 Jack Hill@*
>  Copyright @copyright{} 2020 Naga Malleswari@*
>  Copyright @copyright{} 2020, 2021 Brice Waegeneire@*
>  Copyright @copyright{} 2020 R Veera Kumar@*
> -Copyright @copyright{} 2020, 2021 Pierre Langlois@*
> +Copyright @copyright{} 2020, 2021, 2022 Pierre Langlois@*
>  Copyright @copyright{} 2020 pinoaffe@*
>  Copyright @copyright{} 2020 André Batista@*
>  Copyright @copyright{} 2020, 2021 Alexandru-Sergiu Marton@*
> @@ -9732,6 +9732,25 @@ be specified with the @code{#:node} parameter which defaults to
>  @code{node}.
>  @end defvr
>  
> +@defvr {Scheme Variable} tree-sitter-build-system
> +
> +This variable is exported by @code{(guix build-system tree-sitter)}.  It
> +implements procedures to compile grammars for the
> +@url{https://tree-sitter.github.io/tree-sitter/, Tree-sitter} parsing
> +library.  It essentially runs @code{tree-sitter generate} to translate
> +@code{grammar.js} grammars to JSON and then to C, which it then
> +compiles to native code.
> +
> +Tree-sitter packages may support multiple grammars, so this build system
> +supports a @code{#:grammar-directories} keyword to specify a list of
> +locations where a @code{grammar.js} file may be found.
> +
> +Grammars sometimes depend on each other, such as C++ depending on C and
> +TypeScript depending on JavaScript.  You may use inputs to declare such
> +dependencies.
> +
> +@end defvr
> +
>  Lastly, for packages that do not need anything as sophisticated, a
>  ``trivial'' build system is provided.  It is trivial in the sense that
>  it provides basically no support: it does not pull any implicit inputs,
> diff --git a/guix/build-system/tree-sitter.scm b/guix/build-system/tree-sitter.scm
> new file mode 100644
> index 0000000000..aeb96e3ef5
> --- /dev/null
> +++ b/guix/build-system/tree-sitter.scm
> @@ -0,0 +1,190 @@
> +;;; GNU Guix --- Functional package management for GNU
> +;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
> +;;;
> +;;; This file is part of GNU Guix.
> +;;;
> +;;; GNU Guix is free software; you can redistribute it and/or modify it
> +;;; under the terms of the GNU General Public License as published by
> +;;; the Free Software Foundation; either version 3 of the License, or (at
> +;;; your option) any later version.
> +;;;
> +;;; GNU Guix is distributed in the hope that it will be useful, but
> +;;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;;; GNU General Public License for more details.
> +;;;
> +;;; You should have received a copy of the GNU General Public License
> +;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
> +
> +(define-module (guix build-system tree-sitter)
> +  #:use-module (guix store)
> +  #:use-module (guix utils)
> +  #:use-module (guix packages)
> +  #:use-module (guix gexp)
> +  #:use-module (guix monads)
> +  #:use-module (guix search-paths)
> +  #:use-module (guix build-system)
> +  #:use-module (guix build-system gnu)
> +  #:use-module (guix build-system node)
> +  #:use-module (ice-9 match)
> +  #:export (%tree-sitter-build-system-modules
> +            tree-sitter-build
> +            tree-sitter-build-system))
> +
> +(define %tree-sitter-build-system-modules
> +  ;; Build-side modules imported by default.
> +  `((guix build tree-sitter-build-system)
> +    ,@%node-build-system-modules))
> +
> +(define* (lower name
> +                #:key source inputs native-inputs outputs system target
> +                #:allow-other-keys
> +                #:rest arguments)
> +  "Return a bag for NAME from the given arguments."
> +  (define private-keywords
> +    `(#:inputs #:native-inputs #:outputs ,@(if target
> +                                               '()
> +                                               '(#:target))))
> +  (define node
> +    (module-ref (resolve-interface '(gnu packages node))
> +                'node-lts))
> +  (define tree-sitter
> +    (module-ref (resolve-interface '(gnu packages tree-sitter))
> +                'tree-sitter))
> +  (define tree-sitter-cli
> +    (module-ref (resolve-interface '(gnu packages tree-sitter))
> +                'tree-sitter-cli))
> +  ;; Grammars depend on each other via JS modules, which we package into a
> +  ;; dedicated js output.
> +  (define grammar-inputs
> +    (map (match-lambda
> +           ((name package)
> +            `(,name ,package "js")))
> +         inputs))
> +  (bag
> +    (name name)
> +    (system system) (target target)
> +    (build-inputs `(,@(if source
> +                          `(("source" ,source))
> +                          '())
> +                    ("node" ,node)
> +                    ("tree-sitter-cli" ,tree-sitter-cli)
> +                    ,@native-inputs
> +                    ,@(if target '() grammar-inputs)
> +                    ;; Keep the standard inputs of 'gnu-build-system'.
> +                    ,@(if target
> +                          (standard-cross-packages target 'host)
> +                          '())
> +                    ,@(standard-packages)))
> +    (host-inputs `(("tree-sitter" ,tree-sitter)
> +                   ,@(if target grammar-inputs '())))
> +    ;; Keep the standard inputs of 'gnu-build-system'.
> +    (target-inputs (if target
> +                       (standard-cross-packages target 'target)
> +                       '()))
> +    ;; XXX: this is a hack to get around issue #41569.
> +    (outputs (match outputs
> +               (("out") (cons "js" outputs))
> +               (_ outputs)))
> +    (build (if target tree-sitter-cross-build tree-sitter-build))
> +    (arguments (strip-keyword-arguments private-keywords arguments))))
> +
> +(define* (tree-sitter-build name inputs
> +                            #:key
> +                            source
> +                            (phases '%standard-phases)
> +                            (grammar-directories '("."))
> +                            (tests? #t)
> +                            (outputs '("out" "js"))
> +                            (search-paths '())
> +                            (system (%current-system))
> +                            (guile #f)
> +                            (imported-modules %tree-sitter-build-system-modules)
> +                            (modules '((guix build utils)
> +                                       (guix build tree-sitter-build-system))))
> +  (define builder
> +    (with-imported-modules imported-modules
> +      #~(begin
> +          (use-modules #$@(sexp->gexp modules))
> +          (tree-sitter-build #:name #$name
> +                             #:source #+source
> +                             #:system #$system
> +                             #:phases #$phases
> +                             #:tests? #$tests?
> +                             #:grammar-directories '#$grammar-directories
> +                             #:outputs #$(outputs->gexp outputs)
> +                             #:search-paths '#$(sexp->gexp
> +                                                (map search-path-specification->sexp
> +                                                     search-paths))
> +                             #:inputs #$(input-tuples->gexp inputs)))))
> +
> +  (mlet %store-monad ((guile (package->derivation (or guile (default-guile))
> +                                                  system #:graft? #f)))
> +    (gexp->derivation name builder
> +                      #:system system
> +                      #:guile-for-build guile)))
> +
> +(define* (tree-sitter-cross-build name
> +                                  #:key
> +                                  target
> +                                  build-inputs target-inputs host-inputs
> +                                  guile source
> +                                  (phases '%standard-phases)
> +                                  (grammar-directories '("."))
> +                                  (tests? #t)
> +                                  (outputs '("out" "js"))
> +                                  (search-paths '())
> +                                  (native-search-paths '())
> +                                  (system (%current-system))
> +                                  (build (nix-system->gnu-triplet system))
> +                                  (imported-modules %tree-sitter-build-system-modules)
> +                                  (modules '((guix build utils)
> +                                             (guix build tree-sitter-build-system))))
> +  (define builder
> +    (with-imported-modules imported-modules
> +      #~(begin
> +          (use-modules #$@(sexp->gexp modules))
> +
> +          (define %build-host-inputs
> +            #+(input-tuples->gexp build-inputs))
> +
> +          (define %build-target-inputs
> +            (append #$(input-tuples->gexp host-inputs)
> +                    #+(input-tuples->gexp target-inputs)))
> +
> +          (define %build-inputs
> +            (append %build-host-inputs %build-target-inputs))
> +
> +          (tree-sitter-build #:name #$name
> +                             #:source #+source
> +                             #:system #$system
> +                             #:build #$build
> +                             #:target #$target
> +                             #:phases #$phases
> +                             #:tests? #$tests?
> +                             #:grammar-directories '#$grammar-directories
> +                             #:outputs #$(outputs->gexp outputs)
> +                             #:inputs %build-target-inputs
> +                             #:native-inputs %build-host-inputs
> +                             #:search-paths '#$(sexp->gexp
> +                                                (map search-path-specification->sexp
> +                                                     search-paths))
> +                             #:native-search-paths '#$(sexp->gexp
> +                                                       (map
> +                                                        search-path-specification->sexp
> +                                                        native-search-paths))))))
> +
> +  (mlet %store-monad ((guile (package->derivation (or guile (default-guile))
> +                                                  system #:graft? #f)))
> +    (gexp->derivation name builder
> +                      #:system system
> +                      #:target target
> +                      #:guile-for-build guile)))
> +
> +(define tree-sitter-build-system
> +  (build-system
> +    (name 'tree-sitter)
> +    (description "The Tree-sitter grammar build system")
> +    (lower lower)))
> +
> +;;; tree-sitter.scm ends here
> diff --git a/guix/build/tree-sitter-build-system.scm b/guix/build/tree-sitter-build-system.scm
> new file mode 100644
> index 0000000000..574b0f2a1c
> --- /dev/null
> +++ b/guix/build/tree-sitter-build-system.scm
> @@ -0,0 +1,153 @@
> +;;; GNU Guix --- Functional package management for GNU
> +;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
> +;;;
> +;;; This file is part of GNU Guix.
> +;;;
> +;;; GNU Guix is free software; you can redistribute it and/or modify it
> +;;; under the terms of the GNU General Public License as published by
> +;;; the Free Software Foundation; either version 3 of the License, or (at
> +;;; your option) any later version.
> +;;;
> +;;; GNU Guix is distributed in the hope that it will be useful, but
> +;;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;;; GNU General Public License for more details.
> +;;;
> +;;; You should have received a copy of the GNU General Public License
> +;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
> +
> +(define-module (guix build tree-sitter-build-system)
> +  #:use-module ((guix build node-build-system) #:prefix node:)
> +  #:use-module (guix build json)
> +  #:use-module (guix build utils)
> +  #:use-module (ice-9 match)
> +  #:use-module (ice-9 regex)
> +  #:use-module (srfi srfi-1)
> +  #:export (%standard-phases
> +            tree-sitter-build))
> +
> +;; Commentary:
> +;;
> +;; Build procedures for tree-sitter grammar packages.  This is the
> +;; builder-side code, which builds on top of the node build-system.
> +;;
> +;; Tree-sitter grammars are written in JavaScript and compiled to a native
> +;; shared object.  The `tree-sitter generate' command invokes `node' in order
> +;; to evaluate the grammar.js into a grammar.json file, which is then
> +;; translated into C code.  We then compile the C code ourselves.  Packages
> +;; also sometimes add extra manually written C/C++ code.
> +;;
> +;; In order to support grammars depending on each other, such as C and C++,
> +;; JavaScript and TypeScript, this build-system installs the source of the
> +;; node module in a dedicated "js" output.
> +;;
> +;; Code:
> +
> +(define* (patch-dependencies #:key inputs #:allow-other-keys)
> +  "Rewrite dependencies in 'package.json'.  We remove all runtime dependencies
> +and replace development dependencies with tree-sitter grammar node modules."
> +
> +  (define (rewrite package.json)
> +    (map (match-lambda
> +           (("dependencies" @ . _)
> +            '("dependencies" @))
> +           (("devDependencies" @ . _)
> +            `("devDependencies" @
> +              ,@(filter-map (match-lambda
> +                              ((key . directory)
> +                               (let ((node-module
> +                                      (string-append directory
> +                                                     "/lib/node_modules/"
> +                                                     key)))
> +                                 (and (directory-exists? node-module)
> +                                      `(,key . ,node-module)))))
> +                            (alist-delete "node" inputs))))
> +           (other other))
> +         package.json))
> +
> +  (node:with-atomic-json-file-replacement "package.json"
> +    (match-lambda
> +      (('@ . package.json)
> +       (cons '@ (rewrite package.json))))))
> +
> +;; FIXME: The node build-system's configure phase does not support
> +;; cross-compiling so we re-define it.
> +(define* (configure #:key native-inputs inputs #:allow-other-keys)
> +  (invoke (search-input-file (or native-inputs inputs) "/bin/npm")
> +          "--offline" "--ignore-scripts" "install"))
> +
> +(define* (build #:key grammar-directories #:allow-other-keys)
> +  (for-each (lambda (dir)
> +              (with-directory-excursion dir
> +                ;; Avoid generating binding code for other languages, we do
> +                ;; not support this use-case yet and it relies on running
> +                ;; `node-gyp' to build native addons.
> +                (invoke "tree-sitter" "generate" "--no-bindings")))
> +            grammar-directories))
> +
> +(define* (check #:key grammar-directories tests? #:allow-other-keys)
> +  (when tests?
> +    (for-each (lambda (dir)
> +                (with-directory-excursion dir
> +                  (invoke "tree-sitter" "test")))
> +              grammar-directories)))
> +
> +(define* (install #:key target grammar-directories outputs #:allow-other-keys)
> +  (let ((lib (string-append (assoc-ref outputs "out")
> +                            "/lib/tree-sitter")))
> +    (mkdir-p lib)
> +    (define (compile-language dir)
> +      (with-directory-excursion dir
> +        (let ((lang (assoc-ref (call-with-input-file "src/grammar.json"
> +                                 read-json)
> +                               "name"))
> +              (source-file (lambda (path)
> +                             (if (file-exists? path)
> +                                 path
> +                                 #f))))
> +          (apply invoke
> +                 `(,(if target
> +                        (string-append target "-g++")
> +                        "g++")
> +                   "-shared"
> +                   "-fPIC"
> +                   "-fno-exceptions"
> +                   "-O2"
> +                   "-g"
> +                   "-o" ,(string-append lib "/" lang ".so")
> +                   ;; An additional `scanner.{c,cc}' file is sometimes
> +                   ;; provided.
> +                   ,@(cond
> +                      ((source-file "src/scanner.c")
> +                       => (lambda (file) (list "-xc" "-std=c99" file)))
> +                      ((source-file "src/scanner.cc")
> +                       => (lambda (file) (list file)))
> +                      (else '()))
> +                   "-xc" "src/parser.c")))))
> +    (for-each compile-language grammar-directories)))
> +
> +(define* (install-js #:key native-inputs inputs outputs #:allow-other-keys)
> +  (invoke (search-input-file (or native-inputs inputs) "/bin/npm")
> +          "--prefix" (assoc-ref outputs "js")
> +          "--global"
> +          "--offline"
> +          "--loglevel" "info"
> +          "--production"
> +          ;; Skip scripts to prevent building bindings via GYP.
> +          "--ignore-scripts"
> +          "install" "../package.tgz"))
> +
> +(define %standard-phases
> +  (modify-phases node:%standard-phases
> +    (replace 'patch-dependencies patch-dependencies)
> +    (replace 'configure configure)
> +    (replace 'build build)
> +    (replace 'check check)
> +    (replace 'install install)
> +    (add-after 'install 'install-js install-js)))
> +
> +(define* (tree-sitter-build #:key inputs (phases %standard-phases)
> +                            #:allow-other-keys #:rest args)
> +  (apply node:node-build #:inputs inputs #:phases phases args))
> +
> +;;; tree-sitter-build-system.scm ends here

Applied, slightly reformatted, pushed as
dbd4d2d0707b486f1e2c8659e94e1d3b15e4351e

Also, migrated the tree-sitter-grammar function to this build system.

I am still getting a feel for packaging grammars both with the
tree-sitter-grammar helper and as plain packages, and trying to
understand how good or bad the template-function idea is in this case
and whether we want to keep it.
diff mbox series

Patch

From 4a3c1fff8460a03bfb7c1aada9863205cd6f22fd Mon Sep 17 00:00:00 2001
From: Pierre Langlois <pierre.langlois@gmx.com>
Date: Tue, 29 Mar 2022 20:13:34 +0100
Subject: [PATCH v7 06/32] build-system: Add tree-sitter-build-system.

* guix/build-system/tree-sitter.scm: New module.
* guix/build/tree-sitter-build-system.scm: Likewise.
* Makefile.am (MODULES): Add them.
* doc/guix.texi: Document it.
---
 Makefile.am                             |   2 +
 doc/guix.texi                           |  21 ++-
 guix/build-system/tree-sitter.scm       | 190 ++++++++++++++++++++++++
 guix/build/tree-sitter-build-system.scm | 153 +++++++++++++++++++
 4 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 guix/build-system/tree-sitter.scm
 create mode 100644 guix/build/tree-sitter-build-system.scm

diff --git a/Makefile.am b/Makefile.am
index c3af23b68e..a16c4fcd7e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -178,6 +178,7 @@  MODULES =					\
   guix/build-system/ruby.scm			\
   guix/build-system/scons.scm			\
   guix/build-system/texlive.scm			\
+  guix/build-system/tree-sitter.scm		\
   guix/build-system/trivial.scm			\
   guix/ftp-client.scm				\
   guix/http-client.scm				\
@@ -234,6 +235,7 @@  MODULES =					\
   guix/build/ruby-build-system.scm		\
   guix/build/scons-build-system.scm		\
   guix/build/texlive-build-system.scm		\
+  guix/build/tree-sitter-build-system.scm	\
   guix/build/waf-build-system.scm		\
   guix/build/haskell-build-system.scm		\
   guix/build/julia-build-system.scm		\
diff --git a/doc/guix.texi b/doc/guix.texi
index e547d469f4..4e997f7176 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -79,7 +79,7 @@  Copyright @copyright{} 2020 Jack Hill@*
 Copyright @copyright{} 2020 Naga Malleswari@*
 Copyright @copyright{} 2020, 2021 Brice Waegeneire@*
 Copyright @copyright{} 2020 R Veera Kumar@*
-Copyright @copyright{} 2020, 2021 Pierre Langlois@*
+Copyright @copyright{} 2020, 2021, 2022 Pierre Langlois@*
 Copyright @copyright{} 2020 pinoaffe@*
 Copyright @copyright{} 2020 André Batista@*
 Copyright @copyright{} 2020, 2021 Alexandru-Sergiu Marton@*
@@ -9732,6 +9732,25 @@  be specified with the @code{#:node} parameter which defaults to
 @code{node}.
 @end defvr
 
+@defvr {Scheme Variable} tree-sitter-build-system
+
+This variable is exported by @code{(guix build-system tree-sitter)}.  It
+implements procedures to compile grammars for the
+@url{https://tree-sitter.github.io/tree-sitter/, Tree-sitter} parsing
+library.  It essentially runs @code{tree-sitter generate} to translate
+@code{grammar.js} grammars to JSON and then to C, which it then
+compiles to native code.
+
+Tree-sitter packages may support multiple grammars, so this build system
+supports a @code{#:grammar-directories} keyword to specify a list of
+locations where a @code{grammar.js} file may be found.
+
+Grammars sometimes depend on each other, such as C++ depending on C and
+TypeScript depending on JavaScript.  You may use inputs to declare such
+dependencies.
+
+@end defvr
+
 Lastly, for packages that do not need anything as sophisticated, a
 ``trivial'' build system is provided.  It is trivial in the sense that
 it provides basically no support: it does not pull any implicit inputs,
diff --git a/guix/build-system/tree-sitter.scm b/guix/build-system/tree-sitter.scm
new file mode 100644
index 0000000000..aeb96e3ef5
--- /dev/null
+++ b/guix/build-system/tree-sitter.scm
@@ -0,0 +1,190 @@ 
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix build-system tree-sitter)
+  #:use-module (guix store)
+  #:use-module (guix utils)
+  #:use-module (guix packages)
+  #:use-module (guix gexp)
+  #:use-module (guix monads)
+  #:use-module (guix search-paths)
+  #:use-module (guix build-system)
+  #:use-module (guix build-system gnu)
+  #:use-module (guix build-system node)
+  #:use-module (ice-9 match)
+  #:export (%tree-sitter-build-system-modules
+            tree-sitter-build
+            tree-sitter-build-system))
+
+(define %tree-sitter-build-system-modules
+  ;; Build-side modules imported by default.
+  `((guix build tree-sitter-build-system)
+    ,@%node-build-system-modules))
+
+(define* (lower name
+                #:key source inputs native-inputs outputs system target
+                #:allow-other-keys
+                #:rest arguments)
+  "Return a bag for NAME from the given arguments."
+  (define private-keywords
+    `(#:inputs #:native-inputs #:outputs ,@(if target
+                                               '()
+                                               '(#:target))))
+  (define node
+    (module-ref (resolve-interface '(gnu packages node))
+                'node-lts))
+  (define tree-sitter
+    (module-ref (resolve-interface '(gnu packages tree-sitter))
+                'tree-sitter))
+  (define tree-sitter-cli
+    (module-ref (resolve-interface '(gnu packages tree-sitter))
+                'tree-sitter-cli))
+  ;; Grammars depend on each other via JS modules, which we package into a
+  ;; dedicated js output.
+  (define grammar-inputs
+    (map (match-lambda
+           ((name package)
+            `(,name ,package "js")))
+         inputs))
+  (bag
+    (name name)
+    (system system) (target target)
+    (build-inputs `(,@(if source
+                          `(("source" ,source))
+                          '())
+                    ("node" ,node)
+                    ("tree-sitter-cli" ,tree-sitter-cli)
+                    ,@native-inputs
+                    ,@(if target '() grammar-inputs)
+                    ;; Keep the standard inputs of 'gnu-build-system'.
+                    ,@(if target
+                          (standard-cross-packages target 'host)
+                          '())
+                    ,@(standard-packages)))
+    (host-inputs `(("tree-sitter" ,tree-sitter)
+                   ,@(if target grammar-inputs '())))
+    ;; Keep the standard inputs of 'gnu-build-system'.
+    (target-inputs (if target
+                       (standard-cross-packages target 'target)
+                       '()))
+    ;; XXX: this is a hack to get around issue #41569.
+    (outputs (match outputs
+               (("out") (cons "js" outputs))
+               (_ outputs)))
+    (build (if target tree-sitter-cross-build tree-sitter-build))
+    (arguments (strip-keyword-arguments private-keywords arguments))))
+
+(define* (tree-sitter-build name inputs
+                            #:key
+                            source
+                            (phases '%standard-phases)
+                            (grammar-directories '("."))
+                            (tests? #t)
+                            (outputs '("out" "js"))
+                            (search-paths '())
+                            (system (%current-system))
+                            (guile #f)
+                            (imported-modules %tree-sitter-build-system-modules)
+                            (modules '((guix build utils)
+                                       (guix build tree-sitter-build-system))))
+  "Return, as a monadic value in %STORE-MONAD, a derivation called NAME for
+SYSTEM.  Its builder imports IMPORTED-MODULES, loads MODULES, and calls
+'tree-sitter-build' with SOURCE, PHASES, TESTS?, GRAMMAR-DIRECTORIES, OUTPUTS,
+SEARCH-PATHS and INPUTS.  The builder is run by GUILE, or by the package
+returned by 'default-guile' when GUILE is #f."
+
+  ;; Host-side conversion of the search path specifications to plain sexps, so
+  ;; that they can be spliced into the builder as quoted data.
+  (define search-path-sexps
+    (map search-path-specification->sexp search-paths))
+
+  (define build-script
+    (with-imported-modules imported-modules
+      #~(begin
+          (use-modules #$@(sexp->gexp modules))
+          (tree-sitter-build #:name #$name
+                             #:source #+source
+                             #:system #$system
+                             #:phases #$phases
+                             #:tests? #$tests?
+                             #:grammar-directories '#$grammar-directories
+                             #:outputs #$(outputs->gexp outputs)
+                             #:search-paths '#$(sexp->gexp search-path-sexps)
+                             #:inputs #$(input-tuples->gexp inputs)))))
+
+  ;; The Guile that runs the builder is lowered without grafts.
+  (mlet %store-monad ((guile-drv (package->derivation
+                                  (or guile (default-guile))
+                                  system #:graft? #f)))
+    (gexp->derivation name build-script
+                      #:system system
+                      #:guile-for-build guile-drv)))
+
+(define* (tree-sitter-cross-build name
+                                  #:key
+                                  target
+                                  build-inputs target-inputs host-inputs
+                                  guile source
+                                  (phases '%standard-phases)
+                                  (grammar-directories '("."))
+                                  (tests? #t)
+                                  (outputs '("out" "js"))
+                                  (search-paths '())
+                                  (native-search-paths '())
+                                  (system (%current-system))
+                                  (build (nix-system->gnu-triplet system))
+                                  (imported-modules %tree-sitter-build-system-modules)
+                                  (modules '((guix build utils)
+                                             (guix build tree-sitter-build-system))))
+  "Return, as a monadic value in %STORE-MONAD, a derivation called NAME that
+is built on SYSTEM (GNU triplet BUILD) for TARGET.  Its builder imports
+IMPORTED-MODULES, loads MODULES, and calls 'tree-sitter-build' with
+BUILD-INPUTS as #:native-inputs and with HOST-INPUTS followed by TARGET-INPUTS
+as #:inputs, along with SOURCE, PHASES, TESTS?, GRAMMAR-DIRECTORIES, OUTPUTS,
+SEARCH-PATHS and NATIVE-SEARCH-PATHS.  The builder is run by GUILE, or by the
+package returned by 'default-guile' when GUILE is #f."
+
+  ;; Host-side conversion of both kinds of search path specifications to
+  ;; plain sexps, so that they can be spliced into the builder as quoted data.
+  (define target-search-path-sexps
+    (map search-path-specification->sexp search-paths))
+
+  (define native-search-path-sexps
+    (map search-path-specification->sexp native-search-paths))
+
+  (define build-script
+    (with-imported-modules imported-modules
+      #~(begin
+          (use-modules #$@(sexp->gexp modules))
+
+          ;; Inputs that run on the build machine.
+          (define %build-host-inputs
+            #+(input-tuples->gexp build-inputs))
+
+          ;; Inputs meant for the target: HOST-INPUTS, then TARGET-INPUTS.
+          (define %build-target-inputs
+            (append #$(input-tuples->gexp host-inputs)
+                    #+(input-tuples->gexp target-inputs)))
+
+          ;; NOTE(review): not referenced by the call below; presumably kept
+          ;; for parity with other cross builders -- confirm before removing.
+          (define %build-inputs
+            (append %build-host-inputs %build-target-inputs))
+
+          (tree-sitter-build #:name #$name
+                             #:source #+source
+                             #:system #$system
+                             #:build #$build
+                             #:target #$target
+                             #:phases #$phases
+                             #:tests? #$tests?
+                             #:grammar-directories '#$grammar-directories
+                             #:outputs #$(outputs->gexp outputs)
+                             #:inputs %build-target-inputs
+                             #:native-inputs %build-host-inputs
+                             #:search-paths '#$(sexp->gexp
+                                                target-search-path-sexps)
+                             #:native-search-paths '#$(sexp->gexp
+                                                       native-search-path-sexps)))))
+
+  ;; The Guile that runs the builder is lowered without grafts.
+  (mlet %store-monad ((guile-drv (package->derivation
+                                  (or guile (default-guile))
+                                  system #:graft? #f)))
+    (gexp->derivation name build-script
+                      #:system system
+                      #:target target
+                      #:guile-for-build guile-drv)))
+
+;; The build system object for tree-sitter grammars.  Its 'lower' field is
+;; the 'lower' procedure defined in this module.
+(define tree-sitter-build-system
+  (build-system
+    (name 'tree-sitter)
+    (description "The Tree-sitter grammar build system")
+    (lower lower)))
+
+;;; tree-sitter.scm ends here
diff --git a/guix/build/tree-sitter-build-system.scm b/guix/build/tree-sitter-build-system.scm
new file mode 100644
index 0000000000..574b0f2a1c
--- /dev/null
+++ b/guix/build/tree-sitter-build-system.scm
@@ -0,0 +1,153 @@ 
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix build tree-sitter-build-system)
+  #:use-module ((guix build node-build-system) #:prefix node:)
+  #:use-module (guix build json)
+  #:use-module (guix build utils)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 regex)
+  #:use-module (srfi srfi-1)
+  #:export (%standard-phases
+            tree-sitter-build))
+
+;; Commentary:
+;;
+;; Build procedures for tree-sitter grammar packages.  This is the
+;; builder-side code, which builds on top of the node build-system.
+;;
+;; Tree-sitter grammars are written in JavaScript and compiled to a native
+;; shared object.  The `tree-sitter generate' command invokes `node' in order
+;; to evaluate the grammar.js into a grammar.json file, which is then
+;; translated into C code.  We then compile the C code ourselves.  Packages
+;; also sometimes add extra manually written C/C++ code.
+;;
+;; In order to support grammars depending on each other, such as C and C++,
+;; JavaScript and TypeScript, this build-system installs the source of the
+;; node module in a dedicated "js" output.
+;;
+;; Code:
+
+(define* (patch-dependencies #:key inputs #:allow-other-keys)
+  "Rewrite 'package.json' in place: the \"dependencies\" object is emptied, and
+the \"devDependencies\" object is replaced by one entry per element of INPUTS
+(other than \"node\") that provides a 'lib/node_modules/NAME' directory, each
+entry mapping the input's name to that directory."
+
+  (define (input->node-module input)
+    ;; Turn INPUT, a (NAME . DIRECTORY) pair, into (NAME . MODULE-DIRECTORY)
+    ;; when DIRECTORY contains a node module called NAME; return #f otherwise.
+    (match input
+      ((label . prefix)
+       (let ((module (string-append prefix "/lib/node_modules/" label)))
+         (and (directory-exists? module)
+              (cons label module))))))
+
+  (define (grammar-modules)
+    ;; Entries for every input that ships a node module.  This is a procedure
+    ;; so that the file system is only inspected when an entry needs it.
+    (filter-map input->node-module (alist-delete "node" inputs)))
+
+  (define (rewrite-entry entry)
+    ;; Rewrite one top-level (KEY . VALUE) entry of 'package.json'.
+    (match entry
+      (("dependencies" @ . _)
+       '("dependencies" @))
+      (("devDependencies" @ . _)
+       `("devDependencies" @ ,@(grammar-modules)))
+      (_ entry)))
+
+  (node:with-atomic-json-file-replacement "package.json"
+    (lambda (json)
+      (match json
+        (('@ . entries)
+         (cons '@ (map rewrite-entry entries)))))))
+
+;; FIXME: The node build-system's configure phase does not support
+;; cross-compiling so we re-define it.
+(define* (configure #:key native-inputs inputs #:allow-other-keys)
+  "Run 'npm install' with '--offline' and '--ignore-scripts'.  The 'npm'
+executable is looked up in NATIVE-INPUTS when it is true, and in INPUTS
+otherwise."
+  (let ((npm (search-input-file (or native-inputs inputs) "/bin/npm")))
+    (invoke npm "--offline" "--ignore-scripts" "install")))
+
+(define* (build #:key grammar-directories #:allow-other-keys)
+  "Run 'tree-sitter generate --no-bindings' from within each directory listed
+in GRAMMAR-DIRECTORIES."
+
+  (define (generate-parser directory)
+    (with-directory-excursion directory
+      ;; Bindings for other languages are deliberately not generated: that
+      ;; use-case is not supported yet, and it relies on running `node-gyp'
+      ;; to build native addons.
+      (invoke "tree-sitter" "generate" "--no-bindings")))
+
+  (for-each generate-parser grammar-directories))
+
+(define* (check #:key grammar-directories tests? #:allow-other-keys)
+  "When TESTS? is true, run 'tree-sitter test' from within each directory
+listed in GRAMMAR-DIRECTORIES."
+
+  (define (run-test-suite directory)
+    (with-directory-excursion directory
+      (invoke "tree-sitter" "test")))
+
+  (when tests?
+    (for-each run-test-suite grammar-directories)))
+
+(define* (install #:key target grammar-directories outputs #:allow-other-keys)
+  "For each directory listed in GRAMMAR-DIRECTORIES, compile 'src/parser.c',
+together with 'src/scanner.c' or 'src/scanner.cc' when one of them exists,
+into a shared object.  The result is written to 'lib/tree-sitter/NAME.so' in
+the \"out\" output of OUTPUTS, where NAME is the \"name\" field of the
+directory's 'src/grammar.json'.  The compiler is 'TARGET-g++' when TARGET is
+true and 'g++' otherwise."
+
+  (define lib
+    (string-append (assoc-ref outputs "out") "/lib/tree-sitter"))
+
+  (define compiler
+    (if target
+        (string-append target "-g++")
+        "g++"))
+
+  (define (existing-file path)
+    ;; Return PATH if it designates an existing file, #f otherwise.
+    (and (file-exists? path) path))
+
+  (define (grammar-name)
+    ;; Name of the grammar living in the current directory.
+    (assoc-ref (call-with-input-file "src/grammar.json" read-json)
+               "name"))
+
+  (define (scanner-arguments)
+    ;; An additional `scanner.{c,cc}' file is sometimes provided.  The C
+    ;; variant is explicitly compiled as C99; the C++ one is passed as is.
+    (cond ((existing-file "src/scanner.c")
+           => (lambda (file) (list "-xc" "-std=c99" file)))
+          ((existing-file "src/scanner.cc")
+           => list)
+          (else '())))
+
+  (define (compile-language dir)
+    (with-directory-excursion dir
+      (let ((lang (grammar-name)))
+        (apply invoke compiler
+               "-shared"
+               "-fPIC"
+               "-fno-exceptions"
+               "-O2"
+               "-g"
+               "-o" (string-append lib "/" lang ".so")
+               ;; `-xc' makes the driver treat the generated parser as C,
+               ;; whichever scanner arguments come before it.
+               (append (scanner-arguments)
+                       (list "-xc" "src/parser.c"))))))
+
+  (mkdir-p lib)
+  (for-each compile-language grammar-directories))
+
+(define* (install-js #:key native-inputs inputs outputs #:allow-other-keys)
+  "Install '../package.tgz' with 'npm install --global', using the \"js\" output
+of OUTPUTS as the prefix.  The 'npm' executable is looked up in NATIVE-INPUTS
+when it is true, and in INPUTS otherwise."
+  (let ((npm (search-input-file (or native-inputs inputs) "/bin/npm"))
+        (prefix (assoc-ref outputs "js")))
+    (invoke npm
+            "--prefix" prefix
+            "--global"
+            "--offline"
+            "--loglevel" "info"
+            "--production"
+            ;; Skip scripts to prevent building bindings via GYP.
+            "--ignore-scripts"
+            "install" "../package.tgz")))
+
+;; Phases of the tree-sitter build system: those of the node build system,
+;; with 'patch-dependencies', 'configure', 'build', 'check' and 'install'
+;; replaced by the procedures defined in this module, and with an extra
+;; 'install-js' phase inserted right after 'install'.
+(define %standard-phases
+  (modify-phases node:%standard-phases
+    (replace 'patch-dependencies patch-dependencies)
+    (replace 'configure configure)
+    (replace 'build build)
+    (replace 'check check)
+    (replace 'install install)
+    (add-after 'install 'install-js install-js)))
+
+(define* (tree-sitter-build #:key inputs (phases %standard-phases)
+                            #:allow-other-keys #:rest args)
+  "Delegate to 'node:node-build', passing it #:inputs INPUTS and #:phases
+PHASES followed by ARGS, the complete list of arguments received.  PHASES
+defaults to the '%standard-phases' of this module."
+  (apply node:node-build
+         (cons* #:inputs inputs #:phases phases args)))
+
+;;; tree-sitter-build-system.scm ends here
-- 
2.38.1