diff mbox series

[bug#68296] gnu: Add KLEE.

Message ID 890dc6595ccef88cb60ff0a380be9d323df479aa.1704573641.git.soeren@soeren-tempel.net
State New
Headers show
Series [bug#68296] gnu: Add KLEE. | expand

Commit Message

Sören Tempel Jan. 6, 2024, 8:40 p.m. UTC
From: Sören Tempel <soeren@soeren-tempel.net>

* gnu/packages/check.scm (klee-uclibc): New variable.
* gnu/packages/check.scm (klee): New variable.

Signed-off-by: Sören Tempel <soeren@soeren-tempel.net>
---
This is a new package for KLEE, a popular piece of academic software
in the software engineering domain. KLEE implements a technique called
symbolic execution <https://en.wikipedia.org/wiki/Symbolic_execution>
which allows for automated testing of software through SMT solving.
KLEE forms the basis for a lot of research in the symbolic execution
domain <https://klee.github.io/publications/>. Packaging KLEE and
other related tools makes it easier to use Guix for conducting
reproducible research in this domain. I have Guix packages for other
symbolic execution tools which I would also like to upstream in the
future; I figured I would start with KLEE as it has little to no dependencies.

I tested this package by confirming that the basic upstream tutorials
work as intended, e.g. <https://klee.github.io/tutorials/testing-function/>.

This is my first Guix package, hence CC'ing the mentors.

 gnu/packages/check.scm | 107 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)


base-commit: 29c94dd522833b2603a651c14a5b06120bcf1829

Comments

Julien Lepiller Feb. 13, 2024, 8:49 a.m. UTC | #1
Le 6 janvier 2024 21:40:41 GMT+01:00, soeren@soeren-tempel.net a écrit :
>From: Sören Tempel <soeren@soeren-tempel.net>
>
>* gnu/packages/check.scm (klee-uclibc): New variable.
>* gnu/packages/check.scm (klee): New variable.
>
>Signed-off-by: Sören Tempel <soeren@soeren-tempel.net>
>---
>This is a new package for KLEE, a popular piece of academic software
>in the software engineering domain. KLEE implements a technique called
>symbolic execution <https://en.wikipedia.org/wiki/Symbolic_execution>
>which allows for automated testing of software through SMT solving.
>KLEE forms the basis for a lot of research in the symbolic execution
>domain <https://klee.github.io/publications/>. Packaging KLEE and
>other related tools, eases using Guix for conducting reproducible
>research in this domain. I have Guix packages for other symbolic
>execution tools which I also would like to upstream in the future,
>I figured I would start with KLEE as it has little to no dependencies.
>
>I tested this package by conforming that the basic upstream tutorials
>work as intended, e.g. <https://klee.github.io/tutorials/testing-function/>.
>
>This is my first Guix package, hence CC'ing the mentors.
>
> gnu/packages/check.scm | 107 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 107 insertions(+)
>
>diff --git a/gnu/packages/check.scm b/gnu/packages/check.scm
>index 5181d3a164..7e97e59955 100644
>--- a/gnu/packages/check.scm
>+++ b/gnu/packages/check.scm
>@@ -71,6 +71,7 @@ (define-module (gnu packages check)
>   #:use-module (gnu packages bash)
>   #:use-module (gnu packages cmake)
>   #:use-module (gnu packages compression)
>+  #:use-module (gnu packages cpp)
>   #:use-module (gnu packages linux)
>   #:use-module (gnu packages llvm)
>   #:use-module (gnu packages glib)
>@@ -79,6 +80,8 @@ (define-module (gnu packages check)
>   #:use-module (gnu packages gtk)
>   #:use-module (gnu packages guile)
>   #:use-module (gnu packages guile-xyz)
>+  #:use-module (gnu packages maths)
>+  #:use-module (gnu packages ncurses)
>   #:use-module (gnu packages perl)
>   #:use-module (gnu packages pkg-config)
>   #:use-module (gnu packages python)
>@@ -87,6 +90,7 @@ (define-module (gnu packages check)
>   #:use-module (gnu packages python-web)
>   #:use-module (gnu packages python-xyz)
>   #:use-module (gnu packages python-science)
>+  #:use-module (gnu packages sqlite)
>   #:use-module (gnu packages texinfo)
>   #:use-module (gnu packages time)
>   #:use-module (gnu packages xml)
>@@ -3648,3 +3652,106 @@ (define-public subunit
> command line filters to process a subunit stream and language bindings for
> Python, C, C++ and shell.  Bindings are easy to write for other languages.")
>     (license (list license:asl2.0 license:bsd-3)))) ;user can pick
>+
>+(define-public klee-uclibc
>+  (let ((commit "955d502cc1f0688e82348304b053ad787056c754"))
>+    (package
>+      (name "klee-uclibc")
>+      (version (git-version "20230612" "0" commit))
>+      (source
>+       (origin
>+         (method git-fetch)
>+         (uri (git-reference
>+               (url "https://github.com/klee/klee-uclibc")
>+               (commit commit)))
>+         (file-name (git-file-name name version))
>+         (sha256
>+          (base32 "12fnr5mq80cxwvv09gi844mi31jgi8067swagxnlxlhxj4mi125j"))))
>+      (build-system gnu-build-system)
>+      (supported-systems '("x86_64-linux"))
>+      (arguments
>+       `(#:tests? #f ;upstream uClibc tests do not work in the fork
>+         #:phases (modify-phases %standard-phases
>+                    ;; Disable locales as these would have to be downloaded and
>+                    ;; shouldn't really be needed for symbolic execution either.
>+                    (add-after 'unpack 'patch-config
>+                      (lambda _
>+                        (substitute* "klee-premade-configs/x86_64/config"
>+                          (("UCLIBC_DOWNLOAD_PREGENERATED_LOCALE_DATA=y")
>+                           "UCLIBC_DOWNLOAD_PREGENERATED_LOCALE_DATA=n")
>+                          (("UCLIBC_PREGENERATED_LOCALE_DATA=y")
>+                           "UCLIBC_PREGENERATED_LOCALE_DATA=n")
>+                          (("UCLIBC_HAS_LOCALE=y")
>+                           "UCLIBC_HAS_LOCALE=n")
>+                          (("UCLIBC_HAS_XLOCALE=y")
>+                           "UCLIBC_HAS_XLOCALE=n"))))
>+
>+                    ;; Upstream uses a custom non-GNU configure script written
>+                    ;; in Python, replace the default configure phase accordingly.
>+                    (replace 'configure
>+                      (lambda _
>+                        (invoke "./configure"
>+                                "--make-llvm-lib"
>+                                "--enable-release")))
>+
>+                    ;; Custom install phase to only install the libc.a file manually.
>+                    ;; This is the only file which is used/needed by KLEE itself.
>+                    (replace 'install
>+                      (lambda* (#:key outputs #:allow-other-keys)
>+                        (install-file "lib/libc.a"
>+                                      (string-append (assoc-ref outputs "out")
>+                                                     "/lib")))))))
>+      (inputs (list clang-toolchain-13 python ncurses))
>+      (synopsis "Variant of uClibc tailored to symbolic execution")
>+      (description
>+       "Modified version of uClibc for symbolic execution of
>+Unix userland software.  This library can only be used in conjunction
>+with the @code{klee} package.")
>+      (home-page "https://klee.github.io/")
>+      (license license:lgpl2.1))))
>+
>+(define-public klee
>+  (package
>+    (name "klee")
>+    (version "3.0")
>+    (source
>+     (origin
>+       (method git-fetch)
>+       (uri (git-reference
>+             (url "https://github.com/klee/klee")
>+             (commit (string-append "v" version))))
>+       (sha256
>+        (base32 "0dj20nazkcq84ryr87dihvjznapsbl1n21sa8dhhnb0wsad5d6fb"))
>+       (file-name (git-file-name name version))))
>+    (build-system cmake-build-system)
>+    (supported-systems '("x86_64-linux"))
>+    (arguments
>+     `(#:test-target "systemtests"
>+       #:strip-directories '("bin")
>+       #:configure-flags ,#~(list "-DENABLE_KLEE_ASSERTS=OFF"
>+                                  "-DENABLE_TCMALLOC=ON"
>+                                  "-DENABLE_POSIX_RUNTIME=ON"
>+                                  (string-append "-DKLEE_UCLIBC_PATH="
>+                                                 #$klee-uclibc))
>+       #:phases (modify-phases %standard-phases
>+                  (add-after 'unpack 'patch-lit-config
>+                    (lambda _
>+                      ;; Make sure that we retain the value of the GUIX_PYTHONPATH
>+                      ;; environment variable in the test environmented created by
>+                      ;; python-lit. Otherwise, the test scripts won't be able to
>+                      ;; find the python-tabulate dependency, causing test failures.
>+                      (substitute* "test/lit.cfg"
>+                        (("addEnv\\('PWD'\\)" env)
>+                         (string-append env "\n" "addEnv('GUIX_PYTHONPATH')"))))))))
>+    (propagated-inputs (list klee-uclibc clang-toolchain-13 llvm-13 python
>+                             python-tabulate))
>+    (inputs (list python-lit z3 gperftools sqlite))
>+    (synopsis
>+     "Symbolic execution engine built on top of the LLVM compiler infastructure")
>+    (description
>+     "Dynamic symbolic execution engine built on top of
>+LLVM.  Symbolic execution is an automated software testing technique,
>+KLEE leverage this technique to automatically generate test cases for
>+software compiled to LLVM IR.")
>+    (home-page "https://klee.github.io/")
>+    (license (list license:expat license:bsd-4))))
>
>base-commit: 29c94dd522833b2603a651c14a5b06120bcf1829
>
>
>

Hi Sören,

I'm sorry nobody looked at this before. Here are a few remarks.

First, could you separate this in two patches, one per package?

Is there a reason why this is limited to the x86_64 architecture?

You have mixed native and normal inputs. In uclibc, since python is only used for the build, it should be native.

Does it make sense to propagate uclibc in klee, when it only contains a static library? Same for clang and llvm. Isn't z3 used at runtime? Shouldn't it be propagated?

Using #$klee-uclibc directly in the phase could be problematic I think, you should use this-package-inputs or similar (can't remember the exact name or syntax right now, you can leave it to me if you don't find it).

Otherwise, looks good for a first patch :)
Sören Tempel Feb. 13, 2024, 2:09 p.m. UTC | #2
Hi Julien,

Thanks a lot for your feedback! I have sent an updated revision of the
patchset based on your feedback. More information on the changes below.

Julien Lepiller <julien@lepiller.eu> wrote:
> First, could you separate this in two patches, one per package?

Sorry, oversight on my part. Fixed!

> Is there a reason why this is limited to the x86_64 architecture?

Yes, despite operating on the LLVM IR abstraction, KLEE is tightly
integrated with the host architecture. Therefore, upstream currently
only supports x86_64. Packages for other package managers (e.g. Nix)
also only support KLEE on x86_64 [1].

I added a comment explaining this.

> You have mixed native and normal inputs. In uclibc, since python is
> only used for the build, it should be native.

Fixed, ncurses should also be native as it is only used for menuconfig.

> Does it make sense to propagate uclibc in klee, when it only contains
> a static library? Some for clang and llvm. Isn't z3 used at runtime?
> Shouldn't it be propagated?

Sorry, I should have done a better job at explaining this: KLEE is a
symbolic analyzer for LLVM IR. Users of KLEE will need to translate
their C/C++ source to LLVM IR in order to analyze it with KLEE [2].
Furthermore, as KLEE is tightly integrated with a specific LLVM version,
it makes (at least from my point of view) sense to propagate a specific
clang toolchain so users can just run `guix shell klee` and get started
with it. However, if preferred I can also remove the propagation.

z3 isn't used at runtime, KLEE just uses the Z3 library interface and
links against z3. AFAIK, it doesn't use any binaries from z3. Does z3
still need to be propagated in this case?

uclibc does not need to be a propagated input since the KLEE build
system generates LLVM IR from the .a archive [3]. I fixed this.

> Using #$klee-uclibc directly in the phase could be problematic I
> think, you should use this-package-inputs or similar (can't remember
> the exact name or syntax right now, you can leave it to me if you
> don't find it).

Sorry, I am new to Guix so I am not sure what you mean. Let me know if
you have more information on this but also feel free to just adjust this
as you wish :)

Greetings
Sören

[1]: https://github.com/NixOS/nixpkgs/blob/40a7b182e0a00245d69f6b8c1dfd3ea4bfc6257c/pkgs/applications/science/logic/klee/default.nix
[2]: https://klee.github.io/tutorials/testing-function/#compiling-to-llvm-bitcode
[3]: https://github.com/klee/klee/blob/v3.0/CMakeLists.txt#L473-L487
Sören Tempel March 11, 2024, 9:54 a.m. UTC | #3
Hi,

What does "QA: Investigate" mean? The build should be fixed now with the
v3 revision. I don't see any build failures on the QA. Is there anything
I need to do on my end in order to have the build restarted?

Greetings
Sören
diff mbox series

Patch

diff --git a/gnu/packages/check.scm b/gnu/packages/check.scm
index 5181d3a164..7e97e59955 100644
--- a/gnu/packages/check.scm
+++ b/gnu/packages/check.scm
@@ -71,6 +71,7 @@  (define-module (gnu packages check)
   #:use-module (gnu packages bash)
   #:use-module (gnu packages cmake)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cpp)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages llvm)
   #:use-module (gnu packages glib)
@@ -79,6 +80,8 @@  (define-module (gnu packages check)
   #:use-module (gnu packages gtk)
   #:use-module (gnu packages guile)
   #:use-module (gnu packages guile-xyz)
+  #:use-module (gnu packages maths)
+  #:use-module (gnu packages ncurses)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages python)
@@ -87,6 +90,7 @@  (define-module (gnu packages check)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages python-science)
+  #:use-module (gnu packages sqlite)
   #:use-module (gnu packages texinfo)
   #:use-module (gnu packages time)
   #:use-module (gnu packages xml)
@@ -3648,3 +3652,106 @@  (define-public subunit
 command line filters to process a subunit stream and language bindings for
 Python, C, C++ and shell.  Bindings are easy to write for other languages.")
     (license (list license:asl2.0 license:bsd-3)))) ;user can pick
+
+(define-public klee-uclibc
+  (let ((commit "955d502cc1f0688e82348304b053ad787056c754"))
+    (package
+      (name "klee-uclibc")
+      (version (git-version "20230612" "0" commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/klee/klee-uclibc")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32 "12fnr5mq80cxwvv09gi844mi31jgi8067swagxnlxlhxj4mi125j"))))
+      (build-system gnu-build-system)
+      (supported-systems '("x86_64-linux")) ;KLEE upstream only supports x86_64
+      (arguments
+       `(#:tests? #f ;upstream uClibc tests do not work in the fork
+         #:phases (modify-phases %standard-phases
+                    ;; Disable locales as these would have to be downloaded and
+                    ;; shouldn't really be needed for symbolic execution either.
+                    (add-after 'unpack 'patch-config
+                      (lambda _
+                        (substitute* "klee-premade-configs/x86_64/config"
+                          (("UCLIBC_DOWNLOAD_PREGENERATED_LOCALE_DATA=y")
+                           "UCLIBC_DOWNLOAD_PREGENERATED_LOCALE_DATA=n")
+                          (("UCLIBC_PREGENERATED_LOCALE_DATA=y")
+                           "UCLIBC_PREGENERATED_LOCALE_DATA=n")
+                          (("UCLIBC_HAS_LOCALE=y")
+                           "UCLIBC_HAS_LOCALE=n")
+                          (("UCLIBC_HAS_XLOCALE=y")
+                           "UCLIBC_HAS_XLOCALE=n"))))
+
+                    ;; Upstream uses a custom non-GNU configure script written
+                    ;; in Python, replace the default configure phase accordingly.
+                    (replace 'configure
+                      (lambda _
+                        (invoke "./configure"
+                                "--make-llvm-lib"
+                                "--enable-release")))
+
+                    ;; Install only libc.a, the sole file KLEE itself needs.
+                    (replace 'install
+                      (lambda* (#:key outputs #:allow-other-keys)
+                        (install-file "lib/libc.a"
+                                      (string-append (assoc-ref outputs "out")
+                                                     "/lib")))))))
+      (native-inputs (list python ncurses)) ;needed at build time only
+      (inputs (list clang-toolchain-13))
+      (synopsis "Variant of uClibc tailored to symbolic execution")
+      (description
+       "Modified version of uClibc for symbolic execution of
+Unix userland software.  This library can only be used in conjunction
+with the @code{klee} package.")
+      (home-page "https://klee.github.io/")
+      (license license:lgpl2.1))))
+
+(define-public klee
+  (package
+    (name "klee")
+    (version "3.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/klee/klee")
+             (commit (string-append "v" version))))
+       (sha256
+        (base32 "0dj20nazkcq84ryr87dihvjznapsbl1n21sa8dhhnb0wsad5d6fb"))
+       (file-name (git-file-name name version))))
+    (build-system cmake-build-system)
+    (supported-systems '("x86_64-linux")) ;upstream only supports x86_64
+    (arguments
+     `(#:test-target "systemtests"
+       #:strip-directories '("bin")
+       #:configure-flags ,#~(list "-DENABLE_KLEE_ASSERTS=OFF"
+                                  "-DENABLE_TCMALLOC=ON"
+                                  "-DENABLE_POSIX_RUNTIME=ON"
+                                  (string-append ;honor input rewriting
+                                   "-DKLEE_UCLIBC_PATH="
+                                   #$(this-package-input "klee-uclibc")))
+       #:phases (modify-phases %standard-phases
+                  (add-after 'unpack 'patch-lit-config
+                    (lambda _
+                      ;; Retain GUIX_PYTHONPATH in the test environment created by
+                      ;; python-lit.  Otherwise, the test scripts cannot find the
+                      ;; python-tabulate dependency, causing test failures.
+                      (substitute* "test/lit.cfg"
+                        (("addEnv\\('PWD'\\)" env)
+                         (string-append env "\n" "addEnv('GUIX_PYTHONPATH')"))))))))
+    (propagated-inputs (list clang-toolchain-13 llvm-13 python
+                             python-tabulate)) ;users need the matching LLVM
+    (inputs (list klee-uclibc python-lit z3 gperftools sqlite))
+    (synopsis
+     "Symbolic execution engine built on top of the LLVM compiler infrastructure")
+    (description
+     "Dynamic symbolic execution engine built on top of
+LLVM.  Symbolic execution is an automated software testing technique,
+KLEE leverages this technique to automatically generate test cases for
+software compiled to LLVM IR.")
+    (home-page "https://klee.github.io/")
+    (license (list license:expat license:bsd-4))))