diff mbox series

[bug#69591,v3,26/32] gnu: python-pytorch: Update to 2.2.1 and unbundle dependencies.

Message ID 20240320223906.13214-26-david.elsing@posteo.net
State New
Headers show
Series Unbundle and update python-pytorch | expand

Commit Message

David Elsing March 20, 2024, 10:38 p.m. UTC
Autogenerated files are also regenerated. The tests can be run, but are
disabled, as they require a long time.

* gnu/packages/machine-learning.scm (python-pytorch): Update to 2.2.1.
[version]: Use %python-pytorch-version.
[source]: Use %python-pytorch-src.
[arguments]: Remove 'make-things-writable phase.  Add 'cmake-patches,
'disable-avx-dependencies, 'set-max-jobs, 'codegen1, 'codegen2, 'build2,
'install2 phases. Adjust 'use-system-libraries and 'check phases.
[native-inputs]: Add doxygen, ideep-pytorch, pocketfft-cpp, python-expecttest,
python-pytest-flakefinder, python-pytest-rerunfailures-13,
python-pytest-shard, python-pytest-xdist, python-hypothesis,
python-types-dataclasses, python-typing-extensions-4.10 and valgrind.
[inputs]: Add asmjit, clog, flatbuffers-next, foxi, fxdiv, libuv,
miniz-for-pytorch, qnnpack, qnnpack-pytorch and oneapi-dnnl. Use nnpack,
oneapi-dnnl, qnnpack, qnnpack-pytorch and xnnpack only for supported systems.
[propagated-inputs]: Add python-filelock, python-fsspec, python-jinja2,
python-networkx, python-optree, python-packaging, python-psutil and
python-sympy. Remove python-cffi and python-six. Use cpuinfo only for
supported systems.
(%python-pytorch-src)[source]: Add patches.
* gnu/packages/patches/python-pytorch-runpath.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-system-libraries.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch: Remove file.
* gnu/packages/patches/python-pytorch-fix-codegen.patch: New file.
* gnu/packages/patches/python-pytorch-without-kineto: New file.
* gnu/local.mk (dist_patch_DATA): Register them.
---
 gnu/local.mk                                  |   3 +-
 gnu/packages/machine-learning.scm             | 371 ++++++++++++------
 ...ython-pytorch-1.9.0-system-libraries.patch | 139 -------
 .../patches/python-pytorch-fix-codegen.patch  | 167 ++++++++
 .../patches/python-pytorch-runpath.patch      |  19 +-
 .../python-pytorch-system-libraries.patch     | 317 +++++++++++++--
 .../python-pytorch-without-kineto.patch       |  60 +++
 7 files changed, 771 insertions(+), 305 deletions(-)
 delete mode 100644 gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch
 create mode 100644 gnu/packages/patches/python-pytorch-fix-codegen.patch
 create mode 100644 gnu/packages/patches/python-pytorch-without-kineto.patch
diff mbox series

Patch

diff --git a/gnu/local.mk b/gnu/local.mk
index adcb1be9b7..c7dfe1b873 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1930,9 +1930,10 @@  dist_patch_DATA =						\
   %D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \
   %D%/packages/patches/python-pyan3-fix-positional-arguments.patch \
   %D%/packages/patches/python-pygpgme-fix-pinentry-tests.patch	\
+  %D%/packages/patches/python-pytorch-fix-codegen.patch		\
   %D%/packages/patches/python-pytorch-runpath.patch		\
   %D%/packages/patches/python-pytorch-system-libraries.patch	\
-  %D%/packages/patches/python-pytorch-1.9.0-system-libraries.patch \
+  %D%/packages/patches/python-pytorch-without-kineto.patch	\
   %D%/packages/patches/python-robotframework-atest.patch	\
   %D%/packages/patches/python-robotframework-source-date-epoch.patch \
   %D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index c4785ac2f1..a062a2398b 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -105,6 +105,7 @@  (define-module (gnu packages machine-learning)
   #:use-module (gnu packages parallel)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
+  #:use-module (gnu packages pretty-print)
   #:use-module (gnu packages protobuf)
   #:use-module (gnu packages pulseaudio)
   #:use-module (gnu packages python)
@@ -123,6 +124,7 @@  (define-module (gnu packages machine-learning)
   #:use-module (gnu packages swig)
   #:use-module (gnu packages time)
   #:use-module (gnu packages tls)
+  #:use-module (gnu packages valgrind)
   #:use-module (gnu packages vulkan)
   #:use-module (gnu packages video)
   #:use-module (gnu packages web)
@@ -4325,6 +4327,13 @@  (define %python-pytorch-src
     (sha256
      (base32
       "03mm0pwwb5lxdsmmiw3cch9fijgjw81kmmc4ln9rlyazkm7l1r48"))
+    (patches (search-patches "python-pytorch-system-libraries.patch"
+                             "python-pytorch-runpath.patch"
+                             "python-pytorch-without-kineto.patch"
+                             ;; Some autogeneration scripts depend on the
+                             ;; compile PyTorch library. Therefore, we create
+                             ;; dummy versions which are regenerated later.
+                             "python-pytorch-fix-codegen.patch"))
     (modules '((guix build utils)))
     (snippet
      '(begin
@@ -4444,135 +4453,249 @@  (define-public qnnpack-pytorch
 (define-public python-pytorch
   (package
     (name "python-pytorch")
-    (version "1.13.1")
-    (source (origin
-              (method git-fetch)
-              (uri (git-reference
-                    (url "https://github.com/pytorch/pytorch")
-                    (commit (string-append "v" version))
-                    (recursive? #t)))
-              (file-name (git-file-name name version))
-              (sha256
-               (base32
-                "17yxjzwp4zp75fz7czgz9acijzw7dpyqcza50v8y1x7hfg2gw369"))
-              (patches (search-patches "python-pytorch-system-libraries.patch"
-                                       "python-pytorch-runpath.patch"))
-              (modules '((guix build utils)))
-              (snippet
-               '(begin
-                  ;; XXX: Let's be clear: this package is a bundling fest.  We
-                  ;; delete as much as we can, but there's still a lot left.
-                  (for-each (lambda (directory)
-                              (delete-file-recursively
-                               (string-append "third_party/" directory)))
-                            '("benchmark" "cpuinfo" "eigen"
-
-                              ;; FIXME: QNNPACK (of which XNNPACK is a fork)
-                              ;; needs these.
-                              ;; "FP16" "FXdiv" "gemmlowp" "psimd"
-
-                              "gloo" "googletest" "ios-cmake" "NNPACK"
-                              "onnx" "protobuf" "pthreadpool"
-                              "pybind11" "python-enum" "python-peachpy"
-                              "python-six" "tbb" "XNNPACK" "zstd"))
-                  (substitute* "functorch/CMakeLists.txt"
-                    (("\\$\\{_rpath_portable_origin\\}/../torch/lib")
-                     "$ORIGIN/../torch/lib"))))))
+    (version %python-pytorch-version)
+    (source %python-pytorch-src)
     (build-system python-build-system)
     (arguments
-     '(#:phases (modify-phases %standard-phases
-                  (add-before 'build 'use-system-libraries
-                    (lambda* (#:key outputs #:allow-other-keys)
-                      ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
-                      ;; want to use "system libraries" instead of the bundled
-                      ;; ones.
-                      (setenv "USE_SYSTEM_LIBS" "1")
-
-                      (substitute* "cmake/Dependencies.cmake"
-                        (("if\\(USE_SYSTEM_BIND11\\)")
-                         "if(TRUE)"))
-
-                      ;; XXX: Disable that for simplicity for now.
-                      (setenv "USE_FBGEMM" "0")))
-                  (add-before 'build 'make-things-writable
-                    (lambda _
-                      ;; The 'build_caffe2' function in
-                      ;; 'tools/build_pytorch_libs.py', called from the
-                      ;; top-level 'setup.py', needs write access to this
-                      ;; directory.
-                      (for-each make-file-writable
-                                (find-files "caffe2/proto" "."
-                                            #:directories? #t))))
-                  (replace 'check
-                    (lambda* (#:key inputs outputs tests? #:allow-other-keys)
-                      ;; Run the test suite following the instructions in
-                      ;; 'CONTRIBUTING.md'.  XXX: Unfortunately this doesn't
-                      ;; work, unless you set GUIX_PYTHONPATH presumably.
-                      (when tests?
-                        (add-installed-pythonpath inputs outputs)
-                        (invoke "python" "test/run_test.py"))))
-                  (add-after 'install 'remove-test-executables
-                    (lambda* (#:key inputs outputs #:allow-other-keys)
-                      ;; Remove test executables, but keep other executables
-                      ;; such as 'torch_shm_manager' and and .so files such as
-                      ;; 'libtorch_global_deps.so'.
-                      (let ((python-site (site-packages inputs outputs)))
-                        (for-each delete-file
-                                  (find-files python-site
-                                              "(^test_cpp_rpc|_test)$")))))
-                  (add-after 'install 'remove-caffe2-onnx-scripts
-                    (lambda* (#:key outputs #:allow-other-keys)
-                      (let* ((out (assoc-ref outputs "out"))
-                             (bin (string-append out "/bin")))
-                        ;; Remove 'convert-caffe2-to-onnx' and
-                        ;; 'convert-onnx-to-caffe2': they seem to be
-                        ;; deprecated and they cause a failure of the
-                        ;; 'sanity-check' phase:
-                        ;;
-                        ;; ImportError: cannot import name 'metanet_pb2' from partially initialized module 'caffe2.proto' (most likely due to a circular import)
-                        (for-each delete-file
-                                  (find-files bin "^convert-.*caffe2"))
-
-                        (substitute* (find-files out "^entry_points\\.txt$")
-                          (("^convert-.*" all)
-                           (string-append "# " all "\n")))))))
-
-       ;; XXX: Tests attempt to download data such as
-       ;; <https://raw.githubusercontent.com/pytorch/test-infra/master/stats/slow-tests.json>.
-       ;; We're also missing some Python modules, such as expecttest.
-       #:tests? #f))
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'cmake-patches
+            (lambda _
+              (substitute* "cmake/Dependencies.cmake"
+                (("#POCKETFFT_INCLUDE_DIR")
+                 (string-append
+                  #$(this-package-native-input "pocketfft-cpp") "/include"))
+                (("#FP16_INCLUDE_DIR")
+                 (string-append
+                  #$(this-package-input "fp16") "/include")))))
+          (add-before 'build 'use-system-libraries
+            (lambda _
+              (substitute* '("caffe2/serialize/crc.cc"
+                             "caffe2/serialize/inline_container.cc")
+                (("\"miniz\\.h\"") "<miniz/miniz.h>"))
+              (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
+                (("<include/vk_mem_alloc.h>")
+                 "<vk_mem_alloc.h>"))
+              ;; For Vulkan
+              (substitute* "CMakeLists.txt"
+                (("append_cxx_flag.*-Werror=(return-type|range-loop-construct).*") ""))
+              (substitute*
+                  (cons*
+                   "torch/csrc/Module.cpp"
+                   (map
+                    (lambda (name)
+                      (string-append
+                       "torch/utils/benchmark/utils/valgrind_wrapper/"
+                       name))
+                    '("compat_bindings.cpp" "timer_callgrind_template.cpp")))
+                (("<callgrind.h>") "<valgrind/callgrind.h>"))
+              (setenv "USE_FFMPEG" "1")
+              (setenv "USE_VULKAN" "1")
+              (setenv "USE_OPENCV" "1")
+              ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
+              ;; want to use "system libraries" instead of the bundled
+              ;; ones.
+              (setenv "USE_SYSTEM_LIBS" "1")
+              ;; For oneDNN
+              (setenv "USE_MKLDNN" "1")
+              ;; Only works with CUPTI
+              (setenv "USE_KINETO" "0")
+              ;; Prevent CMake error by disabling explicitely
+              (setenv "USE_ITT" "0")
+              ;; Disable on unsupported systems
+              (if #$(not (member
+                          (or (%current-target-system)
+                              (%current-system))
+                          (package-transitive-supported-systems qnnpack)))
+                  (setenv "USE_QNNPACK" "0")
+                  (setenv "USE_PYTORCH_QNNPACK" "0"))))
+          ;; PyTorch is still built with AVX2 and AVX-512 support selected at
+          ;; runtime, but these dependencies require it (nnpack only for
+          ;; x86_64).
+          (add-before 'build 'disable-avx-dependencies
+            (lambda _
+              (setenv "USE_FBGEMM" "0")
+              (if #$(not
+                     (member (or (%current-target-system)
+                                 (%current-system))
+                             '("armhf-linux" "aarch64-linux")))
+                  (setenv "USE_NNPACK" "0"))))
+          (add-after 'use-system-libraries 'set-max-jobs
+            (lambda _
+              (setenv "MAX_JOBS" (number->string (parallel-job-count)))))
+          (add-after 'set-max-jobs 'codegen1
+            (lambda _
+              (with-directory-excursion "torch/csrc/jit/tensorexpr"
+                (setenv "PYTHONPATH" "../../../..")
+                (invoke "python3" "codegen_external.py")
+                (setenv "PYTHONPATH" #f))
+
+              (invoke "python3" "aten/src/ATen/nnapi/codegen.py")
+
+              (invoke "bash" "tools/gen_flatbuffers.sh")
+
+              ;; Generate dummy files as the generation depends on the compiled
+              ;; library. They are regenerated later.
+              (setenv "PYTHONPATH" ".")
+              (invoke "python3"
+                      "torchgen/operator_versions/gen_mobile_upgraders.py"
+                      "dummy")
+              (setenv "PYTHONPATH" #f)
+
+              (invoke "python3"
+                      "torchgen/shape_functions/gen_jit_shape_functions.py"
+                      "dummy")
+
+              (invoke "python3"
+                      "torchgen/decompositions/gen_jit_decompositions.py"
+                      "dummy")))
+          ;; Properly generate autogenerated files ...
+          (add-after 'install 'codegen2
+            (lambda* (#:key inputs outputs #:allow-other-keys)
+              (add-installed-pythonpath inputs outputs)
+              (invoke "python3"
+                      "torchgen/operator_versions/gen_mobile_upgraders.py")
+              (invoke "python3"
+                      "torchgen/shape_functions/gen_jit_shape_functions.py")
+              (invoke "python3"
+                      "torchgen/decompositions/gen_jit_decompositions.py")))
+          ;; ... rebuild their dependencies ...
+          (add-after 'codegen2 'build2
+            (lambda _
+              (invoke "python3" "setup.py" "build")))
+          ;; ... and install again.
+          (add-after 'build2 'install2
+            (lambda _
+              (invoke "python3" "setup.py" "install" (string-append "--prefix=" #$output)
+                      "--no-compile" "--single-version-externally-managed" "--root=/")
+              (invoke "python" "-m" "compileall"
+                      "--invalidation-mode=unchecked-hash" #$output)))
+          (replace 'check
+            (lambda* (#:key tests? #:allow-other-keys)
+              ;; Run the test suite following the instructions in
+              ;; 'CONTRIBUTING.md'. Unfortunately this doesn't work, unless
+              ;; you set PYTHONPATH or GUIX_PYTHONPATH, but this is done in
+              ;; the codegen2 phase already.
+              (when tests?
+                (invoke "python3" "test/run_test.py" "--core"))))
+          (add-after 'install2 'remove-test-executables
+            (lambda* (#:key inputs outputs #:allow-other-keys)
+              ;; Remove test executables, but keep other executables
+              ;; such as 'torch_shm_manager' and and .so files such as
+              ;; 'libtorch_global_deps.so'.
+              (let ((python-site (site-packages inputs outputs)))
+                (for-each delete-file
+                          (find-files python-site
+                                      "(^test_cpp_rpc|_test)$")))))
+          (add-after 'install2 'remove-caffe2-onnx-scripts
+            (lambda* (#:key outputs #:allow-other-keys)
+              (let* ((out (assoc-ref outputs "out"))
+                     (bin (string-append out "/bin")))
+                ;; Remove 'convert-caffe2-to-onnx' and
+                ;; 'convert-onnx-to-caffe2': they seem to be
+                ;; deprecated and they cause a failure of the
+                ;; 'sanity-check' phase:
+                ;;
+                ;; ImportError: cannot import name 'metanet_pb2' from
+                ;; partially initialized module 'caffe2.proto' (most likely
+                ;; due to a circular import)
+                (for-each delete-file
+                          (find-files bin "^convert-.*caffe2"))
+
+                (substitute* (find-files out "^entry_points\\.txt$")
+                  (("^convert-.*" all)
+                   (string-append "# " all "\n")))))))
+
+      ;; Even only the core tests take a very long time to run.
+      #:tests? #f))
     (native-inputs
-     (list cmake ninja))
+     (list cmake
+           doxygen
+           ideep-pytorch
+           ninja
+           pocketfft-cpp
+           python-expecttest
+           python-pytest-flakefinder
+           python-pytest-rerunfailures-13
+           python-pytest-shard
+           python-pytest-xdist
+           python-hypothesis
+           python-types-dataclasses
+           python-typing-extensions-4.10
+           shaderc
+           valgrind))
     (inputs
-     (list eigen
-           ;; ("fmt" ,fmt)
-           fp16
-           gemmlowp
-           googletest
-           googlebenchmark
-           gloo
-           nnpack
-           openblas
-           openmpi
-           pthreadpool
-           protobuf
-           pybind11
-           sleef
-           xnnpack
-           zstd))
+     (append
+      (list asmjit
+            clog
+            eigen
+            ffmpeg
+            flatbuffers-next
+            fmt
+            foxi
+            fp16
+            fxdiv
+            gemmlowp
+            gloo
+            googletest
+            googlebenchmark
+            libuv
+            miniz-for-pytorch
+            openblas
+            opencv
+            openmpi
+            pthreadpool
+            protobuf
+            pybind11
+            sleef
+            tensorpipe
+            vulkan-headers
+            vulkan-loader
+            vulkan-memory-allocator
+            zstd)
+      ;; TODO: fix build on 32 bit systems once Rust is available.
+      (filter
+       (lambda (pkg)
+         (member (or (%current-target-system)
+                     (%current-system))
+                 (package-transitive-supported-systems pkg)))
+       (list oneapi-dnnl
+             qnnpack
+             qnnpack-pytorch
+             xnnpack))
+      ;; nnpack requires AVX2 for x86_64-linux
+      (filter
+       (lambda (pkg)
+         (member (or (%current-target-system)
+                     (%current-system))
+                 '("armhf-linux" "aarch64-linux")))
+       (list nnpack))))
     (propagated-inputs
-     (list python-astunparse
-           python-click
-           python-numpy
-           python-pyyaml
-           python-cffi
-           python-typing-extensions
-           python-future
-           python-six
-           python-requests
-           onnx                             ;propagated for its Python modules
-           onnx-optimizer
-           cpuinfo))
+     (append
+      (list onnx ;propagated for its Python modules
+            onnx-optimizer
+            python-astunparse
+            python-click
+            python-filelock
+            python-fsspec
+            python-future
+            python-jinja2
+            python-networkx
+            python-numpy
+            python-optree
+            python-packaging
+            python-psutil
+            python-pyyaml
+            python-requests
+            python-sympy
+            python-typing-extensions)
+      (filter
+       (lambda (pkg)
+         (member (or (%current-target-system)
+                     (%current-system))
+                 (package-transitive-supported-systems pkg)))
+       (list cpuinfo))))
     (home-page "https://pytorch.org/")
     (synopsis "Python library for tensor computation and deep neural networks")
     (description
diff --git a/gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch b/gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch
deleted file mode 100644
index 76c06520f0..0000000000
--- a/gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch
+++ /dev/null
@@ -1,139 +0,0 @@ 
-Use our own googletest rather than the bundled one.
-Get NNPACK to use our own PeachPy rather than the bundled one.
-
-diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 5d57b9ca78..620cca4e60 100644
---- a/cmake/Dependencies.cmake
-+++ b/cmake/Dependencies.cmake
-@@ -644,11 +644,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
-   # this shouldn't be necessary anymore.
-   get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
-   set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
--  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest)
--  set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp})
--
--  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include)
--  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include)
- 
-   # We will not need to test benchmark lib itself.
-   set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -1485,7 +1480,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
-     endif()
-     set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
-     message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
--    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
-+    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
-   endif()
-   include_directories(${FOXI_INCLUDE_DIRS})
-   list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
-
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 50ebb224ce..5953d9ddf7 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -1632,7 +1632,7 @@ if(BUILD_TEST)
-         if(NOT MSVC)
-           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/affine_quantizer_base.cpp)
-           # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
--          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
-           if(USE_FBGEMM)
-             target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
-           endif()
-@@ -1655,7 +1655,7 @@ if(BUILD_TEST)
-   foreach(test_src ${Caffe2_CPU_TEST_SRCS})
-     get_filename_component(test_name ${test_src} NAME_WE)
-     add_executable(${test_name} "${test_src}")
--    target_link_libraries(${test_name} torch_library gtest_main)
-+    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-     target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-@@ -1673,7 +1673,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       cuda_add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1691,7 +1691,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1709,7 +1709,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
-       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
-
-diff --git a/torch/lib/c10d/test/CMakeLists.txt b/torch/lib/c10d/test/CMakeLists.txt
-index b74d4b65f7..fc7c207505 100644
---- a/torch/lib/c10d/test/CMakeLists.txt
-+++ b/torch/lib/c10d/test/CMakeLists.txt
-@@ -16,24 +16,24 @@ function(c10d_add_test test_src)
-   add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
- endfunction()
- 
--c10d_add_test(FileStoreTest.cpp c10d gtest_main)
--c10d_add_test(TCPStoreTest.cpp c10d gtest_main)
-+c10d_add_test(FileStoreTest.cpp c10d gtest_main gtest)
-+c10d_add_test(TCPStoreTest.cpp c10d gtest_main gtest)
- if(NOT WIN32)
--  c10d_add_test(HashStoreTest.cpp c10d gtest_main)
-+  c10d_add_test(HashStoreTest.cpp c10d gtest_main gtest)
- endif()
- 
- if(USE_CUDA)
-   if(USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp c10d c10d_cuda_test gtest_main)
--    c10d_add_test(ProcessGroupGlooAsyncTest.cpp c10d c10d_cuda_test gtest_main)
-+    c10d_add_test(ProcessGroupGlooTest.cpp c10d c10d_cuda_test gtest_main gtest)
-+    c10d_add_test(ProcessGroupGlooAsyncTest.cpp c10d c10d_cuda_test gtest_main gtest)
-   endif()
-   if(USE_C10D_NCCL)
--    c10d_add_test(ProcessGroupNCCLTest.cpp c10d c10d_cuda_test gtest_main)
-+    c10d_add_test(ProcessGroupNCCLTest.cpp c10d c10d_cuda_test gtest_main gtest)
-     c10d_add_test(ProcessGroupNCCLErrorsTest.cpp c10d c10d_cuda_test
--        gtest_main)
-+        gtest_main gtest)
-   endif()
- else()
-   if(USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp c10d gtest_main)
-+    c10d_add_test(ProcessGroupGlooTest.cpp c10d gtest_main gtest)
-   endif()
- endif()
-
-diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index a41343cbb5..6075bdd0a4 100644
---- a/cmake/External/nnpack.cmake
-+++ b/cmake/External/nnpack.cmake
-@@ -40,7 +40,7 @@ endif()
- # (3) Android, iOS, Linux, macOS - supported
- ##############################################################################
- 
--if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
-+if(FALSE)
-   message(STATUS "Brace yourself, we are building NNPACK")
-   set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
- 
-@@ -114,6 +114,5 @@ endif()
- # (4) Catch-all: not supported.
- ##############################################################################
- 
--message(WARNING "Unknown platform - I don't know how to build NNPACK. "
--                "See cmake/External/nnpack.cmake for details.")
--set(USE_NNPACK OFF)
-+set(NNPACK_FOUND TRUE)
-+set(USE_NNPACK ON)
diff --git a/gnu/packages/patches/python-pytorch-fix-codegen.patch b/gnu/packages/patches/python-pytorch-fix-codegen.patch
new file mode 100644
index 0000000000..cb246b25de
--- /dev/null
+++ b/gnu/packages/patches/python-pytorch-fix-codegen.patch
@@ -0,0 +1,167 @@ 
+This patch fixes some scripts for generating source files.  For
+gen_jit_decompositions.py, gen_mobile_upgraders.py and
+gen_jit_shape_functions.py, which depend on the compiled PyTorch library, the
+option to generate "dummy" source files is added for the initial build, which
+is later corrected.  codegen_external.py is patched to avoid duplicate
+functions and add the static keyword as in the existing generated file.
+
+diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
+index cc0263dbbf..ac34e84b82 100644
+--- a/tools/gen_flatbuffers.sh
++++ b/tools/gen_flatbuffers.sh
+@@ -1,13 +1,13 @@
+ #!/bin/bash
+ ROOT=$(pwd)
+-FF_LOCATION="$ROOT/third_party/flatbuffers"
+-cd "$FF_LOCATION" || exit
+-mkdir build
+-cd build || exit
+-cmake ..
+-cmake --build . --target flatc
+-mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
+-./flatc --cpp --gen-mutable --scoped-enums \
++#FF_LOCATION="$ROOT/third_party/flatbuffers"
++#cd "$FF_LOCATION" || exit
++#mkdir build
++#cd build || exit
++#cmake ..
++#cmake --build . --target flatc
++#mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
++flatc --cpp --gen-mutable --scoped-enums \
+      -o "$ROOT/torch/csrc/jit/serialization" \
+      -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
+ echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
+diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
+index bc69b05162..0f8df81de3 100644
+--- a/torch/csrc/jit/tensorexpr/codegen_external.py
++++ b/torch/csrc/jit/tensorexpr/codegen_external.py
+@@ -20,9 +20,14 @@ def gen_external(native_functions_path, tags_path, external_path):
+     native_functions = parse_native_yaml(native_functions_path, tags_path)
+     func_decls = []
+     func_registrations = []
+-    for func in native_functions:
++    done_names = set()
++    for func in native_functions[0]:
+         schema = func.func
+         name = schema.name.name.base
++        if name in done_names:
++            continue
++        else:
++            done_names.add(name)
+         args = schema.arguments
+         # Only supports extern calls for functions with out variants
+         if not schema.is_out_fn():
+@@ -62,7 +67,7 @@ def gen_external(native_functions_path, tags_path, external_path):
+ 
+         # print(tensor_decls, name, arg_names)
+         func_decl = f"""\
+-void nnc_aten_{name}(
++static void nnc_aten_{name}(
+     int64_t bufs_num,
+     void** buf_data,
+     int64_t* buf_ranks,
+diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
+index 7cfbb803f9..2e69bb1868 100644
+--- a/torchgen/decompositions/gen_jit_decompositions.py
++++ b/torchgen/decompositions/gen_jit_decompositions.py
+@@ -1,8 +1,12 @@
+ #!/usr/bin/env python3
+ import os
+ from pathlib import Path
++import sys
+ 
+-from torch.jit._decompositions import decomposition_table
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++    from torch.jit._decompositions import decomposition_table
++else:
++    decomposition_table = {}
+ 
+ # from torchgen.code_template import CodeTemplate
+ 
+@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> None:
+ 
+ 
+ def main() -> None:
+-    pytorch_dir = Path(__file__).resolve().parents[3]
++    pytorch_dir = Path(__file__).resolve().parents[2]
+     upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime"
+     write_decomposition_util_file(str(upgrader_path))
+ 
+diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
+index dab1568580..55c58715fc 100644
+--- a/torchgen/operator_versions/gen_mobile_upgraders.py
++++ b/torchgen/operator_versions/gen_mobile_upgraders.py
+@@ -2,10 +2,12 @@
+ import os
+ from enum import Enum
+ from pathlib import Path
++import sys
+ from typing import Any, Dict, List
+ 
+-import torch
+-from torch.jit.generate_bytecode import generate_upgraders_bytecode
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++    import torch
++    from torch.jit.generate_bytecode import generate_upgraders_bytecode
+ 
+ from torchgen.code_template import CodeTemplate
+ from torchgen.operator_versions.gen_mobile_upgraders_constant import (
+@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+ def construct_version_maps(
+     upgrader_bytecode_function_to_index_map: Dict[str, Any]
+ ) -> str:
+-    version_map = torch._C._get_operator_version_map()
++    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++        version_map = torch._C._get_operator_version_map()
++    else:
++        version_map = {}
+     sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0])  # type: ignore[no-any-return]
+     sorted_version_map = dict(sorted_version_map_)
+ 
+@@ -378,7 +383,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ 
+ 
+ def main() -> None:
+-    upgrader_list = generate_upgraders_bytecode()
++    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++        upgrader_list = generate_upgraders_bytecode()
++    else:
++        upgrader_list = []
+     sorted_upgrader_list = sort_upgrader(upgrader_list)
+     for up in sorted_upgrader_list:
+         print("after sort upgrader : ", next(iter(up)))
+diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
+index c6336a6951..34e394d818 100644
+--- a/torchgen/shape_functions/gen_jit_shape_functions.py
++++ b/torchgen/shape_functions/gen_jit_shape_functions.py
+@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
+ if not file_path.exists():
+     raise Exception(err_msg)
+ 
+-spec = importlib.util.spec_from_file_location(module_name, file_path)
+-assert spec is not None
+-module = importlib.util.module_from_spec(spec)
+-sys.modules[module_name] = module
+-assert spec.loader is not None
+-assert module is not None
+-spec.loader.exec_module(module)
+-
+-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+-shape_compute_graph_mapping = module.shape_compute_graph_mapping
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++    spec = importlib.util.spec_from_file_location(module_name, file_path)
++    assert spec is not None
++    module = importlib.util.module_from_spec(spec)
++    sys.modules[module_name] = module
++    assert spec.loader is not None
++    assert module is not None
++    spec.loader.exec_module(module)
++
++    bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
++    shape_compute_graph_mapping = module.shape_compute_graph_mapping
++else:
++    bounded_compute_graph_mapping = {}
++    shape_compute_graph_mapping = {}
+ 
+ 
+ SHAPE_HEADER = r"""
diff --git a/gnu/packages/patches/python-pytorch-runpath.patch b/gnu/packages/patches/python-pytorch-runpath.patch
index 7f95b88a2b..2c1724cdb0 100644
--- a/gnu/packages/patches/python-pytorch-runpath.patch
+++ b/gnu/packages/patches/python-pytorch-runpath.patch
@@ -3,10 +3,10 @@  get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
 Make sure RUNPATH matches that.
 
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 5b5622f0..30d27e57 100644
+index 74836372..c8eb69d1 100644
 --- a/caffe2/CMakeLists.txt
 +++ b/caffe2/CMakeLists.txt
-@@ -1909,7 +1909,7 @@ if(BUILD_PYTHON)
+@@ -1910,7 +1910,7 @@ if(BUILD_PYTHON)
    if(${BUILDING_WITH_TORCH_LIBS})
      # site-packages/caffe2/python/caffe2_pybind11_state
      # site-packages/torch/lib
@@ -16,7 +16,7 @@  index 5b5622f0..30d27e57 100644
  
    # Must also include `CMAKE_SHARED_LINKER_FLAGS` in linker flags for
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index ee9cf410..f190e69b 100644
+index acc95842..8f8fb7d7 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -4,7 +4,7 @@ if(APPLE)
@@ -28,3 +28,16 @@  index ee9cf410..f190e69b 100644
  endif(APPLE)
  # Use separate rpaths during build and install phases
  set(CMAKE_SKIP_BUILD_RPATH  FALSE)
+diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
+index f2f32745..db21b656 100644
+--- a/functorch/CMakeLists.txt
++++ b/functorch/CMakeLists.txt
+@@ -21,7 +21,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
+ 
+ set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+       ${CMAKE_BINARY_DIR}/functorch)
+-set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
++set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/../torch/lib")
+ 
+ # Copy-pasted prefix/suffix logic for Python extensions from
+ # https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975
diff --git a/gnu/packages/patches/python-pytorch-system-libraries.patch b/gnu/packages/patches/python-pytorch-system-libraries.patch
index fd849fd9e2..99f999f32f 100644
--- a/gnu/packages/patches/python-pytorch-system-libraries.patch
+++ b/gnu/packages/patches/python-pytorch-system-libraries.patch
@@ -1,38 +1,104 @@ 
-Use our own googletest rather than the bundled one.
-Get NNPACK to use our own PeachPy rather than the bundled one.
+Patch build files to also system libraries instead of bundled ones for the
+libraries not supported or working only by specifying USE_SYSTEM_LIBS.  This
+includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
+ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack,
+qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
+For QNNPACK, two versions were bundled and are required: The upstream one and
+an internal fork (now in the package qnnpack-pytorch).
 
+diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
+index 2c2b967..5ac5fa6 100644
+--- a/aten/src/ATen/CMakeLists.txt
++++ b/aten/src/ATen/CMakeLists.txt
+@@ -371,9 +371,9 @@ if(AT_NNPACK_ENABLED)
+   list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
+ endif()
+ 
+-if(MKLDNN_FOUND)
+-  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
+-endif(MKLDNN_FOUND)
++if(USE_MKLDNN)
++  list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
++endif(USE_MKLDNN)
+ 
+ if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
+   list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index d57d7ebb..5b5622f0 100644
+index 7483637..093de40 100644
 --- a/caffe2/CMakeLists.txt
 +++ b/caffe2/CMakeLists.txt
-@@ -1736,7 +1736,7 @@ if(BUILD_TEST)
+@@ -111,9 +111,6 @@ if(NOT MSVC AND USE_XNNPACK)
+   if(NOT TARGET fxdiv)
+     set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+     set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+-    add_subdirectory(
+-      "${FXDIV_SOURCE_DIR}"
+-      "${CMAKE_BINARY_DIR}/FXdiv")
+   endif()
+ endif()
+ 
+@@ -1055,7 +1052,6 @@ elseif(USE_CUDA)
+ endif()
+ 
+ if(NOT MSVC AND USE_XNNPACK)
+-  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+ endif()
+ 
+ # ==========================================================
+@@ -1396,6 +1392,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+ target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
+ target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
+ target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
++target_link_libraries(torch_cpu PRIVATE miniz clog)
+ if(USE_MPI)
+   target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
+ endif()
+@@ -1653,7 +1650,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+   add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
+   add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
+   target_link_libraries(static_runtime_bench torch_library benchmark)
+-  target_link_libraries(static_runtime_test torch_library gtest_main)
++  target_link_libraries(static_runtime_test torch_library gtest_main gtest)
+ endif()
+ 
+ if(BUILD_TENSOREXPR_BENCHMARK)
+@@ -1680,7 +1677,7 @@ if(BUILD_MOBILE_TEST)
+   foreach(test_src ${ATen_MOBILE_TEST_SRCS})
+     get_filename_component(test_name ${test_src} NAME_WE)
+     add_executable(${test_name} "${test_src}")
+-    target_link_libraries(${test_name} torch_library gtest_main)
++    target_link_libraries(${test_name} torch_library gtest_main gtest)
+     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+     target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
+@@ -1701,7 +1698,7 @@ if(BUILD_TEST)
          if(NOT MSVC)
-           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/affine_quantizer_base.cpp)
+           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
            # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
 -          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
 +          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
            if(USE_FBGEMM)
              target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
            endif()
-@@ -1759,7 +1759,7 @@ if(BUILD_TEST)
+@@ -1715,7 +1712,7 @@ if(BUILD_TEST)
+           endif()
+         else()
+           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
+-          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
++          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest)
+         endif()
+         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
+         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+@@ -1732,7 +1729,7 @@ if(BUILD_TEST)
    foreach(test_src ${Caffe2_CPU_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
 -    target_link_libraries(${test_name} torch_library gtest_main)
 +    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     if(USE_OPENMP)
-       # -fopenmp is a compile time flag and as result not guaranteed
-       # to link executable against OpenMP runtime library
-@@ -1785,7 +1785,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1803,7 +1803,7 @@ if(BUILD_TEST)
+     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+     target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+@@ -1795,7 +1792,7 @@ if(BUILD_TEST)
      foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
        get_filename_component(test_name ${test_src} NAME_WE)
        add_executable(${test_name} "${test_src}")
@@ -41,20 +107,66 @@  index d57d7ebb..5b5622f0 100644
        target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
        target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
        add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1821,7 +1821,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
-       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
+diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
+index 1552b59..67e1a9a 100644
+--- a/caffe2/serialize/CMakeLists.txt
++++ b/caffe2/serialize/CMakeLists.txt
+@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
+ 
+ set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
+ list(APPEND Caffe2_CPU_SRCS
+-  ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c
+   ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
+   ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
+   ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 557ab649..ee9cf410 100644
+index acc9584..97275bf 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -732,11 +732,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
+@@ -283,7 +283,7 @@ endif()
+ # --- [ PocketFFT
+ set(AT_POCKETFFT_ENABLED 0)
+ if(NOT AT_MKL_ENABLED)
+-  set(POCKETFFT_INCLUDE_DIR "${Torch_SOURCE_DIR}/third_party/pocketfft/")
++  set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR")
+   if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
+     message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
+   elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
+@@ -489,19 +489,6 @@ if(USE_QNNPACK)
+     set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
+     set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
+     set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
+-    add_subdirectory(
+-      "${QNNPACK_SOURCE_DIR}"
+-      "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
+-
+-    # TODO: See https://github.com/pytorch/pytorch/issues/56285
+-    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+-      target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
+-    endif()
+-
+-    # We build static versions of QNNPACK and pthreadpool but link
+-    # them into a shared library for Caffe2, so they need PIC.
+-    set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
+-    set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
+ 
+     if(QNNPACK_CUSTOM_THREADPOOL)
+       target_compile_definitions(
+@@ -550,13 +537,6 @@ if(USE_PYTORCH_QNNPACK)
+       set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
+       set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
+       set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
+-      add_subdirectory(
+-        "${PYTORCH_QNNPACK_SOURCE_DIR}"
+-        "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack")
+-      # We build static versions of QNNPACK and pthreadpool but link
+-      # them into a shared library for Caffe2, so they need PIC.
+-      set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
+-      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
+ 
+       if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
+         target_compile_definitions(
+@@ -728,11 +708,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
    # this shouldn't be necessary anymore.
    get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
    set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -66,7 +178,49 @@  index 557ab649..ee9cf410 100644
  
    # We will not need to test benchmark lib itself.
    set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -1543,7 +1538,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+@@ -810,16 +785,6 @@ if(USE_FBGEMM)
+     if(USE_ASAN)
+       set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
+     endif()
+-    add_subdirectory("${FBGEMM_SOURCE_DIR}")
+-    set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
+-    set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE ON)
+-    set_property(TARGET fbgemm_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON)
+-    set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON)
+-    if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0)
+-      # See https://github.com/pytorch/pytorch/issues/74352
+-      target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
+-      target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
+-    endif()
+   endif()
+ 
+   if(USE_FBGEMM)
+@@ -979,7 +944,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
+     "${FP16_SOURCE_DIR}"
+     "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
+ elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
+-  add_library(fp16 STATIC "/usr/include/fp16.h")
++  add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
+   set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
+ endif()
+ list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
+@@ -1362,7 +1327,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+ 
+     # Tensorpipe uses cuda_add_library
+     torch_update_find_cuda_flags()
+-    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
+ 
+     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
+     if(USE_CUDA)
+@@ -1529,7 +1493,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+       set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
+     endif()
+   endif()
+-  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
+ 
+   add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
+   if(NOT USE_SYSTEM_ONNX)
+@@ -1560,7 +1523,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
      endif()
      set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
      message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
@@ -75,8 +229,36 @@  index 557ab649..ee9cf410 100644
    endif()
    include_directories(${FOXI_INCLUDE_DIRS})
    list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+@@ -1739,9 +1702,8 @@ if(NOT INTERN_BUILD_MOBILE)
+   endif()
+   if(USE_MKLDNN)
+     include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
+-    if(MKLDNN_FOUND)
++    if(DNNL_FOUND)
+       set(AT_MKLDNN_ENABLED 1)
+-      include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
+       if(BUILD_CAFFE2_OPS)
+         list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn)
+       endif(BUILD_CAFFE2_OPS)
+@@ -1796,7 +1758,7 @@ endif()
+ #
+ set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
+ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
+-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
++find_package(fmt)
+ 
+ # Disable compiler feature checks for `fmt`.
+ #
+@@ -1805,7 +1767,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+ # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
+ # `fmt` is compatible with a superset of the compilers that PyTorch is, it
+ # shouldn't be too bad to just disable the checks.
+-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
+ 
+ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
 diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index a41343cb..6075bdd0 100644
+index 9d5f064..c3624e5 100644
 --- a/cmake/External/nnpack.cmake
 +++ b/cmake/External/nnpack.cmake
 @@ -40,7 +40,7 @@ endif()
@@ -88,7 +270,7 @@  index a41343cb..6075bdd0 100644
    message(STATUS "Brace yourself, we are building NNPACK")
    set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
  
-@@ -114,6 +114,5 @@ endif()
+@@ -112,6 +112,5 @@ endif()
  # (4) Catch-all: not supported.
  ##############################################################################
  
@@ -97,8 +279,45 @@  index a41343cb..6075bdd0 100644
 -set(USE_NNPACK OFF)
 +set(NNPACK_FOUND TRUE)
 +set(USE_NNPACK ON)
+diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
+index 8793562..9f8fa3d 100644
+--- a/cmake/public/mkldnn.cmake
++++ b/cmake/public/mkldnn.cmake
+@@ -4,7 +4,7 @@ if(CPU_AARCH64)
+   include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
+ endif()
+ 
+-find_package(MKLDNN QUIET)
++find_package(DNNL REQUIRED)
+ 
+ if(NOT TARGET caffe2::mkldnn)
+   add_library(caffe2::mkldnn INTERFACE IMPORTED)
+@@ -15,4 +15,4 @@ set_property(
+   ${MKLDNN_INCLUDE_DIR})
+ set_property(
+   TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
+-  ${MKLDNN_LIBRARIES})
++  DNNL::dnnl)
+diff --git a/setup.py b/setup.py
+index 81f3c6c..3251cab 100644
+--- a/setup.py
++++ b/setup.py
+@@ -482,13 +482,9 @@ def build_deps():
+     # Windows has very poor support for them.
+     sym_files = [
+         "tools/shared/_utils_internal.py",
+-        "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
+-        "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
+     ]
+     orig_files = [
+         "torch/_utils_internal.py",
+-        "third_party/valgrind-headers/callgrind.h",
+-        "third_party/valgrind-headers/valgrind.h",
+     ]
+     for sym_file, orig_file in zip(sym_files, orig_files):
+         same = False
 diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
-index bf91460c..ef56948f 100644
+index 5c89748..ef84c57 100644
 --- a/test/cpp/c10d/CMakeLists.txt
 +++ b/test/cpp/c10d/CMakeLists.txt
 @@ -16,14 +16,14 @@ function(c10d_add_test test_src)
@@ -133,7 +352,29 @@  index bf91460c..ef56948f 100644
    endif()
    if(USE_NCCL AND USE_C10D_NCCL)
      # NCCL is a private dependency of libtorch, but the tests include some
-@@ -56,7 +56,7 @@ if(USE_CUDA)
+@@ -44,10 +44,10 @@ if(USE_CUDA)
+     # a private dependency of the tests as well.
+     c10d_add_test(
+       ProcessGroupNCCLTest.cpp
+-      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
++      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+     c10d_add_test(
+       ProcessGroupNCCLErrorsTest.cpp
+-      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
++      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+     if(INSTALL_TEST)
+       install(TARGETS ProcessGroupNCCLTest DESTINATION bin)
+       install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin)
+@@ -61,7 +61,7 @@ if(USE_CUDA)
+     # a private dependency of the tests as well.
+     c10d_add_test(
+       ProcessGroupUCCTest.cpp
+-      torch_cpu c10d_cuda_test gtest_main __caffe2_ucc)
++      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc)
+     if(INSTALL_TEST)
+       install(TARGETS ProcessGroupUCCTest DESTINATION bin)
+       install(TARGETS c10d_cuda_test DESTINATION lib)
+@@ -69,7 +69,7 @@ if(USE_CUDA)
    endif()
  else()
    if(USE_GLOO AND USE_C10D_GLOO)
@@ -143,10 +384,10 @@  index bf91460c..ef56948f 100644
  endif()
  
 diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 8fc5a0a1..643202f6 100644
+index 012471d..d39b625 100644
 --- a/test/cpp/tensorexpr/CMakeLists.txt
 +++ b/test/cpp/tensorexpr/CMakeLists.txt
-@@ -53,7 +53,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
+@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
  # pthreadpool header. For some build environment we need add the dependency
  # explicitly.
  if(USE_PTHREADPOOL)
@@ -154,4 +395,4 @@  index 8fc5a0a1..643202f6 100644
 +  target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
  endif()
  if(USE_CUDA)
-   target_link_libraries(test_tensorexpr PRIVATE
+   target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
diff --git a/gnu/packages/patches/python-pytorch-without-kineto.patch b/gnu/packages/patches/python-pytorch-without-kineto.patch
new file mode 100644
index 0000000000..f956316866
--- /dev/null
+++ b/gnu/packages/patches/python-pytorch-without-kineto.patch
@@ -0,0 +1,60 @@ 
+Even when building without Kineto, the <ActivityType.h> header is still
+imported and the ActivityType type is used. This patch was copied from
+https://github.com/pytorch/pytorch/pull/111048.
+
+diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
+index e92cbf00..68985ab7 100644
+--- a/torch/csrc/profiler/kineto_shim.h
++++ b/torch/csrc/profiler/kineto_shim.h
+@@ -12,7 +12,51 @@
+ #undef USE_KINETO
+ #endif
+ 
++#ifdef USE_KINETO
+ #include <ActivityType.h>
++#else
++namespace libkineto {
++// copied from header
++/*
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
++ * All rights reserved.
++ *
++ * This source code is licensed under the BSD-style license found in the
++ * LICENSE file in the root directory of this source tree.
++ */
++
++// Note : All activity types are not enabled by default. Please add them
++// at correct position in the enum
++enum class ActivityType {
++    // Activity types enabled by default
++    CPU_OP = 0, // cpu side ops
++    USER_ANNOTATION,
++    GPU_USER_ANNOTATION,
++    GPU_MEMCPY,
++    GPU_MEMSET,
++    CONCURRENT_KERNEL, // on-device kernels
++    EXTERNAL_CORRELATION,
++    CUDA_RUNTIME, // host side cuda runtime events
++    CUDA_DRIVER, // host side cuda driver events
++    CPU_INSTANT_EVENT, // host side point-like events
++    PYTHON_FUNCTION,
++    OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
++
++    // Optional Activity types
++    CUDA_SYNC, // synchronization events between runtime and kernels
++    GLOW_RUNTIME, // host side glow runtime events
++    MTIA_RUNTIME, // host side MTIA runtime events
++    CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
++    MTIA_CCP_EVENTS, // MTIA ondevice CCP events
++    HPU_OP, // HPU host side runtime event
++    XPU_RUNTIME, // host side xpu runtime events
++
++    ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
++    OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
++};
++}
++
++#endif
+ 
+ #include <torch/csrc/Export.h>
+ #include <torch/csrc/profiler/api.h>