diff mbox series

[bug#72471,20/24] gnu: python-pytorch: Update to 2.4.0.

Message ID 20240804221635.3805-20-david.elsing@posteo.net
State New
Headers show
Series Update PyTorch and dependencies | expand

Commit Message

David Elsing Aug. 4, 2024, 10:15 p.m. UTC
This also updates the qnnpack-pytorch package.

* gnu/packages/machine-learning.scm (%python-pytorch-version): Set to 2.4.0.
(%python-pytorch-src): Adjust hash accordingly.
[source]: Remove obsolete file deletions in snippet.
(python-pytorch)[arguments]<#:phases>: Disable telemetry and set Python
install directory.  Remove obsolete USE_FFMPEG, USE_OPENCV and
USE_PYTORCH_QNNPACK environment variables.
[native-inputs]: Remove python-typing-extensions.
[inputs]: Add brotli, cpp-httplib, openssl and zlib.  Remove ffmpeg, opencv
and qnnpack.  Use oneapi-dnnl, qnnpack-pytorch and xnnpack for all systems.
[propagated-inputs]: Use cpuinfo for all systems.
[supported-systems]: New field.
(python-pytorch-for-r-torch)[inputs]: Add qnnpack.
* gnu/packages/patches/python-pytorch-fix-codegen.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-runpath.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-system-libraries.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-without-kineto.patch: Adjust patch.
---
 gnu/packages/machine-learning.scm             |  76 +++++------
 .../patches/python-pytorch-fix-codegen.patch  |  26 ++--
 .../patches/python-pytorch-runpath.patch      |  19 +--
 .../python-pytorch-system-libraries.patch     | 122 +++++++++---------
 .../python-pytorch-without-kineto.patch       |  10 +-
 5 files changed, 119 insertions(+), 134 deletions(-)
diff mbox series

Patch

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index b66071effc..822881f15a 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -4466,7 +4466,7 @@  (define-public ideep-pytorch-for-r-torch
         (base32
          "0hdpkhcjry22fjx2zg2r48v7f4ljrclzj0li2pgk76kvyblfbyvm"))))))
 
-(define %python-pytorch-version "2.2.1")
+(define %python-pytorch-version "2.4.0")
 
 (define %python-pytorch-src
   (origin
@@ -4477,7 +4477,7 @@  (define %python-pytorch-src
     (file-name (git-file-name "python-pytorch" %python-pytorch-version))
     (sha256
      (base32
-      "03mm0pwwb5lxdsmmiw3cch9fijgjw81kmmc4ln9rlyazkm7l1r48"))
+      "18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
     (patches (search-patches "python-pytorch-system-libraries.patch"
                              "python-pytorch-runpath.patch"
                              "python-pytorch-without-kineto.patch"
@@ -4505,14 +4505,6 @@  (define %python-pytorch-src
          delete-file
          '("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
            "aten/src/ATen/nnapi/nnapi_wrapper.h"
-           "caffe2/mobile/contrib/ios/mpscnn/mpscnn_kernels.h"
-           "caffe2/proto/caffe2_legacy_pb2.pyi"
-           "caffe2/proto/caffe2_pb2.pyi"
-           "caffe2/proto/hsm_pb2.pyi"
-           "caffe2/proto/metanet_pb2.pyi"
-           "caffe2/proto/predictor_consts_pb2.pyi"
-           "caffe2/proto/prof_dag_pb2.pyi"
-           "caffe2/proto/torch_pb2.pyi"
            ;; These files contain just lists of floating point values and
            ;; might be as well hand-written.
            ;; "test/cpp/api/init_baseline.h"
@@ -4619,7 +4611,18 @@  (define-public python-pytorch
                   #$(this-package-native-input "pocketfft-cpp") "/include"))
                 (("#FP16_INCLUDE_DIR")
                  (string-append
-                  #$(this-package-input "fp16") "/include")))))
+                  #$(this-package-input "fp16") "/include"))
+                ;; Disable opentelemetry
+                ((".*(add_library|target_include_directories).*opentelemetry.*")
+                 ""))
+              (substitute* "torch/CMakeLists.txt"
+                ((".*opentelemetry.*") ""))
+              ;; Fix Python install directory
+              (substitute* "caffe2/CMakeLists.txt"
+                (("\\$\\{Python_SITELIB\\}")
+                 (string-append #$output "/lib/python"
+                                #$(version-major+minor (package-version python))
+                                "/site-packages")))))
           (add-before 'build 'use-system-libraries
             (lambda _
               (substitute* '("caffe2/serialize/crc.cc"
@@ -4641,9 +4644,7 @@  (define-public python-pytorch
                        name))
                     '("compat_bindings.cpp" "timer_callgrind_template.cpp")))
                 (("<callgrind.h>") "<valgrind/callgrind.h>"))
-              (setenv "USE_FFMPEG" "1")
               (setenv "USE_VULKAN" "1")
-              (setenv "USE_OPENCV" "1")
               ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
               ;; want to use "system libraries" instead of the bundled
               ;; ones.
@@ -4659,8 +4660,7 @@  (define-public python-pytorch
                           (or (%current-target-system)
                               (%current-system))
                           (package-transitive-supported-systems qnnpack)))
-                  (setenv "USE_QNNPACK" "0")
-                  (setenv "USE_PYTORCH_QNNPACK" "0"))))
+                  (setenv "USE_QNNPACK" "0"))))
           ;; PyTorch is still built with AVX2 and AVX-512 support selected at
           ;; runtime, but these dependencies require it (nnpack only for
           ;; x86_64).
@@ -4773,15 +4773,15 @@  (define-public python-pytorch
            python-pytest-xdist
            python-hypothesis
            python-types-dataclasses
-           python-typing-extensions
            shaderc
            valgrind))
     (inputs
      (append
       (list asmjit
+            brotli ; for cpp-httplib
             clog
+            cpp-httplib
             eigen
-            ffmpeg
             flatbuffers-next
             fmt
             foxi
@@ -4793,38 +4793,33 @@  (define-public python-pytorch
             googlebenchmark
             libuv
             miniz-for-pytorch
+            oneapi-dnnl
             openblas
-            opencv
             openmpi
+            openssl ; for cpp-httplib
             pthreadpool
             protobuf
             pybind11
+            ;; qnnpack
+            qnnpack-pytorch
             sleef
             tensorpipe
             vulkan-headers
             vulkan-loader
             vulkan-memory-allocator
+            xnnpack
+            zlib ; for cpp-httplib
             zstd)
-      ;; TODO: fix build on 32 bit systems once Rust is available.
-      (filter
-       (lambda (pkg)
-         (member (or (%current-target-system)
-                     (%current-system))
-                 (package-transitive-supported-systems pkg)))
-       (list oneapi-dnnl
-             qnnpack
-             qnnpack-pytorch
-             xnnpack))
       ;; nnpack requires AVX2 for x86_64-linux
-      (filter
-       (lambda (pkg)
-         (member (or (%current-target-system)
-                     (%current-system))
-                 '("armhf-linux" "aarch64-linux")))
-       (list nnpack))))
+      (if (equal? (or (%current-target-system)
+                      (%current-system))
+                  "aarch64-linux")
+          (list nnpack)
+          '())))
     (propagated-inputs
      (append
-      (list onnx ;propagated for its Python modules
+      (list cpuinfo
+            onnx ;propagated for its Python modules
             onnx-optimizer
             python-astunparse
             python-click
@@ -4841,15 +4836,11 @@  (define-public python-pytorch
             python-pyyaml
             python-requests
             python-sympy
-            python-typing-extensions)
-      (filter
-       (lambda (pkg)
-         (member (or (%current-target-system)
-                     (%current-system))
-                 (package-transitive-supported-systems pkg)))
-       (list cpuinfo))))
+            python-typing-extensions)))
     (home-page "https://pytorch.org/")
     (synopsis "Python library for tensor computation and deep neural networks")
+    ;; TODO: Support other 64-bit systems.
+    (supported-systems '("x86_64-linux" "aarch64-linux"))
     (description
      "PyTorch is a Python package that provides two high-level features:
 
@@ -4927,6 +4918,7 @@  (define-public python-pytorch-for-r-torch
        (replace "ideep-pytorch" ideep-pytorch-for-r-torch)))
     (inputs
      (modify-inputs (package-inputs python-pytorch)
+       (prepend qnnpack)
        (replace "qnnpack-pytorch" qnnpack-pytorch-for-r-torch)
        (replace "oneapi-dnnl" oneapi-dnnl-for-r-torch)
        (replace "xnnpack" xnnpack-for-r-torch)))
diff --git a/gnu/packages/patches/python-pytorch-fix-codegen.patch b/gnu/packages/patches/python-pytorch-fix-codegen.patch
index cb246b25de..b30094de09 100644
--- a/gnu/packages/patches/python-pytorch-fix-codegen.patch
+++ b/gnu/packages/patches/python-pytorch-fix-codegen.patch
@@ -6,7 +6,7 @@  is later corrected.  codegen_external.py is patched to avoid duplicate
 functions and add the static keyword as in the existing generated file.
 
 diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbbf..ac34e84b82 100644
+index cc0263d..ac34e84 100644
 --- a/tools/gen_flatbuffers.sh
 +++ b/tools/gen_flatbuffers.sh
 @@ -1,13 +1,13 @@
@@ -32,10 +32,10 @@  index cc0263dbbf..ac34e84b82 100644
       -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
  echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
 diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index bc69b05162..0f8df81de3 100644
+index 5dcf1b2..0e20b0c 100644
 --- a/torch/csrc/jit/tensorexpr/codegen_external.py
 +++ b/torch/csrc/jit/tensorexpr/codegen_external.py
-@@ -20,9 +20,14 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
      native_functions = parse_native_yaml(native_functions_path, tags_path)
      func_decls = []
      func_registrations = []
@@ -51,7 +51,7 @@  index bc69b05162..0f8df81de3 100644
          args = schema.arguments
          # Only supports extern calls for functions with out variants
          if not schema.is_out_fn():
-@@ -62,7 +67,7 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
  
          # print(tensor_decls, name, arg_names)
          func_decl = f"""\
@@ -61,7 +61,7 @@  index bc69b05162..0f8df81de3 100644
      void** buf_data,
      int64_t* buf_ranks,
 diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index 7cfbb803f9..2e69bb1868 100644
+index 7a0024f..6b2445f 100644
 --- a/torchgen/decompositions/gen_jit_decompositions.py
 +++ b/torchgen/decompositions/gen_jit_decompositions.py
 @@ -1,8 +1,12 @@
@@ -88,12 +88,12 @@  index 7cfbb803f9..2e69bb1868 100644
      write_decomposition_util_file(str(upgrader_path))
  
 diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index dab1568580..55c58715fc 100644
+index 2907076..6866332 100644
 --- a/torchgen/operator_versions/gen_mobile_upgraders.py
 +++ b/torchgen/operator_versions/gen_mobile_upgraders.py
-@@ -2,10 +2,12 @@
- import os
+@@ -3,10 +3,12 @@ import os
  from enum import Enum
+ from operator import itemgetter
  from pathlib import Path
 +import sys
  from typing import Any, Dict, List
@@ -106,7 +106,7 @@  index dab1568580..55c58715fc 100644
  
  from torchgen.code_template import CodeTemplate
  from torchgen.operator_versions.gen_mobile_upgraders_constant import (
-@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+@@ -263,7 +265,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
  def construct_version_maps(
      upgrader_bytecode_function_to_index_map: Dict[str, Any]
  ) -> str:
@@ -115,10 +115,10 @@  index dab1568580..55c58715fc 100644
 +        version_map = torch._C._get_operator_version_map()
 +    else:
 +        version_map = {}
-     sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0])  # type: ignore[no-any-return]
+     sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0))  # type: ignore[no-any-return]
      sorted_version_map = dict(sorted_version_map_)
  
-@@ -378,7 +383,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  
  
  def main() -> None:
@@ -131,12 +131,12 @@  index dab1568580..55c58715fc 100644
      for up in sorted_upgrader_list:
          print("after sort upgrader : ", next(iter(up)))
 diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index c6336a6951..34e394d818 100644
+index bdfd5c7..72b237a 100644
 --- a/torchgen/shape_functions/gen_jit_shape_functions.py
 +++ b/torchgen/shape_functions/gen_jit_shape_functions.py
 @@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
  if not file_path.exists():
-     raise Exception(err_msg)
+     raise Exception(err_msg)  # noqa: TRY002
  
 -spec = importlib.util.spec_from_file_location(module_name, file_path)
 -assert spec is not None
diff --git a/gnu/packages/patches/python-pytorch-runpath.patch b/gnu/packages/patches/python-pytorch-runpath.patch
index 2c1724cdb0..811de9e288 100644
--- a/gnu/packages/patches/python-pytorch-runpath.patch
+++ b/gnu/packages/patches/python-pytorch-runpath.patch
@@ -2,21 +2,8 @@  Libraries (such as 'libtorch_cpu.so') and executables (such as 'torch_shm_manage
 get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
 Make sure RUNPATH matches that.
 
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 74836372..c8eb69d1 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -1910,7 +1910,7 @@ if(BUILD_PYTHON)
-   if(${BUILDING_WITH_TORCH_LIBS})
-     # site-packages/caffe2/python/caffe2_pybind11_state
-     # site-packages/torch/lib
--    set(caffe2_pybind11_rpath "${_rpath_portable_origin}/../../torch/lib")
-+    set(caffe2_pybind11_rpath $ORIGIN/../../torch/lib)
-   endif(${BUILDING_WITH_TORCH_LIBS})
- 
-   # Must also include `CMAKE_SHARED_LINKER_FLAGS` in linker flags for
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index acc95842..8f8fb7d7 100644
+index f1f2eb7..cb5caea 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -4,7 +4,7 @@ if(APPLE)
@@ -29,10 +16,10 @@  index acc95842..8f8fb7d7 100644
  # Use separate rpaths during build and install phases
  set(CMAKE_SKIP_BUILD_RPATH  FALSE)
 diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
-index f2f32745..db21b656 100644
+index bdfa4bf..2a75e38 100644
 --- a/functorch/CMakeLists.txt
 +++ b/functorch/CMakeLists.txt
-@@ -21,7 +21,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
+@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
  
  set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
        ${CMAKE_BINARY_DIR}/functorch)
diff --git a/gnu/packages/patches/python-pytorch-system-libraries.patch b/gnu/packages/patches/python-pytorch-system-libraries.patch
index 99f999f32f..025b3241a1 100644
--- a/gnu/packages/patches/python-pytorch-system-libraries.patch
+++ b/gnu/packages/patches/python-pytorch-system-libraries.patch
@@ -1,16 +1,14 @@ 
 Patch build files to also system libraries instead of bundled ones for the
 libraries not supported or working only by specifying USE_SYSTEM_LIBS.  This
 includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
-ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack,
+httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
 qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
-For QNNPACK, two versions were bundled and are required: The upstream one and
-an internal fork (now in the package qnnpack-pytorch).
 
 diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 2c2b967..5ac5fa6 100644
+index 0087dd9..0235fa1 100644
 --- a/aten/src/ATen/CMakeLists.txt
 +++ b/aten/src/ATen/CMakeLists.txt
-@@ -371,9 +371,9 @@ if(AT_NNPACK_ENABLED)
+@@ -419,9 +419,9 @@ if(AT_NNPACK_ENABLED)
    list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
  endif()
  
@@ -24,10 +22,10 @@  index 2c2b967..5ac5fa6 100644
  if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
    list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 7483637..093de40 100644
+index 89c31fa..e6d9ef1 100644
 --- a/caffe2/CMakeLists.txt
 +++ b/caffe2/CMakeLists.txt
-@@ -111,9 +111,6 @@ if(NOT MSVC AND USE_XNNPACK)
+@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
    if(NOT TARGET fxdiv)
      set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
      set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -37,7 +35,7 @@  index 7483637..093de40 100644
    endif()
  endif()
  
-@@ -1055,7 +1052,6 @@ elseif(USE_CUDA)
+@@ -1075,7 +1072,6 @@ if(USE_XPU)
  endif()
  
  if(NOT MSVC AND USE_XNNPACK)
@@ -45,7 +43,17 @@  index 7483637..093de40 100644
  endif()
  
  # ==========================================================
-@@ -1396,6 +1392,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+@@ -1189,6 +1185,9 @@ endif()
+ target_include_directories(torch_cpu PRIVATE
+   ${TORCH_ROOT}/third_party/cpp-httplib)
+ 
++find_package(httplib REQUIRED)
++target_link_libraries(torch_cpu PUBLIC httplib::httplib)
++
+ install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
+   DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
+   FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
+@@ -1417,6 +1416,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
  target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
  target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
  target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
@@ -53,7 +61,7 @@  index 7483637..093de40 100644
  if(USE_MPI)
    target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
  endif()
-@@ -1653,7 +1650,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+@@ -1694,7 +1694,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
    add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
    add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
    target_link_libraries(static_runtime_bench torch_library benchmark)
@@ -61,8 +69,8 @@  index 7483637..093de40 100644
 +  target_link_libraries(static_runtime_test torch_library gtest_main gtest)
  endif()
  
- if(BUILD_TENSOREXPR_BENCHMARK)
-@@ -1680,7 +1677,7 @@ if(BUILD_MOBILE_TEST)
+ if(BUILD_MOBILE_BENCHMARK)
+@@ -1713,7 +1713,7 @@ if(BUILD_MOBILE_TEST)
    foreach(test_src ${ATen_MOBILE_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
@@ -71,7 +79,7 @@  index 7483637..093de40 100644
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
      target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
-@@ -1701,7 +1698,7 @@ if(BUILD_TEST)
+@@ -1734,7 +1734,7 @@ if(BUILD_TEST)
          if(NOT MSVC)
            add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
            # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
@@ -80,25 +88,25 @@  index 7483637..093de40 100644
            if(USE_FBGEMM)
              target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
            endif()
-@@ -1715,7 +1712,7 @@ if(BUILD_TEST)
+@@ -1748,7 +1748,7 @@ if(BUILD_TEST)
            endif()
          else()
            add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
--          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest)
+-          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
++          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
          endif()
          target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
          target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-@@ -1732,7 +1729,7 @@ if(BUILD_TEST)
+@@ -1765,7 +1765,7 @@ if(BUILD_TEST)
    foreach(test_src ${Caffe2_CPU_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
 -    target_link_libraries(${test_name} torch_library gtest_main)
 +    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-     target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-@@ -1795,7 +1792,7 @@ if(BUILD_TEST)
+     if(NOT MSVC)
+       target_link_libraries(${test_name} stdc++)
+     endif()
+@@ -1845,7 +1845,7 @@ if(BUILD_TEST)
      foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
        get_filename_component(test_name ${test_src} NAME_WE)
        add_executable(${test_name} "${test_src}")
@@ -120,10 +128,10 @@  index 1552b59..67e1a9a 100644
    ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index acc9584..97275bf 100644
+index f1f2eb7..b4d2033 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -283,7 +283,7 @@ endif()
+@@ -263,7 +263,7 @@ endif()
  # --- [ PocketFFT
  set(AT_POCKETFFT_ENABLED 0)
  if(NOT AT_MKL_ENABLED)
@@ -132,27 +140,7 @@  index acc9584..97275bf 100644
    if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
      message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
    elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
-@@ -489,19 +489,6 @@ if(USE_QNNPACK)
-     set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
-     set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
-     set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
--    add_subdirectory(
--      "${QNNPACK_SOURCE_DIR}"
--      "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
--
--    # TODO: See https://github.com/pytorch/pytorch/issues/56285
--    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
--      target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
--    endif()
--
--    # We build static versions of QNNPACK and pthreadpool but link
--    # them into a shared library for Caffe2, so they need PIC.
--    set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
--    set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
- 
-     if(QNNPACK_CUSTOM_THREADPOOL)
-       target_compile_definitions(
-@@ -550,13 +537,6 @@ if(USE_PYTORCH_QNNPACK)
+@@ -458,15 +458,6 @@ if(USE_PYTORCH_QNNPACK)
        set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
        set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
        set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ -163,10 +151,12 @@  index acc9584..97275bf 100644
 -      # them into a shared library for Caffe2, so they need PIC.
 -      set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
 -      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
+-      # QNNPACK depends on gemmlowp headers
+-      target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
  
        if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
          target_compile_definitions(
-@@ -728,11 +708,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
+@@ -653,11 +644,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
    # this shouldn't be necessary anymore.
    get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
    set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -178,7 +168,7 @@  index acc9584..97275bf 100644
  
    # We will not need to test benchmark lib itself.
    set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -810,16 +785,6 @@ if(USE_FBGEMM)
+@@ -735,16 +721,6 @@ if(USE_FBGEMM)
      if(USE_ASAN)
        set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
      endif()
@@ -195,7 +185,7 @@  index acc9584..97275bf 100644
    endif()
  
    if(USE_FBGEMM)
-@@ -979,7 +944,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
+@@ -812,7 +788,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
      "${FP16_SOURCE_DIR}"
      "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
  elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
@@ -204,7 +194,7 @@  index acc9584..97275bf 100644
    set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
  endif()
  list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
-@@ -1362,7 +1327,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+@@ -1159,7 +1135,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
  
      # Tensorpipe uses cuda_add_library
      torch_update_find_cuda_flags()
@@ -212,7 +202,7 @@  index acc9584..97275bf 100644
  
      list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
      if(USE_CUDA)
-@@ -1529,7 +1493,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+@@ -1288,7 +1263,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
        set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
      endif()
    endif()
@@ -220,7 +210,7 @@  index acc9584..97275bf 100644
  
    add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
    if(NOT USE_SYSTEM_ONNX)
-@@ -1560,7 +1523,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+@@ -1319,7 +1293,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
      endif()
      set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
      message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
@@ -229,7 +219,7 @@  index acc9584..97275bf 100644
    endif()
    include_directories(${FOXI_INCLUDE_DIRS})
    list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
-@@ -1739,9 +1702,8 @@ if(NOT INTERN_BUILD_MOBILE)
+@@ -1476,9 +1450,8 @@ if(NOT INTERN_BUILD_MOBILE)
    endif()
    if(USE_MKLDNN)
      include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@@ -237,10 +227,10 @@  index acc9584..97275bf 100644
 +    if(DNNL_FOUND)
        set(AT_MKLDNN_ENABLED 1)
 -      include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
-       if(BUILD_CAFFE2_OPS)
-         list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn)
-       endif(BUILD_CAFFE2_OPS)
-@@ -1796,7 +1758,7 @@ endif()
+     else()
+       message(WARNING "MKLDNN could not be found.")
+       caffe2_update_option(USE_MKLDNN OFF)
+@@ -1530,7 +1503,7 @@ endif()
  #
  set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
  set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@@ -249,7 +239,7 @@  index acc9584..97275bf 100644
  
  # Disable compiler feature checks for `fmt`.
  #
-@@ -1805,7 +1767,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+@@ -1539,7 +1512,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
  # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
  # `fmt` is compatible with a superset of the compilers that PyTorch is, it
  # shouldn't be too bad to just disable the checks.
@@ -299,10 +289,10 @@  index 8793562..9f8fa3d 100644
 -  ${MKLDNN_LIBRARIES})
 +  DNNL::dnnl)
 diff --git a/setup.py b/setup.py
-index 81f3c6c..3251cab 100644
+index 9ec41cd..1f505fe 100644
 --- a/setup.py
 +++ b/setup.py
-@@ -482,13 +482,9 @@ def build_deps():
+@@ -494,13 +494,9 @@ def build_deps():
      # Windows has very poor support for them.
      sym_files = [
          "tools/shared/_utils_internal.py",
@@ -384,10 +374,10 @@  index 5c89748..ef84c57 100644
  endif()
  
 diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 012471d..d39b625 100644
+index 179270c..72f5582 100644
 --- a/test/cpp/tensorexpr/CMakeLists.txt
 +++ b/test/cpp/tensorexpr/CMakeLists.txt
-@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
+@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
  # pthreadpool header. For some build environment we need add the dependency
  # explicitly.
  if(USE_PTHREADPOOL)
@@ -396,3 +386,15 @@  index 012471d..d39b625 100644
  endif()
  if(USE_CUDA)
    target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
+diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
+index 10a44af..33e2df4 100644
+--- a/torch/CMakeLists.txt
++++ b/torch/CMakeLists.txt
+@@ -81,7 +81,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
+     Python::Module
+     pybind::pybind11
+     opentelemetry::api
+-    httplib
+     shm
+     fmt::fmt-header-only
+     ATEN_CPU_FILES_GEN_LIB)
diff --git a/gnu/packages/patches/python-pytorch-without-kineto.patch b/gnu/packages/patches/python-pytorch-without-kineto.patch
index f956316866..896e11dcc2 100644
--- a/gnu/packages/patches/python-pytorch-without-kineto.patch
+++ b/gnu/packages/patches/python-pytorch-without-kineto.patch
@@ -1,12 +1,12 @@ 
 Even when building without Kineto, the <ActivityType.h> header is still
 imported and the ActivityType type is used. This patch was copied from
-https://github.com/pytorch/pytorch/pull/111048.
+https://github.com/pytorch/pytorch/pull/111048 and adapted.
 
 diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
-index e92cbf00..68985ab7 100644
+index 7a3b788..7f49d18 100644
 --- a/torch/csrc/profiler/kineto_shim.h
 +++ b/torch/csrc/profiler/kineto_shim.h
-@@ -12,7 +12,51 @@
+@@ -12,7 +12,55 @@
  #undef USE_KINETO
  #endif
  
@@ -48,6 +48,10 @@  index e92cbf00..68985ab7 100644
 +    MTIA_CCP_EVENTS, // MTIA ondevice CCP events
 +    HPU_OP, // HPU host side runtime event
 +    XPU_RUNTIME, // host side xpu runtime events
++    MTIA_WORKLOADD,
++
++    PRIVATEUSE1_RUNTIME,
++    PRIVATEUSE1_DRIVER,
 +
 +    ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
 +    OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,