@@ -2147,11 +2147,15 @@ dist_patch_DATA = \
%D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \
%D%/packages/patches/python-pyan3-fix-positional-arguments.patch \
%D%/packages/patches/python-pytorch-fix-codegen.patch \
+ %D%/packages/patches/python-pytorch-fix-codegen-2.7.0.patch \
%D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \
%D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \
%D%/packages/patches/python-pytorch-runpath.patch \
+ %D%/packages/patches/python-pytorch-runpath-2.7.0.patch \
%D%/packages/patches/python-pytorch-system-libraries.patch \
+ %D%/packages/patches/python-pytorch-system-libraries-2.7.0.patch \
%D%/packages/patches/python-pytorch-without-kineto.patch \
+ %D%/packages/patches/python-pytorch-without-kineto-2.7.0.patch \
%D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \
%D%/packages/patches/python-unittest2-python3-compat.patch \
%D%/packages/patches/python-unittest2-remove-argparse.patch \
b/gnu/packages/machine-learning.scm
@@ -4955,7 +4955,7 @@ (define-public ideep-pytorch-for-r-torch
(base32
"0hdpkhcjry22fjx2zg2r48v7f4ljrclzj0li2pgk76kvyblfbyvm"))))))
-(define %python-pytorch-version "2.5.1")
+(define %python-pytorch-version "2.7.0")
(define %python-pytorch-src
(origin
@@ -4966,14 +4966,14 @@ (define %python-pytorch-src
(file-name (git-file-name "python-pytorch"
%python-pytorch-version))
(sha256
(base32
- "052cvagpmm9y7jspjpcyysx8yc5fhxnjl8rcz6nndis06v8dcj8s"))
- (patches (search-patches "python-pytorch-system-libraries.patch"
- "python-pytorch-runpath.patch"
- "python-pytorch-without-kineto.patch"
+ "19prdpzx34n8y2q6wx9dn9vyms6zidjvfgh58d28rfcf5z7z5ra5"))
+ (patches (search-patches
"python-pytorch-system-libraries-2.7.0.patch"
+ "python-pytorch-runpath-2.7.0.patch"
+
"python-pytorch-without-kineto-2.7.0.patch"
;; Some autogeneration scripts depend on
the
;; compile PyTorch library. Therefore, we
create
;; dummy versions which are regenerated
later.
- "python-pytorch-fix-codegen.patch"))
+ "python-pytorch-fix-codegen-2.7.0.patch"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -5123,8 +5123,10 @@ (define-public python-pytorch
(add-before 'build 'use-system-libraries
(lambda _
(substitute* '("caffe2/serialize/crc.cc"
- "caffe2/serialize/inline_container.cc")
- (("\"miniz\\.h\"") "<miniz/miniz.h>"))
+ "caffe2/serialize/inline_container.cc"
+
"torch/csrc/inductor/aoti_package/model_package_loader.cpp")
+ (("\"miniz\\.h\"") "<miniz/miniz.h>")
+ (("<miniz\\.h>") "<miniz/miniz.h>"))
(substitute*
"aten/src/ATen/native/vulkan/api/Allocator.h"
(("<include/vk_mem_alloc.h>")
"<vk_mem_alloc.h>"))
@@ -5161,6 +5163,12 @@ (define-public python-pytorch
(substitute* '("requirements.txt" "setup.py")
(("sympy==1\\.13\\.1")
"sympy>=1.13.1"))))
+ (add-after 'use-system-libraries 'skip-nccl-call
+   (lambda _
+     ;; Swap the `checkout_nccl()` invocation for `pass`, keeping its indentation.
+     (substitute* "tools/build_pytorch_libs.py"
+       (("^([[:blank:]]*)checkout_nccl\\(\\)" _ indent)
+        (string-append indent "pass  # Guix: use system NCCL")))))
;; PyTorch is still built with AVX2 and AVX-512 support
selected at
;; runtime, but these dependencies require it (nnpack only
for
;; x86_64).
b/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch
new file mode 100644
@@ -0,0 +1,178 @@
+This patch fixes some scripts for generating source files. For
+gen_jit_decompositions.py, gen_mobile_upgraders.py and
+gen_jit_shape_functions.py, which depend on the compiled PyTorch
library, the
+option to generate "dummy" source files is added for the initial build,
which
+is later corrected. codegen_external.py is patched to avoid duplicate
+functions and add the static keyword as in the existing generated file.
+
+diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
+index cc0263dbb..ac34e84b8 100644
+--- a/tools/gen_flatbuffers.sh
++++ b/tools/gen_flatbuffers.sh
+@@ -1,13 +1,13 @@
+ #!/bin/bash
+ ROOT=$(pwd)
+-FF_LOCATION="$ROOT/third_party/flatbuffers"
+-cd "$FF_LOCATION" || exit
+-mkdir build
+-cd build || exit
+-cmake ..
+-cmake --build . --target flatc
+-mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
+-./flatc --cpp --gen-mutable --scoped-enums \
++#FF_LOCATION="$ROOT/third_party/flatbuffers"
++#cd "$FF_LOCATION" || exit
++#mkdir build
++#cd build || exit
++#cmake ..
++#cmake --build . --target flatc
++#mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
++flatc --cpp --gen-mutable --scoped-enums \
+ -o "$ROOT/torch/csrc/jit/serialization" \
+ -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
+ echo '// @generated' >>
"$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
+diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py
b/torch/csrc/jit/tensorexpr/codegen_external.py
+index 5dcf1b284..0e20b0c10 100644
+--- a/torch/csrc/jit/tensorexpr/codegen_external.py
++++ b/torch/csrc/jit/tensorexpr/codegen_external.py
+@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path,
external_path):
+ native_functions = parse_native_yaml(native_functions_path,
tags_path)
+ func_decls = []
+ func_registrations = []
+- for func in native_functions:
++ done_names = set()
++ for func in native_functions[0]:
+ schema = func.func
+ name = schema.name.name.base
++ if name in done_names:
++ continue
++ else:
++ done_names.add(name)
+ args = schema.arguments
+ # Only supports extern calls for functions with out variants
+ if not schema.is_out_fn():
+@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path,
external_path):
+
+ # print(tensor_decls, name, arg_names)
+ func_decl = f"""\
+-void nnc_aten_{name}(
++static void nnc_aten_{name}(
+ int64_t bufs_num,
+ void** buf_data,
+ int64_t* buf_ranks,
+diff --git a/torchgen/decompositions/gen_jit_decompositions.py
b/torchgen/decompositions/gen_jit_decompositions.py
+index b42948045..e1cfc73a5 100644
+--- a/torchgen/decompositions/gen_jit_decompositions.py
++++ b/torchgen/decompositions/gen_jit_decompositions.py
+@@ -1,8 +1,12 @@
+ #!/usr/bin/env python3
+ import os
+ from pathlib import Path
++import sys
+
+-from torch.jit._decompositions import decomposition_table
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++ from torch.jit._decompositions import decomposition_table
++else:
++ decomposition_table = {}
+
+
+ # from torchgen.code_template import CodeTemplate
+@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
+
+
+ def main() -> None:
+- pytorch_dir = Path(__file__).resolve().parents[3]
++ pytorch_dir = Path(__file__).resolve().parents[2]
+ upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime"
+ write_decomposition_util_file(str(upgrader_path))
+
+diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py
b/torchgen/operator_versions/gen_mobile_upgraders.py
+index 845034cb7..a1c5767c2 100644
+--- a/torchgen/operator_versions/gen_mobile_upgraders.py
++++ b/torchgen/operator_versions/gen_mobile_upgraders.py
+@@ -6,10 +6,13 @@ import os
+ from enum import Enum
+ from operator import itemgetter
+ from pathlib import Path
++import sys
+ from typing import Any
+
+-import torch
+-from torch.jit.generate_bytecode import generate_upgraders_bytecode
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++ import torch
++ from torch.jit.generate_bytecode import
generate_upgraders_bytecode
++
+ from torchgen.code_template import CodeTemplate
+ from torchgen.operator_versions.gen_mobile_upgraders_constant import (
+ MOBILE_UPGRADERS_HEADER_DESCRIPTION,
+@@ -263,7 +266,10 @@ def
construct_register_size(register_size_from_yaml: int) -> str:
+ def construct_version_maps(
+ upgrader_bytecode_function_to_index_map: dict[str, Any],
+ ) -> str:
+- version_map = torch._C._get_operator_version_map()
++ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++ version_map = torch._C._get_operator_version_map()
++ else:
++ version_map = {}
+ sorted_version_map_ = sorted(version_map.items(),
key=itemgetter(0)) # type: ignore[no-any-return]
+ sorted_version_map = dict(sorted_version_map_)
+
+@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str,
Any]]) -> list[dict[str, Any]]:
+
+
+ def main() -> None:
+- upgrader_list = generate_upgraders_bytecode()
++ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++ upgrader_list = generate_upgraders_bytecode()
++ else:
++ upgrader_list = []
+ sorted_upgrader_list = sort_upgrader(upgrader_list)
+ for up in sorted_upgrader_list:
+ print("after sort upgrader : ", next(iter(up)))
+diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py
b/torchgen/shape_functions/gen_jit_shape_functions.py
+index 56a3d8bf0..ffd0785fd 100644
+--- a/torchgen/shape_functions/gen_jit_shape_functions.py
++++ b/torchgen/shape_functions/gen_jit_shape_functions.py
+@@ -1,6 +1,7 @@
+ #!/usr/bin/env python3
+ import os
+ import sys
++import importlib
+ from importlib.util import module_from_spec, spec_from_file_location
+ from itertools import chain
+ from pathlib import Path
+@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git
repo"""
+ if not file_path.exists():
+ raise Exception(err_msg) # noqa: TRY002
+
+-spec = spec_from_file_location(module_name, file_path)
+-assert spec is not None
+-module = module_from_spec(spec)
+-sys.modules[module_name] = module
+-assert spec.loader is not None
+-assert module is not None
+-spec.loader.exec_module(module)
+-
+-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+-shape_compute_graph_mapping = module.shape_compute_graph_mapping
+-
++if len(sys.argv) < 2 or sys.argv[1] != "dummy":
++ spec = importlib.util.spec_from_file_location(module_name,
file_path)
++ assert spec is not None
++ module = importlib.util.module_from_spec(spec)
++ sys.modules[module_name] = module
++ assert spec.loader is not None
++ assert module is not None
++ spec.loader.exec_module(module)
++
++ bounded_compute_graph_mapping =
module.bounded_compute_graph_mapping
++ shape_compute_graph_mapping = module.shape_compute_graph_mapping
++
++else:
++ bounded_compute_graph_mapping = {}
++ shape_compute_graph_mapping = {}
+
+ SHAPE_HEADER = r"""
+ /**
b/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch
new file mode 100644
@@ -0,0 +1,30 @@
+Libraries (such as 'libtorch_cpu.so') and executables (such as
'torch_shm_manager')
+get installed, quite surprisingly, to
'lib/python3.8/site-packages/{bin,lib}'.
+Make sure RUNPATH matches that.
+
+diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
+index be45936a8..7b19e5359 100644
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -4,7 +4,7 @@ if(APPLE)
+ set(CMAKE_MACOSX_RPATH ON)
+ set(_rpath_portable_origin "@loader_path")
+ else()
+- set(_rpath_portable_origin $ORIGIN)
++ set(_rpath_portable_origin $ORIGIN/../lib)
+ endif(APPLE)
+ # Use separate rpaths during build and install phases
+ set(CMAKE_SKIP_BUILD_RPATH FALSE)
+diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
+index bdfa4bfe4..2a75e3825 100644
+--- a/functorch/CMakeLists.txt
++++ b/functorch/CMakeLists.txt
+@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
pybind::pybind11)
+
+ set_target_properties(${PROJECT_NAME} PROPERTIES
LIBRARY_OUTPUT_DIRECTORY
+ ${CMAKE_BINARY_DIR}/functorch)
+-set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH
"${_rpath_portable_origin}/../torch/lib")
++set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH
"$ORIGIN/../torch/lib")
+
+ # Copy-pasted prefix/suffix logic for Python extensions from
+ #
https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975
a/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch
b/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch
new file mode 100644
@@ -0,0 +1,442 @@
+Patch build files to also use system libraries instead of bundled ones for the
+libraries not supported or working only by specifying USE_SYSTEM_LIBS. This
+includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
+httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
+qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
+
+diff --git a/aten/src/ATen/CMakeLists.txt
b/aten/src/ATen/CMakeLists.txt
+index 085af373e..3287429b4 100644
+--- a/aten/src/ATen/CMakeLists.txt
++++ b/aten/src/ATen/CMakeLists.txt
+@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
+ list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added
below
+ endif()
+
+-if(MKLDNN_FOUND)
+- list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
+-endif(MKLDNN_FOUND)
++if(USE_MKLDNN)
++ list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
++endif(USE_MKLDNN)
+
+ if(USE_MKLDNN_ACL)
+ list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index d2d23b7ab..1a7e5a042 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
+ if(NOT TARGET fxdiv)
+ set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+ set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+- add_subdirectory(
+- "${FXDIV_SOURCE_DIR}"
+- "${CMAKE_BINARY_DIR}/FXdiv")
+ endif()
+ endif()
+
+@@ -1135,7 +1132,6 @@ if(USE_XPU)
+ endif()
+
+ if(NOT MSVC AND USE_XNNPACK)
+- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+ endif()
+
+ # ==========================================================
+@@ -1254,8 +1250,8 @@ endif()
+ target_include_directories(torch_cpu PRIVATE
+ ${TORCH_ROOT}/third_party/cpp-httplib)
+
+-target_include_directories(torch_cpu PRIVATE
+- ${TORCH_ROOT}/third_party/nlohmann/include)
++find_package(httplib REQUIRED)
++target_link_libraries(torch_cpu PUBLIC httplib::httplib)
+
+ install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
+ DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
+@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+ target_link_libraries(torch_cpu PUBLIC
${Caffe2_PUBLIC_DEPENDENCY_LIBS})
+ target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
+ target_link_libraries(torch_cpu PRIVATE
${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
++target_link_libraries(torch_cpu PRIVATE miniz clog)
+ if(USE_MPI)
+ target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
+ endif()
+@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+ add_executable(static_runtime_bench
"${STATIC_RUNTIME_BENCHMARK_SRCS}")
+ add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
+ target_link_libraries(static_runtime_bench torch_library benchmark)
+- target_link_libraries(static_runtime_test torch_library gtest_main)
++ target_link_libraries(static_runtime_test torch_library gtest_main
gtest)
+ endif()
+
+ if(BUILD_MOBILE_BENCHMARK)
+@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
+ foreach(test_src ${ATen_MOBILE_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main gtest)
+ target_include_directories(${test_name} PRIVATE
$<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+ target_include_directories(${test_name} PRIVATE
${ATen_CPU_INCLUDE})
+@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
+ if(NOT MSVC)
+ add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}"
../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
+ # TODO: Get rid of c10 dependency (which is only needed for
the implementation of AT_ERROR)
+- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10
sleef gtest_main nlohmann)
++ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10
sleef gtest_main gtest nlohmann)
+ if(USE_FBGEMM)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY}
fbgemm)
+ endif()
+@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
+ endif()
+ else()
+ add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
+- target_link_libraries(${test_name}_${CPU_CAPABILITY}
torch_library sleef gtest_main)
++ target_link_libraries(${test_name}_${CPU_CAPABILITY}
torch_library sleef gtest_main gtest)
+ endif()
+ target_include_directories(${test_name}_${CPU_CAPABILITY}
PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name}_${CPU_CAPABILITY}
PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_CPU_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main gtest)
+ if(NOT MSVC)
+ target_link_libraries(${test_name} stdc++)
+ endif()
+@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
+ add_executable(${test_name} "${test_src}")
+ find_library(metal NAMES Metal)
+ find_library(foundation NAMES Foundation)
+- target_link_libraries(${test_name} torch_library gtest_main
${metal} ${foundation})
++ target_link_libraries(${test_name} torch_library gtest_main
gtest ${metal} ${foundation})
+ target_include_directories(${test_name} PRIVATE
$<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+ target_include_directories(${test_name} PRIVATE
${Caffe2_CPU_INCLUDE})
+@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_GPU_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main
gtest)
+ if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
+ target_link_libraries(${test_name} torch::cudnn)
+ endif()
+@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_XPU_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main
gtest)
+ target_include_directories(${test_name} PRIVATE
$<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE
${Caffe2_CPU_INCLUDE})
+ add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main
gtest)
+ target_include_directories(${test_name} PRIVATE
$<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE
${Caffe2_CPU_INCLUDE})
+ add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_HIP_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main
gtest)
+ target_include_directories(${test_name} PRIVATE
$<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE
${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+ target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
+diff --git a/caffe2/serialize/CMakeLists.txt
b/caffe2/serialize/CMakeLists.txt
+index ebbff0f29..dcded2590 100644
+--- a/caffe2/serialize/CMakeLists.txt
++++ b/caffe2/serialize/CMakeLists.txt
+@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
+
+ set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
+ list(APPEND Caffe2_CPU_SRCS
+- ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
+diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
+index be45936a8..bb1aa1cc1 100644
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -276,7 +276,7 @@ endif()
+ # --- [ PocketFFT
+ set(AT_POCKETFFT_ENABLED 0)
+ if(NOT AT_MKL_ENABLED)
+- set(POCKETFFT_INCLUDE_DIR
"${Torch_SOURCE_DIR}/third_party/pocketfft/")
++ set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR")
+ if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
+ message(FATAL_ERROR "pocketfft directory not found, expected
${POCKETFFT_INCLUDE_DIR}")
+ elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
+@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
+ set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
+ set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
+ set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
+- add_subdirectory(
+- "${PYTORCH_QNNPACK_SOURCE_DIR}"
+- "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack")
+- # We build static versions of QNNPACK and pthreadpool but link
+- # them into a shared library for Caffe2, so they need PIC.
+- set_property(TARGET pytorch_qnnpack PROPERTY
POSITION_INDEPENDENT_CODE ON)
+- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE
ON)
+- # QNNPACK depends on gemmlowp headers
+- target_include_directories(pytorch_qnnpack PRIVATE
"${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
+ endif()
+
+ list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
+@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
+ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
+ elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
+ add_library(XNNPACK SHARED IMPORTED)
+- add_library(microkernels-prod SHARED IMPORTED)
++ add_library(microkernels-prod INTERFACE IMPORTED)
+ find_library(XNNPACK_LIBRARY XNNPACK)
+- find_library(microkernels-prod_LIBRARY microkernels-prod)
+ set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION
"${XNNPACK_LIBRARY}")
+- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION
"${microkernels-prod_LIBRARY}")
+- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
++ set_property(TARGET microkernels-prod PROPERTY
INTERFACE_LINK_LIBRARIES XNNPACK)
++ if(NOT XNNPACK_LIBRARY)
+ message(FATAL_ERROR "Cannot find XNNPACK")
+ endif()
+ message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
+- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
++ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
+ endif()
+
+ # ---[ Vulkan deps
+@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR
BUILD_MOBILE_TEST)
+ # this shouldn't be necessary anymore.
+ get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
+ set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
+-
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest)
+- set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp})
+-
+- include_directories(BEFORE SYSTEM
${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include)
+- include_directories(BEFORE SYSTEM
${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include)
+
+ # We will not need to test benchmark lib itself.
+ set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark
testing as we don't need it.")
+@@ -732,16 +717,6 @@ if(USE_FBGEMM)
+ if(USE_ASAN)
+ set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize
options for FBGEMM")
+ endif()
+- add_subdirectory("${FBGEMM_SOURCE_DIR}")
+- set_property(TARGET fbgemm_generic PROPERTY
POSITION_INDEPENDENT_CODE ON)
+- set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE
ON)
+- set_property(TARGET fbgemm_avx512 PROPERTY
POSITION_INDEPENDENT_CODE ON)
+- set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON)
+- if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0)
+- # See https://github.com/pytorch/pytorch/issues/74352
+- target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
+- target_compile_options_if_supported(asmjit
-Wno-unused-but-set-variable)
+- endif()
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ target_compile_options_if_supported(asmjit -Wno-extra-semi)
+ target_compile_options_if_supported(fbgemm -Wno-extra-semi)
+@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
+ "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
+ endif()
+ elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
+- add_library(fp16 STATIC "/usr/include/fp16.h")
++ add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
+ set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
+ endif()
+ list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
+@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+ message(WARNING "Archived TensorPipe forces CMake compatibility
mode")
+ set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
+ endif()
+- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
+ if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
+ unset(CMAKE_POLICY_VERSION_MINIMUM)
+ endif()
+@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT
INTERN_DISABLE_ONNX)
+ endif()
+ set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION
${ONNX_PROTO_LIBRARY})
+ message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
+- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
++ list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
+ endif()
+ # Recover the build shared libs option.
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
+@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
+ endif()
+ if(USE_MKLDNN)
+ include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
+- if(MKLDNN_FOUND)
++ if(DNNL_FOUND)
+ set(AT_MKLDNN_ENABLED 1)
+- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
+ else()
+ message(WARNING "MKLDNN could not be found.")
+ caffe2_update_option(USE_MKLDNN OFF)
+@@ -1583,7 +1556,7 @@ endif()
+ #
+ set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
+ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
+-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
++find_package(fmt)
+
+ # Disable compiler feature checks for `fmt`.
+ #
+@@ -1592,7 +1565,6 @@
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+ # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already
know
+ # `fmt` is compatible with a superset of the compilers that PyTorch
is, it
+ # shouldn't be too bad to just disable the checks.
+-set_target_properties(fmt-header-only PROPERTIES
INTERFACE_COMPILE_FEATURES "")
+
+ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build
shared libs" FORCE)
+diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
+index 8a4a310d6..f413d2e61 100644
+--- a/cmake/External/nnpack.cmake
++++ b/cmake/External/nnpack.cmake
+@@ -40,7 +40,7 @@ endif()
+ # (3) Android, iOS, Linux, macOS - supported
+
##############################################################################
+
+-if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR
${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
++if(FALSE)
+ message(STATUS "Brace yourself, we are building NNPACK")
+ set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
+
+@@ -94,6 +94,5 @@ endif()
+ # (4) Catch-all: not supported.
+
##############################################################################
+
+-message(WARNING "Unknown platform - I don't know how to build NNPACK.
"
+- "See cmake/External/nnpack.cmake for details.")
+-set(USE_NNPACK OFF)
++set(NNPACK_FOUND TRUE)
++set(USE_NNPACK ON)
+diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
+index 87935625f..9f8fa3df8 100644
+--- a/cmake/public/mkldnn.cmake
++++ b/cmake/public/mkldnn.cmake
+@@ -4,7 +4,7 @@ if(CPU_AARCH64)
+ include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
+ endif()
+
+-find_package(MKLDNN QUIET)
++find_package(DNNL REQUIRED)
+
+ if(NOT TARGET caffe2::mkldnn)
+ add_library(caffe2::mkldnn INTERFACE IMPORTED)
+@@ -15,4 +15,4 @@ set_property(
+ ${MKLDNN_INCLUDE_DIR})
+ set_property(
+ TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
+- ${MKLDNN_LIBRARIES})
++ DNNL::dnnl)
+diff --git a/setup.py b/setup.py
+index 61ee9363f..3691cc35c 100644
+--- a/setup.py
++++ b/setup.py
+@@ -508,13 +508,9 @@ def build_deps():
+ # Windows has very poor support for them.
+ sym_files = [
+ "tools/shared/_utils_internal.py",
+- "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
+- "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
+ ]
+ orig_files = [
+ "torch/_utils_internal.py",
+- "third_party/valgrind-headers/callgrind.h",
+- "third_party/valgrind-headers/valgrind.h",
+ ]
+ for sym_file, orig_file in zip(sym_files, orig_files):
+ same = False
+diff --git a/test/cpp/c10d/CMakeLists.txt
b/test/cpp/c10d/CMakeLists.txt
+index 5b423241d..e069accd6 100644
+--- a/test/cpp/c10d/CMakeLists.txt
++++ b/test/cpp/c10d/CMakeLists.txt
+@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
+ endif()
+ endfunction()
+
+-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST OFF)
+-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST ${INSTALL_TEST})
+-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main
gtest INSTALL_TEST OFF)
++c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
gtest INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
gtest INSTALL_TEST ${INSTALL_TEST})
+ if(NOT WIN32)
+- c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
gtest INSTALL_TEST ${INSTALL_TEST})
+ endif()
+
+ if(USE_CUDA)
+ if(USE_GLOO AND USE_C10D_GLOO)
+- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu
c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
+- c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES
torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu
c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES
torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
+ endif()
+ if(USE_NCCL AND USE_C10D_NCCL)
+ # NCCL is a private dependency of libtorch, but the tests include
some
+@@ -45,10 +45,10 @@ if(USE_CUDA)
+ # a private dependency of the tests as well.
+ c10d_add_test(
+ ProcessGroupNCCLTest.cpp
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl
INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest
__caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+ c10d_add_test(
+ ProcessGroupNCCLErrorsTest.cpp
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl
INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest
__caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+ if(INSTALL_TEST)
+ install(TARGETS c10d_cuda_test DESTINATION lib)
+ endif()
+@@ -60,14 +60,14 @@ if(USE_CUDA)
+ # a private dependency of the tests as well.
+ c10d_add_test(
+ ProcessGroupUCCTest.cpp
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc
INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest
__caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
+ if(INSTALL_TEST)
+ install(TARGETS c10d_cuda_test DESTINATION lib)
+ endif()
+ endif()
+ else()
+ if(USE_GLOO AND USE_C10D_GLOO)
+- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu
gtest_main INSTALL_TEST OFF)
++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu
gtest_main gtest INSTALL_TEST OFF)
+ endif()
+ endif()
+
+diff --git a/test/cpp/tensorexpr/CMakeLists.txt
b/test/cpp/tensorexpr/CMakeLists.txt
+index 9c409e078..6cddd8de4 100644
+--- a/test/cpp/tensorexpr/CMakeLists.txt
++++ b/test/cpp/tensorexpr/CMakeLists.txt
+@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr
PRIVATE ${ATen_CPU_INCLUDE})
+ # pthreadpool header. For some build environment we need add the
dependency
+ # explicitly.
+ if(USE_PTHREADPOOL)
+- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface)
++ target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
+ endif()
+ if(USE_CUDA)
+ target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
+diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
+index 8b8ebdc6e..034b5e56c 100644
+--- a/torch/CMakeLists.txt
++++ b/torch/CMakeLists.txt
+@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
+ Python::Module
+ pybind::pybind11
+ opentelemetry::api
+- httplib
+- nlohmann
+ shm
+ fmt::fmt-header-only
+ ATEN_CPU_FILES_GEN_LIB)
a/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch
b/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch
new file mode 100644
@@ -0,0 +1,64 @@
+Even when building without Kineto, the <ActivityType.h> header is still
+imported and the ActivityType type is used. This patch was copied from
+https://github.com/pytorch/pytorch/pull/111048 and adapted.
+
+diff --git a/torch/csrc/profiler/kineto_shim.h
b/torch/csrc/profiler/kineto_shim.h
+index c4efd7785..2caef1f1e 100644
+--- a/torch/csrc/profiler/kineto_shim.h
++++ b/torch/csrc/profiler/kineto_shim.h
+@@ -12,7 +12,55 @@
+ #undef USE_KINETO
+ #endif
+
++#ifdef USE_KINETO
+ #include <ActivityType.h>
++#else
++namespace libkineto {
++// copied from header
++/*
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
++ * All rights reserved.
++ *
++ * This source code is licensed under the BSD-style license found in
the
++ * LICENSE file in the root directory of this source tree.
++ */
++
++// Note : All activity types are not enabled by default. Please add
them
++// at correct position in the enum
++enum class ActivityType {
++ // Activity types enabled by default
++ CPU_OP = 0, // cpu side ops
++ USER_ANNOTATION,
++ GPU_USER_ANNOTATION,
++ GPU_MEMCPY,
++ GPU_MEMSET,
++ CONCURRENT_KERNEL, // on-device kernels
++ EXTERNAL_CORRELATION,
++ CUDA_RUNTIME, // host side cuda runtime events
++ CUDA_DRIVER, // host side cuda driver events
++ CPU_INSTANT_EVENT, // host side point-like events
++ PYTHON_FUNCTION,
++ OVERHEAD, // CUPTI induced overhead events sampled from its
overhead API.
++
++ // Optional Activity types
++ CUDA_SYNC, // synchronization events between runtime and kernels
++ GLOW_RUNTIME, // host side glow runtime events
++ MTIA_RUNTIME, // host side MTIA runtime events
++ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance
metrics
++ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
++ HPU_OP, // HPU host side runtime event
++ XPU_RUNTIME, // host side xpu runtime events
++ MTIA_WORKLOADD,
++
++ PRIVATEUSE1_RUNTIME,
++ PRIVATEUSE1_DRIVER,
++
++ ENUM_COUNT, // This is to add buffer and not used for any
profiling logic. Add your new type before it.
++ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
++};