Use Intel oneMKL for Intel devices

Rbiessy · Rbiessy · commit 995aea3d8309 · 2025-03-28T15:15:40.000Z
diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt
@@ -72,71 +72,77 @@ if (GGML_SYCL_GRAPH)
     target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GRAPH)
 endif()
 
-find_package(oneMath QUIET)
-if (NOT oneMath_FOUND)
-    message("-- oneMath not found: oneMath will be automatically downloaded")
-    # Use FetchContent to automatically pull and build oneMath
-    include(FetchContent)
-    set(BUILD_FUNCTIONAL_TESTS False)
-    set(BUILD_EXAMPLES False)
-    set(TARGET_DOMAINS blas)
+# Link against Intel oneMKL or oneMath
+if (GGML_SYCL_TARGET STREQUAL "INTEL")
+    # Intel devices use Intel oneMKL directly instead of oneMath to avoid the limitation of linking Intel oneMKL statically
+    # See https://github.com./uxlfoundation/oneMath/issues/654
+    find_package(MKL REQUIRED)
+    target_link_libraries(ggml-sycl PRIVATE MKL::MKL MKL::MKL_SYCL)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_USE_INTEL_ONEMKL)
+else()
+    find_package(oneMath QUIET)
+    if (NOT oneMath_FOUND)
+        message("-- oneMath not found: oneMath will be automatically downloaded")
+        # Use FetchContent to automatically pull and build oneMath
+        include(FetchContent)
+        set(BUILD_FUNCTIONAL_TESTS False)
+        set(BUILD_EXAMPLES False)
+        set(TARGET_DOMAINS blas)
+        if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+            set(ENABLE_MKLCPU_BACKEND False)
+            set(ENABLE_MKLGPU_BACKEND False)
+            set(ENABLE_CUBLAS_BACKEND True)
+        elseif (GGML_SYCL_TARGET STREQUAL "AMD")
+            set(ENABLE_MKLCPU_BACKEND False)
+            set(ENABLE_MKLGPU_BACKEND False)
+            set(ENABLE_ROCBLAS_BACKEND True)
+            # Ensure setting a string variable here is not overridden by oneMath CACHE variables
+            cmake_policy(SET CMP0126 NEW)
+            # Setting the device architecture is only needed and useful for AMD devices in oneMath
+            set(HIP_TARGETS ${GGML_SYCL_DEVICE_ARCH} CACHE STRING "oneMath HIP target" FORCE)
+        endif()
+        FetchContent_Declare(
+            ONEMATH
+            GIT_REPOSITORY https://github.com./uxlfoundation/oneMath.git
+            GIT_TAG c255b1b4c41e2ee3059455c1f96a965d6a62568a
+        )
+        FetchContent_MakeAvailable(ONEMATH)
+        # Create aliases that match the target names provided by find_package
+        function(onemath_alias target)
+            if (TARGET ${target}_obj)
+                # Silence verbose warnings from external libraries
+                target_compile_options(${target}_obj PRIVATE -w)
+            endif()
+            if (TARGET ${target})
+                add_library(ONEMATH::${target} ALIAS ${target})
+            endif()
+        endfunction()
+        onemath_alias(onemath)
+        onemath_alias(onemath_blas_mklcpu)
+        onemath_alias(onemath_blas_mklgpu)
+        onemath_alias(onemath_blas_cublas)
+        onemath_alias(onemath_blas_rocblas)
+    endif()
+
+    # Below oneMath compile-time dispatching is used for better performance
     if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
-        set(ENABLE_MKLCPU_BACKEND False)
-        set(ENABLE_MKLGPU_BACKEND False)
-        set(ENABLE_CUBLAS_BACKEND True)
+        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
+        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+        target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-        set(ENABLE_MKLCPU_BACKEND False)
-        set(ENABLE_MKLGPU_BACKEND False)
-        set(ENABLE_ROCBLAS_BACKEND True)
-        # Ensure setting a string variable here is not overriden by oneMath CACHE variables
-        cmake_policy(SET CMP0126 NEW)
-        # Setting the device architecture is only needed and useful for AMD devices in oneMath
-        set(HIP_TARGETS ${GGML_SYCL_DEVICE_ARCH} CACHE STRING "oneMath HIP target" FORCE)
-    endif()
-    FetchContent_Declare(
-        ONEMATH
-        GIT_REPOSITORY https://github.com./uxlfoundation/oneMath.git
-        GIT_TAG c255b1b4c41e2ee3059455c1f96a965d6a62568a
-    )
-    FetchContent_MakeAvailable(ONEMATH)
-    # Create alias to match with find_package targets name
-    function(onemath_alias target)
-        if (TARGET ${target}_obj)
-            # Silence verbose warnings from external libraries
-            target_compile_options(${target}_obj PRIVATE -w)
+        if (NOT GGML_SYCL_DEVICE_ARCH) # FATAL_ERROR is required to actually stop the configure step
+            message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
         endif()
-        if (TARGET ${target})
-            add_library(ONEMATH::${target} ALIAS ${target})
-        endif()
-    endfunction()
-    onemath_alias(onemath)
-    onemath_alias(onemath_blas_mklcpu)
-    onemath_alias(onemath_blas_mklgpu)
-    onemath_alias(onemath_blas_cublas)
-    onemath_alias(onemath_blas_rocblas)
-endif()
-
-# Below oneMath compile-time dispatching is used for better performance
-if (GGML_SYCL_TARGET STREQUAL "INTEL")
-    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklgpu)
-    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL)
-elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
-    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
-    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
-    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
-    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
-elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-    if (NOT GGML_SYCL_DEVICE_ARCH)
-        message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
+        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
+        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+        target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
+    else()
+        # Fallback to oneMath runtime dispatcher
+        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
+        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
     endif()
-    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
-    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
-    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
-    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
-else()
-    # Fallback to oneMath runtime dispatcher
-    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
-    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
 endif()
 
 if (GGML_SYCL_DEVICE_ARCH)
diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp
@@ -16,9 +16,18 @@
 #include <sycl/sycl.hpp>
 #include <sycl/half_type.hpp>
 #include <syclcompat/math.hpp>
-#include <oneapi/math.hpp>
 #include <map>
 
+#ifdef GGML_SYCL_USE_INTEL_ONEMKL
+#include <oneapi/mkl.hpp>
+// Alias oneapi::mkl as oneapi::math so the same namespace works for Intel oneMKL and oneMath
+namespace oneapi {
+    namespace math = mkl;
+}
+#else
+#include <oneapi/math.hpp>
+#endif
+
 #include "ggml.h"
 
 #if defined(__linux__)
@@ -91,20 +100,18 @@ template <typename Ts> struct matrix_info_t {
 };
 
 inline auto get_onemath_backend(sycl::queue& queue)
-#ifdef GGML_SYCL_GENERIC
+#if defined(GGML_SYCL_GENERIC) || defined(GGML_SYCL_USE_INTEL_ONEMKL)
   -> sycl::queue&
 #endif
 {
 // If the backend is known at compile-time, use oneMath backend_selector to use
 // compile-time dispatching and avoid the need to dlopen libraries. Otherwise
 // fallback to runtime dispatching.
-#if defined(GGML_SYCL_INTEL)
-    return oneapi::math::backend_selector<oneapi::math::backend::mklgpu>{ queue };
-#elif defined(GGML_SYCL_NVIDIA)
+#if defined(GGML_SYCL_NVIDIA)
     return oneapi::math::backend_selector<oneapi::math::backend::cublas>{ queue };
 #elif defined(GGML_SYCL_AMD)
     return oneapi::math::backend_selector<oneapi::math::backend::rocblas>{ queue };
-#elif defined(GGML_SYCL_GENERIC)
+#elif defined(GGML_SYCL_GENERIC) || defined(GGML_SYCL_USE_INTEL_ONEMKL)
     return queue;
 #else
     static_assert(false, "Unsupported backend");