Abhilash Majumder commited on
Commit
9a168fc
·
unverified ·
1 Parent(s): 53d0282

whisper : add SYCL support (#1863)

Browse files

* add changes from llama upstream

* add sycl abstraction

* add sycl build

* update cmake

* add sycl build config

* fix bug

* fix bug

* refactor build

* fix bug

* update build

* call build

* use sycl header

* add examples

* add target

* fix typecast in quant.c

* readd fp16 and readme

* fix quant typecast

* add sample

* add readme

* remove cxx file check

.github/workflows/build.yml CHANGED
@@ -150,6 +150,106 @@ jobs:
150
  make
151
  ctest -L gh --output-on-failure'
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  windows:
154
  runs-on: windows-latest
155
 
 
150
  make
151
  ctest -L gh --output-on-failure'
152
 
153
+ ubuntu-22-cmake-sycl:
154
+ runs-on: ubuntu-22.04
155
+
156
+ strategy:
157
+ fail-fast: false
158
+ matrix:
159
+ dwhisper_sycl: [ON]
160
+ dcmake_c_compiler: [icx]
161
+ dcmake_cxx_compiler: [icpx]
162
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
163
+
164
+ continue-on-error: true
165
+
166
+ steps:
167
+ - name: Clone
168
+ uses: actions/checkout@v3
169
+
170
+ - name: add oneAPI to apt
171
+ shell: bash
172
+ run: |
173
+ cd /tmp
174
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
175
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
176
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
177
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
178
+
179
+ - name: install oneAPI dpcpp compiler
180
+ shell: bash
181
+ run: |
182
+ sudo apt update
183
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
184
+
185
+ - name: install oneAPI MKL library
186
+ shell: bash
187
+ run: |
188
+ sudo apt install intel-oneapi-mkl-devel
189
+
190
+ - name: Clone
191
+ id: checkout
192
+ uses: actions/checkout@v3
193
+
194
+ - name: Build
195
+ id: cmake_build
196
+ run: |
197
+ source /opt/intel/oneapi/setvars.sh
198
+ mkdir build
199
+ cd build
200
+ cmake -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
201
+ cmake --build . --config Release -j $(nproc)
202
+
203
+ ubuntu-22-cmake-sycl-fp16:
204
+ runs-on: ubuntu-22.04
205
+
206
+ strategy:
207
+ fail-fast: false
208
+ matrix:
209
+ dwhisper_sycl: [ON]
210
+ dcmake_c_compiler: [icx]
211
+ dcmake_cxx_compiler: [icpx]
212
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
213
+
214
+ continue-on-error: true
215
+
216
+ steps:
217
+ - name: Clone
218
+ uses: actions/checkout@v3
219
+
220
+ - name: add oneAPI to apt
221
+ shell: bash
222
+ run: |
223
+ cd /tmp
224
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
225
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
226
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
227
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
228
+
229
+ - name: install oneAPI dpcpp compiler
230
+ shell: bash
231
+ run: |
232
+ sudo apt update
233
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
234
+
235
+ - name: install oneAPI MKL library
236
+ shell: bash
237
+ run: |
238
+ sudo apt install intel-oneapi-mkl-devel
239
+
240
+ - name: Clone
241
+ id: checkout
242
+ uses: actions/checkout@v3
243
+
244
+ - name: Build
245
+ id: cmake_build
246
+ run: |
247
+ source /opt/intel/oneapi/setvars.sh
248
+ mkdir build
249
+ cd build
250
+ cmake -DWHISPER_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
251
+ cmake --build . --config Release -j $(nproc)
252
+
253
  windows:
254
  runs-on: windows-latest
255
 
CMakeLists.txt CHANGED
@@ -70,12 +70,14 @@ if (APPLE)
70
  option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
71
  option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
72
  else()
73
- option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
74
- option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
75
- option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
76
- option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
77
- option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
78
- option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
 
 
79
  endif()
80
 
81
  option(WHISPER_PERF "whisper: enable perf timings" OFF)
@@ -106,6 +108,13 @@ endif()
106
 
107
  find_package(Threads REQUIRED)
108
 
 
 
 
 
 
 
 
109
  # on APPLE
110
  if (APPLE)
111
  # include Accelerate framework
@@ -309,6 +318,30 @@ if( WHISPER_OPENVINO )
309
  find_package(OpenVINO REQUIRED COMPONENTS Runtime)
310
  endif()
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  # compiler flags
313
 
314
  if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -503,6 +536,8 @@ add_library(${TARGET}
503
  ${GGML_SOURCES_METAL}
504
  ${GGML_SOURCES_CUDA}
505
  ${GGML_SOURCES_OPENCL}
 
 
506
  whisper.h
507
  whisper.cpp
508
  )
 
70
  option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
71
  option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
72
  else()
73
+ option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
74
+ option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
75
+ option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
76
+ option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
77
+ option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
78
+ option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
79
+ option(WHISPER_SYCL "whisper: use SYCL" OFF)
80
+ option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
81
  endif()
82
 
83
  option(WHISPER_PERF "whisper: enable perf timings" OFF)
 
108
 
109
  find_package(Threads REQUIRED)
110
 
111
+ #compile flag sycl
112
+ if (WHISPER_SYCL)
113
+ set(CMAKE_CXX_STANDARD 17)
114
+ else()
115
+ set(CMAKE_CXX_STANDARD 11)
116
+ endif()
117
+
118
  # on APPLE
119
  if (APPLE)
120
  # include Accelerate framework
 
318
  find_package(OpenVINO REQUIRED COMPONENTS Runtime)
319
  endif()
320
 
321
+ if (WHISPER_SYCL)
322
+ if ( NOT DEFINED ENV{ONEAPI_ROOT})
323
+ message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
324
+ endif()
325
+ #todo: AOT
326
+
327
+ find_package(IntelSYCL REQUIRED)
328
+ if (WHISPER_SYCL_F16)
329
+ add_compile_definitions(GGML_SYCL_F16)
330
+ endif()
331
+ add_compile_definitions(GGML_USE_SYCL)
332
+
333
+ add_compile_options(-I./) #include DPCT
334
+ add_compile_options(-I/${SYCL_INCLUDE_DIR})
335
+
336
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
337
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
338
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
339
+
340
+ set(GGML_HEADERS_SYCL ggml-sycl.h)
341
+ set(GGML_SOURCES_SYCL ggml-sycl.cpp)
342
+
343
+ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
344
+ endif()
345
  # compiler flags
346
 
347
  if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 
536
  ${GGML_SOURCES_METAL}
537
  ${GGML_SOURCES_CUDA}
538
  ${GGML_SOURCES_OPENCL}
539
+ ${GGML_SOURCES_SYCL}
540
+ ${GGML_HEADERS_SYCL}
541
  whisper.h
542
  whisper.cpp
543
  )
README_sycl.md ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp for SYCL
2
+
3
+ [Background](#background)
4
+
5
+ [OS](#os)
6
+
7
+ [Intel GPU](#intel-gpu)
8
+
9
+ [Linux](#linux)
10
+
11
+ [Environment Variable](#environment-variable)
12
+
13
+ [Known Issue](#known-issue)
14
+
15
+ [Todo](#todo)
16
+
17
+ ## Background
18
+
19
+ SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators — such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
20
+
21
+ oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
22
+
23
+ Intel uses SYCL as the direct programming language to support CPUs, GPUs and FPGAs.
24
+
25
+ To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.
26
+
27
+ The whisper.cpp for SYCL is used to support Intel GPUs.
28
+
29
+ For Intel CPUs, it is recommended to use whisper.cpp for x86 (Intel MKL build).
30
+
31
+ ## OS
32
+
33
+ |OS|Status|Verified|
34
+ |-|-|-|
35
+ |Linux|Support|Ubuntu 22.04|
36
+ |Windows|Ongoing| |
37
+
38
+
39
+ ## Intel GPU
40
+
41
+ |Intel GPU| Status | Verified Model|
42
+ |-|-|-|
43
+ |Intel Data Center Max Series| Support| Max 1550|
44
+ |Intel Data Center Flex Series| Support| Flex 170|
45
+ |Intel Arc Series| Support| Arc 770|
46
+ |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
47
+ |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
48
+
49
+
50
+ ## Linux
51
+
52
+ ### Setup Environment
53
+
54
+ 1. Install Intel GPU driver.
55
+
56
+ a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
57
+
58
+ Note: for iGPU, please install the client GPU driver.
59
+
60
+ b. Add user to group: video, render.
61
+
62
+ ```
63
+ sudo usermod -aG render username
64
+ sudo usermod -aG video username
65
+ ```
66
+
67
+ Note: re-login to enable it.
68
+
69
+ c. Check
70
+
71
+ ```
72
+ sudo apt install clinfo
73
+ sudo clinfo -l
74
+ ```
75
+
76
+ Output (example):
77
+
78
+ ```
79
+ Platform #0: Intel(R) OpenCL Graphics
80
+ `-- Device #0: Intel(R) Arc(TM) A770 Graphics
81
+
82
+
83
+ Platform #0: Intel(R) OpenCL HD Graphics
84
+ `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
85
+ ```
86
+
87
+ 2. Install Intel® oneAPI Base Toolkit.
88
+
89
+
90
+ a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
91
+
92
+ Recommend to install to default folder: **/opt/intel/oneapi**.
93
+
94
+ Following guide use the default folder as example. If you use other folder, please modify the following guide info with your folder.
95
+
96
+ b. Check
97
+
98
+ ```
99
+ source /opt/intel/oneapi/setvars.sh
100
+
101
+ sycl-ls
102
+ ```
103
+
104
+ There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
105
+
106
+ Output (example):
107
+ ```
108
+ [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
109
+ [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
110
+ [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
111
+ [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
112
+
113
+ ```
114
+
115
+ 3. Build locally:
116
+
117
+ ```
118
+ mkdir -p build
119
+ cd build
120
+ source /opt/intel/oneapi/setvars.sh
121
+
122
+ #for FP16
123
+ #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
124
+
125
+ #for FP32
126
+ cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
127
+
128
+ #build example/main only
129
+ #cmake --build . --config Release --target main
130
+
131
+ #build all binary
132
+ cmake --build . --config Release -v
133
+
134
+ ```
135
+
136
+ or
137
+
138
+ ```
139
+ ./examples/sycl/build.sh
140
+ ```
141
+
142
+ Note:
143
+
144
+ - By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only.
145
+
146
+ ### Run
147
+
148
+ 1. Put model file to folder **models**
149
+
150
+ 2. Enable oneAPI running environment
151
+
152
+ ```
153
+ source /opt/intel/oneapi/setvars.sh
154
+ ```
155
+
156
+ 3. List device ID
157
+
158
+ Run without parameter:
159
+
160
+ ```
161
+ ./build/bin/ls-sycl-device
162
+
163
+ or
164
+
165
+ ./build/bin/main
166
+ ```
167
+
168
+ Check the ID in startup log, like:
169
+
170
+ ```
171
+ found 4 SYCL devices:
172
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
173
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
174
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
175
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
176
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
177
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
178
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
179
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
180
+
181
+ ```
182
+
183
+ |Attribute|Note|
184
+ |-|-|
185
+ |compute capability 1.3|Level-zero running time, recommended |
186
+ |compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
187
+
188
+ 4. Set device ID and execute whisper.cpp
189
+
190
+ Set device ID = 0 by **GGML_SYCL_DEVICE=0**
191
+
192
+ ```
193
+ GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
194
+ ```
195
+ or run by script:
196
+
197
+ ```
198
+ ./examples/sycl/run_whisper.sh
199
+ ```
200
+
201
+
202
+
203
+ 5. Check the device ID in output
204
+
205
+ Like:
206
+ ```
207
+ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
208
+ ```
209
+
210
+
211
+ ## Environment Variable
212
+
213
+ #### Build
214
+
215
+ |Name|Value|Function|
216
+ |-|-|-|
217
+ |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
218
+ |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|
219
+ |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
220
+ |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
221
+
222
+ #### Running
223
+
224
+
225
+ |Name|Value|Function|
226
+ |-|-|-|
227
+ |GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
228
+ |GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
229
+
230
+ ## Known Issue
231
+
232
+ - Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
233
+
234
+ Miss to enable oneAPI running environment.
235
+
236
+ Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
237
+
238
+
239
+ - Hang during startup
240
+
241
+ whisper.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, the memcpy may be abnormal and block.
242
+
243
+ Solution: add **--no-mmap**.
244
+
245
+ ## Todo
246
+
247
+ - Support to build in Windows.
248
+
249
+ - Support multiple cards.
examples/CMakeLists.txt CHANGED
@@ -79,6 +79,9 @@ else()
79
  add_subdirectory(talk)
80
  add_subdirectory(talk-llama)
81
  add_subdirectory(lsp)
 
 
 
82
  endif()
83
 
84
  add_subdirectory(wchess)
 
79
  add_subdirectory(talk)
80
  add_subdirectory(talk-llama)
81
  add_subdirectory(lsp)
82
+ if (WHISPER_SYCL)
83
+ add_subdirectory(sycl)
84
+ endif()
85
  endif()
86
 
87
  add_subdirectory(wchess)
examples/sycl/CMakeLists.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # MIT license
2
+ # Copyright (C) 2024 Intel Corporation
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ set(TARGET ls-sycl-device)
6
+ add_executable(${TARGET} ls-sycl-device.cpp)
7
+ install(TARGETS ${TARGET} RUNTIME)
8
+ target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
9
+ target_compile_features(${TARGET} PRIVATE cxx_std_17)
examples/sycl/README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp/examples/sycl
2
+
3
+ This example program provides the tools for whisper.cpp for SYCL on Intel GPUs.
4
+
5
+ ## Tool
6
+
7
+ |Tool Name| Function|Status|
8
+ |-|-|-|
9
+ |ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, etc.|Support|
10
+
11
+ ### ls-sycl-device
12
+
13
+ List all SYCL devices with ID, compute capability, max work group size, etc.
14
+
15
+ 1. Build the llama.cpp for SYCL for all targets.
16
+
17
+ 2. Enable oneAPI running environment
18
+
19
+ ```
20
+ source /opt/intel/oneapi/setvars.sh
21
+ ```
22
+
23
+ 3. Execute
24
+
25
+ ```
26
+ ./build/bin/ls-sycl-device
27
+ ```
28
+
29
+ Check the ID in startup log, like:
30
+
31
+ ```
32
+ found 4 SYCL devices:
33
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
34
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
35
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
36
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
37
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
38
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
39
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
40
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
41
+
42
+ ```
43
+
44
+ |Attribute|Note|
45
+ |-|-|
46
+ |compute capability 1.3|Level-zero running time, recommended |
47
+ |compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
examples/sycl/build.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT license
2
+ # Copyright (C) 2024 Intel Corporation
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ mkdir -p build
6
+ cd build
7
+ source /opt/intel/oneapi/setvars.sh
8
+
9
+ #for FP16
10
+ #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON # faster for long-prompt inference
11
+
12
+ #for FP32
13
+ cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
14
+
15
+ #build example/main only
16
+ #cmake --build . --config Release --target main
17
+
18
+ #build all binary
19
+ cmake --build . --config Release -v
examples/sycl/ls-sycl-device.cpp ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*MIT license
2
+ Copyright (C) 2024 Intel Corporation
3
+ SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ #include "ggml-sycl.h"
7
+
8
+ int main(int argc, char ** argv) {
9
+ ggml_backend_sycl_print_sycl_devices();
10
+ return 0;
11
+ }
examples/sycl/run-whisper.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # MIT license
4
+ # Copyright (C) 2024 Intel Corporation
5
+ # SPDX-License-Identifier: MIT
6
+
7
+ INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
8
+ source /opt/intel/oneapi/setvars.sh
9
+
10
+ if [ $# -gt 0 ]; then
11
+ export GGML_SYCL_DEVICE=$1
12
+ else
13
+ export GGML_SYCL_DEVICE=0
14
+ fi
15
+ echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE
16
+ #export GGML_SYCL_DEBUG=1
17
+ ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
whisper.cpp CHANGED
@@ -12,6 +12,10 @@
12
  #include "ggml-cuda.h"
13
  #endif
14
 
 
 
 
 
15
  #ifdef WHISPER_USE_OPENVINO
16
  #include "openvino/whisper-openvino-encoder.h"
17
  #endif
@@ -1052,6 +1056,16 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
1052
  }
1053
  #endif
1054
 
 
 
 
 
 
 
 
 
 
 
1055
  if (backend_gpu) {
1056
  return backend_gpu;
1057
  }
 
12
  #include "ggml-cuda.h"
13
  #endif
14
 
15
+ #ifdef GGML_USE_SYCL
16
+ #include "ggml-sycl.h"
17
+ #endif
18
+
19
  #ifdef WHISPER_USE_OPENVINO
20
  #include "openvino/whisper-openvino-encoder.h"
21
  #endif
 
1056
  }
1057
  #endif
1058
 
1059
+ #ifdef GGML_USE_SYCL
1060
+ if (params.use_gpu) {
1061
+ WHISPER_LOG_INFO("%s: using SYCL backend\n", __func__);
1062
+ backend_gpu = ggml_backend_sycl_init(params.gpu_device);
1063
+ if (!backend_gpu) {
1064
+ WHISPER_LOG_ERROR("%s: ggml_backend_sycl_init() failed\n", __func__);
1065
+ }
1066
+ }
1067
+ #endif
1068
+
1069
  if (backend_gpu) {
1070
  return backend_gpu;
1071
  }