R0CKSTAR committed · Commit 8ede9a1 · unverified · Parent: 0aa41e8

build : Add Moore Threads GPU support and update GitHub workflow for MUSA build (#3069)


* Update PATH for main/main-cuda container

Signed-off-by: Xiaodong Ye <[email protected]>

* Add Dockerfile for musa, .dockerignore and update CI

Signed-off-by: Xiaodong Ye <[email protected]>

* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli

Signed-off-by: Xiaodong Ye <[email protected]>

* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile

Signed-off-by: Xiaodong Ye <[email protected]>

* Minor updates for PATH ENV in Dockerfiles

Signed-off-by: Xiaodong Ye <[email protected]>

* Address comments

Signed-off-by: Xiaodong Ye <[email protected]>

---------

Signed-off-by: Xiaodong Ye <[email protected]>

.devops/main-cuda.Dockerfile CHANGED
```diff
@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
```
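With the build output now on `PATH` in the runtime stage, the `bash -c` entrypoint can call `whisper-cli` by name instead of a build-tree path. A quick local smoke test of the rebuilt image could look like this (a sketch: the image tag is illustrative, and `--gpus all` assumes the NVIDIA Container Toolkit on the host; neither is part of this commit):

```bash
# Build the CUDA image from the repository root (tag is illustrative)
docker build -f .devops/main-cuda.Dockerfile -t whisper.cpp:main-cuda .

# Transcribe the bundled sample; the model mount follows the README example
docker run --gpus all -it --rm \
  -v "$(pwd)/models":/models \
  whisper.cpp:main-cuda "whisper-cli -m /models/ggml-base.en.bin -f ./samples/jfk.wav"
```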
.devops/main-musa.Dockerfile ADDED
```diff
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
```
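The MUSA image mirrors the CUDA one: a `devel` stage builds with `GGML_MUSA=1`, and a slimmer `runtime` stage carries `/app` plus the `PATH` update. Building and sanity-checking it locally might look like this (a sketch: the tag is illustrative, and exposing a Moore Threads GPU to a container depends on the vendor's container runtime, which this commit does not configure):

```bash
# Build the new MUSA image from the repository root
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .

# Verify the packaged binary resolves via PATH (GPU passthrough is host-specific)
docker run -it --rm whisper.cpp:main-musa "whisper-cli --help"
```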
.devops/main.Dockerfile CHANGED
```diff
@@ -16,4 +16,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
```
.dockerignore ADDED
```diff
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
```
.github/workflows/docker.yml CHANGED
```diff
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           #      https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
```
Makefile CHANGED
```diff
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
```
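This `$(CMAKE_ARGS)` pass-through is what lets both Dockerfiles pick a GPU backend without patching the Makefile; the flags below are the ones this commit's Dockerfiles actually use:

```bash
# Plain CPU build; CMAKE_ARGS is empty by default
make build

# Download the base.en model and build with a GPU backend enabled
make base.en CMAKE_ARGS="-DGGML_CUDA=1"   # cuBLAS, as in main-cuda.Dockerfile
make base.en CMAKE_ARGS="-DGGML_MUSA=1"   # muBLAS, as in main-musa.Dockerfile
```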
README.md CHANGED
````diff
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
+## Moore Threads GPU support
+
+With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
````
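The README's usage snippet still invokes the `main` image; the new `main-musa` image is used the same way (a sketch: the mount path copies the README example, and any device passthrough flags a Moore Threads host needs are outside this commit's scope):

```bash
# Same usage pattern as the README, but against the MUSA image
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggml-org/whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```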