R0CKSTAR committed
build : Add Moore Threads GPU support and update GitHub workflow for MUSA build (#3069)
* Update PATH for main/main-cuda container
Signed-off-by: Xiaodong Ye <[email protected]>
* Add Dockerfile for musa, .dockerignore and update CI
Signed-off-by: Xiaodong Ye <[email protected]>
* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli
Signed-off-by: Xiaodong Ye <[email protected]>
* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile
Signed-off-by: Xiaodong Ye <[email protected]>
* Minor updates for PATH ENV in Dockerfiles
Signed-off-by: Xiaodong Ye <[email protected]>
* Address comments
Signed-off-by: Xiaodong Ye <[email protected]>
---------
Signed-off-by: Xiaodong Ye <[email protected]>
- .devops/main-cuda.Dockerfile +3 -3
- .devops/main-musa.Dockerfile +29 -0
- .devops/main.Dockerfile +1 -0
- .dockerignore +3 -0
- .github/workflows/docker.yml +1 -0
- Makefile +2 -2
- README.md +23 -2
.devops/main-cuda.Dockerfile
CHANGED
@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
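As a quick sanity check of the updated CUDA Dockerfile, the image can be built locally from the repository root; the tag below is only an example, not one published by CI:

```
docker build -f .devops/main-cuda.Dockerfile -t whisper.cpp:main-cuda .
```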
.devops/main-musa.Dockerfile
ADDED
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
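A minimal sketch of building and exercising the new MUSA image locally; the tag is arbitrary, and any device-passthrough flags required by the Moore Threads container runtime are omitted since they are outside the scope of this change:

```
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .
docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```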
.devops/main.Dockerfile
CHANGED
@@ -16,4 +16,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
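The effect of the added `ENV PATH` line is that commands passed to the `bash -c` entrypoint no longer need the full binary path; a small illustration, assuming a locally built `whisper.cpp:main` tag:

```
# before this change the full path was required:
docker run -it --rm whisper.cpp:main "/app/build/bin/whisper-cli --help"
# with /app/build/bin on PATH, the binary name alone suffices:
docker run -it --rm whisper.cpp:main "whisper-cli --help"
```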
.dockerignore
ADDED
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
.github/workflows/docker.yml
CHANGED
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
Makefile
CHANGED
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
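With `$(CMAKE_ARGS)` forwarded, backend flags can be passed straight through `make`, which is what the Dockerfiles above now rely on; for example:

```
# plain CPU build (empty CMAKE_ARGS behaves as before)
make base.en
# forward a GPU backend flag to cmake
make base.en CMAKE_ARGS="-DGGML_CUDA=1"
make base.en CMAKE_ARGS="-DGGML_MUSA=1"
```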
README.md
CHANGED
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper)
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
+## Moore Threads GPU support
+
+With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
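Assuming CI publishes the new `main-musa` tag the same way as the existing images, it could then be used like the `main` examples above (again, any GPU passthrough flags required by the Moore Threads runtime are omitted here):

```
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggml-org/whisper.cpp:main-musa \
  "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```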