Spaces:
Running
Running
whisper : add large-v3-turbo (#2440)
Browse files- .gitignore +1 -0
- Makefile +2 -1
- README.md +1 -0
- bindings/go/examples/go-model-download/main.go +1 -1
- examples/livestream.sh +1 -1
- examples/twitch.sh +1 -1
- models/README.md +18 -16
- models/convert-h5-to-coreml.py +2 -2
- models/convert-whisper-to-coreml.py +2 -2
- models/convert-whisper-to-openvino.py +2 -2
- models/download-coreml-model.sh +1 -1
- models/download-ggml-model.cmd +1 -1
- models/download-ggml-model.sh +3 -1
- scripts/bench.py +1 -0
- scripts/convert-all.sh +1 -1
- tests/run-tests.sh +1 -1
.gitignore
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
.cache/
|
| 4 |
.coreml/
|
| 5 |
.test/
|
|
|
|
| 6 |
.vs/
|
| 7 |
.vscode/
|
| 8 |
.DS_Store
|
|
|
|
| 3 |
.cache/
|
| 4 |
.coreml/
|
| 5 |
.test/
|
| 6 |
+
.venv/
|
| 7 |
.vs/
|
| 8 |
.vscode/
|
| 9 |
.DS_Store
|
Makefile
CHANGED
|
@@ -1145,8 +1145,9 @@ samples:
|
|
| 1145 |
.PHONY: large-v1
|
| 1146 |
.PHONY: large-v2
|
| 1147 |
.PHONY: large-v3
|
|
|
|
| 1148 |
|
| 1149 |
-
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
|
| 1150 |
bash ./models/download-ggml-model.sh $@
|
| 1151 |
@echo ""
|
| 1152 |
@echo "==============================================="
|
|
|
|
| 1145 |
.PHONY: large-v1
|
| 1146 |
.PHONY: large-v2
|
| 1147 |
.PHONY: large-v3
|
| 1148 |
+
.PHONY: large-v3-turbo
|
| 1149 |
|
| 1150 |
+
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: main
|
| 1151 |
bash ./models/download-ggml-model.sh $@
|
| 1152 |
@echo ""
|
| 1153 |
@echo "==============================================="
|
README.md
CHANGED
|
@@ -236,6 +236,7 @@ make medium
|
|
| 236 |
make large-v1
|
| 237 |
make large-v2
|
| 238 |
make large-v3
|
|
|
|
| 239 |
```
|
| 240 |
|
| 241 |
## Memory usage
|
|
|
|
| 236 |
make large-v1
|
| 237 |
make large-v2
|
| 238 |
make large-v3
|
| 239 |
+
make large-v3-turbo
|
| 240 |
```
|
| 241 |
|
| 242 |
## Memory usage
|
bindings/go/examples/go-model-download/main.go
CHANGED
|
@@ -24,7 +24,7 @@ const (
|
|
| 24 |
|
| 25 |
var (
|
| 26 |
// The models which will be downloaded, if no model is specified as an argument
|
| 27 |
-
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
|
| 28 |
)
|
| 29 |
|
| 30 |
var (
|
|
|
|
| 24 |
|
| 25 |
var (
|
| 26 |
// The models which will be downloaded, if no model is specified as an argument
|
| 27 |
+
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "ggml-large-v3-turbo"}
|
| 28 |
)
|
| 29 |
|
| 30 |
var (
|
examples/livestream.sh
CHANGED
|
@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
|
|
| 48 |
fi
|
| 49 |
|
| 50 |
# Whisper models
|
| 51 |
-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
|
| 52 |
|
| 53 |
# list available models
|
| 54 |
function list_models {
|
|
|
|
| 48 |
fi
|
| 49 |
|
| 50 |
# Whisper models
|
| 51 |
+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
|
| 52 |
|
| 53 |
# list available models
|
| 54 |
function list_models {
|
examples/twitch.sh
CHANGED
|
@@ -21,7 +21,7 @@ help()
|
|
| 21 |
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
|
| 22 |
echo "options:"
|
| 23 |
echo "-s Step in seconds (default is $step)."
|
| 24 |
-
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
|
| 25 |
echo "-t Number of threads to use."
|
| 26 |
echo "-h Print this help page."
|
| 27 |
echo
|
|
|
|
| 21 |
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
|
| 22 |
echo "options:"
|
| 23 |
echo "-s Step in seconds (default is $step)."
|
| 24 |
+
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' 'large-v3-turbo' (default is '$model')."
|
| 25 |
echo "-t Number of threads to use."
|
| 26 |
echo "-h Print this help page."
|
| 27 |
echo
|
models/README.md
CHANGED
|
@@ -42,22 +42,24 @@ rmdir models/whisper-medium
|
|
| 42 |
|
| 43 |
## Available models
|
| 44 |
|
| 45 |
-
| Model
|
| 46 |
-
|
|
| 47 |
-
| tiny
|
| 48 |
-
| tiny.en
|
| 49 |
-
| base
|
| 50 |
-
| base.en
|
| 51 |
-
| small
|
| 52 |
-
| small.en
|
| 53 |
-
| small.en-tdrz
|
| 54 |
-
| medium
|
| 55 |
-
| medium.en
|
| 56 |
-
| large-v1
|
| 57 |
-
| large-v2
|
| 58 |
-
| large-v2-q5_0
|
| 59 |
-
| large-v3
|
| 60 |
-
| large-v3-q5_0
|
|
|
|
|
|
|
| 61 |
|
| 62 |
Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
|
| 63 |
|
|
|
|
| 42 |
|
| 43 |
## Available models
|
| 44 |
|
| 45 |
+
| Model | Disk | SHA |
|
| 46 |
+
| ------------------- | ------- | ------------------------------------------ |
|
| 47 |
+
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
|
| 48 |
+
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
|
| 49 |
+
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
|
| 50 |
+
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
|
| 51 |
+
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
|
| 52 |
+
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
|
| 53 |
+
| small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` |
|
| 54 |
+
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
|
| 55 |
+
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
|
| 56 |
+
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
|
| 57 |
+
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
| 58 |
+
| large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` |
|
| 59 |
+
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
|
| 60 |
+
| large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` |
|
| 61 |
+
| large-v3-turbo | 1.5 GiB | `4af2b29d7ec73d781377bfd1758ca957a807e941` |
|
| 62 |
+
| large-v3-turbo-q5_0 | 547 MiB | `e050f7970618a659205450ad97eb95a18d69c9ee` |
|
| 63 |
|
| 64 |
Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
|
| 65 |
|
models/convert-h5-to-coreml.py
CHANGED
|
@@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
|
|
| 78 |
# Ported from models/convert-whisper-to-coreml.py
|
| 79 |
if __name__ == "__main__":
|
| 80 |
parser = argparse.ArgumentParser()
|
| 81 |
-
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
|
| 82 |
parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
|
| 83 |
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
| 84 |
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
| 85 |
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
| 86 |
args = parser.parse_args()
|
| 87 |
|
| 88 |
-
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
|
| 89 |
raise ValueError("Invalid model name")
|
| 90 |
|
| 91 |
pt_target_path = f"models/hf-{args.model_name}.pt"
|
|
|
|
| 78 |
# Ported from models/convert-whisper-to-coreml.py
|
| 79 |
if __name__ == "__main__":
|
| 80 |
parser = argparse.ArgumentParser()
|
| 81 |
+
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
|
| 82 |
parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
|
| 83 |
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
| 84 |
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
| 85 |
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
| 86 |
args = parser.parse_args()
|
| 87 |
|
| 88 |
+
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
|
| 89 |
raise ValueError("Invalid model name")
|
| 90 |
|
| 91 |
pt_target_path = f"models/hf-{args.model_name}.pt"
|
models/convert-whisper-to-coreml.py
CHANGED
|
@@ -283,13 +283,13 @@ def convert_decoder(hparams, model, quantize=False):
|
|
| 283 |
|
| 284 |
if __name__ == "__main__":
|
| 285 |
parser = argparse.ArgumentParser()
|
| 286 |
-
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
|
| 287 |
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
| 288 |
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
| 289 |
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
| 290 |
args = parser.parse_args()
|
| 291 |
|
| 292 |
-
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
|
| 293 |
raise ValueError("Invalid model name")
|
| 294 |
|
| 295 |
whisper = load_model(args.model).cpu()
|
|
|
|
| 283 |
|
| 284 |
if __name__ == "__main__":
|
| 285 |
parser = argparse.ArgumentParser()
|
| 286 |
+
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
|
| 287 |
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
| 288 |
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
| 289 |
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
| 290 |
args = parser.parse_args()
|
| 291 |
|
| 292 |
+
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
|
| 293 |
raise ValueError("Invalid model name")
|
| 294 |
|
| 295 |
whisper = load_model(args.model).cpu()
|
models/convert-whisper-to-openvino.py
CHANGED
|
@@ -45,10 +45,10 @@ def convert_encoder(hparams, encoder, mname):
|
|
| 45 |
|
| 46 |
if __name__ == "__main__":
|
| 47 |
parser = argparse.ArgumentParser()
|
| 48 |
-
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
|
| 49 |
args = parser.parse_args()
|
| 50 |
|
| 51 |
-
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
|
| 52 |
raise ValueError("Invalid model name")
|
| 53 |
|
| 54 |
whisper = load_model(args.model).cpu()
|
|
|
|
| 45 |
|
| 46 |
if __name__ == "__main__":
|
| 47 |
parser = argparse.ArgumentParser()
|
| 48 |
+
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
|
| 49 |
args = parser.parse_args()
|
| 50 |
|
| 51 |
+
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
|
| 52 |
raise ValueError("Invalid model name")
|
| 53 |
|
| 54 |
whisper = load_model(args.model).cpu()
|
models/download-coreml-model.sh
CHANGED
|
@@ -22,7 +22,7 @@ get_script_path() {
|
|
| 22 |
models_path="$(get_script_path)"
|
| 23 |
|
| 24 |
# Whisper models
|
| 25 |
-
models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"
|
| 26 |
|
| 27 |
# list available models
|
| 28 |
list_models() {
|
|
|
|
| 22 |
models_path="$(get_script_path)"
|
| 23 |
|
| 24 |
# Whisper models
|
| 25 |
+
models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo"
|
| 26 |
|
| 27 |
# list available models
|
| 28 |
list_models() {
|
models/download-ggml-model.cmd
CHANGED
|
@@ -8,7 +8,7 @@ popd
|
|
| 8 |
set argc=0
|
| 9 |
for %%x in (%*) do set /A argc+=1
|
| 10 |
|
| 11 |
-
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3
|
| 12 |
|
| 13 |
if %argc% neq 1 (
|
| 14 |
echo.
|
|
|
|
| 8 |
set argc=0
|
| 9 |
for %%x in (%*) do set /A argc+=1
|
| 10 |
|
| 11 |
+
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo
|
| 12 |
|
| 13 |
if %argc% neq 1 (
|
| 14 |
echo.
|
models/download-ggml-model.sh
CHANGED
|
@@ -46,7 +46,9 @@ large-v1
|
|
| 46 |
large-v2
|
| 47 |
large-v2-q5_0
|
| 48 |
large-v3
|
| 49 |
-
large-v3-q5_0
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# list available models
|
| 52 |
list_models() {
|
|
|
|
| 46 |
large-v2
|
| 47 |
large-v2-q5_0
|
| 48 |
large-v3
|
| 49 |
+
large-v3-q5_0
|
| 50 |
+
large-v3-turbo
|
| 51 |
+
large-v3-turbo-q5_0"
|
| 52 |
|
| 53 |
# list available models
|
| 54 |
list_models() {
|
scripts/bench.py
CHANGED
|
@@ -64,6 +64,7 @@ models = [
|
|
| 64 |
"ggml-large-v1.bin",
|
| 65 |
"ggml-large-v2.bin",
|
| 66 |
"ggml-large-v3.bin",
|
|
|
|
| 67 |
]
|
| 68 |
|
| 69 |
|
|
|
|
| 64 |
"ggml-large-v1.bin",
|
| 65 |
"ggml-large-v2.bin",
|
| 66 |
"ggml-large-v3.bin",
|
| 67 |
+
"ggml-large-v3-turbo.bin",
|
| 68 |
]
|
| 69 |
|
| 70 |
|
scripts/convert-all.sh
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
|
| 4 |
|
| 5 |
for model in "${models[@]}"; do
|
| 6 |
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
|
| 4 |
|
| 5 |
for model in "${models[@]}"; do
|
| 6 |
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
tests/run-tests.sh
CHANGED
|
@@ -19,7 +19,7 @@
|
|
| 19 |
cd `dirname $0`
|
| 20 |
|
| 21 |
# Whisper models
|
| 22 |
-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
|
| 23 |
|
| 24 |
# list available models
|
| 25 |
function list_models {
|
|
|
|
| 19 |
cd `dirname $0`
|
| 20 |
|
| 21 |
# Whisper models
|
| 22 |
+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
|
| 23 |
|
| 24 |
# list available models
|
| 25 |
function list_models {
|