fix: Use correct unsloth gemma-4 model identifiers
Browse files
main.py
CHANGED
|
@@ -277,8 +277,8 @@ def load_model(model_name: str):
|
|
| 277 |
stderr_interceptor.active_model = model_name
|
| 278 |
try:
|
| 279 |
if model_name == "27b-4a":
|
| 280 |
-
# Use Gemma 4
|
| 281 |
-
hf_model_id = "unsloth/gemma-4-
|
| 282 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=hf_token)
|
| 283 |
|
| 284 |
# BitsAndBytes for 4-bit quantization
|
|
@@ -296,8 +296,8 @@ def load_model(model_name: str):
|
|
| 296 |
token=hf_token
|
| 297 |
)
|
| 298 |
else:
|
| 299 |
-
# Default to 2b
|
| 300 |
-
hf_model_id = "unsloth/gemma-4-
|
| 301 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=hf_token)
|
| 302 |
model = AutoModelForCausalLM.from_pretrained(
|
| 303 |
hf_model_id,
|
|
|
|
| 277 |
stderr_interceptor.active_model = model_name
|
| 278 |
try:
|
| 279 |
if model_name == "27b-4a":
|
| 280 |
+
# Use Gemma 4 26B A4B in 4-bit (requires CUDA)
|
| 281 |
+
hf_model_id = "unsloth/gemma-4-26B-A4B"
|
| 282 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=hf_token)
|
| 283 |
|
| 284 |
# BitsAndBytes for 4-bit quantization
|
|
|
|
| 296 |
token=hf_token
|
| 297 |
)
|
| 298 |
else:
|
| 299 |
+
# Default to 2b (Gemma 4 E2B)
|
| 300 |
+
hf_model_id = "unsloth/gemma-4-E2B"
|
| 301 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=hf_token)
|
| 302 |
model = AutoModelForCausalLM.from_pretrained(
|
| 303 |
hf_model_id,
|