Update app.py
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch, gradio as gr
 
+# --- Load the model ---
 MODEL_ID = "Gopu-poss/gopu-agent-2k-fdf"
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -12,6 +13,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto"
 )
 
+# --- FastAPI ---
 app = FastAPI()
 
 class Input(BaseModel):
@@ -28,14 +30,15 @@ def infer(data: Input):
     return {"generated_text": text}
 
 # --- Gradio multimodal ---
-#
-
-
-
+# Speech-recognition pipeline (optional)
+try:
+    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+except Exception:
+    asr = None
 
 def gopu_chat(system_prompt, prompt, audio=None, image=None, video=None):
     # If audio is provided, transcribe it and use the transcription as the prompt
-    if audio is not None:
+    if audio is not None and asr is not None:
         transcription = asr(audio)["text"]
         prompt = transcription
 
@@ -53,11 +56,12 @@ demo = gr.Interface(
         gr.Textbox(label="Texte utilisateur"),
         gr.Audio(label="Voix (optionnel)", type="filepath"),
         gr.Image(label="Image (optionnel)", type="filepath"),
-        gr.Video(label="Vidéo (optionnel)"
+        gr.Video(label="Vidéo (optionnel)")
     ],
     outputs="text",
     title="GopuOS Agentic Endpoint",
     description="Ajoute un prompt système, parle avec ta voix, ou envoie texte/image/vidéo."
 )
 
+# Mount the Gradio UI inside FastAPI
 app = gr.mount_gradio_app(app, demo, path="/gradio")
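The main changes in this commit are the defensive Whisper pipeline load (so the Space still starts if the ASR model cannot be downloaded) and the fixed gr.Video call plus mounting the Gradio UI under the existing FastAPI app at /gradio. A minimal smoke test for the mounted app could look like the sketch below; it assumes the file is importable as app and that the model downloads succeed in the test environment (both are assumptions, not part of this commit):

# Hedged sketch: check that the Gradio UI is reachable under the FastAPI app.
# Assumes app.py is importable here and that model/pipeline downloads work.
from fastapi.testclient import TestClient
from app import app  # the FastAPI instance returned by gr.mount_gradio_app

client = TestClient(app)
response = client.get("/gradio")   # same path passed to gr.mount_gradio_app above
print(response.status_code)        # expect 200 once the UI is mounted

Wrapping the Whisper pipeline in try/except means a failed download degrades the Space to text-only input instead of crashing at startup, since gopu_chat only transcribes when asr is not None.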