"""
SambaNova client for Pip's fast responses.
Handles: Quick acknowledgments, prompt enhancement, load-balanced conversation.
Uses OpenAI-compatible API.
"""
import os
import asyncio
from typing import AsyncGenerator
from openai import AsyncOpenAI

class SambanovaClient:
    """SambaNova-powered fast inference for Pip."""

    def __init__(self):
        api_key = os.getenv("SAMBANOVA_API_KEY")
        self.available = bool(api_key)
        if self.available:
            self.client = AsyncOpenAI(
                api_key=api_key,
                base_url=os.getenv("SAMBANOVA_BASE_URL", "https://api.sambanova.ai/v1"),
            )
        else:
            self.client = None
            print("⚠️ SambaNova: No API key found - service disabled")
        # Using Llama 3.1 on SambaNova (DeepSeek is another option)
        self.model = "Meta-Llama-3.1-8B-Instruct"
        self._rate_limited = False
        self._rate_limit_reset = 0

    async def _check_rate_limit(self) -> bool:
        """Return True if we're currently rate limited."""
        if self._rate_limited and time.time() < self._rate_limit_reset:
            return True
        self._rate_limited = False
        return False

    async def _handle_rate_limit(self):
        """Mark the client as rate limited for 60 seconds."""
        self._rate_limited = True
        self._rate_limit_reset = time.time() + 60  # Reset after 60 seconds
        print("SambaNova rate limited - will use fallback for 60 seconds")

    async def quick_acknowledge(self, user_input: str, system_prompt: str) -> str:
        """
        Generate a quick acknowledgment while heavier processing happens.
        This should be FAST - just a brief "I hear you" type response.
        """
        # If the service is unavailable or rate limited, return a canned fallback
        if not self.available or not self.client:
            return "I hear you..."
        if await self._check_rate_limit():
            return "I hear you..."
        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                max_tokens=50,  # Keep it short for speed
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_input},
                ],
            )
            # Guard against a None content field in the response
            return response.choices[0].message.content or "I hear you..."
        except Exception as e:
            error_str = str(e).lower()
            if "429" in error_str or "rate" in error_str:
                await self._handle_rate_limit()
            print(f"SambaNova quick_acknowledge error: {e}")
            return "I hear you..."  # Fallback

    async def enhance_prompt(
        self,
        user_input: str,
        emotion_state: dict,
        mode: str,
        system_prompt: str,
    ) -> str:
        """
        Transform user context into a detailed, vivid image prompt.
        This is where user-specific imagery is crafted.
        """
        emotions = emotion_state.get("primary_emotions", ["peaceful"])
        fallback = (
            f"A beautiful, calming scene representing {emotions[0] if emotions else 'peace'}, "
            "soft colors, dreamy atmosphere"
        )
        # If the service is unavailable or rate limited, return the simple prompt
        if not self.available or not self.client:
            return fallback
        if await self._check_rate_limit():
            return fallback
        context = f"""
User said: "{user_input}"
Detected emotions: {emotion_state.get('primary_emotions', [])}
Emotional intensity: {emotion_state.get('intensity', 5)}/10
Current mode: {mode}
Action: {emotion_state.get('action', 'reflect')}
Generate a vivid, specific image prompt based on THIS user's context.
"""
        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                max_tokens=300,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": context},
                ],
            )
            return response.choices[0].message.content or fallback
        except Exception as e:
            error_str = str(e).lower()
            if "429" in error_str or "rate" in error_str:
                await self._handle_rate_limit()
            print(f"SambaNova enhance_prompt error: {e}")
            return fallback  # Same simple prompt as the other failure paths

    async def generate_response_stream(
        self,
        user_input: str,
        emotion_state: dict,
        system_prompt: str,
    ) -> AsyncGenerator[str, None]:
        """
        Generate a conversational response with streaming.
        Used for load-balanced conversation when Claude is busy.
        """
        fallback = "I understand how you're feeling. Let me take a moment to think about this..."
        # If the service is unavailable or rate limited, yield the fallback and stop
        if not self.available or not self.client:
            yield fallback
            return
        if await self._check_rate_limit():
            yield fallback
            return
        context = f"""
User's emotions: {emotion_state.get('primary_emotions', [])}
Intensity: {emotion_state.get('intensity', 5)}/10
User said: {user_input}
"""
        try:
            stream = await self.client.chat.completions.create(
                model=self.model,
                max_tokens=512,
                stream=True,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": context},
                ],
            )
            async for chunk in stream:
                # Some stream chunks carry no choices or an empty delta; skip those
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            error_str = str(e).lower()
            if "429" in error_str or "rate" in error_str:
                await self._handle_rate_limit()
            print(f"SambaNova generate_response_stream error: {e}")
            yield "I understand how you're feeling. Let me think about the best way to respond..."

    async def analyze_emotion_fast(self, user_input: str, system_prompt: str) -> dict:
        """
        Quick emotion-analysis fallback when Claude is overloaded.
        Less nuanced but faster.
        """
        default_response = {
            "primary_emotions": ["neutral"],
            "intensity": 5,
            "pip_expression": "neutral",
            "intervention_needed": False,
        }
        # If the service is unavailable or rate limited, return the neutral default
        if not self.available or not self.client:
            return default_response
        if await self._check_rate_limit():
            return default_response
        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                max_tokens=256,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_input},
                ],
            )
            content = response.choices[0].message.content or ""
            # Strip a Markdown code fence if the model wrapped its JSON in one
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0]
            elif "```" in content:
                content = content.split("```")[1].split("```")[0]
            return json.loads(content.strip())
        except Exception as e:
            error_str = str(e).lower()
            if "429" in error_str or "rate" in error_str:
                await self._handle_rate_limit()
            print(f"SambaNova analyze_emotion_fast error: {e}")
            return default_response
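
# Minimal usage sketch (not part of the Space's runtime path): exercises the
# client end to end, assuming SAMBANOVA_API_KEY is exported. The system prompts
# and inputs below are illustrative placeholders, not the prompts Pip ships with.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        pip = SambanovaClient()
        # Fast acknowledgment while heavier processing would run elsewhere
        ack = await pip.quick_acknowledge(
            "I've been feeling overwhelmed at work lately.",
            "You are Pip. Reply with one short, warm sentence.",
        )
        print("ack:", ack)
        # Streamed conversational response with a hand-built emotion state
        async for token in pip.generate_response_stream(
            "I've been feeling overwhelmed at work lately.",
            {"primary_emotions": ["stressed"], "intensity": 7},
            "You are Pip, a gentle emotional-support companion.",
        ):
            print(token, end="", flush=True)
        print()

    asyncio.run(_demo())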