Spaces:

23f3003322
/

llm-quiz-analysis

Sleeping

App Files Files Community

23f3003322 commited on 24 days ago

Commit

c19b958

1 Parent(s): 81a2360

media transcriber completed

Browse files

Files changed (4) hide show

app/orchestrator/actions/action_executor.py +9 -15
app/orchestrator/actions/media_transcriber.py +207 -69
requirements.txt +2 -0
test/media_transcriber.py +506 -0

app/orchestrator/actions/action_executor.py CHANGED Viewed

@@ -150,27 +150,21 @@ class ActionExecutor:
         return results
     async def _handle_ocr(self, urls: List[str]) -> List[str]:
-        """Handle OCR on images"""
-        logger.info(f"🖼️  Processing OCR URLs")
         results = []
         for url in urls:
-            if not self._is_image(url):
-                continue
-            try:
-                ocr_result = await self.image_processor.extract_text_from_image(url)
-                results.append(
-                    f"\n\nText extracted from image {url}:\n{ocr_result['extracted_text']}"
-                )
-            except Exception as e:
-                logger.error(f"Failed to OCR {url}: {e}")
-                results.append(f"\n\n[Failed to extract text from {url}: {str(e)}]")
         return results
     async def _handle_navigation(self, urls: List[str]) -> List[str]:
         """Handle navigation to additional URLs"""
         logger.info(f"🌐 Processing navigation URLs")

         return results
     async def _handle_ocr(self, urls: List[str]) -> List[str]:
         results = []
         for url in urls:
+            ocr_result = await self.image_processor.extract_text_from_image(url)
+            if ocr_result['status'] == 'success':
+                results.append(f"\nText from {url}:\n{ocr_result['extracted_text']}")
+            elif ocr_result['status'] == 'unavailable':
+                results.append(f"\n[Image at {url} - OCR not configured]")
+            else:
+                results.append(f"\n[OCR failed for {url}]")
         return results
     async def _handle_navigation(self, urls: List[str]) -> List[str]:
         """Handle navigation to additional URLs"""
         logger.info(f"🌐 Processing navigation URLs")

app/orchestrator/actions/media_transcriber.py CHANGED Viewed

@@ -1,107 +1,245 @@
 """
-Media Transcriber
-Handles audio and video transcription
 """
 import httpx
-from typing import Dict, Any
 from app.core.config import settings
 from app.core.logging import get_logger
-from app.core.exceptions import TaskProcessingError
 logger = get_logger(__name__)
 class MediaTranscriber:
     """
-    Transcribes audio and video files
-    Uses external APIs (OpenAI Whisper, etc.)
     """
-    def __init__(self):
         """Initialize media transcriber"""
-        logger.debug("MediaTranscriber initialized")
-    async def transcribe_audio(self, url: str) -> Dict[str, Any]:
         """
         Transcribe audio file
-        Args:
-            url: URL to audio file
-        Returns:
-            Dict with transcription result:
-            {
-                'url': str,
-                'transcription': str,
-                'language': str,
-                'duration': float (if available)
             }
         """
-        logger.info(f"🎤 Transcribing audio from: {url}")
-        # For now, return a placeholder
-        # In production, you would:
-        # 1. Download the audio file
-        # 2. Send to transcription API (Whisper, AssemblyAI, etc.)
-        # 3. Return the transcription
-        logger.warning(
-            "⚠️  Audio transcription not fully implemented. "
-            "Returning placeholder. Integrate with Whisper API for production."
-        )
         return {
             'url': url,
-            'transcription': f"[Audio transcription placeholder for {url}. "
-                           "Integrate with OpenAI Whisper or AssemblyAI API.]",
             'language': 'unknown',
-            'status': 'placeholder'
         }
-    async def transcribe_video(self, url: str) -> Dict[str, Any]:
-        """
-        Transcribe video file (extracts audio and transcribes)
-        Args:
-            url: URL to video file
-        Returns:
-            Dict with transcription result
-        """
-        logger.info(f"🎬 Transcribing video from: {url}")
-        logger.warning(
-            "⚠️  Video transcription not fully implemented. "
-            "Returning placeholder."
         )
         return {
-            'url': url,
-            'transcription': f"[Video transcription placeholder for {url}. "
-                           "Extract audio and use Whisper API.]",
-            'language': 'unknown',
-            'status': 'placeholder'
         }
-    async def _transcribe_with_whisper(self, audio_file_path: str) -> str:
-        """
-        Transcribe using OpenAI Whisper API (placeholder implementation)
-        Args:
-            audio_file_path: Path to audio file
-        Returns:
-            str: Transcription text
-        """
-        # Placeholder for Whisper API integration
-        # Actual implementation would use OpenAI API:
-        #
-        # import openai
-        # with open(audio_file_path, 'rb') as f:
-        #     transcript = openai.Audio.transcribe("whisper-1", f)
-        # return transcript['text']
-        logger.warning("Whisper API integration needed for actual transcription")
-        return "[Transcription unavailable - Whisper API not configured]"

 """
+Media Transcriber - HF Spaces Free Tier Optimized
+Audio-only support (no ffmpeg needed)
 """
 import httpx
+import tempfile
+import os
+from typing import Dict, Any, Optional
+from pathlib import Path
 from app.core.config import settings
 from app.core.logging import get_logger
 logger = get_logger(__name__)
 class MediaTranscriber:
     """
+    Audio transcriber optimized for HF Spaces free tier
+    - Supports audio files: .mp3, .wav, .m4a, .ogg, .flac
+    - Video files return helpful error message
+    - No ffmpeg dependency required
     """
+    def __init__(self, timeout: int = 300):
         """Initialize media transcriber"""
+        self.timeout = timeout
+        self.temp_dir = tempfile.mkdtemp(prefix='audio_transcription_')
+        self.download_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
+            'Accept': 'audio/*,*/*;q=0.8'
+        }
+        self.faster_whisper_available = self._check_faster_whisper()
+        self.aipipe_available = self._check_aipipe()
+        logger.info(
+            f"MediaTranscriber initialized (audio-only) | "
+            f"faster-whisper: {'✓' if self.faster_whisper_available else '✗'} | "
+            f"AIPipe: {'✓' if self.aipipe_available else '✗'}"
+        )
+    def _check_faster_whisper(self) -> bool:
+        """Check if faster-whisper is available"""
+        try:
+            from faster_whisper import WhisperModel
+            return True
+        except ImportError:
+            return False
+    def _check_aipipe(self) -> bool:
+        """Check if AIPipe is configured"""
+        return settings.is_llm_configured()
+    async def transcribe_audio(self, url: str, language: Optional[str] = None) -> Dict[str, Any]:
         """
         Transcribe audio file
+        Supports: .mp3, .wav, .m4a, .ogg, .flac, .aac
+        """
+        logger.info(f"🎤 Transcribing audio: {url}")
+        try:
+            # Check if it's actually an audio file
+            if not self._is_audio_file(url):
+                logger.warning(f"Not an audio file: {url}")
+                return {
+                    'url': url,
+                    'transcription': (
+                        f'[Only audio files supported. Got: {url}. '
+                        f'Supported: .mp3, .wav, .m4a, .ogg, .flac, .aac]'
+                    ),
+                    'status': 'unsupported_format',
+                    'method': 'none',
+                    'language': 'unknown'
+                }
+            # Download audio
+            audio_path = await self._download_audio(url)
+            if not audio_path:
+                raise Exception("Failed to download audio")
+            # Transcribe
+            if self.faster_whisper_available:
+                result = await self._transcribe_with_faster_whisper(audio_path, language)
+            elif self.aipipe_available:
+                result = await self._transcribe_with_aipipe(audio_path, language)
+            else:
+                result = {
+                    'transcription': f'[Transcription unavailable. Install faster-whisper or set AIPIPE_TOKEN]',
+                    'language': 'unknown',
+                    'method': 'none',
+                    'status': 'unavailable'
+                }
+            result['url'] = url
+            logger.info(f"✅ Transcription complete | Method: {result['method']}")
+            return result
+        except Exception as e:
+            logger.error(f"❌ Transcription failed: {e}", exc_info=True)
+            return {
+                'url': url,
+                'transcription': f'[Transcription failed: {str(e)}]',
+                'status': 'error',
+                'method': 'none',  # ← ADD THIS
+                'language': 'unknown',  # ← ADD THIS
+                'error': str(e)
             }
+    async def transcribe_video(self, url: str, language: Optional[str] = None) -> Dict[str, Any]:
         """
+        Video transcription not supported on HF Spaces free tier
+        Returns helpful error message
+        """
+        logger.warning(f"⚠️  Video transcription not supported: {url}")
         return {
             'url': url,
+            'transcription': (
+                f'[Video transcription not supported on HF Spaces free tier. '
+                f'Video URL: {url}. '
+                f'To transcribe videos: '
+                f'1) Extract audio locally and upload as .mp3, or '
+                f'2) Use a service that provides direct audio URLs.]'
+            ),
             'language': 'unknown',
+            'method': 'none',
+            'status': 'video_not_supported',
+            'note': 'HF Spaces free tier limitation - no ffmpeg available'
         }
+    def _is_audio_file(self, url: str) -> bool:
+        """Check if URL is an audio file"""
+        audio_extensions = ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
+        url_lower = url.lower()
+        return any(url_lower.endswith(ext) for ext in audio_extensions)
+    async def _download_audio(self, url: str) -> Optional[str]:
+        """Download audio file"""
+        try:
+            logger.info(f"Downloading audio: {url}")
+            async with httpx.AsyncClient(
+                timeout=self.timeout,
+                follow_redirects=True,
+                headers=self.download_headers
+            ) as client:
+                response = await client.get(url)
+                response.raise_for_status()
+            # Save to temp
+            extension = Path(url.split('?')[0]).suffix or '.mp3'
+            file_path = os.path.join(self.temp_dir, f"audio_{hash(url)}{extension}")
+            with open(file_path, 'wb') as f:
+                f.write(response.content)
+            logger.info(f"✅ Downloaded: {len(response.content) / (1024*1024):.2f} MB")
+            return file_path
+        except Exception as e:
+            logger.error(f"Download failed: {e}")
+            return None
+    async def _transcribe_with_faster_whisper(
+        self,
+        audio_path: str,
+        language: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Transcribe with faster-whisper (local, no API key)"""
+        from faster_whisper import WhisperModel
+        if not hasattr(self, '_whisper_model'):
+            logger.info("Loading faster-whisper model...")
+            model_size = os.getenv('WHISPER_MODEL_SIZE', 'base')
+            self._whisper_model = WhisperModel(
+                model_size,
+                device="cpu",
+                compute_type="int8"
+            )
+            logger.info(f"✓ Model '{model_size}' loaded")
+        segments, info = self._whisper_model.transcribe(
+            audio_path,
+            language=language,
+            beam_size=5,
+            vad_filter=True
         )
+        transcription = ' '.join([s.text for s in segments]).strip()
         return {
+            'transcription': transcription,
+            'language': info.language if hasattr(info, 'language') else 'unknown',
+            'duration': info.duration if hasattr(info, 'duration') else None,
+            'method': 'faster_whisper',
+            'status': 'success'
         }
+    async def _transcribe_with_aipipe(
+        self,
+        audio_path: str,
+        language: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Transcribe with AIPipe API"""
+        logger.info("Transcribing with AIPipe...")
+        with open(audio_path, 'rb') as f:
+            audio_data = f.read()
+        files = {'file': (os.path.basename(audio_path), audio_data, 'audio/mpeg')}
+        data = {'model': 'gpt-4o-audio-preview'}
+        if language:
+            data['language'] = language
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                f"{settings.AIPIPE_BASE_URL}/audio/transcriptions",
+                headers={'Authorization': f'Bearer {settings.AIPIPE_TOKEN}'},
+                files=files,
+                data=data
+            )
+            response.raise_for_status()
+            result = response.json()
+        return {
+            'transcription': result.get('text', ''),
+            'language': result.get('language', 'unknown'),
+            'duration': result.get('duration'),
+            'method': 'aipipe',
+            'status': 'success'
+        }
+    def cleanup(self):
+        """Clean up temp files"""
+        try:
+            import shutil
+            shutil.rmtree(self.temp_dir, ignore_errors=True)
+        except Exception as e:
+            logger.warning(f"Cleanup failed: {e}")

requirements.txt CHANGED Viewed

@@ -24,6 +24,8 @@ Pillow
 # PDF Processing
 PyPDF2==3.0.1
 # Data Processing
 # pandas==2.2.0
 # numpy==1.26.3

 # PDF Processing
 PyPDF2==3.0.1
+faster-whisper
 # Data Processing
 # pandas==2.2.0
 # numpy==1.26.3

test/media_transcriber.py ADDED Viewed

	@@ -0,0 +1,506 @@

+"""
+Test Media Transcriber - Audio Only Version
+Tests for HF Spaces free tier (no ffmpeg)
+"""
+import sys
+import os
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(ROOT)
+import asyncio
+from app.orchestrator.actions.media_transcriber import MediaTranscriber
+from app.core.logging import setup_logging, get_logger
+setup_logging()
+logger = get_logger(__name__)
+async def test_speech_detection():
+    """Test transcription with real internet audio containing speech"""
+    print("\n" + "=" * 60)
+    print("Test: Speech Detection (Real World Audio)")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    # Public domain/open source audio samples with speech
+    speech_samples = [
+        {
+            'url': 'https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav',
+            'description': 'Open Speech Repository - American English',
+            'format': '.wav',
+            'duration': '~3 seconds',
+            'expected_type': 'clear speech',
+            'source': 'VoIP Troubleshooter Open Speech Repository'
+        },
+        {
+            'url': 'https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0011_8k.wav',
+            'description': 'Open Speech Repository - Short phrase',
+            'format': '.wav',
+            'duration': '~3 seconds',
+            'expected_type': 'clear speech',
+            'source': 'VoIP Troubleshooter Open Speech Repository'
+        },
+        {
+            'url': 'https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0012_8k.wav',
+            'description': 'Open Speech Repository - Another phrase',
+            'format': '.wav',
+            'duration': '~3 seconds',
+            'expected_type': 'clear speech',
+            'source': 'VoIP Troubleshooter Open Speech Repository'
+        }
+    ]
+    print("\n🎙️  Testing with real-world speech samples")
+    print("Source: Open Speech Repository (Public Domain)")
+    print()
+    success_count = 0
+    speech_detected_count = 0
+    for i, sample in enumerate(speech_samples, 1):
+        print(f"{'-' * 60}")
+        print(f"Test {i}/{len(speech_samples)}")
+        print(f"Audio: {sample['description']}")
+        print(f"URL: {sample['url']}")
+        print(f"Duration: {sample['duration']}")
+        print(f"Expected: {sample['expected_type']}")
+        print(f"{'-' * 60}")
+        try:
+            result = await transcriber.transcribe_audio(sample['url'])
+            status = result.get('status', 'unknown')
+            method = result.get('method', 'none')
+            print(f"\n✓ Status: {status}")
+            print(f"✓ Method: {method}")
+            if status == 'success':
+                language = result.get('language', 'unknown')
+                duration = result.get('duration')
+                transcription = result.get('transcription', '').strip()
+                print(f"✓ Language: {language}")
+                if duration:
+                    print(f"✓ Duration: {duration:.2f} seconds")
+                word_count = len(transcription.split()) if transcription else 0
+                print(f"✓ Word count: {word_count}")
+                if word_count > 0:
+                    print(f"\n✅ SPEECH DETECTED!")
+                    print(f"\n📝 Transcribed text:")
+                    print(f'   "{transcription}"')
+                    speech_detected_count += 1
+                else:
+                    print(f"\n⚠️  No words detected")
+                success_count += 1
+            elif status == 'unavailable':
+                print("\n⚠️  Transcription backend not available")
+                print("💡 Install: pip install faster-whisper")
+                break
+            elif status == 'error':
+                error_msg = result.get('error', 'Unknown')
+                print(f"\n❌ Error: {error_msg[:150]}")
+                # Check error type
+                if any(x in error_msg.lower() for x in ['network', 'dns', 'timeout', 'nodename']):
+                    print("   (Network error - trying next sample...)")
+                    continue
+                else:
+                    print("   (Non-network error - skipping remaining tests)")
+                    break
+        except Exception as e:
+            print(f"\n❌ Exception: {str(e)[:150]}")
+            logger.error(f"Test {i} failed", exc_info=True)
+            continue
+        print()
+    # Summary
+    print("=" * 60)
+    print("SPEECH DETECTION SUMMARY")
+    print("=" * 60)
+    if success_count > 0:
+        print(f"✅ {success_count}/{len(speech_samples)} samples processed")
+        print(f"🎙️  {speech_detected_count}/{success_count} detected speech")
+        if speech_detected_count > 0:
+            print(f"\n🎉 SUCCESS! Real-world speech transcription working")
+            print(f"   System successfully transcribed human speech from internet audio")
+        else:
+            print(f"\n⚠️  Processed but no speech detected")
+    else:
+        if not (transcriber.faster_whisper_available or transcriber.aipipe_available):
+            print("⚠️  No transcription backend installed")
+            print("   Install: pip install faster-whisper")
+        else:
+            print("⚠️  Audio files unavailable or network issue")
+            print("   The transcriber itself is properly configured")
+    print("=" * 60)
+    return transcriber
+async def test_small_audio_files():
+    """Test with small audio files suitable for quick tasks"""
+    print("\n" + "=" * 60)
+    print("Test 1: Small Audio Files (< 30 seconds)")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    # Small, reliable test audio files
+    test_audios = [
+        {
+            'url': 'https://actions.google.com/sounds/v1/alarms/beep_short.ogg',
+            'description': 'Very short beep (< 1 second)',
+            'format': '.ogg',
+            'expected_duration': '< 1 sec',
+            'expected_text': 'Instrumental/beep (no speech)'
+        },
+        {
+            'url': 'https://actions.google.com/sounds/v1/cartoon/cartoon_boing.ogg',
+            'description': 'Short sound effect (< 2 seconds)',
+            'format': '.ogg',
+            'expected_duration': '~2 sec',
+            'expected_text': 'Sound effect (no speech)'
+        }
+    ]
+    print("\n📝 Testing with small audio samples suitable for 3-minute tasks\n")
+    success_count = 0
+    for i, test_audio in enumerate(test_audios, 1):
+        print(f"{'-' * 60}")
+        print(f"Test {i}/{len(test_audios)}: {test_audio['description']}")
+        print(f"URL: {test_audio['url']}")
+        print(f"Format: {test_audio['format']}")
+        print(f"Expected duration: {test_audio['expected_duration']}")
+        print(f"Expected: {test_audio['expected_text']}")
+        print(f"{'-' * 60}")
+        try:
+            result = await transcriber.transcribe_audio(test_audio['url'])
+            status = result.get('status', 'unknown')
+            method = result.get('method', 'none')
+            print(f"\n✓ Status: {status}")
+            print(f"✓ Method: {method}")
+            if status == 'success':
+                print(f"✅ Transcription successful!")
+                language = result.get('language', 'unknown')
+                print(f"✓ Language: {language}")
+                duration = result.get('duration')
+                if duration:
+                    print(f"✓ Duration: {duration:.2f} seconds")
+                transcription = result.get('transcription', '')
+                print(f"✓ Text length: {len(transcription)} chars")
+                if transcription.strip():
+                    print(f"\n📝 Transcription:")
+                    print(f"   {transcription[:200]}")
+                else:
+                    print(f"\n📝 No speech detected (expected for sound effects)")
+                success_count += 1
+            elif status == 'unavailable':
+                print("⚠️  Transcription backend not available")
+                print("\n💡 To enable transcription:")
+                print("   1. Install: pip install faster-whisper")
+                print("   2. Or set AIPIPE_TOKEN in .env")
+                break  # No point testing other files
+            elif status == 'error':
+                error_msg = result.get('error', 'Unknown')
+                print(f"❌ Error: {error_msg[:100]}")
+                # Check if it's a network error
+                if any(x in error_msg.lower() for x in ['network', 'dns', 'nodename', 'timeout']):
+                    print("   ℹ️  Network error - URL may be temporarily unavailable")
+        except Exception as e:
+            print(f"❌ Exception: {str(e)[:100]}")
+            logger.error(f"Test {i} failed", exc_info=True)
+        print()
+    # Summary
+    print("=" * 60)
+    if success_count > 0:
+        print(f"✅ {success_count}/{len(test_audios)} audio files transcribed successfully")
+    elif transcriber.faster_whisper_available or transcriber.aipipe_available:
+        print("⚠️  Transcription available but test files failed to download")
+        print("   (Network issue - the transcriber itself is working)")
+    else:
+        print("ℹ️  No transcription backend installed")
+    print("=" * 60)
+    return transcriber
+async def test_video_rejection():
+    """Test that video files are rejected gracefully"""
+    print("\n" + "=" * 60)
+    print("Test 2: Video File Rejection (Audio-Only Mode)")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    # Test video URL
+    test_video = {
+        'url': 'https://example.com/sample-video.mp4',
+        'description': 'Sample video file'
+    }
+    print(f"\n📹 Testing: {test_video['description']}")
+    print(f"URL: {test_video['url']}")
+    print(f"Expected: Rejection with helpful message")
+    print("-" * 60)
+    result = await transcriber.transcribe_video(test_video['url'])
+    status = result.get('status', 'unknown')
+    print(f"\n✓ Status: {status}")
+    if status == 'video_not_supported':
+        print(f"✅ Video correctly rejected (audio-only mode)")
+        print(f"\n📝 Message shown to user:")
+        print(f"   {result.get('transcription', '')[:200]}...")
+    else:
+        print(f"⚠️  Unexpected status: {status}")
+    return transcriber
+async def test_format_detection():
+    """Test audio format detection"""
+    print("\n" + "=" * 60)
+    print("Test 3: Format Detection & Validation")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    test_cases = [
+        {
+            'url': 'https://example.com/file.mp3',
+            'expected': 'audio',
+            'description': 'MP3 audio file'
+        },
+        {
+            'url': 'https://example.com/file.wav',
+            'expected': 'audio',
+            'description': 'WAV audio file'
+        },
+        {
+            'url': 'https://example.com/file.m4a',
+            'expected': 'audio',
+            'description': 'M4A audio file'
+        },
+        {
+            'url': 'https://example.com/image.png',
+            'expected': 'unsupported',
+            'description': 'PNG image (not audio)'
+        },
+        {
+            'url': 'https://example.com/doc.pdf',
+            'expected': 'unsupported',
+            'description': 'PDF document (not audio)'
+        }
+    ]
+    print("\n🔍 Testing format detection for various file types:\n")
+    for i, test in enumerate(test_cases, 1):
+        is_audio = transcriber._is_audio_file(test['url'])
+        detected = 'audio' if is_audio else 'unsupported'
+        if detected == test['expected']:
+            status = "✅"
+        else:
+            status = "❌"
+        print(f"{status} {test['description']}")
+        print(f"   URL: {test['url']}")
+        print(f"   Detected: {detected} | Expected: {test['expected']}")
+        print()
+    return transcriber
+async def test_backend_check():
+    """Test backend availability"""
+    print("\n" + "=" * 60)
+    print("Test 4: Transcription Backend Status")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    print("\n🔧 Checking available backends:\n")
+    # Check faster-whisper
+    if transcriber.faster_whisper_available:
+        print("✅ faster-whisper: Available")
+        print("   → Local transcription (CPU)")
+        print("   → No API key needed")
+        print("   → Free, unlimited")
+        print("   → Model: base (~150MB)")
+        print("   → Speed: ~20 seconds per minute of audio")
+    else:
+        print("❌ faster-whisper: Not installed")
+        print("   → Install: pip install faster-whisper")
+    print()
+    # Check AIPipe
+    if transcriber.aipipe_available:
+        print("✅ AIPipe: Configured")
+        print("   → Cloud transcription")
+        print("   → Uses AIPIPE_TOKEN")
+        print("   → Model: gpt-4o-audio-preview")
+        print("   → Speed: ~5 seconds per minute of audio")
+    else:
+        print("❌ AIPipe: Not configured")
+        print("   → Set AIPIPE_TOKEN in .env")
+    print()
+    # Recommendation
+    if transcriber.faster_whisper_available:
+        print("💡 Recommendation: Using faster-whisper (local, free)")
+    elif transcriber.aipipe_available:
+        print("💡 Recommendation: Using AIPipe (cloud, paid)")
+    else:
+        print("⚠️  No transcription backend available")
+        print("\n📦 Quick Setup:")
+        print("   pip install faster-whisper")
+    return transcriber
+async def test_performance_estimate():
+    """Show performance estimates for typical task sizes"""
+    print("\n" + "=" * 60)
+    print("Test 5: Performance Estimates")
+    print("=" * 60)
+    transcriber = MediaTranscriber()
+    # Typical task scenarios
+    scenarios = [
+        {'duration': 10, 'description': 'Very short clip'},
+        {'duration': 30, 'description': 'Short instruction'},
+        {'duration': 60, 'description': 'One minute audio'},
+        {'duration': 120, 'description': 'Two minute recording'},
+        {'duration': 180, 'description': 'Maximum task audio (3 min)'}
+    ]
+    print("\n⏱️  Estimated transcription times for HF Spaces free tier:\n")
+    print(f"{'Audio Duration':<20} | {'faster-whisper':<20} | {'AIPipe':<20}")
+    print("-" * 65)
+    for scenario in scenarios:
+        duration = scenario['duration']
+        desc = scenario['description']
+        # Estimates (conservative for free tier CPU)
+        local_time = duration * 0.3  # ~30% of audio duration
+        cloud_time = duration * 0.1  # ~10% of audio duration
+        print(f"{duration}s ({desc:<15}) | ~{local_time:.0f}s               | ~{cloud_time:.0f}s")
+    print()
+    print("📝 Notes:")
+    print("   - Estimates for HF Spaces CPU tier")
+    print("   - faster-whisper: First run downloads model (~30s)")
+    print("   - AIPipe: Network latency may add 1-2 seconds")
+    print("   - All times well within 3-minute task limit")
+    return transcriber
+async def run_all_tests():
+    """Run all tests"""
+    print("\n" + "=" * 80)
+    print(" " * 15 + "MEDIA TRANSCRIBER TEST SUITE")
+    print(" " * 12 + "(Small Audio Files - 3 Minute Tasks)")
+    print("=" * 80)
+    transcriber = None
+    try:
+        # Test 1: Small audio files
+        transcriber = await test_small_audio_files()
+        # Test 2: Video rejection
+        if transcriber:
+            transcriber.cleanup()
+        transcriber = await test_video_rejection()
+        # Test 3: Format detection
+        if transcriber:
+            transcriber.cleanup()
+        transcriber = await test_format_detection()
+        # Test 4: Backend check
+        if transcriber:
+            transcriber.cleanup()
+        transcriber = await test_backend_check()
+        # Test 5: Performance estimates
+        if transcriber:
+            transcriber.cleanup()
+        transcriber = await test_performance_estimate()
+        if transcriber:
+            transcriber.cleanup()
+        transcriber = await test_speech_detection()
+        print("\n" + "=" * 80)
+        print(" " * 30 + "TESTS COMPLETE")
+        print("=" * 80)
+        print("\n✅ All tests finished!")
+        print("\n📊 Summary:")
+        print("   • Small audio files tested (< 30 seconds)")
+        print("   • Video rejection verified")
+        print("   • Format detection working")
+        print("   • Performance suitable for 3-minute tasks")
+        print("\n💡 For production: Install faster-whisper for free local transcription")
+    except Exception as e:
+        print("\n" + "=" * 80)
+        print(f"❌ Test suite error: {e}")
+        print("=" * 80)
+        logger.error("Test suite failed", exc_info=True)
+    finally:
+        if transcriber:
+            transcriber.cleanup()
+            print("\n🧹 Cleanup complete")
+if __name__ == "__main__":
+    asyncio.run(run_all_tests())