piyushdev committed on
Commit
3f3d51a
·
verified ·
1 Parent(s): d7df1b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +319 -377
app.py CHANGED
@@ -7,35 +7,35 @@ import time
7
  from datetime import datetime
8
  import traceback
9
 
10
- # Model configurations with their strengths
11
  MODEL_CONFIGS = {
12
- "Meta Llama 3.1 70B Instruct (Best Quality)": {
13
- "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct",
14
- "description": "Excellent for creative and detailed descriptions",
15
  "default_temp": 0.3,
 
 
 
 
 
 
16
  "max_tokens": 300
17
  },
18
- "Qwen 2.5 72B Instruct (Fast & Accurate)": {
19
- "model_id": "Qwen/Qwen2.5-72B-Instruct",
20
- "description": "Great balance of speed and quality",
21
  "default_temp": 0.35,
22
  "max_tokens": 300
23
  },
24
- "GPT-OSS 20B (Original)": {
25
- "model_id": "openai/gpt-oss-20b",
26
- "description": "Your current model - good for structured output",
27
- "default_temp": 0.3,
28
- "max_tokens": 256
29
- },
30
- "Mixtral 8x7B (Efficient)": {
31
- "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
32
- "description": "Fast processing for large batches",
33
- "default_temp": 0.4,
34
  "max_tokens": 300
35
  }
36
  }
37
 
38
- # Different system prompts for different description styles
39
  PROMPT_TEMPLATES = {
40
  "Clip-Ready Visual (15-30 words)": """You are an expert at writing ultra-concise, visual descriptions for CLIP models and image search.
41
 
@@ -47,14 +47,19 @@ For each business category, create a description that:
47
  5. Describes physical appearance, setting, or visual activity
48
 
49
  Examples:
50
- Category: "Car Rental"
51
  Description: "rental car with keys, parked at pickup location, clean interior visible, rental company signage"
52
 
53
- Category: "Yoga Classes"
54
- Description: "people in yoga poses on mats, stretching in studio with mirrors, instructor demonstrating position"
55
 
56
- IMPORTANT: Respond with ONLY a JSON object:
57
- {"Category": "category name", "Description": "visual description"}""",
 
 
 
 
 
58
 
59
  "Standard Business (40-60 words)": """You are creating professional business descriptions for directory listings.
60
 
@@ -63,140 +68,114 @@ Generate descriptions that:
63
  2. Define the service clearly
64
  3. Include key visual and contextual elements
65
  4. Are suitable for yellow pages or business directories
66
- 5. Focus on what customers would see or experience
67
 
68
- Examples:
69
  Category: "Photography Studio"
70
- Description: "Professional photography space with lighting equipment, backdrops, and cameras. Photographer capturing portraits, events, or products. Studio setup with tripods, reflectors, softboxes. Clients posing for shots, reviewing images on screens. Portfolio displays, editing workstations visible."
71
 
72
  IMPORTANT: Respond with ONLY a JSON object:
73
  {"Category": "category name", "Description": "description text"}""",
74
 
75
- "E-commerce Ready (30-50 words)": """You are writing descriptions optimized for e-commerce and online marketplaces.
76
 
77
- Create descriptions that:
78
- 1. Are 30-50 words
79
- 2. Highlight visual product/service attributes
80
- 3. Include searchable keywords
81
- 4. Focus on customer benefits
82
- 5. Use action-oriented language
83
 
84
- IMPORTANT: Respond with ONLY a JSON object:
85
- {"Category": "category name", "Description": "description text"}""",
86
-
87
- "Custom Prompt": "" # Will be filled by user
88
  }
89
 
90
- class EnhancedDescriptionGenerator:
91
- def __init__(self):
92
- self.clients = {}
93
- self.initialize_clients()
94
 
95
- def initialize_clients(self):
96
- """Initialize all model clients"""
97
- hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
98
-
99
- if not hf_token:
100
- raise ValueError("HF_TOKEN not found in environment variables")
101
-
102
- for model_name, config in MODEL_CONFIGS.items():
103
- try:
104
- self.clients[model_name] = InferenceClient(
105
- token=hf_token,
106
- model=config["model_id"]
107
- )
108
- print(f"✅ Initialized: {model_name}")
109
- except Exception as e:
110
- print(f"⚠️ Failed to initialize {model_name}: {str(e)}")
111
- self.clients[model_name] = None
112
 
113
- def extract_json_from_response(self, response_text):
114
- """Enhanced JSON extraction with multiple fallback methods"""
115
- if not response_text:
116
- raise ValueError("Empty response")
117
-
118
- response_text = response_text.strip()
119
-
120
- # Method 1: Clean markdown formatting
121
- if "```json" in response_text:
122
- response_text = response_text.split("```json")[1].split("```")[0].strip()
123
- elif "```" in response_text:
124
- response_text = response_text.split("```")[1].split("```")[0].strip()
125
-
126
- # Method 2: Extract JSON object
127
- if "{" in response_text and "}" in response_text:
128
- start = response_text.find("{")
129
- end = response_text.rfind("}") + 1
130
- json_str = response_text[start:end]
131
- else:
132
- json_str = response_text
133
-
134
  # Try to parse JSON
 
 
 
 
 
 
 
 
135
  try:
136
  parsed = json.loads(json_str)
137
- except json.JSONDecodeError:
138
- # Method 3: Try to fix common JSON issues
139
- json_str = json_str.replace("'", '"') # Replace single quotes
140
- json_str = json_str.replace("\n", " ") # Remove newlines
141
- json_str = json_str.replace("\\", "\\\\") # Escape backslashes
142
- parsed = json.loads(json_str)
143
-
144
- # Validate and extract description
145
- if not isinstance(parsed, dict):
146
- raise ValueError("Response is not a JSON object")
147
-
148
- description = (
149
- parsed.get("Description") or
150
- parsed.get("description") or
151
- parsed.get("Desc") or
152
- parsed.get("desc") or
153
- ""
154
- )
155
-
156
- if not description or len(description.strip()) < 10:
157
- raise ValueError("Description is missing or too short")
158
-
159
- return description.strip()
160
 
161
- def process_single_category(
162
- self,
163
- category,
164
- model_name,
165
- prompt_template,
166
- custom_prompt,
167
- max_tokens,
168
- temperature,
169
- top_p,
170
- retry_count=3
171
- ):
172
- """Process a single category with the selected model and prompt"""
173
-
174
- client = self.clients.get(model_name)
175
- if not client:
176
- return None, f"Model {model_name} not available"
177
-
178
- # Select and prepare the prompt
179
- if prompt_template == "Custom Prompt":
180
- if not custom_prompt:
181
- return None, "Custom prompt is required when 'Custom Prompt' is selected"
182
- system_prompt = custom_prompt
183
- else:
184
- system_prompt = PROMPT_TEMPLATES[prompt_template]
 
 
 
 
 
 
 
 
185
 
 
186
  messages = [
187
  {"role": "system", "content": system_prompt},
188
  {"role": "user", "content": f"Category: \"{category}\""}
189
  ]
190
 
191
- last_error = None
192
-
193
  for attempt in range(retry_count):
194
  try:
195
  if attempt > 0:
196
  time.sleep(1)
197
 
198
- # Make API call
199
  response_text = ""
 
 
200
  for message in client.chat_completion(
201
  messages,
202
  max_tokens=max_tokens,
@@ -212,55 +191,81 @@ class EnhancedDescriptionGenerator:
212
  elif isinstance(message, str):
213
  response_text += message
214
 
215
- # Validate response
216
  if not response_text or len(response_text.strip()) < 5:
217
- raise ValueError("Empty or too short response")
218
 
219
- # Extract description
220
- description = self.extract_json_from_response(response_text)
221
 
222
- # Count words for validation
223
- word_count = len(description.split())
 
 
 
 
 
 
 
224
 
225
- return response_text.strip(), description, word_count
 
 
 
226
 
227
- except Exception as e:
228
- last_error = f"Attempt {attempt + 1}/{retry_count}: {str(e)}"
 
 
 
229
 
230
- # On last attempt, try to use raw response if it looks valid
231
- if attempt == retry_count - 1 and response_text:
232
- if len(response_text.strip()) > 20 and not response_text.startswith("{"):
233
- return response_text.strip(), response_text.strip(), len(response_text.split())
234
-
235
- raise Exception(f"Failed after {retry_count} attempts. Last error: {last_error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- def process_csv_advanced(
238
  files,
239
  category_column,
240
  model_name,
241
  prompt_template,
242
- custom_prompt,
243
  max_tokens,
244
  temperature,
245
  top_p,
246
  output_format,
247
  progress=gr.Progress()
248
  ):
249
- """Enhanced CSV processing with multiple models and output formats"""
250
 
251
  if not files or len(files) == 0:
252
  return "Please upload at least one CSV file.", None, None
253
 
254
- # Check for HF token
255
  hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
256
- if not hf_token:
257
- return "⚠️ Error: HF_TOKEN not found. Please add your Hugging Face token as a Space Secret.", None, None
258
 
259
- # Initialize generator
260
- try:
261
- generator = EnhancedDescriptionGenerator()
262
- except Exception as e:
263
- return f"Error initializing models: {str(e)}", None, None
264
 
265
  all_results = []
266
  status_messages = []
@@ -273,12 +278,13 @@ def process_csv_advanced(
273
  file_name = os.path.basename(file.name)
274
  status_messages.append(f"📄 Processing file {file_idx + 1}/{len(files)}: {file_name}")
275
 
276
- # Check column exists
277
  if category_column not in df.columns:
278
- status_messages.append(f"⚠️ Column '{category_column}' not found in {file_name}. Available: {', '.join(df.columns)}")
 
279
  continue
280
 
281
- # Process categories
282
  categories = df[category_column].dropna().unique()
283
  total_categories = len(categories)
284
 
@@ -287,63 +293,65 @@ def process_csv_advanced(
287
  for idx, category in enumerate(categories):
288
  progress(
289
  (file_idx * total_categories + idx) / (len(files) * total_categories),
290
- desc=f"File {file_idx + 1}/{len(files)} - Category {idx + 1}/{total_categories}: {category[:30]}..."
291
  )
292
 
293
  try:
294
- raw_response, description, word_count = generator.process_single_category(
295
  category,
296
  model_name,
297
  prompt_template,
298
- custom_prompt,
299
  max_tokens,
300
  temperature,
301
- top_p
 
302
  )
303
 
304
  result = {
305
- "File": file_name,
306
  "Category": category,
307
  "Description": description,
308
- "Word_Count": word_count,
309
- "Model": model_name,
310
- "Prompt_Type": prompt_template,
311
  "Raw_Response": raw_response,
312
  "Status": "Success"
313
  }
314
 
315
  file_results.append(result)
316
  all_results.append(result)
317
- status_messages.append(f"✅ {category[:30]}... ({word_count} words)")
318
 
319
  except Exception as e:
320
- error_msg = str(e)[:100]
 
 
 
321
  result = {
322
- "File": file_name,
323
  "Category": category,
324
- "Description": f"[FAILED: {error_msg}]",
325
  "Word_Count": 0,
326
- "Model": model_name,
327
- "Prompt_Type": prompt_template,
328
  "Raw_Response": "",
329
- "Status": f"Failed: {error_msg}"
330
  }
331
 
332
  file_results.append(result)
333
  all_results.append(result)
334
- status_messages.append(f"❌ {category[:30]}... - {error_msg}")
335
 
336
  # Rate limiting
337
- time.sleep(0.3)
338
 
339
- # Create output file for this CSV
340
  if file_results:
341
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
342
  base_name = os.path.splitext(file_name)[0]
343
 
 
 
 
344
  if output_format in ["CSV", "Both"]:
345
  csv_filename = f"output_{base_name}_{timestamp}.csv"
346
- pd.DataFrame(file_results).to_csv(csv_filename, index=False)
347
  output_files.append(csv_filename)
348
 
349
  if output_format in ["JSON", "Both"]:
@@ -352,239 +360,173 @@ def process_csv_advanced(
352
  json.dump(file_results, f, indent=2)
353
  output_files.append(json_filename)
354
 
355
- # Summary for this file
356
  success_count = sum(1 for r in file_results if r["Status"] == "Success")
357
  failed_count = len(file_results) - success_count
358
- avg_words = sum(r["Word_Count"] for r in file_results if r["Status"] == "Success") / max(success_count, 1)
359
 
360
  status_messages.append(f"""
361
  📊 {file_name} Summary:
362
  - Total: {len(file_results)} categories
363
- - Success: {success_count} ({success_count/len(file_results)*100:.1f}%)
364
  - Failed: {failed_count}
365
- - Avg Words: {avg_words:.1f}
366
  """)
367
 
368
  except Exception as e:
369
- status_messages.append(f"❌ Error processing {os.path.basename(file.name)}: {str(e)}")
370
 
371
- # Overall summary
372
  if all_results:
373
  total_success = sum(1 for r in all_results if r["Status"] == "Success")
374
  total_failed = len(all_results) - total_success
375
 
376
- summary = f"""
377
- ## 🎯 Processing Complete!
378
-
379
- **Model Used:** {model_name}
380
- **Prompt Template:** {prompt_template}
381
 
382
- ### Overall Statistics:
383
- - **Total Categories Processed:** {len(all_results)}
384
  - **Successful:** {total_success} ({total_success/len(all_results)*100:.1f}%)
385
- - **Failed:** {total_failed} ({total_failed/len(all_results)*100:.1f}%)
386
- - **Average Word Count:** {sum(r['Word_Count'] for r in all_results if r['Status'] == 'Success') / max(total_success, 1):.1f}
387
 
388
- ### File Processing Log:
389
  """
390
  status_text = summary + "\n".join(status_messages)
391
 
392
- # Create combined output file
393
- if output_format in ["CSV", "Both"]:
394
- combined_csv = f"combined_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
395
- pd.DataFrame(all_results).to_csv(combined_csv, index=False)
396
- output_files.append(combined_csv)
397
 
398
- if output_format in ["JSON", "Both"]:
399
- combined_json = f"combined_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
400
- with open(combined_json, 'w') as f:
401
- json.dump(all_results, f, indent=2)
402
- output_files.append(combined_json)
403
-
404
- # Create summary DataFrame for display
405
- summary_df = pd.DataFrame(all_results)[['Category', 'Description', 'Word_Count', 'Status']]
406
-
407
- return status_text, output_files, summary_df
408
  else:
409
- return "\n".join(status_messages) + "\n\n⚠️ No categories were processed successfully.", None, None
410
 
411
- # Create enhanced Gradio interface
412
- def create_interface():
413
- with gr.Blocks(title="Enhanced Business Description Generator", theme=gr.themes.Soft()) as demo:
414
- gr.Markdown("""
415
- # πŸš€ Enhanced Multi-Model Business Description Generator
416
-
417
- Generate professional, clip-ready descriptions using multiple state-of-the-art AI models.
418
-
419
- ### ✨ New Features:
420
- - πŸ€– **4 Different AI Models** to choose from
421
- - πŸ“ **Multiple Prompt Templates** for different use cases
422
- - πŸ“Š **Word Count Tracking** for all descriptions
423
- - πŸ’Ύ **CSV & JSON Export** options
424
- - πŸ“ˆ **Enhanced Statistics** and success tracking
425
- """)
426
-
427
- with gr.Row():
428
- with gr.Column(scale=1):
429
- gr.Markdown("### πŸ“€ Input Configuration")
430
-
431
- files_input = gr.File(
432
- label="Upload CSV Files",
433
- file_count="multiple",
434
- file_types=[".csv"]
435
- )
436
-
437
- category_column = gr.Textbox(
438
- label="Category Column Name",
439
- value="category",
440
- placeholder="Column containing categories"
441
- )
442
-
443
- gr.Markdown("### πŸ€– Model Selection")
444
-
445
- model_selector = gr.Dropdown(
446
- label="Select AI Model",
447
- choices=list(MODEL_CONFIGS.keys()),
448
- value=list(MODEL_CONFIGS.keys())[0],
449
- info="Each model has different strengths"
450
- )
451
-
452
- # Model description display
453
- model_info = gr.Markdown("")
454
-
455
- prompt_template = gr.Dropdown(
456
- label="Prompt Template",
457
- choices=list(PROMPT_TEMPLATES.keys()),
458
- value="Clip-Ready Visual (15-30 words)",
459
- info="Choose based on your use case"
460
- )
461
-
462
- custom_prompt = gr.Textbox(
463
- label="Custom System Prompt (if Custom selected)",
464
- placeholder="Enter your custom instructions here...",
465
- lines=4,
466
- visible=False
467
- )
468
-
469
- gr.Markdown("### βš™οΈ Generation Settings")
470
-
471
- with gr.Row():
472
- temperature = gr.Slider(
473
- minimum=0.1,
474
- maximum=1.0,
475
- value=0.3,
476
- step=0.05,
477
- label="Temperature",
478
- info="Lower = consistent, Higher = creative"
479
- )
480
-
481
- top_p = gr.Slider(
482
- minimum=0.1,
483
- maximum=1.0,
484
- value=0.9,
485
- step=0.05,
486
- label="Top-p"
487
- )
488
-
489
- max_tokens = gr.Slider(
490
- minimum=64,
491
- maximum=512,
492
- value=256,
493
- step=16,
494
- label="Max Tokens"
495
- )
496
-
497
- output_format = gr.Radio(
498
- label="Output Format",
499
- choices=["CSV", "JSON", "Both"],
500
- value="CSV"
501
- )
502
-
503
- process_btn = gr.Button("πŸš€ Generate Descriptions", variant="primary", size="lg")
504
 
505
- with gr.Column(scale=2):
506
- gr.Markdown("### πŸ“Š Results")
507
-
508
- status_output = gr.Markdown(label="Processing Status")
509
-
510
- results_preview = gr.Dataframe(
511
- label="Results Preview",
512
- headers=["Category", "Description", "Word_Count", "Status"],
513
- datatype=["str", "str", "number", "str"],
514
- col_count=4,
515
- wrap=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  )
517
 
518
- files_output = gr.File(
519
- label="πŸ“₯ Download Output Files",
520
- file_count="multiple"
 
 
 
521
  )
522
-
523
- with gr.Row():
524
- gr.Markdown("""
525
- ### πŸ’‘ Model Recommendations:
526
 
527
- | Model | Best For | Speed | Quality |
528
- |-------|----------|-------|---------|
529
- | **Llama 3.1 70B** | Creative, detailed descriptions | Medium | ⭐⭐⭐⭐⭐ |
530
- | **Qwen 2.5 72B** | Balanced performance | Fast | ⭐⭐⭐⭐ |
531
- | **GPT-OSS 20B** | Structured, consistent output | Fast | ⭐⭐⭐⭐ |
532
- | **Mixtral 8x7B** | Large batch processing | Very Fast | ⭐⭐⭐ |
 
533
 
534
- ### πŸ“ Template Guide:
535
- - **Clip-Ready Visual**: 15-30 words, focus on visual elements only
536
- - **Standard Business**: 40-60 words, comprehensive directory descriptions
537
- - **E-commerce Ready**: 30-50 words, optimized for online marketplaces
538
- - **Custom Prompt**: Use your own instructions for specific needs
539
- """)
540
-
541
- # Update model info when selection changes
542
- def update_model_info(model_name):
543
- config = MODEL_CONFIGS[model_name]
544
- return f"ℹ️ **{config['description']}**\nRecommended temp: {config['default_temp']}"
545
-
546
- model_selector.change(
547
- update_model_info,
548
- inputs=[model_selector],
549
- outputs=[model_info]
550
- )
551
-
552
- # Show/hide custom prompt field
553
- def toggle_custom_prompt(template):
554
- return gr.update(visible=(template == "Custom Prompt"))
555
-
556
- prompt_template.change(
557
- toggle_custom_prompt,
558
- inputs=[prompt_template],
559
- outputs=[custom_prompt]
560
- )
561
 
562
- # Process button click
563
- process_btn.click(
564
- fn=process_csv_advanced,
565
- inputs=[
566
- files_input,
567
- category_column,
568
- model_selector,
569
- prompt_template,
570
- custom_prompt,
571
- max_tokens,
572
- temperature,
573
- top_p,
574
- output_format
575
- ],
576
- outputs=[status_output, files_output, results_preview]
577
- )
 
 
 
 
 
 
 
 
 
 
578
 
579
- # Set initial model info
580
- demo.load(
581
- update_model_info,
582
- inputs=[model_selector],
583
- outputs=[model_info]
584
- )
585
 
586
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
 
588
  if __name__ == "__main__":
589
- demo = create_interface()
590
  demo.launch()
 
7
  from datetime import datetime
8
  import traceback
9
 
10
+ # Working model configurations - These are verified to work with HF Inference API
11
  MODEL_CONFIGS = {
12
+ "GPT-OSS 20B (Reliable)": {
13
+ "model_id": "openai/gpt-oss-20b",
14
+ "description": "Your current model - reliable for structured output",
15
  "default_temp": 0.3,
16
+ "max_tokens": 256
17
+ },
18
+ "Mistral 7B Instruct (Fast)": {
19
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
20
+ "description": "Fast and efficient, good for large batches",
21
+ "default_temp": 0.4,
22
  "max_tokens": 300
23
  },
24
+ "Zephyr 7B Beta (Quality)": {
25
+ "model_id": "HuggingFaceH4/zephyr-7b-beta",
26
+ "description": "Good balance of quality and speed",
27
  "default_temp": 0.35,
28
  "max_tokens": 300
29
  },
30
+ "OpenChat 3.5 (Creative)": {
31
+ "model_id": "openchat/openchat-3.5-0106",
32
+ "description": "More creative descriptions",
33
+ "default_temp": 0.5,
 
 
 
 
 
 
34
  "max_tokens": 300
35
  }
36
  }
37
 
38
+ # Enhanced prompt templates for better clip-ready descriptions
39
  PROMPT_TEMPLATES = {
40
  "Clip-Ready Visual (15-30 words)": """You are an expert at writing ultra-concise, visual descriptions for CLIP models and image search.
41
 
 
47
  5. Describes physical appearance, setting, or visual activity
48
 
49
  Examples:
50
+ Category: "Car Rental For Self Driven"
51
  Description: "rental car with keys, parked at pickup location, clean interior visible, rental company signage"
52
 
53
+ Category: "Mehandi"
54
+ Description: "henna artwork on hands, intricate patterns being applied, cones and design templates visible"
55
 
56
+ Category: "Photographer"
57
+ Description: "person with camera shooting, tripods and lighting equipment, studio setup with backdrops"
58
+
59
+ IMPORTANT: Respond with ONLY a JSON object in this exact format:
60
+ {"Category": "category name", "Description": "visual description"}
61
+
62
+ Do not include any other text, explanations, or markdown formatting.""",
63
 
64
  "Standard Business (40-60 words)": """You are creating professional business descriptions for directory listings.
65
 
 
68
  2. Define the service clearly
69
  3. Include key visual and contextual elements
70
  4. Are suitable for yellow pages or business directories
 
71
 
72
+ Example format:
73
  Category: "Photography Studio"
74
+ Description: "Professional photography space with lighting equipment, backdrops, and cameras. Photographer capturing portraits, events, or products. Studio setup with tripods, reflectors, softboxes. Clients posing for shots, reviewing images on screens."
75
 
76
  IMPORTANT: Respond with ONLY a JSON object:
77
  {"Category": "category name", "Description": "description text"}""",
78
 
79
+ "Your Original Prompt": """You are an expert at writing clear and visual descriptions for a business category keyword for a yellow pages or business listing website. Given a category keyword, generate a single, detailed description that defines its key visual elements, location, and context. Do not add artistic or stylistic flair. Ensure that the description is CLIP model ready and not too verbose.
80
 
81
+ IMPORTANT: You must respond with ONLY a valid JSON object in this exact format:
82
+ {"Category": "category name", "Description": "description text"}
 
 
 
 
83
 
84
+ Do not include any other text, explanations, or markdown formatting. Only output the JSON object."""
 
 
 
85
  }
86
 
87
+ def extract_json_from_response(response_text):
88
+ """Enhanced JSON extraction with better error handling"""
89
+ if not response_text:
90
+ raise ValueError("Empty response")
91
 
92
+ response_text = response_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ # Clean markdown formatting
95
+ if "```json" in response_text:
96
+ response_text = response_text.split("```json")[1].split("```")[0].strip()
97
+ elif "```" in response_text:
98
+ response_text = response_text.split("```")[1].split("```")[0].strip()
99
+
100
+ # Find JSON object
101
+ if "{" in response_text and "}" in response_text:
102
+ start = response_text.find("{")
103
+ end = response_text.rfind("}") + 1
104
+ json_str = response_text[start:end]
105
+ else:
106
+ json_str = response_text
107
+
108
+ try:
 
 
 
 
 
 
109
  # Try to parse JSON
110
+ parsed = json.loads(json_str)
111
+ except json.JSONDecodeError as e:
112
+ # Try to fix common issues
113
+ json_str = json_str.replace("'", '"')
114
+ json_str = json_str.replace("\n", " ")
115
+ json_str = json_str.replace("\t", " ")
116
+
117
+ # Try again
118
  try:
119
  parsed = json.loads(json_str)
120
+ except:
121
+ # Last resort - try to extract description from raw text
122
+ if "description" in response_text.lower():
123
+ # Try to find the description part
124
+ lines = response_text.split('\n')
125
+ for line in lines:
126
+ if 'description' in line.lower() and ':' in line:
127
+ desc = line.split(':', 1)[1].strip().strip('"').strip("'")
128
+ if len(desc) > 10:
129
+ return desc
130
+ raise ValueError(f"Cannot parse JSON: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
+ # Extract description
133
+ description = (
134
+ parsed.get("Description") or
135
+ parsed.get("description") or
136
+ parsed.get("Desc") or
137
+ parsed.get("desc") or
138
+ ""
139
+ )
140
+
141
+ if not description or len(description.strip()) < 10:
142
+ raise ValueError("Description is missing or too short")
143
+
144
+ return description.strip()
145
+
146
+ def process_single_category_with_fallback(
147
+ category,
148
+ model_name,
149
+ prompt_template,
150
+ max_tokens,
151
+ temperature,
152
+ top_p,
153
+ hf_token,
154
+ retry_count=3
155
+ ):
156
+ """Process with fallback to working model if primary fails"""
157
+
158
+ # Try primary model
159
+ try:
160
+ client = InferenceClient(
161
+ token=hf_token,
162
+ model=MODEL_CONFIGS[model_name]["model_id"]
163
+ )
164
 
165
+ system_prompt = PROMPT_TEMPLATES[prompt_template]
166
  messages = [
167
  {"role": "system", "content": system_prompt},
168
  {"role": "user", "content": f"Category: \"{category}\""}
169
  ]
170
 
 
 
171
  for attempt in range(retry_count):
172
  try:
173
  if attempt > 0:
174
  time.sleep(1)
175
 
 
176
  response_text = ""
177
+
178
+ # Try streaming
179
  for message in client.chat_completion(
180
  messages,
181
  max_tokens=max_tokens,
 
191
  elif isinstance(message, str):
192
  response_text += message
193
 
 
194
  if not response_text or len(response_text.strip()) < 5:
195
+ raise ValueError("Empty response")
196
 
197
+ description = extract_json_from_response(response_text)
198
+ return response_text.strip(), description, model_name
199
 
200
+ except Exception as e:
201
+ if attempt == retry_count - 1:
202
+ raise e
203
+
204
+ except Exception as primary_error:
205
+ # Fallback to GPT-OSS-20B which we know works
206
+ if model_name != "GPT-OSS 20B (Reliable)":
207
+ try:
208
+ print(f"Primary model failed, falling back to GPT-OSS-20B: {str(primary_error)[:100]}")
209
 
210
+ client = InferenceClient(
211
+ token=hf_token,
212
+ model="openai/gpt-oss-20b"
213
+ )
214
 
215
+ system_prompt = PROMPT_TEMPLATES[prompt_template]
216
+ messages = [
217
+ {"role": "system", "content": system_prompt},
218
+ {"role": "user", "content": f"Category: \"{category}\""}
219
+ ]
220
 
221
+ response_text = ""
222
+ for message in client.chat_completion(
223
+ messages,
224
+ max_tokens=max_tokens,
225
+ stream=True,
226
+ temperature=temperature,
227
+ top_p=top_p,
228
+ ):
229
+ if hasattr(message, 'choices') and len(message.choices) > 0:
230
+ if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
231
+ token = message.choices[0].delta.content
232
+ if token:
233
+ response_text += token
234
+ elif isinstance(message, str):
235
+ response_text += message
236
+
237
+ if response_text:
238
+ description = extract_json_from_response(response_text)
239
+ return response_text.strip(), description, "GPT-OSS-20B (Fallback)"
240
+
241
+ except Exception as fallback_error:
242
+ raise Exception(f"Both primary and fallback failed. Primary: {str(primary_error)[:100]}, Fallback: {str(fallback_error)[:100]}")
243
+ else:
244
+ raise primary_error
245
 
246
+ def process_csv_enhanced(
247
  files,
248
  category_column,
249
  model_name,
250
  prompt_template,
 
251
  max_tokens,
252
  temperature,
253
  top_p,
254
  output_format,
255
  progress=gr.Progress()
256
  ):
257
+ """Enhanced processing with better error messages and fallbacks"""
258
 
259
  if not files or len(files) == 0:
260
  return "Please upload at least one CSV file.", None, None
261
 
262
+ # Get HF token
263
  hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
 
 
264
 
265
+ if not hf_token:
266
+ return """⚠️ Error: HF_TOKEN not found. Please add your Hugging Face token as a Space Secret.
267
+
268
+ Go to: Space Settings → Secrets → Add 'HF_TOKEN'""", None, None
 
269
 
270
  all_results = []
271
  status_messages = []
 
278
  file_name = os.path.basename(file.name)
279
  status_messages.append(f"📄 Processing file {file_idx + 1}/{len(files)}: {file_name}")
280
 
281
+ # Check column
282
  if category_column not in df.columns:
283
+ available_cols = ', '.join(df.columns[:5])
284
+ status_messages.append(f"⚠️ Column '{category_column}' not found. Available: {available_cols}")
285
  continue
286
 
287
+ # Get unique categories
288
  categories = df[category_column].dropna().unique()
289
  total_categories = len(categories)
290
 
 
293
  for idx, category in enumerate(categories):
294
  progress(
295
  (file_idx * total_categories + idx) / (len(files) * total_categories),
296
+ desc=f"Processing: {category[:30]}..."
297
  )
298
 
299
  try:
300
+ raw_response, description, used_model = process_single_category_with_fallback(
301
  category,
302
  model_name,
303
  prompt_template,
 
304
  max_tokens,
305
  temperature,
306
+ top_p,
307
+ hf_token
308
  )
309
 
310
  result = {
 
311
  "Category": category,
312
  "Description": description,
313
+ "Word_Count": len(description.split()),
314
+ "Model_Used": used_model,
 
315
  "Raw_Response": raw_response,
316
  "Status": "Success"
317
  }
318
 
319
  file_results.append(result)
320
  all_results.append(result)
321
+ status_messages.append(f"✅ {category[:30]}... ({len(description.split())} words)")
322
 
323
  except Exception as e:
324
+ error_msg = str(e)
325
+ if "Request ID" in error_msg:
326
+ error_msg = "API Error - Try lowering temperature or using GPT-OSS model"
327
+
328
  result = {
 
329
  "Category": category,
330
+ "Description": f"[FAILED: {error_msg[:100]}]",
331
  "Word_Count": 0,
332
+ "Model_Used": model_name,
 
333
  "Raw_Response": "",
334
+ "Status": f"Failed"
335
  }
336
 
337
  file_results.append(result)
338
  all_results.append(result)
339
+ status_messages.append(f"❌ {category[:30]}... - {error_msg[:50]}")
340
 
341
  # Rate limiting
342
+ time.sleep(0.5)
343
 
344
+ # Save output files
345
  if file_results:
346
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
347
  base_name = os.path.splitext(file_name)[0]
348
 
349
+ # Create DataFrame
350
+ output_df = pd.DataFrame(file_results)
351
+
352
  if output_format in ["CSV", "Both"]:
353
  csv_filename = f"output_{base_name}_{timestamp}.csv"
354
+ output_df.to_csv(csv_filename, index=False)
355
  output_files.append(csv_filename)
356
 
357
  if output_format in ["JSON", "Both"]:
 
360
  json.dump(file_results, f, indent=2)
361
  output_files.append(json_filename)
362
 
363
+ # Summary
364
  success_count = sum(1 for r in file_results if r["Status"] == "Success")
365
  failed_count = len(file_results) - success_count
 
366
 
367
  status_messages.append(f"""
368
  📊 {file_name} Summary:
369
  - Total: {len(file_results)} categories
370
+ - Success: {success_count} ({success_count/max(len(file_results),1)*100:.1f}%)
371
  - Failed: {failed_count}
 
372
  """)
373
 
374
  except Exception as e:
375
+ status_messages.append(f"❌ Error processing {file_name}: {str(e)}")
376
 
377
+ # Create summary
378
  if all_results:
379
  total_success = sum(1 for r in all_results if r["Status"] == "Success")
380
  total_failed = len(all_results) - total_success
381
 
382
+ summary = f"""## 🎯 Processing Complete!
 
 
 
 
383
 
384
+ ### Statistics:
385
+ - **Total Processed:** {len(all_results)} categories
386
  - **Successful:** {total_success} ({total_success/len(all_results)*100:.1f}%)
387
+ - **Failed:** {total_failed}
 
388
 
389
+ ### Details:
390
  """
391
  status_text = summary + "\n".join(status_messages)
392
 
393
+ # Create preview DataFrame
394
+ preview_df = pd.DataFrame(all_results)[['Category', 'Description', 'Word_Count', 'Status']][:20]
 
 
 
395
 
396
+ return status_text, output_files, preview_df
 
 
 
 
 
 
 
 
 
397
  else:
398
+ return "\n".join(status_messages), None, None
399
 
400
+ # Create Gradio interface
401
+ with gr.Blocks(title="Multi-Model Business Description Generator", theme=gr.themes.Soft()) as demo:
402
+ gr.Markdown("""
403
+ # 🚀 Multi-Model Business Description Generator
404
+
405
+ Generate CLIP-ready visual descriptions using multiple AI models.
406
+
407
+ ### Features:
408
+ - 🤖 **4 Different Models** - Choose the best for your needs
409
+ - 📝 **3 Prompt Templates** - Optimized for different use cases
410
+ - 🔄 **Automatic Fallback** - Falls back to GPT-OSS if primary model fails
411
+ - 💾 **CSV & JSON Export** - Multiple output formats
412
+ """)
413
+
414
+ with gr.Row():
415
+ with gr.Column(scale=1):
416
+ gr.Markdown("### 📤 Input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
 
418
+ files_input = gr.File(
419
+ label="Upload CSV Files",
420
+ file_count="multiple",
421
+ file_types=[".csv"]
422
+ )
423
+
424
+ category_column = gr.Textbox(
425
+ label="Category Column Name",
426
+ value="category",
427
+ placeholder="Column name containing categories"
428
+ )
429
+
430
+ gr.Markdown("### 🤖 Model Configuration")
431
+
432
+ model_selector = gr.Dropdown(
433
+ label="Select Model",
434
+ choices=list(MODEL_CONFIGS.keys()),
435
+ value="GPT-OSS 20B (Reliable)",
436
+ info="GPT-OSS is most reliable, others may require fallback"
437
+ )
438
+
439
+ prompt_template = gr.Dropdown(
440
+ label="Prompt Template",
441
+ choices=list(PROMPT_TEMPLATES.keys()),
442
+ value="Your Original Prompt",
443
+ info="Choose based on desired output style"
444
+ )
445
+
446
+ gr.Markdown("### ⚙️ Settings")
447
+
448
+ with gr.Row():
449
+ temperature = gr.Slider(
450
+ minimum=0.1,
451
+ maximum=1.0,
452
+ value=0.3,
453
+ step=0.05,
454
+ label="Temperature",
455
+ info="Lower = consistent"
456
  )
457
 
458
+ top_p = gr.Slider(
459
+ minimum=0.1,
460
+ maximum=1.0,
461
+ value=0.9,
462
+ step=0.05,
463
+ label="Top-p"
464
  )
 
 
 
 
465
 
466
+ max_tokens = gr.Slider(
467
+ minimum=64,
468
+ maximum=512,
469
+ value=256,
470
+ step=16,
471
+ label="Max Tokens"
472
+ )
473
 
474
+ output_format = gr.Radio(
475
+ label="Output Format",
476
+ choices=["CSV", "JSON", "Both"],
477
+ value="CSV"
478
+ )
479
+
480
+ process_btn = gr.Button("🚀 Generate Descriptions", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
+ with gr.Column(scale=2):
483
+ gr.Markdown("### 📊 Results")
484
+
485
+ status_output = gr.Markdown(
486
+ value="Results will appear here...",
487
+ label="Status"
488
+ )
489
+
490
+ results_preview = gr.Dataframe(
491
+ label="Preview (First 20 Results)",
492
+ headers=["Category", "Description", "Word_Count", "Status"],
493
+ wrap=True
494
+ )
495
+
496
+ files_output = gr.File(
497
+ label="📥 Download Output Files",
498
+ file_count="multiple"
499
+ )
500
+
501
+ with gr.Row():
502
+ gr.Markdown("""
503
+ ### 💡 Tips:
504
+ - **GPT-OSS 20B** is the most reliable model
505
+ - Use **Temperature 0.2-0.4** for consistent results
506
+ - **Clip-Ready** template gives 15-30 word descriptions
507
+ - If a model fails, it automatically falls back to GPT-OSS
508
 
509
+ ### ⚠️ Troubleshooting:
510
+ - **API Errors**: Try using GPT-OSS 20B model
511
+ - **Failed Categories**: Lower temperature to 0.2
512
+ - **Empty Responses**: Check your HF_TOKEN is valid
513
+ """)
 
514
 
515
+ # Process button
516
+ process_btn.click(
517
+ fn=process_csv_enhanced,
518
+ inputs=[
519
+ files_input,
520
+ category_column,
521
+ model_selector,
522
+ prompt_template,
523
+ max_tokens,
524
+ temperature,
525
+ top_p,
526
+ output_format
527
+ ],
528
+ outputs=[status_output, files_output, results_preview]
529
+ )
530
 
531
  if __name__ == "__main__":
 
532
  demo.launch()