piyushdev committed on
Commit d7df1b6 · verified · 1 Parent(s): ad19ccb

Update app.py

Files changed (1)
  1. app.py +509 -261
app.py CHANGED
@@ -5,338 +5,586 @@ import json
5
  import os
6
  import time
7
  from datetime import datetime
 
8
 
9
- # Custom system instructions for business category descriptions
10
- SYSTEM_INSTRUCTIONS = """You are an expert at writing clear and visual descriptions for a business category keyword for a yellow pages or business listing website. Given a category keyword, generate a single, detailed description that defines its key visual elements, location, and context. Do not add artistic or stylistic flair. Ensure that the description is CLIP model ready and not too verbose.
11
 
12
- Here are some examples of the correct format:
 
 
13
 
14
- Category: "Car Rental For Self Driven"
15
 
16
- Description: "a car available for self-drive rental, parked at a pickup spot without a chauffeur; looks travel-ready, clean, well-maintained, keys handed over to customer"
 
 
17
 
18
- Category: "Mehandi"
 
19
 
20
- Description: "Temporary henna artwork applied on hands and feet using cones; fine brown or maroon floral and paisley patterns, mandalas, and lace-like detailing, commonly seen at weddings and festivals."
 
21
 
22
- Category: "Photographer"
23
 
24
- Description: "a person actively shooting photos or posing with a camera; holding a camera to eye, adjusting lens, or directing a subject during a shoot"
25
 
26
- Category: "Equipment"
 
 
27
 
28
- Description: "lighting stands, softboxes, strobes, tripods, reflectors, gimbals, battery packs, memory cards arranged as gear kits"
 
29
 
30
- ---
31
 
32
- IMPORTANT: You must respond with ONLY a valid JSON object in this exact format:
33
- {"Category": "category name", "Description": "description text"}
34
 
35
- Do not include any other text, explanations, or markdown formatting. Only output the JSON object."""
 
36
 
 
 
37
 
38
- def extract_json_from_response(response_text):
39
- """Extract and validate JSON from model response."""
40
- # Try to find JSON in the response
41
- response_text = response_text.strip()
42
 
43
- # Remove markdown code blocks if present
44
- if "```json" in response_text:
45
- response_text = response_text.split("```json")[1].split("```")[0].strip()
46
- elif "```" in response_text:
47
- response_text = response_text.split("```")[1].split("```")[0].strip()
48
-
49
- # Try to find JSON object in the text
50
- if "{" in response_text and "}" in response_text:
51
- start = response_text.find("{")
52
- end = response_text.rfind("}") + 1
53
- response_text = response_text[start:end]
54
-
55
- # Parse JSON
56
- parsed = json.loads(response_text)
57
-
58
- # Validate structure
59
- if not isinstance(parsed, dict):
60
- raise ValueError("Response is not a JSON object")
61
-
62
- # Get description with various possible keys
63
- description = (
64
- parsed.get("Description") or
65
- parsed.get("description") or
66
- parsed.get("desc") or
67
- ""
68
- )
69
-
70
- if not description or len(description.strip()) < 10:
71
- raise ValueError("Description is missing or too short")
72
-
73
- return description.strip()
74
-
75
-
76
- def process_single_category(category, client, max_tokens, temperature, top_p, retry_count=3):
77
- """Process a single category keyword and return the description with retry logic."""
78
- messages = [
79
- {"role": "system", "content": SYSTEM_INSTRUCTIONS},
80
- {"role": "user", "content": f"Category: \"{category}\""}
81
- ]
82
-
83
- last_error = None
84
 
85
- for attempt in range(retry_count):
86
  try:
87
- # Add small delay between retries
88
- if attempt > 0:
89
- time.sleep(1)
90
-
91
- # Try streaming approach (more reliable for this model)
92
- response_text = ""
93
- for message in client.chat_completion(
94
- messages,
95
- max_tokens=max_tokens,
96
- stream=True,
97
- temperature=temperature,
98
- top_p=top_p,
99
- ):
100
- if hasattr(message, 'choices') and len(message.choices) > 0:
101
- if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
102
- token = message.choices[0].delta.content
103
- if token:
104
- response_text += token
105
- elif isinstance(message, str):
106
- response_text += message
107
-
108
- # Validate we got a response
109
- if not response_text or len(response_text.strip()) < 5:
110
- raise ValueError("Empty or too short response from model")
111
-
112
- # Extract and validate JSON
113
- description = extract_json_from_response(response_text)
114
-
115
- # Return both the description and raw response
116
- return response_text.strip(), description
117
-
118
- except json.JSONDecodeError as e:
119
- last_error = f"JSON parsing failed (attempt {attempt + 1}/{retry_count}): {str(e)}"
120
- # If JSON parsing fails, try to extract description from raw text
121
- if attempt == retry_count - 1 and response_text:
122
- # Last attempt - try to use raw response if it looks like a description
123
- if len(response_text.strip()) > 20 and not response_text.startswith("{"):
124
- return response_text.strip(), response_text.strip()
125
- except Exception as e:
126
- last_error = f"Processing failed (attempt {attempt + 1}/{retry_count}): {str(e)}"
127
 
128
- # All retries failed
129
- raise Exception(f"Failed after {retry_count} attempts. Last error: {last_error}")
130
-
131
 
132
- def process_csv_files(
133
  files,
134
  category_column,
135
  max_tokens,
136
  temperature,
137
  top_p,
 
138
  progress=gr.Progress()
139
  ):
140
- """
141
- Process multiple CSV files and generate descriptions for category keywords.
142
- """
143
  if not files or len(files) == 0:
144
- return "Please upload at least one CSV file.", None
145
 
146
- # Get HF token from environment variables
147
- import os
148
  hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
149
-
150
  if not hf_token:
151
- return "❌ Error: HF_TOKEN not found. Please add your Hugging Face token as a Space Secret.\n\nGo to Space Settings β†’ Secrets β†’ Add 'HF_TOKEN'", None
152
 
153
- client = InferenceClient(token=hf_token, model="openai/gpt-oss-20b")
154
 
155
- output_files = []
156
  status_messages = []
 
157
 
158
  for file_idx, file in enumerate(files):
159
  try:
160
- # Read CSV file
161
  df = pd.read_csv(file.name)
162
- status_messages.append(f"πŸ“„ Processing file {file_idx + 1}/{len(files)}: {os.path.basename(file.name)}")
 
163
 
164
- # Check if category column exists
165
  if category_column not in df.columns:
166
- status_messages.append(f"⚠️ Warning: Column '{category_column}' not found in {os.path.basename(file.name)}. Available columns: {', '.join(df.columns)}")
167
  continue
168
 
169
- # Process each category
170
- descriptions = []
171
- raw_responses = []
172
-
173
  categories = df[category_column].dropna().unique()
174
  total_categories = len(categories)
175
 
 
 
176
  for idx, category in enumerate(categories):
177
- progress((file_idx * total_categories + idx) / (len(files) * total_categories),
178
- desc=f"Processing category {idx + 1}/{total_categories} in file {file_idx + 1}")
 
 
179
 
180
  try:
181
- # Process with retry logic
182
- raw_response, description = process_single_category(
183
- category, client, max_tokens, temperature, top_p, retry_count=3
184
  )
185
 
186
- # Validate description
187
- if not description or len(description.strip()) < 10:
188
- raise ValueError("Description is too short or empty")
189
-
190
- descriptions.append({
191
  "Category": category,
192
  "Description": description,
193
  "Raw_Response": raw_response,
194
  "Status": "Success"
195
- })
196
 
197
- status_messages.append(f"βœ… Processed: {category}")
 
 
198
 
199
  except Exception as e:
200
- error_msg = str(e)
201
- status_messages.append(f"⚠️ Error processing '{category}': {error_msg}")
202
-
203
- descriptions.append({
204
  "Category": category,
205
- "Description": f"[FAILED - {error_msg[:100]}]",
206
  "Raw_Response": "",
207
- "Status": "Failed"
208
- })
209
 
210
- # Small delay to avoid rate limiting
211
- time.sleep(0.5)
212
-
213
- # Create output dataframe
214
- output_df = pd.DataFrame(descriptions)
215
 
216
- # Save to file
217
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
218
- base_name = os.path.splitext(os.path.basename(file.name))[0]
219
- output_filename = f"output_{base_name}_{timestamp}.csv"
220
- output_df.to_csv(output_filename, index=False)
221
- output_files.append(output_filename)
222
 
223
- # Count successes and failures
224
- success_count = len([d for d in descriptions if d.get("Status") == "Success"])
225
- failed_count = len([d for d in descriptions if d.get("Status") == "Failed"])
 
226
 
227
- status_messages.append(f"βœ… Completed: {success_count} succeeded, {failed_count} failed out of {len(descriptions)} categories from {os.path.basename(file.name)}")
228
 
229
  except Exception as e:
230
  status_messages.append(f"❌ Error processing {os.path.basename(file.name)}: {str(e)}")
231
 
232
- status_text = "\n".join(status_messages)
233
-
234
- if output_files:
235
- return status_text, output_files
236
- else:
237
- return status_text + "\n\n❌ No output files generated.", None
 
238
 
 
 
239
 
240
- # Create Gradio interface
241
- with gr.Blocks(title="Business Category Description Generator") as demo:
242
- gr.Markdown("""
243
- # 🏢 Business Category Description Generator
244
-
245
- Upload CSV files containing business category keywords, and this app will generate
246
- CLIP-ready visual descriptions for each category using AI.
247
-
248
- **Instructions:**
249
- 1. Upload one or more CSV files
250
- 2. Specify the column name that contains the category keywords
251
- 3. Adjust model settings (lower temperature = more consistent output)
252
- 4. Click "Process Files" to generate descriptions
253
- 5. Download the output CSV files with Status column
254
-
255
- **Features:**
256
- - ✅ Automatic retry logic (3 attempts per category)
257
- - ✅ JSON validation and error recovery
258
- - ✅ Progress tracking with detailed status
259
- - ✅ Success/failure reporting
260
-
261
- *Note: For faster processing, use Zero GPU (see Space Settings). Authentication via HF_TOKEN secret.*
262
- """)
263
-
264
- with gr.Row():
265
- with gr.Column(scale=1):
266
- gr.Markdown("### βš™οΈ Model Settings")
267
- max_tokens = gr.Slider(
268
- minimum=64,
269
- maximum=512,
270
- value=256,
271
- step=16,
272
- label="Max Tokens"
273
- )
274
- temperature = gr.Slider(
275
- minimum=0.1,
276
- maximum=1.0,
277
- value=0.3,
278
- step=0.1,
279
- label="Temperature",
280
- info="Lower = more consistent output"
281
- )
282
- top_p = gr.Slider(
283
- minimum=0.1,
284
- maximum=1.0,
285
- value=0.9,
286
- step=0.05,
287
- label="Top-p"
288
- )
289
 
290
- with gr.Column(scale=2):
291
- files_input = gr.File(
292
- label="πŸ“€ Upload CSV Files",
293
- file_count="multiple",
294
- file_types=[".csv"]
295
- )
296
- category_column = gr.Textbox(
297
- label="πŸ“ Category Column Name",
298
- value="category",
299
- placeholder="Enter the name of the column containing categories"
300
- )
301
- process_btn = gr.Button("πŸš€ Process Files", variant="primary", size="lg")
302
 
303
- status_output = gr.Textbox(
304
- label="πŸ“Š Status",
305
- lines=10,
306
- interactive=False
307
- )
308
- files_output = gr.File(
309
- label="πŸ’Ύ Download Output Files",
310
- file_count="multiple"
311
- )
312
-
313
- process_btn.click(
314
- fn=process_csv_files,
315
- inputs=[
316
- files_input,
317
- category_column,
318
- max_tokens,
319
- temperature,
320
- top_p
321
- ],
322
- outputs=[status_output, files_output]
323
- )
324
-
325
- gr.Markdown("""
326
- ---
327
- ### 📝 Output Format
328
- Each output CSV file will contain:
329
- - **Category**: The original category keyword
330
- - **Description**: The generated visual description (validated and cleaned)
331
- - **Raw_Response**: The complete model response (for debugging)
332
- - **Status**: Success or Failed (with error details)
333
 
334
- 💡 **Tips for Best Results:**
335
- - Use Temperature 0.2-0.4 for consistent, focused descriptions
336
- - Use Temperature 0.6-0.8 for more creative variations
337
- - Failed categories are marked clearly - you can reprocess them separately
338
- - Zero GPU acceleration: Add @spaces.GPU decorator or enable in Space Settings
339
- """)
340
 
341
  if __name__ == "__main__":
342
- demo.launch()
 
 
5
  import os
6
  import time
7
  from datetime import datetime
8
+ import traceback
9
 
10
+ # Model configurations with their strengths
11
+ MODEL_CONFIGS = {
12
+ "Meta Llama 3.1 70B Instruct (Best Quality)": {
13
+ "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct",
14
+ "description": "Excellent for creative and detailed descriptions",
15
+ "default_temp": 0.3,
16
+ "max_tokens": 300
17
+ },
18
+ "Qwen 2.5 72B Instruct (Fast & Accurate)": {
19
+ "model_id": "Qwen/Qwen2.5-72B-Instruct",
20
+ "description": "Great balance of speed and quality",
21
+ "default_temp": 0.35,
22
+ "max_tokens": 300
23
+ },
24
+ "GPT-OSS 20B (Original)": {
25
+ "model_id": "openai/gpt-oss-20b",
26
+ "description": "Your current model - good for structured output",
27
+ "default_temp": 0.3,
28
+ "max_tokens": 256
29
+ },
30
+ "Mixtral 8x7B (Efficient)": {
31
+ "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
32
+ "description": "Fast processing for large batches",
33
+ "default_temp": 0.4,
34
+ "max_tokens": 300
35
+ }
36
+ }
37
 
38
+ # Different system prompts for different description styles
39
+ PROMPT_TEMPLATES = {
40
+ "Clip-Ready Visual (15-30 words)": """You are an expert at writing ultra-concise, visual descriptions for CLIP models and image search.
41
 
42
+ For each business category, create a description that:
43
+ 1. Is 15-30 words maximum
44
+ 2. Focuses on VISUAL elements only (what you would SEE in an image)
45
+ 3. Uses concrete, observable details
46
+ 4. Avoids abstract concepts or services
47
+ 5. Describes physical appearance, setting, or visual activity
48
 
49
+ Examples:
50
+ Category: "Car Rental"
51
+ Description: "rental car with keys, parked at pickup location, clean interior visible, rental company signage"
52
 
53
+ Category: "Yoga Classes"
54
+ Description: "people in yoga poses on mats, stretching in studio with mirrors, instructor demonstrating position"
55
 
56
+ IMPORTANT: Respond with ONLY a JSON object:
57
+ {"Category": "category name", "Description": "visual description"}""",
58
 
59
+ "Standard Business (40-60 words)": """You are creating professional business descriptions for directory listings.
60
 
61
+ Generate descriptions that:
62
+ 1. Are 40-60 words
63
+ 2. Define the service clearly
64
+ 3. Include key visual and contextual elements
65
+ 4. Are suitable for yellow pages or business directories
66
+ 5. Focus on what customers would see or experience
67
 
68
+ Examples:
69
+ Category: "Photography Studio"
70
+ Description: "Professional photography space with lighting equipment, backdrops, and cameras. Photographer capturing portraits, events, or products. Studio setup with tripods, reflectors, softboxes. Clients posing for shots, reviewing images on screens. Portfolio displays, editing workstations visible."
71
 
72
+ IMPORTANT: Respond with ONLY a JSON object:
73
+ {"Category": "category name", "Description": "description text"}""",
74
 
75
+ "E-commerce Ready (30-50 words)": """You are writing descriptions optimized for e-commerce and online marketplaces.
76
 
77
+ Create descriptions that:
78
+ 1. Are 30-50 words
79
+ 2. Highlight visual product/service attributes
80
+ 3. Include searchable keywords
81
+ 4. Focus on customer benefits
82
+ 5. Use action-oriented language
83
 
84
+ IMPORTANT: Respond with ONLY a JSON object:
85
+ {"Category": "category name", "Description": "description text"}""",
86
 
87
+ "Custom Prompt": "" # Will be filled by user
88
+ }
89
 
90
+ class EnhancedDescriptionGenerator:
91
+ def __init__(self):
92
+ self.clients = {}
93
+ self.initialize_clients()
94
 
95
+ def initialize_clients(self):
96
+ """Initialize all model clients"""
97
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
98
+
99
+ if not hf_token:
100
+ raise ValueError("HF_TOKEN not found in environment variables")
101
+
102
+ for model_name, config in MODEL_CONFIGS.items():
103
+ try:
104
+ self.clients[model_name] = InferenceClient(
105
+ token=hf_token,
106
+ model=config["model_id"]
107
+ )
108
+ print(f"βœ… Initialized: {model_name}")
109
+ except Exception as e:
110
+ print(f"⚠️ Failed to initialize {model_name}: {str(e)}")
111
+ self.clients[model_name] = None
112
 
113
+ def extract_json_from_response(self, response_text):
114
+ """Enhanced JSON extraction with multiple fallback methods"""
115
+ if not response_text:
116
+ raise ValueError("Empty response")
117
+
118
+ response_text = response_text.strip()
119
+
120
+ # Method 1: Clean markdown formatting
121
+ if "```json" in response_text:
122
+ response_text = response_text.split("```json")[1].split("```")[0].strip()
123
+ elif "```" in response_text:
124
+ response_text = response_text.split("```")[1].split("```")[0].strip()
125
+
126
+ # Method 2: Extract JSON object
127
+ if "{" in response_text and "}" in response_text:
128
+ start = response_text.find("{")
129
+ end = response_text.rfind("}") + 1
130
+ json_str = response_text[start:end]
131
+ else:
132
+ json_str = response_text
133
+
134
+ # Try to parse JSON
135
  try:
136
+ parsed = json.loads(json_str)
137
+ except json.JSONDecodeError:
138
+ # Method 3: Try to fix common JSON issues
139
+ json_str = json_str.replace("'", '"') # Replace single quotes
140
+ json_str = json_str.replace("\n", " ") # Remove newlines
141
+ json_str = json_str.replace("\\", "\\\\") # Escape backslashes
142
+ parsed = json.loads(json_str)
143
+
144
+ # Validate and extract description
145
+ if not isinstance(parsed, dict):
146
+ raise ValueError("Response is not a JSON object")
147
+
148
+ description = (
149
+ parsed.get("Description") or
150
+ parsed.get("description") or
151
+ parsed.get("Desc") or
152
+ parsed.get("desc") or
153
+ ""
154
+ )
155
+
156
+ if not description or len(description.strip()) < 10:
157
+ raise ValueError("Description is missing or too short")
158
+
159
+ return description.strip()
160
 
161
+ def process_single_category(
162
+ self,
163
+ category,
164
+ model_name,
165
+ prompt_template,
166
+ custom_prompt,
167
+ max_tokens,
168
+ temperature,
169
+ top_p,
170
+ retry_count=3
171
+ ):
172
+ """Process a single category with the selected model and prompt"""
173
+
174
+ client = self.clients.get(model_name)
175
+ if not client:
176
+ return None, f"Model {model_name} not available"
177
+
178
+ # Select and prepare the prompt
179
+ if prompt_template == "Custom Prompt":
180
+ if not custom_prompt:
181
+ return None, "Custom prompt is required when 'Custom Prompt' is selected"
182
+ system_prompt = custom_prompt
183
+ else:
184
+ system_prompt = PROMPT_TEMPLATES[prompt_template]
185
+
186
+ messages = [
187
+ {"role": "system", "content": system_prompt},
188
+ {"role": "user", "content": f"Category: \"{category}\""}
189
+ ]
190
+
191
+ last_error = None
192
+
193
+ for attempt in range(retry_count):
194
+ try:
195
+ if attempt > 0:
196
+ time.sleep(1)
197
+
198
+ # Make API call
199
+ response_text = ""
200
+ for message in client.chat_completion(
201
+ messages,
202
+ max_tokens=max_tokens,
203
+ stream=True,
204
+ temperature=temperature,
205
+ top_p=top_p,
206
+ ):
207
+ if hasattr(message, 'choices') and len(message.choices) > 0:
208
+ if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
209
+ token = message.choices[0].delta.content
210
+ if token:
211
+ response_text += token
212
+ elif isinstance(message, str):
213
+ response_text += message
214
+
215
+ # Validate response
216
+ if not response_text or len(response_text.strip()) < 5:
217
+ raise ValueError("Empty or too short response")
218
+
219
+ # Extract description
220
+ description = self.extract_json_from_response(response_text)
221
+
222
+ # Count words for validation
223
+ word_count = len(description.split())
224
+
225
+ return response_text.strip(), description, word_count
226
+
227
+ except Exception as e:
228
+ last_error = f"Attempt {attempt + 1}/{retry_count}: {str(e)}"
229
+
230
+ # On last attempt, try to use raw response if it looks valid
231
+ if attempt == retry_count - 1 and response_text:
232
+ if len(response_text.strip()) > 20 and not response_text.startswith("{"):
233
+ return response_text.strip(), response_text.strip(), len(response_text.split())
234
+
235
+ raise Exception(f"Failed after {retry_count} attempts. Last error: {last_error}")
236
 
237
+ def process_csv_advanced(
238
  files,
239
  category_column,
240
+ model_name,
241
+ prompt_template,
242
+ custom_prompt,
243
  max_tokens,
244
  temperature,
245
  top_p,
246
+ output_format,
247
  progress=gr.Progress()
248
  ):
249
+ """Enhanced CSV processing with multiple models and output formats"""
250
+
 
251
  if not files or len(files) == 0:
252
+ return "Please upload at least one CSV file.", None, None
253
 
254
+ # Check for HF token
 
255
  hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
 
256
  if not hf_token:
257
+ return "⚠️ Error: HF_TOKEN not found. Please add your Hugging Face token as a Space Secret.", None, None
258
 
259
+ # Initialize generator
260
+ try:
261
+ generator = EnhancedDescriptionGenerator()
262
+ except Exception as e:
263
+ return f"Error initializing models: {str(e)}", None, None
264
 
265
+ all_results = []
266
  status_messages = []
267
+ output_files = []
268
 
269
  for file_idx, file in enumerate(files):
270
  try:
271
+ # Read CSV
272
  df = pd.read_csv(file.name)
273
+ file_name = os.path.basename(file.name)
274
+ status_messages.append(f"πŸ“„ Processing file {file_idx + 1}/{len(files)}: {file_name}")
275
 
276
+ # Check column exists
277
  if category_column not in df.columns:
278
+ status_messages.append(f"⚠️ Column '{category_column}' not found in {file_name}. Available: {', '.join(df.columns)}")
279
  continue
280
 
281
+ # Process categories
282
  categories = df[category_column].dropna().unique()
283
  total_categories = len(categories)
284
 
285
+ file_results = []
286
+
287
  for idx, category in enumerate(categories):
288
+ progress(
289
+ (file_idx * total_categories + idx) / (len(files) * total_categories),
290
+ desc=f"File {file_idx + 1}/{len(files)} - Category {idx + 1}/{total_categories}: {category[:30]}..."
291
+ )
292
 
293
  try:
294
+ raw_response, description, word_count = generator.process_single_category(
295
+ category,
296
+ model_name,
297
+ prompt_template,
298
+ custom_prompt,
299
+ max_tokens,
300
+ temperature,
301
+ top_p
302
  )
303
 
304
+ result = {
305
+ "File": file_name,
306
  "Category": category,
307
  "Description": description,
308
+ "Word_Count": word_count,
309
+ "Model": model_name,
310
+ "Prompt_Type": prompt_template,
311
  "Raw_Response": raw_response,
312
  "Status": "Success"
313
+ }
314
 
315
+ file_results.append(result)
316
+ all_results.append(result)
317
+ status_messages.append(f"βœ… {category[:30]}... ({word_count} words)")
318
 
319
  except Exception as e:
320
+ error_msg = str(e)[:100]
321
+ result = {
322
+ "File": file_name,
 
323
  "Category": category,
324
+ "Description": f"[FAILED: {error_msg}]",
325
+ "Word_Count": 0,
326
+ "Model": model_name,
327
+ "Prompt_Type": prompt_template,
328
  "Raw_Response": "",
329
+ "Status": f"Failed: {error_msg}"
330
+ }
331
+
332
+ file_results.append(result)
333
+ all_results.append(result)
334
+ status_messages.append(f"❌ {category[:30]}... - {error_msg}")
335
 
336
+ # Rate limiting
337
+ time.sleep(0.3)
338
 
339
+ # Create output file for this CSV
340
+ if file_results:
341
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
342
+ base_name = os.path.splitext(file_name)[0]
343
+
344
+ if output_format in ["CSV", "Both"]:
345
+ csv_filename = f"output_{base_name}_{timestamp}.csv"
346
+ pd.DataFrame(file_results).to_csv(csv_filename, index=False)
347
+ output_files.append(csv_filename)
348
+
349
+ if output_format in ["JSON", "Both"]:
350
+ json_filename = f"output_{base_name}_{timestamp}.json"
351
+ with open(json_filename, 'w') as f:
352
+ json.dump(file_results, f, indent=2)
353
+ output_files.append(json_filename)
354
 
355
+ # Summary for this file
356
+ success_count = sum(1 for r in file_results if r["Status"] == "Success")
357
+ failed_count = len(file_results) - success_count
358
+ avg_words = sum(r["Word_Count"] for r in file_results if r["Status"] == "Success") / max(success_count, 1)
359
 
360
+ status_messages.append(f"""
361
+ 📊 {file_name} Summary:
362
+ - Total: {len(file_results)} categories
363
+ - Success: {success_count} ({success_count/len(file_results)*100:.1f}%)
364
+ - Failed: {failed_count}
365
+ - Avg Words: {avg_words:.1f}
366
+ """)
367
 
368
  except Exception as e:
369
  status_messages.append(f"❌ Error processing {os.path.basename(file.name)}: {str(e)}")
370
 
371
+ # Overall summary
372
+ if all_results:
373
+ total_success = sum(1 for r in all_results if r["Status"] == "Success")
374
+ total_failed = len(all_results) - total_success
375
+
376
+ summary = f"""
377
+ ## 🎯 Processing Complete!
378
 
379
+ **Model Used:** {model_name}
380
+ **Prompt Template:** {prompt_template}
381
 
382
+ ### Overall Statistics:
383
+ - **Total Categories Processed:** {len(all_results)}
384
+ - **Successful:** {total_success} ({total_success/len(all_results)*100:.1f}%)
385
+ - **Failed:** {total_failed} ({total_failed/len(all_results)*100:.1f}%)
386
+ - **Average Word Count:** {sum(r['Word_Count'] for r in all_results if r['Status'] == 'Success') / max(total_success, 1):.1f}
387
+
388
+ ### File Processing Log:
389
+ """
390
+ status_text = summary + "\n".join(status_messages)
391
+
392
+ # Create combined output file
393
+ if output_format in ["CSV", "Both"]:
394
+ combined_csv = f"combined_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
395
+ pd.DataFrame(all_results).to_csv(combined_csv, index=False)
396
+ output_files.append(combined_csv)
397
+
398
+ if output_format in ["JSON", "Both"]:
399
+ combined_json = f"combined_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
400
+ with open(combined_json, 'w') as f:
401
+ json.dump(all_results, f, indent=2)
402
+ output_files.append(combined_json)
403
+
404
+ # Create summary DataFrame for display
405
+ summary_df = pd.DataFrame(all_results)[['Category', 'Description', 'Word_Count', 'Status']]
406
+
407
+ return status_text, output_files, summary_df
408
+ else:
409
+ return "\n".join(status_messages) + "\n\n⚠️ No categories were processed successfully.", None, None
410
+
411
+ # Create enhanced Gradio interface
412
+ def create_interface():
413
+ with gr.Blocks(title="Enhanced Business Description Generator", theme=gr.themes.Soft()) as demo:
414
+ gr.Markdown("""
415
+ # 🚀 Enhanced Multi-Model Business Description Generator
416
+
417
+ Generate professional, CLIP-ready descriptions using multiple state-of-the-art AI models.
418
+
419
+ ### ✨ New Features:
420
+ - 🤖 **4 Different AI Models** to choose from
421
+ - 📝 **Multiple Prompt Templates** for different use cases
422
+ - 📊 **Word Count Tracking** for all descriptions
423
+ - 💾 **CSV & JSON Export** options
424
+ - 📈 **Enhanced Statistics** and success tracking
425
+ """)
426
+
427
+ with gr.Row():
428
+ with gr.Column(scale=1):
429
+ gr.Markdown("### πŸ“€ Input Configuration")
430
+
431
+ files_input = gr.File(
432
+ label="Upload CSV Files",
433
+ file_count="multiple",
434
+ file_types=[".csv"]
435
+ )
436
+
437
+ category_column = gr.Textbox(
438
+ label="Category Column Name",
439
+ value="category",
440
+ placeholder="Column containing categories"
441
+ )
442
+
443
+ gr.Markdown("### πŸ€– Model Selection")
444
+
445
+ model_selector = gr.Dropdown(
446
+ label="Select AI Model",
447
+ choices=list(MODEL_CONFIGS.keys()),
448
+ value=list(MODEL_CONFIGS.keys())[0],
449
+ info="Each model has different strengths"
450
+ )
451
+
452
+ # Model description display
453
+ model_info = gr.Markdown("")
454
+
455
+ prompt_template = gr.Dropdown(
456
+ label="Prompt Template",
457
+ choices=list(PROMPT_TEMPLATES.keys()),
458
+ value="Clip-Ready Visual (15-30 words)",
459
+ info="Choose based on your use case"
460
+ )
461
+
462
+ custom_prompt = gr.Textbox(
463
+ label="Custom System Prompt (if Custom selected)",
464
+ placeholder="Enter your custom instructions here...",
465
+ lines=4,
466
+ visible=False
467
+ )
468
+
469
+ gr.Markdown("### βš™οΈ Generation Settings")
470
+
471
+ with gr.Row():
472
+ temperature = gr.Slider(
473
+ minimum=0.1,
474
+ maximum=1.0,
475
+ value=0.3,
476
+ step=0.05,
477
+ label="Temperature",
478
+ info="Lower = consistent, Higher = creative"
479
+ )
480
+
481
+ top_p = gr.Slider(
482
+ minimum=0.1,
483
+ maximum=1.0,
484
+ value=0.9,
485
+ step=0.05,
486
+ label="Top-p"
487
+ )
488
+
489
+ max_tokens = gr.Slider(
490
+ minimum=64,
491
+ maximum=512,
492
+ value=256,
493
+ step=16,
494
+ label="Max Tokens"
495
+ )
496
+
497
+ output_format = gr.Radio(
498
+ label="Output Format",
499
+ choices=["CSV", "JSON", "Both"],
500
+ value="CSV"
501
+ )
502
+
503
+ process_btn = gr.Button("πŸš€ Generate Descriptions", variant="primary", size="lg")
504
+
505
+ with gr.Column(scale=2):
506
+ gr.Markdown("### πŸ“Š Results")
507
+
508
+ status_output = gr.Markdown(label="Processing Status")
509
+
510
+ results_preview = gr.Dataframe(
511
+ label="Results Preview",
512
+ headers=["Category", "Description", "Word_Count", "Status"],
513
+ datatype=["str", "str", "number", "str"],
514
+ col_count=4,
515
+ wrap=True
516
+ )
517
+
518
+ files_output = gr.File(
519
+ label="πŸ“₯ Download Output Files",
520
+ file_count="multiple"
521
+ )
522
 
523
+ with gr.Row():
524
+ gr.Markdown("""
525
+ ### 💡 Model Recommendations:
526
 
527
+ | Model | Best For | Speed | Quality |
528
+ |-------|----------|-------|---------|
529
+ | **Llama 3.1 70B** | Creative, detailed descriptions | Medium | ⭐⭐⭐⭐⭐ |
530
+ | **Qwen 2.5 72B** | Balanced performance | Fast | ⭐⭐⭐⭐ |
531
+ | **GPT-OSS 20B** | Structured, consistent output | Fast | ⭐⭐⭐⭐ |
532
+ | **Mixtral 8x7B** | Large batch processing | Very Fast | ⭐⭐⭐ |
533
+
534
+ ### 📝 Template Guide:
535
+ - **Clip-Ready Visual**: 15-30 words, focus on visual elements only
536
+ - **Standard Business**: 40-60 words, comprehensive directory descriptions
537
+ - **E-commerce Ready**: 30-50 words, optimized for online marketplaces
538
+ - **Custom Prompt**: Use your own instructions for specific needs
539
+ """)
540
+
541
+ # Update model info when selection changes
542
+ def update_model_info(model_name):
543
+ config = MODEL_CONFIGS[model_name]
544
+ return f"ℹ️ **{config['description']}**\nRecommended temp: {config['default_temp']}"
545
+
546
+ model_selector.change(
547
+ update_model_info,
548
+ inputs=[model_selector],
549
+ outputs=[model_info]
550
+ )
551
+
552
+ # Show/hide custom prompt field
553
+ def toggle_custom_prompt(template):
554
+ return gr.update(visible=(template == "Custom Prompt"))
555
+
556
+ prompt_template.change(
557
+ toggle_custom_prompt,
558
+ inputs=[prompt_template],
559
+ outputs=[custom_prompt]
560
+ )
561
+
562
+ # Process button click
563
+ process_btn.click(
564
+ fn=process_csv_advanced,
565
+ inputs=[
566
+ files_input,
567
+ category_column,
568
+ model_selector,
569
+ prompt_template,
570
+ custom_prompt,
571
+ max_tokens,
572
+ temperature,
573
+ top_p,
574
+ output_format
575
+ ],
576
+ outputs=[status_output, files_output, results_preview]
577
+ )
578
+
579
+ # Set initial model info
580
+ demo.load(
581
+ update_model_info,
582
+ inputs=[model_selector],
583
+ outputs=[model_info]
584
+ )
585
 
586
+ return demo
587
 
588
  if __name__ == "__main__":
589
+ demo = create_interface()
590
+ demo.launch()
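
For reference, a minimal standalone sketch of the JSON-repair behaviour this commit introduces in `extract_json_from_response`: strip any markdown fence, slice out the outermost `{...}`, then fall back to swapping single quotes for double quotes before parsing. The helper name `extract_description` and the two sample model outputs are hypothetical, for illustration only.

```python
import json

def extract_description(response_text: str) -> str:
    """Sketch of the commit's fallback parsing: fence stripping, brace slicing, quote repair."""
    text = response_text.strip()
    # Strip a markdown code fence if the model wrapped its answer in one
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0].strip()
    elif "```" in text:
        text = text.split("```")[1].split("```")[0].strip()
    # Keep only the outermost JSON object, ignoring any surrounding chatter
    if "{" in text and "}" in text:
        text = text[text.find("{"):text.rfind("}") + 1]
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Last-resort repair for single-quoted pseudo-JSON
        parsed = json.loads(text.replace("'", '"'))
    description = (parsed.get("Description") or parsed.get("description") or "").strip()
    if len(description) < 10:
        raise ValueError("Description is missing or too short")
    return description

# Hypothetical model outputs
fenced = '```json\n{"Category": "Bakery", "Description": "glass display case with breads and pastries, baker in apron"}\n```'
single_quoted = "{'Category': 'Bakery', 'Description': 'glass display case with breads and pastries, baker in apron'}"

print(extract_description(fenced))
print(extract_description(single_quoted))
```

Both samples resolve to the same description; the second one only parses because of the quote-repair fallback.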