walaa2022 committed on
Commit
608cdc4
·
verified ·
1 Parent(s): 211efb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -76
app.py CHANGED
@@ -117,15 +117,16 @@ def generate_voice_response(text, simulate=False):
117
  st.error(f"Error generating voice response: {e}")
118
  return None
119
 
 
120
  def extract_invoice_data(image_file):
121
  """
122
- Extract financial data from invoices using Gemini Pro
123
 
124
  Args:
125
  image_file: Uploaded image file from Streamlit
126
 
127
  Returns:
128
- Dictionary of extracted financial information
129
  """
130
  try:
131
  # Initialize Gemini
@@ -145,38 +146,35 @@ def extract_invoice_data(image_file):
145
  img_byte_arr = img_byte_arr.getvalue()
146
 
147
  # Prepare the model
148
- model = genai.GenerativeModel('gemini-1.5-pro')
149
 
150
- # Prompt for detailed invoice extraction
151
  prompt = """
152
- Extract financial information from this invoice in a structured format:
153
-
154
- Provide details in a CSV-friendly format with these columns:
155
- - Date
156
- - Vendor
157
- - Description
158
- - Quantity
159
- - Unit Price
160
- - Total Amount
161
- - Tax
162
- - Payment Terms
163
-
164
- Be precise and extract as much structured data as possible.
165
- Separate multiple line items if applicable.
166
- Use clear, consistent formatting.
167
  """
168
 
169
  # Generate response
170
  response = model.generate_content([prompt, image])
171
-
172
-
173
 
174
  # Return the extracted text
175
- return parse_invoice_text_to_dataframe(response.text)
176
 
177
  except Exception as e:
178
  st.error(f"Error extracting invoice data: {e}")
179
- return None
 
 
180
 
181
  # Utility Functions
182
  def switch_page(page_name):
@@ -580,77 +578,44 @@ def render_financial_dashboard():
580
 
581
 
582
 
583
- def parse_invoice_text_to_dataframe(text):
584
- """
585
- Convert extracted text to a structured DataFrame
586
-
587
- Args:
588
- text (str): Extracted invoice text
589
-
590
- Returns:
591
- pandas.DataFrame: Structured invoice data
592
- """
593
- try:
594
- # Split the text into lines
595
- lines = text.split('\n')
596
-
597
- # Prepare lists to store data
598
- data = []
599
-
600
- # Try to parse the lines
601
- for line in lines:
602
- # Basic parsing - you might need to adjust regex based on invoice formats
603
- match = re.match(r'(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*,\s*(\d+)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(.+)', line)
604
-
605
- if match:
606
- data.append({
607
- 'Date': match.group(1),
608
- 'Vendor': match.group(2),
609
- 'Description': match.group(3),
610
- 'Quantity': match.group(4),
611
- 'Unit Price': match.group(5),
612
- 'Total Amount': match.group(6),
613
- 'Tax': match.group(7),
614
- 'Payment Terms': match.group(8)
615
- })
616
-
617
- # Create DataFrame
618
- df = pd.DataFrame(data)
619
- return df
620
-
621
- except Exception as e:
622
- st.error(f"Error parsing invoice text: {e}")
623
- return pd.DataFrame()
624
 
625
  def render_invoice_processor():
 
626
  st.markdown("<h1 class='main-header'>Invoice Data Extractor</h1>", unsafe_allow_html=True)
627
  st.markdown("<p class='sub-header'>AI-powered financial data extraction</p>", unsafe_allow_html=True)
628
 
 
629
  uploaded_file = st.file_uploader("Upload Invoice Image", type=['png', 'jpg', 'jpeg', 'pdf'])
630
 
631
  if uploaded_file is not None:
632
- st.image(uploaded_file, caption="Uploaded Invoice", use_container_width=True)
 
633
 
 
634
  if st.button("Extract Invoice Details"):
635
  with st.spinner("Extracting invoice information..."):
636
- invoice_df = extract_invoice_data(uploaded_file)
637
 
638
- if invoice_df is not None and not invoice_df.empty:
 
639
  st.subheader("Extracted Invoice Information")
640
- st.dataframe(invoice_df)
 
 
 
 
641
 
642
- # Download as CSV
643
- csv = invoice_df.to_csv(index=False)
644
  st.download_button(
645
- label="Download Invoice Data as CSV",
646
- data=csv,
647
- file_name="invoice_details.csv",
648
- mime="text/csv",
649
- key="download-csv"
650
  )
 
 
651
  else:
652
  st.error("Could not extract invoice data")
653
-
654
 
655
 
656
 
 
117
  st.error(f"Error generating voice response: {e}")
118
  return None
119
 
120
+
121
  def extract_invoice_data(image_file):
122
  """
123
+ Extract financial data from invoices using Gemini flash
124
 
125
  Args:
126
  image_file: Uploaded image file from Streamlit
127
 
128
  Returns:
129
+ Extracted text information
130
  """
131
  try:
132
  # Initialize Gemini
 
146
  img_byte_arr = img_byte_arr.getvalue()
147
 
148
  # Prepare the model
149
+ model = genai.GenerativeModel('gemini-2.0-flash')
150
 
151
+ # Comprehensive prompt for invoice extraction
152
  prompt = """
153
+ Carefully extract all financial and relevant information from this invoice:
154
+
155
+ Please provide a detailed, structured text output that includes:
156
+ - Complete invoice details
157
+ - Vendor/Company information
158
+ - Line items or services
159
+ - Total amounts
160
+ - Tax details
161
+ - Payment terms
162
+ - Any other significant financial information
163
+
164
+ Format the output clearly and comprehensively, making it easy to read and understand.
 
 
 
165
  """
166
 
167
  # Generate response
168
  response = model.generate_content([prompt, image])
 
 
169
 
170
  # Return the extracted text
171
+ return response.text
172
 
173
  except Exception as e:
174
  st.error(f"Error extracting invoice data: {e}")
175
+ return None
176
+
177
+
178
 
179
  # Utility Functions
180
  def switch_page(page_name):
 
578
 
579
 
580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
  def render_invoice_processor():
583
+ """Render the invoice processing page"""
584
  st.markdown("<h1 class='main-header'>Invoice Data Extractor</h1>", unsafe_allow_html=True)
585
  st.markdown("<p class='sub-header'>AI-powered financial data extraction</p>", unsafe_allow_html=True)
586
 
587
+ # File uploader for invoices
588
  uploaded_file = st.file_uploader("Upload Invoice Image", type=['png', 'jpg', 'jpeg', 'pdf'])
589
 
590
  if uploaded_file is not None:
591
+ # Display the uploaded image
592
+ st.image(uploaded_file, caption="Uploaded Invoice", use_column_width=True)
593
 
594
+ # Extract invoice data
595
  if st.button("Extract Invoice Details"):
596
  with st.spinner("Extracting invoice information..."):
597
+ invoice_data = extract_invoice_data(uploaded_file)
598
 
599
+ if invoice_data:
600
+ # Display extracted data in a formatted, readable way
601
  st.subheader("Extracted Invoice Information")
602
+ st.markdown("<div class='advisor-card'>", unsafe_allow_html=True)
603
+ st.markdown("<span class='ai-badge'>AI Invoice Extraction</span>", unsafe_allow_html=True)
604
+
605
+ # Use st.text to preserve formatting
606
+ st.text(invoice_data)
607
 
608
+ # Optional: Copy to clipboard
 
609
  st.download_button(
610
+ label="Copy Invoice Details",
611
+ data=invoice_data,
612
+ file_name="invoice_details.txt",
613
+ mime="text/plain"
 
614
  )
615
+
616
+ st.markdown("</div>", unsafe_allow_html=True)
617
  else:
618
  st.error("Could not extract invoice data")
 
619
 
620
 
621