Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,15 +117,16 @@ def generate_voice_response(text, simulate=False):
|
|
| 117 |
st.error(f"Error generating voice response: {e}")
|
| 118 |
return None
|
| 119 |
|
|
|
|
| 120 |
def extract_invoice_data(image_file):
|
| 121 |
"""
|
| 122 |
-
Extract financial data from invoices using Gemini
|
| 123 |
|
| 124 |
Args:
|
| 125 |
image_file: Uploaded image file from Streamlit
|
| 126 |
|
| 127 |
Returns:
|
| 128 |
-
|
| 129 |
"""
|
| 130 |
try:
|
| 131 |
# Initialize Gemini
|
|
@@ -145,38 +146,35 @@ def extract_invoice_data(image_file):
|
|
| 145 |
img_byte_arr = img_byte_arr.getvalue()
|
| 146 |
|
| 147 |
# Prepare the model
|
| 148 |
-
model = genai.GenerativeModel('gemini-
|
| 149 |
|
| 150 |
-
#
|
| 151 |
prompt = """
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
-
|
| 156 |
-
- Vendor
|
| 157 |
-
-
|
| 158 |
-
-
|
| 159 |
-
-
|
| 160 |
-
-
|
| 161 |
-
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
Be precise and extract as much structured data as possible.
|
| 165 |
-
Separate multiple line items if applicable.
|
| 166 |
-
Use clear, consistent formatting.
|
| 167 |
"""
|
| 168 |
|
| 169 |
# Generate response
|
| 170 |
response = model.generate_content([prompt, image])
|
| 171 |
-
|
| 172 |
-
|
| 173 |
|
| 174 |
# Return the extracted text
|
| 175 |
-
return
|
| 176 |
|
| 177 |
except Exception as e:
|
| 178 |
st.error(f"Error extracting invoice data: {e}")
|
| 179 |
-
return None
|
|
|
|
|
|
|
| 180 |
|
| 181 |
# Utility Functions
|
| 182 |
def switch_page(page_name):
|
|
@@ -580,77 +578,44 @@ def render_financial_dashboard():
|
|
| 580 |
|
| 581 |
|
| 582 |
|
| 583 |
-
# Column names, in the order the comma-separated fields appear on a line.
_INVOICE_COLUMNS = [
    'Date', 'Vendor', 'Description', 'Quantity',
    'Unit Price', 'Total Amount', 'Tax', 'Payment Terms',
]

# One invoice row: date, vendor, description, integer quantity, three
# (optionally $-prefixed) amounts, then free-form payment terms.
# Compiled once so the pattern is not rebuilt for every line.
_INVOICE_LINE_RE = re.compile(
    r'(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*,\s*(\d+)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(\$?\d+\.?\d*)\s*,\s*(.+)'
)


def parse_invoice_text_to_dataframe(text):
    """
    Convert extracted invoice text to a structured DataFrame.

    Each line of ``text`` that looks like eight comma-separated fields
    (date, vendor, description, quantity, unit price, total, tax,
    payment terms) becomes one row. Lines that do not match are ignored.

    Args:
        text (str): Extracted invoice text, one candidate row per line.

    Returns:
        pandas.DataFrame: One row per matching line. All values are kept
        as the strings captured from the text. The columns are always
        present, even when no line matched or parsing failed.
    """
    try:
        rows = []
        # splitlines() also handles "\r\n", so a stray "\r" never ends up
        # inside the last captured field.
        for raw_line in text.splitlines():
            # Strip so leading indentation does not leak into 'Date' and
            # trailing blanks do not leak into 'Payment Terms'.
            match = _INVOICE_LINE_RE.match(raw_line.strip())
            if match:
                rows.append(dict(zip(_INVOICE_COLUMNS, match.groups())))

        # Passing columns keeps the schema stable for an empty result.
        return pd.DataFrame(rows, columns=_INVOICE_COLUMNS)

    except Exception as e:
        # Best-effort parser: report the problem in the UI and hand back
        # an empty frame with the expected columns.
        st.error(f"Error parsing invoice text: {e}")
        return pd.DataFrame(columns=_INVOICE_COLUMNS)
|
| 624 |
|
| 625 |
def render_invoice_processor():
|
|
|
|
| 626 |
st.markdown("<h1 class='main-header'>Invoice Data Extractor</h1>", unsafe_allow_html=True)
|
| 627 |
st.markdown("<p class='sub-header'>AI-powered financial data extraction</p>", unsafe_allow_html=True)
|
| 628 |
|
|
|
|
| 629 |
uploaded_file = st.file_uploader("Upload Invoice Image", type=['png', 'jpg', 'jpeg', 'pdf'])
|
| 630 |
|
| 631 |
if uploaded_file is not None:
|
| 632 |
-
|
|
|
|
| 633 |
|
|
|
|
| 634 |
if st.button("Extract Invoice Details"):
|
| 635 |
with st.spinner("Extracting invoice information..."):
|
| 636 |
-
|
| 637 |
|
| 638 |
-
if
|
|
|
|
| 639 |
st.subheader("Extracted Invoice Information")
|
| 640 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
|
| 642 |
-
#
|
| 643 |
-
csv = invoice_df.to_csv(index=False)
|
| 644 |
st.download_button(
|
| 645 |
-
label="
|
| 646 |
-
data=
|
| 647 |
-
file_name="invoice_details.
|
| 648 |
-
mime="text/
|
| 649 |
-
key="download-csv"
|
| 650 |
)
|
|
|
|
|
|
|
| 651 |
else:
|
| 652 |
st.error("Could not extract invoice data")
|
| 653 |
-
|
| 654 |
|
| 655 |
|
| 656 |
|
|
|
|
| 117 |
st.error(f"Error generating voice response: {e}")
|
| 118 |
return None
|
| 119 |
|
| 120 |
+
|
| 121 |
def extract_invoice_data(image_file):
|
| 122 |
"""
|
| 123 |
+
Extract financial data from invoices using Gemini flash
|
| 124 |
|
| 125 |
Args:
|
| 126 |
image_file: Uploaded image file from Streamlit
|
| 127 |
|
| 128 |
Returns:
|
| 129 |
+
Extracted text information
|
| 130 |
"""
|
| 131 |
try:
|
| 132 |
# Initialize Gemini
|
|
|
|
| 146 |
img_byte_arr = img_byte_arr.getvalue()
|
| 147 |
|
| 148 |
# Prepare the model
|
| 149 |
+
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 150 |
|
| 151 |
+
# Comprehensive prompt for invoice extraction
|
| 152 |
prompt = """
|
| 153 |
+
Carefully extract all financial and relevant information from this invoice:
|
| 154 |
+
|
| 155 |
+
Please provide a detailed, structured text output that includes:
|
| 156 |
+
- Complete invoice details
|
| 157 |
+
- Vendor/Company information
|
| 158 |
+
- Line items or services
|
| 159 |
+
- Total amounts
|
| 160 |
+
- Tax details
|
| 161 |
+
- Payment terms
|
| 162 |
+
- Any other significant financial information
|
| 163 |
+
|
| 164 |
+
Format the output clearly and comprehensively, making it easy to read and understand.
|
|
|
|
|
|
|
|
|
|
| 165 |
"""
|
| 166 |
|
| 167 |
# Generate response
|
| 168 |
response = model.generate_content([prompt, image])
|
|
|
|
|
|
|
| 169 |
|
| 170 |
# Return the extracted text
|
| 171 |
+
return response.text
|
| 172 |
|
| 173 |
except Exception as e:
|
| 174 |
st.error(f"Error extracting invoice data: {e}")
|
| 175 |
+
return None
|
| 176 |
+
|
| 177 |
+
|
| 178 |
|
| 179 |
# Utility Functions
|
| 180 |
def switch_page(page_name):
|
|
|
|
| 578 |
|
| 579 |
|
| 580 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
|
| 582 |
def render_invoice_processor():
    """Render the invoice processing page.

    Shows an uploader for an invoice file, previews it when it is an
    image, and on button click passes it to ``extract_invoice_data`` and
    displays the returned text with a download button. Shows an error
    message when extraction returns nothing.
    """
    st.markdown("<h1 class='main-header'>Invoice Data Extractor</h1>", unsafe_allow_html=True)
    st.markdown("<p class='sub-header'>AI-powered financial data extraction</p>", unsafe_allow_html=True)

    # File uploader for invoices
    uploaded_file = st.file_uploader("Upload Invoice Image", type=['png', 'jpg', 'jpeg', 'pdf'])

    if uploaded_file is None:
        return

    # The uploader also accepts PDFs, which st.image cannot render, so
    # only preview files whose MIME type says they are images.
    if (uploaded_file.type or "").startswith("image/"):
        st.image(uploaded_file, caption="Uploaded Invoice", use_column_width=True)
    else:
        st.info("Preview is only available for image files.")

    # Nothing more to do until the user asks for extraction.
    if not st.button("Extract Invoice Details"):
        return

    with st.spinner("Extracting invoice information..."):
        invoice_data = extract_invoice_data(uploaded_file)

    if not invoice_data:
        st.error("Could not extract invoice data")
        return

    # Display extracted data in a formatted, readable way
    st.subheader("Extracted Invoice Information")
    # NOTE(review): the opening and closing <div> are emitted by separate
    # st.markdown calls, so they may not actually wrap the widgets in
    # between - confirm the card styling renders as intended.
    st.markdown("<div class='advisor-card'>", unsafe_allow_html=True)
    st.markdown("<span class='ai-badge'>AI Invoice Extraction</span>", unsafe_allow_html=True)

    # st.text keeps the model's line breaks and spacing as-is
    st.text(invoice_data)

    # Offer the extracted text as a file download (this is a download,
    # not a clipboard copy, so the label says so).
    st.download_button(
        label="Download Invoice Details",
        data=invoice_data,
        file_name="invoice_details.txt",
        mime="text/plain"
    )

    st.markdown("</div>", unsafe_allow_html=True)
|
|
|
|
| 619 |
|
| 620 |
|
| 621 |
|