# PDF-Assistant / app.py
# absiitr's picture
# Update app.py
# 7250f7b verified
import gc
import html
import logging
import os
import tempfile
import uuid

import streamlit as st
import torch
from groq import Groq, APIError
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

# ---------------- STREAMLIT UI SETUP ----------------
# The page configuration is issued before anything else that can render
# output. Streamlit releases that require `set_page_config` to be the first
# command reject the script if the client-initialisation banners further down
# are emitted ahead of it.
st.set_page_config(
    page_title="PDF Assistant",
    page_icon="πŸ“˜",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Load the API key from Streamlit secrets (Hugging Face secrets), falling back
# to the process environment when the secret is missing or empty.
try:
    GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
except Exception:
    # Reading `st.secrets` can raise when no secrets file is present, and the
    # concrete exception class differs between Streamlit versions, so the
    # catch is deliberately broad. The environment fallback below still runs.
    logging.info("Streamlit secrets unavailable; falling back to environment.")
    GROQ_API_KEY = None
if not GROQ_API_KEY:
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

GROQ_MODEL = "openai/gpt-oss-120b"

# Initialize Groq client. `client` stays None when no key is configured or
# construction fails; the rest of the script checks it before use.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("βœ… Groq client initialized successfully.")
    except Exception as e:
        st.error(f"❌ Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
# ---------------- CSS ----------------
# Stylesheet injected once per run. It locks page scrolling, hides Streamlit's
# default chrome, pins the sidebar and header, and styles the chat bubbles.
_PAGE_CSS = """
<style>
/* 1. GLOBAL RESET & SCROLL LOCK */
html, body {
overflow: hidden;
height: 100%;
margin: 0;
}
/* 2. HIDE DEFAULT STREAMLIT ELEMENTS & SIDEBAR TOGGLES */
header[data-testid="stHeader"] {
display: none;
}
footer {
display: none;
}
/* Hide the 'Close Sidebar' (<<) button inside the sidebar */
section[data-testid="stSidebar"] > div > div:first-child {
display: none;
}
/* Hide the 'Open Sidebar' (>) button on main screen */
[data-testid="collapsedControl"] {
display: none;
}
/* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
[data-testid="stSidebar"] {
position: fixed;
top: 0;
left: 0;
height: 100vh;
width: 20rem;
overflow-y: auto !important;
z-index: 99999;
}
[data-testid="stSidebar"]::-webkit-scrollbar {
width: 6px;
}
[data-testid="stSidebar"]::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 3px;
}
/* 4. FIXED HEADER STYLING */
.fixed-header {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6rem;
background-color: #0e1117; /* Hardcoded Dark Background */
z-index: 99998;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
.main .block-container {
margin-top: 6rem;
height: calc(100vh - 6rem);
overflow-y: auto;
padding-top: 1rem;
padding-bottom: 5rem;
}
.main .block-container::-webkit-scrollbar {
width: 8px;
}
.main .block-container::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 4px;
}
/* 6. SIDEBAR BUTTON STYLING */
[data-testid="stSidebar"] .stButton button {
width: 100%;
border-radius: 8px;
font-weight: 600;
margin-bottom: 6px;
}
/* 7. HIDE UPLOADED FILE LIST & NAME */
[data-testid='stFileUploaderFile'] {
display: none;
}
section[data-testid="stFileUploader"] ul {
display: none;
}
section[data-testid="stFileUploader"] small {
display: none;
}
/* 8. CHAT BUBBLES */
.chat-user {
background: #2d3748;
padding: 12px;
border-radius: 10px 10px 2px 10px;
margin: 6px 0 6px auto;
max-width: 85%;
text-align: right;
color: #f0f2f6;
}
.chat-bot {
background: #1e3a8a;
padding: 12px;
border-radius: 10px 10px 10px 2px;
margin: 6px auto 6px 0;
max-width: 85%;
text-align: left;
color: #ffffff;
}
.sources {
display: none;
}
/* 9. TITLE TEXT */
/* UPDATED: Added color: #ffffff to ensure visibility on the dark header in Light Mode */
.title-text {
font-size: 2.5rem;
font-weight: 800;
margin: 0;
line-height: 1.2;
color: #ffffff !important;
}
.creator-text {
font-size: 1rem;
font-weight: 500;
color: #cccccc;
}
.creator-text a {
color: #4da6ff;
text-decoration: none;
}
/* 10. INPUT FORM STYLING */
[data-testid="stForm"] {
border: none;
padding: 0;
}
/* --- NEW: FIX FOR CHAT BUTTON IN LIGHT MODE --- */
/* If browser is in light mode, force the chat button to look clean (White bg, Dark text) */
@media (prefers-color-scheme: light) {
[data-testid="stFormSubmitButton"] > button {
background-color: #ffffff !important;
color: #000000 !important;
border: 1px solid #e2e8f0 !important;
}
[data-testid="stFormSubmitButton"] > button:hover {
background-color: #f7fafc !important;
border-color: #cbd5e0 !important;
color: #000000 !important;
}
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# ---------------- FIXED HEADER ----------------
# Static banner markup; its layout comes from the `.fixed-header`,
# `.title-text` and `.creator-text` CSS classes.
_HEADER_HTML = """
<div class="fixed-header">
<div class="title-text">πŸ“˜ PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
</div>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# ---------------- SESSION STATE ----------------
# Seed each session key only when it is absent, so values written by earlier
# runs of the script are left untouched.
_initial_state = {
    "chat": [],
    "vectorstore": None,
    "retriever": None,
    "uploaded_file_name": None,
    "uploader_key": 0,
}
for _state_name, _state_value in _initial_state.items():
    if _state_name not in st.session_state:
        st.session_state[_state_name] = _state_value
# ---------------- FUNCTIONS ----------------
def clear_chat_history():
    """Replace the session's conversation log with a fresh empty list."""
    st.session_state["chat"] = list()
def clear_memory():
    """Forget the currently loaded PDF and release what it was holding.

    Deletes the vector store's collection, clears the vector store, retriever
    and file name from the session, bumps ``uploader_key`` so the file
    uploader widget is rebuilt under a new key, then runs the garbage
    collector and, when CUDA is available, empties the CUDA cache.
    """
    store = st.session_state.get("vectorstore")
    if store is not None:
        # Dropping the Python reference alone does not remove the indexed
        # chunks from the backing Chroma collection.
        # NOTE(review): assumes the in-memory Chroma client outlives this
        # object within the process -- confirm against the installed chromadb.
        try:
            store.delete_collection()
        except Exception:
            # Best effort: a failed delete must not block resetting the UI.
            logging.exception("Failed to delete the Chroma collection.")

    st.session_state.vectorstore = None
    st.session_state.retriever = None
    st.session_state.uploaded_file_name = None
    st.session_state.uploader_key += 1

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def process_pdf(uploaded_file):
    """Index an uploaded PDF and store its vector store/retriever in the session.

    The upload is written to a temporary ``.pdf`` file, loaded with
    ``PyPDFLoader``, split into chunks (800 characters, 60 overlap), embedded
    on CPU with ``sentence-transformers/all-MiniLM-L6-v2`` and indexed in
    Chroma. The retriever returns the top 5 chunks per query.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF
            bytes (the value returned by ``st.file_uploader``).

    Returns:
        The number of chunks indexed, or ``None`` when processing failed or
        the PDF produced no chunks. Failures are reported through
        ``st.error``.
    """
    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            path = tmp.name

        docs = PyPDFLoader(path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # Nothing to index (e.g. the loader extracted no text); report it
            # explicitly instead of surfacing an opaque vector-store error.
            st.error("Error processing PDF: no extractable text was found.")
            return None

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        # A unique collection per upload keeps chunks from earlier PDFs (or
        # other sessions in the same process) out of this document's results.
        # NOTE(review): assumes Chroma's default collection name is shared
        # process-wide by the in-memory client -- confirm against chromadb.
        vectorstore = Chroma.from_documents(
            chunks,
            embeddings,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        previous_store = st.session_state.get("vectorstore")
        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever

        # Release the superseded index only after the new one is in place.
        if previous_store is not None:
            try:
                previous_store.delete_collection()
            except Exception:
                logging.exception("Failed to delete the previous Chroma collection.")

        return len(chunks)
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Remove the temp file on every path, including failures.
        if path and os.path.exists(path):
            os.unlink(path)
def ask_question(question):
    """Answer a question from the loaded PDF using retrieval plus the Groq model.

    Retrieves chunks with the session's retriever, joins their text into a
    context, and sends a prompt built from the context and the question to
    ``GROQ_MODEL`` at temperature 0.1.

    Args:
        question: The user's question text.

    Returns:
        A 3-tuple ``(answer, num_docs, error)``. On success ``answer`` is the
        stripped model reply, ``num_docs`` the number of retrieved chunks and
        ``error`` is ``None``. On failure ``answer`` is ``None``, ``num_docs``
        is ``0`` and ``error`` is a message string.
    """
    if not client:
        return None, 0, "Groq client is not initialized."
    if not st.session_state.retriever:
        return None, 0, "Upload PDF first."
    try:
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"""You are a strict RAG Q&A assistant.
Use below CONTEXT to answer the below mentioned QUESTION
If the answer is not found, reply: "I cannot find this in the PDF."
CONTEXT = {context}
QUESTION = {question}
Answer on your behalf, write answer in a presentable manner (proper formatting) like point-wise with numbering or bullet points accordingly!"""
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": "Use only the PDF content."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )
        # Guard against a missing or blank message body so the caller always
        # receives either a non-empty answer or a real error message.
        content = response.choices[0].message.content
        answer = content.strip() if content else ""
        if not answer:
            return None, 0, "The model returned an empty response."
        return answer, len(docs), None
    except Exception as e:
        # Keep the traceback in the logs; the UI only shows the short message.
        logging.exception("Question answering failed.")
        return None, 0, f"Error: {str(e)}"
# ---------------- SIDEBAR ----------------
# Sidebar: reset buttons plus the PDF uploader. A new file name triggers
# indexing; an unchanged name only shows the active-file banner.
with st.sidebar:
    st.write("")

    chat_reset_clicked = st.button("πŸ—‘οΈ Clear Chat History", use_container_width=True)
    if chat_reset_clicked:
        clear_chat_history()

    # `clear_memory` runs as the button's callback; the branch below only
    # shows the confirmation message.
    memory_reset_clicked = st.button(
        "πŸ”₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True
    )
    if memory_reset_clicked:
        st.success("Memory Cleared!")

    st.markdown("---")

    if st.session_state.uploaded_file_name:
        upload_label = "βœ… PDF Uploaded!"
    else:
        upload_label = "Upload PDF"

    uploaded = st.file_uploader(
        upload_label,
        type=["pdf"],
        key=st.session_state.uploader_key,
        label_visibility="collapsed",
    )

    if not uploaded:
        st.warning("⬆️ Upload a PDF to start chatting!")
    elif uploaded.name == st.session_state.uploaded_file_name:
        st.success(f"βœ… **Active:** `{uploaded.name}`")
    else:
        # A different file: forget the old name and conversation first.
        st.session_state.uploaded_file_name = None
        st.session_state.chat = []
        with st.spinner(f"Processing '{uploaded.name}'..."):
            chunks = process_pdf(uploaded)
        if chunks:
            st.session_state.uploaded_file_name = uploaded.name
            st.success("βœ… PDF Processed!")
        else:
            st.error("❌ Failed.")
# ---------------- INPUT AREA ----------------
# The question box and send button are disabled until a PDF has been indexed
# and the Groq client exists.
disabled_input = st.session_state.uploaded_file_name is None or client is None

# Input Form (cleared after each submit)
with st.form(key='chat_form', clear_on_submit=True):
    col_input, col_btn = st.columns([0.85, 0.15], gap="small")
    with col_input:
        user_question = st.text_input(
            "Ask a question",
            placeholder="Ask a question about the loaded PDF...",
            label_visibility="collapsed",
            disabled=disabled_input
        )
    with col_btn:
        submit_btn = st.form_submit_button("➀", disabled=disabled_input, use_container_width=True)

# Ignore submissions that contain only whitespace instead of sending them to
# the model.
question_text = user_question.strip() if user_question else ""
if submit_btn and question_text:
    st.session_state.chat.append(("user", question_text))
    with st.spinner("Thinking..."):
        answer, sources, error = ask_question(question_text)
    if answer:
        st.session_state.chat.append(("bot", answer))
    else:
        # Never render a literal "None" when no error text came back.
        error_text = error or "No answer was returned."
        st.session_state.chat.append(("bot", f"πŸ”΄ **Error:** {error_text}"))
    # Rerun so the chat history section renders the new messages.
    st.rerun()
# ---------------- CHAT HISTORY (REVERSED) ----------------
# Messages are rendered newest-first as styled HTML bubbles.
if st.session_state.chat:
    st.markdown("---")
    for role, msg in reversed(st.session_state.chat):
        if role == "user":
            # The user's text is plain text placed inside raw HTML, so it is
            # escaped to keep characters such as "<" or "&" from being
            # interpreted as markup.
            safe_msg = html.escape(msg)
            st.markdown(f"<div class='chat-user'>{safe_msg}</div>", unsafe_allow_html=True)
        else:
            # NOTE(review): bot text is model output derived from the uploaded
            # PDF and is inserted unescaped so its formatting still renders.
            # Consider sanitising it if untrusted PDFs are a concern.
            st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)