Spaces:
Running
Running
File size: 10,876 Bytes
bf8b348 cc598bb 58ee917 cc598bb 58ee917 637d303 cc7258d 58ee917 cc598bb a1ecf7f cc598bb 58ee917 b16e940 58ee917 b16e940 58ee917 f617d1a 58ee917 748fa2f 020af46 58ee917 f9960c7 3055cf6 58ee917 3055cf6 748fa2f 58ee917 3055cf6 58ee917 b16e940 748fa2f 58ee917 3055cf6 b16e940 58ee917 b16e940 58ee917 921958d 58ee917 748fa2f 58ee917 3055cf6 8574ab5 3777b25 921958d b16e940 b3cb16f 58ee917 6a6c714 06f5408 58ee917 b16e940 748fa2f 58ee917 06f5408 58ee917 b16e940 71d2c45 58ee917 71d2c45 58ee917 748fa2f 58ee917 b16e940 58ee917 3055cf6 748fa2f 58ee917 b16e940 58ee917 748fa2f 58ee917 3055cf6 f617d1a b16e940 f617d1a 58ee917 3055cf6 b16e940 f617d1a 58ee917 b6b9378 58ee917 b16e940 748fa2f 58ee917 b3cb16f 58ee917 b3cb16f b16e940 58ee917 748fa2f 58ee917 e03f9c9 b3cb16f 0eb63e4 8601c8e 58e21db 58ee917 3777b25 8574ab5 58ee917 8574ab5 02f4e29 58ee917 f617d1a 58e21db 8601c8e 58ee917 f617d1a 02f4e29 58ee917 cc598bb 02f4e29 cc598bb 58ee917 28a8426 748fa2f 58ee917 748fa2f 7250f7b 58ee917 28a8426 58ee917 8601c8e 7d0de58 8601c8e 58ee917 b16e940 58ee917 748fa2f f617d1a 58ee917 86a71ff 58ee917 86a71ff 748fa2f 58ee917 f617d1a 58ee917 06f5408 58ee917 f617d1a 58ee917 f617d1a 58ee917 f617d1a 58ee917 86a71ff 58ee917 6a6c714 8601c8e 58ee917 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 |
import gc
import html
import logging
import os
import tempfile

import streamlit as st
import torch
from groq import Groq, APIError
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)


def _read_groq_api_key():
    """Return the Groq API key from Streamlit secrets, else from the environment.

    Streamlit secrets take precedence. The lookup is guarded because accessing
    ``st.secrets`` can raise when no secrets file is configured (e.g. a local
    run), which would otherwise make the environment fallback unreachable.
    """
    env_key = os.environ.get("GROQ_API_KEY")
    try:
        return st.secrets.get("GROQ_API_KEY", env_key)
    except Exception:  # secrets backend unavailable -> fall back to env var
        logging.info("Streamlit secrets unavailable; using environment variable.")
        return env_key


# Load API key from Hugging Face secrets (or the process environment)
GROQ_API_KEY = _read_groq_api_key()
GROQ_MODEL = "openai/gpt-oss-120b"

# Initialize Groq client; stays None when the key is missing or setup fails,
# and the rest of the app treats None as "chat disabled".
# NOTE(review): the st.error / st.warning calls below run before
# st.set_page_config; some Streamlit releases require set_page_config to be
# the first Streamlit command - confirm against the deployed version.
# NOTE(review): emoji in these messages were reconstructed from mis-decoded
# bytes in the reviewed copy - confirm they match the intended glyphs.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("✅ Groq client initialized successfully.")
    except Exception as e:
        st.error(f"❌ Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
# ---------------- STREAMLIT UI SETUP ----------------
# Page-level settings: browser tab title/icon, wide layout, sidebar open.
# NOTE(review): page_icon was mis-decoded (shown as a Greek letter) in the
# reviewed copy; restored to a document emoji - confirm the intended glyph.
st.set_page_config(
    page_title="PDF Assistant",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ---------------- CSS ----------------
# Stylesheet for the whole page, kept in one constant and injected as raw
# HTML. The numbered comments inside the CSS describe each section.
_PAGE_CSS = """
<style>
/* 1. GLOBAL RESET & SCROLL LOCK */
html, body {
overflow: hidden;
height: 100%;
margin: 0;
}
/* 2. HIDE DEFAULT STREAMLIT ELEMENTS & SIDEBAR TOGGLES */
header[data-testid="stHeader"] {
display: none;
}
footer {
display: none;
}
/* Hide the 'Close Sidebar' (<<) button inside the sidebar */
section[data-testid="stSidebar"] > div > div:first-child {
display: none;
}
/* Hide the 'Open Sidebar' (>) button on main screen */
[data-testid="collapsedControl"] {
display: none;
}
/* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
[data-testid="stSidebar"] {
position: fixed;
top: 0;
left: 0;
height: 100vh;
width: 20rem;
overflow-y: auto !important;
z-index: 99999;
}
[data-testid="stSidebar"]::-webkit-scrollbar {
width: 6px;
}
[data-testid="stSidebar"]::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 3px;
}
/* 4. FIXED HEADER STYLING */
.fixed-header {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6rem;
background-color: #0e1117; /* Hardcoded Dark Background */
z-index: 99998;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
.main .block-container {
margin-top: 6rem;
height: calc(100vh - 6rem);
overflow-y: auto;
padding-top: 1rem;
padding-bottom: 5rem;
}
.main .block-container::-webkit-scrollbar {
width: 8px;
}
.main .block-container::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 4px;
}
/* 6. SIDEBAR BUTTON STYLING */
[data-testid="stSidebar"] .stButton button {
width: 100%;
border-radius: 8px;
font-weight: 600;
margin-bottom: 6px;
}
/* 7. HIDE UPLOADED FILE LIST & NAME */
[data-testid='stFileUploaderFile'] {
display: none;
}
section[data-testid="stFileUploader"] ul {
display: none;
}
section[data-testid="stFileUploader"] small {
display: none;
}
/* 8. CHAT BUBBLES */
.chat-user {
background: #2d3748;
padding: 12px;
border-radius: 10px 10px 2px 10px;
margin: 6px 0 6px auto;
max-width: 85%;
text-align: right;
color: #f0f2f6;
}
.chat-bot {
background: #1e3a8a;
padding: 12px;
border-radius: 10px 10px 10px 2px;
margin: 6px auto 6px 0;
max-width: 85%;
text-align: left;
color: #ffffff;
}
.sources {
display: none;
}
/* 9. TITLE TEXT */
/* UPDATED: Added color: #ffffff to ensure visibility on the dark header in Light Mode */
.title-text {
font-size: 2.5rem;
font-weight: 800;
margin: 0;
line-height: 1.2;
color: #ffffff !important;
}
.creator-text {
font-size: 1rem;
font-weight: 500;
color: #cccccc;
}
.creator-text a {
color: #4da6ff;
text-decoration: none;
}
/* 10. INPUT FORM STYLING */
[data-testid="stForm"] {
border: none;
padding: 0;
}
/* --- NEW: FIX FOR CHAT BUTTON IN LIGHT MODE --- */
/* If browser is in light mode, force the chat button to look clean (White bg, Dark text) */
@media (prefers-color-scheme: light) {
[data-testid="stFormSubmitButton"] > button {
background-color: #ffffff !important;
color: #000000 !important;
border: 1px solid #e2e8f0 !important;
}
[data-testid="stFormSubmitButton"] > button:hover {
background-color: #f7fafc !important;
border-color: #cbd5e0 !important;
color: #000000 !important;
}
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# ---------------- FIXED HEADER ----------------
# Static banner (title + author link) styled by the .fixed-header CSS class.
# NOTE(review): the title emoji was mis-decoded in the reviewed copy and has
# been restored to a document emoji - confirm the intended glyph.
# rel="noopener noreferrer" keeps the new tab from reaching back via
# window.opener.
st.markdown("""
<div class="fixed-header">
<div class="title-text">📄 PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank" rel="noopener noreferrer">Abhishek Saxena</a>
</div>
</div>
""", unsafe_allow_html=True)
# ---------------- SESSION STATE ----------------
# Seed every per-session key exactly once; values already present (from an
# earlier rerun) are left untouched.
_SESSION_DEFAULTS = {
    "chat": [],                  # list of (role, message) tuples
    "vectorstore": None,         # Chroma store built from the current PDF
    "retriever": None,           # retriever derived from the vector store
    "uploaded_file_name": None,  # name of the PDF that is currently indexed
    "uploader_key": 0,           # widget key passed to st.file_uploader
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# ---------------- FUNCTIONS ----------------
def clear_chat_history():
    """Reset the conversation by replacing the stored chat list with an empty one."""
    st.session_state["chat"] = []
def clear_memory():
    """Drop everything derived from the current PDF and reclaim memory.

    Clears the vector store, retriever and remembered file name, bumps
    ``uploader_key`` (the key given to the file uploader widget), then runs
    the garbage collector and empties the CUDA cache when a GPU is available.
    """
    state = st.session_state
    for stale_key in ("vectorstore", "retriever", "uploaded_file_name"):
        state[stale_key] = None
    state.uploader_key += 1

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def process_pdf(uploaded_file):
    """Index an uploaded PDF into an in-memory vector store.

    The upload is written to a temporary file, loaded with ``PyPDFLoader``,
    split into overlapping chunks, embedded on CPU and stored in Chroma. On
    success the vector store and a top-5 retriever are placed in
    ``st.session_state``.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF
            bytes (as provided by ``st.file_uploader``).

    Returns:
        The number of chunks indexed, or ``None`` when processing failed (an
        error message is shown in the UI in that case).
    """
    path = None
    try:
        # delete=False so the loader can reopen the file by name after the
        # handle is closed; removal is handled in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            path = tmp.name
            tmp.write(uploaded_file.getvalue())

        docs = PyPDFLoader(path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # e.g. a scanned PDF with no text layer: nothing to embed.
            st.error("Error processing PDF: no extractable text was found.")
            return None

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        vectorstore = Chroma.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever
        return len(chunks)
    except Exception as e:
        logging.exception("PDF processing failed")
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Always remove the temp file, including when loading/embedding fails.
        if path is not None and os.path.exists(path):
            try:
                os.unlink(path)
            except OSError:
                logging.warning("Could not remove temporary file %s", path)
def ask_question(question):
    """Answer a question using only content retrieved from the indexed PDF.

    Retrieves the most relevant chunks via the session retriever, embeds them
    in a strict "answer only from context" prompt and sends it to the Groq
    chat-completions API with temperature 0.

    Args:
        question: The user's question text.

    Returns:
        A ``(answer, num_docs, error)`` tuple. On success ``answer`` is the
        stripped model reply, ``num_docs`` the number of retrieved chunks and
        ``error`` is ``None``. On failure ``answer`` is ``None``, ``num_docs``
        is ``0`` and ``error`` describes the problem.
    """
    if not client:
        return None, 0, "Groq client is not initialized."
    if not st.session_state.retriever:
        return None, 0, "Upload PDF first."
    try:
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"""You are a strict RAG Q&A assistant who answers only from user's input PDF.
Use only below CONTEXT to answer the below mentioned QUESTION
If the answer is not found, reply: "I cannot find this in the PDF."
CONTEXT = {context}
QUESTION = {question}
Answer on your behalf, write answer in a presentable manner (proper formatting) like point-wise with numbering or bullet points accordingly!"""
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": "Use only the PDF content."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )
        # ``content`` may be None; guard before calling str methods on it.
        answer = (response.choices[0].message.content or "").strip()
        if not answer:
            return None, 0, "The model returned an empty response."
        return answer, len(docs), None
    except Exception as e:
        logging.exception("Question answering failed")
        return None, 0, f"Error: {str(e)}"
# ---------------- SIDEBAR ----------------
# NOTE(review): emoji in this section were reconstructed from mis-decoded
# bytes in the reviewed copy (several string literals were split across
# lines) - confirm they match the intended glyphs.
with st.sidebar:
    st.write("")
    if st.button("🗑️ Clear Chat History", use_container_width=True):
        clear_chat_history()
    # clear_memory runs as a callback before the rerun; the success message
    # is shown on the rerun triggered by the click.
    if st.button("🔥 Clear PDF Memory", on_click=clear_memory, use_container_width=True):
        st.success("Memory Cleared!")
    st.markdown("---")

    upload_label = "✅ PDF Uploaded!" if st.session_state.uploaded_file_name else "Upload PDF"
    uploaded = st.file_uploader(
        upload_label,
        type=["pdf"],
        key=st.session_state.uploader_key,
        label_visibility="collapsed",
    )

    if uploaded:
        if uploaded.name != st.session_state.uploaded_file_name:
            # A different file was chosen: forget the old document and its
            # conversation before indexing the new one.
            st.session_state.uploaded_file_name = None
            st.session_state.chat = []
            with st.spinner(f"Processing '{uploaded.name}'..."):
                chunks = process_pdf(uploaded)
            if chunks:
                st.session_state.uploaded_file_name = uploaded.name
                st.success("✅ PDF Processed!")
            else:
                st.error("❌ Failed.")
        else:
            st.success(f"✅ **Active:** `{uploaded.name}`")
    else:
        st.warning("⬆️ Upload a PDF to start chatting!")
# ---------------- INPUT AREA ----------------
# Chat is only possible once a PDF has been indexed and the Groq client exists.
disabled_input = st.session_state.uploaded_file_name is None or client is None

# Input Form
# NOTE(review): the button glyph and error emoji were reconstructed from
# mis-decoded bytes in the reviewed copy - confirm the intended glyphs.
with st.form(key='chat_form', clear_on_submit=True):
    col_input, col_btn = st.columns([0.85, 0.15], gap="small")
    with col_input:
        user_question = st.text_input(
            "Ask a question",
            placeholder="Ask a question about the loaded PDF...",
            label_visibility="collapsed",
            disabled=disabled_input,
        )
    with col_btn:
        submit_btn = st.form_submit_button("➤", disabled=disabled_input, use_container_width=True)

# Ignore submissions that contain only whitespace.
question_text = (user_question or "").strip()
if submit_btn and question_text:
    st.session_state.chat.append(("user", question_text))
    with st.spinner("Thinking..."):
        answer, _num_sources, error = ask_question(question_text)
    if answer:
        st.session_state.chat.append(("bot", answer))
    else:
        # Never show "Error: None" when the answer is empty without a reason.
        reason = error or "The model returned an empty answer."
        st.session_state.chat.append(("bot", f"🔴 **Error:** {reason}"))
    # Rerun so the history section renders the messages just appended.
    st.rerun()
# ---------------- CHAT HISTORY (REVERSED) ----------------
# Render newest messages first. Message text comes from the user and from the
# model (which echoes PDF content), so it is HTML-escaped before being placed
# inside markup rendered with unsafe_allow_html=True. quote=False is enough
# here because the text is inserted as element content, not in an attribute.
if st.session_state.chat:
    st.markdown("---")
    for role, msg in reversed(st.session_state.chat):
        bubble_class = "chat-user" if role == "user" else "chat-bot"
        safe_msg = html.escape(str(msg), quote=False)
        st.markdown(
            f"<div class='{bubble_class}'>{safe_msg}</div>",
            unsafe_allow_html=True,
        )