Spaces:
Running
Running
| import os | |
| import tempfile | |
| import gc | |
| import logging | |
| import streamlit as st | |
| from groq import Groq, APIError | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
| import torch | |
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

# ---------------- STREAMLIT UI SETUP ----------------
# Page configuration is issued before anything below that can render
# (st.error / st.warning): older Streamlit releases require
# st.set_page_config to be the first Streamlit command of the script and
# raise otherwise -- which would hit exactly the "missing key" paths.
# NOTE(review): the icon/emoji literals in this file look mis-encoded
# (e.g. the page icon) -- confirm the file is saved and served as UTF-8.
st.set_page_config(
    page_title="PDF Assistant",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded"
)


def _load_groq_api_key():
    """Return the Groq API key from Streamlit secrets, else the environment.

    Streamlit secrets keep priority, as before. Reading ``st.secrets`` is
    wrapped because it can raise when no secrets file is configured; in that
    case the ``GROQ_API_KEY`` environment variable is used instead.

    Returns:
        The key string, or ``None`` when neither source provides one.
    """
    env_key = os.environ.get("GROQ_API_KEY")
    try:
        return st.secrets.get("GROQ_API_KEY", env_key)
    except Exception:
        # Configuration boundary: any failure reading secrets degrades to
        # the environment variable instead of aborting the whole script.
        logging.info("Streamlit secrets unavailable; using environment variable.")
        return env_key


# Load API key from Hugging Face secrets
GROQ_API_KEY = _load_groq_api_key()
GROQ_MODEL = "openai/gpt-oss-120b"

# Initialize Groq client; it stays None when the key is missing or the
# constructor fails, and the rest of the script checks for that.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("β Groq client initialized successfully.")
    except Exception as e:
        st.error(f"β Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("β οΈ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
# ---------------- CSS ----------------
# One <style> block carrying the whole theme: page scroll lock, hidden
# default Streamlit chrome, fixed sidebar and header, independently
# scrolling main panel, chat bubbles and the light-mode submit button fix.
_APP_CSS = """
<style>
/* 1. GLOBAL RESET & SCROLL LOCK */
html, body {
overflow: hidden;
height: 100%;
margin: 0;
}
/* 2. HIDE DEFAULT STREAMLIT ELEMENTS & SIDEBAR TOGGLES */
header[data-testid="stHeader"] {
display: none;
}
footer {
display: none;
}
/* Hide the 'Close Sidebar' (<<) button inside the sidebar */
section[data-testid="stSidebar"] > div > div:first-child {
display: none;
}
/* Hide the 'Open Sidebar' (>) button on main screen */
[data-testid="collapsedControl"] {
display: none;
}
/* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
[data-testid="stSidebar"] {
position: fixed;
top: 0;
left: 0;
height: 100vh;
width: 20rem;
overflow-y: auto !important;
z-index: 99999;
}
[data-testid="stSidebar"]::-webkit-scrollbar {
width: 6px;
}
[data-testid="stSidebar"]::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 3px;
}
/* 4. FIXED HEADER STYLING */
.fixed-header {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6rem;
background-color: #0e1117; /* Hardcoded Dark Background */
z-index: 99998;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
.main .block-container {
margin-top: 6rem;
height: calc(100vh - 6rem);
overflow-y: auto;
padding-top: 1rem;
padding-bottom: 5rem;
}
.main .block-container::-webkit-scrollbar {
width: 8px;
}
.main .block-container::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 4px;
}
/* 6. SIDEBAR BUTTON STYLING */
[data-testid="stSidebar"] .stButton button {
width: 100%;
border-radius: 8px;
font-weight: 600;
margin-bottom: 6px;
}
/* 7. HIDE UPLOADED FILE LIST & NAME */
[data-testid='stFileUploaderFile'] {
display: none;
}
section[data-testid="stFileUploader"] ul {
display: none;
}
section[data-testid="stFileUploader"] small {
display: none;
}
/* 8. CHAT BUBBLES */
.chat-user {
background: #2d3748;
padding: 12px;
border-radius: 10px 10px 2px 10px;
margin: 6px 0 6px auto;
max-width: 85%;
text-align: right;
color: #f0f2f6;
}
.chat-bot {
background: #1e3a8a;
padding: 12px;
border-radius: 10px 10px 10px 2px;
margin: 6px auto 6px 0;
max-width: 85%;
text-align: left;
color: #ffffff;
}
.sources {
display: none;
}
/* 9. TITLE TEXT */
/* UPDATED: Added color: #ffffff to ensure visibility on the dark header in Light Mode */
.title-text {
font-size: 2.5rem;
font-weight: 800;
margin: 0;
line-height: 1.2;
color: #ffffff !important;
}
.creator-text {
font-size: 1rem;
font-weight: 500;
color: #cccccc;
}
.creator-text a {
color: #4da6ff;
text-decoration: none;
}
/* 10. INPUT FORM STYLING */
[data-testid="stForm"] {
border: none;
padding: 0;
}
/* --- NEW: FIX FOR CHAT BUTTON IN LIGHT MODE --- */
/* If browser is in light mode, force the chat button to look clean (White bg, Dark text) */
@media (prefers-color-scheme: light) {
[data-testid="stFormSubmitButton"] > button {
background-color: #ffffff !important;
color: #000000 !important;
border: 1px solid #e2e8f0 !important;
}
[data-testid="stFormSubmitButton"] > button:hover {
background-color: #f7fafc !important;
border-color: #cbd5e0 !important;
color: #000000 !important;
}
}
</style>
"""
# unsafe_allow_html is what lets the <style> element reach the page.
st.markdown(_APP_CSS, unsafe_allow_html=True)
# ---------------- FIXED HEADER ----------------
# Static banner markup; its classes (.fixed-header, .title-text,
# .creator-text) are the ones styled by the CSS injected by this script.
_HEADER_HTML = """
<div class="fixed-header">
<div class="title-text">π PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
</div>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# ---------------- SESSION STATE ----------------
# Seed each per-session key only when it is absent, so values set by an
# earlier run of the script in the same session are kept.
_SESSION_DEFAULTS = {
    "chat": [],                  # list of (role, message) tuples
    "vectorstore": None,         # set by process_pdf
    "retriever": None,           # set by process_pdf
    "uploaded_file_name": None,  # name of the PDF currently indexed
    "uploader_key": 0,           # bumped to reset the file uploader widget
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
| # ---------------- FUNCTIONS ---------------- | |
def clear_chat_history():
    """Reset the session's chat transcript to a new empty list."""
    st.session_state["chat"] = []
def clear_memory():
    """Forget the indexed PDF and reset the file uploader.

    Clears the vector store, retriever and stored file name from session
    state, bumps ``uploader_key`` (the uploader widget is keyed on it, so a
    new key yields an empty uploader), then runs garbage collection and, when
    CUDA is available, empties the CUDA cache.
    """
    state = st.session_state
    for stale_key in ("vectorstore", "retriever", "uploaded_file_name"):
        state[stale_key] = None
    state.uploader_key += 1

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def process_pdf(uploaded_file):
    """Index an uploaded PDF and store its retriever in session state.

    The upload is written to a temporary ``.pdf`` file, loaded with
    ``PyPDFLoader``, split into overlapping chunks, embedded on CPU and
    stored in a Chroma collection. On success ``st.session_state.vectorstore``
    and ``st.session_state.retriever`` are replaced.

    Args:
        uploaded_file: Upload object exposing ``getvalue()`` (the value
            returned by ``st.file_uploader``).

    Returns:
        The number of chunks indexed, or ``None`` when processing failed
        (the failure is reported with ``st.error``).
    """
    import uuid

    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            path = tmp.name
            tmp.write(uploaded_file.getvalue())

        docs = PyPDFLoader(path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # Nothing to embed (e.g. no extractable text): report it here
            # instead of handing an empty list to the vector store.
            st.error("Error processing PDF: no extractable text was found.")
            return None

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )

        previous_store = st.session_state.get("vectorstore")
        # A unique collection per upload keeps chunks of earlier PDFs out of
        # this index. Without a name every call targets the same default
        # collection. NOTE(review): the in-process Chroma client is reported
        # to share state across calls -- confirm for the installed version.
        vectorstore = Chroma.from_documents(
            chunks,
            embeddings,
            collection_name=f"pdf_{uuid.uuid4().hex}",
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever

        # Best-effort release of the collection this upload replaces.
        if previous_store is not None:
            try:
                previous_store.delete_collection()
            except Exception:
                logging.warning("Could not delete previous collection.", exc_info=True)

        return len(chunks)
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Remove the temp file on every path, including failures.
        if path is not None and os.path.exists(path):
            try:
                os.unlink(path)
            except OSError:
                logging.warning("Could not remove temp file %s", path, exc_info=True)
def ask_question(question):
    """Answer a question from the indexed PDF via the Groq chat API.

    Retrieves chunks for ``question`` with the session retriever, joins
    their text into a context, and sends a prompt to ``GROQ_MODEL``.

    Args:
        question: The user's question text.

    Returns:
        A ``(answer, source_count, error)`` tuple. On success ``answer`` is
        the stripped model reply, ``source_count`` is the number of retrieved
        chunks and ``error`` is ``None``. On failure ``answer`` is ``None``,
        ``source_count`` is ``0`` and ``error`` is a message string.
    """
    if not client:
        return None, 0, "Groq client is not initialized."
    if not st.session_state.retriever:
        return None, 0, "Upload PDF first."
    try:
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"""You are a strict RAG Q&A assistant.
Use below CONTEXT to answer the below mentioned QUESTION
If the answer is not found, reply: "I cannot find this in the PDF."
CONTEXT = {context}
QUESTION = {question}
Answer on your behalf, write answer in a presentable manner (proper formatting) like point-wise with numbering or bullet points accordingly!"""
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": "Use only the PDF content."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )
        # The message content can be missing or blank. Report that as an
        # explicit error: the caller treats a falsy answer as a failure and
        # would otherwise display "Error: None".
        content = response.choices[0].message.content
        answer = content.strip() if content else ""
        if not answer:
            return None, 0, "The model returned an empty response."
        return answer, len(docs), None
    except Exception as e:
        return None, 0, f"Error: {str(e)}"
# ---------------- SIDEBAR ----------------
with st.sidebar:
    st.write("")

    # Maintenance buttons. The PDF reset runs as an on_click callback, so
    # session state is already cleared when the script body executes.
    if st.button("ποΈ Clear Chat History", use_container_width=True):
        clear_chat_history()
    if st.button("π₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True):
        st.success("Memory Cleared!")
    st.markdown("---")

    if st.session_state.uploaded_file_name:
        upload_label = "β PDF Uploaded!"
    else:
        upload_label = "Upload PDF"
    # Keyed on uploader_key so that bumping the key yields a fresh widget.
    uploaded = st.file_uploader(
        upload_label,
        type=["pdf"],
        key=st.session_state.uploader_key,
        label_visibility="collapsed",
    )

    if not uploaded:
        st.warning("β¬οΈ Upload a PDF to start chatting!")
    elif uploaded.name == st.session_state.uploaded_file_name:
        # Same file name as the one already indexed: nothing to redo.
        st.success(f"β **Active:** `{uploaded.name}`")
    else:
        # Different file name: drop the old name and chat, then index it.
        st.session_state.uploaded_file_name = None
        st.session_state.chat = []
        with st.spinner(f"Processing '{uploaded.name}'..."):
            chunks = process_pdf(uploaded)
        if chunks:
            st.session_state.uploaded_file_name = uploaded.name
            st.success("β PDF Processed!")
        else:
            st.error("β Failed.")
# ---------------- INPUT AREA ----------------
# The form is usable only when a PDF is indexed AND the Groq client exists.
disabled_input = not (
    st.session_state.uploaded_file_name is not None and client is not None
)

# Input Form
with st.form(key='chat_form', clear_on_submit=True):
    col_input, col_btn = st.columns([0.85, 0.15], gap="small")
    user_question = col_input.text_input(
        "Ask a question",
        placeholder="Ask a question about the loaded PDF...",
        label_visibility="collapsed",
        disabled=disabled_input,
    )
    submit_btn = col_btn.form_submit_button(
        "β€", disabled=disabled_input, use_container_width=True
    )

if submit_btn and user_question:
    st.session_state.chat.append(("user", user_question))
    with st.spinner("Thinking..."):
        answer, _source_count, error = ask_question(user_question)
    # A falsy answer means the call failed; show the error text instead.
    reply = answer if answer else f"π΄ **Error:** {error}"
    st.session_state.chat.append(("bot", reply))
    # Rerun so the history section renders with the new messages.
    st.rerun()
# ---------------- CHAT HISTORY (REVERSED) ----------------
import html  # local to this section: used only to escape chat text below

if st.session_state.chat:
    st.markdown("---")
    # Newest message first.
    for role, msg in reversed(st.session_state.chat):
        bubble_class = "chat-user" if role == "user" else "chat-bot"
        # Messages are user input or model output and are rendered with
        # unsafe_allow_html=True, so escape HTML metacharacters to stop a
        # question or a PDF-derived answer from injecting markup or scripts.
        # quote=False: the text sits in element content, not in an attribute.
        safe_msg = html.escape(str(msg), quote=False)
        st.markdown(
            f"<div class='{bubble_class}'>{safe_msg}</div>",
            unsafe_allow_html=True,
        )