absiitr committed on
Commit
cc598bb
Β·
verified Β·
1 Parent(s): b736ab4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -105
app.py CHANGED
@@ -1,40 +1,40 @@
1
- import streamlit as st
2
- import requests
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- # ============== BACKEND URL CONFIGURATION ==============
6
- # For Hugging Face deployment
7
- import socket
8
-
9
- def get_backend_url():
10
- """Determine backend URL based on environment."""
11
- # Check if we're on Hugging Face
12
- if os.environ.get("SPACE_ID"):
13
- # On Hugging Face, backend is on localhost:8000
14
- return "http://localhost:8000"
15
- else:
16
- # Local development
17
- return "http://localhost:8000"
18
-
19
- BACKEND_URL = get_backend_url()
20
- # ======================================================
21
-
22
  st.set_page_config(page_title="PDF Assistant", page_icon="πŸ“˜", layout="wide")
23
 
24
- # Test backend connection on startup
25
- try:
26
- health_check = requests.get(f"{BACKEND_URL}/health", timeout=2)
27
- if health_check.status_code == 200:
28
- st.sidebar.success("βœ… Backend connected")
29
- else:
30
- st.sidebar.warning(f"⚠️ Backend status: {health_check.status_code}")
31
- except:
32
- st.sidebar.error("❌ Cannot connect to backend")
33
-
34
- # ---------------- CSS (Dark Theme) ----------------
35
  st.markdown("""
36
  <style>
37
- /* Streamlit standard setup for dark theme adherence */
38
  :root {
39
  --primary-color: #1e3a8a;
40
  --background-color: #0e1117;
@@ -42,7 +42,6 @@ st.markdown("""
42
  --text-color: #f0f2f6;
43
  }
44
 
45
- /* Custom Chat Bubbles */
46
  .chat-user {
47
  background: #2d3748;
48
  padding: 12px;
@@ -62,7 +61,6 @@ st.markdown("""
62
  color: #ffffff;
63
  }
64
 
65
- /* Sources section styling */
66
  .sources {
67
  font-size: 0.8em;
68
  opacity: 0.7;
@@ -71,7 +69,6 @@ st.markdown("""
71
  padding-top: 5px;
72
  }
73
 
74
- /* Footer styling */
75
  .footer {
76
  position: fixed;
77
  left: 0;
@@ -92,13 +89,6 @@ st.markdown("""
92
  .footer a:hover {
93
  text-decoration: underline;
94
  }
95
-
96
- /* Debug info */
97
- .debug-info {
98
- font-size: 0.7em;
99
- color: #888;
100
- margin-top: 5px;
101
- }
102
  </style>
103
  """, unsafe_allow_html=True)
104
 
@@ -106,47 +96,138 @@ st.markdown("""
106
  if "chat" not in st.session_state:
107
  st.session_state.chat = []
108
 
 
 
 
 
 
 
109
  if "uploaded_file_name" not in st.session_state:
110
  st.session_state.uploaded_file_name = None
111
 
112
  if "uploader_key" not in st.session_state:
113
  st.session_state.uploader_key = 0
114
 
115
- # Show backend URL for debugging
116
- st.sidebar.markdown(f"**Backend URL:** `{BACKEND_URL}`", unsafe_allow_html=True)
117
-
118
  # ---------------- FUNCTIONS ----------------
119
  def clear_chat_history():
120
  st.session_state.chat = []
121
 
122
  def clear_memory():
 
 
 
 
 
 
 
 
 
 
 
123
  try:
124
- res = requests.post(f"{BACKEND_URL}/clear")
125
- if res.status_code == 200:
126
- st.session_state.uploaded_file_name = None
127
- st.session_state.uploader_key += 1
128
- st.success("Memory cleared. Please upload a new PDF.")
129
- else:
130
- st.error(f"Failed to clear memory: {res.json().get('detail', 'Unknown error')}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
- st.error(f"Error clearing memory: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- # ---------------- SIDEBAR CONTROLS ----------------
 
 
 
135
  with st.sidebar:
136
  st.header("Controls")
137
  st.button("πŸ—‘οΈ Clear Chat History", on_click=clear_chat_history, use_container_width=True)
138
  st.button("πŸ”₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True)
139
-
140
  st.markdown("---")
141
  if st.session_state.uploaded_file_name:
142
  st.success(f"βœ… **Active PDF:**\n `{st.session_state.uploaded_file_name}`")
143
  else:
144
  st.warning("⬆️ Upload a PDF to start chatting!")
145
 
146
- # ---------------- MAIN APP ----------------
147
- st.title("πŸ“˜ PDF Assistant")
148
-
149
- # ---------------- UPLOAD PDF ----------------
150
  uploaded = st.file_uploader(
151
  "Upload your PDF",
152
  type=["pdf"],
@@ -156,33 +237,21 @@ uploaded = st.file_uploader(
156
  if uploaded and uploaded.name != st.session_state.uploaded_file_name:
157
  st.session_state.uploaded_file_name = None
158
  st.session_state.chat = []
159
-
160
  with st.spinner(f"Processing '{uploaded.name}'..."):
161
- try:
162
- files = {"file": (uploaded.name, uploaded.getvalue(), "application/pdf")}
163
- res = requests.post(f"{BACKEND_URL}/upload", files=files)
164
-
165
- if res.status_code == 200:
166
- chunks = res.json().get("chunks", 0)
167
- st.success(f"βœ… PDF processed successfully! {chunks} chunks created.")
168
- st.session_state.uploaded_file_name = uploaded.name
169
- st.markdown(f'<div class="debug-info">Backend: {BACKEND_URL}, Status: {res.status_code}</div>', unsafe_allow_html=True)
170
- else:
171
- error_msg = res.json().get("detail", f"HTTP {res.status_code}")
172
- st.error(f"❌ Upload failed: {error_msg}")
173
- st.session_state.uploaded_file_name = None
174
-
175
- except requests.exceptions.ConnectionError:
176
- st.error(f"πŸ”Œ Could not connect to backend at {BACKEND_URL}")
177
- st.session_state.uploaded_file_name = None
178
- except Exception as e:
179
- st.error(f"⚠️ Unexpected error: {str(e)}")
180
  st.session_state.uploaded_file_name = None
181
-
182
  st.rerun()
183
 
184
- # ---------------- CHAT INPUT ----------------
185
- disabled_input = st.session_state.uploaded_file_name is None
186
  question = st.text_input(
187
  "Ask a question about the loaded PDF:",
188
  key="question_input",
@@ -190,33 +259,22 @@ question = st.text_input(
190
  )
191
 
192
  if st.button("Send", disabled=disabled_input) and question:
 
193
  st.session_state.chat.append(("user", question))
194
-
 
195
  with st.spinner("Thinking..."):
196
- try:
197
- res = requests.post(f"{BACKEND_URL}/ask", json={"question": question})
198
-
199
- if res.status_code == 200:
200
- data = res.json()
201
- answer = data.get("answer", "No answer provided.")
202
- sources = data.get("sources", 0)
203
-
204
- bot_message = f"{answer}<div class='sources'>Context Chunks Used: {sources}</div>"
205
- st.session_state.chat.append(("bot", bot_message))
206
- st.markdown(f'<div class="debug-info">Backend: {BACKEND_URL}, Status: {res.status_code}</div>', unsafe_allow_html=True)
207
-
208
- else:
209
- error_detail = res.json().get("detail", f"HTTP {res.status_code}")
210
- st.session_state.chat.append(("bot", f"πŸ”΄ **Error:** {error_detail}"))
211
-
212
- except requests.exceptions.ConnectionError:
213
- st.session_state.chat.append(("bot", f"πŸ”΄ **Error:** Could not connect to backend at {BACKEND_URL}"))
214
- except Exception as e:
215
- st.session_state.chat.append(("bot", f"πŸ”΄ **Unexpected error:** {str(e)}"))
216
-
217
  st.rerun()
218
 
219
- # ---------------- SHOW CHAT HISTORY ----------------
220
  st.markdown("## Chat History")
221
  for role, msg in st.session_state.chat:
222
  if role == "user":
@@ -224,7 +282,7 @@ for role, msg in st.session_state.chat:
224
  else:
225
  st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
226
 
227
- # ---------------- FOOTER ----------------
228
  footer_html = """
229
  <div class="footer">
230
  Created by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
 
 
 
1
  import os
2
+ import tempfile
3
+ import gc
4
+ import logging
5
+ import streamlit as st
6
+ from groq import Groq, APIError
7
+ from langchain_community.document_loaders import PyPDFLoader
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ import torch
12
+
13
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

# Load the Groq API key from Hugging Face Space secrets, falling back to the
# process environment for local development.
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
GROQ_MODEL = "llama-3.1-8b-instant"

# Initialize Groq client. `client` stays None when no key is configured or
# construction fails; downstream code checks this before answering questions.
# NOTE(review): the st.success/st.error/st.warning calls below execute before
# st.set_page_config further down the file — Streamlit requires
# set_page_config to be the first Streamlit command, so this ordering may
# raise/warn at runtime; confirm and reorder if so.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        st.success("βœ… Groq client initialized successfully.")
    except Exception as e:
        st.error(f"❌ Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
31
 
32
+ # ---------------- STREAMLIT UI SETUP ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  st.set_page_config(page_title="PDF Assistant", page_icon="πŸ“˜", layout="wide")
34
 
35
+ # ---------------- CSS (Your exact UI) ----------------
 
 
 
 
 
 
 
 
 
 
36
  st.markdown("""
37
  <style>
 
38
  :root {
39
  --primary-color: #1e3a8a;
40
  --background-color: #0e1117;
 
42
  --text-color: #f0f2f6;
43
  }
44
 
 
45
  .chat-user {
46
  background: #2d3748;
47
  padding: 12px;
 
61
  color: #ffffff;
62
  }
63
 
 
64
  .sources {
65
  font-size: 0.8em;
66
  opacity: 0.7;
 
69
  padding-top: 5px;
70
  }
71
 
 
72
  .footer {
73
  position: fixed;
74
  left: 0;
 
89
  .footer a:hover {
90
  text-decoration: underline;
91
  }
 
 
 
 
 
 
 
92
  </style>
93
  """, unsafe_allow_html=True)
94
 
 
96
  if "chat" not in st.session_state:
97
  st.session_state.chat = []
98
 
99
+ if "vectorstore" not in st.session_state:
100
+ st.session_state.vectorstore = None
101
+
102
+ if "retriever" not in st.session_state:
103
+ st.session_state.retriever = None
104
+
105
  if "uploaded_file_name" not in st.session_state:
106
  st.session_state.uploaded_file_name = None
107
 
108
  if "uploader_key" not in st.session_state:
109
  st.session_state.uploader_key = 0
110
 
 
 
 
111
  # ---------------- FUNCTIONS ----------------
112
def clear_chat_history():
    """Reset the conversation history shown in the chat panel."""
    st.session_state["chat"] = []
114
 
115
def clear_memory():
    """Forget the loaded PDF: drop the vectorstore and reset upload state."""
    # Drop references so the vectorstore/retriever become collectable.
    for key in ("vectorstore", "retriever", "uploaded_file_name"):
        st.session_state[key] = None
    # Bumping the key forces Streamlit to recreate the file_uploader empty.
    st.session_state.uploader_key += 1
    gc.collect()
    # Return cached GPU memory to the driver when CUDA is available.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    st.success("Memory cleared. Please upload a new PDF.")
124
+
125
def process_pdf(uploaded_file):
    """Index an uploaded PDF into an in-memory Chroma vectorstore.

    Writes the upload to a temporary file (PyPDFLoader requires a path),
    splits the pages into overlapping chunks, embeds them with a local
    MiniLM model, and stores the vectorstore plus a k=3 retriever in
    st.session_state for later question answering.

    Args:
        uploaded_file: Streamlit UploadedFile containing the PDF bytes.

    Returns:
        Number of chunks indexed on success, or None on failure (the error
        is reported to the UI via st.error).
    """
    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            path = tmp.name

        # Load PDF pages as documents.
        loader = PyPDFLoader(path)
        docs = loader.load()

        # Split into overlapping chunks sized for retrieval.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=50
        )
        chunks = splitter.split_documents(docs)

        # CPU embeddings; normalized so cosine similarity is well-behaved.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )

        # Build the vectorstore and expose a top-3 retriever.
        vectorstore = Chroma.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever

        return len(chunks)

    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Fix: the original deleted the temp file only on the success path,
        # leaking it whenever loading/splitting/embedding raised.
        if path and os.path.exists(path):
            os.unlink(path)
167
+
168
def ask_question(question):
    """Retrieve and generate answer for the question.

    Looks up the top chunks from the session retriever, then asks the Groq
    chat model to answer strictly from that retrieved context.

    Args:
        question: The user's natural-language question about the loaded PDF.

    Returns:
        Tuple of (answer, num_context_chunks, error); on failure answer is
        None and error holds a user-readable message.
    """
    # Preconditions are reported as error strings rather than raised, so the
    # caller can render them in the chat transcript.
    if not client:
        return None, 0, "Groq client is not initialized. Check API key setup."

    if not st.session_state.retriever:
        return None, 0, "Upload PDF first to initialize the knowledge base."

    try:
        # Retrieve relevant chunks
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)

        # Build prompt — the instructions pin the model to the retrieved
        # context and a fixed refusal phrase.
        prompt = f"""
You are a strict RAG Q&A assistant.
Use ONLY the context provided. If the answer is not found, reply:
"I cannot find this in the PDF."

---------------- CONTEXT ----------------
{context}
-----------------------------------------

QUESTION: {question}

FINAL ANSWER:
"""

        # Call Groq API; temperature 0 for deterministic, extractive answers.
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system",
                 "content": "Use only the PDF content. If answer not found, say: 'I cannot find this in the PDF.'"},
                {"role": "user", "content": prompt}
            ],
            temperature=0.0
        )

        answer = response.choices[0].message.content.strip()
        return answer, len(docs), None

    # API failures are distinguished from everything else in the message.
    except APIError as e:
        return None, 0, f"Groq API Error: {str(e)}"
    except Exception as e:
        return None, 0, f"General error: {str(e)}"
214
 
215
+ # ---------------- UI COMPONENTS ----------------
216
+ st.title("πŸ“˜ PDF Assistant")
217
+
218
+ # Sidebar Controls
219
  with st.sidebar:
220
  st.header("Controls")
221
  st.button("πŸ—‘οΈ Clear Chat History", on_click=clear_chat_history, use_container_width=True)
222
  st.button("πŸ”₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True)
223
+
224
  st.markdown("---")
225
  if st.session_state.uploaded_file_name:
226
  st.success(f"βœ… **Active PDF:**\n `{st.session_state.uploaded_file_name}`")
227
  else:
228
  st.warning("⬆️ Upload a PDF to start chatting!")
229
 
230
+ # File Upload
 
 
 
231
  uploaded = st.file_uploader(
232
  "Upload your PDF",
233
  type=["pdf"],
 
237
  if uploaded and uploaded.name != st.session_state.uploaded_file_name:
238
  st.session_state.uploaded_file_name = None
239
  st.session_state.chat = []
240
+
241
  with st.spinner(f"Processing '{uploaded.name}'..."):
242
+ chunks_count = process_pdf(uploaded)
243
+
244
+ if chunks_count is not None:
245
+ st.success(f"βœ… PDF processed successfully! {chunks_count} chunks created.")
246
+ st.session_state.uploaded_file_name = uploaded.name
247
+ else:
248
+ st.error("❌ Failed to process PDF")
 
 
 
 
 
 
 
 
 
 
 
 
249
  st.session_state.uploaded_file_name = None
250
+
251
  st.rerun()
252
 
253
+ # Chat Input
254
+ disabled_input = st.session_state.uploaded_file_name is None or client is None
255
  question = st.text_input(
256
  "Ask a question about the loaded PDF:",
257
  key="question_input",
 
259
  )
260
 
261
  if st.button("Send", disabled=disabled_input) and question:
262
+ # Add user query to chat history
263
  st.session_state.chat.append(("user", question))
264
+
265
+ # Get answer
266
  with st.spinner("Thinking..."):
267
+ answer, sources, error = ask_question(question)
268
+
269
+ if answer:
270
+ bot_message = f"{answer}<div class='sources'>Context Chunks Used: {sources}</div>"
271
+ st.session_state.chat.append(("bot", bot_message))
272
+ else:
273
+ st.session_state.chat.append(("bot", f"πŸ”΄ **Error:** {error}"))
274
+
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  st.rerun()
276
 
277
+ # Display Chat History
278
  st.markdown("## Chat History")
279
  for role, msg in st.session_state.chat:
280
  if role == "user":
 
282
  else:
283
  st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
284
 
285
+ # Footer
286
  footer_html = """
287
  <div class="footer">
288
  Created by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>