absiitr committed
Commit 02f4e29 · verified · 1 Parent(s): e04ab08

Update app.py

Files changed (1)
  1. app.py +216 -168
app.py CHANGED
@@ -10,234 +10,282 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 import torch

- # ---------------- CONFIG ----------------
 logging.basicConfig(level=logging.INFO)

 GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
 GROQ_MODEL = "llama-3.1-8b-instant"

 client = None
 if GROQ_API_KEY:
     try:
         client = Groq(api_key=GROQ_API_KEY)
     except Exception as e:
-         logging.error(e)

- # ---------------- PAGE ----------------
 st.set_page_config(page_title="PDF Assistant", page_icon="📘", layout="wide")

- # ---------------- CSS ----------------
 st.markdown("""
 <style>
 :root {
-     --primary:#1e3a8a;
-     --bg:#0e1117;
-     --bg2:#1a1d29;
-     --text:#f0f2f6;
- }
-
- /* FIX SIDEBAR */
- section[data-testid="stSidebar"] {
-     position: fixed;
-     height: 100vh;
-     overflow-y: auto;
- }
- section[data-testid="stSidebar"] > div {
-     padding-top: 0.5rem !important;
- }
-
- /* MAIN OFFSET */
- .main {
-     margin-left: 300px;
- }
-
- /* CHAT AREA */
- .chat-area {
-     height: calc(100vh - 230px);
-     overflow-y: auto;
-     padding: 1rem 2rem;
 }

 .chat-user {
-     background:#2d3748;
-     padding:14px;
-     border-radius:18px 18px 4px 18px;
-     margin:12px 0 12px auto;
-     max-width:80%;
 }
-
 .chat-bot {
-     background:var(--primary);
-     padding:14px;
-     border-radius:18px 18px 18px 4px;
-     margin:12px auto 12px 0;
-     max-width:80%;
-     color:white;
 }

 .sources {
-     font-size:0.75em;
-     opacity:0.7;
-     margin-top:8px;
-     border-top:1px solid rgba(255,255,255,0.15);
-     padding-top:6px;
 }

- /* INPUT BAR */
- .input-bar {
-     position: sticky;
     bottom: 0;
-     background: var(--bg);
-     padding: 1rem 2rem;
-     border-top:1px solid rgba(255,255,255,0.15);
 }
-
- .input-box {
-     max-width:800px;
-     margin:auto;
 }
 </style>
 """, unsafe_allow_html=True)

- # ---------------- STATE ----------------
 if "chat" not in st.session_state:
     st.session_state.chat = []
 if "vectorstore" not in st.session_state:
     st.session_state.vectorstore = None
 if "retriever" not in st.session_state:
     st.session_state.retriever = None
- if "pdf" not in st.session_state:
-     st.session_state.pdf = None
 if "uploader_key" not in st.session_state:
     st.session_state.uploader_key = 0

 # ---------------- FUNCTIONS ----------------
- def clear_all():
     st.session_state.chat = []
     st.session_state.vectorstore = None
     st.session_state.retriever = None
-     st.session_state.pdf = None
     st.session_state.uploader_key += 1
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
-     st.success("Memory cleared")
-
- def process_pdf(uploaded):
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-         tmp.write(uploaded.getvalue())
-         path = tmp.name
-
-     loader = PyPDFLoader(path)
-     docs = loader.load()
-
-     splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=50)
-     chunks = splitter.split_documents(docs)
-
-     embeddings = HuggingFaceEmbeddings(
-         model_name="sentence-transformers/all-MiniLM-L6-v2",
-         model_kwargs={"device":"cpu"},
-         encode_kwargs={"normalize_embeddings":True}
-     )
-
-     st.session_state.vectorstore = Chroma.from_documents(chunks, embeddings)
-     st.session_state.retriever = st.session_state.vectorstore.as_retriever(k=3)
-
-     os.unlink(path)
-     return len(chunks)

- def ask_llm(q):
-     docs = st.session_state.retriever.invoke(q)
-     ctx = "\n\n".join(d.page_content for d in docs)
-
-     prompt = f"""
- Use ONLY the context. If answer not found, say:
 "I cannot find this in the PDF."

- CONTEXT:
- {ctx}

- QUESTION:
- {q}
 """

-     r = client.chat.completions.create(
-         model=GROQ_MODEL,
-         messages=[
-             {"role":"system","content":"Answer strictly from PDF"},
-             {"role":"user","content":prompt}
-         ],
-         temperature=0.0
-     )
-     return r.choices[0].message.content.strip(), len(docs)
-
- # ================= SIDEBAR =================
 with st.sidebar:
-     st.markdown("""
-     <h1 style="text-align:center;margin-bottom:0.2rem;">📘 PDF Assistant</h1>
-     <div style="text-align:center;font-size:0.85em;margin-bottom:1rem;">
-         Created by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
-     </div>
-     """, unsafe_allow_html=True)
-
-     st.subheader("Upload PDF")
-     uploaded = st.file_uploader(
-         "PDF",
-         type=["pdf"],
-         key=st.session_state.uploader_key,
-         label_visibility="collapsed"
-     )
-
-     if uploaded and uploaded.name != st.session_state.pdf:
-         st.session_state.chat = []
-         with st.spinner("Processing PDF..."):
-             chunks = process_pdf(uploaded)
-         st.session_state.pdf = uploaded.name
-         st.success(f"PDF processed ({chunks} chunks)")
-         st.rerun()
-
-     st.subheader("Controls")
-     st.button("🧹 Clear Memory", on_click=clear_all, use_container_width=True)
-
-     st.subheader("Status")
-     if st.session_state.pdf:
-         st.success(f"Active PDF:\n`{st.session_state.pdf}`")
     else:
-         st.info("Upload a PDF to start")

- # ================= CHAT =================
- st.markdown('<div class="chat-area">', unsafe_allow_html=True)

- if not st.session_state.chat:
-     st.info("Ask a question about your document to begin.")
- else:
-     for role, msg in st.session_state.chat:
-         if role == "user":
-             st.markdown(f"<div class='chat-user'>{msg}</div>", unsafe_allow_html=True)
         else:
-             st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
-
- st.markdown('</div>', unsafe_allow_html=True)
-
- # ================= INPUT =================
- st.markdown('<div class="input-bar"><div class="input-box">', unsafe_allow_html=True)
-
- with st.form("chat", clear_on_submit=True):
-     q = st.text_input(
-         "Ask",
-         disabled=st.session_state.pdf is None,
-         placeholder="Ask anything about your document…",
-         label_visibility="collapsed"
-     )
-     send = st.form_submit_button("Send")
-
- st.markdown('</div></div>', unsafe_allow_html=True)

- if send and q:
-     st.session_state.chat.append(("user", q))
     with st.spinner("Thinking..."):
-         ans, src = ask_llm(q)
-         st.session_state.chat.append(
-             ("bot", f"{ans}<div class='sources'>Chunks used: {src}</div>")
-         )
     st.rerun()

 from langchain_community.vectorstores import Chroma
 import torch

+ # ---------------- CONFIGURATION ----------------
 logging.basicConfig(level=logging.INFO)

+ # Load API key from Hugging Face secrets
 GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
 GROQ_MODEL = "llama-3.1-8b-instant"

+ # Initialize Groq client
 client = None
 if GROQ_API_KEY:
     try:
         client = Groq(api_key=GROQ_API_KEY)
+         st.success("✅ Groq client initialized successfully.")
     except Exception as e:
+         st.error(f"❌ Failed to initialize Groq client: {e}")
+         client = None
+ else:
+     st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")

+ # ---------------- STREAMLIT UI SETUP ----------------
 st.set_page_config(page_title="PDF Assistant", page_icon="📘", layout="wide")

+ # ---------------- CSS (Your exact UI) ----------------
 st.markdown("""
 <style>
 :root {
+     --primary-color: #1e3a8a;
+     --background-color: #0e1117;
+     --secondary-background-color: #1a1d29;
+     --text-color: #f0f2f6;
 }

 .chat-user {
+     background: #2d3748;
+     padding: 12px;
+     border-radius: 10px 10px 2px 10px;
+     margin: 6px 0 6px auto;
+     max-width: 85%;
+     text-align: right;
+     color: var(--text-color);
 }
 .chat-bot {
+     background: var(--primary-color);
+     padding: 12px;
+     border-radius: 10px 10px 10px 2px;
+     margin: 6px auto 6px 0;
+     max-width: 85%;
+     text-align: left;
+     color: #ffffff;
 }

 .sources {
+     font-size: 0.8em;
+     opacity: 0.7;
+     margin-top: 10px;
+     border-top: 1px solid rgba(255, 255, 255, 0.1);
+     padding-top: 5px;
 }

+ .footer {
+     position: fixed;
+     left: 0;
     bottom: 0;
+     width: 100%;
+     background-color: var(--secondary-background-color);
+     color: var(--text-color);
+     text-align: center;
+     padding: 10px;
+     font-size: 0.85em;
+     border-top: 1px solid rgba(255, 255, 255, 0.1);
 }
+ .footer a {
+     color: var(--primary-color);
+     text-decoration: none;
+     font-weight: bold;
+ }
+ .footer a:hover {
+     text-decoration: underline;
 }
 </style>
 """, unsafe_allow_html=True)

+ # ---------------- SESSION STATE ----------------
 if "chat" not in st.session_state:
     st.session_state.chat = []
+
 if "vectorstore" not in st.session_state:
     st.session_state.vectorstore = None
+
 if "retriever" not in st.session_state:
     st.session_state.retriever = None
+
+ if "uploaded_file_name" not in st.session_state:
+     st.session_state.uploaded_file_name = None
+
 if "uploader_key" not in st.session_state:
     st.session_state.uploader_key = 0

 # ---------------- FUNCTIONS ----------------
+ def clear_chat_history():
     st.session_state.chat = []
+
+ def clear_memory():
     st.session_state.vectorstore = None
     st.session_state.retriever = None
+     st.session_state.uploaded_file_name = None
     st.session_state.uploader_key += 1
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+     st.success("Memory cleared. Please upload a new PDF.")

+ def process_pdf(uploaded_file):
+     """Process uploaded PDF and create vectorstore."""
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+             tmp.write(uploaded_file.getvalue())
+             path = tmp.name
+
+         # Load PDF
+         loader = PyPDFLoader(path)
+         docs = loader.load()
+
+         # Split into chunks
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=800,
+             chunk_overlap=50
+         )
+         chunks = splitter.split_documents(docs)
+
+         # Create embeddings
+         embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={"device": "cpu"},
+             encode_kwargs={"normalize_embeddings": True}
+         )
+
+         # Create vectorstore
+         vectorstore = Chroma.from_documents(chunks, embeddings)
+         retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+
+         # Store in session state
+         st.session_state.vectorstore = vectorstore
+         st.session_state.retriever = retriever
+
+         # Cleanup
+         if os.path.exists(path):
+             os.unlink(path)
+
+         return len(chunks)
+
+     except Exception as e:
+         st.error(f"Error processing PDF: {str(e)}")
+         return None
+
+ def ask_question(question):
+     """Retrieve and generate answer for the question."""
+     if not client:
+         return None, 0, "Groq client is not initialized. Check API key setup."
+
+     if not st.session_state.retriever:
+         return None, 0, "Upload PDF first to initialize the knowledge base."
+
+     try:
+         # Retrieve relevant chunks
+         docs = st.session_state.retriever.invoke(question)
+         context = "\n\n".join(d.page_content for d in docs)
+
+         # Build prompt
+         prompt = f"""
+ You are a strict RAG Q&A assistant.
+ Use ONLY the context provided. If the answer is not found, reply:
 "I cannot find this in the PDF."

+ ---------------- CONTEXT ----------------
+ {context}
+ -----------------------------------------

+ QUESTION: {question}
+
+ FINAL ANSWER:
 """
+
+         # Call Groq API
+         response = client.chat.completions.create(
+             model=GROQ_MODEL,
+             messages=[
+                 {"role": "system",
+                  "content": "Use only the PDF content. If answer not found, say: 'I cannot find this in the PDF.'"},
+                 {"role": "user", "content": prompt}
+             ],
+             temperature=0.0
+         )
+
+         answer = response.choices[0].message.content.strip()
+         return answer, len(docs), None
+
+     except APIError as e:
+         return None, 0, f"Groq API Error: {str(e)}"
+     except Exception as e:
+         return None, 0, f"General error: {str(e)}"

+ # ---------------- UI COMPONENTS ----------------
+ st.title("📘 PDF Assistant")
+
+ # Sidebar Controls
 with st.sidebar:
+     st.header("Controls")
+     st.button("🗑️ Clear Chat History", on_click=clear_chat_history, use_container_width=True)
+     st.button("🔥 Clear PDF Memory", on_click=clear_memory, use_container_width=True)
+
+     st.markdown("---")
+     if st.session_state.uploaded_file_name:
+         st.success(f"✅ **Active PDF:**\n `{st.session_state.uploaded_file_name}`")
     else:
+         st.warning("⬆️ Upload a PDF to start chatting!")

+     # File Upload
+     uploaded = st.file_uploader(
+         "Upload your PDF",
+         type=["pdf"],
+         key=st.session_state.uploader_key
+     )

+     if uploaded and uploaded.name != st.session_state.uploaded_file_name:
+         st.session_state.uploaded_file_name = None
+         st.session_state.chat = []
+
+         with st.spinner(f"Processing '{uploaded.name}'..."):
+             chunks_count = process_pdf(uploaded)
+
+         if chunks_count is not None:
+             st.success(f"✅ PDF processed successfully! {chunks_count} chunks created.")
+             st.session_state.uploaded_file_name = uploaded.name
         else:
+             st.error("❌ Failed to process PDF")
+             st.session_state.uploaded_file_name = None
+
+         st.rerun()

+ # Chat Input
+ disabled_input = st.session_state.uploaded_file_name is None or client is None
+ question = st.text_input(
+     "Ask a question about the loaded PDF:",
+     key="question_input",
+     disabled=disabled_input
+ )
+
+ if st.button("Send", disabled=disabled_input) and question:
+     # Add user query to chat history
+     st.session_state.chat.append(("user", question))
+
+     # Get answer
     with st.spinner("Thinking..."):
+         answer, sources, error = ask_question(question)
+
+         if answer:
+             bot_message = f"{answer}<div class='sources'>Context Chunks Used: {sources}</div>"
+             st.session_state.chat.append(("bot", bot_message))
+         else:
+             st.session_state.chat.append(("bot", f"🔴 **Error:** {error}"))
+
     st.rerun()
+
+ # Display Chat History
+ st.markdown("## Chat History")
+ for role, msg in st.session_state.chat:
+     if role == "user":
+         st.markdown(f"<div class='chat-user'>{msg}</div>", unsafe_allow_html=True)
+     else:
+         st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
+
+ # Footer
+ footer_html = """
+ <div class="footer">
+     Created by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
+ </div>
+ """
+ st.markdown(footer_html, unsafe_allow_html=True)
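
For a quick sanity check of the retrieval-and-answer flow that the updated app.py wires into Streamlit, the standalone sketch below runs the same load, split, embed, retrieve, and ask steps from a terminal. It is illustrative only and not part of the commit: it assumes the groq, langchain-community, langchain-text-splitters, pypdf, sentence-transformers, and chromadb packages are installed and that GROQ_API_KEY is set in the environment; the file name sample.pdf and the test question are placeholders, and the import paths for PyPDFLoader and RecursiveCharacterTextSplitter (which sit above the hunk shown here) follow the current langchain package layout. Otherwise it reuses only calls that already appear in the diff.

# standalone_check.py -- illustrative sketch mirroring process_pdf() and ask_question() in app.py
import os

from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the PDF and split it into overlapping chunks ("sample.pdf" is a placeholder)
docs = PyPDFLoader("sample.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=50).split_documents(docs)

# Embed the chunks, index them in Chroma, and build a top-3 retriever
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)
retriever = Chroma.from_documents(chunks, embeddings).as_retriever(search_kwargs={"k": 3})

# Retrieve context for a question and ask Groq, as ask_question() does inside the app
question = "What is this document about?"
context = "\n\n".join(d.page_content for d in retriever.invoke(question))

client = Groq(api_key=os.environ["GROQ_API_KEY"])
response = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[
        {"role": "system", "content": "Use only the PDF content. If the answer is not found, say: 'I cannot find this in the PDF.'"},
        {"role": "user", "content": f"CONTEXT:\n{context}\n\nQUESTION: {question}"},
    ],
    temperature=0.0,
)
print(response.choices[0].message.content.strip())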