Spaces:

Cardiosense-AG
/

ai_econsult_demo

Paused

App Files Files Community

Cardiosense-AG committed on Nov 2

Commit

d923822

verified ·

1 Parent(s): 834143d

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -102

app.py CHANGED Viewed

@@ -1,59 +1,85 @@
 import os
 import platform
-from pathlib import Path
 import time
-import traceback
 import streamlit as st
 import pandas as pd
-from src.paths import base_dir, guidelines_dir, faiss_index_dir, exports_dir
-st.set_page_config(page_title="AI-Native E-Consult Prototype (V1)", page_icon="🩺", layout="wide")
 st.title("AI‑Native E‑Consult Prototype (V1)")
 st.caption("Step 0 — Environment Setup & Health Check")
 st.warning("Demo only — de‑identified data. Prototype for feedback; **not for clinical use**.", icon="🛑")
-# ---------- Helper ----------
-def _try_import(modname: str):
     try:
         m = __import__(modname)
-        ver = getattr(m, "__version__", "n/a")
-        return True, ver, None
-    except Exception as e:
-        return False, None, str(e)
-def _hf_whoami():
-    try:
-        from huggingface_hub import whoami
-        token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        if not token:
-            return False, None, "No HF token found. Add HF_TOKEN in Space Settings → Variables."
-        me = whoami(token=token)
-        return True, me, None
     except Exception as e:
-        return False, None, str(e)
-# ---------- Persistent dirs ----------
-bdir = base_dir()
-gdir = guidelines_dir()
-idir = faiss_index_dir()
-xdir = exports_dir()
-with st.expander("📁 Storage locations (persistent)"):
-    st.write({
-        "base_dir": str(bdir),
-        "guidelines_dir": str(gdir),
-        "faiss_index_dir": str(idir),
-        "exports_dir": str(xdir),
     })
-    st.caption("These live on the Space's persistent volume so your RAG index survives restarts.")
-# ---------- Diagnostics ----------
-colA, colB = st.columns(2)
 with colA:
     st.subheader("System")
     st.write({
@@ -61,76 +87,55 @@ with colA:
         "platform": platform.platform(),
         "cwd": str(Path.cwd()),
         "time": time.strftime("%Y-%m-%d %H:%M:%S"),
     })
-    ok_torch, torch_ver, torch_err = _try_import("torch")
-    if ok_torch:
-        import torch
-        cuda = torch.cuda.is_available()
-        device = torch.cuda.get_device_name(0) if cuda else "CPU"
-        st.success(f"torch {torch_ver} — CUDA: {'✅' if cuda else '❌'} — device: {device}")
-    else:
-        st.error(f"torch import failed: {torch_err}")
 with colB:
-    st.subheader("Core libraries")
-    rows = []
-    for name in ["transformers", "accelerate", "bitsandbytes", "faiss", "pypdf", "pandas", "numpy", "huggingface_hub", "sentence_transformers"]:
-        ok, ver, err = _try_import(name)
-        rows.append({"library": name, "status": "ok" if ok else "error", "version_or_error": ver if ok else err})
-    st.dataframe(pd.DataFrame(rows), hide_index=True, use_container_width=True)
-st.divider()
-st.subheader("Hugging Face auth check (for later model pulls)")
-if st.button("Check HF token"):
-    ok, me, err = _hf_whoami()
-    if ok:
-        who = me.get("name") or me.get("email") or me.get("username", "unknown")
-        st.success(f"HF token valid ✅ — signed in as: {who}")
-    else:
-        st.warning(f"HF token not verified: {err}")
-st.subheader("Quick functionality tests")
-if st.button("Run health checks"):
-    results = []
-    # 1) Write to persistent storage
-    try:
-        testfile = bdir / "healthcheck.txt"
-        testfile.write_text("ok\n")
-        results.append(("write_persistent", True, f"wrote {testfile}"))
-    except Exception as e:
-        results.append(("write_persistent", False, str(e)))
-    # 2) FAISS in-memory index sanity test
     try:
-        import numpy as np, faiss
-        xb = np.random.random((50, 8)).astype("float32")
-        idx = faiss.IndexFlatL2(8)
-        idx.add(xb)
-        D, I = idx.search(xb[:1], 5)
-        results.append(("faiss_search", True, f"top5 ids: {I[0].tolist()}"))
-    except Exception as e:
-        results.append(("faiss_search", False, str(e)))
-    # 3) bitsandbytes soft check (import + CUDA capability if torch has it)
-    try:
-        import bitsandbytes as bnb  # noqa
-        cuda_msg = ""
-        try:
-            import torch
-            if torch.cuda.is_available():
-                # light test: allocate a tiny 4-bit linear layer if available
-                from bitsandbytes.nn import Linear4bit
-                _ = Linear4bit(8, 8, bias=False)
-                cuda_msg = "CUDA-backed 4-bit layer constructed."
-        except Exception:
-            pass
-        results.append(("bitsandbytes", True, f"import ok. {cuda_msg}"))
-    except Exception as e:
-        results.append(("bitsandbytes", False, str(e)))
-    st.success("Health checks complete.")
-    st.dataframe(pd.DataFrame([{"check": k, "ok": ok, "detail": d} for (k, ok, d) in results]),
-                 hide_index=True, use_container_width=True)
-st.info("If the checks are green, the Space is ready for Step 1 (RAG Corpus Prep).")

+# app.py
 import os
 import platform
 import time
+from pathlib import Path
+from typing import Dict
 import streamlit as st
 import pandas as pd
+from src.paths import (
+    base_dir,
+    guidelines_dir,
+    faiss_index_dir,
+    exports_dir,
+    cases_dir,
+    audit_dir,
+    hf_cache_dir,
+    initialize_environment,
+    describe_paths,
+)
+st.set_page_config(page_title="AI‑Native E‑Consult — Health Check", page_icon="🩺", layout="wide")
 st.title("AI‑Native E‑Consult Prototype (V1)")
 st.caption("Step 0 — Environment Setup & Health Check")
 st.warning("Demo only — de‑identified data. Prototype for feedback; **not for clinical use**.", icon="🛑")
+# ---------- Initialize env & log ----------
+env = initialize_environment()
+st.session_state.setdefault("_app_env", env)
+with st.expander("Environment variables (runtime)", expanded=False):
+    st.json(env)
+# ---------- Dependency checks ----------
+def _probe_import(modname: str):
     try:
         m = __import__(modname)
+        ver = getattr(m, "__version__", "")
+        # faiss exposes version differently sometimes
+        if modname == "faiss" and not ver:
+            ver = getattr(m, "FAISS_VERSION", "") or ""
+        return True, ver, ""
     except Exception as e:
+        return False, "", f"{type(e).__name__}: {e}"
+mods = [
+    "torch", "accelerate", "transformers", "bitsandbytes", "faiss",
+    "sentence_transformers", "pypdf", "huggingface_hub", "numpy", "pandas"
+]
+rows = []
+for name in mods:
+    ok, ver, err = _probe_import(name)
+    rows.append({
+        "package": name,
+        "status": "✅" if ok else "❌",
+        "version": ver,
+        "error": err,
     })
+st.subheader("Python packages")
+st.dataframe(pd.DataFrame(rows), use_container_width=True)
+# ---------- CUDA ----------
+cuda_txt = "Not checked"
+gpu_name = ""
+try:
+    import torch
+    has_cuda = torch.cuda.is_available()
+    cuda_txt = "✅ Available" if has_cuda else "❌ Not available"
+    if has_cuda:
+        try:
+            gpu_name = torch.cuda.get_device_name(0)
+        except Exception:
+            gpu_name = "CUDA detected (name unavailable)"
+except Exception as e:
+    has_cuda = False
+    cuda_txt = f"⚠️ Torch import error: {e}"
+colA, colB = st.columns(2)
 with colA:
     st.subheader("System")
     st.write({
         "platform": platform.platform(),
         "cwd": str(Path.cwd()),
         "time": time.strftime("%Y-%m-%d %H:%M:%S"),
+        "CUDA": cuda_txt,
+        "GPU": gpu_name,
     })
 with colB:
+    st.subheader("Paths")
+    pinfo: Dict[str, str] = describe_paths()
+    st.write(pinfo)
+    # basic directory status
+    def _count_pdfs(p: Path) -> int:
+        return sum(1 for _ in p.glob("**/*.pdf"))
+    def _human_bytes(n: int) -> str:
+        for u in ["B", "KB", "MB", "GB", "TB"]:
+            if n < 1024:
+                return f"{n:.1f} {u}"
+            n /= 1024
+        return f"{n:.1f} PB"
+    # quick cache dir size
+    cache = Path(pinfo["hf_cache_dir"])
+    size = 0
     try:
+        for root, _, files in os.walk(cache):
+            for f in files:
+                try:
+                    size += (Path(root) / f).stat().st_size
+                except Exception:
+                    pass
+    except Exception:
+        pass
+    st.write({
+        "guideline_pdfs": _count_pdfs(Path(pinfo["guidelines_dir"])),
+        "index_present": (
+            (Path(pinfo["faiss_index_dir"]) / "faiss.index").exists()
+            and (Path(pinfo["faiss_index_dir"]) / "chunks.jsonl").exists()
+            and (Path(pinfo["faiss_index_dir"]) / "index_info.json").exists()
+        ),
+        "hf_cache_size": _human_bytes(size),
+    })
+st.info(
+    "**Model selection**\n\n"
+    f"- Primary: `{os.getenv('MODEL_ID', 'google/medgemma-27b-text-it')}` (GPU / 4-bit)\n"
+    f"- Fallback: `{os.getenv('MODEL_FALLBACK_ID', 'google/medgemma-4b-it')}` (CPU)\n"
+    f"- Stub mode (`E2E_STUB=1`): returns deterministic output for UI tests.",
+    icon="⚙️"
+)
+st.success("Health page loaded. Proceed to **Step 1 — RAG Corpus Prep** from the sidebar when ready.", icon="➡️")