yadnyeshkolte committed on
Commit
2a9bd42
·
1 Parent(s): 131e2d3
Dockerfile CHANGED
@@ -1,40 +1,40 @@
1
- # Simple Dockerfile for HF Spaces deployment
2
- # Uses standard Python base instead of openenv-base to avoid startup hangs
3
-
4
- FROM python:3.11-slim
5
-
6
- WORKDIR /app
7
-
8
- # Install system dependencies
9
- RUN apt-get update && \
10
- apt-get install -y --no-install-recommends curl git && \
11
- rm -rf /var/lib/apt/lists/*
12
-
13
- # Install uv for fast dependency management
14
- RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
15
- mv /root/.local/bin/uv /usr/local/bin/uv && \
16
- mv /root/.local/bin/uvx /usr/local/bin/uvx
17
-
18
- # Copy environment code
19
- COPY . /app/env
20
-
21
- WORKDIR /app/env
22
-
23
- # Install dependencies
24
- RUN uv sync --no-editable
25
-
26
- # Set PATH to use the virtual environment
27
- ENV PATH="/app/env/.venv/bin:$PATH"
28
-
29
- # Set PYTHONPATH so imports work correctly
30
- ENV PYTHONPATH="/app/env:$PYTHONPATH"
31
-
32
- # Expose port
33
- EXPOSE 8000
34
-
35
- # Health check
36
- HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
37
- CMD curl -f http://localhost:8000/health || exit 1
38
-
39
- # Run the FastAPI server directly (no web interface to avoid Gradio issues)
40
- CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
 
1
+ # Simple Dockerfile for HF Spaces deployment
2
+ # Uses standard Python base instead of openenv-base to avoid startup hangs
3
+
4
+ FROM python:3.11-slim
5
+
6
+ WORKDIR /app
7
+
8
+ # Install system dependencies
9
+ RUN apt-get update && \
10
+ apt-get install -y --no-install-recommends curl git && \
11
+ rm -rf /var/lib/apt/lists/*
12
+
13
+ # Install uv for fast dependency management
14
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
15
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
16
+ mv /root/.local/bin/uvx /usr/local/bin/uvx
17
+
18
+ # Copy environment code
19
+ COPY . /app/env
20
+
21
+ WORKDIR /app/env
22
+
23
+ # Install dependencies
24
+ RUN uv sync --no-editable
25
+
26
+ # Set PATH to use the virtual environment
27
+ ENV PATH="/app/env/.venv/bin:$PATH"
28
+
29
+ # Set PYTHONPATH so imports work correctly
30
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
31
+
32
+ # Expose port
33
+ EXPOSE 8000
34
+
35
+ # Health check
36
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
37
+ CMD curl -f http://localhost:8000/health || exit 1
38
+
39
+ # Run the FastAPI server directly (no web interface to avoid Gradio issues)
40
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -158,6 +158,7 @@ python scripts/baseline_inference.py --mode llm
158
 
159
  ```
160
  api_debug_env/
 
161
  ├── models.py # Pydantic Action & Observation models
162
  ├── scenarios.py # 3 task scenarios with issues, logs, configs
163
  ├── client.py # WebSocket client for the environment
@@ -168,9 +169,9 @@ api_debug_env/
168
  │ ├── api_debug_env_environment.py # Core environment logic
169
  │ └── Dockerfile # Container build
170
  └── scripts/
171
- └── baseline_inference.py # Baseline agent script
172
  ```
173
 
174
  ## License
175
 
176
- BSD-style license. See LICENSE file.
 
158
 
159
  ```
160
  api_debug_env/
161
+ ├── inference.py # ★ MANDATORY hackathon inference script
162
  ├── models.py # Pydantic Action & Observation models
163
  ├── scenarios.py # 3 task scenarios with issues, logs, configs
164
  ├── client.py # WebSocket client for the environment
 
169
  │ ├── api_debug_env_environment.py # Core environment logic
170
  │ └── Dockerfile # Container build
171
  └── scripts/
172
+ └── baseline_inference.py # Original baseline agent script
173
  ```
174
 
175
  ## License
176
 
177
+ BSD-style license. See LICENSE file.
__pycache__/__init__.cpython-313.pyc ADDED
Binary file (456 Bytes). View file
 
__pycache__/client.cpython-313.pyc ADDED
Binary file (3.84 kB). View file
 
__pycache__/models.cpython-313.pyc ADDED
Binary file (3.49 kB). View file
 
__pycache__/scenarios.cpython-313.pyc ADDED
Binary file (12.1 kB). View file
 
inference.py CHANGED
@@ -91,7 +91,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
91
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
92
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
93
  print(
94
- f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}",
95
  flush=True,
96
  )
97
 
@@ -176,7 +176,7 @@ async def run_task(task_id: str, client: OpenAI) -> tuple:
176
 
177
  rewards: List[float] = []
178
  steps_taken = 0
179
- score = 0.0
180
  success = False
181
 
182
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
@@ -208,8 +208,8 @@ async def run_task(task_id: str, client: OpenAI) -> tuple:
208
  if done:
209
  break
210
 
211
- score = env.grade()
212
- score = min(max(score, 0.0), 1.0)
213
  success = score >= SUCCESS_SCORE_THRESHOLD
214
 
215
  except Exception as e:
 
91
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
92
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
93
  print(
94
+ f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
95
  flush=True,
96
  )
97
 
 
176
 
177
  rewards: List[float] = []
178
  steps_taken = 0
179
+ score = 0.001
180
  success = False
181
 
182
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
 
208
  if done:
209
  break
210
 
211
+ score = env.grade() # already clamped to (0.001, 0.999)
212
+ score = max(0.001, min(0.999, score))
213
  success = score >= SUCCESS_SCORE_THRESHOLD
214
 
215
  except Exception as e:
server/Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Simple Dockerfile for HF Spaces deployment
2
+ # Uses standard Python base instead of openenv-base to avoid startup hangs
3
+
4
+ FROM python:3.11-slim
5
+
6
+ WORKDIR /app
7
+
8
+ # Install system dependencies
9
+ RUN apt-get update && \
10
+ apt-get install -y --no-install-recommends curl git && \
11
+ rm -rf /var/lib/apt/lists/*
12
+
13
+ # Install uv for fast dependency management
14
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
15
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
16
+ mv /root/.local/bin/uvx /usr/local/bin/uvx
17
+
18
+ # Copy environment code
19
+ COPY . /app/env
20
+
21
+ WORKDIR /app/env
22
+
23
+ # Install dependencies
24
+ RUN uv sync --no-editable
25
+
26
+ # Set PATH to use the virtual environment
27
+ ENV PATH="/app/env/.venv/bin:$PATH"
28
+
29
+ # Set PYTHONPATH so imports work correctly
30
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
31
+
32
+ # Expose port
33
+ EXPOSE 8000
34
+
35
+ # Health check
36
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
37
+ CMD curl -f http://localhost:8000/health || exit 1
38
+
39
+ # Run the FastAPI server directly (no web interface to avoid Gradio issues)
40
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
server/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (330 Bytes). View file
 
server/__pycache__/api_debug_env_environment.cpython-313.pyc ADDED
Binary file (19.5 kB). View file
 
server/__pycache__/app.cpython-313.pyc ADDED
Binary file (6.68 kB). View file
 
server/api_debug_env_environment.py CHANGED
@@ -417,7 +417,7 @@ class ApiDebugEnvironment(Environment):
417
 
418
  score = fix_ratio * efficiency_bonus + exploration_bonus
419
 
420
- # Clamp strictly to (0.001, 0.999) — never exactly 0 or 1
421
  return max(0.001, min(0.999, round(score, 4)))
422
 
423
  def get_task_info(self) -> Dict[str, Any]:
 
417
 
418
  score = fix_ratio * efficiency_bonus + exploration_bonus
419
 
420
+ # Clamp to the closed range [0.001, 0.999] so the score is NEVER exactly 0.0 or 1.0
421
  return max(0.001, min(0.999, round(score, 4)))
422
 
423
  def get_task_info(self) -> Dict[str, Any]:
server/app.py CHANGED
@@ -133,7 +133,7 @@ async def run_grader(request: GraderRequest):
133
 
134
  return {
135
  "task_id": task_id,
136
- "score": 0.0,
137
  "message": "No completed episode found. Run the environment first.",
138
  }
139
 
 
133
 
134
  return {
135
  "task_id": task_id,
136
+ "score": 0.001,
137
  "message": "No completed episode found. Run the environment first.",
138
  }
139