Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -22,12 +22,14 @@ TABLE_HEADERS = [
|
|
| 22 |
"Model",
|
| 23 |
"Benchmark",
|
| 24 |
"Score",
|
| 25 |
-
"Unit",
|
| 26 |
-
"Dataset",
|
| 27 |
-
"Contributor",
|
| 28 |
"Source",
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
]
|
| 32 |
|
| 33 |
|
|
@@ -336,12 +338,7 @@ class LeaderboardFetcher:
|
|
| 336 |
"Model": entry["model_id"],
|
| 337 |
"Benchmark": BENCHMARKS[benchmark_key].label,
|
| 338 |
"Score": round(payload["value"], 2),
|
| 339 |
-
"
|
| 340 |
-
"Dataset": payload["dataset"],
|
| 341 |
-
"Contributor": payload["contributor"],
|
| 342 |
-
"Source": payload["source_type"],
|
| 343 |
-
"Source URL": payload["source_url"],
|
| 344 |
-
"Revision": payload["revision"],
|
| 345 |
}
|
| 346 |
all_rows.append(row)
|
| 347 |
per_benchmark[benchmark_key].append(row)
|
|
@@ -379,18 +376,14 @@ def refresh_handler() -> List[Any]:
|
|
| 379 |
fetcher.log_text(),
|
| 380 |
]
|
| 381 |
)
|
| 382 |
-
per_benchmark = result["per_benchmark"]
|
| 383 |
return [
|
| 384 |
status,
|
| 385 |
_rows_to_matrix(result["all_rows"]),
|
| 386 |
-
_rows_to_matrix(per_benchmark["mmlu"]),
|
| 387 |
-
_rows_to_matrix(per_benchmark["bigcodebench"]),
|
| 388 |
-
_rows_to_matrix(per_benchmark["arc_mc"]),
|
| 389 |
]
|
| 390 |
except Exception as exc: # pylint: disable=broad-except
|
| 391 |
error = f"❌ Failed to refresh leaderboard: {exc}"
|
| 392 |
empty: List[List[Any]] = []
|
| 393 |
-
return [error, empty, empty, empty, empty]
|
| 394 |
|
| 395 |
|
| 396 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
@@ -403,14 +396,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 403 |
)
|
| 404 |
refresh_button = gr.Button("Refresh", variant="primary")
|
| 405 |
status_box = gr.Markdown("")
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
with gr.Tab("MMLU"):
|
| 409 |
-
mmlu_table = gr.Dataframe(headers=TABLE_HEADERS, interactive=False)
|
| 410 |
-
with gr.Tab("BigCodeBench"):
|
| 411 |
-
bigcode_table = gr.Dataframe(headers=TABLE_HEADERS, interactive=False)
|
| 412 |
-
with gr.Tab("ARC MC"):
|
| 413 |
-
arc_table = gr.Dataframe(headers=TABLE_HEADERS, interactive=False)
|
| 414 |
|
| 415 |
refresh_button.click( # pylint: disable=no-member
|
| 416 |
refresh_handler,
|
|
@@ -418,9 +405,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 418 |
outputs=[
|
| 419 |
status_box,
|
| 420 |
all_table,
|
| 421 |
-
mmlu_table,
|
| 422 |
-
bigcode_table,
|
| 423 |
-
arc_table,
|
| 424 |
],
|
| 425 |
)
|
| 426 |
demo.load( # pylint: disable=no-member
|
|
@@ -428,9 +412,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 428 |
outputs=[
|
| 429 |
status_box,
|
| 430 |
all_table,
|
| 431 |
-
mmlu_table,
|
| 432 |
-
bigcode_table,
|
| 433 |
-
arc_table,
|
| 434 |
],
|
| 435 |
)
|
| 436 |
|
|
|
|
| 22 |
"Model",
|
| 23 |
"Benchmark",
|
| 24 |
"Score",
|
|
|
|
|
|
|
|
|
|
| 25 |
"Source",
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
TABLE_DATATYPES = [
|
| 29 |
+
"text",
|
| 30 |
+
"text",
|
| 31 |
+
"number",
|
| 32 |
+
"markdown",
|
| 33 |
]
|
| 34 |
|
| 35 |
|
|
|
|
| 338 |
"Model": entry["model_id"],
|
| 339 |
"Benchmark": BENCHMARKS[benchmark_key].label,
|
| 340 |
"Score": round(payload["value"], 2),
|
| 341 |
+
"Source": f"{payload['source_type']} by [{payload['contributor']}]({payload['source_url']})",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
}
|
| 343 |
all_rows.append(row)
|
| 344 |
per_benchmark[benchmark_key].append(row)
|
|
|
|
| 376 |
fetcher.log_text(),
|
| 377 |
]
|
| 378 |
)
|
|
|
|
| 379 |
return [
|
| 380 |
status,
|
| 381 |
_rows_to_matrix(result["all_rows"]),
|
|
|
|
|
|
|
|
|
|
| 382 |
]
|
| 383 |
except Exception as exc: # pylint: disable=broad-except
|
| 384 |
error = f"❌ Failed to refresh leaderboard: {exc}"
|
| 385 |
empty: List[List[Any]] = []
|
| 386 |
+
return [error, empty]
|
| 387 |
|
| 388 |
|
| 389 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
| 396 |
)
|
| 397 |
refresh_button = gr.Button("Refresh", variant="primary")
|
| 398 |
status_box = gr.Markdown("")
|
| 399 |
+
|
| 400 |
+
all_table = gr.Dataframe(headers=TABLE_HEADERS, interactive=False, datatype=TABLE_DATATYPES)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
refresh_button.click( # pylint: disable=no-member
|
| 403 |
refresh_handler,
|
|
|
|
| 405 |
outputs=[
|
| 406 |
status_box,
|
| 407 |
all_table,
|
|
|
|
|
|
|
|
|
|
| 408 |
],
|
| 409 |
)
|
| 410 |
demo.load( # pylint: disable=no-member
|
|
|
|
| 412 |
outputs=[
|
| 413 |
status_box,
|
| 414 |
all_table,
|
|
|
|
|
|
|
|
|
|
| 415 |
],
|
| 416 |
)
|
| 417 |
|