Instructions to use trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic

SGLang

How to use trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic with Docker Model Runner:
```
docker model run hf.co/trohrbaugh/IQuest-Coder-V1-40B-Instruct-heretic
```

IQuest-Coder-V1-40B-Instruct-heretic / patch_iquestcoder.py

trohrbaugh

Upload patch_iquestcoder.py with huggingface_hub

4a382cf verified about 2 months ago

raw

history blame contribute delete

4.01 kB

	#!/usr/bin/env python3
	"""
	Patch IQuestLab/IQuest-Coder models for transformers 5.x compatibility.

	Fixes the meta-device RoPE bug where accelerate zeros out inv_freq during
	model initialization, causing the model to produce only newlines/garbage.

	Usage:
	python patch_iquestcoder.py

	This will find and patch all cached IQuest-Coder modeling files automatically.
	Run this AFTER downloading the model (e.g. after a failed heretic run or
	after running `huggingface-cli download IQuestLab/IQuest-Coder-V1-40B-Instruct`).
	"""

	import glob
	import os
	import re
	import sys

	# Regex locating the rotary-embedding ``forward`` method that needs patching.
	#   group 1          - the two decorators plus the ``def forward(...)`` line
	#   group 2          - the first body statement (``inv_freq_expanded = ...``)
	#   body_indent      - leading whitespace of that first body statement
	# Indentation is matched with ``[ \t]*`` / ``[ \t]+`` instead of a fixed
	# single space so the method is found at whatever depth it is nested.
	ORIGINAL_PATTERN = re.compile(
	    r'([ \t]*@torch\.no_grad\(\)\n'
	    r'[ \t]*@dynamic_rope_update\n'
	    r'[ \t]*def forward\(self, x: torch\.Tensor, position_ids: torch\.Tensor\)'
	    r' -> Tuple\[torch\.Tensor, torch\.Tensor\]:\n)'
	    r'((?P<body_indent>[ \t]+)inv_freq_expanded = self\.inv_freq\[None, :, None\]\.float\(\)\.expand\(position_ids\.shape\[0\], -1, 1\)\.to\(x\.device\))'
	)

	# Substitution template used with ``ORIGINAL_PATTERN.sub``/``subn``: re-emits
	# the method header (group 1), inserts the lazy-recompute guard, then re-emits
	# the original first body statement (group 2).  Every inserted line reuses the
	# captured ``body_indent`` and the ``if`` body is indented one level deeper, so
	# the patched file remains syntactically valid Python.
	REPLACEMENT = (
	    r'\1'
	    r'\g<body_indent># Lazy recompute: accelerate meta-device init leaves inv_freq as zeros\n'
	    r'\g<body_indent>if self.inv_freq is not None and self.inv_freq.numel() > 0 and (self.inv_freq == 0).all():\n'
	    r'\g<body_indent>    inv_freq, self.attention_scaling = self.rope_init_fn(self.config, None)\n'
	    r'\g<body_indent>    self.inv_freq = inv_freq.to(device=x.device, dtype=self.inv_freq.dtype)\n'
	    r'\g<body_indent>    self.original_inv_freq = self.inv_freq\n'
	    r'\2'
	)

	# Substring of the inserted comment; its presence in a file means the patch
	# has already been applied.  Must stay in sync with REPLACEMENT above.
	PATCH_MARKER = "Lazy recompute: accelerate meta-device init"

	# Glob patterns (expanded with ``recursive=True``) for cached copies of
	# modeling_iquestcoder.py.  find_model_files() may append further patterns
	# derived from environment variables.
	SEARCH_PATHS = [
	    # Default hub cache.  Repo folders are named models--<org>--<repo> and the
	    # files sit below snapshots/<revision>/, hence the ``*`` and ``**``.
	    os.path.expanduser("~/.cache/huggingface/hub/models--IQuestLab--*/**/modeling_iquestcoder.py"),
	    # Default location of the dynamic-module copies made for remote code.
	    os.path.expanduser("~/.cache/huggingface/modules/transformers_modules/IQuestLab/**/modeling_iquestcoder.py"),
	    "/llm/huggingface/modules/transformers_modules/IQuestLab/**/modeling_iquestcoder.py",
	    # Common alternate HF cache locations
	    "/data/huggingface/**/modeling_iquestcoder.py",
	    "/scratch/**/modeling_iquestcoder.py",
	]


	def find_model_files():
	    """Find all cached IQuest-Coder modeling files.

	    Globs every pattern in ``SEARCH_PATHS`` plus one recursive pattern for each
	    of the ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``HUGGINGFACE_HUB_CACHE``
	    environment variables that is set.  The environment-derived patterns are
	    recorded in ``SEARCH_PATHS`` so the caller can report what was searched;
	    each is added at most once, so repeated calls do not grow the list.

	    Returns:
	        list[str]: matching paths in discovery order, with entries that resolve
	        to the same real file (e.g. through symlinks) reported only once.
	    """
	    for env_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"):
	        root = os.environ.get(env_var)
	        if not root:
	            continue
	        pattern = os.path.join(root, "**/modeling_iquestcoder.py")
	        # Guard against duplicates: this mutates a module-level list.
	        if pattern not in SEARCH_PATHS:
	            SEARCH_PATHS.append(pattern)

	    seen = set()
	    unique = []
	    for pattern in SEARCH_PATHS:
	        for path in glob.glob(pattern, recursive=True):
	            # Compare resolved paths so symlinked duplicates collapse.
	            real = os.path.realpath(path)
	            if real not in seen:
	                seen.add(real)
	                unique.append(path)
	    return unique


	def patch_file(filepath):
	    """Rewrite one modeling file in place with the RoPE lazy-recompute patch.

	    The file is read as UTF-8.  If it already contains ``PATCH_MARKER`` it is
	    left alone; otherwise ``ORIGINAL_PATTERN`` is substituted with
	    ``REPLACEMENT`` and, when at least one substitution happened, the result is
	    written back.  A one-line status is printed in every case.

	    Returns:
	        bool: True if the file was modified, False if it was skipped or the
	        pattern was not found.
	    """
	    with open(filepath, "r", encoding="utf-8") as src:
	        source_text = src.read()

	    changed = False
	    if PATCH_MARKER in source_text:
	        status = f" SKIP (already patched): {filepath}"
	    else:
	        patched_text, hits = ORIGINAL_PATTERN.subn(REPLACEMENT, source_text)
	        if hits == 0:
	            status = f" WARN (pattern not found — may need manual patching): {filepath}"
	        else:
	            # Only touch the file once we know there is something to write.
	            with open(filepath, "w", encoding="utf-8") as dst:
	                dst.write(patched_text)
	            status = f" OK (patched {hits} location(s)): {filepath}"
	            changed = True

	    print(status)
	    return changed


	def main():
	    """Command-line entry point: locate cached modeling files and patch them.

	    Prints a banner, then either the list of searched patterns (exiting with
	    status 1 when nothing was found) or a per-file result followed by a
	    summary of how many files were patched.
	    """
	    for banner_line in ("IQuest-Coder RoPE patch for transformers 5.x", "=" * 50, ""):
	        print(banner_line)

	    candidates = find_model_files()
	    if not candidates:
	        for line in (
	            "No IQuest-Coder model files found in cache.",
	            "Download the model first, then re-run this script.",
	            "",
	            "Searched:",
	        ):
	            print(line)
	        for searched in SEARCH_PATHS:
	            print(f" {searched}")
	        sys.exit(1)

	    print(f"Found {len(candidates)} file(s):\n")
	    # patch_file() reports each file itself; here we only count the successes.
	    patched = sum(1 for path in candidates if patch_file(path))

	    print()
	    summary = f"Done — patched {patched} file(s)." if patched else "No files needed patching."
	    print(summary)


	# Script entry point: run the patcher only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()