# FlowRead / test_tok.py (536 bytes) — uploaded by jobbler
# Commit 93b9241: "feat: Add support for switching models and the 27b-4a backend"
import os
from transformers import AutoTokenizer
def main() -> None:
    """Compare tokenization of a preprompt alone vs. preprompt + text.

    Prints the token counts and the decoded token strings for both the
    preprompt (with its trailing separator) and the combined prompt, which
    helps account for how many tokens the preprompt contributes.
    """
    # google/gemma-2b is a gated repo; HF_TOKEN must grant access.
    # .get() (not [ ]) so a missing token falls through to anonymous access.
    tokenizer = AutoTokenizer.from_pretrained(
        "google/gemma-2b", token=os.environ.get("HF_TOKEN")
    )

    preprompt = "Focus on numbers"
    text = "The 86 billion neurons."
    full = f"{preprompt}\n\n{text}"

    # NOTE(review): both encodings include the tokenizer's special tokens
    # (e.g. BOS), so len(f_tok) - len(p_tok) equals the token count of
    # `text` only if no token merges across the "\n\n" boundary — the
    # printed token lists below exist precisely to eyeball that.
    p_tok = tokenizer(preprompt + "\n\n")["input_ids"]
    f_tok = tokenizer(full)["input_ids"]

    print(f"Preprompt tokens: {len(p_tok)}")
    print(f"Full tokens: {len(f_tok)}")
    print("Preprompt tokens:", tokenizer.convert_ids_to_tokens(p_tok))
    print("Full tokens:", tokenizer.convert_ids_to_tokens(f_tok))


# Guard so importing this module does not trigger a model download.
if __name__ == "__main__":
    main()