blob: fd8f9d5a09b59270f2739a4007c94106e9e0ee05 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
|
from typing import List, Tuple, Union

import tiktoken
def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Tuple[int, List[int]]:
    """Encode *text* with the tiktoken encoding selected for *model*.

    Args:
        text: The string to tokenize.
        model: Model name handed to ``tiktoken.encoding_for_model`` to pick
            the encoding. Defaults to ``'gpt-3.5-turbo'``.

    Returns:
        A ``(num_tokens, encoded)`` pair, where ``encoded`` is the value
        returned by ``encoding.encode(text)`` and ``num_tokens`` is its
        length.

    Note:
        Errors raised by tiktoken are not caught here and propagate to the
        caller. NOTE(review): per the tiktoken docs this is presumably a
        ``KeyError`` for an unrecognised model name and a ``ValueError`` when
        ``text`` contains special-token markers -- confirm against the
        installed tiktoken version.
    """
    encoding = tiktoken.encoding_for_model(model)
    encoded = encoding.encode(text)
    # Return both the count and the ids so callers need not re-encode.
    return len(encoded), encoded
|