# Source: g4f/api/_tokenizer.py as created by commit
# 8e7e694d81e674db63049145a35972df8ad2e3fa ("~ | updated g4f.api",
# 2023-10-20, "new api and requirements").
from typing import List, Tuple, Union

import tiktoken


def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Tuple[int, List[int]]:
    """Encode *text* with the tiktoken encoding selected for *model*.

    Args:
        text: The string to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model`` to pick
            the encoding. Defaults to ``'gpt-3.5-turbo'``.

    Returns:
        A ``(num_tokens, encoded)`` tuple, where ``encoded`` is the value
        returned by ``encoding.encode(text)`` and ``num_tokens`` is its
        length.
    """
    # NOTE(review): tiktoken is expected to raise KeyError here for model
    # names it cannot map to an encoding - confirm, and decide whether callers
    # passing non-OpenAI model names need a fallback encoding.
    encoding = tiktoken.encoding_for_model(model)
    encoded = encoding.encode(text)

    # The count is derived from the encoded sequence so the two returned
    # values always agree.
    return len(encoded), encoded