diff options
Diffstat (limited to '')
-rw-r--r-- | g4f/Provider/HuggingChat.py | 195 |
1 files changed, 102 insertions, 93 deletions
diff --git a/g4f/Provider/HuggingChat.py b/g4f/Provider/HuggingChat.py index f7c6b581..7ebbf570 100644 --- a/g4f/Provider/HuggingChat.py +++ b/g4f/Provider/HuggingChat.py @@ -1,33 +1,51 @@ from __future__ import annotations -import json, requests, re +import json +import requests -from curl_cffi import requests as cf_reqs -from ..typing import CreateResult, Messages +from curl_cffi import requests as cf_reqs +from ..typing import CreateResult, Messages from .base_provider import ProviderModelMixin, AbstractProvider -from .helper import format_prompt +from .helper import format_prompt class HuggingChat(AbstractProvider, ProviderModelMixin): - url = "https://huggingface.co/chat" - working = True + url = "https://huggingface.co/chat" + working = True supports_stream = True - default_model = "mistralai/Mixtral-8x7B-Instruct-v0.1" + default_model = "meta-llama/Meta-Llama-3.1-70B-Instruct" + models = [ 'meta-llama/Meta-Llama-3.1-70B-Instruct', - 'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8', - 'CohereForAI/c4ai-command-r-plus', - 'mistralai/Mixtral-8x7B-Instruct-v0.1', - 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', - '01-ai/Yi-1.5-34B-Chat', - 'mistralai/Mistral-7B-Instruct-v0.2', - 'microsoft/Phi-3-mini-4k-instruct', + 'CohereForAI/c4ai-command-r-plus-08-2024', + 'Qwen/Qwen2.5-72B-Instruct', + 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF', + 'meta-llama/Llama-3.2-11B-Vision-Instruct', + 'NousResearch/Hermes-3-Llama-3.1-8B', + 'mistralai/Mistral-Nemo-Instruct-2407', + 'microsoft/Phi-3.5-mini-instruct', ] model_aliases = { - "mistralai/Mistral-7B-Instruct-v0.1": "mistralai/Mistral-7B-Instruct-v0.2" + "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "command-r-plus": "CohereForAI/c4ai-command-r-plus-08-2024", + "qwen-2-72b": "Qwen/Qwen2.5-72B-Instruct", + "nemotron-70b": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "llama-3.2-11b": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "hermes-3": "NousResearch/Hermes-3-Llama-3.1-8B", + "mistral-nemo": "mistralai/Mistral-Nemo-Instruct-2407", + "phi-3.5-mini": "microsoft/Phi-3.5-mini-instruct", } @classmethod + def get_model(cls, model: str) -> str: + if model in cls.models: + return model + elif model in cls.model_aliases: + return cls.model_aliases[model] + else: + return cls.default_model + + @classmethod def create_completion( cls, model: str, @@ -35,108 +53,99 @@ class HuggingChat(AbstractProvider, ProviderModelMixin): stream: bool, **kwargs ) -> CreateResult: + model = cls.get_model(model) - if (model in cls.models) : - - session = requests.Session() - headers = { - 'accept' : '*/*', - 'accept-language' : 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3', - 'cache-control' : 'no-cache', - 'origin' : 'https://huggingface.co', - 'pragma' : 'no-cache', - 'priority' : 'u=1, i', - 'referer' : 'https://huggingface.co/chat/', - 'sec-ch-ua' : '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"', - 'sec-ch-ua-mobile' : '?0', + if model in cls.models: + session = cf_reqs.Session() + session.headers = { + 'accept': '*/*', + 'accept-language': 'en', + 'cache-control': 'no-cache', + 'origin': 'https://huggingface.co', + 'pragma': 'no-cache', + 'priority': 'u=1, i', + 'referer': 'https://huggingface.co/chat/', + 'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"', + 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', - 'sec-fetch-dest' : 'empty', - 'sec-fetch-mode' : 'cors', - 'sec-fetch-site' : 'same-origin', - 'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36', } json_data = { - 'searchEnabled' : True, - 'activeModel' : 'CohereForAI/c4ai-command-r-plus', # doesn't matter - 'hideEmojiOnSidebar': False, - 'customPrompts' : {}, - 'assistants' : [], - 'tools' : {}, - 'disableStream' : False, - 'recentlySaved' : False, - 'ethicsModalAccepted' : True, - 'ethicsModalAcceptedAt' : None, - 'shareConversationsWithModelAuthors': False, + 'model': model, } - response = cf_reqs.post('https://huggingface.co/chat/settings', headers=headers, json=json_data) - session.cookies.update(response.cookies) + response = session.post('https://huggingface.co/chat/conversation', json=json_data) + if response.status_code != 200: + raise RuntimeError(f"Request failed with status code: {response.status_code}, response: {response.text}") - response = session.post('https://huggingface.co/chat/conversation', - headers=headers, json={'model': model}) + conversationId = response.json().get('conversationId') + response = session.get(f'https://huggingface.co/chat/conversation/{conversationId}/__data.json?x-sveltekit-invalidated=11') - conversationId = response.json()['conversationId'] - response = session.get(f'https://huggingface.co/chat/conversation/{conversationId}/__data.json?x-sveltekit-invalidated=11', - headers=headers, - ) - - messageId = extract_id(response.json()) + data: list = response.json()["nodes"][1]["data"] + keys: list[int] = data[data[0]["messages"]] + message_keys: dict = data[keys[0]] + messageId: str = data[message_keys["id"]] settings = { - "inputs" : format_prompt(messages), - "id" : messageId, - "is_retry" : False, - "is_continue" : False, - "web_search" : False, - - # TODO // add feature to enable/disable tools - "tools": { - "websearch" : True, - "document_parser" : False, - "query_calculator" : False, - "image_generation" : False, - "image_editing" : False, - "fetch_url" : False, - } + "inputs": format_prompt(messages), + "id": messageId, + "is_retry": False, + "is_continue": False, + "web_search": False, + "tools": [] } - payload = { - "data": json.dumps(settings), + headers = { + 'accept': '*/*', + 'accept-language': 'en', + 'cache-control': 'no-cache', + 'origin': 'https://huggingface.co', + 'pragma': 'no-cache', + 'priority': 'u=1, i', + 'referer': f'https://huggingface.co/chat/conversation/{conversationId}', + 'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36', + } + + files = { + 'data': (None, json.dumps(settings, separators=(',', ':'))), } - response = session.post(f"https://huggingface.co/chat/conversation/{conversationId}", - headers=headers, data=payload, stream=True, + response = requests.post(f'https://huggingface.co/chat/conversation/{conversationId}', + cookies=session.cookies, + headers=headers, + files=files, ) - first_token = True + full_response = "" for line in response.iter_lines(): - line = json.loads(line) + if not line: + continue + try: + line = json.loads(line) + except json.JSONDecodeError as e: + print(f"Failed to decode JSON: {line}, error: {e}") + continue if "type" not in line: raise RuntimeError(f"Response: {line}") elif line["type"] == "stream": - token = line["token"] - if first_token: - token = token.lstrip().replace('\u0000', '') - first_token = False - - else: - token = token.replace('\u0000', '') - - yield token + token = line["token"].replace('\u0000', '') + full_response += token elif line["type"] == "finalAnswer": break + + full_response = full_response.replace('<|im_end|', '').replace('\u0000', '').strip() -def extract_id(response: dict) -> str: - data = response["nodes"][1]["data"] - uuid_pattern = re.compile( - r"^[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}$" - ) - for item in data: - if type(item) == str and uuid_pattern.match(item): - return item - - return None + yield full_response |