from __future__ import annotations

import json

from ..typing import AsyncResult, Messages
from .base_provider import AsyncGeneratorProvider
from ..requests import StreamSession


class DeepInfra(AsyncGeneratorProvider):
    url = "https://deepinfra.com"
    working = True
    supports_stream = True
    supports_message_history = True

    @staticmethod
    async def create_async_generator(
        model: str,
        messages: Messages,
        stream: bool,
        proxy: str = None,
        timeout: int = 120,
        auth: str = None,
        **kwargs
    ) -> AsyncResult:
        if not model:
            model = 'meta-llama/Llama-2-70b-chat-hf'
        # Browser-like headers so the request matches what deepinfra.com's
        # own web embed sends.
        headers = {
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Origin': 'https://deepinfra.com',
            'Referer': 'https://deepinfra.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'X-Deepinfra-Source': 'web-embed',
            'accept': 'text/event-stream',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
        }
        if auth:
            headers['Authorization'] = f"Bearer {auth}"
        async with StreamSession(
            headers=headers,
            timeout=timeout,
            proxies={"https": proxy},
            impersonate="chrome110"
        ) as session:
            json_data = {
                'model': model,
                'messages': messages,
                'stream': True
            }
            async with session.post(
                'https://api.deepinfra.com/v1/openai/chat/completions',
                json=json_data
            ) as response:
                response.raise_for_status()
                first = True
                # The endpoint is OpenAI-compatible and streams server-sent
                # events: each payload line looks like "data: {...}" and the
                # stream ends with "data: [DONE]".
                async for line in response.iter_lines():
                    try:
                        if line.startswith(b"data: [DONE]"):
                            break
                        elif line.startswith(b"data: "):
                            chunk = json.loads(line[6:])["choices"][0]["delta"].get("content")
                            if chunk:
                                if first:
                                    # Strip leading whitespace from the first
                                    # token only; skip it if nothing remains.
                                    chunk = chunk.lstrip()
                                    if chunk:
                                        first = False
                                if chunk:
                                    yield chunk
                    except Exception as e:
                        raise RuntimeError(f"Response: {line}") from e
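
# Usage sketch (assumptions: this module lives inside the g4f package so the
# relative imports above resolve, and the model name below is one DeepInfra
# actually serves; adjust both as needed):
#
#     import asyncio
#
#     async def main():
#         async for token in DeepInfra.create_async_generator(
#             model="meta-llama/Llama-2-70b-chat-hf",
#             messages=[{"role": "user", "content": "Say hello."}],
#             stream=True,
#         ):
#             print(token, end="", flush=True)
#
#     asyncio.run(main())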