From 96e378e9e21092f486e6f77a083651865062d893 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Sat, 18 May 2024 15:37:46 +0200 Subject: Fix OpenaiChat provider, improve proofofwork --- g4f/Provider/needs_auth/OpenaiChat.py | 81 +++++++++++++++++++++-------------- g4f/Provider/openai/har_file.py | 7 +-- g4f/Provider/openai/proofofwork.py | 29 ++++--------- g4f/Provider/you/har_file.py | 2 +- 4 files changed, 62 insertions(+), 57 deletions(-) (limited to 'g4f/Provider') diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 28d0558b..a202f45e 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -330,6 +330,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): image: ImageType = None, image_name: str = None, return_conversation: bool = False, + max_retries: int = 3, **kwargs ) -> AsyncResult: """ @@ -409,36 +410,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): raise error cls.default_model = cls.get_model(await cls.get_default_model(session, cls._headers)) - async with session.post( - f"{cls.url}/backend-anon/sentinel/chat-requirements" - if cls._api_key is None else - f"{cls.url}/backend-api/sentinel/chat-requirements", - json={"conversation_mode_kind": "primary_assistant"}, - #json={"p": generate_proof_token(True, user_agent=cls._headers["user-agent"], proofTokens=proofTokens)}, - headers=cls._headers - ) as response: - cls._update_request_args(session) - await raise_for_status(response) - data = await response.json() - need_arkose = data.get("arkose", {}).get("required") - chat_token = data["token"] - proofofwork = "" - if "proofofwork" in data: - proofofwork = generate_proof_token(**data["proofofwork"], user_agent=cls._headers["user-agent"], proofTokens=proofTokens) - - if need_arkose and arkose_token is None: - arkose_token, api_key, cookies, headers = await getArkoseAndAccessToken(proxy) - cls._create_request_args(cookies, headers) - cls._set_api_key(api_key) - if arkose_token is None: - raise MissingAuthError("No arkose token found in .har file") - - if debug.logging: - print( - 'Arkose:', False if not need_arkose else arkose_token[:12]+"...", - 'Proofofwork:', False if proofofwork is None else proofofwork[:12]+"...", - ) - try: image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None except Exception as e: @@ -457,6 +428,43 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): auto_continue = False conversation.finish_reason = None while conversation.finish_reason is None: + async with session.post( + f"{cls.url}/backend-anon/sentinel/chat-requirements" + if cls._api_key is None else + f"{cls.url}/backend-api/sentinel/chat-requirements", + json={"p": generate_proof_token(True, user_agent=cls._headers["user-agent"], proofTokens=proofTokens)}, + headers=cls._headers + ) as response: + cls._update_request_args(session) + await raise_for_status(response) + requirements = await response.json() + need_arkose = requirements.get("arkose", {}).get("required") + chat_token = requirements["token"] + + if need_arkose and arkose_token is None: + arkose_token, api_key, cookies, headers, proofTokens = await getArkoseAndAccessToken(proxy) + cls._create_request_args(cookies, headers) + cls._set_api_key(api_key) + if arkose_token is None: + raise MissingAuthError("No arkose token found in .har file") + + if "proofofwork" in requirements: + proofofwork = generate_proof_token( + **requirements["proofofwork"], + user_agent=cls._headers["user-agent"], + proofTokens=proofTokens + ) + if debug.logging: + print( + 'Arkose:', False if not need_arkose else arkose_token[:12]+"...", + 'Proofofwork:', False if proofofwork is None else proofofwork[:12]+"...", + ) + ws = None + if need_arkose: + async with session.post("https://chatgpt.com/backend-api/register-websocket", headers=cls._headers) as response: + wss_url = (await response.json()).get("wss_url") + if wss_url: + ws = await session.ws_connect(wss_url) websocket_request_id = str(uuid.uuid4()) data = { "action": action, @@ -488,8 +496,14 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): headers=headers ) as response: cls._update_request_args(session) + if response.status == 403 and max_retries > 0: + max_retries -= 1 + if debug.logging: + print(f"Retry: Error {response.status}: {await response.text()}") + await asyncio.sleep(5) + continue await raise_for_status(response) - async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, conversation): + async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, conversation, ws): if return_conversation: history_disabled = False return_conversation = False @@ -519,13 +533,14 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): cls, messages: AsyncIterator, session: StreamSession, - fields: Conversation + fields: Conversation, + ws = None ) -> AsyncIterator: last_message: int = 0 async for message in messages: if message.startswith(b'{"wss_url":'): message = json.loads(message) - ws = await session.ws_connect(message["wss_url"]) + ws = await session.ws_connect(message["wss_url"]) if ws is None else ws try: async for chunk in cls.iter_messages_chunk( cls.iter_messages_ws(ws, message["conversation_id"], hasattr(ws, "recv")), diff --git a/g4f/Provider/openai/har_file.py b/g4f/Provider/openai/har_file.py index eefe305f..8dcbe44e 100644 --- a/g4f/Provider/openai/har_file.py +++ b/g4f/Provider/openai/har_file.py @@ -35,6 +35,7 @@ headers: dict = None proofTokens: list = [] def readHAR(): + global proofTokens dirPath = "./" harPath = [] chatArks = [] @@ -77,8 +78,8 @@ def readHAR(): if not accessToken: raise NoValidHarFileError("No accessToken found in .har files") if not chatArks: - return None, accessToken, cookies, headers, proofTokens - return chatArks.pop(), accessToken, cookies, headers, proofTokens + return None, accessToken, cookies, headers + return chatArks.pop(), accessToken, cookies, headers def get_headers(entry) -> dict: return {h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')} @@ -145,7 +146,7 @@ def getN() -> str: async def getArkoseAndAccessToken(proxy: str) -> tuple[str, str, dict, dict]: global chatArk, accessToken, cookies, headers, proofTokens if chatArk is None or accessToken is None: - chatArk, accessToken, cookies, headers, proofTokens = readHAR() + chatArk, accessToken, cookies, headers = readHAR() if chatArk is None: return None, accessToken, cookies, headers, proofTokens newReq = genArkReq(chatArk) diff --git a/g4f/Provider/openai/proofofwork.py b/g4f/Provider/openai/proofofwork.py index cbce153f..baf8a0ea 100644 --- a/g4f/Provider/openai/proofofwork.py +++ b/g4f/Provider/openai/proofofwork.py @@ -4,22 +4,18 @@ import json import base64 from datetime import datetime, timezone -proof_token_cache: dict = {} -def generate_proof_token(required: bool, seed: str = None, difficulty: str = None, user_agent: str = None, proofTokens: list = None): +def generate_proof_token(required: bool, seed: str = "", difficulty: str = "", user_agent: str = None, proofTokens: list = None): if not required: return - if seed is not None and seed in proof_token_cache: - return proof_token_cache[seed] - - # Get current UTC time - now_utc = datetime.now(timezone.utc) - parse_time = now_utc.strftime('%a, %d %b %Y %H:%M:%S GMT') if proofTokens: - config = random.choice(proofTokens) + config = proofTokens[-1] else: screen = random.choice([3008, 4010, 6000]) * random.choice([1, 2, 4]) + # Get current UTC time + now_utc = datetime.now(timezone.utc) + parse_time = now_utc.strftime('%a, %d %b %Y %H:%M:%S GMT') config = [ screen, parse_time, None, 0, user_agent, @@ -31,22 +27,15 @@ def generate_proof_token(required: bool, seed: str = None, difficulty: str = Non random.choice(["alert", "ontransitionend", "onprogress"]) ] - config[1] = parse_time - config[4] = user_agent - config[7] = random.randint(101, 2100) - - diff_len = None if difficulty is None else len(difficulty) + diff_len = len(difficulty) for i in range(100000): config[3] = i json_data = json.dumps(config) base = base64.b64encode(json_data.encode()).decode() - hash_value = hashlib.sha3_512((seed or "" + base).encode()).digest() + hash_value = hashlib.sha3_512((seed + base).encode()).digest() - if difficulty is None or hash_value.hex()[:diff_len] <= difficulty: - if seed is None: - return "gAAAAAC" + base - proof_token_cache[seed] = "gAAAAAB" + base - return proof_token_cache[seed] + if hash_value.hex()[:diff_len] <= difficulty: + return "gAAAAAB" + base fallback_base = base64.b64encode(f'"{seed}"'.encode()).decode() return "gAAAAABwQ8Lk5FbGpA2NcR9dShT6gYjU7VxZ4D" + fallback_base diff --git a/g4f/Provider/you/har_file.py b/g4f/Provider/you/har_file.py index 969ba96c..870c388e 100644 --- a/g4f/Provider/you/har_file.py +++ b/g4f/Provider/you/har_file.py @@ -115,7 +115,7 @@ async def get_telemetry_ids(proxy: str = None) -> list: if page is not None: await page.close() if browser is not None: - await browser.close() + await browser.stop() except Exception as e: if debug.logging: logging.error(e) -- cgit v1.2.3