from __future__ import annotations

import asyncio
import time
import json
from urllib.parse import quote
from typing import List, Dict

from aiohttp import ClientSession, BaseConnector

try:
    from bs4 import BeautifulSoup
    has_requirements = True
except ImportError:
    has_requirements = False

from ..helper import get_connector
from ...errors import MissingRequirementsError, RateLimitError
from ...webdriver import WebDriver, get_driver_cookies, get_browser

BING_URL = "https://www.bing.com"
TIMEOUT_LOGIN = 1200
TIMEOUT_IMAGE_CREATION = 300
ERRORS = [
    "this prompt is being reviewed",
    "this prompt has been blocked",
    "we're working hard to offer image creator in more languages",
    "we can't create your images right now"
]
BAD_IMAGES = [
    "https://r.bing.com/rp/in-2zU3AJUdkgFe7ZKv19yPBHVs.png",
    "https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg",
]

def wait_for_login(driver: WebDriver, timeout: int = TIMEOUT_LOGIN) -> None:
    """
    Waits for the user to log in within a given timeout period.

    Args:
        driver (WebDriver): Webdriver for browser automation.
        timeout (int): Maximum waiting time in seconds.

    Raises:
        RuntimeError: If the login process exceeds the timeout.
    """
    driver.get(f"{BING_URL}/")
    start_time = time.time()
    # The "_U" cookie is set once the user has signed in.
    while not driver.get_cookie("_U"):
        if time.time() - start_time > timeout:
            raise RuntimeError("Timeout waiting for login")
        time.sleep(0.5)

def get_cookies_from_browser(proxy: str = None) -> dict[str, str]:
    """
    Retrieves cookies from the browser using webdriver.

    Args:
        proxy (str, optional): Proxy configuration.

    Returns:
        dict[str, str]: Retrieved cookies.
    """
    with get_browser(proxy=proxy) as driver:
        wait_for_login(driver)
        time.sleep(1)
        return get_driver_cookies(driver)

def create_session(cookies: Dict[str, str], proxy: str = None, connector: BaseConnector = None) -> ClientSession:
    """
    Creates a new client session with specified cookies and headers.

    Args:
        cookies (Dict[str, str]): Cookies to be used for the session.
        proxy (str, optional): Proxy configuration.
        connector (BaseConnector, optional): Connector to use for the session.

    Returns:
        ClientSession: The created client session.
    """
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
        "content-type": "application/x-www-form-urlencoded",
        "referrer-policy": "origin-when-cross-origin",
        "referrer": "https://www.bing.com/images/create/",
        "origin": "https://www.bing.com",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
        "sec-ch-ua": "\"Microsoft Edge\";v=\"111\", \"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"111\"",
        "sec-ch-ua-mobile": "?0",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
    }
    if cookies:
        headers["Cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
    return ClientSession(headers=headers, connector=get_connector(connector, proxy))

async def create_images(session: ClientSession, prompt: str, timeout: int = TIMEOUT_IMAGE_CREATION) -> List[str]:
    """
    Creates images based on a given prompt using Bing's service.

    Args:
        session (ClientSession): Active client session.
        prompt (str): Prompt to generate images.
        timeout (int): Timeout for the request in seconds.

    Returns:
        List[str]: A list of URLs to the created images.

    Raises:
        RuntimeError: If image creation fails or times out.
    """
    if not has_requirements:
        raise MissingRequirementsError('Install "beautifulsoup4" package')
    url_encoded_prompt = quote(prompt)
    payload = f"q={url_encoded_prompt}&rt=4&FORM=GENCRE"
    url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=4&FORM=GENCRE"
    async with session.post(url, allow_redirects=False, data=payload, timeout=timeout) as response:
        response.raise_for_status()
        text = (await response.text()).lower()
        if "0 coins available" in text:
            raise RateLimitError("No coins left. Log in with a different account or wait a while")
        for error in ERRORS:
            if error in text:
                raise RuntimeError(f"Create images failed: {error}")
    if response.status != 302:
        # Fall back to the slower creation mode (rt=3) if the fast mode (rt=4) did not redirect.
        url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE"
        async with session.post(url, allow_redirects=False, timeout=timeout) as response:
            if response.status != 302:
                raise RuntimeError(f"Create images failed. Code: {response.status}")
    redirect_url = response.headers["Location"].replace("&nfy=1", "")
    redirect_url = f"{BING_URL}{redirect_url}"
    request_id = redirect_url.split("id=")[1]
    async with session.get(redirect_url) as response:
        response.raise_for_status()
    # Poll the results endpoint until the placeholder status page is replaced by results.
    polling_url = f"{BING_URL}/images/create/async/results/{request_id}?q={url_encoded_prompt}"
    start_time = time.time()
    while True:
        if time.time() - start_time > timeout:
            raise RuntimeError(f"Timeout error after {timeout} sec")
        async with session.get(polling_url) as response:
            if response.status != 200:
                raise RuntimeError(f"Polling images failed. Code: {response.status}")
            text = await response.text()
            if not text or "GenerativeImagesStatusPage" in text:
                await asyncio.sleep(1)
            else:
                break
    # A JSON body here indicates an error response; plain HTML carries the results.
    error = None
    try:
        error = json.loads(text).get("errorMessage")
    except (json.JSONDecodeError, AttributeError):
        pass
    if error == "Pending":
        raise RuntimeError("Prompt has been blocked")
    elif error:
        raise RuntimeError(error)
    return read_images(text)

def read_images(html_content: str) -> List[str]:
    """
    Extracts image URLs from the HTML content.

    Args:
        html_content (str): HTML content containing image URLs.

    Returns:
        List[str]: A list of image URLs.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    tags = soup.find_all("img", class_="mimg")
    if not tags:
        tags = soup.find_all("img", class_="gir_mmimg")
    # Strip the "?w=" size suffix so the full-resolution URL is returned.
    images = [img["src"].split("?w=")[0] for img in tags]
    if any(im in BAD_IMAGES for im in images):
        raise RuntimeError("Bad images found")
    if not images:
        raise RuntimeError("No images found")
    return images
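
# A minimal usage sketch (assumption: you already have a valid Bing "_U"
# session cookie; the value below is a hypothetical placeholder). Shown as a
# comment because this module uses relative imports and is not run directly.
#
#     async def demo() -> None:
#         cookies = {"_U": "<your _U cookie value>"}
#         async with create_session(cookies) as session:
#             urls = await create_images(session, "a watercolor fox")
#             print("\n".join(urls))
#
#     asyncio.run(demo())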