Diffstat (limited to 'etc/tool/vercel.py')
-rw-r--r-- | etc/tool/vercel.py | 103
1 file changed, 103 insertions, 0 deletions
diff --git a/etc/tool/vercel.py b/etc/tool/vercel.py
new file mode 100644
index 00000000..75df6c70
--- /dev/null
+++ b/etc/tool/vercel.py
@@ -0,0 +1,103 @@
+import json
+import re
+from typing import Any
+
+import quickjs
+from curl_cffi import requests
+
+session = requests.Session(impersonate="chrome107")
+
+
+def get_model_info() -> dict[str, Any]:
+    url = "https://sdk.vercel.ai"
+    response = session.get(url)
+    html = response.text
+    paths_regex = r"static\/chunks.+?\.js"
+    separator_regex = r'"\]\)<\/script><script>self\.__next_f\.push\(\[.,"'
+
+    paths = re.findall(paths_regex, html)
+    paths = [re.sub(separator_regex, "", path) for path in paths]
+    paths = list(set(paths))
+
+    urls = [f"{url}/_next/{path}" for path in paths]
+    scripts = [session.get(url).text for url in urls]
+
+    for script in scripts:
+        models_regex = r'let .="\\n\\nHuman:\",r=(.+?),.='
+        matches = re.findall(models_regex, script)
+
+        if matches:
+            models_str = matches[0]
+            stop_sequences_regex = r"(?<=stopSequences:{value:\[)\D(?<!\])"
+            models_str = re.sub(
+                stop_sequences_regex, re.escape('"\\n\\nHuman:"'), models_str
+            )
+
+            context = quickjs.Context()  # type: ignore
+            json_str: str = context.eval(f"({models_str})").json()  # type: ignore
+            return json.loads(json_str)  # type: ignore
+
+    return {}
+
+
+def convert_model_info(models: dict[str, Any]) -> dict[str, Any]:
+    model_info: dict[str, Any] = {}
+    for model_name, params in models.items():
+        default_params = params_to_default_params(params["parameters"])
+        model_info[model_name] = {"id": params["id"], "default_params": default_params}
+    return model_info
+
+
+def params_to_default_params(parameters: dict[str, Any]):
+    defaults: dict[str, Any] = {}
+    for key, parameter in parameters.items():
+        if key == "maximumLength":
+            key = "maxTokens"
+        defaults[key] = parameter["value"]
+    return defaults
+
+
+def get_model_names(model_info: dict[str, Any]):
+    model_names = model_info.keys()
+    model_names = [
+        name
+        for name in model_names
+        if name not in ["openai:gpt-4", "openai:gpt-3.5-turbo"]
+    ]
+    model_names.sort()
+    return model_names
+
+
+def print_providers(model_names: list[str]):
+    for name in model_names:
+        split_name = re.split(r":|/", name)
+        base_provider = split_name[0]
+        variable_name = split_name[-1].replace("-", "_").replace(".", "")
+        line = f'{variable_name} = Model(name="{name}", base_provider="{base_provider}", best_provider=Vercel,)\n'
+        print(line)
+
+
+def print_convert(model_names: list[str]):
+    for name in model_names:
+        split_name = re.split(r":|/", name)
+        key = split_name[-1]
+        variable_name = split_name[-1].replace("-", "_").replace(".", "")
+        # "claude-instant-v1": claude_instant_v1,
+        line = f'        "{key}": {variable_name},'
+        print(line)
+
+
+def main():
+    model_info = get_model_info()
+    model_info = convert_model_info(model_info)
+    print(json.dumps(model_info, indent=2))
+
+    model_names = get_model_names(model_info)
+    print("-------" * 40)
+    print_providers(model_names)
+    print("-------" * 40)
+    print_convert(model_names)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
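For reference, a rough sketch of the data shapes this tool emits, based only on what the code above does. The model name and parameter values below are hypothetical placeholders, since the real values depend on whatever sdk.vercel.ai currently serves; the Model class and Vercel provider referenced in the printed output are assumed to live elsewhere in the project.

# Hypothetical example of the structures produced by this tool; the model name
# and parameter values are placeholders, not scraped data.

# Shape returned by convert_model_info() and printed as JSON by main():
example_model_info = {
    "anthropic:claude-instant-v1": {   # hypothetical model name
        "id": "anthropic:claude-instant-v1",
        "default_params": {
            "temperature": 1,          # values copied from the scraped "parameters"
            "maxTokens": 1024,         # block; "maximumLength" is renamed to
            "topP": 1,                 # "maxTokens" by params_to_default_params()
        },
    },
}

# print_providers() then emits one provider definition per model, e.g.:
#   claude_instant_v1 = Model(name="anthropic:claude-instant-v1", base_provider="anthropic", best_provider=Vercel,)
# and print_convert() emits matching mapping entries, e.g.:
#   "claude-instant-v1": claude_instant_v1,
# presumably intended to be copied into the project's model definitions by hand.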