diff options
Diffstat (limited to 'skripti/emailautolinkclicker.py')
-rwxr-xr-x | skripti/emailautolinkclicker.py | 39 |
1 files changed, 39 insertions, 0 deletions
#!/usr/bin/python3
"""Open every HTTP(S) link found in an e-mail in a headless browser.

The raw message is read from standard input.  Links are collected from the
``text/plain`` parts (by regular expression) and from the ``text/html`` parts
(``href`` attributes of ``<a>`` tags).  Each distinct link is then loaded once
in a throw-away headless LibreWolf profile, and the command, its output and
its return code are printed to standard output.
"""
import sys
import email
import quopri
import re
import tempfile
import shutil
import subprocess

# Matches "http://" or "https://" followed by any run of non-whitespace.
_URL_PATTERN = re.compile(r'https?://\S+')

# Characters that commonly trail a URL in prose ("<https://x>", "see https://x.")
# but are almost never the intended last character of the URL itself.
_TRAILING_JUNK = '>.,;:!?\'"'

# Transfer encodings that have to be reversed before the body is readable.
_ENCODED_TRANSFER_ENCODINGS = ('quoted-printable', 'base64')


def extract_urls_html(html_string):
    """Return the ``href`` value of every ``<a>`` tag in *html_string*.

    The values are returned unfiltered and in document order; they may
    include non-HTTP schemes such as ``mailto:``.
    """
    # Imported here so the third-party dependency is only required when an
    # HTML part actually has to be parsed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_string, 'html.parser')
    return [a['href'] for a in soup.find_all('a', href=True)]


def extract_urls_text(input_string):
    """Return all ``http://`` / ``https://`` URLs found in plain text.

    A URL extends up to the next whitespace character.  Trailing ``>`` and
    sentence punctuation are stripped so that ``<https://example.com>`` or
    ``https://example.com.`` yield ``https://example.com``.
    """
    candidates = _URL_PATTERN.findall(input_string)
    return [url.rstrip(_TRAILING_JUNK) for url in candidates]


def decode_quoted_printable(encoded_text):
    """Decode quoted-printable *encoded_text* and return it as a UTF-8 string.

    Bytes that are not valid UTF-8 are replaced instead of raising.
    """
    return quopri.decodestring(encoded_text).decode('utf-8', errors='replace')


def _part_text(part):
    """Return the body of one leaf message part as text ('' if it has none)."""
    transfer_encoding = str(part.get('Content-Transfer-Encoding', '')).strip().lower()
    if transfer_encoding not in _ENCODED_TRANSFER_ENCODINGS:
        # 7bit / 8bit / binary: the payload is already readable text.
        payload = part.get_payload()
        return payload if isinstance(payload, str) else ''

    # Let the email package undo quoted-printable or base64 for us.
    raw = part.get_payload(decode=True)
    if raw is None:
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The message declared a charset Python does not know.
        return raw.decode('utf-8', errors='replace')


def parse_mbox(mbox_text, contenttype):
    """Return the decoded text of all parts of a message with a given type.

    Args:
        mbox_text: The raw message, either as ``str`` or as ``bytes``.
        contenttype: The MIME type to look for, e.g. ``"text/plain"``.

    Returns:
        The bodies of all matching parts joined by newlines, with
        quoted-printable and base64 transfer encodings already reversed.
        An empty string is returned when no part matches.
    """
    if isinstance(mbox_text, bytes):
        message = email.message_from_bytes(mbox_text)
    else:
        message = email.message_from_string(mbox_text)

    bodies = [
        _part_text(part)
        for part in message.walk()
        if part.get_content_type() == contenttype
    ]
    return "\n".join(bodies)


def _collect_links(mbox_data):
    """Return the distinct HTTP(S) links of a message, in first-seen order."""
    links = extract_urls_text(parse_mbox(mbox_data, "text/plain"))
    html = parse_mbox(mbox_data, "text/html")
    if html:
        links += extract_urls_html(html)

    # multipart/alternative mails repeat the same links in both parts, and
    # HTML may contain mailto:/javascript:/fragment hrefs we must not open.
    stripped = (link.strip() for link in links)
    return [
        link
        for link in dict.fromkeys(stripped)
        if link.lower().startswith(("http://", "https://"))
    ]


def main():
    """Read a message from stdin and open each of its links once."""
    # Read bytes, not text: mail is not guaranteed to be valid UTF-8.
    mbox_data = sys.stdin.buffer.read()
    links = _collect_links(mbox_data)

    temp_dir = tempfile.mkdtemp("linkclicker")
    try:
        for link in links:
            cmd = [
                "timeout", "30s",
                "librewolf", "--headless",
                "--profile", temp_dir,
                "--no-remote", "--new-instance",
                "--screenshot", temp_dir + "/screenshot.png",
                # "--" ends option parsing so a link can never act as a flag.
                "--", link,
            ]
            result = subprocess.run(cmd, capture_output=True, text=True)
            print(
                f"cmd: {cmd}\nstdout: {result.stdout}\n"
                f"stderr: {result.stderr}\nreturncode: {result.returncode}"
            )
    finally:
        # Always remove the throw-away browser profile, even on errors.
        shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == "__main__":
    main()