author: Anton Luka Šijanec <anton@sijanec.eu> 2024-03-12 20:09:59 +0100
committer: Anton Luka Šijanec <anton@sijanec.eu> 2024-03-12 20:09:59 +0100
commit: 7de98dd15b7d50e19e29e682b727d845804868ba (patch)
tree: eeac4ad5d7fd21817214fe2d672602f2580f21f8 /skripti/emailautolinkclicker.py
parent: studisfri-deleted (diff)
download: r-7de98dd15b7d50e19e29e682b727d845804868ba.tar
r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.gz
r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.bz2
r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.lz
r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.xz
r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.zst
r-7de98dd15b7d50e19e29e682b727d845804868ba.zip
1 files changed, 39 insertions, 0 deletions
diff --git a/skripti/emailautolinkclicker.py b/skripti/emailautolinkclicker.py
new file mode 100755
index 0000000..f9d200b
--- /dev/null
+++ b/skripti/emailautolinkclicker.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python3
+import sys
+import email
+import quopri
+import re
+from bs4 import BeautifulSoup
+import tempfile
+import shutil
+import subprocess
+def extract_urls_html(html_string):
+	"""Collect link targets from an HTML document.
+
+	Parses html_string with BeautifulSoup's 'html.parser' backend and returns
+	a list holding the href attribute value of every <a> tag that has one, in
+	the order find_all yields them. The values are returned as written in the
+	markup; nothing is resolved or filtered here.
+	"""
+	document = BeautifulSoup(html_string, 'html.parser')
+	# href=True restricts the search to anchors that carry an href attribute.
+	return [anchor['href'] for anchor in document.find_all('a', href=True)]
+def extract_urls_text(input_string):
+	"""Return the http:// and https:// URLs found in plain text, in order.
+
+	A URL runs until the next whitespace, '<', '>' or '"': RFC 3986 appendix C
+	names angle brackets and double quotes as the delimiters for URLs embedded
+	in plain text, and mail bodies commonly write links as <https://...>.
+	Trailing sentence punctuation (. , ; : ! ?) is removed as well, so
+	"see https://example.com/a." yields "https://example.com/a".
+
+	input_string: text to scan; None or an empty string gives an empty list.
+	"""
+	if not input_string:
+		return []
+	pattern = r'https?://[^\s<>"]+'
+	return [url.rstrip('.,;:!?') for url in re.findall(pattern, input_string)]
+def decode_quoted_printable(encoded_text, charset='utf-8'):
+	"""Decode quoted-printable data and return it as text.
+
+	encoded_text: quoted-printable data as str or bytes.
+	charset: character set of the decoded bytes. Defaults to UTF-8, which is
+		what this function always assumed; an empty or unknown charset name
+		falls back to UTF-8 as well.
+
+	Bytes that are invalid in the charset become U+FFFD instead of raising
+	UnicodeDecodeError, so one bad byte does not discard the whole body.
+	"""
+	charset = charset or 'utf-8'
+	try:
+		# Probe the codec name up front so an unknown charset cannot raise below.
+		''.encode(charset)
+	except LookupError:
+		charset = 'utf-8'
+	if isinstance(encoded_text, str):
+		# quopri only accepts str input that is pure ASCII, so hand it bytes;
+		# literal non-ASCII characters then round-trip through the charset.
+		encoded_text = encoded_text.encode(charset, 'replace')
+	return quopri.decodestring(encoded_text).decode(charset, 'replace')
+def parse_mbox(mbox_text, contenttype):
+	"""Return the decoded text of the message parts of one content type.
+
+	mbox_text: source of a single e-mail message (headers and body) as str.
+		Despite the name it is parsed with email.message_from_string, i.e.
+		as one message, not as an mbox file holding several.
+	contenttype: MIME type to select, compared against get_content_type()
+		of each part, e.g. "text/plain" or "text/html".
+
+	Returns the bodies of all matching parts joined with newlines, or an
+	empty string when no part matches. Quoted-printable and base64 bodies
+	are decoded using the charset declared on the part (UTF-8 when none is
+	declared or the name is unknown); other bodies are returned as parsed.
+	"""
+	message = email.message_from_string(mbox_text)
+	bodies = []
+	for part in message.walk():
+		if part.get_content_type() != contenttype:
+			continue
+		transfer_encoding = str(part.get('Content-Transfer-Encoding', '')).strip().lower()
+		if transfer_encoding in ('quoted-printable', 'base64'):
+			raw = part.get_payload(decode=True)
+			if raw is None:
+				# Multipart containers carry no payload of their own.
+				continue
+			charset = part.get_content_charset() or 'utf-8'
+			try:
+				bodies.append(raw.decode(charset, 'replace'))
+			except LookupError:
+				bodies.append(raw.decode('utf-8', 'replace'))
+		else:
+			# 7bit/8bit/unspecified: keep the str exactly as parsed, because
+			# get_payload(decode=True) would escape non-ASCII characters.
+			payload = part.get_payload()
+			if isinstance(payload, str):
+				bodies.append(payload)
+	return '\n'.join(bodies)
+def main():
+	"""Read one e-mail message from stdin and open each link in headless LibreWolf.
+
+	Links come from the text/plain part (URLs found in the text) and from the
+	text/html part (href values of <a> tags). Every link is opened once, in a
+	throw-away browser profile, with a 30 second limit enforced by timeout(1).
+	The command, its stdout, stderr and return code are printed per link.
+	"""
+	mbox_text = sys.stdin.read()
+	candidates = extract_urls_text(parse_mbox(mbox_text, "text/plain")) + extract_urls_html(parse_mbox(mbox_text, "text/html"))
+	# The message is untrusted input: only follow web links, so that file:,
+	# javascript:, mailto: and relative hrefs never reach the browser.
+	# dict.fromkeys drops duplicates (the plain and HTML parts usually repeat
+	# the same links) while keeping first-seen order.
+	links = [link for link in dict.fromkeys(candidate.strip() for candidate in candidates) if link.lower().startswith(("http://", "https://"))]
+	# Created only after parsing, so a parse failure leaves nothing behind.
+	temp_dir = tempfile.mkdtemp("linkclicker")
+	try:
+		for link in links:
+			# "--" ends option parsing, so a link can never be taken as a flag.
+			cmd = ["timeout", "30s", "librewolf", "--headless", "--profile", temp_dir, "--no-remote", "--new-instance", "--screenshot", temp_dir + "/screenshot.png", "--", link]
+			result = subprocess.run(cmd, capture_output=True, text=True)
+			print("cmd: " + str(cmd) + "\nstdout: " + result.stdout + "\nstderr: " + result.stderr + "\nreturncode: " + str(result.returncode))
+	finally:
+		# Remove the profile and screenshot even when a browser run raises.
+		shutil.rmtree(temp_dir)
+if __name__ == "__main__":
+	main()
author	Anton Luka Šijanec <anton@sijanec.eu>	2024-03-12 20:09:59 +0100
committer	Anton Luka Šijanec <anton@sijanec.eu>	2024-03-12 20:09:59 +0100
commit	7de98dd15b7d50e19e29e682b727d845804868ba (patch)
tree	eeac4ad5d7fd21817214fe2d672602f2580f21f8 /skripti/emailautolinkclicker.py
parent	studisfri-deleted (diff)
download	r-7de98dd15b7d50e19e29e682b727d845804868ba.tar r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.gz r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.bz2 r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.lz r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.xz r-7de98dd15b7d50e19e29e682b727d845804868ba.tar.zst r-7de98dd15b7d50e19e29e682b727d845804868ba.zip