Compare commits
6 Commits
b20fb73e60
...
main
Author | SHA1 | Date | |
---|---|---|---|
99ec2b956b | |||
f0ebdc7888 | |||
ada8c4ae48 | |||
3765d30822 | |||
bd849025bf | |||
9d222723b6 |
3
.env.example
Normal file
3
.env.example
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
EMAIL_USERNAME=example@example.com
|
||||||
|
EMAIL_PASSWORD=example
|
||||||
|
EMAIL_SERVER=example.com
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
.env
|
||||||
|
*.eml
|
55
app.py
55
app.py
@@ -4,6 +4,7 @@ import re
|
|||||||
import os
|
import os
|
||||||
from flask import Flask, render_template, request, redirect, url_for
|
from flask import Flask, render_template, request, redirect, url_for
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
load_dotenv() # Laden der Umgebungsvariablen aus der .env-Datei
|
load_dotenv() # Laden der Umgebungsvariablen aus der .env-Datei
|
||||||
@@ -15,6 +16,8 @@ EMAIL_SERVER = os.environ.get("EMAIL_SERVER")
|
|||||||
|
|
||||||
template_dir = os.path.abspath(os.path.dirname(__file__))
|
template_dir = os.path.abspath(os.path.dirname(__file__))
|
||||||
app = Flask(__name__, template_folder=template_dir)
|
app = Flask(__name__, template_folder=template_dir)
|
||||||
|
link_pattern = r"<a href=\"(.*?)\">(.*?)</a>"
|
||||||
|
unsubscribe_pattern = r"(?i)\babbestellen\b|\bunsubscribe\b|\bdeabonnieren\b|\babbestellung\b|\babmelden\b"
|
||||||
|
|
||||||
def get_text_from_email(email_message):
|
def get_text_from_email(email_message):
|
||||||
text = ""
|
text = ""
|
||||||
@@ -28,29 +31,29 @@ def get_text_from_email(email_message):
|
|||||||
text += body.decode(errors="ignore")
|
text += body.decode(errors="ignore")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def find_unsubscribe_links(text):
|
def find_unsubscribe_links(text, html_links):
|
||||||
# Erweitere die Liste der Abmeldelinks-Patterns bei Bedarf
|
soup = BeautifulSoup(text, 'html.parser')
|
||||||
unsubscribe_patterns = [
|
|
||||||
r"(?i)\babbestellen\b", # Deutsch - abbestellen
|
|
||||||
r"(?i)\babbestellung\b", # Deutsch - abbestellung
|
|
||||||
r"(?i)\bdeabonnieren\b", # Deutsch - deabonnieren
|
|
||||||
r"(?i)\bunsubscribe\b", # Englisch - unsubscribe
|
|
||||||
]
|
|
||||||
unsubscribe_links = []
|
unsubscribe_links = []
|
||||||
for pattern in unsubscribe_patterns:
|
|
||||||
matches = re.findall(pattern, text)
|
|
||||||
if matches:
|
|
||||||
for match in matches:
|
|
||||||
link = find_link_in_text(text)
|
|
||||||
if link:
|
|
||||||
unsubscribe_links.append(link)
|
|
||||||
return unsubscribe_links
|
|
||||||
|
|
||||||
def find_link_in_text(text):
|
|
||||||
# Einen einfachen Ansatz zur Link-Erkennung in Texten (nicht perfekt)
|
for link in soup.find_all('a', href=True):
|
||||||
link_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
|
if re.search(unsubscribe_pattern, str(link)) and str(link['href']) not in html_links:
|
||||||
link_matches = re.findall(link_pattern, text)
|
print(link['href'])
|
||||||
return link_matches[0] if link_matches else None
|
test1=str(link['href'])
|
||||||
|
print(test1)
|
||||||
|
if test1 not in unsubscribe_links:
|
||||||
|
print(test1)
|
||||||
|
unsubscribe_links.append(test1)
|
||||||
|
|
||||||
|
|
||||||
|
# Zusätzlich nach einfachen URLs suchen und auf das Unsubscribe-Pattern prüfen
|
||||||
|
simple_urls = re.findall(r"(https?://\S+)", text)
|
||||||
|
for url in simple_urls:
|
||||||
|
if re.search(unsubscribe_pattern, url) and url not in html_links:
|
||||||
|
if url not in unsubscribe_links:
|
||||||
|
unsubscribe_links.append(url)
|
||||||
|
|
||||||
|
return unsubscribe_links
|
||||||
|
|
||||||
def get_subject_from_email(email_message):
|
def get_subject_from_email(email_message):
|
||||||
subject = email_message.get("Subject")
|
subject = email_message.get("Subject")
|
||||||
@@ -93,6 +96,7 @@ def result():
|
|||||||
|
|
||||||
_, messages = mail.search(None, "ALL")
|
_, messages = mail.search(None, "ALL")
|
||||||
links_data = {}
|
links_data = {}
|
||||||
|
html_links = set()
|
||||||
|
|
||||||
for message_num in messages[0].split():
|
for message_num in messages[0].split():
|
||||||
_, msg_data = mail.fetch(message_num, "(RFC822)")
|
_, msg_data = mail.fetch(message_num, "(RFC822)")
|
||||||
@@ -100,11 +104,14 @@ def result():
|
|||||||
body = get_text_from_email(msg)
|
body = get_text_from_email(msg)
|
||||||
subject = get_subject_from_email(msg)
|
subject = get_subject_from_email(msg)
|
||||||
if body:
|
if body:
|
||||||
unsubscribe_links = find_unsubscribe_links(body)
|
unsubscribe_links_in_html = find_unsubscribe_links(body, html_links)
|
||||||
if unsubscribe_links:
|
html_links.update(unsubscribe_links_in_html)
|
||||||
|
unsubscribe_links_in_text = find_unsubscribe_links(body, html_links)
|
||||||
|
all_unsubscribe_links = list(set(unsubscribe_links_in_html + unsubscribe_links_in_text))
|
||||||
|
if all_unsubscribe_links:
|
||||||
links_data[message_num.decode()] = {
|
links_data[message_num.decode()] = {
|
||||||
"subject": subject,
|
"subject": subject,
|
||||||
"links": list(set(unsubscribe_links))
|
"links": all_unsubscribe_links
|
||||||
}
|
}
|
||||||
|
|
||||||
mail.logout()
|
mail.logout()
|
||||||
|
@@ -1,2 +1,3 @@
|
|||||||
Flask
|
Flask
|
||||||
python-dotenv
|
python-dotenv
|
||||||
|
beautifulsoup4
|
@@ -9,7 +9,7 @@
|
|||||||
<h1>E-Mail Unsubscribe Links Result</h1>
|
<h1>E-Mail Unsubscribe Links Result</h1>
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Email Address</th>
|
<th>Email ID</th>
|
||||||
<th>Subject</th>
|
<th>Subject</th>
|
||||||
<th>Unsubscribe Link</th>
|
<th>Unsubscribe Link</th>
|
||||||
<th>Move to Trash</th>
|
<th>Move to Trash</th>
|
||||||
|
Reference in New Issue
Block a user