modified: app.py

Fehler ohne KI und debugging gefunden...
What have I done? not completed one stupid href
2023-08-02 04:42:13 +02:00 · 2023-08-02 04:01:38 +02:00 · 2023-08-02 03:13:37 +02:00 · 2023-08-02 00:49:02 +02:00 · 2023-08-02 00:09:47 +02:00 · 2023-08-02 00:07:31 +02:00
5 changed files with 38 additions and 25 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,3 @@
 EMAIL_USERNAME=example@example.com
 EMAIL_PASSWORD=example
 EMAIL_SERVER=example.com
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 .env
 *.eml
--- a/app.py
+++ b/app.py
@@ -4,6 +4,7 @@ import re
 import os
 from flask import Flask, render_template, request, redirect, url_for
 from dotenv import load_dotenv
 from bs4 import BeautifulSoup
 app = Flask(__name__)
 load_dotenv()  # Laden der Umgebungsvariablen aus der .env-Datei
@@ -15,6 +16,8 @@ EMAIL_SERVER = os.environ.get("EMAIL_SERVER")
 template_dir = os.path.abspath(os.path.dirname(__file__))
 app = Flask(__name__, template_folder=template_dir)
 link_pattern = r"<a href=\"(.*?)\">(.*?)</a>"
 unsubscribe_pattern = r"(?i)\babbestellen\b|\bunsubscribe\b|\bdeabonnieren\b|\babbestellung\b|\babmelden\b"
 def get_text_from_email(email_message):
    text = ""
@@ -28,29 +31,29 @@ def get_text_from_email(email_message):
                text += body.decode(errors="ignore")
    return text
-def find_unsubscribe_links(text):
+def find_unsubscribe_links(text, html_links):
-    # Erweitere die Liste der Abmeldelinks-Patterns bei Bedarf
+    soup = BeautifulSoup(text, 'html.parser')
    unsubscribe_patterns = [
        r"(?i)\babbestellen\b",      # Deutsch - abbestellen
        r"(?i)\babbestellung\b",    # Deutsch - abbestellung
        r"(?i)\bdeabonnieren\b",    # Deutsch - deabonnieren
        r"(?i)\bunsubscribe\b",     # Englisch - unsubscribe
    ]
    unsubscribe_links = []
    for pattern in unsubscribe_patterns:
        matches = re.findall(pattern, text)
        if matches:
            for match in matches:
                link = find_link_in_text(text)
                if link:
                    unsubscribe_links.append(link)
    return unsubscribe_links
-def find_link_in_text(text):
+
-    # Einen einfachen Ansatz zur Link-Erkennung in Texten (nicht perfekt)
+    for link in soup.find_all('a', href=True):
-    link_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+        if re.search(unsubscribe_pattern, str(link)) and str(link['href']) not in html_links:
-    link_matches = re.findall(link_pattern, text)
+            print(link['href'])
-    return link_matches[0] if link_matches else None
+            test1=str(link['href'])
            print(test1)
            if test1 not in unsubscribe_links:
                print(test1)
                unsubscribe_links.append(test1)
    # Zusätzlich nach einfachen URLs suchen und auf das Unsubscribe-Pattern prüfen
    simple_urls = re.findall(r"(https?://\S+)", text)
    for url in simple_urls:
        if re.search(unsubscribe_pattern, url) and url not in html_links:
            if url not in unsubscribe_links:
                unsubscribe_links.append(url)
    return unsubscribe_links
 def get_subject_from_email(email_message):
    subject = email_message.get("Subject")
@@ -93,6 +96,7 @@ def result():
    _, messages = mail.search(None, "ALL")
    links_data = {}
    html_links = set()
    for message_num in messages[0].split():
        _, msg_data = mail.fetch(message_num, "(RFC822)")
@@ -100,11 +104,14 @@ def result():
        body = get_text_from_email(msg)
        subject = get_subject_from_email(msg)
        if body:
-            unsubscribe_links = find_unsubscribe_links(body)
+            unsubscribe_links_in_html = find_unsubscribe_links(body, html_links)
-            if unsubscribe_links:
+            html_links.update(unsubscribe_links_in_html)
            unsubscribe_links_in_text = find_unsubscribe_links(body, html_links)
            all_unsubscribe_links = list(set(unsubscribe_links_in_html + unsubscribe_links_in_text))
            if all_unsubscribe_links:
                links_data[message_num.decode()] = {
                    "subject": subject,
-                    "links": list(set(unsubscribe_links))
+                    "links": all_unsubscribe_links
                }
    mail.logout()
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 Flask
 python-dotenv
 beautifulsoup4
--- a/result.html
+++ b/result.html
@@ -9,7 +9,7 @@
        <h1>E-Mail Unsubscribe Links Result</h1>
        <table>
            <tr>
-                <th>Email Address</th>
+                <th>Email ID</th>
                <th>Subject</th>
                <th>Unsubscribe Link</th>
                <th>Move to Trash</th>
Author	SHA1	Message	Date
Philipp Wagner	99ec2b956b	modified: app.py	2023-08-02 04:42:13 +02:00
Philipp Wagner	f0ebdc7888	Fehler ohne KI und debugging gefunden...	2023-08-02 04:01:38 +02:00
Philipp Wagner	ada8c4ae48	What have I done? not completed one stupid href	2023-08-02 03:13:37 +02:00
Philipp Wagner	3765d30822	modified: .gitignore	2023-08-02 00:49:02 +02:00
Philipp Wagner	bd849025bf	new file: .env.example	2023-08-02 00:09:47 +02:00
Philipp Wagner	9d222723b6	new file: .gitignore	2023-08-02 00:07:31 +02:00