From 4bfff1094cc65809af32f8f1d80b89231378563d Mon Sep 17 00:00:00 2001
From: Drk0027 <drk0027@interlan.ec>
Date: Wed, 14 Jan 2026 16:21:22 -0500
Subject: [PATCH] prototipo de parseo para hugo

---
 .env.example       |  7 ++++++-
 email-processor.py | 29 +++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/.env.example b/.env.example
index 38b2f88..2c829a6 100644
--- a/.env.example
+++ b/.env.example
@@ -11,9 +11,14 @@ IGNORE_KEYWORDS=publicidad,notificacion,spam
 
 # Opciones de salida (True para activar, False para desactivar)
 SAVE_HTML=True
+SAVE_MARKDOWN=True
 SAVE_XML=True
 SEND_WEBHOOK=True
 
 # URL del Webhook de WordPress (reemplaza tudominio por la palabra que definas en el webhook de wordpress)
 WP_WEBHOOK_URL=https://tusitio.com/wp-json/tudominio/v1/recibir-comentario
-WEBHOOK_SECRET_TOKEN=mi_clave_secreta_123
\ No newline at end of file
+WEBHOOK_SECRET_TOKEN=mi_clave_secreta_123
+
+# IDEA: vincular a un directorio de HUGO para detectar y acotar los nuevos comentarios como citas en markdown
+HUGO_DIR=../prueba_hugo/
+APPEND_HUGO=True
\ No newline at end of file
diff --git a/email-processor.py b/email-processor.py
index 576d39b..26f2a03 100644
--- a/email-processor.py
+++ b/email-processor.py
@@ -8,6 +8,8 @@ from dotenv import load_dotenv
 from bs4 import BeautifulSoup
 import xml.etree.ElementTree as ET
 from datetime import datetime
+from markdownify import markdownify as md
+from urllib.parse import urlparse
 
 load_dotenv()
 
@@ -45,8 +47,6 @@ def process_emails():
     domain = os.getenv("DOMAIN_TO_SEARCH")
     ignore_list = [w.strip().lower() for w in os.getenv("IGNORE_KEYWORDS").split(',')]
 
-    print(messages)
-
     for m_id in messages[0].split():
         res, msg_data = mail.fetch(m_id, '(RFC822)')
         for response_part in msg_data:
@@ -84,6 +84,10 @@ def process_emails():
 
                 # --- ACCIONES ---
 
+                # if os.getenv("SAVE_MARKDOWN") == "True":
+                #    with open(f"msg_{m_id.decode()}.md", "w", encoding="utf-8") as f:
+                #        f.write(f"### Remitente: {sender_name}\n\n{md(clean_body)}")
+
                 # HTML: Negritas, cursivas, etc.
                 if os.getenv("SAVE_HTML") == "True":
                     with open(f"msg_{m_id.decode()}.html", "w", encoding="utf-8") as f:
@@ -113,6 +117,27 @@ def process_emails():
                         })
                     except: pass
 
+                # APPEND_HUGO: append the comment as a Markdown blockquote to the
+                # Hugo page whose URL arrives in the e-mail subject.
+                if os.getenv("APPEND_HUGO") == "True":
+                    markdown_text = md(clean_body).strip()
+                    comment_md = "\n".join(f"> {line}" for line in markdown_text.split("\n"))
+                    # Only the URL path maps onto Hugo's content tree; a query
+                    # string has no on-disk counterpart, so it is ignored.
+                    ruta_relativa = urlparse(subject.strip()).path.strip("/")
+                    content_dir = os.path.abspath(os.path.join(os.getenv("HUGO_DIR") or ".", "content"))
+                    archivo_md = os.path.abspath(os.path.join(content_dir, ruta_relativa, "index.md"))
+
+                    # The subject is untrusted input: refuse paths that escape
+                    # the content directory (e.g. via "..") before appending.
+                    dentro = archivo_md.startswith(content_dir + os.sep)
+                    if dentro and os.path.isfile(archivo_md):
+                        with open(archivo_md, "a", encoding="utf-8") as f:
+                            # comment_md is already "> "-prefixed on every line.
+                            f.write(f"\n\n{comment_md}\n")
+                        print(f"✅ Archivo actualizado en: {archivo_md}")
+                    else:
+                        print(f"❌ No se encontró el archivo en {archivo_md}. Verifica la estructura.")
                 # Borrar
                 mail.store(m_id, '+FLAGS', '\\Deleted')