prototipo de parseo para hugo

This commit is contained in:
2026-01-14 16:21:22 -05:00
parent 14d1f3b733
commit 4bfff1094c
2 changed files with 33 additions and 3 deletions

View File

@@ -11,9 +11,14 @@ IGNORE_KEYWORDS=publicidad,notificacion,spam
# Opciones de salida (True para activar, False para desactivar) # Opciones de salida (True para activar, False para desactivar)
SAVE_HTML=True SAVE_HTML=True
SAVE_MARKDOWN=True
SAVE_XML=True SAVE_XML=True
SEND_WEBHOOK=True SEND_WEBHOOK=True
# URL del Webhook de WordPress (reemplaza tudominio por la palabra que definas en el webhook de wordpress) # URL del Webhook de WordPress (reemplaza tudominio por la palabra que definas en el webhook de wordpress)
WP_WEBHOOK_URL=https://tusitio.com/wp-json/tudominio/v1/recibir-comentario WP_WEBHOOK_URL=https://tusitio.com/wp-json/tudominio/v1/recibir-comentario
WEBHOOK_SECRET_TOKEN=mi_clave_secreta_123 WEBHOOK_SECRET_TOKEN=mi_clave_secreta_123
# IDEA: vincular a un directorio de HUGO para detectar y acotar los nuevos comentarios como citas en markdown
HUGO_DIR = ../prueba_hugo/
APPEND_HUGO = True

View File

@@ -8,6 +8,8 @@ from dotenv import load_dotenv
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from datetime import datetime from datetime import datetime
from markdownify import markdownify as md
from urllib.parse import urlparse
load_dotenv() load_dotenv()
@@ -45,8 +47,6 @@ def process_emails():
domain = os.getenv("DOMAIN_TO_SEARCH") domain = os.getenv("DOMAIN_TO_SEARCH")
ignore_list = [w.strip().lower() for w in os.getenv("IGNORE_KEYWORDS").split(',')] ignore_list = [w.strip().lower() for w in os.getenv("IGNORE_KEYWORDS").split(',')]
print(messages)
for m_id in messages[0].split(): for m_id in messages[0].split():
res, msg_data = mail.fetch(m_id, '(RFC822)') res, msg_data = mail.fetch(m_id, '(RFC822)')
for response_part in msg_data: for response_part in msg_data:
@@ -84,6 +84,10 @@ def process_emails():
# --- ACCIONES --- # --- ACCIONES ---
# if os.getenv("SAVE_MARKDOWN") == "True":
# with open(f"msg_{m_id.decode()}.html", "w", encoding="utf-8") as f:
# f.write(f"<h3>Remitente: {sender_name}</h3><div>{clean_body}</div>")
# HTML: Negritas, cursivas, etc. # HTML: Negritas, cursivas, etc.
if os.getenv("SAVE_HTML") == "True": if os.getenv("SAVE_HTML") == "True":
with open(f"msg_{m_id.decode()}.html", "w", encoding="utf-8") as f: with open(f"msg_{m_id.decode()}.html", "w", encoding="utf-8") as f:
@@ -113,6 +117,27 @@ def process_emails():
}) })
except: pass except: pass
# APPEND_HUGO: append the comment, rendered as a Markdown blockquote, to the
# Hugo page it refers to: <HUGO_DIR>/content/<url path>/index.md.
# No per-message msg_<id>.md file is created here: the previous version opened
# one for writing but never wrote to it, leaving an empty file behind.
if os.getenv("APPEND_HUGO") == "True":
    hugo_dir = os.getenv("HUGO_DIR")

    # Convert the cleaned HTML body to Markdown and quote every line once.
    # Blank lines become a bare ">" so the blockquote is not interrupted.
    markdown_text = md(clean_body).strip()
    comment_md = "\n".join(
        f"> {line}".rstrip() for line in markdown_text.splitlines()
    )

    # NOTE(review): assumes the subject carries the URL of the commented
    # post -- confirm against how `subject` is built earlier in the function.
    # Only the URL path is used; a query string has no meaning on disk.
    ruta_relativa = urlparse(subject).path.strip("/")

    if not hugo_dir:
        print("❌ HUGO_DIR no está definido; no se puede actualizar Hugo.")
    elif not comment_md:
        print("❌ El comentario está vacío; no se modificó ningún archivo.")
    else:
        # The path comes from an e-mail (untrusted input): resolve it and
        # refuse anything that escapes the Hugo content directory ("../").
        content_root = os.path.realpath(os.path.join(hugo_dir, "content"))
        directorio_destino = os.path.realpath(
            os.path.join(content_root, ruta_relativa)
        )
        archivo_md = os.path.join(directorio_destino, "index.md")
        dentro_de_content = (
            directorio_destino == content_root
            or directorio_destino.startswith(content_root + os.sep)
        )

        if not dentro_de_content:
            print(f"❌ Ruta fuera del directorio de contenido: {ruta_relativa}")
        elif os.path.isfile(archivo_md):
            # comment_md is already quoted line by line, so it is written
            # as-is (no extra "> " prefix, no trailing period).
            with open(archivo_md, "a", encoding="utf-8") as hugo_file:
                hugo_file.write(f"\n\n{comment_md}\n")
            print(f"✅ Archivo actualizado en: {archivo_md}")
        else:
            print(f"❌ No se encontró el archivo en {archivo_md}. Verifica la estructura.")
# Borrar # Borrar
mail.store(m_id, '+FLAGS', '\\Deleted') mail.store(m_id, '+FLAGS', '\\Deleted')