# Settings read from the environment (documented in .env.example):
#   IMAP_SERVER, IMAP_USER, IMAP_PASS, IMAP_FOLDER   IMAP connection and folder
#   DOMAIN_TO_SEARCH       domain that must appear in the subject (the blog's domain)
#   IGNORE_KEYWORDS        comma-separated keywords; matching messages are ignored
#   SAVE_HTML, SAVE_XML, SEND_WEBHOOK   output switches, enabled by the string "True"
#   WP_WEBHOOK_URL         WordPress webhook endpoint
#   WEBHOOK_SECRET_TOKEN   shared secret sent with every webhook call
import email
import imaplib
import os
import re
import xml.etree.ElementTree as ET
from datetime import datetime
from email.header import decode_header

import requests
from bs4 import BeautifulSoup, Comment, Doctype
from dotenv import load_dotenv

load_dotenv()


def clean_html(raw_html):
    """Reduce an HTML fragment to a small whitelist of formatting tags.

    Allowed tags (b, i, u, em, strong, blockquote, p, br) are kept with all
    their attributes removed. Any other tag is unwrapped, i.e. the tag goes
    away but its inner text stays. Doctype declarations, HTML comments and
    the whole content of head/script/style/title are dropped, because that
    text is never meant to be shown to a reader.

    Args:
        raw_html: HTML source as a string; None is treated as empty.

    Returns:
        The sanitized markup as a string.
    """
    allowed_tags = {'b', 'i', 'u', 'em', 'strong', 'blockquote', 'p', 'br'}
    dropped_tags = ['head', 'script', 'style', 'title']
    soup = BeautifulSoup(raw_html or "", "html.parser")

    # find_all(True) only yields tags, so doctype and comment nodes have to be
    # removed explicitly or they end up verbatim in the output.
    for node in list(soup.descendants):
        if isinstance(node, (Comment, Doctype)):
            node.extract()

    # Re-query after every removal: a decomposed parent invalidates the
    # children that an earlier find_all() result would still reference.
    doomed = soup.find(dropped_tags)
    while doomed is not None:
        doomed.decompose()
        doomed = soup.find(dropped_tags)

    for tag in soup.find_all(True):
        if tag.name in allowed_tags:
            tag.attrs = {}  # strip class, style, id, event handlers, ...
        else:
            tag.unwrap()  # drop the tag, keep its inner text

    return str(soup)
def _decode_mime_header(value):
    """Decode an RFC 2047 header value into a single string.

    Every encoded-word chunk is decoded with its own charset. Returns "" for
    a missing header, and the undecoded text when a chunk is malformed or
    names an unknown charset.
    """
    # Local imports: these names are not among the file-level imports.
    from email.errors import HeaderParseError
    from email.header import make_header

    if not value:
        return ""
    try:
        return str(make_header(decode_header(value)))
    except (HeaderParseError, LookupError, UnicodeError):
        return str(value)


def get_sender_name(from_header):
    """Return a display name for the sender of a message.

    Args:
        from_header: Raw value of the From header, or None when it is absent.

    Returns:
        The decoded display name when the header has one, otherwise the part
        of the address before the "@". An empty string for a missing header.
    """
    from email.utils import parseaddr  # local: not a file-level import

    if not from_header:
        return ""
    raw = str(from_header)
    display_name, address = parseaddr(raw)
    name = _decode_mime_header(display_name).strip()
    if name:
        return name
    if "@" in address:
        return address.partition("@")[0].strip()
    # Not a parseable address at all: fall back to the header text itself.
    return _decode_mime_header(raw).strip()


def _decode_part(part):
    """Decode one MIME part to text using its declared charset (UTF-8 if none)."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # Charset name unknown to Python: best effort with UTF-8.
        return payload.decode("utf-8", errors="replace")


def _message_body(msg):
    """Return the first text/html part of a message, else its first text/plain part."""
    if not msg.is_multipart():
        return _decode_part(msg)
    plain = ""
    for part in msg.walk():
        content_type = part.get_content_type()
        if content_type == "text/html":
            return _decode_part(part)
        if content_type == "text/plain" and not plain:
            plain = _decode_part(part)
    return plain


def _post_comment(target_url, author_name, comment_html):
    """POST one comment to WP_WEBHOOK_URL; return True only on a 2xx answer."""
    try:
        response = requests.post(
            os.getenv("WP_WEBHOOK_URL"),
            json={
                "auth_token": os.getenv("WEBHOOK_SECRET_TOKEN"),
                "target_page": target_url,
                "author_name": author_name,
                "comment_content": comment_html,
            },
            timeout=30,  # without a timeout a stalled server blocks the run forever
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Webhook fallido para {target_url}: {exc}")
        return False
    return True


def _handle_message(msg, file_stem, domain, ignore_list):
    """Filter one message and run the enabled outputs on it.

    Returns True when the message was fully handled and may be deleted, and
    False when it was filtered out or the webhook delivery failed.
    """
    # Local imports: these names are not among the file-level imports.
    from datetime import timezone
    from email.utils import format_datetime
    from html import escape

    # 1. Subject (the title / URL of the page); collapse header folding.
    subject = " ".join(_decode_mime_header(msg["Subject"]).split())

    # 2. Only messages whose subject mentions the configured domain.
    if domain not in subject.lower():
        return False

    # 3. Sender name without the mail domain.
    sender_name = get_sender_name(msg.get("From"))

    # 4. Body; skip messages containing any ignored keyword.
    body = _message_body(msg)
    lowered_body = body.lower()
    if any(word in lowered_body for word in ignore_list):
        return False

    clean_body = clean_html(body)
    rss_title = f"Comentario de {sender_name} sobre {subject}"

    if os.getenv("SAVE_HTML") == "True":
        with open(f"{file_stem}.html", "w", encoding="utf-8") as f:
            # NOTE(review): the markup of the original template did not
            # survive extraction of this file; only its text
            # ("Remitente: ...") is known. Confirm the wrapper tags against
            # the real script. The sender name is untrusted, hence escape().
            f.write(f"<p>Remitente: {escape(sender_name)}</p>\n{clean_body}\n")

    if os.getenv("SAVE_XML") == "True":
        rss = ET.Element("rss", version="2.0")
        channel = ET.SubElement(rss, "channel")
        item = ET.SubElement(channel, "item")
        ET.SubElement(item, "title").text = rss_title
        ET.SubElement(item, "description").text = clean_body
        ET.SubElement(item, "author").text = sender_name
        # Real UTC, formatted locale-independently; strftime on a naive
        # now() would label local time as GMT.
        ET.SubElement(item, "pubDate").text = format_datetime(
            datetime.now(timezone.utc), usegmt=True
        )
        ET.ElementTree(rss).write(
            f"{file_stem}.xml", encoding="utf-8", xml_declaration=True
        )

    if os.getenv("SEND_WEBHOOK") == "True":
        # The subject is the URL of the page being commented on.
        if subject.lower().startswith(("http://", "https://")):
            target_url = subject
        else:
            target_url = f"http://{subject}"
        if not _post_comment(target_url, sender_name, clean_body):
            return False  # keep the mail so the next run can retry

    return True


def process_emails():
    """Turn matching e-mails of the configured IMAP folder into comments.

    For every message whose subject contains DOMAIN_TO_SEARCH and whose body
    contains none of IGNORE_KEYWORDS, the sanitized body is written to
    msg_<id>.html / msg_<id>.xml and/or posted to the WordPress webhook,
    depending on SAVE_HTML, SAVE_XML and SEND_WEBHOOK. Handled messages are
    deleted from the mailbox; filtered messages and messages whose webhook
    delivery failed are left in place.

    Raises:
        RuntimeError: if DOMAIN_TO_SEARCH is not configured.
    """
    domain = (os.getenv("DOMAIN_TO_SEARCH") or "").strip().lower()
    if not domain:
        # An empty domain would match, and then delete, every message.
        raise RuntimeError("DOMAIN_TO_SEARCH no está configurado")

    # Empty entries are dropped: "" is a substring of every body and would
    # cause all messages to be ignored.
    ignore_list = [
        word.strip().lower()
        for word in (os.getenv("IGNORE_KEYWORDS") or "").split(",")
        if word.strip()
    ]

    mail = imaplib.IMAP4_SSL(os.getenv("IMAP_SERVER"))
    try:
        mail.login(os.getenv("IMAP_USER"), os.getenv("IMAP_PASS"))
        mail.select(os.getenv("IMAP_FOLDER") or "INBOX")

        status, messages = mail.search(None, 'ALL')
        if status != "OK":
            print(f"Fallo en la búsqueda IMAP: {status}")
            return

        for m_id in (messages[0] or b"").split():
            res, msg_data = mail.fetch(m_id, '(RFC822)')
            for response_part in msg_data:
                if not isinstance(response_part, tuple):
                    continue
                msg = email.message_from_bytes(response_part[1])
                if _handle_message(msg, f"msg_{m_id.decode()}", domain, ignore_list):
                    mail.store(m_id, '+FLAGS', '\\Deleted')

        mail.expunge()
    finally:
        # Close the session even when a message fails half-way through.
        mail.logout()


if __name__ == "__main__":
    process_emails()
/**
 * REST callback: validate the shared token and store the payload as a
 * comment awaiting moderation.
 *
 * The route registration names this function as the callback of a POST
 * route whose permission_callback is '__return_true', so the token check
 * below is the only access control.
 *
 * Expected JSON keys (as sent by email-processor.py): auth_token,
 * target_page, author_name, comment_content.
 *
 * @param WP_REST_Request $request Incoming webhook request.
 * @return WP_REST_Response|WP_Error 200 on success, 403 on a bad token,
 *         404 when the URL matches no post, 500 when the insert fails.
 */
function procesar_webhook_comentario(WP_REST_Request $request) {
    // 1. Read the JSON sent by Python; a non-JSON body is treated as empty.
    $params = $request->get_json_params();
    if (!is_array($params)) {
        $params = array();
    }
    $token_recibido = $params['auth_token'] ?? '';
    // NOTE(review): hard-coded secret; it must match WEBHOOK_SECRET_TOKEN.
    // Consider moving it out of the source (e.g. a wp-config.php constant).
    $token_esperado = 'mi_clave_secreta_123';

    // 2. Validate the token. hash_equals() compares in constant time, so the
    //    response time does not reveal how many leading characters matched.
    if (!is_string($token_recibido) || !hash_equals($token_esperado, $token_recibido)) {
        return new WP_Error('forbidden', 'No autorizado', array('status' => 403));
    }

    // 3. Extract and sanitize; missing keys become empty strings.
    $target_url = esc_url_raw($params['target_page'] ?? '');
    $author_name = sanitize_text_field($params['author_name'] ?? '');
    $comment_body = wp_kses($params['comment_content'] ?? '', array(
        'b' => array(), 'i' => array(), 'u' => array(),
        'em' => array(), 'strong' => array(), 'blockquote' => array(),
        'p' => array(), 'br' => array()
    ));

    // 4. Resolve the post/page ID from the URL.
    $post_id = url_to_postid($target_url);

    if ($post_id === 0) {
        return new WP_REST_Response(array('error' => 'URL de destino no encontrada en este sitio'), 404);
    }

    // 5. Insert the comment.
    $comment_data = array(
        'comment_post_ID' => $post_id,
        'comment_author' => $author_name,
        'comment_author_email' => 'webhook@tudominio.com', // generic address that identifies the origin
        'comment_content' => $comment_body,
        'comment_type' => 'comment',
        'comment_parent' => 0,
        'comment_approved' => 0, // 0 = pending moderation, 1 = approved automatically
    );

    $comment_id = wp_insert_comment($comment_data);

    if ($comment_id) {
        return new WP_REST_Response(array(
            'success' => true,
            'message' => 'Comentario recibido y enviado a moderación',
            'comment_id' => $comment_id
        ), 200);
    } else {
        return new WP_Error('db_error', 'No se pudo insertar el comentario', array('status' => 500));
    }
}

Remitente: channel2rss

+ + +

sdfgsdfgsdfg

 

sdfgsdfg

 

sdfgsdfg

 

 

que no se pierdan los saltos ded linea

+ +
\ No newline at end of file diff --git a/msg_29.xml b/msg_29.xml new file mode 100644 index 0000000..d0c7b46 --- /dev/null +++ b/msg_29.xml @@ -0,0 +1,7 @@ + +Comentario de channel2rss sobre https://interlan.ec/blog/2025/12/26/analisis-de-logs-la-teoria-del-internet-parece-real/<!DOCTYPE html> + + +<p>sdfgsdfgsdfg</p><p> </p><p>sdfgsdfg</p><p> </p><p>sdfgsdfg</p><p> </p><p> </p><p>que no se pierdan los saltos ded linea</p> + +channel2rssTue, 06 Jan 2026 11:09:57 GMT \ No newline at end of file