#!/bin/bash
#
# Access report generator.
#
# Scans a Combined-format web server log for hits on one URL, merges the
# unique (IP, date, user agent) combinations into a JSON history file, and
# renders an HTML report with two gnuplot charts.
#
# Configuration is read from ./config.conf, which must define:
#   URL_A_MONITORIZAR  URL (or awk regex) to look for in the request path
#   LOG_FILE           path to the access log
#   OUTPUT_FILE        path to the JSON history file (created if missing)
#
# Requires: awk, sort, uniq, jq, gnuplot.

set -euo pipefail

readonly REPORT_FILE="informe.html"
# NOTE(review): the chart file names and the HTML markup further down were
# not recoverable from the damaged script and are a best-effort
# reconstruction; confirm them against the intended report layout.
readonly HOURLY_CHART="hourly.png"
readonly AGENTS_CHART="agents.png"
# User-agent substrings (case-sensitive) that mark a hit as an RSS reader.
readonly RSS_PATTERN='RSS|Feed|Reader|SimplePie|W3C_Validator'

# Scratch directory; set in main and removed by cleanup on any exit path.
WORK_DIR=""

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}

#######################################
# Extract matching hits from the log as unique "ip|date|hour|agent" lines.
# Combined format fields: $1=IP, $4=[DD/Mon/YYYY:HH:mm:ss, $7=URL,
# $12..NF=user agent.
# Globals:   URL_A_MONITORIZAR, LOG_FILE (read)
# Outputs:   sorted, de-duplicated records on stdout
#######################################
extract_hits() {
  awk -v url="$URL_A_MONITORIZAR" '
    $7 == url || $7 ~ url {
      # Drop the leading "[" so the pieces are DD, Mon, YYYY, HH, mm, ss.
      ts = $4
      sub(/^\[/, "", ts)
      split(ts, t, "[:/]")
      fecha = t[1] "/" t[2] "/" t[3]
      hora = t[4]

      # The user agent is everything from field 12 on, minus its quotes.
      ua = ""
      for (i = 12; i <= NF; i++) ua = (ua == "" ? $i : ua " " $i)
      gsub(/"/, "", ua)

      print $1 "|" fecha "|" hora "|" ua
    }
  ' "$LOG_FILE" | sort -u
}

#######################################
# Merge extracted records into the JSON history in a single jq pass.
# An entry is appended only when no existing entry has the same
# ip + agente + fecha. es_rss is stored as the string "true"/"false".
# Globals:   OUTPUT_FILE (read, rewritten), RSS_PATTERN (read)
# Arguments: $1 - file holding "ip|date|hour|agent" records
#######################################
merge_entries() {
  local data_file=$1

  if [[ ! -f "$OUTPUT_FILE" ]]; then
    printf '[]\n' > "$OUTPUT_FILE"
  fi

  jq -R -n --slurpfile prev "$OUTPUT_FILE" --arg rss "$RSS_PATTERN" '
    def key: [.ip, .agente, .fecha] | tojson;
    ($prev[0] // []) as $old
    | reduce (
        inputs
        | select(length > 0)
        | split("|")
        # The agent may itself contain "|", so rejoin everything after hour.
        | {ip: .[0], fecha: .[1], hora: .[2], agente: (.[3:] | join("|"))}
        | .es_rss = (if (.agente | test($rss)) then "true" else "false" end)
      ) as $new (
        {seen: (reduce $old[] as $e ({}; .[$e | key] = true)), out: $old};
        ($new | key) as $k
        | if .seen[$k] then . else (.seen[$k] = true | .out += [$new]) end
      )
    | .out
  ' "$data_file" > "$OUTPUT_FILE.tmp"
  mv -- "$OUTPUT_FILE.tmp" "$OUTPUT_FILE"
}

#######################################
# Build the gnuplot input files from the JSON history.
# Globals:   OUTPUT_FILE (read)
# Arguments: $1 - output file for "hour count" rows
#            $2 - output file for "\"agent\" count" rows (top 10)
#######################################
build_plot_data() {
  local hourly_dat=$1
  local agents_dat=$2

  # Hits per hour.
  jq -r '.[].hora' "$OUTPUT_FILE" \
    | sort \
    | uniq -c \
    | awk '{ print $2, $1 }' > "$hourly_dat"

  # Hits per agent, top 10. The count is saved before the prefix is
  # stripped; awk (not head) limits the rows so no stage sees SIGPIPE.
  jq -r '.[].agente' "$OUTPUT_FILE" \
    | sort \
    | uniq -c \
    | sort -rn \
    | awk 'NR <= 10 {
        count = $1
        sub(/^ *[0-9]+ ?/, "")
        printf "\"%s\" %s\n", $0, count
      }' > "$agents_dat"
}

#######################################
# Render both charts as PNG files in the current directory.
# Arguments: $1 - hourly data file, $2 - agents data file
# Returns:   gnuplot's exit status
#######################################
render_charts() {
  local hourly_dat=$1
  local agents_dat=$2

  gnuplot <<GNUPLOT
set terminal png size 800,600
set style fill solid
set boxwidth 0.8

set output '${HOURLY_CHART}'
set title 'Frecuencia Horaria'
set xlabel 'Hora'
set ylabel 'Accesos'
plot '${hourly_dat}' using 1:2 with boxes notitle

set output '${AGENTS_CHART}'
set title 'Top Agentes'
set xlabel ''
set xtics rotate by -45
plot '${agents_dat}' using 2:xtic(1) with boxes notitle
GNUPLOT
}

#######################################
# Write the HTML report.
# Globals:   URL_A_MONITORIZAR, UNIQUE_AGENTS, RSS_COUNT (read)
#######################################
write_report() {
  cat > "$REPORT_FILE" <<HTML
<!DOCTYPE html>
<html lang="es">
<head>
  <meta charset="utf-8">
  <title>Reporte de Accesos</title>
</head>
<body>
  <h1>Reporte para: $URL_A_MONITORIZAR</h1>
  <p>Agentes Únicos: $UNIQUE_AGENTS</p>
  <p>Lectores RSS detectados: $RSS_COUNT</p>
  <h2>Frecuencia Horaria</h2>
  <img src="$HOURLY_CHART" alt="Frecuencia Horaria">
  <h2>Top Agentes</h2>
  <img src="$AGENTS_CHART" alt="Top Agentes">
</body>
</html>
HTML
}

main() {
  # Load configuration.
  [[ -r ./config.conf ]] || die "cannot read ./config.conf"
  # shellcheck source=/dev/null
  source ./config.conf
  : "${URL_A_MONITORIZAR:?must be set in config.conf}"
  : "${LOG_FILE:?must be set in config.conf}"
  : "${OUTPUT_FILE:?must be set in config.conf}"
  [[ -r "$LOG_FILE" ]] || die "cannot read log file: $LOG_FILE"
  command -v jq > /dev/null || die "jq is required"

  WORK_DIR=$(mktemp -d) || die "mktemp failed"
  trap cleanup EXIT
  local hits="$WORK_DIR/temp_data.txt"
  local hourly_dat="$WORK_DIR/hourly.dat"
  local agents_dat="$WORK_DIR/agents.dat"

  # 1. Log processing.
  echo "Procesando logs..."
  extract_hits > "$hits"

  # 2. Update the JSON history.
  echo "Actualizando JSON..."
  merge_entries "$hits"

  # 3. Prepare gnuplot data.
  build_plot_data "$hourly_dat" "$agents_dat"

  # 4. Charts. A gnuplot failure (e.g. no data yet) should not prevent the
  #    textual part of the report from being written.
  echo "Generando gráficas..."
  if ! render_charts "$hourly_dat" "$agents_dat"; then
    printf 'warning: gnuplot failed; charts may be missing\n' >&2
  fi

  # 5. Summary figures and HTML report. es_rss is matched both as the
  #    string "true" (what this script writes) and as a JSON boolean.
  UNIQUE_AGENTS=$(jq '[.[].agente] | unique | length' "$OUTPUT_FILE")
  RSS_COUNT=$(jq '[.[] | select(.es_rss == "true" or .es_rss == true)] | length' \
    "$OUTPUT_FILE")
  write_report

  echo "Completado. Revisa informe.html"
}

main "$@"