From 9cdeb73247bdec7fa24eaa7383f5ba1bbcd3cc0f Mon Sep 17 00:00:00 2001
From: drk0027 <engatadojose@gmail.com>
Date: Fri, 2 Jan 2026 22:33:27 -0500
Subject: [PATCH] Proyecto creado

---
 .gitignore       | 180 +++++++++++++++++++++++++++++++++++++++++++++++
 LICENSE          |  18 +++++
 README.md        |  99 ++++++++++++++++++++++++++
 main.py          |  92 ++++++++++++++++++++++++
 requirements.txt |  11 +++
 5 files changed, 400 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ca12401
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,180 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+#archivos para este proyecto
+*.pdf
+*.csv
+*.json
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..4a5be74
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,18 @@
+MIT License
+
+Copyright (c) 2026 Drk0027
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
+associated documentation files (the "Software"), to deal in the Software without restriction, including 
+without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the 
+following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial 
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
+EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2678cad
--- /dev/null
+++ b/README.md
@@ -0,0 +1,99 @@
+# Consumo Eléctrico - Extractor y Analizador
+
+Este proyecto permite **extraer automáticamente datos de consumo eléctrico** desde planillas de luz en formato PDF (texto digital), organizarlos en tablas y exportarlos a **CSV y JSON** para análisis y visualización.
+
+## Características
+- Extrae el **Valor total** desde la fila `VALOR TOTAL` en la columna `Valor total`.
+- Obtiene también la **fecha** de cada planilla.
+- Exporta resultados a:
+  - `consumo.csv` → tabla lista para abrir en Excel o LibreOffice.
+  - `consumo.json` → formato estructurado para integraciones o APIs.
+- Genera gráficos comparativos:
+  - Evolución mensual del consumo.
+  - Comparación entre el último año y el anterior.
+  - Etiquetas con valores exactos en cada punto (más o menos).
+
+## Ejemplo de salida
+
+**CSV (`consumo.csv`):**
+```csv
+archivo,Fecha,KW Consumidos,Valor total,Valor SE y AP,Valor Basura
+enero-2024.pdf,10-01-2024,679.00,94.98,86.37,6.31
+febrero-2024.pdf,08-02-2024,619.00,86.62,78.58,5.74
+marzo-2024.pdf,11-03-2024,604.00,84.53,76.63,5.6
+```
+
+**JSON (`consumo.json`):**
+
+```json
+[
+    {
+        "archivo":"enero-2024.pdf",
+        "Fecha":"10-01-2024",
+        "KW Consumidos":"679.00",
+        "Valor total":94.98,
+        "Valor SE y AP":86.37,
+        "Valor Basura":6.31
+    },
+    {
+        "archivo":"febrero-2024.pdf",
+        "Fecha":"08-02-2024",
+        "KW Consumidos":"619.00",
+        "Valor total":86.62,
+        "Valor SE y AP":78.58,
+        "Valor Basura":5.74
+    },
+]
+```
+
+**Gráfico comparativo:**
+- Línea azul → último año.
+- Línea roja → año anterior.
+- Etiquetas con valores de consumo en cada mes.
+
+## Instalación
+
+1. Clona este repositorio:
+   ```bash
+   git clone https://git.interlan.ec/Drk0027/consumo-electrico.git
+   cd consumo-electrico
+   ```
+
+2. Instala dependencias:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+   Dependencias principales:
+   - `pdfplumber`
+   - `pandas`
+   - `matplotlib`
+
+## Uso
+
+1. Coloca tus archivos PDF en la carpeta del proyecto, con nombres en formato `nombremes-año.pdf` (por ejemplo, `enero-2024.pdf`).
+2. Edita la lista `archivos` de `main.py` (línea 7) con los nombres de tus archivos PDF.
+3. Ejecuta el script principal:
+   ```bash
+   python main.py
+   ```
+4. Revisa los archivos generados:
+   - `consumo.csv`
+   - `consumo.json`
+   - Gráficos en pantalla o exportados como imágenes.
+
+## 💡 Ideas de mejora
+- Extracción de **costo total** y otros campos (tarifa, número de medidor).
+- ¿Alguna forma de automatización para descargar regularmente las facturas que llegan al correo?
+- Más flexibilidad para no tener que agregar los archivos de forma manual.
+
+## 📬 Sugerencias y contacto
+Me encantaría recibir tus ideas y mejoras.  
+Puedes escribirme a:
+
+- 📧 **Correo:** `drk0027@interlan.ec`  
+- 📢 **Canal de Telegram:** [t.me/drk0072](https://t.me/drk0072)
+
+---
+
+¡Gracias por usar este proyecto! Tu retroalimentación ayuda a que siga creciendo.
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..68f4f6b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,92 @@
+import pdfplumber
+import re
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+archivos = ["enero-2024.pdf", "febrero-2024.pdf", "marzo-2024.pdf","abril-2024.pdf","junio-2024.pdf","julio-2024.pdf","agosto-2024.pdf","septiembre-2024.pdf","octubre-2024.pdf","noviembre-2024.pdf","diciembre-2024.pdf","enero-2025.pdf", "febrero-2025.pdf", "marzo-2025.pdf","abril-2025.pdf","junio-2025.pdf","julio-2025.pdf","agosto-2025.pdf","septiembre-2025.pdf","octubre-2025.pdf","noviembre-2025.pdf","diciembre-2025.pdf"]
+
+# One record is appended per extracted table that has an "Energía Facturada" row.
+datos = []
+
+for archivo in archivos:
+    with pdfplumber.open(archivo) as pdf:
+        # Extract each page's text only once; pages that yield no text are skipped.
+        textos = (pagina.extract_text() for pagina in pdf.pages)
+        texto = " ".join(t for t in textos if t)
+
+        # Bill-level fields are located in the flattened text with regexes.
+        fecha = re.search(r"Fecha hasta *?(\d{2}-\d{2}-\d{4})", texto)
+        total = re.search(r"VALOR TOTAL.*?\$?([\d,.]+)", texto)
+        val_imp = re.search(r"TOTAL SE Y AP \(1\).*?\$?([\d,.]+)", texto)
+        val_basura = re.search(r"TOTAL RECOLECCIÓN BASURA \(5\).*?\$?([\d,.]+)", texto)
+
+        for pagina in pdf.pages:
+            for tabla in pagina.extract_tables():
+                if not tabla or not tabla[0]:
+                    continue  # no header row to build a DataFrame from
+                # The first extracted row is used as the column header.
+                df_tabla = pd.DataFrame(tabla[1:], columns=tabla[0])
+
+                # Look for the row whose first cell mentions "Energía Facturada".
+                fila = df_tabla[df_tabla.iloc[:,0].str.contains("Energía Facturada", case=False, na=False)]
+                if fila.empty:
+                    continue
+                # "Consumo Total" must match the table's column name exactly.
+                consumo = fila["Consumo Total"].values[0]
+                datos.append({
+                    "archivo": archivo,
+                    "Fecha" : fecha.group(1) if fecha else None,
+                    "KW Consumidos": consumo,
+                    "Valor total": float(total.group(1).replace(",", "")) if total else None,
+                    "Valor SE y AP": float(val_imp.group(1).replace(",", "")) if val_imp else None,
+                    "Valor Basura" : float(val_basura.group(1).replace(",", "")) if val_basura else None
+                })
+
+df = pd.DataFrame(datos)
+print(df)
+
+# With no extracted rows the CSV reload and the plot below cannot work.
+if df.empty:
+    raise SystemExit("No se extrajeron datos de los PDF indicados.")
+
+# Export to CSV
+df.to_csv("consumo.csv", index=False)
+
+# Export to JSON
+df.to_json("consumo.json", orient="records", indent=4)
+
+print("Datos exportados a consumo.csv y consumo.json")
+
+# --- Plot the results ---
+# Reload the data that was just exported to consumo.csv.
+df = pd.read_csv("consumo.csv")
+
+# Parse the day-first dates and derive year and month columns.
+df["Fecha"] = pd.to_datetime(df["Fecha"], dayfirst=True)
+df["anio"] = df["Fecha"].dt.year
+df["mes"] = df["Fecha"].dt.month
+
+# Compare the most recent year found against the one right before it.
+ultimo_anio = df["anio"].max()
+anterior_anio = ultimo_anio - 1
+
+plt.figure(figsize=(10,6))
+for anio, color in ((anterior_anio, "red"), (ultimo_anio, "blue")):
+    # Sort by month so the line runs left to right whatever the file order.
+    df_anio = df[df["anio"] == anio].sort_values("mes")
+    plt.plot(df_anio["mes"], df_anio["KW Consumidos"], marker="o", label=f"Año {anio}", color=color)
+    # Label every point with its own kWh value, slightly above the marker.
+    # Text and height both come from the plotted column; using "Valor total"
+    # here would put the labels at money amounts on a kWh axis.
+    for x, y in zip(df_anio["mes"], df_anio["KW Consumidos"]):
+        plt.text(x, y+5, str(y), color=color, ha="center", fontsize=8)
+
+plt.title("Comparación de consumo eléctrico entre dos años")
+plt.xlabel("Mes")
+plt.ylabel("Consumo Total (kWh)")
+plt.xticks(range(1,13))
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.show()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..57761d8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+matplotlib==3.9.2
+multidict==6.1.0
+numpy==2.1.2
+pandas==2.2.3
+pdfminer.six==20251107
+pdfplumber==0.11.8
+pillow==11.0.0
+plotly==5.24.1
+python-dateutil==2.9.0.post0
+pytz==2024.2
+tzdata==2024.2