mirror of
https://github.com/escalante29/WealthySmart.git
synced 2026-05-19 11:28:49 +02:00
Add municipal receipt module and convert navbar to sidebar
All checks were successful
Deploy to VPS / deploy (push) Successful in 58s
All checks were successful
Deploy to VPS / deploy (push) Successful in 58s
- New module: Municipalidad de Belén receipt extraction via pdftotext+regex
- Backend: MunicipalReceipt + WaterMeterReading models, upload/list/detail/water-consumption endpoints
- Auto-creates budget Transaction on upload (duplicate-safe via reference)
- Frontend: ServiciosMunicipales page with summary cards, water consumption bar chart, receipt history, PDF upload
- Convert top navbar to left sidebar with section headers (General, Finanzas, Servicios)
- Desktop: fixed 220px sidebar, mobile: sheet overlay
- Grouped nav: Dashboard | Presupuesto, Salarios, Pensiones, Analytics | Municipalidad

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
291
backend/app/services/municipal_receipt_pdf.py
Normal file
291
backend/app/services/municipal_receipt_pdf.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
Extract structured data from Municipalidad de Belén receipts using pdftotext + regex.
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
def _parse_amount(s: str) -> float:
    """Turn an amount with comma thousands separators into a float.

    Example: ``'1,875.00'`` becomes ``1875.0``.

    Raises:
        ValueError: If the text left after removing commas is not a valid float.
    """
    # Splitting on the separator and re-joining drops every comma.
    digits_only = "".join(s.split(","))
    return float(digits_only)
|
||||
|
||||
|
||||
def _parse_date(s: str) -> str:
    """Rewrite a ``dd/mm/yyyy`` date as ``YYYY-MM-DD``.

    Surrounding whitespace is ignored; day and month are zero-padded to two
    digits. No calendar validation is performed.

    Raises:
        ValueError: If the input does not contain exactly three ``/``-separated parts.
    """
    day, month, year = s.strip().split("/")
    return "-".join((year, month.zfill(2), day.zfill(2)))
|
||||
|
||||
|
||||
def _parse_period(s: str) -> str:
    """Rewrite a ``mm/yyyy`` billing period as ``YYYY-MM``.

    Surrounding whitespace is ignored and the month is zero-padded to two digits.

    Raises:
        ValueError: If the input does not contain exactly two ``/``-separated parts.
    """
    month, year = s.strip().split("/")
    return "-".join((year, month.zfill(2)))
|
||||
|
||||
|
||||
@dataclass
class Charge:
    """One billed line item parsed from the receipt's charge table."""

    # Description text at the start of the charge line.
    detail: str
    # Interest amount on this line.
    interests: float
    # IVA amount on this line.
    iva: float
    # Amount billed for the line.
    amount: float
|
||||
|
||||
|
||||
@dataclass
class WaterMeter:
    """A single water-meter reading row from the receipt."""

    # Billing period of the reading.
    period: str
    # Identifier of the meter.
    meter_id: str
    # Previous and current meter readings.
    reading_previous: int
    reading_current: int
    # Consumption for the period (named as cubic metres).
    consumption_m3: int
    # Amounts for the individual water-related services and their IVA.
    agua_potable: float
    serv_ambientales: float
    alcant_sanitario: float
    iva: float
|
||||
|
||||
|
||||
@dataclass
class HistoricalConsumption:
    """Consumption recorded for one meter in one earlier period."""

    # Identifier of the meter.
    meter_id: str
    # Period the consumption belongs to.
    period: str
    # Consumption for that period (named as cubic metres).
    consumption_m3: int
|
||||
|
||||
|
||||
@dataclass
class MunicipalReceiptData:
    """Everything extracted from one municipal receipt.

    The identifying string fields are required; totals default to ``0.0`` and
    each list field defaults to its own fresh empty list.
    """

    # --- Dates ---
    receipt_date: str  # YYYY-MM-DD
    due_date: str  # YYYY-MM-DD

    # --- Account holder ---
    holder_name: str
    holder_cedula: str
    holder_address: str

    # --- Account identifiers ---
    account: str
    finca: str

    # --- Itemised charges and receipt totals ---
    charges: list[Charge] = field(default_factory=list)
    subtotal: float = 0.0
    interests: float = 0.0
    iva: float = 0.0
    total: float = 0.0

    # --- Water usage: current readings and earlier periods ---
    water_meters: list[WaterMeter] = field(default_factory=list)
    historical_consumption: list[HistoricalConsumption] = field(default_factory=list)
|
||||
|
||||
|
||||
def _pdf_to_text(pdf_bytes: bytes) -> str:
    """Convert PDF bytes to text using ``pdftotext -layout``.

    The bytes are written to a temporary ``.pdf`` file whose path is handed
    to the ``pdftotext`` command-line tool; the tool writes the extracted
    text to stdout (``-``).

    Args:
        pdf_bytes: Raw contents of the PDF document.

    Returns:
        The text pdftotext produced, decoded as UTF-8.

    Raises:
        ValueError: If pdftotext exits with a non-zero status.
        subprocess.TimeoutExpired: If pdftotext runs for more than 10 seconds.
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf") as tmp:
        tmp.write(pdf_bytes)
        # Make sure the bytes are on disk before the child process reads them.
        tmp.flush()
        result = subprocess.run(
            # Ask for UTF-8 explicitly and decode as UTF-8 below, so accented
            # labels in the text do not depend on the tool's build default or
            # on the locale of the server process.
            ["pdftotext", "-layout", "-enc", "UTF-8", tmp.name, "-"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    if result.returncode != 0:
        raise ValueError(f"pdftotext failed: {result.stderr}")
    return result.stdout
|
||||
|
||||
|
||||
# Header patterns. Each captures the value following its label in group 1.

# Issue date, dd/mm/yyyy.
RE_FECHA = re.compile(r"Fecha:\s*(\d{2}/\d{2}/\d{4})")

# Due date, dd/mm/yyyy.
RE_VENCIMIENTO = re.compile(r"Fecha de vencimiento:\s*(\d{2}/\d{2}/\d{4})")

# Account holder name: the rest of the line after the label.
RE_NOMBRE = re.compile(r"Nombre:\s*(.+)")

# Account holder cédula: digits only.
RE_CEDULA = re.compile(r"Cédula:\s*(\d+)")

# Account holder address: the rest of the line after the label.
RE_DIRECCION = re.compile(r"Dirección:\s*(.+)")
|
||||
|
||||
# One row of the charge table, matched against a whole stripped line.
# Columns, in order: DETAIL_TEXT, account, finca, interests, iva,
# periodo_actual, periodo_anterior.
RE_CHARGE = re.compile(
    r"^([A-ZÁÉÍÓÚÑ][A-ZÁÉÍÓÚÑ\s.]+?)\s+"  # 1: detail text (upper-case words and dots)
    r"(\d{4})\s+"  # 2: account, four digits
    r"(\d{6}---\d{3})\s+"  # 3: finca, six digits, '---', three digits
    r"([\d,]+\.\d{2})\s+"  # 4: interests
    r"([\d,]+\.\d{2})\s+"  # 5: iva
    r"([\d,]+\.\d{2})\s+"  # 6: periodo_actual
    r"([\d,]+\.\d{2})\s*$"  # 7: periodo_anterior
)
|
||||
|
||||
# Totals patterns. Each captures the amount following its label in group 1.
RE_SUBTOTAL = re.compile(r"Sub-Total:\s+([\d,]+\.\d{2})")
RE_INTERESES = re.compile(r"Intereses:\s+([\d,]+\.\d{2})")
RE_IVA = re.compile(r"IVA\s+([\d,]+\.\d{2})")
# The lookbehind stops this from matching the "Total:" inside "Sub-Total:".
# Without it, .search() returns the sub-total amount whenever the sub-total
# is printed before the grand total.
RE_TOTAL = re.compile(r"(?<![\w-])Total:\s+([\d,]+\.\d{2})")
|
||||
|
||||
# One row of the water-meter table.
# Columns, in order: period, meter_id, lec_ant, lec_act, consumo,
# agua_potable, serv_amb, alcant, iva.
RE_WATER_METER = re.compile(
    r"(\d{2}/\d{4})\s+"  # 1: period, mm/yyyy
    r"(\d{4})\s+"  # 2: meter id, four digits
    r"(\d{5})\s+"  # 3: previous reading, five digits
    r"(\d{5})\s+"  # 4: current reading, five digits
    r"(\d+)\s+"  # 5: consumption
    r"([\d,]+\.\d{2})\s+"  # 6: agua potable
    r"([\d,]+\.\d{2})\s+"  # 7: servicios ambientales
    r"([\d,]+\.\d{2})\s+"  # 8: alcantarillado sanitario
    r"([\d,]+\.\d{2})"  # 9: iva
)
|
||||
|
||||
# One entry of the historical-consumption listing.
# Groups, in order: meter_id (four digits), period (mm/yyyy),
# consumption (five digits).
RE_HISTORICAL = re.compile(r"(\d{4})\s+(\d{2}/\d{4})\s+(\d{5})")
|
||||
|
||||
|
||||
def _first_group(pattern: re.Pattern, text: str) -> "str | None":
    """Return group 1 of the first match of *pattern* in *text*, or None."""
    found = pattern.search(text)
    return found.group(1) if found else None


def _fill_header(data: "MunicipalReceiptData", text: str) -> None:
    """Populate the dates and account-holder fields found in *text*."""
    if (raw := _first_group(RE_FECHA, text)) is not None:
        data.receipt_date = _parse_date(raw)
    if (raw := _first_group(RE_VENCIMIENTO, text)) is not None:
        data.due_date = _parse_date(raw)
    if (raw := _first_group(RE_NOMBRE, text)) is not None:
        data.holder_name = raw.strip()
    if (raw := _first_group(RE_CEDULA, text)) is not None:
        data.holder_cedula = raw.strip()
    if (raw := _first_group(RE_DIRECCION, text)) is not None:
        # The trailing period is dropped from the stored address.
        data.holder_address = raw.strip().rstrip(".")


def _fill_charges(data: "MunicipalReceiptData", text: str) -> None:
    """Append a Charge per matching line and record account/finca."""
    for line in text.splitlines():
        row = RE_CHARGE.match(line.strip())
        if not row:
            continue
        detail, account, finca, interests, iva, amount, _periodo_anterior = row.groups()
        # Account and finca are read off every charge line; the last one wins.
        data.account = account
        data.finca = finca
        data.charges.append(
            Charge(
                detail=detail.strip(),
                interests=_parse_amount(interests),
                iva=_parse_amount(iva),
                amount=_parse_amount(amount),
            )
        )


def _fill_totals(data: "MunicipalReceiptData", text: str) -> None:
    """Populate the receipt-level totals; a missing label keeps its default."""
    labelled = (
        ("subtotal", RE_SUBTOTAL),
        ("interests", RE_INTERESES),
        ("iva", RE_IVA),
        ("total", RE_TOTAL),
    )
    for attr, pattern in labelled:
        raw = _first_group(pattern, text)
        if raw is not None:
            setattr(data, attr, _parse_amount(raw))


def _fill_water_meters(data: "MunicipalReceiptData", text: str) -> None:
    """Append a WaterMeter for every water-meter row found in *text*."""
    for row in RE_WATER_METER.finditer(text):
        period, meter_id, previous, current, consumed, agua, ambientales, alcant, iva = row.groups()
        data.water_meters.append(
            WaterMeter(
                period=_parse_period(period),
                meter_id=meter_id,
                reading_previous=int(previous),
                reading_current=int(current),
                consumption_m3=int(consumed),
                agua_potable=_parse_amount(agua),
                serv_ambientales=_parse_amount(ambientales),
                alcant_sanitario=_parse_amount(alcant),
                iva=_parse_amount(iva),
            )
        )


def _fill_history(data: "MunicipalReceiptData", text: str) -> None:
    """Append historical consumption entries from the section after its heading."""
    # Only text following the heading is scanned, so rows elsewhere in the
    # receipt are never mistaken for history entries.
    sections = text.split("DETALLE DE CONSUMO MESES ANTERIORES")
    if len(sections) <= 1:
        return
    for row in RE_HISTORICAL.finditer(sections[1]):
        meter_id, period, consumed = row.groups()
        data.historical_consumption.append(
            HistoricalConsumption(
                meter_id=meter_id,
                period=_parse_period(period),
                consumption_m3=int(consumed),
            )
        )


def _receipt_to_dict(data: "MunicipalReceiptData") -> dict:
    """Arrange the parsed receipt into the nested output dictionary."""
    # Imported here because the module-level import only brings in
    # `dataclass` and `field`.
    from dataclasses import asdict

    return {
        "receipt": {
            "type": "RECIBO MUNICIPAL",
            "issuer": {
                "name": "MUNICIPALIDAD DE BELÉN",
                "phone": "(506) 2587-0000",
                "fax": "(506) 2293-3667",
                "website": "www.belen.go.cr",
            },
            "date": data.receipt_date,
            "due_date": data.due_date,
            "account_holder": {
                "name": data.holder_name,
                "cedula": data.holder_cedula,
                "address": data.holder_address,
            },
            "account": data.account,
            "finca": data.finca,
        },
        # asdict() yields one key per dataclass field, in declaration order.
        "charges": [asdict(charge) for charge in data.charges],
        "totals": {
            "subtotal": data.subtotal,
            "interests": data.interests,
            "iva": data.iva,
            "total": data.total,
        },
        "water_meters": [asdict(meter) for meter in data.water_meters],
        "historical_consumption": [asdict(entry) for entry in data.historical_consumption],
    }


def extract_municipal_receipt(
    pdf_bytes: bytes, filename: str
) -> dict:
    """Extract structured data from a municipal receipt PDF.

    The PDF is converted to text, then header fields, charge lines, totals,
    water-meter rows and historical consumption are pulled out with the
    module's regular expressions.

    Args:
        pdf_bytes: Raw contents of the receipt PDF.
        filename: Name of the uploaded file; only used in error messages.

    Returns:
        A dict with the keys ``receipt``, ``charges``, ``totals``,
        ``water_meters`` and ``historical_consumption``.

    Raises:
        ValueError: If the text conversion fails, the text does not contain
            ``RECIBO MUNICIPAL``, no receipt date can be parsed, or no charge
            line is found.
    """
    text = _pdf_to_text(pdf_bytes)

    if "RECIBO MUNICIPAL" not in text:
        raise ValueError(f"{filename}: Not a municipal receipt")

    data = MunicipalReceiptData(
        receipt_date="",
        due_date="",
        holder_name="",
        holder_cedula="",
        holder_address="",
        account="",
        finca="",
    )

    _fill_header(data, text)
    _fill_charges(data, text)
    _fill_totals(data, text)
    _fill_water_meters(data, text)
    _fill_history(data, text)

    # A receipt without a date or without any charge is treated as unparseable.
    if not data.receipt_date:
        raise ValueError(f"{filename}: Could not parse receipt date")
    if not data.charges:
        raise ValueError(f"{filename}: No charges found")

    return _receipt_to_dict(data)
|
||||
Reference in New Issue
Block a user