From 0923337fffb72f17736d37d973725a66974d4af3 Mon Sep 17 00:00:00 2001 From: Carlos Escalante Date: Wed, 1 Apr 2026 10:09:51 -0600 Subject: [PATCH] Fix pension paste parser for split-line format from bank website The bank website puts field labels and amounts on separate lines. Parser now handles both inline and split formats. Co-Authored-By: Claude Opus 4.6 (1M context) --- frontend/src/lib/parsePensionPaste.ts | 90 ++++++++++++++++++++------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/frontend/src/lib/parsePensionPaste.ts b/frontend/src/lib/parsePensionPaste.ts index a890ddc..6882ff6 100644 --- a/frontend/src/lib/parsePensionPaste.ts +++ b/frontend/src/lib/parsePensionPaste.ts @@ -14,14 +14,12 @@ export interface PensionParsedEntry { } function parseAmount(raw: string): number { - // "¢ 18,684,764.98" or "¢ -552,213.24" or just "18,684,764.98" const cleaned = raw.replace(/[¢\s]/g, '').replace(/,/g, ''); const num = parseFloat(cleaned); return isNaN(num) ? 0 : num; } function parseDateDMY(raw: string): string { - // "01/03/2026" → "2026-03-01" const m = raw.match(/(\d{2})\/(\d{2})\/(\d{4})/); if (!m) return ''; return `${m[3]}-${m[2]}-${m[1]}`; @@ -33,6 +31,17 @@ function extractAmounts(line: string): number[] { return matches.map(parseAmount); } +// Field labels in the order they appear in the bank statement +const FIELD_LABELS: [RegExp, string][] = [ + [/saldo\s*anterior/i, 'saldo_anterior'], + [/aportes/i, 'aportes'], + [/rendimientos/i, 'rendimientos'], + [/retiros/i, 'retiros'], + [/traslados/i, 'traslados'], + [/comisi[oó]n/i, 'comision'], + [/bonificaci[oó]n/i, 'bonificacion'], +]; + interface BlockResult { funds: string[]; fields: Record; @@ -60,32 +69,69 @@ function parseBlock(lines: string[]): BlockResult | null { return null; } - const fieldMap: [RegExp, string][] = [ - [/saldo\s*anterior/i, 'saldo_anterior'], - [/aportes/i, 'aportes'], - [/rendimientos/i, 'rendimientos'], - [/retiros/i, 'retiros'], - [/traslados/i, 'traslados'], - [/comisi[oó]n/i, 'comision'], - [/bonificaci[oó]n/i, 'bonificacion'], - [/saldo\s*actual/i, 'saldo_final'], - ]; + // Strategy 1: Try same-line parsing (label + amounts on same line) + // Strategy 2: Collect standalone amount lines for split-format parsing + const detectedFieldOrder: string[] = []; + const standaloneAmounts: number[] = []; for (const line of lines) { - for (const [regex, key] of fieldMap) { - if (regex.test(line)) { - const amounts = extractAmounts(line); - if (amounts.length > 0) { - result.fields[key] = amounts; - } - } - } - - // Period + // Check for period const periodMatch = line.match(/del\s+(\d{2}\/\d{2}\/\d{4})\s+al\s+(\d{2}\/\d{2}\/\d{4})/i); if (periodMatch) { result.period_start = parseDateDMY(periodMatch[1]); result.period_end = parseDateDMY(periodMatch[2]); + continue; + } + + // Check for "Saldo Actual" line (always has amounts inline) + if (/saldo\s*actual/i.test(line)) { + const amounts = extractAmounts(line); + if (amounts.length > 0) { + result.fields['saldo_final'] = amounts; + } + continue; + } + + // Check if this line matches a field label + let matchedLabel = false; + for (const [regex, key] of FIELD_LABELS) { + if (regex.test(line)) { + matchedLabel = true; + const amounts = extractAmounts(line); + if (amounts.length > 0) { + // Strategy 1: amounts on same line as label + result.fields[key] = amounts; + } else { + // Strategy 2: label-only line, record the order + detectedFieldOrder.push(key); + } + break; + } + } + + // If not a label line, check if it's a standalone amount line + if (!matchedLabel) { + const amounts = extractAmounts(line); + if (amounts.length === 1) { + standaloneAmounts.push(amounts[0]); + } + } + } + + // If we have standalone amounts and field labels, map them + // Format: N labels, then N amounts for fund1, then N amounts for fund2, ... + if (detectedFieldOrder.length > 0 && standaloneAmounts.length > 0) { + const numFields = detectedFieldOrder.length; + const numFunds = result.funds.length; + + if (standaloneAmounts.length >= numFields * numFunds) { + for (let f = 0; f < numFunds; f++) { + for (let i = 0; i < numFields; i++) { + const key = detectedFieldOrder[i]; + if (!result.fields[key]) result.fields[key] = []; + result.fields[key].push(standaloneAmounts[f * numFields + i]); + } + } } }