Files
WealthySmart/frontend/src/lib/parsePensionPaste.ts
Carlos Escalante 0923337fff
All checks were successful
Deploy to VPS / deploy (push) Successful in 14s
Fix pension paste parser for split-line format from bank website
The bank website puts field labels and amounts on separate lines.
Parser now handles both inline and split formats.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 10:09:51 -06:00

180 lines
5.2 KiB
TypeScript

/**
 * One parsed statement row: the figures for a single fund over a single period.
 *
 * Produced by `parsePensionPaste`, which emits one entry per fund detected in
 * each pasted block. Numeric fields default to 0 when the corresponding row is
 * not found in the pasted text.
 */
export interface PensionParsedEntry {
// Fund code detected from the block header: 'ROP', 'FCL' or 'VOL'.
fund: string;
period_start: string; // YYYY-MM-DD
// Same YYYY-MM-DD format as period_start.
period_end: string;
// Amount read from the "Saldo anterior" row.
saldo_anterior: number;
// Amount read from the "Aportes" row.
aportes: number;
// Amount read from the "Rendimientos" row.
rendimientos: number;
// Amount read from the "Retiros" row.
retiros: number;
// Amount read from the "Traslados" row.
traslados: number;
// Amount read from the "Comisión" row.
comision: number;
// Not parsed from the paste: parsePensionPaste always sets this to 0.
correccion: number;
// Amount read from the "Bonificación" row.
bonificacion: number;
// Amount read from the "Saldo Actual" row.
saldo_final: number;
}
/**
 * Converts a formatted amount such as "¢1,234.56" into a number.
 *
 * The currency sign, all whitespace and thousands separators (commas) are
 * removed before the numeric conversion.
 *
 * @param raw - Amount text as it appears in the pasted statement.
 * @returns The numeric value, or 0 when the text does not start with a number.
 */
function parseAmount(raw: string): number {
  // One pass drops the currency sign, whitespace and commas together.
  const numericText = raw.replace(/[¢\s,]/g, '');
  const value = Number.parseFloat(numericText);
  if (Number.isNaN(value)) {
    return 0;
  }
  return value;
}
/**
 * Rewrites the first DD/MM/YYYY date found in `raw` as YYYY-MM-DD.
 *
 * Day and month must each be exactly two digits and the year four digits.
 * The digits are not range-checked.
 *
 * @param raw - Text containing a date in DD/MM/YYYY form.
 * @returns The date as YYYY-MM-DD, or an empty string when no date is found.
 */
function parseDateDMY(raw: string): string {
  const found = /(\d{2})\/(\d{2})\/(\d{4})/.exec(raw);
  if (found === null) {
    return '';
  }
  const [, day, month, year] = found;
  return [year, month, day].join('-');
}
/**
 * Finds every currency amount on a line and returns them in order of appearance.
 *
 * An amount is a currency sign, optional whitespace, an optional minus sign and
 * a run of digits, commas and dots that contains at least one digit. Both the
 * cent sign (¢) and the colón sign (₡, U+20A1) are accepted as the currency sign.
 *
 * @param line - One line of the pasted statement.
 * @returns The parsed amounts, or an empty array when the line has none.
 */
function extractAmounts(line: string): number[] {
  // The lookahead requires a digit, so a sign followed only by punctuation
  // (for example a sentence ending in "¢.") is not reported as a 0 amount.
  const matches = line.match(/[¢₡]\s*-?(?=[,.]*\d)[\d,.]+/g);
  if (!matches) return [];
  // Drop the currency sign here so parseAmount receives only the numeric text,
  // whichever of the two signs was used.
  return matches.map((token) => parseAmount(token.replace(/[¢₡]/g, '')));
}
/**
 * Statement row labels, in the order they appear in the bank statement.
 *
 * Each pair is [pattern that recognises the row label, key under which the
 * row's amounts are stored]. Patterns are case-insensitive and the accented
 * "ó" is optional.
 */
const FIELD_LABELS: Array<[RegExp, string]> = [
  [/saldo\s*anterior/i, 'saldo_anterior'],
  [/aportes/i, 'aportes'],
  [/rendimientos/i, 'rendimientos'],
  [/retiros/i, 'retiros'],
  [/traslados/i, 'traslados'],
  [/comisi[oó]n/i, 'comision'],
  [/bonificaci[oó]n/i, 'bonificacion'],
];
/**
 * Intermediate result of parsing one pasted block, before it is expanded into
 * one PensionParsedEntry per fund.
 */
interface BlockResult {
// Fund codes detected from the block header: ['ROP', 'FCL'] or ['VOL'].
funds: string[];
// Amounts per field key (e.g. 'aportes', 'saldo_final'); parsePensionPaste
// reads index i of each array as the value for funds[i].
fields: Record<string, number[]>;
// Period bounds as YYYY-MM-DD; left as '' when no period line is found.
period_start: string;
period_end: string;
}
/**
 * Parses the lines of one statement block.
 *
 * The block must contain a "Resumen del período" header naming either ROP and
 * FCL together, or the voluntary fund (VOL). Two layouts are supported:
 *
 * - inline: a row label and its amounts share one line;
 * - split: row labels appear on their own lines and each amount is on a line
 *   of its own, laid out as N labels followed by N amounts for the first fund,
 *   then N amounts for the second fund, and so on.
 *
 * @param lines - Non-empty lines of a single block, in their original order.
 * @returns The detected funds, period and amounts, or null when the header is
 *   missing or names no recognised fund.
 */
function parseBlock(lines: string[]): BlockResult | null {
  const header = lines.find((text) => /resumen del per[ií]odo/i.test(text));
  if (header === undefined) return null;

  // The header decides which fund columns the block contains.
  let funds: string[];
  if (/\bROP\b/i.test(header) && /\bFCL\b/i.test(header)) {
    funds = ['ROP', 'FCL'];
  } else if (/voluntario/i.test(header) || /\bVOL\b/i.test(header)) {
    funds = ['VOL'];
  } else {
    return null;
  }

  const block: BlockResult = {
    funds,
    fields: {},
    period_start: '',
    period_end: '',
  };

  // Split-layout bookkeeping: labels seen without amounts, and amounts seen
  // alone on a line, both in order of appearance.
  const pendingLabels: string[] = [];
  const looseAmounts: number[] = [];

  for (const line of lines) {
    // Period line: "del DD/MM/YYYY al DD/MM/YYYY".
    const period = /del\s+(\d{2}\/\d{2}\/\d{4})\s+al\s+(\d{2}\/\d{2}\/\d{4})/i.exec(line);
    if (period !== null) {
      block.period_start = parseDateDMY(period[1]);
      block.period_end = parseDateDMY(period[2]);
      continue;
    }

    // "Saldo Actual" is only read inline; it never takes part in the split mapping.
    if (/saldo\s*actual/i.test(line)) {
      const closing = extractAmounts(line);
      if (closing.length > 0) {
        block.fields['saldo_final'] = closing;
      }
      continue;
    }

    // The first label pattern that matches wins.
    const label = FIELD_LABELS.find(([pattern]) => pattern.test(line));
    const amounts = extractAmounts(line);
    if (label !== undefined) {
      const key = label[1];
      if (amounts.length > 0) {
        // Inline layout: amounts sit on the label's own line.
        block.fields[key] = amounts;
      } else {
        // Split layout: remember the label so loose amounts can be assigned later.
        pendingLabels.push(key);
      }
    } else if (amounts.length === 1) {
      // A non-label line holding exactly one amount is a split-layout value.
      looseAmounts.push(amounts[0]);
    }
  }

  // Assign loose amounts to pending labels, fund by fund. Nothing is mapped
  // unless there are enough amounts to fill every label for every fund.
  const fieldCount = pendingLabels.length;
  const needed = fieldCount * funds.length;
  if (fieldCount > 0 && looseAmounts.length > 0 && looseAmounts.length >= needed) {
    for (let idx = 0; idx < needed; idx++) {
      const key = pendingLabels[idx % fieldCount];
      let bucket = block.fields[key];
      if (bucket === undefined) {
        bucket = [];
        block.fields[key] = bucket;
      }
      bucket.push(looseAmounts[idx]);
    }
  }

  return block;
}
/**
 * Parses pension statement text pasted from the bank website.
 *
 * The text is split into blocks on "---" separator lines or on runs of blank
 * lines (three or more consecutive newlines). Each block that has a recognised
 * header and a period produces one entry per fund. Blocks with fewer than
 * three non-empty lines, or that cannot be parsed, are skipped.
 *
 * @param text - Raw pasted text; LF, CRLF and CR line endings are accepted.
 * @returns One entry per fund per parsed block, in order of appearance.
 */
export function parsePensionPaste(text: string): PensionParsedEntry[] {
  // Normalise CRLF / lone CR to LF first: the separator pattern and the line
  // split below only look for "\n", so text with "\r\n" endings would
  // otherwise collapse into a single block.
  const normalized = text.replace(/\r\n?/g, '\n');
  // Split into blocks by "---" or multiple blank lines
  const blocks = normalized.split(/(?:^|\n)-{3,}(?:\n|$)|\n{3,}/);
  const entries: PensionParsedEntry[] = [];
  for (const block of blocks) {
    const lines = block.split('\n').filter((l) => l.trim());
    if (lines.length < 3) continue;
    const parsed = parseBlock(lines);
    if (!parsed || !parsed.period_start || !parsed.period_end) continue;
    for (let i = 0; i < parsed.funds.length; i++) {
      const fund = parsed.funds[i];
      // Value of `key` for this fund. When the row has no amount at this
      // fund's index, the first amount of the row is used; when the row is
      // missing altogether the value is 0.
      const get = (key: string): number => {
        const vals = parsed.fields[key];
        if (!vals) return 0;
        return vals[i] ?? vals[0] ?? 0;
      };
      entries.push({
        fund,
        period_start: parsed.period_start,
        period_end: parsed.period_end,
        saldo_anterior: get('saldo_anterior'),
        aportes: get('aportes'),
        rendimientos: get('rendimientos'),
        retiros: get('retiros'),
        traslados: get('traslados'),
        comision: get('comision'),
        // No "corrección" row is parsed from the paste, so this is always 0.
        correccion: 0,
        bonificacion: get('bonificacion'),
        saldo_final: get('saldo_final'),
      });
    }
  }
  return entries;
}