"""PNC rebuild classifier + reconciliation (READ-ONLY, no Firefly writes). PNC is the hub account. Every PNC line stays a transaction of its own amount; classification only decides the OTHER leg (transfer target / income / expense / review / Don't Know). So PNC's own balance is unaffected by classification, which gives us a hard integrity check: opening_balance(Aug 1) + sum(all PNC txns) == QFX LEDGERBAL If that holds, nothing was dropped or double-counted. Prints the derived opening balance and a by-class breakdown for Dane to review before any wipe. """ import re, json, sys F = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026/PNC7552Aug012025-May152025.QFX" t = open(F, encoding="latin-1", errors="replace").read() m = re.search(r".*?([^<\r\n]*).*?([^<\r\n]*)", t, re.S | re.I) ledger_bal, ledger_asof = float(m.group(1)), m.group(2)[:8] blocks = re.findall(r"(.*?)(?=|)", t, re.S | re.I) def g(b, k): mm = re.search(rf"<{k}>([^<\r\n]*)", b, re.I) return mm.group(1).strip() if mm else "" def classify(desc, amt): d = desc.upper() # --- transfers (own accounts) --- if "APPLECARD GSBANK PAYMENT" in d: return "transfer", "Apple Credit Card", None, False if "CITI AUTOPAY PAYMENT" in d: return "transfer", "Costco Visa Card", None, False if "SCHWAB BROKERAGE MONEYLINK" in d: return "transfer", "Schwab (Stocks/Savings?)", None, True # review: which if "ATM WITHDRAWAL" in d: return "transfer", "Cash", None, False if "CARVANA PAYOUT" in d: return "transfer", "Illiquid Assets", None, False if "ATM DEPOSIT" in d and abs(amt) > 10000: return "transfer", "Coverdell", None, True # the ~$14,715 check # --- income --- if "UNIV PITTSBURGH" in d and ("PAYROLL" in d or "SALARY" in d): return "income", "Pitt Salary", "Wages", False if "INTEREST PAYMENT" in d: return "income", "Interest Income", "Investment: Interest", False if "IRS TREAS 310" in d: return "income", "IRS Refund", "Taxes", False if "ATM SURCHARGE REIMB" in d: return "income", "Don't Know", None, False # --- expenses --- if "DUQUESNE LIGHT" in d: return "expense", "Duquesne Light", "Utilities: Electric", False if "COMPEER" in d: return "expense", "Compeer Investments", "Rent", False if "PITT TUITION" in d: return "expense", "University of Pittsburgh", "Education", False # --- Don't Know (poker / ambiguous money movement) --- if any(k in d for k in ("VENMO CASHOUT", "CASH APP", "ZEL FROM")): return "dontknow", "Don't Know", None, True # --- review individually --- if re.search(r"\bCHECK \d+", d): return "review", "CHECK (payee?)", None, True if "CAPITAL ONE TRANSFER" in d: return "review", "Capital One?", None, True if "ATM DEPOSIT" in d: return "review", "ATM deposit (source?)", None, True if "WITHDRAWAL" in d: return "review", "Withdrawal (where?)", None, True if "YARDI PENNY TEST" in d: return "income", "Don't Know", None, False return "review", "UNCLASSIFIED", None, True recs, total = [], 0.0 from collections import Counter, defaultdict by_class = Counter(); by_class_amt = defaultdict(float) for b in blocks: amt = float(g(b, "TRNAMT")); total += amt desc = (g(b, "NAME") + " " + g(b, "MEMO")).strip() cls, target, cat, review = classify(desc, amt) by_class[cls] += 1; by_class_amt[cls] += amt recs.append({"date": g(b, "DTPOSTED")[:8], "amt": amt, "desc": desc[:60], "class": cls, "target": target, "category": cat, "review": review, "fitid": g(b, "FITID")}) opening = round(ledger_bal - total, 2) print(f"PNC QFX: {len(blocks)} txns | LEDGERBAL ${ledger_bal:,.2f} as of {ledger_asof}") print(f"sum(txns) = ${total:,.2f}") print(f"=> DERIVED OPENING BALANCE (pre 2025-08-01) = ${opening:,.2f}") print(f" reconciliation: {opening:,.2f} + {total:,.2f} = {opening+total:,.2f} " f"(== ledger {ledger_bal:,.2f}? {abs(opening+total-ledger_bal)<0.01})") print("\nBY CLASS:") for c in ("transfer","income","expense","dontknow","review"): print(f" {c:9} n={by_class[c]:>3} net=${by_class_amt[c]:>12,.2f}") print("\nNEEDS-DANE (review / Don't Know):") for r in recs: if r["review"]: print(f" {r['date']} {r['amt']:>10,.2f} [{r['class']}->{r['target']}] {r['desc']}") json.dump(recs, open("/tmp/pnc_classified.json","w"), indent=1) print("\nwrote /tmp/pnc_classified.json (read-only; nothing posted)")