finances/migration/rebuild_pnc.py

99 lines
4.5 KiB
Python

"""PNC rebuild classifier + reconciliation (READ-ONLY, no Firefly writes).
PNC is the hub account. Every PNC line stays a transaction of its own amount;
classification only decides the OTHER leg (transfer target / income / expense /
review / Don't Know). So PNC's own balance is unaffected by classification,
which gives us a hard integrity check:
opening_balance(Aug 1) + sum(all PNC txns) == QFX LEDGERBAL
If that holds, nothing was dropped or double-counted. Prints the derived
opening balance and a by-class breakdown for Dane to review before any wipe.
"""
import re, json, sys
F = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026/PNC7552Aug012025-May152025.QFX"
t = open(F, encoding="latin-1", errors="replace").read()
m = re.search(r"<LEDGERBAL>.*?<BALAMT>([^<\r\n]*).*?<DTASOF>([^<\r\n]*)", t, re.S | re.I)
ledger_bal, ledger_asof = float(m.group(1)), m.group(2)[:8]
blocks = re.findall(r"<STMTTRN>(.*?)(?=<STMTTRN>|</BANKTRANLIST>)", t, re.S | re.I)
def g(b, k):
mm = re.search(rf"<{k}>([^<\r\n]*)", b, re.I)
return mm.group(1).strip() if mm else ""
def classify(desc, amt):
d = desc.upper()
# --- transfers (own accounts) ---
if "APPLECARD GSBANK PAYMENT" in d:
return "transfer", "Apple Credit Card", None, False
if "CITI AUTOPAY PAYMENT" in d:
return "transfer", "Costco Visa Card", None, False
if "SCHWAB BROKERAGE MONEYLINK" in d:
return "transfer", "Schwab (Stocks/Savings?)", None, True # review: which
if "ATM WITHDRAWAL" in d:
return "transfer", "Cash", None, False
if "CARVANA PAYOUT" in d:
return "transfer", "Illiquid Assets", None, False
if "ATM DEPOSIT" in d and abs(amt) > 10000:
return "transfer", "Coverdell", None, True # the ~$14,715 check
# --- income ---
if "UNIV PITTSBURGH" in d and ("PAYROLL" in d or "SALARY" in d):
return "income", "Pitt Salary", "Wages", False
if "INTEREST PAYMENT" in d:
return "income", "Interest Income", "Investment: Interest", False
if "IRS TREAS 310" in d:
return "income", "IRS Refund", "Taxes", False
if "ATM SURCHARGE REIMB" in d:
return "income", "Don't Know", None, False
# --- expenses ---
if "DUQUESNE LIGHT" in d:
return "expense", "Duquesne Light", "Utilities: Electric", False
if "COMPEER" in d:
return "expense", "Compeer Investments", "Rent", False
if "PITT TUITION" in d:
return "expense", "University of Pittsburgh", "Education", False
# --- Don't Know (poker / ambiguous money movement) ---
if any(k in d for k in ("VENMO CASHOUT", "CASH APP", "ZEL FROM")):
return "dontknow", "Don't Know", None, True
# --- review individually ---
if re.search(r"\bCHECK \d+", d):
return "review", "CHECK (payee?)", None, True
if "CAPITAL ONE TRANSFER" in d:
return "review", "Capital One?", None, True
if "ATM DEPOSIT" in d:
return "review", "ATM deposit (source?)", None, True
if "WITHDRAWAL" in d:
return "review", "Withdrawal (where?)", None, True
if "YARDI PENNY TEST" in d:
return "income", "Don't Know", None, False
return "review", "UNCLASSIFIED", None, True
recs, total = [], 0.0
from collections import Counter, defaultdict
by_class = Counter(); by_class_amt = defaultdict(float)
for b in blocks:
amt = float(g(b, "TRNAMT")); total += amt
desc = (g(b, "NAME") + " " + g(b, "MEMO")).strip()
cls, target, cat, review = classify(desc, amt)
by_class[cls] += 1; by_class_amt[cls] += amt
recs.append({"date": g(b, "DTPOSTED")[:8], "amt": amt, "desc": desc[:60],
"class": cls, "target": target, "category": cat,
"review": review, "fitid": g(b, "FITID")})
opening = round(ledger_bal - total, 2)
print(f"PNC QFX: {len(blocks)} txns | LEDGERBAL ${ledger_bal:,.2f} as of {ledger_asof}")
print(f"sum(txns) = ${total:,.2f}")
print(f"=> DERIVED OPENING BALANCE (pre 2025-08-01) = ${opening:,.2f}")
print(f" reconciliation: {opening:,.2f} + {total:,.2f} = {opening+total:,.2f} "
f"(== ledger {ledger_bal:,.2f}? {abs(opening+total-ledger_bal)<0.01})")
print("\nBY CLASS:")
for c in ("transfer","income","expense","dontknow","review"):
print(f" {c:9} n={by_class[c]:>3} net=${by_class_amt[c]:>12,.2f}")
print("\nNEEDS-DANE (review / Don't Know):")
for r in recs:
if r["review"]:
print(f" {r['date']} {r['amt']:>10,.2f} [{r['class']}->{r['target']}] {r['desc']}")
json.dump(recs, open("/tmp/pnc_classified.json","w"), indent=1)
print("\nwrote /tmp/pnc_classified.json (read-only; nothing posted)")