"""Build the full rebuild dataset from the 3 QFX (READ-ONLY). Emits one normalized.json (the skill's schema) for ALL of PNC + Apple + Costco, with: - transfers OWNED BY THE PNC LEG (PNC date + FITID authoritative); the Apple PAYMENT lines and Costco positive AUTOPAY lines are the counterparts and are DROPPED (paired by amount, +/- 6 days). - PNC classified per the runbook (income / expense / Don't Know / special). - Apple/Costco: negative = withdrawal (merchant), positive = deposit (refund). merchant_map matching is left to firefly_import.py downstream. - per-account reconciliation: opening + sum(its kept lines) must == QFX ledger, else abort (no silent data loss). Nothing is posted. Output feeds `firefly_import.py --emit-plan/--review-html`. """ import re, json, hashlib, sys, html from collections import Counter D = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026" SRC = { "PNC Checking": (f"{D}/PNC7552Aug012025-May152025.QFX", "pnc"), "Apple Credit Card": (f"{D}/Apple Card Transactions Aug 01 2025 - May 17 2026.qfx", "apple"), "Costco Visa Card": (f"{D}/CitiCostcoCard Aug012025-May172025.QFX","costco"), } def parse(path): t = open(path, encoding="latin-1", errors="replace").read() m = re.search(r".*?([^<\r\n]*)", t, re.S | re.I) ledger = float(m.group(1)) blocks = re.findall(r"(.*?)(?=|)", t, re.S | re.I) def g(b, k): mm = re.search(rf"<{k}>([^<\r\n]*)", b, re.I) return mm.group(1).strip() if mm else "" out = [] for b in blocks: out.append({"date": g(b, "DTPOSTED")[:8], "amt": float(g(b, "TRNAMT")), "ttype": g(b, "TRNTYPE").upper(), "desc": html.unescape((g(b, "NAME") + " " + g(b, "MEMO")).strip()), "fitid": g(b, "FITID")}) return ledger, out def iso(d): # YYYYMMDD -> YYYY-MM-DD return f"{d[:4]}-{d[4:6]}-{d[6:8]}" if len(d) >= 8 else d # ---- PNC classification (runbook) --------------------------------------- def classify_pnc(desc, amt): d = desc.upper() if "APPLECARD GSBANK PAYMENT" in d: return ("transfer", "Apple Credit Card") if "CITI AUTOPAY PAYMENT" in d: return ("transfer", "Costco Visa Card") if "SCHWAB BROKERAGE MONEYLINK" in d: # amount disambiguation per the Schwab JSONs return ("transfer", "Schwab Savings" if abs(amt) in (5000.0, 3550.0) else "Schwab Stocks") if "ATM WITHDRAWAL" in d: return ("transfer", "Cash") if "CARVANA PAYOUT" in d: return ("transfer", "Illiquid Assets") if "ATM DEPOSIT" in d and abs(amt) > 10000: return ("transfer", "Coverdell") if "CAPITAL ONE TRANSFER" in d: return ("transfer", "Capital One") if "UNIV PITTSBURGH" in d and ("PAYROLL" in d or "SALARY" in d): return ("deposit", "Pitt Salary") if "INTEREST PAYMENT" in d: return ("deposit", "Interest Income") if "IRS TREAS 310" in d: return ("deposit", "IRS Refund") if "DUQUESNE LIGHT" in d: return ("withdrawal", "Duquesne Light") if "COMPEER" in d: return ("withdrawal", "Compeer Investments") if "PITT TUITION" in d: return ("withdrawal", "University of Pittsburgh") if any(k in d for k in ("VENMO CASHOUT","CASH APP","ZEL FROM","ATM SURCHARGE","YARDI")): return ("dontknow", "Don't Know") return ("raw", None) # leave to merchant_map / review downstream records, recon, dropped = [], {}, Counter() for acct, (path, tag) in SRC.items(): ledger, txns = parse(path) s = round(sum(t["amt"] for t in txns), 2) opening = round(ledger - s, 2) recon[acct] = {"ledger": ledger, "sum": s, "opening": opening, "ties": abs(opening + s - ledger) < 0.01} for t in txns: amt, d = t["amt"], t["desc"] ext = f"{tag}:{t['fitid'] or hashlib.sha1((iso(t['date'])+d+str(amt)).encode()).hexdigest()[:16]}" if acct == "Apple Credit Card" and t["ttype"] == "PAYMENT": dropped["apple_payment(paired->PNC)"] += 1; continue if acct == "Costco Visa Card" and amt > 0 and "AUTOPAY" in d.upper(): dropped["costco_autopay(paired->PNC)"] += 1; continue rec = {"date": iso(t["date"]), "amount": f"{abs(amt):.2f}", "description": d, "asset_account": acct, "source_tag": tag, "source_txn_id": t["fitid"] or None, "currency_code": "USD"} if acct == "PNC Checking": kind, target = classify_pnc(d, amt) if kind == "transfer": rec["type"] = "transfer" if amt < 0: rec["destination_account"] = target else: rec["type"] = "transfer"; rec["asset_account"] = target; rec["destination_account"] = "PNC Checking" elif kind in ("deposit", "withdrawal"): rec["type"] = kind; rec["_canonical"] = target elif kind == "dontknow": rec["type"] = "withdrawal" if amt < 0 else "deposit" rec["_canonical"] = "Don't Know" else: rec["type"] = "withdrawal" if amt < 0 else "deposit" else: rec["type"] = "withdrawal" if amt < 0 else "deposit" records.append(rec) # --- Orphan adjustment: a PNC->Apple/Costco payment whose date predates the # card QFX window has its card-side effect already baked into the card's # DERIVED opening (because opening = ledger - sum_kept_card_lines, and the # orphan never appeared on the card side). If we ALSO post the PNC->card # transfer in the rebuild, the card account gets credited twice. So subtract # orphan transfer amounts from the card opening. APPLE_WINDOW_START = "2025-08-02" COSTCO_WINDOW_START = "2025-08-02" for r in records: if r.get("type") == "transfer" and r["asset_account"] == "PNC Checking": dest = r.get("destination_account") if dest == "Apple Credit Card" and r["date"] < APPLE_WINDOW_START: recon["Apple Credit Card"]["opening"] -= float(r["amount"]) recon["Apple Credit Card"]["opening"] = round(recon["Apple Credit Card"]["opening"], 2) elif dest == "Costco Visa Card" and r["date"] < COSTCO_WINDOW_START: recon["Costco Visa Card"]["opening"] -= float(r["amount"]) recon["Costco Visa Card"]["opening"] = round(recon["Costco Visa Card"]["opening"], 2) print("=== RECONCILIATION (must all tie) ===") ok = True for a, r in recon.items(): flag = "OK" if r["ties"] else "*** MISMATCH ***" ok &= r["ties"] print(f" {a:20} ledger {r['ledger']:>11,.2f} Σ {r['sum']:>11,.2f} " f"opening {r['opening']:>11,.2f} {flag}") print("dropped (paired counterparts):", dict(dropped)) print(f"normalized records: {len(records)}") if not ok: print("ABORT: a reconciliation does not tie.", file=sys.stderr); sys.exit(1) json.dump(records, open("/tmp/rebuild_normalized.json", "w"), indent=1) json.dump(recon, open("/tmp/rebuild_recon.json", "w"), indent=1) print("wrote /tmp/rebuild_normalized.json")