Compare commits

...

4 Commits

Author SHA1 Message Date
Dane Sabo
d7c6521cf0 merchant_map: +9 rules from May 2026 import learning loop
New merchants: Hobby Lobby, Pampurred Paws, Bojangles, Pittsburgh Zoo;
consolidations to existing BP/Exxon/Charlotte Motor Speedway; income
mappings for Emerson (paycheck) and Apple Cashback. Fix stale Red Robin
auto-tail rule (was spawning "Red Robin No" instead of consolidating).

Add retirement_projection.py + fire_projection.py: today's-dollars
compounding + FIRE/Coast-FIRE models with $50k spending floor.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 09:18:17 -04:00
Dane Sabo
a37d8f4d2c merchant_map: +24 rules from overlap-test learning loop; test review artifact
- 22 UNMATCHED + 2 REVIEW resolutions from the 2026-05-01..05-25 overlap
  import got folded back via apply_decisions/upsert_rule: the Charlotte
  bachelor-trip vendors (Tot Hill Farm, Lexington BBQ, Wooden Robot Brewing,
  Grace O'Malley, Buddy's Place, Stop and Go, Yamazaru, Publix, Krispy Kreme,
  Charlotte Motor Speedway), Pittsburgh first-timers (Utrecht/Blick's,
  Westinghouse Food Court, Halal Guys, Five Guys, Firestone, Leatherman, PRT
  via Masabi), plus Patrick Murphy (rent) and Emerson (wages).
- migration/test_overlap_review.html: the rendered review doc from the test
  (PNC+Apple+Costco 2026-05-01..05-25; 57 dedup-skip, 44 actionable).
2026-05-25 21:35:32 -04:00
Dane Sabo
e446c4097a Migration: rebuild battle-test learnings + opening-balance orphan fix
- build_rebuild_dataset.py: subtract orphan paired-transfer amounts from
  destination card's derived opening; html.unescape descriptions.
- merchant_map.json: +110 auto-tail rules from rebuild long-tail, +20
  recurring rules + 135 auto-cluster acceptances; stripped all cached
  account_ids; Rock Auto -> Z(Mizumi) review:true; Duquesne Light ->
  Utilities; categories stripped from _auto_tail rules per user policy.
- migration/README.md: 'Lessons from the first rebuild' section.
- migration/rebuild_clusters.{json,md}: clustering proposal artifact.
2026-05-25 21:05:38 -04:00
Dane Sabo
26fb19ca9a Migration runbook + rebuild tooling; 10 PNC/income/Don't Know rules
- migration/README.md: cold-start rebuild runbook (reconciliation gate,
  classification rules, transfer pairing, investment policy, execution order)
- migration/build_rebuild_dataset.py: consolidated 3-QFX builder with PNC-
  owned transfers, counterpart pairing & drop, per-account reconciliation
- migration/rebuild_clusters.{json,md}: clustering proposal for the rebuild
- migration/rebuild_review.html: read-only browser review for the 1017-txn
  rebuild plan (transfers under PNC, category fixes baked in)
- migration/{pnc_review,review_preview_mixed}.html: earlier UI previews
- merchant_map.json: add 10 settled deterministic rules (Duquesne Light,
  Pitt Salary, Interest Payment, IRS, Pitt Tuition, Daily Cash Adjustment,
  ATM Surcharge/Yardi/Venmo/Zelle->Don't Know) so the skill stops flagging
  pre-classified PNC lines as UNMATCHED
2026-05-25 18:54:50 -04:00
12 changed files with 10732 additions and 45 deletions

199
fire_projection.py Normal file
View File

@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
FIRE / Coast-FIRE projection with a realistic rising-income career arc.
Key upgrade over retirement_projection.py: income is NOT flat. It follows a
career arc, and savings is a PERCENTAGE of income so both your nest egg AND
your lifestyle scale as you earn more. This is what lets us show the real
trade: a high savings rate hits FIRE faster BOTH because you save more AND
because you need less (lower spending => lower 25x FIRE number).
All figures in TODAY'S DOLLARS (real returns).
Run with: /tmp/retire-venv/bin/python fire_projection.py
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
# ----------------------------------------------------------------------------
# ASSUMPTIONS
# ----------------------------------------------------------------------------
# Annual portfolio growth applied in simulate(); the script treats it as a
# real (today's-dollars) return.
REAL_RETURN = 0.07
# NOTE(review): SWR is not read anywhere in the visible script -- simulate(),
# the plot and the text summary all hard-code the equivalent 25x multiplier.
SWR = 0.04 # 4% rule -> FIRE number = 25x annual spending
# simulate() iterates ages START_AGE..END_AGE inclusive.
START_AGE = 25
END_AGE = 65
START_PORTFOLIO = 48_000 # Roth IRA + Schwab Stocks today
# age -> extra contribution added on top of own savings in simulate().
PIPELINE = {25: 7_000, 26: 7_000, 27: 7_000, 28: 3_900} # 529->Roth
STIPEND = 34_000 # grad years (age 25-26)
START_SALARY = 130_000 # industry, starting age 27
TAX_GRAD = 0.08 # effective all-in tax rate, grad years
TAX_INDUSTRY = 0.30 # effective all-in tax rate, industry (fed+PA+FICA+local)
SPENDING_FLOOR = 50_000 # Dane's rule: never live on less than $50k/yr (today's $)
def gross_income(age):
    """Return gross income in today's dollars at the given age.

    Ages up to 26 earn STIPEND. From 27 on, pay starts at START_SALARY and
    compounds one real raise for every year already worked: 3% for years
    begun before age 40, 1% for years begun before 50, 0.5% afterwards.
    """
    if age <= 26:
        return STIPEND

    def raise_factor(year_age):
        # Raise earned during the year that started at `year_age`.
        if year_age >= 50:
            return 1.005          # ~flat, late career
        if year_age >= 40:
            return 1.01           # 1% real, mid career
        return 1.03               # 3% real ascent, early career

    salary = START_SALARY
    for year_age in range(27, age):   # empty at age 27 -> starting salary
        salary = salary * raise_factor(year_age)
    return salary
def tax_rate(age):
    """Return the effective tax rate: TAX_GRAD through age 26, TAX_INDUSTRY after."""
    if age <= 26:
        return TAX_GRAD
    return TAX_INDUSTRY
# ----------------------------------------------------------------------------
# SAVINGS STRATEGIES — each returns SAVINGS DOLLARS given (age, gross, net).
# The $50k spending floor is enforced: never save so much that you live on <$50k.
# ----------------------------------------------------------------------------
def hybrid_floor(age, gross, net):
    """Front-load HARD ages 27-32 (live at the $50k floor, bank everything else),
    then deliberately loosen to ~20% of gross for the Corvette-and-hobbies years.

    Returns savings dollars for the year; nothing is saved through age 26, and
    savings never exceed what is left of `net` above SPENDING_FLOOR.
    """
    if age <= 26:
        return 0.0
    # Most that can be saved without living below the floor.
    headroom = max(0.0, net - SPENDING_FLOOR)
    if age <= 32:                      # PHASE 1: live at floor, save the rest
        return headroom
    return min(gross * 0.20, headroom) # PHASE 2: loosen to 20% of gross
def steady_aggressive(age, gross, net):
    """Save 30% of gross from age 27 on, capped so spending stays at or above
    SPENDING_FLOOR; nothing is saved through age 26."""
    if age <= 26:
        return 0.0
    headroom = max(0.0, net - SPENDING_FLOOR)
    return min(gross * 0.30, headroom)
def balanced(age, gross, net):
    """Save 18% of gross from age 27 on, capped so spending stays at or above
    SPENDING_FLOOR; nothing is saved through age 26."""
    if age <= 26:
        return 0.0
    headroom = max(0.0, net - SPENDING_FLOOR)
    return min(gross * 0.18, headroom)
# Display label -> (savings function, hex line colour). The label doubles as
# the key into `results` and as the legend text on both chart panels.
STRATEGIES = {
    "Hybrid: hard 27-32 @ $50k floor, then loosen to 20%": (hybrid_floor, "#d93025"),
    "Steady aggressive (30%)": (steady_aggressive, "#188038"),
    "Balanced (18%)": (balanced, "#1a73e8"),
}
# ----------------------------------------------------------------------------
# SIMULATE
# ----------------------------------------------------------------------------
def simulate(save_fn):
    """Run one savings strategy year by year from START_AGE to END_AGE.

    `save_fn(age, gross, net)` returns the dollars saved out of that year's
    income. Each year the start-of-year balance, the resulting spending and
    the matching FIRE number (25x spending) are recorded; the balance then
    grows by REAL_RETURN and the year's savings plus any PIPELINE amount are
    added.

    Returns a dict with parallel lists `ages`, `portfolio`, `spending`,
    `fire_num`, and `fire_age`: the first age >= 27 whose start-of-year
    balance covers that year's FIRE number, or None if that never happens.
    """
    track = {"ages": [], "portfolio": [], "spending": [], "fire_num": [],
             "fire_age": None}
    balance = START_PORTFOLIO
    for age in range(START_AGE, END_AGE + 1):
        gross = gross_income(age)
        net = gross * (1 - tax_rate(age))
        saved = save_fn(age, gross, net)
        lifestyle = net - saved            # what you live on
        target = 25 * lifestyle            # FIRE target for THIS lifestyle

        # Record the balance BEFORE this year's growth and contribution.
        track["ages"].append(age)
        track["portfolio"].append(balance)
        track["spending"].append(lifestyle)
        track["fire_num"].append(target)

        if track["fire_age"] is None and age >= 27 and balance >= target:
            track["fire_age"] = age

        contribution = saved + PIPELINE.get(age, 0)
        balance = balance * (1 + REAL_RETURN) + contribution
    return track
# Run every strategy once; maps strategy label -> the dict returned by simulate().
results = {label: simulate(fn) for label, (fn, _) in STRATEGIES.items()}
# ----------------------------------------------------------------------------
# PLOT (2 panels: wealth+FIRE, then lifestyle)
# ----------------------------------------------------------------------------
# Two stacked panels: portfolio vs. FIRE number on top, annual spending below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10.5), height_ratios=[1.6, 1])
# Axis labels: "$1.2M" at or above one million, "$250k" below.
money_fmt = FuncFormatter(lambda v, _: f"${v/1e6:.1f}M" if v >= 1e6 else f"${v/1e3:.0f}k")

# --- Panel 1: portfolio vs FIRE number ---
for label, (_save_fn, color) in STRATEGIES.items():
    r = results[label]
    ax1.plot(r["ages"], r["portfolio"], color=color, linewidth=2.6, label=label)
    # FIRE number (dotted, same color, thin)
    ax1.plot(r["ages"], r["fire_num"], color=color, linewidth=1.1,
             linestyle=":", alpha=0.7)
    # mark FIRE achievement
    fa = r["fire_age"]
    if fa is not None:
        idx = r["ages"].index(fa)
        ax1.scatter([fa], [r["portfolio"][idx]], color=color, s=90, zorder=5,
                    edgecolor="white", linewidth=1.5)
        ax1.annotate(f"FI at {fa}", xy=(fa, r["portfolio"][idx]),
                     xytext=(fa - 1.5, r["portfolio"][idx] + 0.35e6),
                     fontsize=9.5, fontweight="bold", color=color)

ax1.axvline(27, color="#5f6368", linestyle="--", alpha=0.5, linewidth=1.2)
ax1.annotate("PhD done / industry income (~2028)", xy=(27, 0),
             xytext=(27.3, 5.0e6), fontsize=9, color="#5f6368")

# lean-FI floor: $50k lifestyle => $1.25M
lean_fi_target = 25 * SPENDING_FLOOR
ax1.axhline(lean_fi_target, color="#f9ab00", linestyle="-.", alpha=0.8, linewidth=1.4)
# This label holds TWO dollar signs. Matplotlib treats an even number of
# unescaped "$" as mathtext delimiters, which would typeset "50k = " as math
# and drop the dollar signs, so both are escaped to render literally.
ax1.annotate(f"Lean-FI floor: 25 x \\$50k = \\${lean_fi_target/1e6:.2f}M",
             xy=(START_AGE, lean_fi_target),
             xytext=(START_AGE + 0.3, lean_fi_target + 0.18e6),
             fontsize=9, color="#b06000", fontweight="bold")

ax1.set_title("FIRE trajectories — solid = portfolio, dotted = FIRE number (25x spend)",
              fontsize=13, fontweight="bold")
ax1.set_ylabel("Today's $")
ax1.yaxis.set_major_formatter(money_fmt)
ax1.set_xlim(START_AGE, END_AGE)
ax1.set_ylim(bottom=0)
ax1.grid(True, alpha=0.25)
ax1.legend(loc="upper left", fontsize=9.5)

# --- Panel 2: lifestyle (annual spending) ---
for label, (_save_fn, color) in STRATEGIES.items():
    r = results[label]
    ax2.plot(r["ages"], r["spending"], color=color, linewidth=2.4, label=label)

ax2.set_title("What you actually live on each year (annual spending, today's $)",
              fontsize=13, fontweight="bold")
ax2.set_xlabel("Age")
ax2.set_ylabel("Annual spending")
ax2.yaxis.set_major_formatter(money_fmt)
ax2.set_xlim(START_AGE, END_AGE)
ax2.set_ylim(bottom=0)
ax2.grid(True, alpha=0.25)

# Reference line: after-tax stipend.
grad_take_home = STIPEND * (1 - TAX_GRAD)
ax2.axhline(grad_take_home, color="#9aa0a6", linestyle="--", alpha=0.6)
ax2.annotate("~current grad take-home", xy=(50, grad_take_home),
             xytext=(50, grad_take_home + 2500), fontsize=8.5, color="#9aa0a6")

# the $50k floor — the line Dane won't cross
ax2.axhline(SPENDING_FLOOR, color="#f9ab00", linestyle="-.", alpha=0.9, linewidth=1.6)
ax2.annotate("$50k floor — won't live below this", xy=(40, SPENDING_FLOOR),
             xytext=(40, SPENDING_FLOOR + 3000), fontsize=9, color="#b06000", fontweight="bold")
ax2.legend(loc="upper left", fontsize=9.5)

fig.tight_layout()
# NOTE(review): absolute, user-specific output path; the script fails on any
# machine where this directory does not exist.
out = "/Users/danesabo/Documents/Finances/fire_projection.png"
fig.savefig(out, dpi=140, bbox_inches="tight")
# ----------------------------------------------------------------------------
# TEXT SUMMARY
# ----------------------------------------------------------------------------
def _series_at(result, series, age):
    """Return `result[series]` at the position where `result["ages"]` equals `age`."""
    return result[series][result["ages"].index(age)]


banner = "=" * 72
print(banner)
print("FIRE PROJECTION (today's dollars, 7% real, rising-income career arc)")
print(banner)
print(f"Income arc: ${STIPEND:,} stipend -> ${START_SALARY:,} at 27, 3%/1%/0.5% real raises\n")

LEAN_FI = 25 * SPENDING_FLOOR # $1.25M -> permanently able to fall back to $50k
for label in STRATEGIES:
    outcome = results[label]
    full_fire_age = outcome["fire_age"]

    # First age (27 or later) at which the start-of-year portfolio reaches
    # the lean-FI safety net.
    lean_age = None
    for age, balance in zip(outcome["ages"], outcome["portfolio"]):
        if age >= 27 and balance >= LEAN_FI:
            lean_age = age
            break

    lean_text = f"age {lean_age}" if lean_age else "not by 65"
    fire_text = f"age {full_fire_age}" if full_fire_age else "not by 65"

    print(label)
    print(f" $50k safety net funded (cross ${LEAN_FI/1e6:.2f}M): {lean_text}")
    print(f" Full FIRE on actual lifestyle: {fire_text}")
    print(f" Lifestyle (annual spend) at 30/40/50: "
          f"${_series_at(outcome, 'spending', 30)/1e3:.0f}k / "
          f"${_series_at(outcome, 'spending', 40)/1e3:.0f}k / "
          f"${_series_at(outcome, 'spending', 50)/1e3:.0f}k")
    print(f" Portfolio at 50 / 65: "
          f"${_series_at(outcome, 'portfolio', 50)/1e6:.2f}M / "
          f"${_series_at(outcome, 'portfolio', 65)/1e6:.2f}M")
    print()
print(f"Chart saved: {out}")

File diff suppressed because it is too large Load Diff

107
migration/README.md Normal file
View File

@ -0,0 +1,107 @@
# Firefly rebuild runbook
One-time migration: wipe the CSV-era transactions and rebuild from
FITID-stable QFX so every transaction has a permanent dedup key and a clean
account taxonomy. Read this before running anything in this folder.
## Why a rebuild (not in-place cleanup)
Firefly history is young (everything ~Aug 2025+, ~950 txns, minimal manual
data). Old CSV imports left ~343 fragmented junk expense accounts and no
stable external_ids. A clean rebuild keyed on QFX `FITID` is a better
foundation than reassigning junk in place. Decided 2026-05-17.
## Hard prerequisites (do not skip)
1. **Firefly DB backup.** Destructive, no undo. Do not run the wipe until a
DB dump/snapshot exists.
2. **Exports** (in `../EXPORTS/`, gitignored): Apple/PNC/Costco QFX, Aug 2025
-> now, FITID on 100% of rows. Schwab/Coinbase/Cash (~35 txns) are
CSV-only/manual, handled separately.
## Reconciliation (the trust gate)
Per account: `opening_balance = QFX_ledger - sum(all that account's lines)`.
Classification (transfer vs expense) never changes an account's own balance,
so `opening + sum == ledger` must hold to the cent before trusting the wipe.
Verified: PNC opening $6,866.10, Apple -$4,498.79, Costco -$2,541.57 (all
tie). `rebuild_dryrun.py` recomputes this; re-run after any change.
## Classification rules (PNC = the hub)
- **Transfers** -- ALWAYS owned by the PNC leg: PNC's posting date and PNC's
FITID are authoritative, the card/brokerage counterpart line is paired by
amount (+/- a few days) and dropped. Every transfer lives under PNC, one
consistent date, never double-counted. Pairs: APPLECARD GSBANK -> Apple
Credit Card; CITI AUTOPAY -> Costco Visa Card; SCHWAB MONEYLINK -> Schwab
Stocks/Savings (disambiguate by amount); ATM WITHDRAWAL -> Cash; CARVANA
PAYOUT -> Illiquid Assets; big ATM DEPOSIT -> Coverdell; CAPITAL ONE ->
Capital One (closed). Codified in the skill's `references/transfers.md`.
- **Income/expense**: Pitt salary -> Wages; Duquesne Light -> Utilities:
Electric; Compeer -> Rent; etc.
- **Don't Know**: Venmo/CashApp/Zelle ("poker"), unrecallable checks, unknown
ATM deposits -> the `Don't Know` account, review later. Never guessed.
- **Special accounts**: `Illiquid Assets` (cars; sale = transfer in),
`Don't Know` (catch-all). See the skill's memory / taxonomy notes.
## Investment accounts
Do NOT transaction-import Schwab/Roth/Coverdell/Coinbase (noise, and assets
!= currency). Model as monthly-valued: opening balance + external MoneyLink
transfers (from the PNC side) + one monthly valuation adjustment booked to
`Investment Appreciation` / `Investment: Interest`. Dane supplies the current
value at import; delta = the adjustment. Savings<->Stocks journals are
transfers.
## Execution order
1. `python rebuild_dryrun.py` -> confirm all accounts still reconcile.
2. Build the full normalized dataset (PNC + Apple + Costco, transfers typed,
payments paired/deduped, opening balances set).
3. Drive review via the skill's browser workflow
(`references/review-workflow.md`): `--review-html`, resolve the ~190 tail
merchants in-situ (search-then-ask, <80% => ask), Export `decisions.json`.
4. **Confirm DB backup exists.**
5. Wipe transactions, prune empty junk expense accounts.
6. `--decisions decisions.json --post`. Reconcile final balances against the
derived figures above.
## Files here
- `rebuild_pnc.py` -- PNC classifier + reconciliation (read-only)
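- `rebuild_dryrun.py` -- consolidated per-account reconciliation (read-only)
- `build_rebuild_dataset.py` -- builds the normalized PNC + Apple + Costco
dataset from the three QFX exports (read-only; posts nothing)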
- `pnc_classified.json` -- PNC classification output
- `merchant_clusters.{json,md}` -- cluster proposal (taxonomy bootstrap)
- `mock_firefly.py` -- stdlib mock used for skill eval/testing
- `*review_preview*.html` -- review-UI previews on real data
Nothing here writes to Firefly except the final `--post` in step 6.
## Lessons from the first rebuild (2026-05-20)
Captured here so a second rebuild doesn't re-discover them.
- **Orphan paired transfers**: the PNC->Apple payment from 2025-08-01 has no
Apple-side line (Apple's QFX starts 08-02). Its effect was already in
Apple's derived opening, so posting the transfer as well credited Apple a
second time and double-counted $3,218. Fix: `build_rebuild_dataset.py` now
subtracts orphan transfer amounts from the destination card's opening. See
`references/transfers.md` in the skill.
- **Asset accounts require `account_role`** on POST /accounts. `defaultAsset`
works universally.
- **Budgets do not auto-create.** If wiping to scratch, recreate Needs /
Wants / Savings via UI or POST before the import.
- **Wipe via UI leaves stale revenue accounts / categories** (only
transaction-referenced asset accounts go). Prune manually if you want a
truly clean slate.
- **Strip cached `account_id` from `merchant_map.json` before any rebuild.**
Pre-wipe ids are invalid post-wipe. The skill no longer caches to the map
(in-memory only) but old maps may still carry stale ids.
- **Background Python with `nohup ... &` can lose stdout to buffering.** Use
`python -u` for the import step. The first rebuild's log was empty because
Python buffered everything and we mistook it for "ran but did nothing."
- **`error_if_duplicate_hash` is now off** — Firefly's content-hash dedup
was too eager (rejected legit-distinct rows with same date+amt+desc, like
two parking sessions same garage). `external_id` precheck is the only dedup.
- **Wipe by deleting transactions, not by deleting accounts.** Otherwise you
end up with stale ids referenced by merchant_map cache.

View File

@ -0,0 +1,134 @@
"""Build the full rebuild dataset from the 3 QFX (READ-ONLY).
Emits one normalized.json (the skill's schema) for ALL of PNC + Apple +
Costco, with:
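- transfers OWNED BY THE PNC LEG (PNC date + FITID authoritative); the
Apple PAYMENT lines and Costco positive AUTOPAY lines are the
counterparts and are DROPPED. NOTE: this script drops them by rule
(Apple TRNTYPE == PAYMENT; Costco amount > 0 with "AUTOPAY" in the
description); it does not itself match each one to a PNC line by amount
(+/- 6 days), so a card-side payment with no PNC leg is dropped too.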
- PNC classified per the runbook (income / expense / Don't Know / special).
- Apple/Costco: negative = withdrawal (merchant), positive = deposit
(refund). merchant_map matching is left to firefly_import.py downstream.
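- per-account reconciliation: opening is derived as QFX ledger minus the
sum of ALL of the account's QFX lines (dropped counterparts included);
opening + that sum must == QFX ledger, else abort (no silent data loss).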
Nothing is posted. Output feeds `firefly_import.py --emit-plan/--review-html`.
"""
import re, json, hashlib, sys, html
from collections import Counter
# Directory holding the QFX exports (absolute, machine-specific path).
D = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026"
# Firefly asset-account name -> (QFX file path, short source tag). The tag is
# copied onto every record as `source_tag`.
# NOTE(review): the PNC and Costco filenames end in "...2025" while the Apple
# one ends in "2026" -- presumably that is how the files are actually named;
# verify against the export directory.
SRC = {
    "PNC Checking": (f"{D}/PNC7552Aug012025-May152025.QFX", "pnc"),
    "Apple Credit Card": (f"{D}/Apple Card Transactions Aug 01 2025 - May 17 2026.qfx", "apple"),
    "Costco Visa Card": (f"{D}/CitiCostcoCard Aug012025-May172025.QFX","costco"),
}
def parse(path):
    """Read one QFX file and return ``(ledger_balance, transactions)``.

    The file is scanned with regular expressions rather than a real OFX
    parser: each tag's value is taken as the text after ``<TAG>`` up to the
    next ``<`` or line break.

    Returns:
        ledger: float taken from ``<LEDGERBAL>...<BALAMT>``.
        transactions: one dict per ``<STMTTRN>`` block with keys
            ``date`` (first 8 chars of DTPOSTED), ``amt`` (float TRNAMT),
            ``ttype`` (upper-cased TRNTYPE), ``desc`` (NAME + " " + MEMO,
            stripped and HTML-unescaped) and ``fitid``.

    Raises:
        ValueError: if the file has no ledger balance, or a balance/amount
            is not a valid number.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(path, encoding="latin-1", errors="replace") as fh:
        text = fh.read()

    balance = re.search(r"<LEDGERBAL>.*?<BALAMT>([^<\r\n]*)", text, re.S | re.I)
    if balance is None:
        # Without a ledger balance the opening balance cannot be derived.
        raise ValueError(f"{path}: no <LEDGERBAL>/<BALAMT> element found")
    ledger = float(balance.group(1))

    # A block runs from <STMTTRN> to the next <STMTTRN> or the end of the list.
    blocks = re.findall(r"<STMTTRN>(.*?)(?=<STMTTRN>|</BANKTRANLIST>)", text,
                        re.S | re.I)

    def field(block, tag):
        # Value of <tag> inside one block, or "" when the tag is absent.
        found = re.search(rf"<{tag}>([^<\r\n]*)", block, re.I)
        return found.group(1).strip() if found else ""

    out = []
    for block in blocks:
        out.append({
            "date": field(block, "DTPOSTED")[:8],
            "amt": float(field(block, "TRNAMT")),
            "ttype": field(block, "TRNTYPE").upper(),
            "desc": html.unescape(
                (field(block, "NAME") + " " + field(block, "MEMO")).strip()),
            "fitid": field(block, "FITID"),
        })
    return ledger, out
def iso(d):
    """Convert a ``YYYYMMDD...`` string to ``YYYY-MM-DD``.

    Only the first eight characters are used; anything shorter than eight
    characters is returned unchanged.
    """
    if len(d) < 8:
        return d
    year, month, day = d[:4], d[4:6], d[6:8]
    return "-".join((year, month, day))
# ---- PNC classification (runbook) ---------------------------------------
def classify_pnc(desc, amt):
    """Classify one PNC statement line by its description (case-insensitive).

    Returns ``(kind, target)`` where kind is one of "transfer", "deposit",
    "withdrawal", "dontknow" or "raw". For "raw" the target is None and the
    line is left to merchant_map / review downstream. Rules are tried in a
    fixed order and the first match wins.
    """
    text = desc.upper()

    def first_match(table):
        # Value of the first (phrase, value) pair whose phrase occurs in text.
        for phrase, value in table:
            if phrase in text:
                return value
        return None

    # Card payments: the PNC leg owns the transfer.
    card = first_match((
        ("APPLECARD GSBANK PAYMENT", "Apple Credit Card"),
        ("CITI AUTOPAY PAYMENT", "Costco Visa Card"),
    ))
    if card is not None:
        return ("transfer", card)

    if "SCHWAB BROKERAGE MONEYLINK" in text:
        # amount disambiguation per the Schwab JSONs
        savings_sized = abs(amt) in (5000.0, 3550.0)
        return ("transfer", "Schwab Savings" if savings_sized else "Schwab Stocks")

    asset = first_match((
        ("ATM WITHDRAWAL", "Cash"),
        ("CARVANA PAYOUT", "Illiquid Assets"),
    ))
    if asset is not None:
        return ("transfer", asset)

    # Only large ATM deposits count as Coverdell transfers.
    if "ATM DEPOSIT" in text and abs(amt) > 10000:
        return ("transfer", "Coverdell")
    if "CAPITAL ONE TRANSFER" in text:
        return ("transfer", "Capital One")

    if "UNIV PITTSBURGH" in text and ("PAYROLL" in text or "SALARY" in text):
        return ("deposit", "Pitt Salary")

    verdict = first_match((
        ("INTEREST PAYMENT", ("deposit", "Interest Income")),
        ("IRS TREAS 310", ("deposit", "IRS Refund")),
        ("DUQUESNE LIGHT", ("withdrawal", "Duquesne Light")),
        ("COMPEER", ("withdrawal", "Compeer Investments")),
        ("PITT TUITION", ("withdrawal", "University of Pittsburgh")),
        ("VENMO CASHOUT", ("dontknow", "Don't Know")),
        ("CASH APP", ("dontknow", "Don't Know")),
        ("ZEL FROM", ("dontknow", "Don't Know")),
        ("ATM SURCHARGE", ("dontknow", "Don't Know")),
        ("YARDI", ("dontknow", "Don't Know")),
    ))
    if verdict is not None:
        return verdict
    return ("raw", None) # leave to merchant_map / review downstream
def _sign_based_type(amount):
    """Fallback typing by sign: negative is a withdrawal, anything else a deposit."""
    return "withdrawal" if amount < 0 else "deposit"


def _stamp_pnc_fields(record, description, amount):
    """Add type / counterparty keys to a PNC record in place, per classify_pnc()."""
    kind, target = classify_pnc(description, amount)
    if kind == "transfer":
        record["type"] = "transfer"
        if amount < 0:
            # Money leaving PNC: PNC stays the source account.
            record["destination_account"] = target
        else:
            # Money arriving at PNC: the counterpart becomes the source.
            record["asset_account"] = target
            record["destination_account"] = "PNC Checking"
    elif kind in ("deposit", "withdrawal"):
        record["type"] = kind
        record["_canonical"] = target
    else:
        # "dontknow" and "raw" are both typed by sign; only "dontknow" is
        # pinned to the catch-all account.
        record["type"] = _sign_based_type(amount)
        if kind == "dontknow":
            record["_canonical"] = "Don't Know"


records, recon, dropped = [], {}, Counter()
for acct, (path, tag) in SRC.items():
    ledger, txns = parse(path)
    line_total = round(sum(txn["amt"] for txn in txns), 2)
    derived_opening = round(ledger - line_total, 2)
    # NOTE(review): "ties" compares the opening with the very values it was
    # derived from, so it holds by construction (up to rounding) and cannot
    # detect a dropped or mis-parsed line.
    recon[acct] = {
        "ledger": ledger,
        "sum": line_total,
        "opening": derived_opening,
        "ties": abs(derived_opening + line_total - ledger) < 0.01,
    }
    for txn in txns:
        amount, description = txn["amt"], txn["desc"]
        # NOTE(review): `ext` (FITID, or a content hash when FITID is empty)
        # is computed but never read in this script -- records carry only
        # `source_txn_id`. Confirm whether it was meant to be stored.
        stable_id = txn["fitid"] or hashlib.sha1(
            (iso(txn["date"]) + description + str(amount)).encode()).hexdigest()[:16]
        ext = f"{tag}:{stable_id}"

        # Card-side counterparts of PNC-owned transfers are counted and skipped.
        if acct == "Apple Credit Card" and txn["ttype"] == "PAYMENT":
            dropped["apple_payment(paired->PNC)"] += 1
            continue
        if acct == "Costco Visa Card" and amount > 0 and "AUTOPAY" in description.upper():
            dropped["costco_autopay(paired->PNC)"] += 1
            continue

        record = {
            "date": iso(txn["date"]),
            "amount": f"{abs(amount):.2f}",
            "description": description,
            "asset_account": acct,
            "source_tag": tag,
            "source_txn_id": txn["fitid"] or None,
            "currency_code": "USD",
        }
        if acct == "PNC Checking":
            _stamp_pnc_fields(record, description, amount)
        else:
            # Cards: negative = withdrawal (merchant), positive = deposit (refund).
            record["type"] = _sign_based_type(amount)
        records.append(record)
# --- Orphan adjustment: a PNC->Apple/Costco payment dated before the card's
# QFX window never appears on the card side, so its effect is already inside
# the card's DERIVED opening (opening = ledger - sum of the card's lines).
# Posting the PNC->card transfer as well would credit the card twice, so the
# orphan amount is subtracted from that card's opening.
# NOTE(review): only "opening" is changed here; the "ties" flag computed
# earlier is not refreshed.
# NOTE(review): both cards use the same window start -- confirm Costco's.
APPLE_WINDOW_START = "2025-08-02"
COSTCO_WINDOW_START = "2025-08-02"
_window_start_by_card = {
    "Apple Credit Card": APPLE_WINDOW_START,
    "Costco Visa Card": COSTCO_WINDOW_START,
}
for rec in records:
    # Only outbound transfers still owned by PNC can be orphans.
    if rec.get("type") != "transfer" or rec["asset_account"] != "PNC Checking":
        continue
    card = rec.get("destination_account")
    window_start = _window_start_by_card.get(card)
    # ISO dates compare correctly as plain strings.
    if window_start is None or rec["date"] >= window_start:
        continue
    card_recon = recon[card]
    card_recon["opening"] = round(card_recon["opening"] - float(rec["amount"]), 2)
# Print the per-account reconciliation table, abort if any account fails to
# tie, otherwise write the normalized records and the reconciliation figures.
print("=== RECONCILIATION (must all tie) ===")
ok = True
for a, r in recon.items():
    flag = "OK" if r["ties"] else "*** MISMATCH ***"
    ok &= r["ties"]
    print(f" {a:20} ledger {r['ledger']:>11,.2f} Σ {r['sum']:>11,.2f} "
          f"opening {r['opening']:>11,.2f} {flag}")
print("dropped (paired counterparts):", dict(dropped))
print(f"normalized records: {len(records)}")
if not ok:
    print("ABORT: a reconciliation does not tie.", file=sys.stderr)
    sys.exit(1)

# Context managers guarantee both files are flushed and closed before the
# "wrote ..." message is printed.
with open("/tmp/rebuild_normalized.json", "w") as fh:
    json.dump(records, fh, indent=1)
with open("/tmp/rebuild_recon.json", "w") as fh:
    json.dump(recon, fh, indent=1)
print("wrote /tmp/rebuild_normalized.json")

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,192 @@
# Merchant cluster proposal
- 386 clusters from 372 accounts + 1017 statement txns
- **142** auto-proposable (>=0.80, clean canonical)
- **244** NEED DANE (ambiguous / junky canonical / new merchant)
## NEEDS DANE (top 40 by volume)
_For each: what is the real merchant? You can type a name; it becomes a permanent rule._
- **?** (conf 0.57, weight 75, 28 accts, 47 stmt) guess=`Amazon`
- desc: `AMAZON MARK* B00SF6VV0410 TERRY`
- desc: `AMAZON.COM*9R3UC0N93 440 TERRY A`
- desc: `AMAZON.COM*N428X9Q71 440 TERRY A`
- desc: `AMAZON MARK* B008Z3VV0410 TERRY`
- desc: `AMAZON MARK* B03Y156K1410 TERRY`
- desc: `AMAZON MARK* B204T9M31410 TERRY`
- accts: Amazon, Amazon Mark* B008z3vv0, Amazon Mark* B00sf6vv0, Amazon Mark* B00sf6vv0410 Terry Avenue North Seattle 98109 Wa Usa (return), Amazon Mark* B03y156k1, Amazon Mark* B204t9m31
- **?** (conf 0.4, weight 56, 0 accts, 56 stmt) guess=`University Of Pittsburgh|Pitt Parking Pay Stati127 North`
- desc: `PITT PARKING PAY STATI127 NORTH`
- **?** (conf 0.78, weight 37, 7 accts, 30 stmt) guess=`McDonald's`
- desc: `MCDONALDS 1862 3708 FORBES AVE P`
- desc: `MCDONALDS 1102 225 MOUNT LEBANON`
- desc: `MCDONALD'S F1862 3708 FORBES AVE`
- desc: `MCDONALD'S F1102 225 MT LEBANON`
- desc: `MCDONALDS 5834 2518 W LIBERTY RD`
- desc: `MCDONALD'S F27387 1412 B MAIN ST`
- accts: McDonald's, Mcdonald's F1102, Mcdonald's F1862, Mcdonald's F27387, Mcdonalds 1862, Mcdonalds 33234
- **?** (conf 1.0, weight 30, 0 accts, 30 stmt) guess=`Castle Shannon Shop`
- desc: `CASTLE SHANNON SHOP' 799 CASTLE`
- **?** (conf 0.71, weight 30, 2 accts, 28 stmt) guess=`Market District`
- desc: `MARKET DISTRICT #0014 7000 OXFOR`
- desc: `MARKET DISTRICT #0047 100 SETTLE`
- accts: Market District, Market District Supermarket
- **?** (conf 0.4, weight 18, 0 accts, 18 stmt) guess=`Apple Com Bill One Apple`
- desc: `APPLE.COM/BILL ONE APPLE PARK WA`
- desc: `APPLE.COM/US ONE APPLE PARK WAY`
- desc: `APPLE.COM/BILL ONE APPLE PARK CU`
- **?** (conf 0.47, weight 18, 8 accts, 10 stmt) guess=`Compeer`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB C5R6`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB MD64`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB 3Y6Q`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB R34S`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB D9FZ`
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB F394`
- accts: COMPEER-COMP-CP WEB PMTS ACH WEB 3Y6QDL, COMPEER-COMP-CP WEB PMTS ACH WEB 7Y648K, COMPEER-COMP-CP WEB PMTS ACH WEB D9FZ0L, COMPEER-COMP-CP WEB PMTS ACH WEB F394TK, COMPEER-COMP-CP WEB PMTS ACH WEB JS0NNK, COMPEER-COMP-CP WEB PMTS ACH WEB K7TDFK
- **?** (conf 0.4, weight 18, 0 accts, 18 stmt) guess=`Sq *La Gourmandine Oak116 Meyran`
- desc: `SQ *LA GOURMANDINE OAK116 MEYRAN`
- **?** (conf 1.0, weight 17, 0 accts, 17 stmt) guess=`Kuhns Banksville`
- desc: `KUHNS BANKSVILLE 3125 BANKSVILLE`
- **?** (conf 0.75, weight 13, 4 accts, 9 stmt) guess=`Starbucks`
- desc: `STARBUCKS STORE 27117 4022 FIFTH`
- desc: `STARBUCKS 27117 4022 5TH AVE PIT`
- desc: `STARBUCKS 8007827282 2401 UTAH A`
- accts: Starbucks, Starbucks 27117, Starbucks 8007827282, Starbucks Store 27117
- **?** (conf 0.4, weight 11, 0 accts, 11 stmt) guess=`Claude Ai Subscription548 Market`
- desc: `CLAUDE.AI SUBSCRIPTION548 MARKET`
- **?** (conf 0.61, weight 11, 2 accts, 9 stmt) guess=`Duquesne Light`
- desc: `DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH DEBIT xxxx`
- accts: DUQUESNE LIGHT PAYMENT ACH DEBIT xxxxxx5333, Duquesne Light
- **?** (conf 0.4, weight 11, 1 accts, 10 stmt) guess=`T2`
- desc: `T2* MT LEBANON PA 8900 KEYSTONE`
- accts: T2* Mt Lebanon Pa
- **?** (conf 1.0, weight 10, 1 accts, 9 stmt) guess=`Comcast / Xfinity`
- desc: `COMCAST / XFINITY 15 SUMMIT PARK`
- accts: Comcast / Xfinity
- **?** (conf 1.0, weight 10, 0 accts, 10 stmt) guess=`Interest Payment Interest Payment`
- desc: `INTEREST PAYMENT INTEREST PAYMENT`
- **?** (conf 0.4, weight 10, 0 accts, 10 stmt) guess=`Upmc Student Insurance600 Grant`
- desc: `UPMC STUDENT INSURANCE600 GRANT`
- **?** (conf 0.4, weight 9, 0 accts, 9 stmt) guess=`Applecard Gsbank Payment Ach Web`
- desc: `APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT ACH WEB-RECU`
- desc: `APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT ACH WEB xxxx`
- **?** (conf 0.4, weight 9, 0 accts, 9 stmt) guess=`Citi Autopay Payment Ach Web`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
- **?** (conf 1.0, weight 9, 0 accts, 9 stmt) guess=`Daily Cash Adjustment`
- desc: `DAILY CASH ADJUSTMENT`
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Ebay O`
- desc: `EBAY O*07-14287-66191 2535 NORTH`
- desc: `EBAY O*07-14287-66190 2535 NORTH`
- desc: `EBAY O*07-14287-66189 2535 NORTH`
- desc: `EBAY O*07-14287-66188 2535 NORTH`
- desc: `EBAY O*07-14287-66187 2535 NORTH`
- desc: `EBAY O*07-14287-66186 2535 NORTH`
- **?** (conf 0.9, weight 7, 1 accts, 6 stmt) guess=`Needle & Bean`
- desc: `SQ *NEEDLE &amp; BEAN 320 CASTLE`
- accts: Needle & Bean
- **?** (conf 0.4, weight 7, 0 accts, 7 stmt) guess=`University Of Pittsburgh|Univ Pittsburgh Salary Ach Credi`
- desc: `UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH CREDIT xx0`
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Youtube Tv`
- desc: `GOOGLE *YOUTUBE TV 1600 AMPHITHE`
- **?** (conf 0.62, weight 6, 2 accts, 4 stmt) guess=`Liberty Mutual`
- desc: `LIBERTY MUTUAL 175 BERKELEY ST 8`
- desc: `LIBERTY MUTUAL ATTN: COURTNEY MU`
- accts: Liberty Mutual
- **?** (conf 0.53, weight 6, 2 accts, 4 stmt) guess=`Openai`
- desc: `OPENAI *CHATGPT SUBSCR548 MARKET`
- desc: `OPENAI 1455 3RD STREET SAN FRANC`
- accts: Openai, Openai *chatgpt Subscr
- **?** (conf 0.4, weight 6, 0 accts, 6 stmt) guess=`Spo P&Amp Gspamelasdiner3703 F`
- desc: `SPO*P&amp;G'SPAMELA'SDINER3703 F`
- **?** (conf 1.0, weight 6, 2 accts, 4 stmt) guess=`Svdp Castle Shannon`
- desc: `SVDP CASTLE SHANNON 3423 LIBRARY`
- accts: SVDP Castle Shannon, Svdp Castle Shannon
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`Bp 9604786Ukani Broqps2900 Banks`
- desc: `BP#9604786UKANI BROQPS2900 BANKS`
- **?** (conf 0.4, weight 5, 2 accts, 3 stmt) guess=`Capital One Transfer Ach Web`
- desc: `CAPITAL ONE TRANSFER ACH WEB RT0 CAPITAL ONE TRANSFER ACH WEB RT0D854F`
- desc: `CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WEB PAYMENT `
- desc: `CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WEB PAYMENT `
- accts: CAPITAL ONE TRANSFER ACH WEB PAYMENT RT04E16C0EA8E68, CAPITAL ONE TRANSFER ACH WEB PAYMENT RT097FE1F911EB7
- **?** (conf 0.69, weight 5, 1 accts, 4 stmt) guess=`Peacock`
- desc: `PEACOCK 75AE1 PREMIUM 30 ROCKEFE`
- desc: `PEACOCK 81D06 PREMIUM 30 ROCKEFE`
- desc: `PEACOCK EF701 PREMIUM 30 ROCKEFE`
- desc: `PEACOCK X6258 PREMIUM 30 ROCKEFE`
- accts: Peacock
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`Spiegel Freedman Psych105 Braunl`
- desc: `SPIEGEL FREEDMAN PSYCH105 BRAUNL`
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`University Of Pittsburgh|Rnk Pittsburgh P3610 Forbe`
- desc: `TST*RNK PITTSBURGH - P3610 FORBE`
- **?** (conf 1.0, weight 5, 1 accts, 4 stmt) guess=`Www Costco Com`
- desc: `WWW COSTCO COM 800-955-2292`
- accts: WWW COSTCO COM 800-955-2292 WA
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Dave And Andy S Ho207`
- desc: `SQ *DAVE AND ANDY S HO207 ATWOOD`
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Enricos Tazza Do125 Lytton`
- desc: `SQ *ENRICO'S TAZZA D'O125 LYTTON`
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Hofbrauhaus Pittsburgh2705 S Wat`
- desc: `HOFBRAUHAUS PITTSBURGH2705 S WAT`
- **?** (conf 0.65, weight 4, 1 accts, 3 stmt) guess=`Luis Benitez`
- desc: `ZEL FROM Luis Benitez ZEL FROM Luis Benitez`
- accts: Luis Benitez
- **?** (conf 0.4, weight 4, 2 accts, 2 stmt) guess=`Pitt Tuition Pittpaymnt Ach Web`
- desc: `PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH WEB OPUxxx`
- desc: `PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH WEB OPUxxx`
- accts: PITT TUITION PITTPAYMNT ACH WEB OPUxxxx0412, PITT TUITION PITTPAYMNT ACH WEB OPUxxxx9683
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Schwab Brokerage Moneylink Ach W`
- desc: `SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ACH CREDIT`
- desc: `SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ACH CREDIT`
- desc: `SCHWAB BROKERAGE MONEYLINK ACH D SCHWAB BROKERAGE MONEYLINK ACH DEBIT `
- desc: `SCHWAB BROKERAGE MONEYLINK ACH W SCHWAB BROKERAGE MONEYLINK ACH WEB-RE`
- **?** (conf 1.0, weight 4, 1 accts, 3 stmt) guess=`Subaru Of South Hills`
- desc: `SUBARU OF SOUTH HILLS 3260 WASHI`
- accts: Subaru Of South Hills
## AUTO-PROPOSABLE (top 40 by volume)
- `GomobilePGH` (conf 1.0, weight 49, merges 4 accts) ids=[865, 642, 559, 781]
- `Sheetz` (conf 1.0, weight 43, merges 7 accts) ids=[566, 744, 739, 567, 774, 794, 738]
- `Autozone` (conf 1.0, weight 27, merges 6 accts) ids=[593, 812, 724, 714, 591, 806]
- `Sunoco` (conf 1.0, weight 27, merges 6 accts) ids=[599, 638, 827, 767, 820, 715]
- `Costco Whse` (conf 1.0, weight 22, merges 2 accts) ids=[842, 836]
- `Harbor Freight Tools` (conf 0.95, weight 18, merges 3 accts) ids=[878, 569, 737]
- `Petco` (conf 1.0, weight 15, merges 4 accts) ids=[546, 729, 797, 633]
- `Chick-fil-A` (conf 1.0, weight 14, merges 5 accts) ids=[630, 810, 832, 712, 702]
- `Costco Gas` (conf 1.0, weight 14, merges 2 accts) ids=[840, 837]
- `D J*wsj` (conf 1.0, weight 10, merges 1 accts) ids=[553]
- `Rockauto` (conf 0.94, weight 10, merges 1 accts) ids=[557]
- `University Club` (conf 0.86, weight 10, merges 2 accts) ids=[867, 637]
- `Chikn Oakland` (conf 1.0, weight 9, merges 1 accts) ids=[558]
- `Raising Cane's` (conf 1.0, weight 9, merges 3 accts) ids=[868, 561, 828]
- `Barnes & Noble` (conf 0.9, weight 7, merges 3 accts) ids=[603, 817, 658]
- `Lowe's` (conf 1.0, weight 7, merges 1 accts) ids=[673]
- `PMUSA` (conf 1.0, weight 7, merges 2 accts) ids=[885, 614]
- `Home Depot` (conf 0.83, weight 6, merges 1 accts) ids=[722]
- `REI` (conf 1.0, weight 6, merges 2 accts) ids=[684, 682]
- `Target` (conf 1.0, weight 6, merges 2 accts) ids=[605, 731]
- `The Saloon Of` (conf 0.82, weight 6, merges 2 accts) ids=[847, 801]
- `Best Buy` (conf 1.0, weight 5, merges 2 accts) ids=[751, 740]
- `Check` (conf 1.0, weight 5, merges 1 accts) ids=[524]
- `Expedia` (conf 1.0, weight 5, merges 2 accts) ids=[717, 711]
- `Michaels Stores` (conf 1.0, weight 5, merges 2 accts) ids=[587, 664]
- `Rita's` (conf 1.0, weight 5, merges 1 accts) ids=[882]
- `Als Corner` (conf 1.0, weight 4, merges 1 accts) ids=[762]
- `CVS Pharmacy` (conf 1.0, weight 4, merges 2 accts) ids=[783, 816]
- `Dunkin` (conf 1.0, weight 4, merges 2 accts) ids=[655, 846]
- `Five Guys` (conf 1.0, weight 4, merges 1 accts) ids=[723]
- `Redhawk Coffee` (conf 1.0, weight 4, merges 1 accts) ids=[721]
- `Sportsmans Warehouse` (conf 1.0, weight 4, merges 1 accts) ids=[568]
- `Taco Bell` (conf 1.0, weight 4, merges 2 accts) ids=[686, 691]
- `TNT Pizza` (conf 1.0, weight 4, merges 1 accts) ids=[624]
- `Act Cntyalleghenyprk` (conf 1.0, weight 3, merges 1 accts) ids=[776]
- `Butterjoint` (conf 1.0, weight 3, merges 1 accts) ids=[608]
- `Ctlp*csc Serviceworks` (conf 1.0, weight 3, merges 1 accts) ids=[650]
- `Fiori's Pizzaria` (conf 0.91, weight 3, merges 1 accts) ids=[551]
- `Get Go` (conf 1.0, weight 3, merges 1 accts) ids=[718]
- `Giant Eagle` (conf 1.0, weight 3, merges 1 accts) ids=[592]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

144
retirement_projection.py Normal file
View File

@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
Retirement trajectory projection for Dane.
Models portfolio growth from age 25 -> 65 under three post-PhD savings
scenarios, plus shows how negligible the one-time summer 401k contribution is.
All figures are in TODAY'S DOLLARS (real returns), so they're directly
comparable to current purchasing power. Edit the ASSUMPTIONS block and re-run.
/tmp/retire-venv/bin/python retirement_projection.py
"""
import matplotlib
matplotlib.use("Agg") # no display needed; save to file
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
# ----------------------------------------------------------------------------
# ASSUMPTIONS (edit these and re-run)
# ----------------------------------------------------------------------------
# Market / withdrawal assumptions. Everything is in real (inflation-adjusted)
# terms so results read as today's purchasing power.
REAL_RETURN = 0.07  # annual real return
SWR = 0.04          # safe withdrawal rate applied to the final balance

# Projection window (inclusive on both ends).
START_AGE = 25
END_AGE = 65

# Balance today: Roth IRA ($16k) + Schwab Stocks ($32k).
START_PORTFOLIO = 16_000 + 32_000

# 529 -> Roth IRA pipeline (grandparent-funded), keyed by age.
# $24.9k total, limited to $7k per year, so the last year is the remainder.
PIPELINE = {
    25: 7_000,
    26: 7_000,
    27: 7_000,
    28: 3_900,
}

# Own savings after the PhD (401k + brokerage). The PhD finishes at the end
# of 2027, so income -- and saving -- begins in 2028, i.e. at age 27.
POST_PHD_START_AGE = 27

# Scenario label -> annual post-PhD saving in dollars. The labels double as
# legend entries and as lookup keys further down the script.
SCENARIOS = {
    "Floor — no post-PhD saving": 0,
    "Moderate — $20k/yr post-PhD": 20_000,
    "Aggressive — $35k/yr post-PhD": 35_000,
}

# The contested decision: one summer of 10% Roth 401k, roughly $2,240,
# contributed once at age 25.
SUMMER_401K = 2_240
# ----------------------------------------------------------------------------
# MODEL
# ----------------------------------------------------------------------------
def project(post_phd_rate, summer_401k=0, *, start_portfolio=None,
            real_return=None, start_age=None, end_age=None,
            pipeline=None, post_phd_start_age=None):
    """Project the portfolio balance year by year.

    Each year the existing balance grows by the real return, then that
    year's contributions are added at year-end. The balance recorded for an
    age is the balance at the *start* of that year, so the first entry is
    the opening balance and the last entry reflects ``end_age - start_age``
    years of growth.

    Args:
        post_phd_rate: Annual saving added every year from
            ``post_phd_start_age`` onward.
        summer_401k: One-time amount added to the opening balance (it lands
            at the starting age, before any growth).
        start_portfolio: Opening balance. Defaults to ``START_PORTFOLIO``.
        real_return: Annual real return as a fraction. Defaults to
            ``REAL_RETURN``.
        start_age: First age in the projection. Defaults to ``START_AGE``.
        end_age: Last age in the projection, inclusive. Defaults to
            ``END_AGE``.
        pipeline: Mapping of age -> extra contribution for that year.
            Defaults to ``PIPELINE``.
        post_phd_start_age: First age at which ``post_phd_rate`` is added.
            Defaults to ``POST_PHD_START_AGE``.

    Returns:
        ``(ages, balances)``: two parallel lists, one entry per age from
        ``start_age`` through ``end_age``.
    """
    # Resolve overrides at call time so edits to the module-level
    # assumptions are always picked up by plain ``project(rate)`` calls.
    if start_portfolio is None:
        start_portfolio = START_PORTFOLIO
    if real_return is None:
        real_return = REAL_RETURN
    if start_age is None:
        start_age = START_AGE
    if end_age is None:
        end_age = END_AGE
    if pipeline is None:
        pipeline = PIPELINE
    if post_phd_start_age is None:
        post_phd_start_age = POST_PHD_START_AGE

    ages, balances = [], []
    bal = start_portfolio + summer_401k  # one-time contribution lands up front
    for age in range(start_age, end_age + 1):
        ages.append(age)
        balances.append(bal)
        # Contribution for THIS year, added at year-end after growth.
        contrib = pipeline.get(age, 0)
        if age >= post_phd_start_age:
            contrib += post_phd_rate
        bal = bal * (1 + real_return) + contrib
    return ages, balances
# ----------------------------------------------------------------------------
# PLOT
# ----------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 7.5))

# Line colour per scenario; keys must match the SCENARIOS labels exactly.
colors = {
    "Floor — no post-PhD saving": "#9aa0a6",
    "Moderate — $20k/yr post-PhD": "#1a73e8",
    "Aggressive — $35k/yr post-PhD": "#188038",
}

# Final (END_AGE) balance per scenario, reused by the annotation box and
# the text summary further down.
finals = {}
for label, rate in SCENARIOS.items():
    ages, bals = project(rate)
    final = bals[-1]
    finals[label] = final
    ax.plot(ages, bals, label=label, linewidth=2.6, color=colors[label])

    # Label the end of each curve with the final balance and the income it
    # supports. Position and withdrawal-rate text follow END_AGE / SWR so
    # they stay correct when the assumptions are edited.
    income = final * SWR
    ax.annotate(f" ${final/1e6:.2f}M\n (${income/1e3:.0f}k/yr @ {SWR * 100:g}%)",
                xy=(END_AGE, final), xytext=(END_AGE + 0.4, final),
                va="center", fontsize=10, fontweight="bold",
                color=colors[label])
# The summer 401k decision, stated fairly: the contribution compounds to the
# same gross figure in ANY account. What actually differs is Roth 401k vs
# taxable brokerage (Dane's real alternative -- the money gets saved either
# way, not spent), i.e. only how the growth is taxed.
# NOTE(review): summer_gain is a real (inflation-adjusted) gain, whereas
# capital-gains tax is normally assessed on the nominal gain, so this likely
# understates the taxable-account drag -- confirm the simplification is
# intended.
LTCG_RATE = 0.15
_years_invested = END_AGE - START_AGE
_growth_factor = (1 + REAL_RETURN) ** _years_invested
summer_gross = SUMMER_401K * _growth_factor      # identical in either account
summer_gain = summer_gross - SUMMER_401K
_ltcg_due = summer_gain * LTCG_RATE              # taxable account pays this on the gain
summer_taxable_net = summer_gross - _ltcg_due
roth_advantage = summer_gross - summer_taxable_net  # what the Roth wrapper saves
# Final balance of the aggressive scenario; used to place the marker label
# and quoted in the annotation box. The key must match a SCENARIOS label.
_aggressive_final = finals["Aggressive — $35k/yr post-PhD"]

# Vertical marker: PhD done / income starts.
ax.axvline(POST_PHD_START_AGE, color="#d93025", linestyle="--", alpha=0.55, linewidth=1.4)
# No arrow is drawn, so only xytext positions the label; xy is simply
# anchored on the x-axis at the marker.
ax.annotate("PhD done /\nincome starts\n(~2028)", xy=(POST_PHD_START_AGE, 0.0),
            xytext=(POST_PHD_START_AGE + 0.3, _aggressive_final*0.62),
            fontsize=9, color="#d93025")

# Annotation box: the summer 401k decision, framed honestly. The target age
# comes from END_AGE so the text tracks the assumptions.
txt = (f"The contested decision (summer 10% Roth 401k, ~${SUMMER_401K:,}):\n"
       f"• Grows to ~${summer_gross/1e3:.0f}k by {END_AGE} in ANY account\n"
       f"• vs taxable brokerage (your real alt.), the Roth tax\n"
       f" benefit is only ~${roth_advantage/1e3:.1f}k — the rest happens\n"
       f" whether it's Roth or not.\n"
       f"• On a ${_aggressive_final/1e6:.1f}M trajectory: a rounding error.")
ax.text(0.03, 0.97, txt, transform=ax.transAxes, fontsize=9.5,
        va="top", ha="left",
        bbox=dict(boxstyle="round,pad=0.5", facecolor="#fef7e0", edgecolor="#f9ab00"))
# Formatting. The title and x-range are derived from the assumptions so the
# chart stays truthful when REAL_RETURN or END_AGE is edited.
ax.set_title(f"Dane's Retirement Trajectory — today's dollars, {REAL_RETURN * 100:g}% real return",
             fontsize=14, fontweight="bold", pad=14)
ax.set_xlabel("Age", fontsize=11)
ax.set_ylabel("Portfolio value (today's $)", fontsize=11)
ax.yaxis.set_major_formatter(FuncFormatter(lambda v, _: f"${v/1e6:.1f}M"))
# Leave room to the right of END_AGE for the end-of-curve value labels.
ax.set_xlim(START_AGE, END_AGE + 4.5)
ax.set_ylim(bottom=0)
ax.grid(True, alpha=0.25)
ax.legend(loc="center left", bbox_to_anchor=(0.03, 0.55), fontsize=10, framealpha=0.9)
fig.tight_layout()

# NOTE(review): absolute, user-specific output path; the save fails on any
# machine where this directory does not exist.
out = "/Users/danesabo/Documents/Finances/retirement_projection.png"
fig.savefig(out, dpi=140, bbox_inches="tight")
# ----------------------------------------------------------------------------
# TEXT SUMMARY
# ----------------------------------------------------------------------------
# Console report mirroring the chart. Ages and rates are taken from the
# assumptions rather than hard-coded, so the text cannot disagree with the
# numbers it describes.
_rule = "=" * 64
_divider = "-" * 58
_age_header = f"Age {END_AGE}"
_income_header = f"Income @{SWR * 100:g}%"
_aggressive_total = finals['Aggressive — $35k/yr post-PhD']

print(_rule)
print(f"RETIREMENT PROJECTION (today's dollars, {REAL_RETURN * 100:g}% real return)")
print(_rule)
print(f"Start: ${START_PORTFOLIO:,} at age {START_AGE}")
print(f"529->Roth pipeline: ${sum(PIPELINE.values()):,} over ages "
      f"{min(PIPELINE)}-{max(PIPELINE)}")
print(f"Post-PhD saving starts at age {POST_PHD_START_AGE}\n")

# One row per scenario: final balance in $M and the income it supports.
print(f"{'Scenario':<32}{_age_header:>12}{_income_header:>14}")
print(_divider)
for label, final in finals.items():
    balance_cell = f"${final / 1e6:.2f}M"
    income_cell = f"${final * SWR / 1e3:.0f}k/yr"
    print(f"{label:<32}{balance_cell:>12}{income_cell:>14}")
print(_divider)

print(f"\nSummer 10% Roth 401k (~${SUMMER_401K:,} one-time at age {START_AGE}):")
print(f" grows to ~${summer_gross:,.0f} by age {END_AGE} in ANY account")
print(f" Roth-vs-taxable tax benefit only: ~${roth_advantage:,.0f}")
print(f" (= {roth_advantage/_aggressive_total*100:.3f}% "
      f"of the aggressive total — a rounding error)")
print(f"\nChart saved: {out}")

View File

@ -0,0 +1,86 @@
%PDF-1.4
%“Œ‹ž ReportLab Generated PDF document (opensource)
1 0 obj
<<
/F1 2 0 R /F2 3 0 R /F3 4 0 R /F4 5 0 R
>>
endobj
2 0 obj
<<
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
>>
endobj
3 0 obj
<<
/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
>>
endobj
4 0 obj
<<
/BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font
>>
endobj
5 0 obj
<<
/BaseFont /Symbol /Name /F4 /Subtype /Type1 /Type /Font
>>
endobj
6 0 obj
<<
/Contents 10 0 R /MediaBox [ 0 0 306 576 ] /Parent 9 0 R /Resources <<
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
>> /Rotate 0 /Trans <<
>>
/Type /Page
>>
endobj
7 0 obj
<<
/PageMode /UseNone /Pages 9 0 R /Type /Catalog
>>
endobj
8 0 obj
<<
/Author (Dane Sabo) /CreationDate (D:20260525192222-04'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260525192222-04'00') /Producer (ReportLab PDF Library - \(opensource\))
/Subject (\(unspecified\)) /Title (Sam's Bachelor Party - Settle Up) /Trapped /False
>>
endobj
9 0 obj
<<
/Count 1 /Kids [ 6 0 R ] /Type /Pages
>>
endobj
10 0 obj
<<
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1214
>>
stream
Gatm;>Ar4d'Ro4HS-PjQ<Ue;YA%PN0dY'MNGI*::<@_A[1BB/;a0eVis1[M4JIl1a,"PQ"R</8>o?@RGUgLY,m=bOA0)U!Q!>BFE!&K7YHOi('i5Yh!QGbO<R"QEe5+oYL0Z"=?nG4qD%^\%)pTH`7g,B&V$f?-qH1X)lr9b/os*XZi$BqZWDt#IVidSK>pO@/^a92IC?a#)l;?oFGOCs#Leq!<N\q9LD/9Ne;AUth>@dp.*ja.VPKNmqKm!p4q42N1Rd&L[k[eet=5j)"Y^J"l0@2o>'Oo>DJ!0;'/@5crR6&]8fl0RsO$3iqZNEAj>E7$^M[-m'h/>H1Am9)6jpYgEB7JGcf6+C-q(*0)jiOqRkTMqS]h&RfWncVDmRsErl4:$4?6T"g&Y7-1G-`VH"q4gLL1dP18iq*PRkNUF5bPZ!;o/gAHpcQ56W9Af51AL&<!SiPXKW,<M)B+BJK9H)\&76V#)$mom>U+6j4>FQNJH1]AAA5O==kX)tbHq`-mKSD,<Wb&*B'03WUY;MmZtr6dEr2@8T(pDiR^W>q1\d,2A2m>F<cQ^?.^LM!\lps+*TX'L3@fAn'&RK$e"3^Gq7F`E5t&#FBP8EpXmUq-R[^n@Qb2<<19MNe_'t8\73A^%*,E\_\:^VK_-;b,.3VWOXX#G2V6;&)6!b8L7\Ro9<Gn?d@s2;1M2a^4$c,cf1<CX#-PI@)5Ka!E>$.,!["Cdegcu]c^Kfd5EM3u9Y6hnp5Y`,R@g%c(S1C^P@NOUTb!aU,-?9Maf`'*V?CJqn&Es3E7qK]<:RG/6E]PW1GT<e#NRd;g,7aV[3,HPdI1Jb=069#iRTe;7iXs?Zs%-5?JY?[.43,Y9Feq??AI@1WaP(-57f(!U531KOn9G]k>fgO8GDp0G?F?_Gf$0r'd52HIKoFqbGjFBBSiX$7F?$/1YdtK`#p7Z/\]T)t?7Ym_YEaXoCMe[(9sU)Pf4V40m7u>@4Q/1qo3SH;/9:*^@72fgD[o+EA)hS@._.WEZ1E(o6V['-q!:Nfa47Z>C0:n;#SJR%4,AU*cqA[;S/qH\0fmWs4G[8kUQ2=XH>?fiS+!rt[*$W_E?_#cP/tW/ib]C7UBB!AfPG(12S7>h=C]Om*B2u-1(%$F>J50HQ/,dT:![f/k%`+eCt"n5Lo<nkUS7*J7-bOr5E&P>NuG92V:u1=#m>(@>g$gNRN!sU"Ug8ODu~>endstream
endobj
xref
0 11
0000000000 65535 f
0000000061 00000 n
0000000122 00000 n
0000000229 00000 n
0000000341 00000 n
0000000456 00000 n
0000000533 00000 n
0000000727 00000 n
0000000795 00000 n
0000001090 00000 n
0000001149 00000 n
trailer
<<
/ID
[<3d7dcb593c2c9ba6fed463683a4107b4><3d7dcb593c2c9ba6fed463683a4107b4>]
% ReportLab generated PDF document -- digest (opensource)
/Info 8 0 R
/Root 7 0 R
/Size 11
>>
startxref
2455
%%EOF