Compare commits
4 Commits
cc21c48b52
...
d7c6521cf0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d7c6521cf0 | ||
|
|
a37d8f4d2c | ||
|
|
e446c4097a | ||
|
|
26fb19ca9a |
199
fire_projection.py
Normal file
199
fire_projection.py
Normal file
@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FIRE / Coast-FIRE projection with a realistic rising-income career arc.
|
||||
|
||||
Key upgrade over retirement_projection.py: income is NOT flat. It follows a
|
||||
career arc, and savings is a PERCENTAGE of income — so both your nest egg AND
|
||||
your lifestyle scale as you earn more. This is what lets us show the real
|
||||
trade: a high savings rate hits FIRE faster BOTH because you save more AND
|
||||
because you need less (lower spending => lower 25x FIRE number).
|
||||
|
||||
All figures in TODAY'S DOLLARS (real returns).
|
||||
|
||||
/tmp/retire-venv/bin/python fire_projection.py
|
||||
"""
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# ASSUMPTIONS
|
||||
# ----------------------------------------------------------------------------
|
||||
REAL_RETURN = 0.07  # annual real return; simulate() grows the balance by this factor each year
SWR = 0.04  # 4% rule -> FIRE number = 25x annual spending
START_AGE = 25  # first simulated age (inclusive)
END_AGE = 65  # last simulated age (inclusive)
START_PORTFOLIO = 48_000  # Roth IRA + Schwab Stocks today
# Extra contribution by age, added on top of the strategy's own savings.
PIPELINE = {25: 7_000, 26: 7_000, 27: 7_000, 28: 3_900}  # 529->Roth

STIPEND = 34_000  # grad years (age 25-26)
START_SALARY = 130_000  # industry, starting age 27
TAX_GRAD = 0.08  # effective all-in tax rate, grad years
TAX_INDUSTRY = 0.30  # effective all-in tax rate, industry (fed+PA+FICA+local)
SPENDING_FLOOR = 50_000  # Dane's rule: never live on less than $50k/yr (today's $)
|
||||
|
||||
def gross_income(age):
    """Return gross income at `age` in today's dollars.

    Ages up to and including 26 earn the flat grad stipend. From 27 on,
    income starts at START_SALARY and compounds one real raise per elapsed
    year: 3% while under 40, 1% while under 50, 0.5% afterwards.
    """
    if age <= 26:
        return STIPEND

    salary = START_SALARY
    # One raise for every career year already completed before `age`.
    for career_year in range(27, age):
        if career_year < 40:
            growth = 1.03   # early career
        elif career_year < 50:
            growth = 1.01   # mid career
        else:
            growth = 1.005  # late career, roughly flat
        salary *= growth
    return salary
|
||||
|
||||
def tax_rate(age):
    """Return the effective tax rate for `age`: grad rate through 26, industry rate after."""
    if age <= 26:
        return TAX_GRAD
    return TAX_INDUSTRY
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# SAVINGS STRATEGIES — each returns SAVINGS DOLLARS given (age, gross, net).
|
||||
# The $50k spending floor is enforced: never save so much that you live on <$50k.
|
||||
# ----------------------------------------------------------------------------
|
||||
def hybrid_floor(age, gross, net):
    """Savings dollars for the hybrid plan.

    Nothing is saved through age 26. Ages 27-32 bank everything above the
    spending floor. After that the target loosens to 20% of gross, still
    capped so spending never drops below the floor.
    """
    if age <= 26:
        return 0.0

    # The most that can be saved without living below the floor.
    above_floor = max(0.0, net - SPENDING_FLOOR)
    if age <= 32:
        return above_floor  # phase 1: live at the floor

    loosened = gross * 0.20  # phase 2: 20% of gross
    return min(loosened, above_floor)
|
||||
|
||||
def steady_aggressive(age, gross, net):
    """Savings dollars for a flat 30%-of-gross plan, capped by the spending floor.

    Nothing is saved through age 26.
    """
    if age <= 26:
        return 0.0
    wanted = gross * 0.30
    above_floor = max(0.0, net - SPENDING_FLOOR)
    return min(wanted, above_floor)
|
||||
|
||||
def balanced(age, gross, net):
    """Savings dollars for a flat 18%-of-gross plan, capped by the spending floor.

    Nothing is saved through age 26.
    """
    if age <= 26:
        return 0.0
    wanted = gross * 0.18
    above_floor = max(0.0, net - SPENDING_FLOOR)
    return min(wanted, above_floor)
|
||||
|
||||
# Display label -> (savings function, line color).
# Each savings function takes (age, gross, net) and returns savings dollars.
STRATEGIES = {
    "Hybrid: hard 27-32 @ $50k floor, then loosen to 20%": (hybrid_floor, "#d93025"),
    "Steady aggressive (30%)": (steady_aggressive, "#188038"),
    "Balanced (18%)": (balanced, "#1a73e8"),
}
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# SIMULATE
|
||||
# ----------------------------------------------------------------------------
|
||||
def simulate(save_fn):
    """Run one savings strategy from START_AGE through END_AGE (inclusive).

    Args:
        save_fn: callable ``(age, gross, net) -> savings dollars`` for a year.

    Returns:
        dict with parallel per-year lists ``ages``, ``portfolio`` (balance at
        the START of that year, before growth and contributions),
        ``spending`` (net income minus own savings) and ``fire_num`` (the
        portfolio needed to fund that year's spending at the SWR), plus
        ``fire_age``: the first age >= 27 at which the start-of-year balance
        covers that year's FIRE number, or None if it never does.
    """
    ages, portfolio, spending, fire_num = [], [], [], []
    bal = START_PORTFOLIO
    fire_age = None
    for age in range(START_AGE, END_AGE + 1):
        gross = gross_income(age)
        net = gross * (1 - tax_rate(age))
        own_savings = save_fn(age, gross, net)
        spend = net - own_savings  # what you live on
        # FIRE target for THIS lifestyle, derived from the withdrawal-rate
        # assumption instead of a hard-coded 25 (SWR = 0.04 -> 25x spend).
        fnum = spend / SWR

        ages.append(age)
        portfolio.append(bal)
        spending.append(spend)
        fire_num.append(fnum)

        # Grad years are excluded: FIRE can only be declared from age 27.
        if fire_age is None and bal >= fnum and age >= 27:
            fire_age = age

        # Growth is applied to the opening balance; this year's contributions
        # (own savings plus any PIPELINE amount) land at year end.
        contrib = own_savings + PIPELINE.get(age, 0)
        bal = bal * (1 + REAL_RETURN) + contrib
    return dict(ages=ages, portfolio=portfolio, spending=spending,
                fire_num=fire_num, fire_age=fire_age)
|
||||
|
||||
# Run every strategy once; the plots below read from this mapping.
results = {}
for label, (save_fn, _unused_color) in STRATEGIES.items():
    results[label] = simulate(save_fn)

# ----------------------------------------------------------------------------
# PLOT  (2 panels: wealth+FIRE, then lifestyle)
# ----------------------------------------------------------------------------
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10.5), height_ratios=[1.6, 1])


def _format_dollars(v, _):
    """Axis tick label: $x.xM at or above one million, otherwise $xk."""
    if v >= 1e6:
        return f"${v/1e6:.1f}M"
    return f"${v/1e3:.0f}k"


money_fmt = FuncFormatter(_format_dollars)

# --- Panel 1: portfolio vs FIRE number ---
for label, (_fn, color) in STRATEGIES.items():
    r = results[label]
    ages = r["ages"]
    balances = r["portfolio"]
    ax1.plot(ages, balances, color=color, linewidth=2.6, label=label)
    # FIRE number (dotted, same color, thin)
    ax1.plot(ages, r["fire_num"], color=color, linewidth=1.1,
             linestyle=":", alpha=0.7)
    # mark FIRE achievement, if it happens inside the simulated window
    fa = r["fire_age"]
    if not fa:
        continue
    balance_at_fi = balances[ages.index(fa)]
    ax1.scatter([fa], [balance_at_fi], color=color, s=90, zorder=5,
                edgecolor="white", linewidth=1.5)
    ax1.annotate(f"FI at {fa}", xy=(fa, balance_at_fi),
                 xytext=(fa - 1.5, balance_at_fi + 0.35e6),
                 fontsize=9.5, fontweight="bold", color=color)

ax1.axvline(27, color="#5f6368", linestyle="--", alpha=0.5, linewidth=1.2)
ax1.annotate("PhD done / industry income (~2028)", xy=(27, 0),
             xytext=(27.3, 5.0e6), fontsize=9, color="#5f6368")
# lean-FI floor: $50k lifestyle => $1.25M
lean_fi_line = 25 * SPENDING_FLOOR
ax1.axhline(lean_fi_line, color="#f9ab00", linestyle="-.", alpha=0.8, linewidth=1.4)
ax1.annotate(f"Lean-FI floor: 25 x $50k = ${lean_fi_line/1e6:.2f}M",
             xy=(START_AGE, lean_fi_line),
             xytext=(START_AGE+0.3, lean_fi_line+0.18e6),
             fontsize=9, color="#b06000", fontweight="bold")
ax1.set_title("FIRE trajectories — solid = portfolio, dotted = FIRE number (25x spend)",
              fontsize=13, fontweight="bold")
ax1.set_ylabel("Today's $")
ax1.yaxis.set_major_formatter(money_fmt)
ax1.set_xlim(START_AGE, END_AGE)
ax1.set_ylim(bottom=0)
ax1.grid(True, alpha=0.25)
ax1.legend(loc="upper left", fontsize=9.5)

# --- Panel 2: lifestyle (annual spending) ---
for label, (_fn, color) in STRATEGIES.items():
    r = results[label]
    ax2.plot(r["ages"], r["spending"], color=color, linewidth=2.4, label=label)
ax2.set_title("What you actually live on each year (annual spending, today's $)",
              fontsize=13, fontweight="bold")
ax2.set_xlabel("Age")
ax2.set_ylabel("Annual spending")
ax2.yaxis.set_major_formatter(money_fmt)
ax2.set_xlim(START_AGE, END_AGE)
ax2.set_ylim(bottom=0)
ax2.grid(True, alpha=0.25)
# reference line: take-home pay on the grad stipend
grad_take_home = STIPEND*(1-TAX_GRAD)
ax2.axhline(grad_take_home, color="#9aa0a6", linestyle="--", alpha=0.6)
ax2.annotate("~current grad take-home", xy=(50, grad_take_home),
             xytext=(50, grad_take_home+2500), fontsize=8.5, color="#9aa0a6")
# the $50k floor — the line Dane won't cross
ax2.axhline(SPENDING_FLOOR, color="#f9ab00", linestyle="-.", alpha=0.9, linewidth=1.6)
ax2.annotate("$50k floor — won't live below this", xy=(40, SPENDING_FLOOR),
             xytext=(40, SPENDING_FLOOR+3000), fontsize=9, color="#b06000", fontweight="bold")
ax2.legend(loc="upper left", fontsize=9.5)

fig.tight_layout()
out = "/Users/danesabo/Documents/Finances/fire_projection.png"
fig.savefig(out, dpi=140, bbox_inches="tight")
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# TEXT SUMMARY
|
||||
# ----------------------------------------------------------------------------
|
||||
banner = "=" * 72
print(banner)
print("FIRE PROJECTION (today's dollars, 7% real, rising-income career arc)")
print(banner)
print(f"Income arc: ${STIPEND:,} stipend -> ${START_SALARY:,} at 27, "
      f"3%/1%/0.5% real raises\n")
LEAN_FI = 25 * SPENDING_FLOOR  # $1.25M -> permanently able to fall back to $50k


def _value_at(run, series, age):
    """Look up `series` ("portfolio" or "spending") of one run at the given age."""
    return run[series][run["ages"].index(age)]


for label in STRATEGIES:
    r = results[label]
    fa = r["fire_age"]
    # first age (27+) at which the portfolio reaches the lean-FI safety net
    lean_age = None
    for a, p in zip(r["ages"], r["portfolio"]):
        if p >= LEAN_FI and a >= 27:
            lean_age = a
            break
    lean_text = "age " + str(lean_age) if lean_age else "not by 65"
    fire_text = "age " + str(fa) if fa else "not by 65"

    print(label)
    print(f" $50k safety net funded (cross ${LEAN_FI/1e6:.2f}M): {lean_text}")
    print(f" Full FIRE on actual lifestyle: {fire_text}")
    spend_cells = " / ".join(
        f"${_value_at(r, 'spending', age)/1e3:.0f}k" for age in (30, 40, 50)
    )
    print(f" Lifestyle (annual spend) at 30/40/50: {spend_cells}")
    balance_cells = " / ".join(
        f"${_value_at(r, 'portfolio', age)/1e6:.2f}M" for age in (50, 65)
    )
    print(f" Portfolio at 50 / 65: {balance_cells}")
    print()
print(f"Chart saved: {out}")
|
||||
2239
merchant_map.json
2239
merchant_map.json
File diff suppressed because it is too large
Load Diff
107
migration/README.md
Normal file
107
migration/README.md
Normal file
@ -0,0 +1,107 @@
|
||||
# Firefly rebuild runbook
|
||||
|
||||
One-time migration: wipe the CSV-era transactions and rebuild from
|
||||
FITID-stable QFX so every transaction has a permanent dedup key and a clean
|
||||
account taxonomy. Read this before running anything in this folder.
|
||||
|
||||
## Why a rebuild (not in-place cleanup)
|
||||
|
||||
Firefly history is young (everything ~Aug 2025+, ~950 txns, minimal manual
|
||||
data). Old CSV imports left ~343 fragmented junk expense accounts and no
|
||||
stable external_ids. A clean rebuild keyed on QFX `FITID` is a better
|
||||
foundation than reassigning junk in place. Decided 2026-05-17.
|
||||
|
||||
## Hard prerequisites (do not skip)
|
||||
|
||||
1. **Firefly DB backup.** Destructive, no undo. Do not run the wipe until a
|
||||
DB dump/snapshot exists.
|
||||
2. **Exports** (in `../EXPORTS/`, gitignored): Apple/PNC/Costco QFX, Aug 2025
|
||||
-> now, FITID on 100% of rows. Schwab/Coinbase/Cash (~35 txns) are
|
||||
CSV-only/manual, handled separately.
|
||||
|
||||
## Reconciliation (the trust gate)
|
||||
|
||||
Per account: `opening_balance = QFX_ledger - sum(all that account's lines)`.
|
||||
Classification (transfer vs expense) never changes an account's own balance,
|
||||
so `opening + sum == ledger` must hold to the cent before trusting the wipe.
|
||||
Verified: PNC opening $6,866.10, Apple -$4,498.79, Costco -$2,541.57 (all
|
||||
tie). `rebuild_dryrun.py` recomputes this; re-run after any change.
|
||||
|
||||
## Classification rules (PNC = the hub)
|
||||
|
||||
- **Transfers** -- ALWAYS owned by the PNC leg: PNC's posting date and PNC's
|
||||
FITID are authoritative, the card/brokerage counterpart line is paired by
|
||||
amount (+/- a few days) and dropped. Every transfer lives under PNC, one
|
||||
consistent date, never double-counted. Pairs: APPLECARD GSBANK -> Apple
|
||||
Credit Card; CITI AUTOPAY -> Costco Visa Card; SCHWAB MONEYLINK -> Schwab
|
||||
Stocks/Savings (disambiguate by amount); ATM WITHDRAWAL -> Cash; CARVANA
|
||||
PAYOUT -> Illiquid Assets; big ATM DEPOSIT -> Coverdell; CAPITAL ONE ->
|
||||
Capital One (closed). Codified in the skill's `references/transfers.md`.
|
||||
- **Income/expense**: Pitt salary -> Wages; Duquesne Light -> Utilities:
|
||||
Electric; Compeer -> Rent; etc.
|
||||
- **Don't Know**: Venmo/CashApp/Zelle ("poker"), unrecallable checks, unknown
|
||||
ATM deposits -> the `Don't Know` account, review later. Never guessed.
|
||||
- **Special accounts**: `Illiquid Assets` (cars; sale = transfer in),
|
||||
`Don't Know` (catch-all). See the skill's memory / taxonomy notes.
|
||||
|
||||
## Investment accounts
|
||||
|
||||
Do NOT transaction-import Schwab/Roth/Coverdell/Coinbase (noise, and assets
|
||||
!= currency). Model as monthly-valued: opening balance + external MoneyLink
|
||||
transfers (from the PNC side) + one monthly valuation adjustment booked to
|
||||
`Investment Appreciation` / `Investment: Interest`. Dane supplies the current
|
||||
value at import; delta = the adjustment. Savings<->Stocks journals are
|
||||
transfers.
|
||||
|
||||
## Execution order
|
||||
|
||||
1. `python rebuild_dryrun.py` -> confirm all accounts still reconcile.
|
||||
2. Build the full normalized dataset (PNC + Apple + Costco, transfers typed,
|
||||
payments paired/deduped, opening balances set).
|
||||
3. Drive review via the skill's browser workflow
|
||||
(`references/review-workflow.md`): `--review-html`, resolve the ~190 tail
|
||||
merchants in-situ (search-then-ask, <80% => ask), Export `decisions.json`.
|
||||
4. **Confirm DB backup exists.**
|
||||
5. Wipe transactions, prune empty junk expense accounts.
|
||||
6. `--decisions decisions.json --post`. Reconcile final balances against the
|
||||
derived figures above.
|
||||
|
||||
## Files here
|
||||
|
||||
- `rebuild_pnc.py` -- PNC classifier + reconciliation (read-only)
|
||||
- `rebuild_dryrun.py` -- consolidated per-account reconciliation (read-only)
|
||||
- `pnc_classified.json` -- PNC classification output
|
||||
- `merchant_clusters.{json,md}` -- cluster proposal (taxonomy bootstrap)
|
||||
- `mock_firefly.py` -- stdlib mock used for skill eval/testing
|
||||
- `*review_preview*.html` -- review-UI previews on real data
|
||||
|
||||
Nothing here writes to Firefly except the final `--post` in step 6.
|
||||
|
||||
## Lessons from the first rebuild (2026-05-20)
|
||||
|
||||
Captured here so a second rebuild doesn't re-discover them.
|
||||
|
||||
- **Orphan paired transfers**: the PNC->Apple payment from 2025-08-01 has no
|
||||
Apple-side line (Apple's QFX starts 08-02). Its effect was already in
|
||||
Apple's derived opening; posting the transfer ALSO crediting Apple
|
||||
double-counted by $3,218. Fix: `build_rebuild_dataset.py` now subtracts
|
||||
orphan transfer amounts from the destination card's opening. See
|
||||
`references/transfers.md` in the skill.
|
||||
- **Asset accounts require `account_role`** on POST /accounts. `defaultAsset`
|
||||
works universally.
|
||||
- **Budgets do not auto-create.** If wiping to scratch, recreate Needs /
|
||||
Wants / Savings via UI or POST before the import.
|
||||
- **Wipe via UI leaves stale revenue accounts / categories** (only
|
||||
transaction-referenced asset accounts go). Prune manually if you want a
|
||||
truly clean slate.
|
||||
- **Strip cached `account_id` from `merchant_map.json` before any rebuild.**
|
||||
Pre-wipe ids are invalid post-wipe. The skill no longer caches to the map
|
||||
(in-memory only) but old maps may still carry stale ids.
|
||||
- **Background Python with `nohup ... &` can lose stdout to buffering.** Use
|
||||
`python -u` for the import step. The first rebuild's log was empty because
|
||||
Python buffered everything and we mistook it for "ran but did nothing."
|
||||
- **`error_if_duplicate_hash` is now off** — Firefly's content-hash dedup
|
||||
was too eager (rejected legit-distinct rows with same date+amt+desc, like
|
||||
two parking sessions same garage). `external_id` precheck is the only dedup.
|
||||
- **Wipe by deleting transactions, not by deleting accounts.** Otherwise you
|
||||
end up with stale ids referenced by merchant_map cache.
|
||||
134
migration/build_rebuild_dataset.py
Normal file
134
migration/build_rebuild_dataset.py
Normal file
@ -0,0 +1,134 @@
|
||||
"""Build the full rebuild dataset from the 3 QFX (READ-ONLY).
|
||||
|
||||
Emits one normalized.json (the skill's schema) for ALL of PNC + Apple +
|
||||
Costco, with:
|
||||
- transfers OWNED BY THE PNC LEG (PNC date + FITID authoritative); the
|
||||
Apple PAYMENT lines and Costco positive AUTOPAY lines are the
|
||||
counterparts and are DROPPED (identified by TRNTYPE / description only; this script does no amount or date pairing).
|
||||
- PNC classified per the runbook (income / expense / Don't Know / special).
|
||||
- Apple/Costco: negative = withdrawal (merchant), positive = deposit
|
||||
(refund). merchant_map matching is left to firefly_import.py downstream.
|
||||
- per-account reconciliation: opening + sum(its kept lines) must == QFX
|
||||
ledger, else abort (no silent data loss).
|
||||
|
||||
Nothing is posted. Output feeds `firefly_import.py --emit-plan/--review-html`.
|
||||
"""
|
||||
import re, json, hashlib, sys, html
|
||||
from collections import Counter
|
||||
|
||||
# Directory holding the exported statement files (absolute, machine-specific).
D = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026"
# Asset-account name -> (path to its QFX export, short source tag).
# The tag is stamped on every record as "source_tag".
SRC = {
    "PNC Checking": (f"{D}/PNC7552Aug012025-May152025.QFX", "pnc"),
    "Apple Credit Card": (f"{D}/Apple Card Transactions Aug 01 2025 - May 17 2026.qfx", "apple"),
    "Costco Visa Card": (f"{D}/CitiCostcoCard Aug012025-May172025.QFX","costco"),
}
|
||||
|
||||
def parse(path):
    """Parse one QFX export into ``(ledger_balance, transactions)``.

    The file is scanned with regexes rather than an SGML/XML parser: each
    field value is whatever follows its ``<TAG>`` up to the next ``<`` or
    line break.

    Args:
        path: path to the QFX file.

    Returns:
        ``(ledger, txns)`` where ``ledger`` is the ``<BALAMT>`` found after
        ``<LEDGERBAL>`` as a float, and ``txns`` is a list of dicts with keys
        ``date`` (first 8 chars of DTPOSTED, i.e. YYYYMMDD), ``amt`` (float
        TRNAMT), ``ttype`` (upper-cased TRNTYPE), ``desc`` (NAME and MEMO
        joined by a space, HTML entities unescaped) and ``fitid``. A missing
        text field yields an empty string.

    Raises:
        ValueError: if the file has no ``<LEDGERBAL>``/``<BALAMT>``, or a
            balance/amount field is not a valid number.
    """
    # latin-1 maps every byte to a character, so decoding cannot fail.
    with open(path, encoding="latin-1", errors="replace") as fh:
        text = fh.read()

    balance = re.search(r"<LEDGERBAL>.*?<BALAMT>([^<\r\n]*)", text, re.S | re.I)
    if balance is None:
        raise ValueError(f"{path}: no <LEDGERBAL>/<BALAMT> found")
    ledger = float(balance.group(1))

    # A transaction runs from its <STMTTRN> to the next one (or the list end).
    blocks = re.findall(r"<STMTTRN>(.*?)(?=<STMTTRN>|</BANKTRANLIST>)", text, re.S | re.I)

    def field(block, tag):
        """Return the stripped value following <tag> in block, or "" if absent."""
        found = re.search(rf"<{tag}>([^<\r\n]*)", block, re.I)
        return found.group(1).strip() if found else ""

    out = []
    for block in blocks:
        out.append({
            "date": field(block, "DTPOSTED")[:8],
            "amt": float(field(block, "TRNAMT")),
            "ttype": field(block, "TRNTYPE").upper(),
            "desc": html.unescape((field(block, "NAME") + " " + field(block, "MEMO")).strip()),
            "fitid": field(block, "FITID"),
        })
    return ledger, out
|
||||
|
||||
def iso(d):
    """Convert a YYYYMMDD-prefixed string to YYYY-MM-DD.

    Anything shorter than 8 characters is returned unchanged; characters past
    the eighth are ignored.
    """
    if len(d) < 8:
        return d
    year, month, day = d[:4], d[4:6], d[6:8]
    return "-".join((year, month, day))
|
||||
|
||||
# ---- PNC classification (runbook) ---------------------------------------
|
||||
def classify_pnc(desc, amt):
    """Classify one PNC line as ``(kind, target)``.

    ``kind`` is "transfer", "deposit", "withdrawal", "dontknow" or "raw";
    ``target`` is the counterpart account / canonical name, or None for "raw"
    (left to merchant_map / review downstream). Matching is a case-insensitive
    substring test on the description, and the FIRST rule that matches wins.
    """
    text = desc.upper()

    def schwab_account():
        # amount disambiguation per the Schwab JSONs
        return "Schwab Savings" if abs(amt) in (5000.0, 3550.0) else "Schwab Stocks"

    def big_atm_deposit():
        return "ATM DEPOSIT" in text and abs(amt) > 10000

    def pitt_pay():
        return "UNIV PITTSBURGH" in text and ("PAYROLL" in text or "SALARY" in text)

    def unknown_counterparty():
        markers = ("VENMO CASHOUT", "CASH APP", "ZEL FROM", "ATM SURCHARGE", "YARDI")
        return any(marker in text for marker in markers)

    # (test, kind, target) in priority order. A string test is a substring
    # match; a callable test or target is evaluated only when reached.
    rules = (
        ("APPLECARD GSBANK PAYMENT", "transfer", "Apple Credit Card"),
        ("CITI AUTOPAY PAYMENT", "transfer", "Costco Visa Card"),
        ("SCHWAB BROKERAGE MONEYLINK", "transfer", schwab_account),
        ("ATM WITHDRAWAL", "transfer", "Cash"),
        ("CARVANA PAYOUT", "transfer", "Illiquid Assets"),
        (big_atm_deposit, "transfer", "Coverdell"),
        ("CAPITAL ONE TRANSFER", "transfer", "Capital One"),
        (pitt_pay, "deposit", "Pitt Salary"),
        ("INTEREST PAYMENT", "deposit", "Interest Income"),
        ("IRS TREAS 310", "deposit", "IRS Refund"),
        ("DUQUESNE LIGHT", "withdrawal", "Duquesne Light"),
        ("COMPEER", "withdrawal", "Compeer Investments"),
        ("PITT TUITION", "withdrawal", "University of Pittsburgh"),
        (unknown_counterparty, "dontknow", "Don't Know"),
    )
    for test, kind, target in rules:
        matched = test() if callable(test) else test in text
        if matched:
            return (kind, target() if callable(target) else target)
    return ("raw", None)  # leave to merchant_map / review downstream
|
||||
|
||||
# Walk every source file once: derive each account's opening balance and
# emit one normalized record per kept transaction line.
records, recon, dropped = [], {}, Counter()
for acct, (path, tag) in SRC.items():
    ledger, txns = parse(path)
    # The sum covers EVERY parsed line, including the card payment lines that
    # are dropped from `records` below.
    s = round(sum(t["amt"] for t in txns), 2)
    opening = round(ledger - s, 2)
    # NOTE(review): `opening` is derived from `ledger` and `s`, so this check
    # can only differ by rounding and will not catch a dropped or missing
    # line; it is kept because the report and abort logic read "ties".
    recon[acct] = {"ledger": ledger, "sum": s, "opening": opening,
                   "ties": abs(opening + s - ledger) < 0.01}
    for t in txns:
        amt, d = t["amt"], t["desc"]
        # Card-side legs of PNC payments are dropped so the PNC leg owns the
        # transfer.
        # NOTE(review): lines are dropped by type/description alone; nothing
        # here verifies that a matching PNC line exists.
        if acct == "Apple Credit Card" and t["ttype"] == "PAYMENT":
            dropped["apple_payment(paired->PNC)"] += 1
            continue
        if acct == "Costco Visa Card" and amt > 0 and "AUTOPAY" in d.upper():
            dropped["costco_autopay(paired->PNC)"] += 1
            continue

        # NOTE(review): an unused `tag:fitid-or-sha1` id used to be computed
        # here. If downstream needs a fallback key for FITID-less rows, it
        # has to be stored on the record explicitly.
        rec = {"date": iso(t["date"]), "amount": f"{abs(amt):.2f}",
               "description": d, "asset_account": acct, "source_tag": tag,
               "source_txn_id": t["fitid"] or None, "currency_code": "USD"}
        by_sign = "withdrawal" if amt < 0 else "deposit"

        # Only PNC lines go through the runbook classifier; card lines are
        # typed purely by sign.
        if acct == "PNC Checking":
            kind, target = classify_pnc(d, amt)
        else:
            kind, target = "raw", None

        if kind == "transfer":
            rec["type"] = "transfer"
            if amt < 0:
                # money leaving PNC: PNC stays the source account
                rec["destination_account"] = target
            else:
                # money arriving at PNC: the counterpart becomes the source
                rec["asset_account"] = target
                rec["destination_account"] = "PNC Checking"
        elif kind in ("deposit", "withdrawal"):
            rec["type"] = kind
            rec["_canonical"] = target
        else:
            rec["type"] = by_sign
            if kind == "dontknow":
                rec["_canonical"] = "Don't Know"
        records.append(rec)
|
||||
|
||||
# --- Orphan adjustment: a PNC->Apple/Costco payment whose date predates the
|
||||
# card QFX window has its card-side effect already baked into the card's
|
||||
# DERIVED opening (because opening = ledger - sum of ALL parsed card lines, and the
|
||||
# orphan never appeared on the card side). If we ALSO post the PNC->card
|
||||
# transfer in the rebuild, the card account gets credited twice. So subtract
|
||||
# orphan transfer amounts from the card opening.
|
||||
APPLE_WINDOW_START = "2025-08-02"
COSTCO_WINDOW_START = "2025-08-02"

# Card account -> first date covered by that card's QFX. ISO dates compare
# correctly as plain strings.
_card_window_start = {
    "Apple Credit Card": APPLE_WINDOW_START,
    "Costco Visa Card": COSTCO_WINDOW_START,
}
for r in records:
    # Only outbound PNC transfers can be orphans.
    if r.get("type") != "transfer" or r["asset_account"] != "PNC Checking":
        continue
    card = r.get("destination_account")
    window_start = _card_window_start.get(card)
    if window_start is None or not r["date"] < window_start:
        continue
    # The payment predates the card's window: take it back out of the opening.
    recon[card]["opening"] = round(recon[card]["opening"] - float(r["amount"]), 2)
|
||||
|
||||
# Report the per-account reconciliation, abort if any account fails to tie,
# otherwise write the normalized records and the recon table to /tmp.
print("=== RECONCILIATION (must all tie) ===")
for a, r in recon.items():
    flag = "OK" if r["ties"] else "*** MISMATCH ***"
    print(f" {a:20} ledger {r['ledger']:>11,.2f} Σ {r['sum']:>11,.2f} "
          f"opening {r['opening']:>11,.2f} {flag}")
ok = all(entry["ties"] for entry in recon.values())
print("dropped (paired counterparts):", dict(dropped))
print(f"normalized records: {len(records)}")
if not ok:
    print("ABORT: a reconciliation does not tie.", file=sys.stderr)
    sys.exit(1)
# Context managers make sure both files are flushed and closed before the
# script reports success.
with open("/tmp/rebuild_normalized.json", "w") as fh:
    json.dump(records, fh, indent=1)
with open("/tmp/rebuild_recon.json", "w") as fh:
    json.dump(recon, fh, indent=1)
print("wrote /tmp/rebuild_normalized.json")
|
||||
299
migration/pnc_review_preview.html
Normal file
299
migration/pnc_review_preview.html
Normal file
File diff suppressed because one or more lines are too long
6229
migration/rebuild_clusters.json
Normal file
6229
migration/rebuild_clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
192
migration/rebuild_clusters.md
Normal file
192
migration/rebuild_clusters.md
Normal file
@ -0,0 +1,192 @@
|
||||
# Merchant cluster proposal
|
||||
|
||||
- 386 clusters from 372 accounts + 1017 statement txns
|
||||
- **142** auto-proposable (>=0.80, clean canonical)
|
||||
- **244** NEED DANE (ambiguous / junky canonical / new merchant)
|
||||
|
||||
## NEEDS DANE (top 40 by volume)
|
||||
_For each: what is the real merchant? You can type a name; it becomes a permanent rule._
|
||||
|
||||
- **?** (conf 0.57, weight 75, 28 accts, 47 stmt) guess=`Amazon`
|
||||
- desc: `AMAZON MARK* B00SF6VV0410 TERRY`
|
||||
- desc: `AMAZON.COM*9R3UC0N93 440 TERRY A`
|
||||
- desc: `AMAZON.COM*N428X9Q71 440 TERRY A`
|
||||
- desc: `AMAZON MARK* B008Z3VV0410 TERRY`
|
||||
- desc: `AMAZON MARK* B03Y156K1410 TERRY`
|
||||
- desc: `AMAZON MARK* B204T9M31410 TERRY`
|
||||
- accts: Amazon, Amazon Mark* B008z3vv0, Amazon Mark* B00sf6vv0, Amazon Mark* B00sf6vv0410 Terry Avenue North Seattle 98109 Wa Usa (return), Amazon Mark* B03y156k1, Amazon Mark* B204t9m31
|
||||
- **?** (conf 0.4, weight 56, 0 accts, 56 stmt) guess=`University Of Pittsburgh|Pitt Parking Pay Stati127 North`
|
||||
- desc: `PITT PARKING PAY STATI127 NORTH`
|
||||
- **?** (conf 0.78, weight 37, 7 accts, 30 stmt) guess=`McDonald's`
|
||||
- desc: `MCDONALDS 1862 3708 FORBES AVE P`
|
||||
- desc: `MCDONALDS 1102 225 MOUNT LEBANON`
|
||||
- desc: `MCDONALD'S F1862 3708 FORBES AVE`
|
||||
- desc: `MCDONALD'S F1102 225 MT LEBANON`
|
||||
- desc: `MCDONALDS 5834 2518 W LIBERTY RD`
|
||||
- desc: `MCDONALD'S F27387 1412 B MAIN ST`
|
||||
- accts: McDonald's, Mcdonald's F1102, Mcdonald's F1862, Mcdonald's F27387, Mcdonalds 1862, Mcdonalds 33234
|
||||
- **?** (conf 1.0, weight 30, 0 accts, 30 stmt) guess=`Castle Shannon Shop`
|
||||
- desc: `CASTLE SHANNON SHOP' 799 CASTLE`
|
||||
- **?** (conf 0.71, weight 30, 2 accts, 28 stmt) guess=`Market District`
|
||||
- desc: `MARKET DISTRICT #0014 7000 OXFOR`
|
||||
- desc: `MARKET DISTRICT #0047 100 SETTLE`
|
||||
- accts: Market District, Market District Supermarket
|
||||
- **?** (conf 0.4, weight 18, 0 accts, 18 stmt) guess=`Apple Com Bill One Apple`
|
||||
- desc: `APPLE.COM/BILL ONE APPLE PARK WA`
|
||||
- desc: `APPLE.COM/US ONE APPLE PARK WAY`
|
||||
- desc: `APPLE.COM/BILL ONE APPLE PARK CU`
|
||||
- **?** (conf 0.47, weight 18, 8 accts, 10 stmt) guess=`Compeer`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB C5R6`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB MD64`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB 3Y6Q`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB R34S`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB D9FZ`
|
||||
- desc: `COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS ACH WEB F394`
|
||||
- accts: COMPEER-COMP-CP WEB PMTS ACH WEB 3Y6QDL, COMPEER-COMP-CP WEB PMTS ACH WEB 7Y648K, COMPEER-COMP-CP WEB PMTS ACH WEB D9FZ0L, COMPEER-COMP-CP WEB PMTS ACH WEB F394TK, COMPEER-COMP-CP WEB PMTS ACH WEB JS0NNK, COMPEER-COMP-CP WEB PMTS ACH WEB K7TDFK
|
||||
- **?** (conf 0.4, weight 18, 0 accts, 18 stmt) guess=`Sq *La Gourmandine Oak116 Meyran`
|
||||
- desc: `SQ *LA GOURMANDINE OAK116 MEYRAN`
|
||||
- **?** (conf 1.0, weight 17, 0 accts, 17 stmt) guess=`Kuhns Banksville`
|
||||
- desc: `KUHNS BANKSVILLE 3125 BANKSVILLE`
|
||||
- **?** (conf 0.75, weight 13, 4 accts, 9 stmt) guess=`Starbucks`
|
||||
- desc: `STARBUCKS STORE 27117 4022 FIFTH`
|
||||
- desc: `STARBUCKS 27117 4022 5TH AVE PIT`
|
||||
- desc: `STARBUCKS 8007827282 2401 UTAH A`
|
||||
- accts: Starbucks, Starbucks 27117, Starbucks 8007827282, Starbucks Store 27117
|
||||
- **?** (conf 0.4, weight 11, 0 accts, 11 stmt) guess=`Claude Ai Subscription548 Market`
|
||||
- desc: `CLAUDE.AI SUBSCRIPTION548 MARKET`
|
||||
- **?** (conf 0.61, weight 11, 2 accts, 9 stmt) guess=`Duquesne Light`
|
||||
- desc: `DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH DEBIT xxxx`
|
||||
- accts: DUQUESNE LIGHT PAYMENT ACH DEBIT xxxxxx5333, Duquesne Light
|
||||
- **?** (conf 0.4, weight 11, 1 accts, 10 stmt) guess=`T2`
|
||||
- desc: `T2* MT LEBANON PA 8900 KEYSTONE`
|
||||
- accts: T2* Mt Lebanon Pa
|
||||
- **?** (conf 1.0, weight 10, 1 accts, 9 stmt) guess=`Comcast / Xfinity`
|
||||
- desc: `COMCAST / XFINITY 15 SUMMIT PARK`
|
||||
- accts: Comcast / Xfinity
|
||||
- **?** (conf 1.0, weight 10, 0 accts, 10 stmt) guess=`Interest Payment Interest Payment`
|
||||
- desc: `INTEREST PAYMENT INTEREST PAYMENT`
|
||||
- **?** (conf 0.4, weight 10, 0 accts, 10 stmt) guess=`Upmc Student Insurance600 Grant`
|
||||
- desc: `UPMC STUDENT INSURANCE600 GRANT`
|
||||
- **?** (conf 0.4, weight 9, 0 accts, 9 stmt) guess=`Applecard Gsbank Payment Ach Web`
|
||||
- desc: `APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT ACH WEB-RECU`
|
||||
- desc: `APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT ACH WEB xxxx`
|
||||
- **?** (conf 0.4, weight 9, 0 accts, 9 stmt) guess=`Citi Autopay Payment Ach Web`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- desc: `CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WEB-RECUR xx`
|
||||
- **?** (conf 1.0, weight 9, 0 accts, 9 stmt) guess=`Daily Cash Adjustment`
|
||||
- desc: `DAILY CASH ADJUSTMENT`
|
||||
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Ebay O`
|
||||
- desc: `EBAY O*07-14287-66191 2535 NORTH`
|
||||
- desc: `EBAY O*07-14287-66190 2535 NORTH`
|
||||
- desc: `EBAY O*07-14287-66189 2535 NORTH`
|
||||
- desc: `EBAY O*07-14287-66188 2535 NORTH`
|
||||
- desc: `EBAY O*07-14287-66187 2535 NORTH`
|
||||
- desc: `EBAY O*07-14287-66186 2535 NORTH`
|
||||
- **?** (conf 0.9, weight 7, 1 accts, 6 stmt) guess=`Needle & Bean`
|
||||
- desc: `SQ *NEEDLE & BEAN 320 CASTLE`
|
||||
- accts: Needle & Bean
|
||||
- **?** (conf 0.4, weight 7, 0 accts, 7 stmt) guess=`University Of Pittsburgh|Univ Pittsburgh Salary Ach Credi`
|
||||
- desc: `UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH CREDIT xx0`
|
||||
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Youtube Tv`
|
||||
- desc: `GOOGLE *YOUTUBE TV 1600 AMPHITHE`
|
||||
- **?** (conf 0.62, weight 6, 2 accts, 4 stmt) guess=`Liberty Mutual`
|
||||
- desc: `LIBERTY MUTUAL 175 BERKELEY ST 8`
|
||||
- desc: `LIBERTY MUTUAL ATTN: COURTNEY MU`
|
||||
- accts: Liberty Mutual
|
||||
- **?** (conf 0.53, weight 6, 2 accts, 4 stmt) guess=`Openai`
|
||||
- desc: `OPENAI *CHATGPT SUBSCR548 MARKET`
|
||||
- desc: `OPENAI 1455 3RD STREET SAN FRANC`
|
||||
- accts: Openai, Openai *chatgpt Subscr
|
||||
- **?** (conf 0.4, weight 6, 0 accts, 6 stmt) guess=`Spo P&Amp Gspamelasdiner3703 F`
|
||||
- desc: `SPO*P&G'SPAMELA'SDINER3703 F`
|
||||
- **?** (conf 1.0, weight 6, 2 accts, 4 stmt) guess=`Svdp Castle Shannon`
|
||||
- desc: `SVDP CASTLE SHANNON 3423 LIBRARY`
|
||||
- accts: SVDP Castle Shannon, Svdp Castle Shannon
|
||||
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`Bp 9604786Ukani Broqps2900 Banks`
|
||||
- desc: `BP#9604786UKANI BROQPS2900 BANKS`
|
||||
- **?** (conf 0.4, weight 5, 2 accts, 3 stmt) guess=`Capital One Transfer Ach Web`
|
||||
- desc: `CAPITAL ONE TRANSFER ACH WEB RT0 CAPITAL ONE TRANSFER ACH WEB RT0D854F`
|
||||
- desc: `CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WEB PAYMENT `
|
||||
- desc: `CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WEB PAYMENT `
|
||||
- accts: CAPITAL ONE TRANSFER ACH WEB PAYMENT RT04E16C0EA8E68, CAPITAL ONE TRANSFER ACH WEB PAYMENT RT097FE1F911EB7
|
||||
- **?** (conf 0.69, weight 5, 1 accts, 4 stmt) guess=`Peacock`
|
||||
- desc: `PEACOCK 75AE1 PREMIUM 30 ROCKEFE`
|
||||
- desc: `PEACOCK 81D06 PREMIUM 30 ROCKEFE`
|
||||
- desc: `PEACOCK EF701 PREMIUM 30 ROCKEFE`
|
||||
- desc: `PEACOCK X6258 PREMIUM 30 ROCKEFE`
|
||||
- accts: Peacock
|
||||
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`Spiegel Freedman Psych105 Braunl`
|
||||
- desc: `SPIEGEL FREEDMAN PSYCH105 BRAUNL`
|
||||
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`University Of Pittsburgh|Rnk Pittsburgh P3610 Forbe`
|
||||
- desc: `TST*RNK PITTSBURGH - P3610 FORBE`
|
||||
- **?** (conf 1.0, weight 5, 1 accts, 4 stmt) guess=`Www Costco Com`
|
||||
- desc: `WWW COSTCO COM 800-955-2292`
|
||||
- accts: WWW COSTCO COM 800-955-2292 WA
|
||||
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Dave And Andy S Ho207`
|
||||
- desc: `SQ *DAVE AND ANDY S HO207 ATWOOD`
|
||||
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Enricos Tazza Do125 Lytton`
|
||||
- desc: `SQ *ENRICO'S TAZZA D'O125 LYTTON`
|
||||
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Hofbrauhaus Pittsburgh2705 S Wat`
|
||||
- desc: `HOFBRAUHAUS PITTSBURGH2705 S WAT`
|
||||
- **?** (conf 0.65, weight 4, 1 accts, 3 stmt) guess=`Luis Benitez`
|
||||
- desc: `ZEL FROM Luis Benitez ZEL FROM Luis Benitez`
|
||||
- accts: Luis Benitez
|
||||
- **?** (conf 0.4, weight 4, 2 accts, 2 stmt) guess=`Pitt Tuition Pittpaymnt Ach Web`
|
||||
- desc: `PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH WEB OPUxxx`
|
||||
- desc: `PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH WEB OPUxxx`
|
||||
- accts: PITT TUITION PITTPAYMNT ACH WEB OPUxxxx0412, PITT TUITION PITTPAYMNT ACH WEB OPUxxxx9683
|
||||
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Schwab Brokerage Moneylink Ach W`
|
||||
- desc: `SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ACH CREDIT`
|
||||
- desc: `SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ACH CREDIT`
|
||||
- desc: `SCHWAB BROKERAGE MONEYLINK ACH D SCHWAB BROKERAGE MONEYLINK ACH DEBIT `
|
||||
- desc: `SCHWAB BROKERAGE MONEYLINK ACH W SCHWAB BROKERAGE MONEYLINK ACH WEB-RE`
|
||||
- **?** (conf 1.0, weight 4, 1 accts, 3 stmt) guess=`Subaru Of South Hills`
|
||||
- desc: `SUBARU OF SOUTH HILLS 3260 WASHI`
|
||||
- accts: Subaru Of South Hills
|
||||
|
||||
## AUTO-PROPOSABLE (top 40 by volume)
|
||||
|
||||
- `GomobilePGH` (conf 1.0, weight 49, merges 4 accts) ids=[865, 642, 559, 781]
|
||||
- `Sheetz` (conf 1.0, weight 43, merges 7 accts) ids=[566, 744, 739, 567, 774, 794, 738]
|
||||
- `Autozone` (conf 1.0, weight 27, merges 6 accts) ids=[593, 812, 724, 714, 591, 806]
|
||||
- `Sunoco` (conf 1.0, weight 27, merges 6 accts) ids=[599, 638, 827, 767, 820, 715]
|
||||
- `Costco Whse` (conf 1.0, weight 22, merges 2 accts) ids=[842, 836]
|
||||
- `Harbor Freight Tools` (conf 0.95, weight 18, merges 3 accts) ids=[878, 569, 737]
|
||||
- `Petco` (conf 1.0, weight 15, merges 4 accts) ids=[546, 729, 797, 633]
|
||||
- `Chick-fil-A` (conf 1.0, weight 14, merges 5 accts) ids=[630, 810, 832, 712, 702]
|
||||
- `Costco Gas` (conf 1.0, weight 14, merges 2 accts) ids=[840, 837]
|
||||
- `D J*wsj` (conf 1.0, weight 10, merges 1 accts) ids=[553]
|
||||
- `Rockauto` (conf 0.94, weight 10, merges 1 accts) ids=[557]
|
||||
- `University Club` (conf 0.86, weight 10, merges 2 accts) ids=[867, 637]
|
||||
- `Chikn Oakland` (conf 1.0, weight 9, merges 1 accts) ids=[558]
|
||||
- `Raising Cane's` (conf 1.0, weight 9, merges 3 accts) ids=[868, 561, 828]
|
||||
- `Barnes & Noble` (conf 0.9, weight 7, merges 3 accts) ids=[603, 817, 658]
|
||||
- `Lowe's` (conf 1.0, weight 7, merges 1 accts) ids=[673]
|
||||
- `PMUSA` (conf 1.0, weight 7, merges 2 accts) ids=[885, 614]
|
||||
- `Home Depot` (conf 0.83, weight 6, merges 1 accts) ids=[722]
|
||||
- `REI` (conf 1.0, weight 6, merges 2 accts) ids=[684, 682]
|
||||
- `Target` (conf 1.0, weight 6, merges 2 accts) ids=[605, 731]
|
||||
- `The Saloon Of` (conf 0.82, weight 6, merges 2 accts) ids=[847, 801]
|
||||
- `Best Buy` (conf 1.0, weight 5, merges 2 accts) ids=[751, 740]
|
||||
- `Check` (conf 1.0, weight 5, merges 1 accts) ids=[524]
|
||||
- `Expedia` (conf 1.0, weight 5, merges 2 accts) ids=[717, 711]
|
||||
- `Michaels Stores` (conf 1.0, weight 5, merges 2 accts) ids=[587, 664]
|
||||
- `Rita's` (conf 1.0, weight 5, merges 1 accts) ids=[882]
|
||||
- `Als Corner` (conf 1.0, weight 4, merges 1 accts) ids=[762]
|
||||
- `CVS Pharmacy` (conf 1.0, weight 4, merges 2 accts) ids=[783, 816]
|
||||
- `Dunkin` (conf 1.0, weight 4, merges 2 accts) ids=[655, 846]
|
||||
- `Five Guys` (conf 1.0, weight 4, merges 1 accts) ids=[723]
|
||||
- `Redhawk Coffee` (conf 1.0, weight 4, merges 1 accts) ids=[721]
|
||||
- `Sportsmans Warehouse` (conf 1.0, weight 4, merges 1 accts) ids=[568]
|
||||
- `Taco Bell` (conf 1.0, weight 4, merges 2 accts) ids=[686, 691]
|
||||
- `TNT Pizza` (conf 1.0, weight 4, merges 1 accts) ids=[624]
|
||||
- `Act Cntyalleghenyprk` (conf 1.0, weight 3, merges 1 accts) ids=[776]
|
||||
- `Butterjoint` (conf 1.0, weight 3, merges 1 accts) ids=[608]
|
||||
- `Ctlp*csc Serviceworks` (conf 1.0, weight 3, merges 1 accts) ids=[650]
|
||||
- `Fiori's Pizzaria` (conf 0.91, weight 3, merges 1 accts) ids=[551]
|
||||
- `Get Go` (conf 1.0, weight 3, merges 1 accts) ids=[718]
|
||||
- `Giant Eagle` (conf 1.0, weight 3, merges 1 accts) ids=[592]
|
||||
385
migration/rebuild_review.html
Normal file
385
migration/rebuild_review.html
Normal file
File diff suppressed because one or more lines are too long
378
migration/review_preview_mixed.html
Normal file
378
migration/review_preview_mixed.html
Normal file
File diff suppressed because one or more lines are too long
385
migration/test_overlap_review.html
Normal file
385
migration/test_overlap_review.html
Normal file
File diff suppressed because one or more lines are too long
144
retirement_projection.py
Normal file
144
retirement_projection.py
Normal file
@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Retirement trajectory projection for Dane.
|
||||
|
||||
Models portfolio growth from age 25 -> 65 under three post-PhD savings
|
||||
scenarios, plus shows how negligible the one-time summer 401k contribution is.
|
||||
|
||||
All figures are in TODAY'S DOLLARS (real returns), so they're directly
|
||||
comparable to current purchasing power. Edit the ASSUMPTIONS block and re-run.
|
||||
|
||||
/tmp/retire-venv/bin/python retirement_projection.py
|
||||
"""
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg") # no display needed; save to file
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
|
||||
# ----------------------------------------------------------------------------
# ASSUMPTIONS  (edit these and re-run)
# ----------------------------------------------------------------------------
# Market and withdrawal assumptions. Everything is in real
# (inflation-adjusted) terms, so results read as today's purchasing power.
REAL_RETURN = 0.07
SWR = 0.04  # safe withdrawal rate

# Projection window and the balance held today.
START_AGE = 25
END_AGE = 65
START_PORTFOLIO = 48_000  # Roth IRA ($16k) + Schwab Stocks ($32k)

# Grandparent-funded 529 -> Roth IRA pipeline, keyed by age.
# Capped at $7k/yr; the entries sum to $24,900.
PIPELINE = {
    25: 7_000,
    26: 7_000,
    27: 7_000,
    28: 3_900,
}

# Own savings (401k + brokerage) begin after the PhD: it finishes at the end
# of 2027, so income starts in 2028, which is age 27.
POST_PHD_START_AGE = 27

# Scenario label -> own savings per year once post-PhD income starts.
SCENARIOS = {
    "Floor — no post-PhD saving": 0,
    "Moderate — $20k/yr post-PhD": 20_000,
    "Aggressive — $35k/yr post-PhD": 35_000,
}

# The contested decision: one summer of 10% Roth 401k, ~$2,240 at age 25.
SUMMER_401K = 2_240
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# MODEL
|
||||
# ----------------------------------------------------------------------------
|
||||
def project(post_phd_rate, summer_401k=0):
    """Simulate the portfolio balance for every age from START_AGE to END_AGE.

    The balance recorded for an age is the value before that year's growth
    and contributions are applied. Each year then compounds at REAL_RETURN,
    after which the year's contributions are deposited at year-end: the
    PIPELINE amount for that age (if any), plus ``post_phd_rate`` for every
    age from POST_PHD_START_AGE onward.

    ``summer_401k`` is a one-time amount added to the opening balance, i.e.
    it lands at START_AGE.

    Returns ``(ages, balances)`` as two parallel lists.
    """
    growth = 1 + REAL_RETURN
    ages = list(range(START_AGE, END_AGE + 1))
    balances = []
    running = START_PORTFOLIO + summer_401k
    for age in ages:
        balances.append(running)
        # This year's deposit is added after growth (year-end contribution).
        deposit = PIPELINE.get(age, 0)
        if age >= POST_PHD_START_AGE:
            deposit += post_phd_rate
        running = running * growth + deposit
    return ages, balances
|
||||
|
||||
# ----------------------------------------------------------------------------
# PLOT
# ----------------------------------------------------------------------------
# Every age, rate and percentage shown on the chart is derived from the
# ASSUMPTIONS constants, so editing that block keeps labels and annotation
# positions in sync with the numbers actually plotted.
fig, ax = plt.subplots(figsize=(12, 7.5))
colors = {"Floor — no post-PhD saving": "#9aa0a6",
          "Moderate — $20k/yr post-PhD": "#1a73e8",
          "Aggressive — $35k/yr post-PhD": "#188038"}

finals = {}  # scenario label -> balance at END_AGE
for label, rate in SCENARIOS.items():
    ages, bals = project(rate)
    final = bals[-1]
    finals[label] = final
    ax.plot(ages, bals, label=label, linewidth=2.6, color=colors[label])
    # final value annotation, placed just right of the last plotted age
    income = final * SWR
    ax.annotate(f" ${final/1e6:.2f}M\n (${income/1e3:.0f}k/yr @ {SWR * 100:g}%)",
                xy=(END_AGE, final), xytext=(END_AGE + 0.4, final),
                va="center", fontsize=10, fontweight="bold",
                color=colors[label])

# The summer 401k question, framed HONESTLY.
# $2,240 grows to the same gross amount in ANY account. The real question is
# Roth-401k vs taxable-brokerage (Dane's actual alternative — he'd save it, not
# spend it). The only delta is the tax treatment of the growth.
LTCG_RATE = 0.15
summer_gross = SUMMER_401K * (1 + REAL_RETURN) ** (END_AGE - START_AGE)  # grows either way
summer_gain = summer_gross - SUMMER_401K
summer_taxable_net = summer_gross - summer_gain * LTCG_RATE  # taxable: pay LTCG on gain
roth_advantage = summer_gross - summer_taxable_net  # Roth keeps the tax

# vertical marker: PhD done / income starts
ax.axvline(POST_PHD_START_AGE, color="#d93025", linestyle="--", alpha=0.55, linewidth=1.4)
# No arrow is drawn, so only xytext determines where the label appears; xy
# just anchors the annotation at the base of the marker line.
ax.annotate("PhD done /\nincome starts\n(~2028)", xy=(POST_PHD_START_AGE, 0),
            xytext=(POST_PHD_START_AGE + 0.3, finals["Aggressive — $35k/yr post-PhD"]*0.62),
            fontsize=9, color="#d93025")

# annotation box: the summer 401k decision, framed honestly
txt = (f"The contested decision (summer 10% Roth 401k, ~${SUMMER_401K:,}):\n"
       f"• Grows to ~${summer_gross/1e3:.0f}k by {END_AGE} in ANY account\n"
       "• vs taxable brokerage (your real alt.), the Roth tax\n"
       f" benefit is only ~${roth_advantage/1e3:.1f}k — the rest happens\n"
       " whether it's Roth or not.\n"
       f"• On a ${finals['Aggressive — $35k/yr post-PhD']/1e6:.1f}M trajectory: a rounding error.")
ax.text(0.03, 0.97, txt, transform=ax.transAxes, fontsize=9.5,
        va="top", ha="left",
        bbox=dict(boxstyle="round,pad=0.5", facecolor="#fef7e0", edgecolor="#f9ab00"))

# formatting
ax.set_title(f"Dane's Retirement Trajectory — today's dollars, {REAL_RETURN * 100:g}% real return",
             fontsize=14, fontweight="bold", pad=14)
ax.set_xlabel("Age", fontsize=11)
ax.set_ylabel("Portfolio value (today's $)", fontsize=11)
ax.yaxis.set_major_formatter(FuncFormatter(lambda v, _: f"${v/1e6:.1f}M"))
# Leave room to the right of END_AGE for the final-value annotations.
ax.set_xlim(START_AGE, END_AGE + 4.5)
ax.set_ylim(bottom=0)
ax.grid(True, alpha=0.25)
ax.legend(loc="center left", bbox_to_anchor=(0.03, 0.55), fontsize=10, framealpha=0.9)

fig.tight_layout()
out = "/Users/danesabo/Documents/Finances/retirement_projection.png"
fig.savefig(out, dpi=140, bbox_inches="tight")
|
||||
|
||||
# ----------------------------------------------------------------------------
# TEXT SUMMARY
# ----------------------------------------------------------------------------
# Console recap of the chart. Ages and percentages come from the ASSUMPTIONS
# constants so the printed labels always match the numbers being reported.
return_pct = f"{REAL_RETURN * 100:g}%"
age_header = f"Age {END_AGE}"
income_header = f"Income @{SWR * 100:g}%"

print("=" * 64)
print(f"RETIREMENT PROJECTION (today's dollars, {return_pct} real return)")
print("=" * 64)
print(f"Start: ${START_PORTFOLIO:,} at age {START_AGE}")
print(f"529->Roth pipeline: ${sum(PIPELINE.values()):,} over ages "
      f"{min(PIPELINE)}-{max(PIPELINE)}")
print(f"Post-PhD saving starts at age {POST_PHD_START_AGE}\n")
print(f"{'Scenario':<32}{age_header:>12}{income_header:>14}")
print("-" * 58)
for label, final in finals.items():
    balance_cell = f"${final/1e6:.2f}M"
    income_cell = f"${final*SWR/1e3:.0f}k/yr"
    print(f"{label:<32}{balance_cell:>12}{income_cell:>14}")
print("-" * 58)
print(f"\nSummer 10% Roth 401k (~${SUMMER_401K:,} one-time at age {START_AGE}):")
print(f" grows to ~${summer_gross:,.0f} by age {END_AGE} in ANY account")
print(f" Roth-vs-taxable tax benefit only: ~${roth_advantage:,.0f}")
print(f" (= {roth_advantage/finals['Aggressive — $35k/yr post-PhD']*100:.3f}% "
      "of the aggressive total — a rounding error)")
print(f"\nChart saved: {out}")
|
||||
86
sam-bachelor-party-invoice.pdf
Normal file
86
sam-bachelor-party-invoice.pdf
Normal file
@ -0,0 +1,86 @@
|
||||
%PDF-1.4
|
||||
%“Œ‹ž ReportLab Generated PDF document (opensource)
|
||||
1 0 obj
|
||||
<<
|
||||
/F1 2 0 R /F2 3 0 R /F3 4 0 R /F4 5 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/BaseFont /Symbol /Name /F4 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/Contents 10 0 R /MediaBox [ 0 0 306 576 ] /Parent 9 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/PageMode /UseNone /Pages 9 0 R /Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
8 0 obj
|
||||
<<
|
||||
/Author (Dane Sabo) /CreationDate (D:20260525192222-04'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260525192222-04'00') /Producer (ReportLab PDF Library - \(opensource\))
|
||||
/Subject (\(unspecified\)) /Title (Sam's Bachelor Party - Settle Up) /Trapped /False
|
||||
>>
|
||||
endobj
|
||||
9 0 obj
|
||||
<<
|
||||
/Count 1 /Kids [ 6 0 R ] /Type /Pages
|
||||
>>
|
||||
endobj
|
||||
10 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1214
|
||||
>>
|
||||
stream
|
||||
Gatm;>Ar4d'Ro4HS-PjQ<Ue;YA%PN0dY'MNGI*::<@_A[1BB/;a0eVis1[M4JIl1a,"PQ"R</8>o?@RGUgLY,m=bOA0)U!Q!>BFE!&K7YHOi('i5Yh!QGbO<R"QEe5+oYL0Z"=?nG4qD%^\%)pTH`7g,B&V$f?-qH1X)lr9b/os*XZi$BqZWDt#IVidSK>pO@/^a92IC?a#)l;?oFGOCs#Leq!<N\q9LD/9Ne;AUth>@dp.*ja.VPKNmqKm!p4q42N1Rd&L[k[eet=5j)"Y^J"l0@2o>'Oo>DJ!0;'/@5crR6&]8fl0RsO$3iqZNEAj>E7$^M[-m'h/>H1Am9)6jpYgEB7JGcf6+C-q(*0)jiOqRkTMqS]h&RfWncVDmRsErl4:$4?6T"g&Y7-1G-`VH"q4gLL1dP18iq*PRkNUF5bPZ!;o/gAHpcQ56W9Af51AL&<!SiPXKW,<M)B+BJK9H)\&76V#)$mom>U+6j4>FQNJH1]AAA5O==kX)tbHq`-mKSD,<Wb&*B'03WUY;MmZtr6dEr2@8T(pDiR^W>q1\d,2A2m>F<cQ^?.^LM!\lps+*TX'L3@fAn'&RK$e"3^Gq7F`E5t&#FBP8EpXmUq-R[^n@Qb2<<19MNe_'t8\73A^%*,E\_\:^VK_-;b,.3VWOXX#G2V6;&)6!b8L7\Ro9<Gn?d@s2;1M2a^4$c,cf1<CX#-PI@)5Ka!E>$.,!["Cdegcu]c^Kfd5EM3u9Y6hnp5Y`,R@g%c(S1C^P@NOUTb!aU,-?9Maf`'*V?CJqn&Es3E7qK]<:RG/6E]PW1GT<e#NRd;g,7aV[3,HPdI1Jb=069#iRTe;7iXs?Zs%-5?JY?[.43,Y9Feq??AI@1WaP(-57f(!U531KOn9G]k>fgO8GDp0G?F?_Gf$0r'd52HIKoFqbGjFBBSiX$7F?$/1YdtK`#p7Z/\]T)t?7Ym_YEaXoCMe[(9sU)Pf4V40m7u>@4Q/1qo3SH;/9:*^@72fgD[o+EA)hS@._.WEZ1E(o6V['-q!:Nfa47Z>C0:n;#SJR%4,AU*cqA[;S/qH\0fmWs4G[8kUQ2=XH>?fiS+!rt[*$W_E?_#cP/tW/ib]C7UBB!AfPG(12S7>h=C]Om*B2u-1(%$F>J50HQ/,dT:![f/k%`+eCt"n5Lo<nkUS7*J7-bOr5E&P>NuG92V:u1=#m>(@>g$gNRN!sU"Ug8ODu~>endstream
|
||||
endobj
|
||||
xref
|
||||
0 11
|
||||
0000000000 65535 f
|
||||
0000000061 00000 n
|
||||
0000000122 00000 n
|
||||
0000000229 00000 n
|
||||
0000000341 00000 n
|
||||
0000000456 00000 n
|
||||
0000000533 00000 n
|
||||
0000000727 00000 n
|
||||
0000000795 00000 n
|
||||
0000001090 00000 n
|
||||
0000001149 00000 n
|
||||
trailer
|
||||
<<
|
||||
/ID
|
||||
[<3d7dcb593c2c9ba6fed463683a4107b4><3d7dcb593c2c9ba6fed463683a4107b4>]
|
||||
% ReportLab generated PDF document -- digest (opensource)
|
||||
|
||||
/Info 8 0 R
|
||||
/Root 7 0 R
|
||||
/Size 11
|
||||
>>
|
||||
startxref
|
||||
2455
|
||||
%%EOF
|
||||
Loading…
x
Reference in New Issue
Block a user