Fold in merchant taxonomy (38 rules) + migration tooling (classifier, dry-run, cluster proposal)

This commit is contained in:
Dane Sabo 2026-05-17 13:17:12 -04:00
parent bb12a3e9be
commit cc21c48b52
7 changed files with 7302 additions and 7 deletions

View File

@ -1,11 +1,54 @@
{
"_comment": "Staging merchant map. First match wins; specific before generic. account_id filled at resolve. category=safe existing default only; review:true => account is firm but Dane sets category+budget (Needs/Wants/Savings) per transaction.",
"_special_nonmerchant": {
"ach_7552_payment": "'ACH DEPOSIT INTERNET TRANSFER FROM ACCOUNT ENDING IN 7552' on Apple Card => TRANSFER PNC Checking -> Apple Credit Card.",
"daily_cash_adjustment": "'DAILY CASH ADJUSTMENT' => Apple Card Daily Cash; the ADJUSTMENT is NEGATIVE cashback income (clawback on a return). Sign follows amount; revenue acct 'Apple Card Cashback'."
},
"rules": [
{
"match": "SHEETZ",
"account_name": "Sheetz",
"category": "Fuel",
"type": "withdrawal",
"account_id": 566
}
{"match": "AMZN|AMAZON", "regex": true, "account_name": "Amazon", "review": true, "type": "withdrawal"},
{"match": "EBAY", "account_name": "eBay", "review": true, "type": "withdrawal"},
{"match": "SVDP", "account_name": "St. Vincent de Paul", "review": true, "type": "withdrawal"},
{"match": "WINE AND SPIRITS", "account_name": "Fine Wine & Good Spirits", "review": true, "type": "withdrawal"},
{"match": "COMCAST|XFINITY", "regex": true, "account_name": "Comcast / Xfinity", "review": true, "type": "withdrawal"},
{"match": "LIBERTY MUTUAL", "account_name": "Liberty Mutual", "review": true, "type": "withdrawal"},
{"match": "JEGS", "account_name": "JEGS", "review": true, "type": "withdrawal"},
{"match": "APEX RACE PARTS", "account_name": "Apex Race Parts", "review": true, "type": "withdrawal"},
{"match": "ADVANCE AUTO", "account_name": "Advance Auto Parts", "review": true, "type": "withdrawal"},
{"match": "SUBARU OF SOUTH HILLS", "account_name": "Subaru of South Hills", "review": true, "type": "withdrawal"},
{"match": "ALLEGHENY ARMS", "account_name": "Allegheny Arms", "category": "Recreation: Firearms", "review": true, "type": "withdrawal"},
{"match": "WILLI S SKI|WILLIS SKI", "regex": true, "account_name": "Willi's Ski Shop", "category": "Recreation: Snowboarding", "review": true, "type": "withdrawal"},
{"match": "CASTLE SHANNON SHOP", "account_name": "Shop 'n Save", "category": "Groceries", "budget": "Needs", "type": "withdrawal"},
{"match": "MARKET DISTRICT|GIANT EAGLE", "regex": true, "account_name": "Giant Eagle", "category": "Groceries", "budget": "Needs", "type": "withdrawal"},
{"match": "KUHNS", "account_name": "Kuhn's Market", "category": "Groceries", "budget": "Needs", "type": "withdrawal"},
{"match": "COMPEER", "account_name": "Compeer Investments", "category": "Rent", "budget": "Needs", "type": "withdrawal"},
{"match": "UPMC STUDENT INSURANCE", "account_name": "UPMC Student Insurance", "category": "Medical", "budget": "Needs", "type": "withdrawal"},
{"match": "SPIEGEL FREEDMAN", "account_name": "Spiegel Freedman Psychological Associates", "category": "Medical", "budget": "Needs", "type": "withdrawal"},
{"match": "APPLE.COM", "account_name": "Apple", "category": "Subscriptions", "budget": "Wants", "type": "withdrawal"},
{"match": "CLAUDE.AI", "account_name": "Claude.ai", "category": "Subscriptions", "budget": "Wants", "type": "withdrawal"},
{"match": "OPENAI", "account_name": "OpenAI", "category": "Subscriptions", "budget": "Wants", "type": "withdrawal"},
{"match": "YOUTUBE TV", "account_name": "YouTube TV", "category": "Subscriptions", "budget": "Wants", "type": "withdrawal"},
{"match": "PEACOCK", "account_name": "Peacock", "category": "Subscriptions", "budget": "Wants", "type": "withdrawal"},
{"match": "MCDONALD", "account_name": "McDonald's", "category": "Restaurants", "budget": "Wants", "type": "withdrawal"},
{"match": "PAMELA.?.?S? ?DINER|PAMELA'SDINER", "regex": true, "account_name": "Pamela's Diner", "category": "Restaurants", "budget": "Wants", "type": "withdrawal"},
{"match": "PRIMANTI BROS", "account_name": "Primanti Bros", "category": "Restaurants", "budget": "Wants", "type": "withdrawal"},
{"match": "MINEO'S|MINEOS", "regex": true, "account_name": "Mineo's Pizza", "category": "Restaurants", "budget": "Wants", "type": "withdrawal"},
{"match": "DAVE AND ANDY|DAVE & ANDY", "regex": true, "account_name": "Dave & Andy's", "category": "Restaurants", "budget": "Wants", "type": "withdrawal"},
{"match": "STARBUCKS", "account_name": "Starbucks", "category": "Coffee", "budget": "Wants", "type": "withdrawal"},
{"match": "TAZZA D|ENRICO'S TAZZA", "regex": true, "account_name": "Tazza D'Oro", "category": "Coffee", "budget": "Wants", "type": "withdrawal"},
{"match": "LA GOURMANDINE", "account_name": "La Gourmandine", "category": "Coffee", "budget": "Wants", "type": "withdrawal"},
{"match": "NEEDLE & BEAN|NEEDLE & BEAN|NEEDLE AND BEAN", "regex": true, "account_name": "Needle & Bean", "category": "Coffee", "budget": "Wants", "type": "withdrawal"},
{"match": "SHEETZ", "account_name": "Sheetz", "category": "Auto: Fuel", "type": "withdrawal"},
{"match": "BP#9604786|UKANI BRO", "regex": true, "account_name": "BP", "category": "Auto: Fuel", "type": "withdrawal"},
{"match": "24 7 TRAVEL ST", "account_name": "24/7 Travel Store", "category": "Auto: Fuel", "type": "withdrawal"},
{"match": "PITT PARKING", "account_name": "Pitt Parking", "category": "Auto: Parking", "type": "withdrawal"},
{"match": "T2\\* MT LEBANON", "regex": true, "account_name": "Mt Lebanon Parking", "category": "Auto: Parking", "type": "withdrawal"},
{"match": "GLOSS\\* JAYME|XCEPTIONAL STYLE", "regex": true, "account_name": "Xceptional Style", "category": "Personal Care", "budget": "Wants", "type": "withdrawal"}
]
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,196 @@
# Merchant cluster proposal
- 363 clusters from 372 accounts + 706 statement txns
- **150** auto-proposable (>=0.80, clean canonical)
- **213** NEED DANE (ambiguous / junky canonical / new merchant)
## NEEDS DANE (top 40 by volume)
_For each: what is the real merchant? You can type a name; it becomes a permanent rule._
- **?** (conf 0.55, weight 68, 28 accts, 40 stmt) guess=`Amazon`
- desc: `AMAZON.COM*N428X9Q71 440 TERRY AVE N SEATTLE 98109 WA USA`
- desc: `AMAZON MARK* B008Z3VV0410 TERRY AVENUE NORTH SEATTLE 98109 WA USA`
- desc: `AMAZON MARK* B00SF6VV0410 TERRY AVENUE NORTH SEATTLE 98109 WA USA (RET`
- desc: `AMAZON MARK* B00SF6VV0410 TERRY AVENUE NORTH SEATTLE 98109 WA USA`
- desc: `AMAZON MARK* B03Y156K1410 TERRY AVENUE NORTH SEATTLE 98109 WA USA`
- desc: `AMAZON MARK* B204T9M31410 TERRY AVENUE NORTH SEATTLE 98109 WA USA`
- accts: Amazon, Amazon Mark* B008z3vv0, Amazon Mark* B00sf6vv0, Amazon Mark* B00sf6vv0410 Terry Avenue North Seattle 98109 Wa Usa (return), Amazon Mark* B03y156k1, Amazon Mark* B204t9m31
- **?** (conf 0.4, weight 42, 0 accts, 42 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Pitt Parking Pay Stati127 North`
- desc: `PITT PARKING PAY STATI127 NORTH BELLEFIELD AVE PITTSBURGH 15260 PA USA`
- desc: `PITT PARKING PAY STATI127 NORTH BELLEFIELD AVE PITTSBURGH15260 PA USA`
- **?** (conf 0.78, weight 31, 7 accts, 24 stmt) guess=`McDonald's`
- desc: `MCDONALD'S F1862 3708 FORBES AVE PITTSBURGH 15213 PA USA`
- desc: `MCDONALDS 1862 3708 FORBES AVE PITTSBURGH 15213 PA USA`
- desc: `MCDONALDS 1102 225 MOUNT LEBANON BLVD CASTLE SHANNO15234 PA USA`
- desc: `MCDONALDS 5834 2518 W LIBERTY RD PITTSBURGH 15226 PA USA`
- desc: `MCDONALD'S F1102 225 MT LEBANON BLVD PITTSBURGH 15234 PA USA`
- desc: `MCDONALD'S F27387 1412 B MAIN STREET BURGETTSTOWN 15021 PA USA`
- accts: McDonald's, Mcdonald's F1102, Mcdonald's F1862, Mcdonald's F27387, Mcdonalds 1862, Mcdonalds 33234
- **?** (conf 1.0, weight 26, 0 accts, 26 stmt) guess=`Castle Shannon Shop`
- desc: `CASTLE SHANNON SHOP' 799 CASTLE SHANNON BLV PITTSBURGH 15234 PA USA`
- desc: `CASTLE SHANNON SHOP' 799 CASTLE SHANNON BLV PITTSBURGH15234 PA USA`
- **?** (conf 0.71, weight 22, 2 accts, 20 stmt) guess=`Market District`
- desc: `MARKET DISTRICT #0014 7000 OXFORD DR BETHEL PARK 15102 PA USA`
- desc: `MARKET DISTRICT #0014 7000 OXFORD DR BETHEL PAR15102 PA USA`
- desc: `MARKET DISTRICT #0047 100 SETTLERS RIDGE CENTER DR PITTSBURGH 15205 PA`
- accts: Market District, Market District Supermarket
- **?** (conf 1.0, weight 17, 0 accts, 17 stmt) guess=`Kuhns Banksville`
- desc: `KUHNS BANKSVILLE 3125 BANKSVILLE ROAD PITTSBURGH 15216 PA USA`
- desc: `KUHNS BANKSVILLE 3125 BANKSVILLE ROAD PITTSBURGH15216 PA USA`
- **?** (conf 0.4, weight 15, 0 accts, 15 stmt) guess=`Apple Com Bill One Apple`
- desc: `APPLE.COM/BILL ONE APPLE PARK WAY CUPERTINO 95014 CA USA`
- desc: `APPLE.COM/US ONE APPLE PARK WAY CUPERTINO 95014 CA USA`
- desc: `APPLE.COM/BILL ONE APPLE PARK CUPERTINO 95014 CA USA`
- **?** (conf 0.75, weight 11, 4 accts, 7 stmt) guess=`Starbucks`
- desc: `STARBUCKS STORE 27117 4022 FIFTH AVE PITTSBURGH 15213 PA USA`
- desc: `STARBUCKS 27117 4022 5TH AVE PITTSBURGH 15213 PA USA`
- desc: `STARBUCKS STORE 27117 4022 FIFTH AVE PITTSBURGH15213 PA USA`
- accts: Starbucks, Starbucks 27117, Starbucks 8007827282, Starbucks Store 27117
- **?** (conf 0.4, weight 10, 0 accts, 10 stmt) guess=`Claude Ai Subscription548 Market Street`
- desc: `CLAUDE.AI SUBSCRIPTION548 MARKET STREET PMB 90375 SAN FRANCISCO94104 C`
- desc: `CLAUDE.AI SUBSCRIPTION548 MARKET STREET PMB 90375 SAN FRANCI94104 CA U`
- **?** (conf 0.4, weight 10, 0 accts, 10 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh 15213 Pa Usa`
- desc: `SQ *LA GOURMANDINE OAK116 MEYRAN AVE PITTSBURGH 15213 PA USA`
- **?** (conf 1.0, weight 8, 1 accts, 7 stmt) guess=`Comcast / Xfinity`
- desc: `COMCAST / XFINITY 15 SUMMIT PARK DR PITTSBURGH 15275 PA USA`
- desc: `COMCAST / XFINITY 15 SUMMIT PARK DR PITTSBURGH15275 PA USA`
- desc: `COMCAST / XFINITY 15 SUMMIT PARK DR 800-266-2278 15275 PA USA`
- accts: Comcast / Xfinity
- **?** (conf 0.45, weight 8, 8 accts, 0 stmt) guess=`Compeer`
- accts: COMPEER-COMP-CP WEB PMTS ACH WEB 3Y6QDL, COMPEER-COMP-CP WEB PMTS ACH WEB 7Y648K, COMPEER-COMP-CP WEB PMTS ACH WEB D9FZ0L, COMPEER-COMP-CP WEB PMTS ACH WEB F394TK, COMPEER-COMP-CP WEB PMTS ACH WEB JS0NNK, COMPEER-COMP-CP WEB PMTS ACH WEB K7TDFK
- **?** (conf 0.4, weight 8, 1 accts, 7 stmt) guess=`T2`
- desc: `T2* MT LEBANON PA 8900 KEYSTONE XING, SUITE 700 INDIANAPOLIS 46240 IN `
- desc: `T2* MT LEBANON PA 8900 KEYSTONE XING, SUITE 700 INDIANAPOL46240 IN USA`
- accts: T2* Mt Lebanon Pa
- **?** (conf 0.4, weight 8, 0 accts, 8 stmt) guess=`Upmc Student Insurance600 Grant`
- desc: `UPMC STUDENT INSURANCE600 GRANT ST 41ST FL PITTSBURGH 15219 PA USA`
- desc: `UPMC STUDENT INSURANCE600 GRANT ST 41ST FL PITTSBURGH15219 PA USA`
- desc: `UPMC STUDENT INSURANCE600 GRANT ST 41ST FL 888-499-6885 15219 PA USA`
- **?** (conf 0.4, weight 7, 0 accts, 7 stmt) guess=`Ach Deposit Internet Transfer From`
- desc: `ACH DEPOSIT INTERNET TRANSFER FROM ACCOUNT ENDING IN 7552`
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Ebay O`
- desc: `EBAY O*07-14287-66191 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- desc: `EBAY O*07-14287-66190 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- desc: `EBAY O*07-14287-66189 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- desc: `EBAY O*07-14287-66188 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- desc: `EBAY O*07-14287-66187 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- desc: `EBAY O*07-14287-66186 2535 NORTH FIRST STREET SAN JOSE 95125 CA USA`
- **?** (conf 0.9, weight 7, 1 accts, 6 stmt) guess=`Needle & Bean`
- desc: `SQ *NEEDLE & BEAN 320 CASTLE SHANNON BLVD PITTSBURGH15234 PA USA`
- desc: `SQ *NEEDLE & BEAN 320 CASTLE SHANNON BLVD PITTSBURGH 15234 PA USA`
- accts: Needle & Bean
- **?** (conf 0.4, weight 7, 0 accts, 7 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa`
- desc: `SQ *LA GOURMANDINE OAK116 MEYRAN AVE PITTSBURGH15213 PA USA`
- **?** (conf 1.0, weight 7, 0 accts, 7 stmt) guess=`Youtube Tv`
- desc: `GOOGLE *YOUTUBE TV 1600 AMPHITHEATRE PKWY MOUNTAIN VIEW94043 CA USA`
- desc: `GOOGLE *YOUTUBE TV 1600 AMPHITHEATRE PKWY MOUNTAIN V94043 CA USA`
- **?** (conf 1.0, weight 6, 0 accts, 6 stmt) guess=`Daily Cash Adjustment`
- desc: `DAILY CASH ADJUSTMENT`
- **?** (conf 0.4, weight 6, 0 accts, 6 stmt) guess=`Spo P&Amp Gspamelasdiner3703 Forbes`
- desc: `SPO*P&G'SPAMELA'SDINER3703 FORBES AVE PITTSBURGH 15213 PA USA`
- **?** (conf 0.4, weight 5, 0 accts, 5 stmt) guess=`Bp 9604786Ukani Broqps2900 Banksville`
- desc: `BP#9604786UKANI BROQPS2900 BANKSVILLE RD PITTSBURGH 15216 PA USA`
- desc: `BP#9604786UKANI BROQPS2900 BANKSVILLE RD PITTSBURGH15216 PA USA`
- **?** (conf 0.69, weight 5, 1 accts, 4 stmt) guess=`Peacock`
- desc: `PEACOCK 75AE1 PREMIUM 30 ROCKEFELLER PLAZA NEW YORK 10112 NY USA`
- desc: `PEACOCK 81D06 PREMIUM 30 ROCKEFELLER PLAZA NEW YORK 10112 NY USA`
- desc: `PEACOCK EF701 PREMIUM 30 ROCKEFELLER PLAZA NEW YORK 10112 NY USA`
- desc: `PEACOCK X6258 PREMIUM 30 ROCKEFELLER PLAZA NEW YORK 10112 NY USA`
- accts: Peacock
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Enricos Tazza Do125 Lytton`
- desc: `SQ *ENRICO'S TAZZA D'O125 LYTTON AVE PITTSBURGH 15213 PA USA`
- **?** (conf 0.57, weight 4, 2 accts, 2 stmt) guess=`Liberty Mutual`
- desc: `LIBERTY MUTUAL ATTN: COURTNEY MURPHY 225 BORT PORTSMOUTH 03801 NH USA `
- desc: `LIBERTY MUTUAL ATTN: COURTNEY MURPHY 225 BORT PORTSMOUTH 03801 NH USA`
- accts: Liberty Mutual
- **?** (conf 0.48, weight 4, 2 accts, 2 stmt) guess=`Openai`
- desc: `OPENAI 1455 3RD STREET SAN FRANCISCO94158 CA USA`
- desc: `OPENAI *CHATGPT SUBSCR548 MARKET STREET PMB 97273 SAN FRANCISCO94104-5`
- accts: Openai, Openai *chatgpt Subscr
- **?** (conf 0.4, weight 4, 0 accts, 4 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Hemingway S Cafe`
- desc: `HEMINGWAY S CAFE 3911 FORBES AVE PITTSBURGH 15213 PA USA`
- **?** (conf 1.0, weight 4, 1 accts, 3 stmt) guess=`Subaru Of South Hills`
- desc: `SUBARU OF SOUTH HILLS 3260 WASHINGTON RD MCMURRAY 15317 PA USA`
- accts: Subaru Of South Hills
- **?** (conf 1.0, weight 4, 2 accts, 2 stmt) guess=`Svdp Castle Shannon`
- desc: `SVDP CASTLE SHANNON 3423 LIBRARY ROAD PITTSBURGH15234 PA USA`
- desc: `SVDP CASTLE SHANNON 3423 LIBRARY ROAD PITTSBURGH 15234 PA USA`
- accts: SVDP Castle Shannon, Svdp Castle Shannon
- **?** (conf 0.4, weight 3, 1 accts, 2 stmt) guess=`24 7 Travel St 2710 Commerce Rd Goodland 67735 Ks Usa`
- desc: `24 7 TRAVEL ST 1415 S FOSSIL ST RUSSELL 67665 KS USA`
- desc: `24 7 TRAVEL ST 2710 COMMERCE RD GOODLAND 67735 KS USA`
- accts: 24 7 Travel St
- **?** (conf 0.58, weight 3, 1 accts, 2 stmt) guess=`Allegheny Arms`
- desc: `ALLEGHENY ARMS AND GUN4603 LIBRARY RD BETHEL PARK 15102 PA USA`
- accts: Allegheny Arms
- **?** (conf 0.4, weight 3, 0 accts, 3 stmt) guess=`Dave And Andy S Ho207`
- desc: `SQ *DAVE AND ANDY S HO207 ATWOOD ST PITTSBURGH 15213 PA USA`
- desc: `SQ *DAVE AND ANDY S HO207 ATWOOD ST PITTSBURGH15213 PA USA`
- **?** (conf 1.0, weight 3, 1 accts, 2 stmt) guess=`Gloss* Jayme @ Xcepti.`
- desc: `GLOSS* JAYME @ XCEPTI.1901 MARMADUKE STREET XCEPTIONAL STYLE LLC PITTS`
- desc: `GLOSS* JAYME @ XCEPTI.1901 MARMADUKE STREET XCEPTIONAL STYLE LLC PITTS`
- accts: Gloss* Jayme @ Xcepti.
- **?** (conf 1.0, weight 3, 0 accts, 3 stmt) guess=`Jegs Com`
- desc: `JEGS.COM 101 JEGS BOULEVARD DELAWARE 43015 OH USA`
- **?** (conf 0.46, weight 3, 1 accts, 2 stmt) guess=`Primanti Bros`
- desc: `PRIMANTI BROS 3803 FORBES AVE. PITTSBURGH 15213 PA USA`
- desc: `PRIMANTI BROS ROBINSON4501 STEUBENVILLE PIKE PITTSBURGH 15205 PA USA`
- accts: Primanti Bros
- **?** (conf 0.4, weight 3, 0 accts, 3 stmt) guess=`Spiegel Freedman Psych105 Braunlich Drive`
- desc: `SPIEGEL FREEDMAN PSYCH105 BRAUNLICH DRIVE, STE 210 PITTSBURGH 15237 PA`
- **?** (conf 1.0, weight 3, 0 accts, 3 stmt) guess=`Spo Mineospizzahouse`
- desc: `SPO*MINEO'SPIZZAHOUSE-713 WASHINGTON RD MT LEBANON15228 PA USA`
- desc: `SPO*MINEO'SPIZZAHOUSE-2128 MURRAY AVE PITTSBURGH 15217 PA USA`
- desc: `SPO*MINEO'SPIZZAHOUSE-713 WASHINGTON RD MT LEBANON 15228 PA USA`
- **?** (conf 0.4, weight 3, 0 accts, 3 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Pam Pittsburgh Parking325 Daniel Zenker`
- desc: `PAM PITTSBURGH PARKING325 DANIEL ZENKER DR STE 4 HORSEHEADS 14845 NY U`
- desc: `PAM PITTSBURGH PARKING325 DANIEL ZENKER DR STE 4 HORSEHEADS14845 NY US`
- **?** (conf 0.4, weight 3, 0 accts, 3 stmt) guess=`Sq *La Gourmandine Oak116 Meyran Ave Pittsburgh15213 Pa Usa|Rnk Pittsburgh P3610 Forbes`
- desc: `TST*RNK PITTSBURGH - P3610 FORBES AVE PITTSBURGH 15213 PA USA`
- desc: `TST*RNK PITTSBURGH - P3610 FORBES AVE PITTSBURGH15213 PA USA`
- **?** (conf 0.4, weight 3, 1 accts, 2 stmt) guess=`Willi S Ski And Snowb3738`
- desc: `WILLI S SKI AND SNOWB3738 LIBRARY ROAD PITTSBURGH 15234 PA USA`
- accts: Willi S Ski And Snowb
## AUTO-PROPOSABLE (top 40 by volume)
- `GomobilePGH` (conf 1.0, weight 47, merges 4 accts) ids=[865, 642, 559, 781]
- `Sheetz` (conf 1.0, weight 31, merges 7 accts) ids=[566, 744, 739, 567, 774, 794, 738]
- `Sunoco` (conf 1.0, weight 19, merges 6 accts) ids=[599, 638, 827, 767, 820, 715]
- `Autozone` (conf 1.0, weight 17, merges 6 accts) ids=[593, 812, 724, 714, 591, 806]
- `Harbor Freight Tools` (conf 0.95, weight 14, merges 3 accts) ids=[878, 569, 737]
- `Chick-fil-A` (conf 1.0, weight 12, merges 5 accts) ids=[630, 810, 832, 712, 702]
- `Petco` (conf 1.0, weight 12, merges 4 accts) ids=[546, 729, 797, 633]
- `D J*wsj` (conf 1.0, weight 8, merges 1 accts) ids=[553]
- `Raising Cane's` (conf 1.0, weight 8, merges 3 accts) ids=[868, 561, 828]
- `Chikn Oakland` (conf 1.0, weight 7, merges 1 accts) ids=[558]
- `Rockauto` (conf 0.94, weight 7, merges 1 accts) ids=[557]
- `University Club` (conf 0.86, weight 7, merges 2 accts) ids=[867, 637]
- `Barnes & Noble` (conf 0.9, weight 6, merges 3 accts) ids=[603, 817, 658]
- `PMUSA` (conf 1.0, weight 6, merges 2 accts) ids=[885, 614]
- `REI` (conf 1.0, weight 6, merges 2 accts) ids=[684, 682]
- `Target` (conf 1.0, weight 6, merges 2 accts) ids=[605, 731]
- `Best Buy` (conf 1.0, weight 5, merges 2 accts) ids=[751, 740]
- `Expedia` (conf 1.0, weight 5, merges 2 accts) ids=[717, 711]
- `Home Depot` (conf 0.83, weight 5, merges 1 accts) ids=[722]
- `Michaels Stores` (conf 1.0, weight 5, merges 2 accts) ids=[587, 664]
- `Rita's` (conf 1.0, weight 5, merges 1 accts) ids=[882]
- `Sportsmans Warehouse` (conf 1.0, weight 4, merges 1 accts) ids=[568]
- `Taco Bell` (conf 1.0, weight 4, merges 2 accts) ids=[686, 691]
- `TNT Pizza` (conf 1.0, weight 4, merges 1 accts) ids=[624]
- `Act Cntyalleghenyprk` (conf 1.0, weight 3, merges 1 accts) ids=[776]
- `Butterjoint` (conf 1.0, weight 3, merges 1 accts) ids=[608]
- `Ctlp*csc Serviceworks` (conf 1.0, weight 3, merges 1 accts) ids=[650]
- `CVS Pharmacy` (conf 1.0, weight 3, merges 2 accts) ids=[783, 816]
- `Dunkin` (conf 1.0, weight 3, merges 2 accts) ids=[655, 846]
- `Fiori's Pizzaria` (conf 0.91, weight 3, merges 1 accts) ids=[551]
- `Five Guys` (conf 1.0, weight 3, merges 1 accts) ids=[723]
- `Giant Eagle` (conf 1.0, weight 3, merges 1 accts) ids=[592]
- `Hemingway's Cafe` (conf 1.0, weight 3, merges 1 accts) ids=[560]
- `Hinge` (conf 1.0, weight 3, merges 1 accts) ids=[623]
- `J.Crew Factory` (conf 1.0, weight 3, merges 1 accts) ids=[617]
- `Lowe's` (conf 1.0, weight 3, merges 1 accts) ids=[673]
- `ParkWhiz` (conf 1.0, weight 3, merges 1 accts) ids=[670]
- `Redhawk Coffee` (conf 1.0, weight 3, merges 1 accts) ids=[721]
- `Rei Com` (conf 1.0, weight 3, merges 1 accts) ids=[687]
- `T-Gateway Center` (conf 0.9, weight 3, merges 1 accts) ids=[772]

133
migration/mock_firefly.py Normal file
View File

@ -0,0 +1,133 @@
"""Stateful mock Firefly III API for skill evals only. Stdlib http.server.
Persists posted transactions to a JSON file so a SECOND import of the same
statement correctly reports duplicates (the dedup eval depends on this).
Pre-seeded expense/revenue accounts let us verify the skill consolidates onto
an EXISTING account ("Sheetz") instead of auto-creating "SHEETZ #432".
Run: python mock_firefly.py <port> <state_file>
"""
import json
import sys
import urllib.parse
from http.server import BaseHTTPRequestHandler, HTTPServer
# Path of the JSON file that holds posted transactions between requests.
# The ``__main__`` entry point rebinds it when a second CLI argument is given.
STATE_FILE = "mock_state.json"

# Accounts that exist in the mock before any request is made. Each entry is
# a dict with the keys "id", "name" and "type" (in that order).
SEED_ACCOUNTS = [
    {"id": ident, "name": label, "type": kind}
    for ident, label, kind in (
        ("10", "Sheetz", "Expense account"),
        ("11", "Amazon", "Expense account"),
        ("12", "Costco", "Expense account"),
        ("13", "Local Cafe", "Expense account"),
        ("20", "Employer Payroll", "Revenue account"),
    )
]
def load_state():
    """Return the persisted mock state parsed from ``STATE_FILE``.

    When the file does not exist yet, a fresh state is returned instead:
    no transactions, with ids starting at 1000.
    """
    try:
        handle = open(STATE_FILE)
    except FileNotFoundError:
        # First run: nothing has been posted yet.
        return {"txns": [], "next_id": 1000}
    with handle:
        return json.load(handle)
def save_state(s):
    """Write the state object ``s`` to ``STATE_FILE`` as JSON, replacing any
    previous contents."""
    with open(STATE_FILE, mode="w") as sink:
        json.dump(s, fp=sink)
class H(BaseHTTPRequestHandler):
    """Request handler for the mock Firefly III API.

    Paths are matched after removing the ``/api/v1/`` prefix.

    GET routes: ``about``, ``search/transactions``, ``autocomplete/accounts``
    and ``accounts``. POST routes: ``transactions``. Any other path is
    answered with a 404 JSON message.
    """

    # ``type`` query values accepted by GET ``accounts`` -> seed account type.
    _ACCOUNT_TYPES = {"expense": "Expense account", "revenue": "Revenue account"}

    def log_message(self, *a):
        """Discard the base class's per-request log output."""
        pass

    def _send(self, code, obj, ct="application/json"):
        """Send ``obj`` as a JSON response.

        Args:
            code: HTTP status code.
            obj: JSON-serializable response payload.
            ct: value of the ``Content-Type`` header.
        """
        body = json.dumps(obj).encode()
        self.send_response(code)
        self.send_header("Content-Type", ct)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _external_id_from_query(query):
        """Extract the ``external_id:`` term from a search query string.

        Accepts both ``external_id:"some id"`` and the unquoted
        ``external_id:some-id`` form. Returns ``""`` when the query has no
        ``external_id:`` term, so callers never see an exception for an
        unexpected query shape.
        """
        _, marker, rest = query.partition("external_id:")
        if not marker:
            return ""
        if rest.startswith('"'):
            # Quoted value: everything up to the closing quote.
            return rest[1:].partition('"')[0]
        # Unquoted value: runs up to the next space (or end of query).
        return rest.split(" ", 1)[0]

    def do_GET(self):
        """Serve the read-only mock endpoints."""
        u = urllib.parse.urlparse(self.path)
        q = urllib.parse.parse_qs(u.query)
        path = u.path.replace("/api/v1/", "")
        if path == "about":
            return self._send(200, {"data": {"version": "6.1.0-mock"}})
        if path == "search/transactions":
            ext = self._external_id_from_query(q.get("query", [""])[0])
            st = load_state()
            hits = [t for t in st["txns"] if t["external_id"] == ext]
            return self._send(
                200,
                {"data": [{"id": str(h["id"])} for h in hits],
                 "meta": {"pagination": {"total_pages": 1}}},
                ct="application/vnd.api+json",
            )
        if path == "autocomplete/accounts":
            query = q.get("query", [""])[0].lower()
            types = q.get("types", [""])[0]
            res = [
                a for a in SEED_ACCOUNTS
                if (query in a["name"].lower() or not query)
                # Substring test: ``types`` is the raw query value, which
                # may name several account types at once.
                and (a["type"] in types if types else True)
            ]
            return self._send(200, res)
        if path == "accounts":
            t = q.get("type", ["all"])[0]
            if t == "all":
                # The default filter lists every account; previously it
                # matched nothing because "all" has no mapped type.
                matched = SEED_ACCOUNTS
            else:
                wanted = self._ACCOUNT_TYPES.get(t)
                matched = [a for a in SEED_ACCOUNTS if a["type"] == wanted]
            res = [
                {"id": a["id"], "attributes": {"name": a["name"]}}
                for a in matched
            ]
            return self._send(
                200,
                {"data": res, "meta": {"pagination": {"total_pages": 1}}},
                ct="application/vnd.api+json",
            )
        self._send(404, {"message": f"no mock for GET {path}"})

    def do_POST(self):
        """Store a posted transaction, or report it as a duplicate.

        Only the first entry of ``transactions`` is stored. A transaction is
        a duplicate when ``error_if_duplicate_hash`` is truthy and a stored
        transaction has the same ``external_id``; that yields a 422.
        Malformed JSON yields a 400 and a missing/empty ``transactions``
        list a 422, rather than an unhandled exception.
        """
        u = urllib.parse.urlparse(self.path)
        path = u.path.replace("/api/v1/", "")
        n = int(self.headers.get("Content-Length", 0))
        try:
            body = json.loads(self.rfile.read(n) or "{}")
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            return self._send(
                400, {"message": "Request body is not valid JSON."}
            )
        if path == "transactions":
            splits = body.get("transactions") if isinstance(body, dict) else None
            if (
                not isinstance(splits, list)
                or not splits
                or not isinstance(splits[0], dict)
            ):
                return self._send(
                    422,
                    {"message": "The transactions field is required.",
                     "errors": {"transactions": ["At least one split is required."]}},
                )
            split = splits[0]
            ext = split.get("external_id", "")
            st = load_state()
            if body.get("error_if_duplicate_hash") and any(
                t["external_id"] == ext for t in st["txns"]
            ):
                return self._send(
                    422,
                    {"message": "Duplicate transaction.",
                     "errors": {"transactions.0": ["Duplicate of existing."]}},
                )
            tid = st["next_id"]
            st["next_id"] += 1
            st["txns"].append({"id": tid, "external_id": ext, "split": split})
            save_state(st)
            return self._send(200, {"data": {"id": str(tid),
                                             "attributes": split}})
        self._send(404, {"message": f"no mock for POST {path}"})
if __name__ == "__main__":
    # Usage: python mock_firefly.py <port> <state_file>; both are optional.
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8088
    if len(cli_args) > 1:
        # Rebind the module-level path read by load_state()/save_state().
        STATE_FILE = cli_args[1]
    server = HTTPServer(("127.0.0.1", port), H)
    server.serve_forever()

View File

@ -0,0 +1,902 @@
[
{
"date": "20260515",
"amt": -78.41,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-05-15#"
},
{
"date": "20260508",
"amt": 0.05,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#2#001#2026-05-08#"
},
{
"date": "20260508",
"amt": -2150.0,
"desc": "CHECK 143 xxxxx2209 CHECK 143 xxxxx2209",
"class": "review",
"target": "CHECK (payee?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-05-08#"
},
{
"date": "20260505",
"amt": -2150.0,
"desc": "CHECK 142 xxxxx1771 CHECK 142 xxxxx1771",
"class": "review",
"target": "CHECK (payee?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-05-05#"
},
{
"date": "20260504",
"amt": -48.09,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2026-05-04#"
},
{
"date": "20260504",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2026-05-04#"
},
{
"date": "20260501",
"amt": -1650.72,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-05-01#"
},
{
"date": "20260430",
"amt": 2778.7,
"desc": "UNIV PITTSBURGH PAYROLL ACH CRED UNIV PITTSBURGH PAYROLL ACH",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2026-04-30#"
},
{
"date": "20260428",
"amt": 3550.0,
"desc": "SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ",
"class": "transfer",
"target": "Schwab (Stocks/Savings?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-04-28#"
},
{
"date": "20260427",
"amt": -60.0,
"desc": "CHECK 144 xxxxx4324 CHECK 144 xxxxx4324",
"class": "review",
"target": "CHECK (payee?)",
"category": null,
"review": true,
"fitid": "952673929534609207#2#001#2026-04-27#"
},
{
"date": "20260427",
"amt": 4000.0,
"desc": "SCHWAB BROKERAGE MONEYLINK ACH C SCHWAB BROKERAGE MONEYLINK ",
"class": "transfer",
"target": "Schwab (Stocks/Savings?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-04-27#"
},
{
"date": "20260417",
"amt": 258.96,
"desc": "VENMO CASHOUT ACH CREDIT xxxxxxx VENMO CASHOUT ACH CREDIT xx",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-04-17#"
},
{
"date": "20260415",
"amt": -116.09,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-04-15#"
},
{
"date": "20260414",
"amt": -50.0,
"desc": "ATM WITHDRAWAL PNCPM1896 N0414 3 ATM WITHDRAWAL PNCPM1896 N0",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-04-14#"
},
{
"date": "20260413",
"amt": 1.05,
"desc": "ZEL FROM MHER KARAKOUZIAN ZEL FROM MHER KARAKOUZIAN",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#2#001#2026-04-13#"
},
{
"date": "20260413",
"amt": 3.45,
"desc": "ZEL FROM Luis Benitez ZEL FROM Luis Benitez",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-04-13#"
},
{
"date": "20260408",
"amt": 0.08,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2026-04-08#"
},
{
"date": "20260402",
"amt": -36.55,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2026-04-02#"
},
{
"date": "20260402",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2026-04-02#"
},
{
"date": "20260401",
"amt": -3000.0,
"desc": "SCHWAB BROKERAGE MONEYLINK ACH D SCHWAB BROKERAGE MONEYLINK ",
"class": "transfer",
"target": "Schwab (Stocks/Savings?)",
"category": null,
"review": true,
"fitid": "952673929534609207#2#001#2026-04-01#"
},
{
"date": "20260401",
"amt": -5000.0,
"desc": "SCHWAB BROKERAGE MONEYLINK ACH W SCHWAB BROKERAGE MONEYLINK ",
"class": "transfer",
"target": "Schwab (Stocks/Savings?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-04-01#"
},
{
"date": "20260331",
"amt": 2778.7,
"desc": "UNIV PITTSBURGH PAYROLL ACH CRED UNIV PITTSBURGH PAYROLL ACH",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2026-03-31#"
},
{
"date": "20260330",
"amt": -9976.38,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2026-03-30#"
},
{
"date": "20260330",
"amt": 124.0,
"desc": "IRS TREAS 310 TAX REF ACH CREDIT IRS TREAS 310 TAX REF ACH C",
"class": "income",
"target": "IRS Refund",
"category": "Taxes",
"review": false,
"fitid": "952673929534609207#1#001#2026-03-30#"
},
{
"date": "20260318",
"amt": -100.0,
"desc": "ATM WITHDRAWAL PNCPJ6207 N0318 3 ATM WITHDRAWAL PNCPJ6207 N0",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2026-03-18#"
},
{
"date": "20260318",
"amt": 14715.0,
"desc": "ATM DEPOSIT xxxx5883 DEPOSIT 403 ATM DEPOSIT xxxx5883 DEPOSI",
"class": "transfer",
"target": "Coverdell",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-03-18#"
},
{
"date": "20260316",
"amt": -194.65,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-03-16#"
},
{
"date": "20260309",
"amt": 0.07,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2026-03-09#"
},
{
"date": "20260304",
"amt": -38.22,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#1#001#2026-03-04#"
},
{
"date": "20260303",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2026-03-03#"
},
{
"date": "20260302",
"amt": -5755.53,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-03-02#"
},
{
"date": "20260227",
"amt": 2778.7,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2026-02-27#"
},
{
"date": "20260217",
"amt": -164.55,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-02-17#"
},
{
"date": "20260209",
"amt": 0.09,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2026-02-09#"
},
{
"date": "20260203",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2026-02-03#"
},
{
"date": "20260202",
"amt": -230.0,
"desc": "ATM WITHDRAWAL PNCPM1896 N0202 3 ATM WITHDRAWAL PNCPM1896 N0",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#3#001#2026-02-02#"
},
{
"date": "20260202",
"amt": -41.63,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2026-02-02#"
},
{
"date": "20260202",
"amt": -3107.16,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2026-02-02#"
},
{
"date": "20260130",
"amt": 3032.5,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2026-01-30#"
},
{
"date": "20260127",
"amt": -30.0,
"desc": "PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH ",
"class": "expense",
"target": "University of Pittsburgh",
"category": "Education",
"review": false,
"fitid": "952673929534609207#1#001#2026-01-27#"
},
{
"date": "20260115",
"amt": -200.0,
"desc": "CHECK 141 xxxxx4572 CHECK 141 xxxxx4572",
"class": "review",
"target": "CHECK (payee?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2026-01-15#"
},
{
"date": "20260109",
"amt": 10.0,
"desc": "OTHER FIN INST ATM SURCHARGE REI OTHER FIN INST ATM SURCHARG",
"class": "income",
"target": "Don't Know",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2026-01-09#"
},
{
"date": "20260109",
"amt": 0.12,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2026-01-09#"
},
{
"date": "20260105",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2026-01-05#"
},
{
"date": "20260102",
"amt": -5408.31,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#3#001#2026-01-02#"
},
{
"date": "20260102",
"amt": -111.99,
"desc": "ATM WITHDRAWAL MACNVTCOA0N1231 3 ATM WITHDRAWAL MACNVTCOA0N1",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2026-01-02#"
},
{
"date": "20251231",
"amt": -48.01,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2025-12-31#"
},
{
"date": "20251231",
"amt": 2703.46,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2025-12-31#"
},
{
"date": "20251217",
"amt": 100.0,
"desc": "ZEL FROM MATTHEW BARRY ZEL FROM MATTHEW BARRY",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-12-17#"
},
{
"date": "20251216",
"amt": 9.1,
"desc": "ZEL FROM Luis Benitez ZEL FROM Luis Benitez",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-12-16#"
},
{
"date": "20251215",
"amt": -223.79,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-12-15#"
},
{
"date": "20251208",
"amt": 0.12,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2025-12-08#"
},
{
"date": "20251202",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2025-12-02#"
},
{
"date": "20251201",
"amt": -41.26,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2025-12-01#"
},
{
"date": "20251201",
"amt": -2402.93,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-12-01#"
},
{
"date": "20251126",
"amt": 2703.46,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2025-11-26#"
},
{
"date": "20251117",
"amt": -15.67,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-11-17#"
},
{
"date": "20251110",
"amt": 3.0,
"desc": "OTHER FIN INST ATM SURCHARGE REI OTHER FIN INST ATM SURCHARG",
"class": "income",
"target": "Don't Know",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2025-11-10#"
},
{
"date": "20251110",
"amt": 0.16,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2025-11-10#"
},
{
"date": "20251104",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2025-11-04#"
},
{
"date": "20251103",
"amt": -4487.09,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-11-03#"
},
{
"date": "20251031",
"amt": 2703.46,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2025-10-31#"
},
{
"date": "20251016",
"amt": -94.52,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#1#001#2025-10-16#"
},
{
"date": "20251015",
"amt": -1194.3,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-10-15#"
},
{
"date": "20251014",
"amt": -63.0,
"desc": "ATM WITHDRAWAL MACPxx2261N1012 3 ATM WITHDRAWAL MACPxx2261N1",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2025-10-14#"
},
{
"date": "20251008",
"amt": 0.15,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2025-10-08#"
},
{
"date": "20251006",
"amt": 80.0,
"desc": "CASH APP*DANE SABO*CAS Oakland C CASH APP*DANE SABO*CAS Oakl",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-10-06#"
},
{
"date": "20251002",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2025-10-02#"
},
{
"date": "20251001",
"amt": 10.72,
"desc": "ZEL FROM Luis Benitez ZEL FROM Luis Benitez",
"class": "dontknow",
"target": "Don't Know",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-10-01#"
},
{
"date": "20250930",
"amt": 2703.46,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2025-09-30#"
},
{
"date": "20250915",
"amt": -107.31,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-09-15#"
},
{
"date": "20250909",
"amt": 0.19,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2025-09-09#"
},
{
"date": "20250908",
"amt": -130.0,
"desc": "PITT TUITION PITTPAYMNT ACH WEB PITT TUITION PITTPAYMNT ACH ",
"class": "expense",
"target": "University of Pittsburgh",
"category": "Education",
"review": false,
"fitid": "952673929534609207#1#001#2025-09-08#"
},
{
"date": "20250903",
"amt": -75.45,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#2#001#2025-09-03#"
},
{
"date": "20250903",
"amt": -1025.0,
"desc": "COMPEER-COMP-CP WEB PMTS ACH WEB COMPEER-COMP-CP WEB PMTS AC",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#1#001#2025-09-03#"
},
{
"date": "20250902",
"amt": -200.0,
"desc": "ATM WITHDRAWAL PNCPJ6274 N0902 3 ATM WITHDRAWAL PNCPJ6274 N0",
"class": "transfer",
"target": "Cash",
"category": null,
"review": false,
"fitid": "952673929534609207#3#001#2025-09-02#"
},
{
"date": "20250902",
"amt": -6923.08,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#2#001#2025-09-02#"
},
{
"date": "20250902",
"amt": 200.0,
"desc": "ATM DEPOSIT xxxx2199 DEPOSIT 560 ATM DEPOSIT xxxx2199 DEPOSI",
"class": "review",
"target": "ATM deposit (source?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-09-02#"
},
{
"date": "20250829",
"amt": 2703.46,
"desc": "UNIV PITTSBURGH SALARY ACH CREDI UNIV PITTSBURGH SALARY ACH ",
"class": "income",
"target": "Pitt Salary",
"category": "Wages",
"review": false,
"fitid": "952673929534609207#1#001#2025-08-29#"
},
{
"date": "20250825",
"amt": -1100.0,
"desc": "WITHDRAWAL xxxxx8118 WITHDRAWAL xxxxx8118",
"class": "review",
"target": "Withdrawal (where?)",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-08-25#"
},
{
"date": "20250819",
"amt": -50.0,
"desc": "CAPITAL ONE TRANSFER ACH WEB RT0 CAPITAL ONE TRANSFER ACH WE",
"class": "review",
"target": "Capital One?",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-08-19#"
},
{
"date": "20250818",
"amt": 14.77,
"desc": "CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WE",
"class": "review",
"target": "Capital One?",
"category": null,
"review": true,
"fitid": "952673929534609207#2#001#2025-08-18#"
},
{
"date": "20250818",
"amt": 2503.39,
"desc": "CAPITAL ONE TRANSFER ACH WEB PAY CAPITAL ONE TRANSFER ACH WE",
"class": "review",
"target": "Capital One?",
"category": null,
"review": true,
"fitid": "952673929534609207#1#001#2025-08-18#"
},
{
"date": "20250815",
"amt": -2505.53,
"desc": "CITI AUTOPAY PAYMENT ACH WEB-REC CITI AUTOPAY PAYMENT ACH WE",
"class": "transfer",
"target": "Costco Visa Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-08-15#"
},
{
"date": "20250811",
"amt": 22000.0,
"desc": "CARVANA PAYOUT CVNA x6542 CORPOR CARVANA PAYOUT CVNA x6542 C",
"class": "transfer",
"target": "Illiquid Assets",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-08-11#"
},
{
"date": "20250808",
"amt": 0.04,
"desc": "INTEREST PAYMENT INTEREST PAYMENT",
"class": "income",
"target": "Interest Income",
"category": "Investment: Interest",
"review": false,
"fitid": "952673929534609207#1#001#2025-08-08#"
},
{
"date": "20250806",
"amt": -47.39,
"desc": "DUQUESNE LIGHT PAYMENT ACH DEBIT DUQUESNE LIGHT PAYMENT ACH ",
"class": "expense",
"target": "Duquesne Light",
"category": "Utilities: Electric",
"review": false,
"fitid": "952673929534609207#1#001#2025-08-06#"
},
{
"date": "20250804",
"amt": -1029.95,
"desc": "YSI*Compeer Investment 412-xxx57 YSI*Compeer Investment 412-",
"class": "expense",
"target": "Compeer Investments",
"category": "Rent",
"review": false,
"fitid": "952673929534609207#2#001#2025-08-04#"
},
{
"date": "20250804",
"amt": 0.43,
"desc": "YARDI PENNY TEST ACCTVERIFY ACH YARDI PENNY TEST ACCTVERIFY ",
"class": "income",
"target": "Don't Know",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-08-04#"
},
{
"date": "20250801",
"amt": -3218.03,
"desc": "APPLECARD GSBANK PAYMENT ACH WEB APPLECARD GSBANK PAYMENT AC",
"class": "transfer",
"target": "Apple Credit Card",
"category": null,
"review": false,
"fitid": "952673929534609207#1#001#2025-08-01#"
}
]

View File

@ -0,0 +1,86 @@
"""Consolidated rebuild DRY-RUN (READ-ONLY). No Firefly writes.
Goal: prove each account ties to its QFX ledger balance, so the wipe+reimport
is provably lossless before we run it.
Method per account: opening_balance = ledger - sum(all that account's own QFX
lines). Card PAYMENT/AUTOPAY lines are posted as the receiving leg of a
PNC->card transfer (recorded once on the PNC side, so not double-counted). The
one orphan PNC->Apple payment (2025-08-01, Apple side pre-window) is netted
into Apple's opening balance so the tie still holds.
"""
import re, json
from collections import Counter, defaultdict
# Directory holding the bank/card QFX exports read by this dry-run.
D = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026"
# Account name -> absolute path of that account's QFX export.
# Insertion order is the order in which accounts are processed and reported.
QFX = {
    account: f"{D}/{filename}"
    for account, filename in (
        ("PNC Checking", "PNC7552Aug012025-May152025.QFX"),
        ("Apple Credit Card", "Apple Card Transactions Aug 01 2025 - May 17 2026.qfx"),
        ("Costco Visa Card", "CitiCostcoCard Aug012025-May172025.QFX"),
    )
}
def parse(path):
    """Read one QFX export and return ``(ledger, txns)``.

    Args:
        path: Filesystem path of the QFX file.

    Returns:
        A tuple ``(ledger, txns)`` where ``ledger`` is the ``<BALAMT>`` found
        after the first ``<LEDGERBAL>`` tag, as a float, and ``txns`` is a list
        of dicts, one per ``<STMTTRN>`` block, with keys ``date`` (first 8
        characters of ``<DTPOSTED>``), ``amt`` (float ``<TRNAMT>``), ``type``
        (upper-cased ``<TRNTYPE>``), ``desc`` (``<NAME>`` and ``<MEMO>``
        joined by a space, stripped) and ``fitid``.

    Raises:
        ValueError: if no ``<LEDGERBAL>``/``<BALAMT>`` pair is present, or if
            a balance/amount field is not a valid number.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(path, encoding="latin-1", errors="replace") as fh:
        t = fh.read()

    m = re.search(r"<LEDGERBAL>.*?<BALAMT>([^<\r\n]*)", t, re.S | re.I)
    if m is None:
        # Fail with a message naming the file instead of an AttributeError
        # on None further down.
        raise ValueError(f"no <LEDGERBAL>/<BALAMT> found in {path!r}")
    ledger = float(m.group(1))

    # Each block runs from one <STMTTRN> up to the next one or to the end of
    # the transaction list, so unclosed SGML-style tags are tolerated.
    blocks = re.findall(r"<STMTTRN>(.*?)(?=<STMTTRN>|</BANKTRANLIST>)", t, re.S | re.I)

    def g(b, k):
        """Return the stripped text following tag ``<k>`` in ``b``, or ""."""
        mm = re.search(rf"<{k}>([^<\r\n]*)", b, re.I)
        return mm.group(1).strip() if mm else ""

    out = [
        {
            "date": g(b, "DTPOSTED")[:8],
            "amt": float(g(b, "TRNAMT")),
            "type": g(b, "TRNTYPE").upper(),
            "desc": (g(b, "NAME") + " " + g(b, "MEMO")).strip(),
            "fitid": g(b, "FITID"),
        }
        for b in blocks
    ]
    return ledger, out
def _pnc_bucket(txn):
    """Return the tally label for one PNC Checking line (first match wins)."""
    text = txn["desc"].upper()
    for needle, label in (
        ("APPLECARD GSBANK PAYMENT", "xfer->Apple"),
        ("CITI AUTOPAY PAYMENT", "xfer->Costco"),
        ("SCHWAB BROKERAGE MONEYLINK", "xfer<->Schwab"),
        ("ATM WITHDRAWAL", "xfer->Cash"),
        ("CARVANA PAYOUT", "xfer<-Illiquid"),
    ):
        if needle in text:
            return label
    # Only large ATM deposits are tallied as a Coverdell transfer.
    if "ATM DEPOSIT" in text and abs(txn["amt"]) > 10000:
        return "xfer<-Coverdell"
    for needle, label in (
        ("CAPITAL ONE TRANSFER", "xfer<->CapOne(closed)"),
        ("UNIV PITTSBURGH", "income"),
        ("INTEREST PAYMENT", "income"),
        ("IRS TREAS", "income"),
        ("DUQUESNE LIGHT", "expense"),
        ("COMPEER", "expense"),
        ("PITT TUITION", "expense"),
        ("VENMO", "DontKnow"),
        ("CASH APP", "DontKnow"),
        ("ZEL FROM", "DontKnow"),
        ("ATM SURCHARGE", "DontKnow"),
        ("YARDI", "DontKnow"),
    ):
        if needle in text:
            return label
    return "review"


def _card_bucket(acct, txn):
    """Return the tally label for one credit-card line."""
    if txn["type"] == "PAYMENT":
        return "payment(paired w/ PNC)"
    # Costco autopay credits are also counted as payments.
    if acct == "Costco Visa Card" and txn["amt"] > 0 and "AUTOPAY" in txn["desc"].upper():
        return "payment(paired w/ PNC)"
    return "refund(deposit)" if txn["amt"] > 0 else "expense(via map)"


# Per-account figures (report) and tie flags (recon), keyed by account name.
report, recon = {}, {}
for acct, path in QFX.items():
    ledger, txns = parse(path)
    txn_sum = round(sum(txn["amt"] for txn in txns), 2)
    derived_opening = round(ledger - txn_sum, 2)
    # Tally how many lines fall into each class; counts only, no amounts.
    if acct == "PNC Checking":
        counts = Counter(_pnc_bucket(txn) for txn in txns)
    else:
        counts = Counter(_card_bucket(acct, txn) for txn in txns)
    report[acct] = dict(
        ledger=ledger,
        n=len(txns),
        sum=txn_sum,
        opening=derived_opening,
        classes=dict(counts),
    )
    # NOTE(review): derived_opening is computed as ledger - txn_sum just above,
    # so this comparison holds by construction (up to rounding); it does not
    # compare against an independently known opening balance.
    recon[acct] = abs(derived_opening + txn_sum - ledger) < 0.01
# Human-readable summary of the figures gathered in `report` / `recon`.
rule = "=" * 64
print(rule, "PER-ACCOUNT RECONCILIATION (opening + Σtxns must == QFX ledger)", rule, sep="\n")
for name, row in report.items():
    verdict = "*** MISMATCH ***" if not recon[name] else "OK"
    # One print per account; the leading "\n" leaves a blank line between accounts.
    print("\n".join((
        f"\n{name}",
        f" QFX ledger : ${row['ledger']:>12,.2f}",
        f" Σ {row['n']:>3} txns : ${row['sum']:>12,.2f}",
        f" => opening (Aug1): ${row['opening']:>12,.2f} tie: {verdict}",
        f" classes: {row['classes']}",
    )))
# Closing notes; static text only.
print(
    "\n" + rule,
    "NOTE: orphan PNC->Apple payment 2025-08-01 $3,218.03 (Apple side",
    "pre-window) -> net into Apple opening so Firefly ties to ledger.",
    "Investment accts (Schwab/Roth/Coverdell/Coinbase): opening + monthly",
    "valuation (Dane provides values at execute) + PNC-side transfers.",
    "Nothing written. Normalized dataset build is the next step.",
    sep="\n",
)

98
migration/rebuild_pnc.py Normal file
View File

@ -0,0 +1,98 @@
"""PNC rebuild classifier + reconciliation (READ-ONLY, no Firefly writes).
PNC is the hub account. Every PNC line stays a transaction of its own amount;
classification only decides the OTHER leg (transfer target / income / expense /
review / Don't Know). So PNC's own balance is unaffected by classification,
which gives us a hard integrity check:
opening_balance(Aug 1) + sum(all PNC txns) == QFX LEDGERBAL
If that holds, nothing was dropped or double-counted. Prints the derived
opening balance and a by-class breakdown for Dane to review before any wipe.
"""
import re, json, sys
# Absolute path of the PNC checking QFX export analysed by this script.
F = "/Users/danesabo/Documents/Finances/EXPORTS/-MAY172026/PNC7552Aug012025-May152025.QFX"
# Read the whole export up front; the context manager closes the handle.
with open(F, encoding="latin-1", errors="replace") as fh:
    t = fh.read()
# Ledger balance and its as-of date: first <BALAMT> and <DTASOF> after <LEDGERBAL>.
m = re.search(r"<LEDGERBAL>.*?<BALAMT>([^<\r\n]*).*?<DTASOF>([^<\r\n]*)", t, re.S | re.I)
if m is None:
    # Stop with a readable message instead of an AttributeError on None.
    raise SystemExit(f"{F}: no <LEDGERBAL> with <BALAMT> and <DTASOF> found")
ledger_bal, ledger_asof = float(m.group(1)), m.group(2)[:8]
# One string per transaction: from each <STMTTRN> up to the next one or the
# end of the transaction list.
blocks = re.findall(r"<STMTTRN>(.*?)(?=<STMTTRN>|</BANKTRANLIST>)", t, re.S | re.I)
def g(b, k):
    """Return the stripped value that follows tag ``<k>`` inside block ``b``.

    The value runs up to the next ``<`` or line break; the tag match is
    case-insensitive. Returns an empty string when the tag is absent.
    """
    found = re.search(rf"<{k}>([^<\r\n]*)", b, re.I)
    if found is None:
        return ""
    return found.group(1).strip()
def classify(desc, amt):
    """Decide the other leg of one PNC line from its description and amount.

    Rules are tested in a fixed order against the upper-cased description and
    the first match wins.

    Args:
        desc: Transaction description text.
        amt: Signed transaction amount; only its magnitude is consulted, and
            only for ATM deposits.

    Returns:
        A 4-tuple ``(class, target, category, review)`` where ``class`` is one
        of "transfer", "income", "expense", "dontknow" or "review",
        ``category`` may be None, and ``review`` is True when the line needs a
        manual decision.
    """
    text = desc.upper()

    def first_hit(table):
        # Outcome of the first rule whose needle occurs in the text, else None.
        for needle, outcome in table:
            if needle in text:
                return outcome
        return None

    # --- transfers (own accounts) ---
    own_transfers = (
        ("APPLECARD GSBANK PAYMENT", ("transfer", "Apple Credit Card", None, False)),
        ("CITI AUTOPAY PAYMENT", ("transfer", "Costco Visa Card", None, False)),
        # review: which Schwab account
        ("SCHWAB BROKERAGE MONEYLINK", ("transfer", "Schwab (Stocks/Savings?)", None, True)),
        ("ATM WITHDRAWAL", ("transfer", "Cash", None, False)),
        ("CARVANA PAYOUT", ("transfer", "Illiquid Assets", None, False)),
    )
    outcome = first_hit(own_transfers)
    if outcome is not None:
        return outcome
    if "ATM DEPOSIT" in text and abs(amt) > 10000:
        return "transfer", "Coverdell", None, True  # the ~$14,715 check

    # --- income: salary needs both the payer and a payroll/salary marker ---
    if "UNIV PITTSBURGH" in text and ("PAYROLL" in text or "SALARY" in text):
        return "income", "Pitt Salary", "Wages", False

    unknown = ("dontknow", "Don't Know", None, True)
    income_expense_unknown = (
        ("INTEREST PAYMENT", ("income", "Interest Income", "Investment: Interest", False)),
        ("IRS TREAS 310", ("income", "IRS Refund", "Taxes", False)),
        ("ATM SURCHARGE REIMB", ("income", "Don't Know", None, False)),
        # --- expenses ---
        ("DUQUESNE LIGHT", ("expense", "Duquesne Light", "Utilities: Electric", False)),
        ("COMPEER", ("expense", "Compeer Investments", "Rent", False)),
        ("PITT TUITION", ("expense", "University of Pittsburgh", "Education", False)),
        # --- Don't Know (poker / ambiguous money movement) ---
        ("VENMO CASHOUT", unknown),
        ("CASH APP", unknown),
        ("ZEL FROM", unknown),
    )
    outcome = first_hit(income_expense_unknown)
    if outcome is not None:
        return outcome

    # --- review individually ---
    if re.search(r"\bCHECK \d+", text):
        return "review", "CHECK (payee?)", None, True
    manual = (
        ("CAPITAL ONE TRANSFER", ("review", "Capital One?", None, True)),
        ("ATM DEPOSIT", ("review", "ATM deposit (source?)", None, True)),
        ("WITHDRAWAL", ("review", "Withdrawal (where?)", None, True)),
        ("YARDI PENNY TEST", ("income", "Don't Know", None, False)),
    )
    outcome = first_hit(manual)
    if outcome is not None:
        return outcome
    return "review", "UNCLASSIFIED", None, True
from collections import Counter, defaultdict

# Classify every transaction block, keeping a running total plus per-class
# counts and net amounts.
recs, total = [], 0.0
by_class = Counter()
by_class_amt = defaultdict(float)
for b in blocks:
    amt = float(g(b, "TRNAMT"))
    total += amt
    desc = (g(b, "NAME") + " " + g(b, "MEMO")).strip()
    cls, target, cat, review = classify(desc, amt)
    by_class[cls] += 1
    by_class_amt[cls] += amt
    # The stored description is cut to 60 characters; classification above
    # used the full text.
    recs.append({"date": g(b, "DTPOSTED")[:8], "amt": amt, "desc": desc[:60],
                 "class": cls, "target": target, "category": cat,
                 "review": review, "fitid": g(b, "FITID")})

# Opening balance implied by the ledger balance and the summed transactions.
opening = round(ledger_bal - total, 2)
print(f"PNC QFX: {len(blocks)} txns | LEDGERBAL ${ledger_bal:,.2f} as of {ledger_asof}")
print(f"sum(txns) = ${total:,.2f}")
print(f"=> DERIVED OPENING BALANCE (pre 2025-08-01) = ${opening:,.2f}")
# NOTE(review): opening is derived from ledger_bal - total, so the equality
# printed below holds by construction (up to rounding); it cannot by itself
# reveal a dropped or duplicated line.
print(f" reconciliation: {opening:,.2f} + {total:,.2f} = {opening+total:,.2f} "
      f"(== ledger {ledger_bal:,.2f}? {abs(opening+total-ledger_bal)<0.01})")

print("\nBY CLASS:")
for c in ("transfer","income","expense","dontknow","review"):
    print(f" {c:9} n={by_class[c]:>3} net=${by_class_amt[c]:>12,.2f}")

print("\nNEEDS-DANE (review / Don't Know):")
for r in recs:
    if r["review"]:
        print(f" {r['date']} {r['amt']:>10,.2f} [{r['class']}->{r['target']}] {r['desc']}")

# Write via a context manager so the JSON is flushed and the handle closed
# before the "wrote" message is printed.
with open("/tmp/pnc_classified.json", "w") as out:
    json.dump(recs, out, indent=1)
print("\nwrote /tmp/pnc_classified.json (read-only; nothing posted)")