PickleBALLER/src/elo/integration_tests.rs

447 lines
19 KiB
Rust

//! Comprehensive integration tests for the ELO rating system
//!
//! These tests verify end-to-end rating calculations for singles and doubles matches,
//! including edge cases and real-world scenarios.
#[cfg(test)]
mod tests {
use crate::elo::calculator::EloCalculator;
use crate::elo::doubles::calculate_effective_opponent_rating;
use crate::elo::rating::EloRating;
use crate::elo::score_weight::calculate_weighted_score;
// ============================================
// SINGLES MATCH TESTS
// ============================================
#[test]
fn test_singles_equal_ratings_close_win() {
    // Scenario: two freshly created players (annotated as 1500-rated elsewhere in
    // this file) play a tight game that ends 11-9.
    let elo = EloCalculator::new();
    let victor = EloRating::new_player();
    let defeated = EloRating::new_player();

    // The loser's score share is taken as the complement of the winner's.
    let victor_share = calculate_weighted_score(victor.rating, defeated.rating, 11, 9);
    let victor_after = elo.update_rating(&victor, &defeated, victor_share);
    let defeated_after = elo.update_rating(&defeated, &victor, 1.0 - victor_share);

    // Direction of movement: the winner goes up, the loser goes down.
    assert!(victor_after.rating > victor.rating);
    assert!(defeated_after.rating < defeated.rating);

    // The two movements must mirror each other.
    let winner_gain = victor_after.rating - victor.rating;
    let loser_loss = defeated.rating - defeated_after.rating;
    assert!((winner_gain - loser_loss).abs() < 0.01);

    // A narrow win should only nudge the rating.
    assert!(winner_gain < 5.0);

    println!(
        "Singles 1500 vs 1500, 11-9: Winner {} -> {} (+{:.1})",
        victor.rating, victor_after.rating, winner_gain
    );
}
#[test]
fn test_singles_equal_ratings_blowout_win() {
    // Scenario: two freshly created, equally rated players; lopsided 11-2 result.
    let elo = EloCalculator::new();
    let (victor, defeated) = (EloRating::new_player(), EloRating::new_player());

    let victor_share = calculate_weighted_score(victor.rating, defeated.rating, 11, 2);
    let victor_after = elo.update_rating(&victor, &defeated, victor_share);
    let winner_gain = victor_after.rating - victor.rating;

    // A one-sided score must move the rating clearly more than a close game does...
    assert!(winner_gain > 10.0);
    // ...while staying below the 16-point ceiling (presumably K/2 with a default
    // K-factor of 32; the default is not visible from this file).
    assert!(winner_gain < 16.0);

    println!(
        "Singles 1500 vs 1500, 11-2: Winner {} -> {} (+{:.1})",
        victor.rating, victor_after.rating, winner_gain
    );
}
#[test]
fn test_singles_upset_win() {
    // Scenario: the lower-rated player (1400) beats the higher-rated one (1600)
    // in a close 11-9 game.
    let calc = EloCalculator::new();
    let underdog = EloRating::new_with_rating(1400.0);
    let favorite = EloRating::new_with_rating(1600.0);

    // The favorite's score share is the complement of the underdog's.
    let performance = calculate_weighted_score(underdog.rating, favorite.rating, 11, 9);
    let new_underdog = calc.update_rating(&underdog, &favorite, performance);
    let new_favorite = calc.update_rating(&favorite, &underdog, 1.0 - performance);

    let underdog_gain = new_underdog.rating - underdog.rating;
    let favorite_loss = favorite.rating - new_favorite.rating;

    // An upset should be rewarded with a sizeable gain for the underdog.
    assert!(underdog_gain > 5.0);
    // The favorite lost the game outright, so its rating must drop as well
    // (previously this side of the match was printed but never verified).
    assert!(favorite_loss > 0.0);

    println!(
        "Singles upset 1400 vs 1600, 11-9: Underdog {} -> {} (+{:.1}), Favorite {} -> {} ({:.1})",
        underdog.rating, new_underdog.rating, underdog_gain,
        favorite.rating, new_favorite.rating, -favorite_loss
    );
}
#[test]
fn test_singles_expected_win() {
    // Scenario: the higher-rated player (1600) beats the lower-rated one (1400),
    // but only narrowly, 11-9.
    let elo = EloCalculator::new();
    let (favorite, underdog) = (
        EloRating::new_with_rating(1600.0),
        EloRating::new_with_rating(1400.0),
    );

    let favorite_after = elo.update_rating(
        &favorite,
        &underdog,
        calculate_weighted_score(favorite.rating, underdog.rating, 11, 9),
    );
    let favorite_gain = favorite_after.rating - favorite.rating;

    // Winning narrowly falls short of what the rating gap predicts, so the
    // favorite is expected to LOSE rating despite winning the game.
    // NOTE(review): the previous comment quoted an expected share of ~64%; the
    // standard 400-point Elo curve would give ~76% for a 200-point gap. Confirm
    // against EloCalculator's expected-score formula.
    assert!(favorite_gain < 0.0);

    println!(
        "Singles expected 1600 vs 1400, 11-9: Favorite {} -> {} ({:.1})",
        favorite.rating, favorite_after.rating, favorite_gain
    );
}
#[test]
fn test_singles_expected_blowout_win() {
    // Scenario: the higher-rated player (1600) crushes the lower-rated one (1400), 11-2.
    let elo = EloCalculator::new();
    let (favorite, underdog) = (
        EloRating::new_with_rating(1600.0),
        EloRating::new_with_rating(1400.0),
    );

    let favorite_share = calculate_weighted_score(favorite.rating, underdog.rating, 11, 2);
    let favorite_after = elo.update_rating(&favorite, &underdog, favorite_share);
    let favorite_gain = favorite_after.rating - favorite.rating;

    // A dominant score beats the expectation implied by the rating gap, so the
    // favorite should be rewarded.
    // NOTE(review): the previous comment quoted "85% vs expected 64%"; the exact
    // expected share depends on EloCalculator's curve, which is not visible here.
    assert!(favorite_gain > 0.0);

    println!(
        "Singles expected blowout 1600 vs 1400, 11-2: Favorite {} -> {} (+{:.1})",
        favorite.rating, favorite_after.rating, favorite_gain
    );
}
#[test]
fn test_singles_shutout() {
    // Scenario: equally rated fresh players, winner concedes no points (11-0).
    let elo = EloCalculator::new();
    let (victor, defeated) = (EloRating::new_player(), EloRating::new_player());

    // Taking every point must register as a full (1.0) performance.
    let victor_share = calculate_weighted_score(victor.rating, defeated.rating, 11, 0);
    assert!((victor_share - 1.0).abs() < 0.001);

    let victor_after = elo.update_rating(&victor, &defeated, victor_share);
    let gain = victor_after.rating - victor.rating;

    // With an even match-up the largest possible swing is checked to be 16 points
    // (K/2, presumably with the default K-factor of 32).
    assert!((gain - 16.0).abs() < 0.1);

    println!(
        "Singles shutout 11-0: {} -> {} (+{:.1})",
        victor.rating, victor_after.rating, gain
    );
}
#[test]
fn test_singles_get_shutout() {
    // Scenario: the mirror image of a shutout, seen from the side that scored 0-11.
    let elo = EloCalculator::new();
    let (victor, defeated) = (EloRating::new_player(), EloRating::new_player());

    // Scoring no points must register as a zero performance.
    let defeated_share = calculate_weighted_score(defeated.rating, victor.rating, 0, 11);
    assert!(defeated_share.abs() < 0.001);

    let defeated_after = elo.update_rating(&defeated, &victor, defeated_share);
    let loss = defeated.rating - defeated_after.rating;

    // The drop is checked to be the full 16 points (K/2, presumably with the
    // default K-factor of 32).
    assert!((loss - 16.0).abs() < 0.1);

    println!(
        "Singles get shutout 0-11: {} -> {} (-{:.1})",
        defeated.rating, defeated_after.rating, loss
    );
}
// ============================================
// DOUBLES MATCH TESTS
// ============================================
#[test]
fn test_doubles_equal_teams() {
    // Scenario: a doubles game in which all four players are freshly created
    // (annotated as 1500-rated), won 11-9 by player1's team.
    let calc = EloCalculator::new();
    let player1 = EloRating::new_player(); // 1500
    let teammate1 = EloRating::new_player(); // 1500
    let opp1 = EloRating::new_player(); // 1500
    let opp2 = EloRating::new_player(); // 1500

    // Player 1's effective opponent: 1500 + 1500 - 1500 = 1500.
    let eff_opp = calculate_effective_opponent_rating(opp1.rating, opp2.rating, teammate1.rating);
    assert!((eff_opp - 1500.0).abs() < 0.01);

    // Win 11-9 against a synthetic opponent carrying the effective rating.
    let performance = calculate_weighted_score(player1.rating, eff_opp, 11, 9);
    let new_player1 =
        calc.update_rating(&player1, &EloRating::new_with_rating(eff_opp), performance);
    let gain = new_player1.rating - player1.rating;

    // With a 1500 effective opponent this is the same even match-up as the singles
    // close-win case, so the same bounds apply: a positive but small gain.
    // (Previously the gain was printed without being verified.)
    assert!(gain > 0.0);
    assert!(gain < 5.0);

    println!(
        "Doubles equal teams, 11-9: {} -> {} (+{:.1})",
        player1.rating, new_player1.rating, gain
    );
}
#[test]
fn test_doubles_carried_by_strong_teammate() {
    // Scenario: a 1400 player paired with a 1600 teammate beats two freshly
    // created (annotated as 1500) opponents 11-9.
    let calc = EloCalculator::new();
    let player = EloRating::new_with_rating(1400.0);
    let teammate = EloRating::new_with_rating(1600.0);
    let opp1 = EloRating::new_player(); // 1500
    let opp2 = EloRating::new_player(); // 1500

    // Player's effective opponent: 1500 + 1500 - 1600 = 1400.
    let eff_opp = calculate_effective_opponent_rating(opp1.rating, opp2.rating, teammate.rating);
    assert!((eff_opp - 1400.0).abs() < 0.01);

    // Teammate's effective opponent: 1500 + 1500 - 1400 = 1600.
    let teammate_eff_opp =
        calculate_effective_opponent_rating(opp1.rating, opp2.rating, player.rating);
    assert!((teammate_eff_opp - 1600.0).abs() < 0.01);

    // Win 11-9; each player is scored against their own effective opponent.
    let player_perf = calculate_weighted_score(player.rating, eff_opp, 11, 9);
    let teammate_perf = calculate_weighted_score(teammate.rating, teammate_eff_opp, 11, 9);
    let new_player =
        calc.update_rating(&player, &EloRating::new_with_rating(eff_opp), player_perf);
    let new_teammate = calc.update_rating(
        &teammate,
        &EloRating::new_with_rating(teammate_eff_opp),
        teammate_perf,
    );
    let player_gain = new_player.rating - player.rating;
    let teammate_gain = new_teammate.rating - teammate.rating;

    println!(
        "Doubles carry: Player (1400) eff_opp=1400, gain={:.1}; Teammate (1600) eff_opp=1600, gain={:.1}",
        player_gain, teammate_gain
    );

    // Both team totals are 3000, so each player's effective opponent equals their
    // own rating. Each individual update is therefore an even match-up won 11-9:
    // neither partner loses rating, and the weaker player gets no outsized reward
    // for being carried.
    assert!(player_gain > 0.0);
    assert!(teammate_gain > 0.0);
    assert!(player_gain < 3.0);
}
#[test]
fn test_doubles_carrying_weak_teammate() {
    // Scenario: a 1600 player paired with a 1400 teammate beats two freshly
    // created (annotated as 1500) opponents 11-9.
    let calc = EloCalculator::new();
    let strong_player = EloRating::new_with_rating(1600.0);
    let weak_teammate = EloRating::new_with_rating(1400.0);
    let opp1 = EloRating::new_player(); // 1500
    let opp2 = EloRating::new_player(); // 1500

    // Strong player's effective opponent: 1500 + 1500 - 1400 = 1600.
    let eff_opp =
        calculate_effective_opponent_rating(opp1.rating, opp2.rating, weak_teammate.rating);
    assert!((eff_opp - 1600.0).abs() < 0.01);

    // Win 11-9 with the strong player carrying.
    let performance = calculate_weighted_score(strong_player.rating, eff_opp, 11, 9);
    let new_strong = calc.update_rating(
        &strong_player,
        &EloRating::new_with_rating(eff_opp),
        performance,
    );
    let gain = new_strong.rating - strong_player.rating;

    println!("Doubles carrying: Strong (1600) eff_opp=1600, 11-9 win, change={:.1}", gain);

    // The effective opponent (1600) equals the strong player's own rating, so this
    // is an even match-up: a narrow win yields a small but positive change. The
    // weak partner raises the effective opposition, which is why the gain is no
    // bigger than in an ordinary even singles game.
    assert!(gain > 0.0);
}
#[test]
fn test_doubles_all_different_ratings() {
    // Scenario: 1550 + 1450 versus 1520 + 1480. Both teams total 3000, so the
    // effective-opponent formula (opp1 + opp2 - teammate) maps every player onto
    // their own rating. No rating update is performed here; only the
    // effective-opponent arithmetic is checked.
    let p1 = EloRating::new_with_rating(1550.0);
    let p1_teammate = EloRating::new_with_rating(1450.0);
    let p2 = EloRating::new_with_rating(1520.0);
    let p2_teammate = EloRating::new_with_rating(1480.0);

    // P1's effective opponent: 1520 + 1480 - 1450 = 1550
    let p1_eff =
        calculate_effective_opponent_rating(p2.rating, p2_teammate.rating, p1_teammate.rating);
    // P1's teammate's effective opponent: 1520 + 1480 - 1550 = 1450
    let p1t_eff = calculate_effective_opponent_rating(p2.rating, p2_teammate.rating, p1.rating);
    // P2's effective opponent: 1550 + 1450 - 1480 = 1520
    let p2_eff =
        calculate_effective_opponent_rating(p1.rating, p1_teammate.rating, p2_teammate.rating);
    // P2's teammate's effective opponent: 1550 + 1450 - 1520 = 1480
    let p2t_eff = calculate_effective_opponent_rating(p1.rating, p1_teammate.rating, p2.rating);

    println!("Team 1 (1550+1450) vs Team 2 (1520+1480):");
    println!(" P1 (1550) eff_opp: {:.0}", p1_eff);
    println!(" P1 teammate (1450) eff_opp: {:.0}", p1t_eff);
    println!(" P2 (1520) eff_opp: {:.0}", p2_eff);
    println!(" P2 teammate (1480) eff_opp: {:.0}", p2t_eff);

    // Balanced-team property: every player's effective opponent equals their own
    // rating. All four players are verified, not just team 1.
    assert!((p1_eff - p1.rating).abs() < 0.01);
    assert!((p1t_eff - p1_teammate.rating).abs() < 0.01);
    assert!((p2_eff - p2.rating).abs() < 0.01);
    assert!((p2t_eff - p2_teammate.rating).abs() < 0.01);
}
// ============================================
// K-FACTOR TESTS
// ============================================
#[test]
fn test_different_k_factors() {
    // Scenario: the same even match-up with a perfect (1.0) performance, scored
    // under three different K-factors.
    let player = EloRating::new_player();
    let opponent = EloRating::new_player();

    // Rating gained by `player` when the calculator is built with K-factor `k`.
    let gain_with = |k| {
        EloCalculator::new_with_k_factor(k)
            .update_rating(&player, &opponent, 1.0)
            .rating
            - player.rating
    };

    let gain_k16 = gain_with(16.0);
    let gain_k32 = gain_with(32.0);
    let gain_k64 = gain_with(64.0);

    // A larger K makes ratings more volatile: each gain is checked to be K/2.
    assert!((gain_k16 - 8.0).abs() < 0.1); // K=16 -> +8
    assert!((gain_k32 - 16.0).abs() < 0.1); // K=32 -> +16
    assert!((gain_k64 - 32.0).abs() < 0.1); // K=64 -> +32

    println!("K-factor comparison for 1.0 performance:");
    println!(" K=16: +{:.1}", gain_k16);
    println!(" K=32: +{:.1}", gain_k32);
    println!(" K=64: +{:.1}", gain_k64);
}
// ============================================
// EDGE CASES
// ============================================
#[test]
fn test_extreme_rating_difference() {
    // Scenario: a 2000-rated player meets a 1000-rated player and wins 11-3.
    let elo = EloCalculator::new();
    let (elite, beginner) = (
        EloRating::new_with_rating(2000.0),
        EloRating::new_with_rating(1000.0),
    );

    let elite_share = calculate_weighted_score(elite.rating, beginner.rating, 11, 3);
    let elite_after = elo.update_rating(&elite, &beginner, elite_share);
    let change = elite_after.rating - elite.rating;

    // With a gap this large, a comfortable 11-3 win still falls short of the
    // near-total dominance the ratings predict, so the elite player must lose
    // rating.
    assert!(change < 0.0);

    println!("Extreme mismatch 2000 vs 1000, 11-3: Elite change = {:.1}", change);
}
#[test]
fn test_beginner_beats_elite() {
    // Scenario: a 1000-rated player pulls off an 11-9 win over a 2000-rated player.
    let elo = EloCalculator::new();
    let (beginner, elite) = (
        EloRating::new_with_rating(1000.0),
        EloRating::new_with_rating(2000.0),
    );

    let beginner_after = elo.update_rating(
        &beginner,
        &elite,
        calculate_weighted_score(beginner.rating, elite.rating, 11, 9),
    );
    let gain = beginner_after.rating - beginner.rating;

    // The beginner was expected to take almost no points, so an outright win
    // must be rewarded with a very large gain.
    assert!(gain > 15.0);

    println!("Major upset 1000 beats 2000, 11-9: Beginner gain = +{:.1}", gain);
}
#[test]
fn test_rating_conservation_singles() {
    // Scenario: two 1500-rated players, 11-7 result, each side scored from its
    // own perspective. The points one side gains should be the points the other
    // side loses.
    let elo = EloCalculator::new();
    let (p1, p2) = (
        EloRating::new_with_rating(1500.0),
        EloRating::new_with_rating(1500.0),
    );

    let p1_share = calculate_weighted_score(p1.rating, p2.rating, 11, 7);
    let p2_share = calculate_weighted_score(p2.rating, p1.rating, 7, 11);

    let p1_delta = elo.update_rating(&p1, &p2, p1_share).rating - p1.rating;
    let p2_delta = elo.update_rating(&p2, &p1, p2_share).rating - p2.rating;
    let total_change = p1_delta + p2_delta;

    // Net movement across both players is (approximately) zero.
    assert!(total_change.abs() < 0.01);

    println!(
        "Rating conservation: P1 {:.1}, P2 {:.1}, sum = {:.3}",
        p1_delta, p2_delta, total_change
    );
}
#[test]
fn test_multiple_matches_convergence() {
    // Scenario: two players start level, but one consistently wins 11-5 (roughly
    // a 70-30 split of points). After a run of games the ratings should reflect
    // the difference in strength.
    let calc = EloCalculator::new();
    let mut strong = EloRating::new_player(); // Actually wins ~70% of points
    let mut weak = EloRating::new_player(); // Actually wins ~30% of points

    // The scoreline is the same in every simulated match.
    let strong_points = 11;
    let weak_points = 5; // ~70-30 split

    // Simulate 20 matches.
    for _ in 0..20 {
        let strong_perf =
            calculate_weighted_score(strong.rating, weak.rating, strong_points, weak_points);
        let weak_perf =
            calculate_weighted_score(weak.rating, strong.rating, weak_points, strong_points);

        // Both updates must be computed from the PRE-match ratings. Assigning
        // `strong` first and then updating `weak` against it would rate the weak
        // player against an opponent rating that did not exist when the match
        // was played.
        let next_strong = calc.update_rating(&strong, &weak, strong_perf);
        let next_weak = calc.update_rating(&weak, &strong, weak_perf);
        strong = next_strong;
        weak = next_weak;
    }

    // The strong player should now be significantly higher rated.
    assert!(strong.rating > weak.rating + 100.0);

    println!(
        "After 20 matches (70-30 split): Strong={:.0}, Weak={:.0}, Diff={:.0}",
        strong.rating, weak.rating, strong.rating - weak.rating
    );
}
// ============================================
// SCORE WEIGHT TESTS
// ============================================
#[test]
fn test_score_weight_various_margins() {
    // Compare the winner's performance score across progressively closer games
    // between two 1500-rated players. Entries are ordered from the widest margin
    // to the narrowest.
    let scores = [
        (11, 0, "11-0 shutout"),
        (11, 1, "11-1"),
        (11, 5, "11-5"),
        (11, 9, "11-9 close"),
        (11, 10, "11-10 tiebreak"),
    ];

    println!("Performance scores (1500 vs 1500):");
    let mut previous = None;
    for (won, lost, label) in scores {
        let perf = calculate_weighted_score(1500.0, 1500.0, won, lost);
        println!(" {}: {:.3}", label, perf);

        // The winner always took more than half of the points.
        assert!(perf > 0.5, "{}: winner's performance should exceed 0.5", label);
        // A narrower margin must never score higher than a wider one.
        if let Some(wider_margin_perf) = previous {
            assert!(
                perf < wider_margin_perf,
                "{}: performance should drop as the margin narrows",
                label
            );
        }
        previous = Some(perf);
    }
}
#[test]
fn test_performance_symmetry() {
    // The same 11-7 game scored from both sides (equal 1500 ratings): the two
    // performance values should be complementary and add up to 1.0.
    let (winner_perf, loser_perf) = (
        calculate_weighted_score(1500.0, 1500.0, 11, 7),
        calculate_weighted_score(1500.0, 1500.0, 7, 11),
    );
    let combined = winner_perf + loser_perf;

    assert!((combined - 1.0).abs() < 0.001);

    println!(
        "Performance symmetry: Winner {:.3} + Loser {:.3} = {:.3}",
        winner_perf, loser_perf, combined
    );
}
}