Statistics with Python
24 topics • Click any card to expand
Descriptive statistics summarize and describe the main features of a dataset. Key measures include central tendency (mean, median, mode) and spread (std, variance, percentiles, IQR). NumPy and scipy.stats provide fast, vectorized implementations.
# --- Example 1: central tendency (mean / median / mode) ---
import numpy as np
from scipy import stats

data = [23, 45, 12, 67, 34, 45, 89, 23, 45, 56, 78, 34, 23, 90, 12]
mean = np.mean(data)
median = np.median(data)
# keepdims=True keeps scipy >= 1.9 mode results indexable like older versions
mode = stats.mode(data, keepdims=True)
print(f"Mean: {mean:.2f}")
print(f"Median: {median:.2f}")
print(f"Mode: {mode.mode[0]} (count: {mode.count[0]})")

# --- Example 2: spread (std, variance, percentiles, IQR) ---
data = np.array([12, 15, 14, 10, 18, 21, 13, 16, 14, 15, 17, 20, 11, 19, 14])
std_pop = np.std(data, ddof=0)   # population std
std_samp = np.std(data, ddof=1)  # sample std (Bessel's correction)
var_samp = np.var(data, ddof=1)
q1, q3 = np.percentile(data, [25, 75])
iqr = stats.iqr(data)
print(f"Population std: {std_pop:.4f}")
print(f"Sample std: {std_samp:.4f}")
print(f"Sample variance: {var_samp:.4f}")
print(f"Q1={q1}, Q3={q3}, IQR={iqr}")

# --- Example 3: pandas describe / skewness / kurtosis ---
import pandas as pd
np.random.seed(42)
data = pd.Series(np.random.normal(loc=100, scale=15, size=200))
print("=== Summary Statistics ===")
print(data.describe())
print(f"\nSkewness: {data.skew():.4f}")
print(f"Kurtosis: {data.kurtosis():.4f}")
print(f"5th pctl: {data.quantile(0.05):.2f}")
print(f"95th pctl: {data.quantile(0.95):.2f}")

# --- Example 4: outlier detection (IQR fences and z-scores) ---
np.random.seed(0)
data = np.concatenate([np.random.normal(50, 5, 95), [10, 200, 95, 105, -5]])
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1
lower_fence = q1 - 1.5 * iqr
upper_fence = q3 + 1.5 * iqr
outliers = data[(data < lower_fence) | (data > upper_fence)]
z_scores = np.abs(stats.zscore(data))
print(f"IQR fences: [{lower_fence:.2f}, {upper_fence:.2f}]")
print(f"Outliers (IQR): {sorted(outliers)}")
print(f"Outliers (|z|>3): {data[z_scores > 3].tolist()}")

# --- Example 5: blood-pressure case study ---
np.random.seed(7)
# Simulate systolic BP for 300 patients (some hypertensive outliers)
bp = np.concatenate([
    np.random.normal(120, 12, 270),  # normal range
    np.random.normal(170, 10, 30),   # hypertensive group
])
series = pd.Series(bp, name="Systolic_BP")
print("=== Blood Pressure Summary ===")
print(series.describe().round(2))
q1, q3 = np.percentile(bp, [25, 75])
iqr = q3 - q1
print(f"\nIQR: {iqr:.2f} mmHg")
print(f"Skewness: {series.skew():.3f}")
high_bp = bp[bp > 140]
print(f"Hypertensive (>140): {len(high_bp)} patients ({100*len(high_bp)/len(bp):.1f}%)")

# --- Exercise: daily-sales outliers ---
np.random.seed(123)
daily_sales = np.concatenate([
    np.random.normal(5000, 800, 90),
    np.random.normal(12000, 500, 10),  # exceptional days
])
# TODO: Compute mean, median, std, IQR
# TODO: Find outliers using IQR fence
# TODO: Find outliers using z-score (|z| > 2)
# TODO: Report % of days above 90th percentile
Probability distributions model the likelihood of outcomes. scipy.stats provides a unified interface for continuous distributions (normal, exponential) and discrete distributions (binomial, Poisson) with PDF/PMF, CDF, and random variates.
# --- Example 1: standard normal (PDF / CDF / PPF) ---
import numpy as np
from scipy import stats

mu, sigma = 0, 1
dist = stats.norm(loc=mu, scale=sigma)
x_vals = [-2, -1, 0, 1, 2]
print("x PDF CDF")
for x in x_vals:
    print(f"{x:+.0f} {dist.pdf(x):.6f} {dist.cdf(x):.6f}")
# Percent-point function (inverse CDF)
print(f"\n95th percentile (PPF): {dist.ppf(0.95):.4f}")
print(f"P(-1 < X < 1): {dist.cdf(1) - dist.cdf(-1):.4f}")
print(f"P(-2 < X < 2): {dist.cdf(2) - dist.cdf(-2):.4f}")

# --- Example 2: binomial distribution ---
n, p = 20, 0.3  # 20 trials, 30% success rate
dist = stats.binom(n=n, p=p)
print(f"Binomial(n={n}, p={p})")
print(f"Mean: {dist.mean():.2f}, Std: {dist.std():.2f}")
print()
print("k PMF CDF")
for k in range(0, 11):
    print(f"{k:2d} {dist.pmf(k):.6f} {dist.cdf(k):.6f}")
# Probability of at least 8 successes: P(X >= 8) = 1 - P(X <= 7)
prob_ge8 = 1 - dist.cdf(7)
print(f"\nP(X >= 8): {prob_ge8:.4f}")

# --- Example 3: Poisson distribution ---
lam = 4.5  # average events per interval
dist = stats.poisson(mu=lam)
print(f"Poisson(lambda={lam})")
print(f"Mean: {dist.mean()}, Variance: {dist.var()}")
print()
print("k PMF")
for k in range(0, 13):
    bar = "#" * int(dist.pmf(k) * 100)
    print(f"{k:2d} {dist.pmf(k):.5f} {bar}")
print(f"\nP(X <= 3): {dist.cdf(3):.4f}")
print(f"P(X >= 8): {1 - dist.cdf(7):.4f}")

# --- Example 4: exponential distribution and memorylessness ---
# Scale = mean inter-arrival time (1/rate)
scale = 10.0  # average 10 minutes between events
dist = stats.expon(scale=scale)
print(f"Exponential(mean={scale})")
print(f"Mean: {dist.mean()}, Std: {dist.std():.4f}")
print()
# Survival function P(X > t)
for t in [5, 10, 15, 20, 30]:
    print(f"P(T > {t:2d} min) = {dist.sf(t):.4f}")
# Memoryless property check: P(T>20 | T>10) should equal P(T>10)
p_gt10 = dist.sf(10)
p_gt20_gt10 = dist.sf(20) / dist.sf(10)
print(f"\nP(T>20|T>10) = {p_gt20_gt10:.4f} (should equal P(T>10) = {p_gt10:.4f})")

# --- Example 5: e-commerce modelling with several distributions ---
# Poisson: purchases per hour
lam_hourly = 12
pois = stats.poisson(mu=lam_hourly)
print(f"Expected purchases/hour: {pois.mean()}")
print(f"P(>=15 purchases in 1hr): {1 - pois.cdf(14):.4f}")
# Exponential: minutes between purchases
minutes_between = stats.expon(scale=60/lam_hourly)
print(f"\nAvg time between purchases: {minutes_between.mean():.2f} min")
print(f"P(wait > 8 min): {minutes_between.sf(8):.4f}")
# Binomial: email campaign conversions
n_emails, p_conv = 200, 0.27
binom = stats.binom(n=n_emails, p=p_conv)
print(f"\nEmail campaign - n={n_emails}, p={p_conv}")
print(f"Expected conversions: {binom.mean():.1f}")
print(f"P(>=50 conversions): {1 - binom.cdf(49):.4f}")
# Normal: total daily revenue (CLT approximation)
avg_order, std_order, n_orders = 85, 30, 150
rev_dist = stats.norm(loc=avg_order*n_orders, scale=std_order*np.sqrt(n_orders))
print(f"\nP(daily revenue > $14000): {rev_dist.sf(14000):.4f}")

# --- Exercise: fit an exponential to simulated wait times ---
np.random.seed(42)
# Simulate wait times (minutes) - true mean = 3 min
wait_times = np.random.exponential(scale=3.0, size=500)
# TODO: Fit exponential distribution with stats.expon.fit()
# TODO: Print fitted scale (mean wait time)
# TODO: Compute P(wait > 5 minutes) using fitted distribution
# TODO: Compute 90th percentile wait time
Random sampling draws a subset from a population. The Central Limit Theorem (CLT) states that the distribution of sample means approaches normality as sample size grows, regardless of population shape. Bootstrap resampling estimates sampling distributions empirically.
# --- Example 1: simple random samples from a skewed population ---
import numpy as np
from scipy import stats

np.random.seed(42)
# Skewed population (exponential)
population = np.random.exponential(scale=5, size=10000)
print(f"Population - mean: {population.mean():.3f}, std: {population.std():.3f}")
print()
# Draw samples of different sizes
for n in [5, 30, 100, 500]:
    sample = np.random.choice(population, size=n, replace=False)
    print(f"n={n:4d} - sample mean: {sample.mean():.3f}, sample std: {sample.std(ddof=1):.3f}")

# --- Example 2: CLT simulation on a highly skewed population ---
np.random.seed(0)
population = np.random.exponential(scale=5, size=100000)
pop_mean, pop_std = population.mean(), population.std()
n_simulations = 3000
for n in [5, 10, 30, 100]:
    sample_means = [np.mean(np.random.choice(population, n, replace=False))
                    for _ in range(n_simulations)]
    sm = np.array(sample_means)
    # By CLT, the sampling distribution should approach normality
    w_stat, p_val = stats.shapiro(sm[:200])
    print(f"n={n:3d}: mean of means={sm.mean():.3f}, "
          f"std={sm.std():.3f} (theory={pop_std/np.sqrt(n):.3f}), "
          f"Shapiro p={p_val:.4f}")

# --- Example 3: bootstrap resampling of a sample mean ---
np.random.seed(7)
sample = np.random.lognormal(mean=2, sigma=0.5, size=50)
n_boot = 5000
boot_means = np.array([
    np.mean(np.random.choice(sample, size=len(sample), replace=True))
    for _ in range(n_boot)
])
print(f"Original sample mean: {sample.mean():.4f}")
print(f"Bootstrap mean: {boot_means.mean():.4f}")
print(f"Bootstrap std error: {boot_means.std():.4f}")
print(f"Bootstrap 95% CI: [{np.percentile(boot_means,2.5):.4f}, {np.percentile(boot_means,97.5):.4f}]")

# --- Example 4: proportional stratified sampling ---
import pandas as pd
np.random.seed(1)
n = 1000
df = pd.DataFrame({
    "age_group": np.random.choice(["18-34","35-54","55+"], size=n, p=[0.4,0.35,0.25]),
    "score": np.random.normal(70, 10, n),
})
# Proportional stratified sample (10%)
sample_rate = 0.10
stratified = (df.groupby("age_group", group_keys=False)
                .apply(lambda g: g.sample(frac=sample_rate, random_state=42)))
print("Stratified sample composition:")
print(stratified["age_group"].value_counts())
print(f"\nPop mean: {df['score'].mean():.3f}")
print(f"Sample mean: {stratified['score'].mean():.3f}")

# --- Example 5: polling with a bootstrap margin of error ---
np.random.seed(42)
true_props = {"North":0.52, "South":0.44, "East":0.55, "West":0.48}
region_sizes = {"North":3000, "South":4500, "East":2000, "West":2500}
# Simulate full voter database
rows = []
for region, size in region_sizes.items():
    votes = np.random.binomial(1, true_props[region], size)
    rows.extend(zip([region]*size, votes))
population = pd.DataFrame(rows, columns=["region","prefer_A"])
# Stratified sample: 100 per region
sample = (population.groupby("region", group_keys=False)
            .apply(lambda g: g.sample(n=100, random_state=42)))
overall_pct = sample["prefer_A"].mean()
print(f"Sample support for A: {overall_pct:.3f}")
# Bootstrap margin of error
n_boot = 4000
boot = [np.random.choice(sample["prefer_A"], size=len(sample), replace=True).mean()
        for _ in range(n_boot)]
boot = np.array(boot)
moe = np.percentile(boot, 97.5) - np.percentile(boot, 2.5)
print(f"Bootstrap 95% CI: [{np.percentile(boot,2.5):.3f}, {np.percentile(boot,97.5):.3f}]")
print(f"Margin of error: ±{moe/2:.3f}")

# --- Exercise: CLT with Uniform(0,1) ---
np.random.seed(99)
n_simulations = 2000
pop_std_uniform = np.sqrt(1/12)  # std of Uniform(0,1)
for n in [2, 10, 30, 100]:
    # TODO: Draw n_simulations samples of size n from Uniform(0,1)
    # TODO: Compute sample means array
    # TODO: Print mean of sample means, empirical SE, theoretical SE
    pass
A confidence interval (CI) gives a range of plausible values for a population parameter. A 95% CI means that if we repeated the study many times, 95% of the intervals would contain the true parameter. scipy.stats provides t-intervals; bootstrap CIs require no distributional assumptions.
# --- Example 1: t confidence intervals for a mean ---
import numpy as np
from scipy import stats

np.random.seed(42)
sample = np.random.normal(loc=50, scale=8, size=30)
n = len(sample)
mean = sample.mean()
se = stats.sem(sample)  # standard error of the mean
# 90%, 95%, 99% confidence intervals (wider confidence -> wider interval)
for conf in [0.90, 0.95, 0.99]:
    lo, hi = stats.t.interval(conf, df=n-1, loc=mean, scale=se)
    print(f"{int(conf*100)}% CI: ({lo:.3f}, {hi:.3f}) width={hi-lo:.3f}")

# --- Example 2: CIs for a proportion (Wald and Wilson) ---
# 143 successes out of 250 trials
successes, n = 143, 250
p_hat = successes / n
se = np.sqrt(p_hat * (1 - p_hat) / n)
for conf in [0.90, 0.95, 0.99]:
    z = stats.norm.ppf((1 + conf) / 2)
    lo = p_hat - z * se
    hi = p_hat + z * se
    print(f"{int(conf*100)}% CI for proportion: ({lo:.4f}, {hi:.4f})")
# Wilson score interval (better for extreme p).
# NOTE: proportion_confint lives in statsmodels, not scipy.stats;
# scipy's equivalent is binomtest(...).proportion_ci(method="wilson").
ci_w = stats.binomtest(successes, n).proportion_ci(confidence_level=0.95, method="wilson")
lo_w, hi_w = ci_w.low, ci_w.high
print(f"\nWilson 95% CI: ({lo_w:.4f}, {hi_w:.4f})")

# --- Example 3: percentile bootstrap CI for the median ---
np.random.seed(0)
data = np.random.lognormal(2, 0.8, size=80)
n_boot = 5000
boot_medians = np.array([
    np.median(np.random.choice(data, size=len(data), replace=True))
    for _ in range(n_boot)
])
lo_p, hi_p = np.percentile(boot_medians, [2.5, 97.5])
print(f"Sample median: {np.median(data):.4f}")
print(f"Bootstrap 95% CI: ({lo_p:.4f}, {hi_p:.4f})")
print(f"Bootstrap std error: {boot_medians.std():.4f}")

# --- Example 4: CI for a difference in means (Welch) ---
np.random.seed(5)
group_a = np.random.normal(75, 10, 40)
group_b = np.random.normal(70, 12, 35)
# Welch t-test (unequal variances)
t_stat, p_val = stats.ttest_ind(group_a, group_b, equal_var=False)
diff = group_a.mean() - group_b.mean()
# Manual CI for the difference, using the Welch-Satterthwaite df
# (the original used the pooled df n1+n2-2, inconsistent with Welch's test)
va = group_a.var(ddof=1) / len(group_a)
vb = group_b.var(ddof=1) / len(group_b)
se_diff = np.sqrt(va + vb)
df = (va + vb)**2 / (va**2/(len(group_a)-1) + vb**2/(len(group_b)-1))
lo, hi = diff - stats.t.ppf(0.975, df)*se_diff, diff + stats.t.ppf(0.975, df)*se_diff
print(f"Group A mean: {group_a.mean():.3f}")
print(f"Group B mean: {group_b.mean():.3f}")
print(f"Difference: {diff:.3f}")
print(f"95% CI for diff: ({lo:.3f}, {hi:.3f})")
print(f"t={t_stat:.3f}, p={p_val:.4f}")

# --- Example 5: clinical trial case study ---
np.random.seed(42)
# Reduction in systolic BP (mmHg) for 45 patients
reduction = np.random.normal(loc=11.5, scale=5.2, size=45)
mean_red = reduction.mean()
se = stats.sem(reduction)
lo, hi = stats.t.interval(0.95, df=len(reduction)-1, loc=mean_red, scale=se)
print(f"Mean reduction: {mean_red:.2f} mmHg")
print(f"95% t-CI: ({lo:.2f}, {hi:.2f}) mmHg")
# Proportion achieving > 10 mmHg reduction (Wilson CI via scipy's binomtest)
successes = int(np.sum(reduction > 10))
ci_w = stats.binomtest(successes, len(reduction)).proportion_ci(confidence_level=0.95, method="wilson")
lo_w, hi_w = ci_w.low, ci_w.high
print(f"\nResponders (>10 mmHg): {successes}/{len(reduction)} ({successes/len(reduction):.1%})")
print(f"Wilson 95% CI: ({lo_w:.3f}, {hi_w:.3f})")
# Bootstrap CI for median reduction (subgroup)
boot_med = np.array([np.median(np.random.choice(reduction, len(reduction), replace=True))
                     for _ in range(4000)])
print(f"\nBootstrap 95% CI (median): ({np.percentile(boot_med,2.5):.2f}, {np.percentile(boot_med,97.5):.2f})")

# --- Exercise: CI width vs sample size and distribution ---
np.random.seed(77)
# TODO: Sample 1 (n=20) from Normal(60,8) - compute 95% t-CI
# TODO: Sample 2 (n=100) from Normal(60,8) - compute 95% t-CI
# TODO: Sample 3 (n=40) from Exponential(scale=5) - compute both t-CI and bootstrap CI
# TODO: Compare widths and discuss
Hypothesis testing provides a framework for making statistical decisions. A null hypothesis (H0) is tested against an alternative. The p-value is the probability of observing data at least as extreme as the sample if H0 is true. scipy.stats provides t-tests, with significance typically judged at alpha=0.05.
# --- Example 1: one-sample t-test ---
import numpy as np
from scipy import stats

np.random.seed(42)
# H0: population mean = 100; H1: mean != 100
sample = np.random.normal(loc=103, scale=12, size=35)
t_stat, p_val = stats.ttest_1samp(sample, popmean=100)
print(f"Sample mean: {sample.mean():.4f}")
print(f"t-statistic: {t_stat:.4f}")
print(f"p-value: {p_val:.4f}")
print()
alpha = 0.05
conclusion = "Reject H0" if p_val < alpha else "Fail to reject H0"
print(f"At alpha={alpha}: {conclusion}")
print(f"Evidence that mean != 100: {'Yes' if p_val < alpha else 'No'}")

# --- Example 2: two-sample t-test with variance check ---
np.random.seed(7)
control = np.random.normal(50, 8, 40)
treatment = np.random.normal(55, 9, 38)
# Levene test for equal variances decides which t-test variant to use
lev_stat, lev_p = stats.levene(control, treatment)
equal_var = lev_p > 0.05
t_stat, p_val = stats.ttest_ind(control, treatment, equal_var=equal_var)
print(f"Control: n={len(control)}, mean={control.mean():.3f}, std={control.std(ddof=1):.3f}")
print(f"Treatment: n={len(treatment)}, mean={treatment.mean():.3f}, std={treatment.std(ddof=1):.3f}")
print(f"\nLevene p={lev_p:.4f} -> equal_var={equal_var}")
print(f"t={t_stat:.4f}, p={p_val:.4f}")
print(f"Result: {'Significant difference' if p_val < 0.05 else 'No significant difference'}")

# --- Example 3: paired t-test ---
np.random.seed(3)
n = 25
before = np.random.normal(130, 10, n)
after = before - np.random.normal(8, 5, n)  # intervention reduces by ~8
t_stat, p_val = stats.ttest_rel(before, after)
diff = before - after
print(f"Before: {before.mean():.2f} ± {before.std(ddof=1):.2f}")
print(f"After: {after.mean():.2f} ± {after.std(ddof=1):.2f}")
print(f"Mean diff: {diff.mean():.2f} ± {diff.std(ddof=1):.2f}")
print(f"\nt={t_stat:.4f}, p={p_val:.6f}")
print(f"One-tailed p (before > after): {p_val/2:.6f}")

# --- Example 4: simulated Type I error rate and power ---
np.random.seed(0)
alpha = 0.05
n_simulations = 10000
n_per_group = 30
# Under H0: both groups have the same mean -> rejections are false positives
false_positives = 0
for _ in range(n_simulations):
    g1 = np.random.normal(50, 10, n_per_group)
    g2 = np.random.normal(50, 10, n_per_group)
    _, p = stats.ttest_ind(g1, g2)
    if p < alpha:
        false_positives += 1
print(f"Simulated Type I error rate: {false_positives/n_simulations:.4f} (expected ~{alpha})")
# Under H1: real effect of size 0.5 SD -> rejections are true positives
true_positives = 0
for _ in range(n_simulations):
    g1 = np.random.normal(50, 10, n_per_group)
    g2 = np.random.normal(55, 10, n_per_group)
    _, p = stats.ttest_ind(g1, g2)
    if p < alpha:
        true_positives += 1
print(f"Simulated Power: {true_positives/n_simulations:.4f}")

# --- Example 5: A/B test on checkout revenue ---
np.random.seed(42)
# Simulate session revenues (in $) for each group
n_ctrl, n_trt = 500, 500
control = np.random.lognormal(mean=3.5, sigma=1.2, size=n_ctrl)
treatment = np.random.lognormal(mean=3.65, sigma=1.2, size=n_trt)
print("=== A/B Test: Checkout Page Revenue ===")
print(f"Control: n={n_ctrl}, mean=${control.mean():.2f}, median=${np.median(control):.2f}")
print(f"Treatment: n={n_trt}, mean=${treatment.mean():.2f}, median=${np.median(treatment):.2f}")
# Variance equality
lev_stat, lev_p = stats.levene(control, treatment)
print(f"\nLevene test: F={lev_stat:.3f}, p={lev_p:.4f}")
equal_var = lev_p > 0.05
# t-test (Welch variant when variances differ)
t_stat, p_val = stats.ttest_ind(control, treatment, equal_var=equal_var)
print(f"Welch t-test: t={t_stat:.3f}, p={p_val:.4f}")
print(f"Decision (alpha=0.05): {'Reject H0 - new page works' if p_val < 0.05 else 'Fail to reject H0'}")
uplift = (treatment.mean() - control.mean()) / control.mean() * 100
print(f"Revenue uplift: {uplift:.1f}%")

# --- Exercise: delivery times ---
np.random.seed(11)
deliveries = np.random.normal(loc=3.4, scale=0.8, size=40)
morning = np.random.normal(loc=3.2, scale=0.7, size=25)
afternoon = np.random.normal(loc=3.6, scale=0.9, size=25)
# TODO: One-sample t-test: H0: mean = 3.0 days
# TODO: Two-sample t-test: morning vs afternoon
# TODO: Print t, p, and conclusion for each
ANOVA (Analysis of Variance) tests whether means differ across three or more groups simultaneously, avoiding inflated Type I errors from multiple pairwise t-tests. One-way ANOVA requires normality and equal variances; Kruskal-Wallis is the non-parametric alternative. Post-hoc tests identify which specific pairs differ.
# --- Example 1: one-way ANOVA across four groups ---
import numpy as np
from scipy import stats

np.random.seed(42)
group_a = np.random.normal(20, 4, 30)
group_b = np.random.normal(24, 4, 30)
group_c = np.random.normal(22, 4, 30)
group_d = np.random.normal(20, 4, 30)
f_stat, p_val = stats.f_oneway(group_a, group_b, group_c, group_d)
for name, grp in zip("ABCD", [group_a, group_b, group_c, group_d]):
    print(f"Group {name}: mean={grp.mean():.3f}, std={grp.std(ddof=1):.3f}")
print(f"\nF-statistic: {f_stat:.4f}")
print(f"p-value: {p_val:.4f}")
print(f"Result: {'Significant difference between groups' if p_val < 0.05 else 'No significant difference'}")

# --- Example 2: checking ANOVA assumptions ---
np.random.seed(0)
g1 = np.random.normal(10, 2, 25)
g2 = np.random.normal(12, 3, 25)
g3 = np.random.normal(11, 2, 25)
# Homogeneity of variance
lev_stat, lev_p = stats.levene(g1, g2, g3)
print(f"Levene test: F={lev_stat:.3f}, p={lev_p:.4f}")
print(f" Equal variances assumed: {lev_p > 0.05}")
# Normality within each group
for i, g in enumerate([g1, g2, g3], 1):
    w, p = stats.shapiro(g)
    print(f"Shapiro-Wilk Group {i}: W={w:.4f}, p={p:.4f} -> {'Normal' if p>0.05 else 'Non-normal'}")

# --- Example 3: Kruskal-Wallis on skewed groups ---
np.random.seed(5)
# Skewed groups -> ANOVA assumptions violated
g1 = np.random.exponential(2, 30)
g2 = np.random.exponential(3, 30)
g3 = np.random.exponential(2.5, 30)
kw_stat, kw_p = stats.kruskal(g1, g2, g3)
f_stat, f_p = stats.f_oneway(g1, g2, g3)
print("Group medians:")
for i, g in enumerate([g1, g2, g3], 1):
    print(f" Group {i}: median={np.median(g):.3f}")
print(f"\nKruskal-Wallis: H={kw_stat:.4f}, p={kw_p:.4f}")
print(f"One-way ANOVA: F={f_stat:.4f}, p={f_p:.4f}")

# --- Example 4: post-hoc pairwise tests with Bonferroni correction ---
from itertools import combinations
np.random.seed(9)
groups = {
    "Control": np.random.normal(50, 8, 30),
    "Drug A": np.random.normal(58, 8, 30),
    "Drug B": np.random.normal(55, 8, 30),
    "Placebo": np.random.normal(51, 8, 30),
}
f_stat, p_val = stats.f_oneway(*groups.values())
print(f"ANOVA: F={f_stat:.3f}, p={p_val:.4f}")
# Pairwise t-tests with Bonferroni correction
names = list(groups.keys())
pairs = list(combinations(names, 2))
alpha_corr = 0.05 / len(pairs)  # Bonferroni
print(f"\nBonferroni alpha: {alpha_corr:.4f} ({len(pairs)} comparisons)")
for n1, n2 in pairs:
    t, p = stats.ttest_ind(groups[n1], groups[n2])
    sig = "*" if p < alpha_corr else ""
    print(f" {n1} vs {n2}: p={p:.4f} {sig}")

# --- Example 5: diet study with post-hoc analysis ---
np.random.seed(42)
diets = {
    "Mediterranean": np.random.normal(5.2, 2.1, 35),
    "Keto": np.random.normal(7.1, 2.5, 35),
    "Vegan": np.random.normal(4.8, 1.9, 35),
    "Low-Fat": np.random.normal(4.1, 2.0, 35),
}
print("=== Diet Weight Loss Study (kg) ===")
for diet, vals in diets.items():
    print(f"{diet:15s}: mean={vals.mean():.2f}, std={vals.std(ddof=1):.2f}")
f_stat, p_val = stats.f_oneway(*diets.values())
print(f"\nANOVA: F={f_stat:.3f}, p={p_val:.4f}")
if p_val < 0.05:
    print("Significant - running post-hoc tests")
    names = list(diets.keys())
    pairs = list(combinations(names, 2))
    bonf = 0.05 / len(pairs)
    print(f"Bonferroni alpha: {bonf:.4f}")
    for n1, n2 in pairs:
        _, p = stats.ttest_ind(diets[n1], diets[n2])
        print(f" {n1:15s} vs {n2:15s}: p={p:.4f} {'*' if p<bonf else ''}")

# --- Exercise: teaching methods ---
np.random.seed(55)
lecture = np.random.normal(72, 10, 30)
active = np.random.normal(78, 10, 30)
online = np.random.normal(74, 10, 30)
# TODO: One-way ANOVA
# TODO: If significant, Bonferroni post-hoc
# TODO: Kruskal-Wallis test
# TODO: Compare ANOVA vs Kruskal-Wallis conclusions
Chi-square tests assess relationships between categorical variables. The goodness-of-fit test checks if observed frequencies match expected. The test of independence checks if two categorical variables are related. scipy.stats.chi2_contingency handles contingency tables directly.
# --- Example 1: goodness-of-fit (is a die fair?) ---
import numpy as np
from scipy import stats

# Test if a die is fair (expected: equal frequency for each face)
observed = np.array([48, 55, 52, 60, 45, 40])  # 300 rolls
expected_freq = 300 / 6
expected = np.full(6, expected_freq)
chi2, p_val = stats.chisquare(f_obs=observed, f_exp=expected)
df = len(observed) - 1
print("Die Roll Test (300 rolls)")
print(f"Observed: {observed}")
print(f"Expected: {expected}")
print(f"\nChi2 statistic: {chi2:.4f}")
print(f"Degrees of freedom: {df}")
print(f"p-value: {p_val:.4f}")
print(f"Die is {'NOT fair' if p_val < 0.05 else 'fair (fail to reject H0)'}")

# --- Example 2: test of independence on a contingency table ---
import pandas as pd
# Customer satisfaction by region
data = {
    "Satisfied": {"North": 120, "South": 85, "East": 95, "West": 110},
    "Neutral": {"North": 40, "South": 35, "East": 42, "West": 38},
    "Unsatisfied": {"North": 20, "South": 30, "East": 23, "West": 22},
}
table = pd.DataFrame(data).T
print("Contingency Table:")
print(table)
chi2, p_val, dof, expected = stats.chi2_contingency(table)
print(f"\nChi2={chi2:.4f}, df={dof}, p={p_val:.4f}")
print(f"Satisfaction independent of region: {p_val > 0.05}")

# --- Example 3: effect size with Cramer's V ---
# 2x2 contingency: Gender vs. Purchase
table = np.array([[230, 170],   # Male: bought, did not buy
                  [195, 205]])  # Female: bought, did not buy
chi2, p_val, dof, expected = stats.chi2_contingency(table)
n = table.sum()
k = min(table.shape) - 1
cramers_v = np.sqrt(chi2 / (n * k))
print(f"Chi2={chi2:.4f}, df={dof}, p={p_val:.4f}")
print(f"n={n}, Cramer's V={cramers_v:.4f}")
print(f"Effect size: {'small' if cramers_v < 0.1 else 'medium' if cramers_v < 0.3 else 'large'}")

# --- Example 4: Fisher's exact test for small samples ---
# Rare disease by treatment
table = np.array([[5, 15],
                  [2, 18]])
chi2, p_chi, dof, expected = stats.chi2_contingency(table)
oddsratio, p_fisher = stats.fisher_exact(table)
print("Observed:")
print(table)
print(f"\nExpected frequencies:")
print(expected.round(2))
print(f"\nChi2 p-value: {p_chi:.4f} (valid if all expected >= 5)")
print(f"Fisher p-value: {p_fisher:.4f} (preferred for small samples)")
print(f"Odds ratio: {oddsratio:.4f}")

# --- Example 5: payment method by age group ---
np.random.seed(42)
# Contingency table: age group vs payment method
table = np.array([
    [45, 130, 125],  # 18-34: Cash, Card, Mobile
    [80, 155, 65],   # 35-54
    [95, 120, 25],   # 55+
])
row_labels = ["18-34", "35-54", "55+"]
col_labels = ["Cash", "Card", "Mobile"]
df_table = pd.DataFrame(table, index=row_labels, columns=col_labels)
print("Payment Method by Age Group:")
print(df_table)
print(f"\nRow totals: {df_table.sum(axis=1).values}")
chi2, p_val, dof, expected = stats.chi2_contingency(table)
n = table.sum()
cramers_v = np.sqrt(chi2 / (n * (min(table.shape)-1)))
print(f"\nChi2={chi2:.3f}, df={dof}, p={p_val:.4f}")
print(f"Cramer's V = {cramers_v:.4f} ({'small' if cramers_v<0.1 else 'medium' if cramers_v<0.3 else 'large'} effect)")
print(f"Association between age and payment method: {'Yes' if p_val<0.05 else 'No'}")

# --- Exercise: conversion A/B table ---
# Control: 180 converted, 720 did not
# Treatment: 210 converted, 690 did not
table = np.array([
    [180, 720],
    [210, 690],
])
# TODO: Run chi2_contingency
# TODO: Compute and interpret Cramer's V
# TODO: Also run Fisher's exact as a check
Correlation measures the linear (Pearson) or monotonic (Spearman) relationship between two variables, ranging from -1 to +1. Simple linear regression models one variable as a linear function of another. scipy.stats.linregress provides slope, intercept, r-value, and p-value for the slope.
# --- Example 1: Pearson vs Spearman correlation ---
import numpy as np
from scipy import stats

np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2.5 * x + np.random.normal(0, 3, 50)  # linear + noise
pearson_r, pearson_p = stats.pearsonr(x, y)
spearman_r, spearman_p = stats.spearmanr(x, y)
print(f"Pearson r: {pearson_r:.4f} (p={pearson_p:.4f})")
print(f"Spearman r: {spearman_r:.4f} (p={spearman_p:.4f})")
# Correlation matrix with pandas
import pandas as pd
df = pd.DataFrame({"x": x, "y": y, "z": y**0.5})
print("\nCorrelation matrix:")
print(df.corr(method="pearson").round(4))

# --- Example 2: simple linear regression with linregress ---
np.random.seed(0)
hours_studied = np.random.uniform(1, 10, 60)
exam_score = 50 + 4.5 * hours_studied + np.random.normal(0, 5, 60)
result = stats.linregress(hours_studied, exam_score)
print(f"slope: {result.slope:.4f}")
print(f"intercept: {result.intercept:.4f}")
print(f"r-value: {result.rvalue:.4f}")
print(f"r-squared: {result.rvalue**2:.4f}")
print(f"p-value: {result.pvalue:.6f}")
print(f"std error: {result.stderr:.4f}")
# Prediction
pred_8hrs = result.slope * 8 + result.intercept
print(f"\nPredicted score for 8 hrs: {pred_8hrs:.2f}")

# --- Example 3: linear vs quadratic fit ---
np.random.seed(3)
x = np.linspace(0, 10, 80)
y = 3 + 2*x - 0.15*x**2 + np.random.normal(0, 2, 80)
# Linear fit
coef1 = np.polyfit(x, y, 1)
y_hat1 = np.polyval(coef1, x)
ss_res1 = np.sum((y - y_hat1)**2)
ss_tot = np.sum((y - y.mean())**2)
r2_lin = 1 - ss_res1/ss_tot
# Quadratic fit
coef2 = np.polyfit(x, y, 2)
y_hat2 = np.polyval(coef2, x)
ss_res2 = np.sum((y - y_hat2)**2)
r2_quad = 1 - ss_res2/ss_tot
print(f"Linear fit: R² = {r2_lin:.4f}")
print(f"Quadratic fit: R² = {r2_quad:.4f}")
print(f"Quadratic coefficients: {coef2.round(4)}")

# --- Example 4: residual diagnostics ---
np.random.seed(1)
x = np.random.uniform(0, 20, 50)
y = 5 + 1.8 * x + np.random.normal(0, 4, 50)
result = stats.linregress(x, y)
y_pred = result.slope * x + result.intercept
residuals = y - y_pred
# Standardized residuals
std_res = (residuals - residuals.mean()) / residuals.std(ddof=1)
outlier_mask = np.abs(std_res) > 2
print(f"n={len(x)}, R²={result.rvalue**2:.4f}")
print(f"Residual std: {residuals.std(ddof=1):.4f}")
print(f"Outliers (|std resid|>2): {outlier_mask.sum()}")
print(f"Expected outliers ~5%: {int(0.05*len(x))}")
# Durbin-Watson statistic (autocorrelation in residuals)
diffs = np.diff(residuals)
dw = np.sum(diffs**2) / np.sum(residuals**2)
print(f"Durbin-Watson: {dw:.4f} (2=no autocorr, <2=positive, >2=negative)")

# --- Example 5: house price case study ---
np.random.seed(42)
# Simulate house data
sqft = np.random.uniform(800, 3500, 150)
price = 50000 + 180 * sqft + np.random.normal(0, 25000, 150)
# Add a few luxury outliers
price[:5] += 200000
df = pd.DataFrame({"sqft": sqft, "price": price})
pearson_r, pearson_p = stats.pearsonr(df["sqft"], df["price"])
print(f"Pearson r = {pearson_r:.4f} (p={pearson_p:.2e})")
result = stats.linregress(df["sqft"], df["price"])
print(f"\nRegression: price = {result.slope:.2f} * sqft + {result.intercept:,.0f}")
print(f"R² = {result.rvalue**2:.4f}")
# Predict 2000 sqft house
pred = result.slope * 2000 + result.intercept
print(f"Predicted price (2000 sqft): ${pred:,.0f}")
# Residual analysis
residuals = df["price"] - (result.slope * df["sqft"] + result.intercept)
std_res = (residuals - residuals.mean()) / residuals.std(ddof=1)
print(f"\nOutlier properties (|std resid|>2.5): {(np.abs(std_res)>2.5).sum()}")

# --- Exercise: ad spend vs sales ---
np.random.seed(88)
ad_spend = np.random.uniform(1000, 10000, 52)  # weekly ad spend
sales = 2000 + 3.5 * ad_spend + np.random.normal(0, 3000, 52)
# TODO: Compute Pearson and Spearman correlations
# TODO: Fit linear regression with scipy.stats.linregress
# TODO: Print slope, intercept, R²
# TODO: Predict sales for $7500 ad spend
Non-parametric tests make no assumptions about the underlying distribution. They operate on ranks rather than raw values, making them robust to outliers and skewed data. Key tests include Mann-Whitney U (two independent groups), Wilcoxon signed-rank (paired), and Shapiro-Wilk (normality check).
import numpy as np
from scipy import stats
np.random.seed(42)
normal_data = np.random.normal(50, 10, 50)
skewed_data = np.random.exponential(scale=5, size=50)
uniform_data = np.random.uniform(0, 100, 50)
print("Shapiro-Wilk Normality Test")
print(f"{'Dataset':<15} {'W-stat':>8} {'p-value':>9} {'Normal?':>8}")
for name, data in [("Normal", normal_data), ("Exponential", skewed_data), ("Uniform", uniform_data)]:
w, p = stats.shapiro(data)
print(f"{name:<15} {w:>8.4f} {p:>9.4f} {str(p>0.05):>8}")import numpy as np
from scipy import stats
np.random.seed(7)
# Skewed distributions β t-test assumptions violated
group1 = np.random.exponential(scale=3, size=35)
group2 = np.random.exponential(scale=4, size=38)
# Parametric (t-test)
t_stat, t_p = stats.ttest_ind(group1, group2)
# Non-parametric
u_stat, mw_p = stats.mannwhitneyu(group1, group2, alternative="two-sided")
print(f"Group 1: median={np.median(group1):.3f}, mean={group1.mean():.3f}")
print(f"Group 2: median={np.median(group2):.3f}, mean={group2.mean():.3f}")
print(f"
Welch t-test: t={t_stat:.3f}, p={t_p:.4f}")
print(f"Mann-Whitney U: U={u_stat:.1f}, p={mw_p:.4f}")
# Rank-biserial correlation (effect size)
r_rb = 1 - (2*u_stat) / (len(group1)*len(group2))
print(f"Rank-biserial r: {r_rb:.4f}")import numpy as np
from scipy import stats
np.random.seed(3)
n = 30
before = np.random.exponential(scale=8, size=n)
after = before * np.random.uniform(0.6, 0.95, n) # improvement
# Parametric paired t-test
t_stat, t_p = stats.ttest_rel(before, after)
# Non-parametric
w_stat, w_p = stats.wilcoxon(before, after)
print(f"Before: median={np.median(before):.3f}")
print(f"After: median={np.median(after):.3f}")
print(f"
Paired t-test: t={t_stat:.3f}, p={t_p:.4f}")
print(f"Wilcoxon signed-rank: W={w_stat:.1f}, p={w_p:.4f}")import numpy as np
from scipy import stats

# Goodness-of-fit: one-sample KS, Anderson-Darling, two-sample KS.
np.random.seed(0)
data = np.random.normal(5, 2, 100)
# KS test: compare to normal distribution
# NOTE(review): estimating loc/scale from the same data makes the KS
# p-value conservative (Lilliefors situation) -- kept for illustration.
ks_stat, ks_p = stats.kstest(data, "norm", args=(data.mean(), data.std()))
print(f"K-S test vs Normal: D={ks_stat:.4f}, p={ks_p:.4f}")
# Anderson-Darling test: compare statistic to per-alpha critical values.
ad_result = stats.anderson(data, dist="norm")
print(f"\nAnderson-Darling: statistic={ad_result.statistic:.4f}")
for sl, cv in zip(ad_result.significance_level, ad_result.critical_values):
    reject = ad_result.statistic > cv
    print(f" alpha={sl:5.1f}%: critical={cv:.3f}, reject H0={reject}")
# Two-sample KS test
data2 = np.random.normal(5.5, 2, 100)
ks2_stat, ks2_p = stats.ks_2samp(data, data2)
print(f"\nTwo-sample KS test: D={ks2_stat:.4f}, p={ks2_p:.4f}")
import numpy as np
from scipy import stats

# Pain-score case study: normality checks, then non-parametric tests.
np.random.seed(42)
n = 40
# Pain scores (0-10, skewed toward lower end)
before = np.random.beta(a=2, b=1.5, size=n) * 10
after = before * np.random.uniform(0.4, 0.85, n)
# Normality check
wb, pb = stats.shapiro(before)
wa, pa = stats.shapiro(after)
print("Shapiro-Wilk normality test:")
print(f" Before: W={wb:.4f}, p={pb:.4f} -> {'Normal' if pb>0.05 else 'Non-normal'}")
print(f" After: W={wa:.4f}, p={pa:.4f} -> {'Normal' if pa>0.05 else 'Non-normal'}")
# Wilcoxon signed-rank (paired, non-parametric)
w_stat, w_p = stats.wilcoxon(before, after)
print(f"\nWilcoxon signed-rank: W={w_stat:.1f}, p={w_p:.4f}")
print(f"Median reduction: {np.median(before-after):.2f} points")
# Independent cohort comparison
cohort_a = np.random.beta(2, 1.5, 40) * 10
cohort_b = np.random.beta(1.5, 2, 40) * 10
u_stat, mw_p = stats.mannwhitneyu(cohort_a, cohort_b, alternative="two-sided")
print(f"\nMann-Whitney U (cohort comparison): U={u_stat:.1f}, p={mw_p:.4f}")
import numpy as np
from scipy import stats
np.random.seed(22)
# Exercise: pick the right two-sample test for each pair of groups below.
# group_a is drawn from a normal, group_b from a (right-skewed) exponential,
# group_c from a uniform distribution.
group_a = np.random.normal(50, 10, 40)
group_b = np.random.exponential(scale=5, size=40)
group_c = np.random.uniform(0, 100, 40)
# TODO: Shapiro-Wilk for each group
# TODO: For A vs B: t-test and Mann-Whitney
# TODO: For A vs C: t-test and Mann-Whitney
# TODO: Discuss which test is appropriate for each pair
Effect size quantifies the practical significance of a result, independent of sample size. Cohen's d measures standardized mean differences. Statistical power (1 - beta) is the probability of detecting a true effect. Power analysis determines the sample size needed to achieve adequate power (typically 0.80).
# Cohen's d for two independent samples (equal-n pooled SD).
import numpy as np
from scipy import stats

np.random.seed(42)
group1 = np.random.normal(50, 10, 100)
group2 = np.random.normal(56, 10, 100)

# Pooled Cohen's d: absolute mean difference over pooled sample SD.
mean_diff = group1.mean() - group2.mean()
pooled_std = np.sqrt((group1.var(ddof=1) + group2.var(ddof=1)) / 2)
cohens_d = abs(mean_diff) / pooled_std

print(f"Group 1 mean: {group1.mean():.3f}")
print(f"Group 2 mean: {group2.mean():.3f}")
print(f"Pooled std: {pooled_std:.3f}")
print(f"Cohen's d: {cohens_d:.4f}")

# Map d onto rule-of-thumb buckets via the first matching cutoff.
for _cutoff, _label in ((0.2, "small"), (0.5, "small-medium"),
                        (0.8, "medium"), (float("inf"), "large")):
    if cohens_d < _cutoff:
        magnitude = _label
        break
print(f"Effect size: {magnitude}")

_, p_val = stats.ttest_ind(group1, group2)
print(f"p-value: {p_val:.4f}")
from statsmodels.stats.power import TTestPower

# Power analysis: sample size needed for a given effect size / power.
analysis = TTestPower()
# Given: effect size d=0.5, alpha=0.05, power=0.80
n_needed = analysis.solve_power(effect_size=0.5, alpha=0.05, power=0.80, alternative="two-sided")
print(f"Sample size for d=0.5, power=0.80: n = {n_needed:.1f} -> {int(np.ceil(n_needed))}")
# Power curve: how does power change with n?
import numpy as np
print("\nPower by sample size (d=0.5, alpha=0.05):")
for n in [10, 20, 30, 50, 80, 100, 150, 200]:
    pwr = analysis.solve_power(effect_size=0.5, nobs=n, alpha=0.05, alternative="two-sided")
    bar = "#" * int(pwr * 20)   # crude ASCII power bar
    print(f" n={n:4d}: power={pwr:.3f} {bar}")
import numpy as np
from statsmodels.stats.power import TTestIndPower

# Two-independent-sample power analysis.
analysis = TTestIndPower()
# Sample sizes for various effect sizes
print("Required n per group (alpha=0.05, power=0.80):")
for d in [0.2, 0.3, 0.5, 0.8, 1.0, 1.5]:
    n = analysis.solve_power(effect_size=d, alpha=0.05, power=0.80, alternative="two-sided")
    print(f" d={d}: n = {int(np.ceil(n))} per group (total={int(np.ceil(n))*2})")
# How much power do we have with n=30 per group?
print("\nPower with n=30 per group (alpha=0.05):")
for d in [0.2, 0.5, 0.8]:
    pwr = analysis.solve_power(effect_size=d, nobs1=30, alpha=0.05, alternative="two-sided")
    print(f" d={d}: power={pwr:.4f}")
import numpy as np
from scipy import stats

# Empirical (simulation-based) power vs the analytical formula.
np.random.seed(0)
n_sim = 5000
alpha = 0.05
n = 40
effect = 0.5  # Cohen's d
# Empirical power: fraction of simulated experiments that reject H0.
rejections = 0
for _ in range(n_sim):
    g1 = np.random.normal(0, 1, n)
    g2 = np.random.normal(effect, 1, n)
    _, p = stats.ttest_ind(g1, g2)
    if p < alpha:
        rejections += 1
emp_power = rejections / n_sim
# Analytical power for comparison
from statsmodels.stats.power import TTestIndPower
analytical = TTestIndPower().solve_power(effect_size=effect, nobs1=n, alpha=alpha)
print(f"n={n} per group, d={effect}, alpha={alpha}")
print(f"Empirical power: {emp_power:.4f}")
print(f"Analytical power: {analytical:.4f}")
print(f"Type II error (beta): {1-emp_power:.4f}")
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestIndPower

# A/B test planning: derive n from a business effect estimate, then simulate.
np.random.seed(42)
# Business context: expected improvement in days-to-purchase
current_mean, new_mean, common_std = 12.0, 10.5, 4.0
effect_d = (current_mean - new_mean) / common_std
print(f"=== A/B Test Power Planning ===")
print(f"Expected effect size (Cohen's d): {effect_d:.4f}")
analysis = TTestIndPower()
n_needed = analysis.solve_power(effect_size=effect_d, alpha=0.05, power=0.80)
print(f"Required n per arm: {int(np.ceil(n_needed))}")
print(f"Total sample size: {int(np.ceil(n_needed))*2}")
# Simulate the actual experiment
n_actual = int(np.ceil(n_needed))
control = np.random.normal(current_mean, common_std, n_actual)
treatment = np.random.normal(new_mean, common_std, n_actual)
t_stat, p_val = stats.ttest_ind(control, treatment)
mean_diff = control.mean() - treatment.mean()
pooled_std = np.sqrt((control.var(ddof=1) + treatment.var(ddof=1)) / 2)
obs_d = mean_diff / pooled_std
print(f"\nSimulated experiment results:")
print(f"Observed Cohen's d: {obs_d:.4f}")
print(f"p-value: {p_val:.4f}")
print(f"Decision: {'Reject H0 -> new flow is better' if p_val<0.05 else 'Fail to reject H0'}")
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestIndPower

# Exercise: power planning from expected difference and pooled SD.
expected_diff = 2.0
pooled_std = 5.0
# TODO: Compute Cohen's d
# TODO: TTestIndPower: find n for power = 0.80, 0.85, 0.90
# TODO: Simulate 3000 trials with n from 80% power calculation
# TODO: Report empirical power
Update beliefs with evidence using Bayes' theorem. Compute posterior distributions, credible intervals, and compare Bayesian vs frequentist approaches.
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Bayesian coin-flip update with a Beta conjugate prior.
# Prior: uniform (Beta(1,1)) -> update with 7 heads out of 10 flips
heads, tails = 7, 3
theta = np.linspace(0, 1, 1000)
# Prior: Beta(1,1) = uniform
alpha_prior, beta_prior = 1, 1
# Posterior: Beta(alpha+heads, beta+tails)
alpha_post = alpha_prior + heads
beta_post = beta_prior + tails
from scipy.stats import beta
prior = beta.pdf(theta, alpha_prior, beta_prior)
likelihood = theta**heads * (1-theta)**tails
likelihood /= likelihood.sum()  # normalize
posterior = beta.pdf(theta, alpha_post, beta_post)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(theta, prior, label='Prior Beta(1,1)', linestyle='--')
ax.plot(theta, likelihood*max(posterior)/max(likelihood), label='Likelihood (scaled)')
ax.plot(theta, posterior, label=f'Posterior Beta({alpha_post},{beta_post})', linewidth=2)
ax.axvline(alpha_post/(alpha_post+beta_post), color='red', linestyle=':', label='Posterior mean')
ax.set_xlabel('theta (P(heads))')
ax.set_title(f'Bayesian Update: {heads}H/{tails}T')
ax.legend(); plt.tight_layout()
plt.savefig('bayesian_coin.png', dpi=80); plt.close()
print(f'Posterior mean: {alpha_post/(alpha_post+beta_post):.3f}')
print(f'95% credible interval: ({beta.ppf(0.025,alpha_post,beta_post):.3f}, {beta.ppf(0.975,alpha_post,beta_post):.3f})')
import numpy as np
from scipy.stats import beta

# Bayesian A/B test on conversion rates via posterior sampling.
# A: 120 conversions from 1000 visitors
# B: 140 conversions from 1000 visitors
np.random.seed(42)
alpha_a, beta_a = 1 + 120, 1 + 880
alpha_b, beta_b = 1 + 140, 1 + 860
# Sample from posteriors
samples_a = beta.rvs(alpha_a, beta_a, size=100000)
samples_b = beta.rvs(alpha_b, beta_b, size=100000)
prob_b_better = (samples_b > samples_a).mean()
expected_lift = (samples_b - samples_a).mean()
print(f'Control A: rate = {120/1000:.1%}')
print(f'Variant B: rate = {140/1000:.1%}')
print(f'P(B > A): {prob_b_better:.1%}')
print(f'Expected lift: +{expected_lift:.2%}')
print(f'95% CI for lift: ({np.percentile(samples_b-samples_a,2.5):.2%}, {np.percentile(samples_b-samples_a,97.5):.2%})')
import numpy as np
from scipy.stats import beta, gamma, norm

# Three conjugate-prior worked examples.
np.random.seed(42)
# 1. Beta-Binomial: click-through rate estimation
clicks, views = 45, 200
alpha, beta_param = 1 + clicks, 1 + (views - clicks)
print('CTR posterior:')
print(f' Mean: {alpha/(alpha+beta_param):.3f}')
print(f' 95% CI: ({beta.ppf(0.025,alpha,beta_param):.3f}, {beta.ppf(0.975,alpha,beta_param):.3f})')
# 2. Normal-Normal: mean estimation (known variance)
data = np.random.normal(5.0, 2.0, 30)  # true mean=5
prior_mean, prior_var = 0, 10  # weak prior
likelihood_var = 4 / len(data)  # sigma^2/n
posterior_var = 1 / (1/prior_var + 1/likelihood_var)
posterior_mean = posterior_var * (prior_mean/prior_var + data.mean()/likelihood_var)
print(f'\nNormal posterior mean: {posterior_mean:.3f} (true=5.0, sample mean={data.mean():.3f})')
# 3. Gamma-Poisson: event rate estimation
events_per_day = [3, 5, 2, 4, 6, 3, 4]  # observed counts
alpha_g, beta_g = 1 + sum(events_per_day), 1 + len(events_per_day)
print(f'\nPoisson rate posterior mean: {alpha_g/beta_g:.3f} events/day')
import numpy as np
np.random.seed(42)

def log_posterior(theta, data, prior_std=2.0):
    """Log posterior for normal likelihood, normal prior."""
    log_prior = -0.5 * (theta / prior_std)**2
    log_likelihood = -0.5 * np.sum((data - theta)**2)
    return log_prior + log_likelihood

# Simple Metropolis-Hastings
def metropolis(data, n_samples=5000, proposal_std=0.5):
    """Random-walk Metropolis sampler; returns post-burn-in draws."""
    samples = []
    current = 0.0
    for _ in range(n_samples):
        proposed = current + np.random.normal(0, proposal_std)
        log_alpha = log_posterior(proposed, data) - log_posterior(current, data)
        if np.log(np.random.uniform()) < log_alpha:
            current = proposed
        samples.append(current)
    return np.array(samples[500:])  # burn-in

# True parameter = 3.0
data = np.random.normal(3.0, 1.0, 20)
samples = metropolis(data)
print(f'Data mean: {data.mean():.3f}')
print(f'MCMC posterior mean: {samples.mean():.3f}')
print(f'MCMC 95% CI: ({np.percentile(samples,2.5):.3f}, {np.percentile(samples,97.5):.3f})')
# Proxy for acceptance rate: fraction of distinct (rounded) chain values.
print(f'Acceptance rate: {len(np.unique(samples.round(6)))/len(samples):.1%}')
import numpy as np
from scipy.stats import beta

# Sequential Bayesian A/B test with a daily stopping rule.
np.random.seed(42)
# Simulate daily data: A has 8% true rate, B has 10% true rate
true_a, true_b = 0.08, 0.10
n_days = 14
daily_visitors = 100
alpha_a, beta_a = 1, 1  # uniform prior
alpha_b, beta_b = 1, 1
print('Day | Visitors A/B | Conv A/B | P(B>A)')
print('-'*50)
for day in range(1, n_days+1):
    conv_a = np.random.binomial(daily_visitors, true_a)
    conv_b = np.random.binomial(daily_visitors, true_b)
    alpha_a += conv_a; beta_a += (daily_visitors - conv_a)
    alpha_b += conv_b; beta_b += (daily_visitors - conv_b)
    # Posterior draws for both arms in one vectorized call.
    samples = np.random.beta([alpha_a, alpha_b], [beta_a, beta_b], size=(100000, 2))
    prob_b_better = (samples[:,1] > samples[:,0]).mean()
    print(f' {day:2d} | {daily_visitors}/{daily_visitors} | {conv_a:3d}/{conv_b:3d} | {prob_b_better:.1%}')
    if prob_b_better > 0.95:
        print(f'>>> Declare B winner on day {day}! (P(B>A)={prob_b_better:.1%})')
        break
from scipy.stats import beta
import numpy as np

# Exercise: Bayesian vs MLE estimate of a conversion rate.
conversions = 30
visitors = 200
# Prior: Beta(1, 1) = uniform
alpha_prior, beta_prior = 1, 1
# TODO: compute posterior parameters
# TODO: compute posterior mean
# TODO: compute 90% credible interval (5th and 95th percentile)
# TODO: compute MLE estimate (conversions/visitors)
# TODO: print comparison
Analyze time-to-event data — customer churn, equipment failure, clinical trials — with Kaplan-Meier curves and the log-rank test.
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Kaplan-Meier estimate from (duration, event) pairs, plus a plot.
# (duration, event_occurred) -- 0 = censored (still alive/active)
data = [(5,1),(8,0),(12,1),(14,1),(2,0),(20,1),(7,1),(18,0),(3,1),(25,1),
        (11,1),(6,0),(15,1),(9,1),(22,0),(4,1),(16,1),(10,0),(13,1),(19,1)]

def km_estimator(data):
    """Kaplan-Meier product-limit estimate as (time, S(t)) pairs."""
    S = 1.0
    result = [(0, 1.0)]
    for t in sorted(set(d for d, e in data if e == 1)):
        # FIX: risk set at t = all subjects with duration >= t. The
        # original only removed subjects censored exactly at an event
        # time, so intermediate censoring never shrank the risk set.
        at_risk = sum(1 for d, _ in data if d >= t)
        events = sum(1 for d, e in data if d == t and e == 1)
        S *= (1 - events / at_risk)
        result.append((t, S))
    return result

km = km_estimator(data)
times_km, surv_km = zip(*km)
fig, ax = plt.subplots(figsize=(8, 5))
ax.step(times_km, surv_km, where='post', linewidth=2, color='steelblue', label='KM estimate')
ax.axhline(0.5, color='red', linestyle='--', label='Median survival')
ax.set_xlabel('Time'); ax.set_ylabel('Survival Probability')
ax.set_title('Kaplan-Meier Survival Curve'); ax.legend(); ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('km_curve.png', dpi=80); plt.close()
print('Saved km_curve.png')
# First time S(t) drops below 0.5 approximates the median survival time.
median_idx = next(i for i, s in enumerate(surv_km) if s < 0.5)
print(f'Median survival: ~{times_km[median_idx]} units')
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Customer churn survival: Kaplan-Meier retention curves for two plans.
np.random.seed(42)
n = 200
# Simulate customer tenures: plan A churns faster
plan_a_times = np.random.exponential(8, n//2).clip(0, 24).round(0)
plan_b_times = np.random.exponential(14, n//2).clip(0, 24).round(0)
events_a = (plan_a_times < 24).astype(int)  # 1=churned, 0=still active (censored)
events_b = (plan_b_times < 24).astype(int)

def km_curve(durations, events):
    """Kaplan-Meier estimate; returns (times, survival) sequences."""
    S = 1.0
    curve = [(0, 1.0)]
    for t in sorted(set(durations[events == 1])):
        # FIX: risk set at t = everyone with observed time >= t (the
        # original only removed subjects censored exactly at an event time).
        at_risk = (durations >= t).sum()
        d = ((durations == t) & (events == 1)).sum()
        S *= 1 - d / at_risk
        curve.append((t, S))
    return zip(*curve)

fig, ax = plt.subplots(figsize=(9, 5))
for times, surv, label, color in [
    (*km_curve(plan_a_times, events_a), 'Plan A (basic)', 'tomato'),
    (*km_curve(plan_b_times, events_b), 'Plan B (premium)', 'steelblue'),
]:
    ax.step(list(times), list(surv), where='post', label=label, color=color, linewidth=2)
ax.set_xlabel('Months'); ax.set_ylabel('Retention Rate')
ax.set_title('Customer Retention: Plan A vs Plan B')
ax.legend(); ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('churn_survival.png', dpi=80); plt.close()
print('Saved churn_survival.png')
print(f'Plan A 12-month retention: ~{plan_a_times[plan_a_times>=12].shape[0]/n*2:.0%}')
print(f'Plan B 12-month retention: ~{plan_b_times[plan_b_times>=12].shape[0]/n*2:.0%}')
import numpy as np
from scipy.stats import chi2

# Log-rank test comparing two survival distributions.
np.random.seed(42)
# Group 1: faster failure (mean=10)
# Group 2: slower failure (mean=18)
g1_times = np.random.exponential(10, 50).clip(0,30)
g1_events = (g1_times < 30).astype(int)
g2_times = np.random.exponential(18, 50).clip(0,30)
g2_events = (g2_times < 30).astype(int)

def log_rank_test(t1, e1, t2, e2):
    """Log-rank test; returns (chi-square statistic, p-value)."""
    all_times = sorted(set(np.concatenate([t1[e1==1], t2[e2==1]])))
    O1 = E1 = O2 = E2 = 0.0
    for t in all_times:
        # FIX: recompute risk sets at each event time (subjects with
        # observed time >= t). The original subtracted *cumulative*
        # counts on every iteration, driving the risk sets negative.
        n1 = (t1 >= t).sum()
        n2 = (t2 >= t).sum()
        n = n1 + n2
        d1 = ((t1==t) & (e1==1)).sum()
        d2 = ((t2==t) & (e2==1)).sum()
        d = d1 + d2
        if n > 0:
            E1 += d * n1 / n
            E2 += d * n2 / n
        O1 += d1; O2 += d2
    chi2_stat = (O1 - E1)**2 / E1 + (O2 - E2)**2 / E2
    p_value = 1 - chi2.cdf(chi2_stat, df=1)
    return chi2_stat, p_value

chi2_stat, p = log_rank_test(g1_times, g1_events, g2_times, g2_events)
print(f'Log-rank chi2: {chi2_stat:.3f}')
print(f'p-value: {p:.4f}')
print(f'Groups differ significantly: {p < 0.05}')
import numpy as np
np.random.seed(42)
# Constant-hazard (exponential) survival curves and hazard ratios.
# Exponential distribution: constant hazard rate
lambda_rate = 0.1  # hazard rate
time_points = np.arange(1, 21)
survival = np.exp(-lambda_rate * time_points)
hazard = np.full_like(time_points, lambda_rate, dtype=float)
print('Exponential survival (lambda=0.1):')
print(f' Survival at t=5: {np.exp(-0.1*5):.3f}')
print(f' Survival at t=10: {np.exp(-0.1*10):.3f}')
print(f' Median survival: {np.log(2)/0.1:.1f} time units')
# Hazard ratio: how much riskier is group A vs group B?
lambda_a = 0.15  # higher hazard
lambda_b = 0.08
hr = lambda_a / lambda_b
print(f'\nHazard Ratio (A vs B): {hr:.2f}')
print(f'Group A is {hr:.1f}x more likely to fail at any time point')
print(f'Group A median: {np.log(2)/lambda_a:.1f} | Group B median: {np.log(2)/lambda_b:.1f}')
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Retention by acquisition channel: KM curves for two cohorts.
np.random.seed(42)
# Cohort 1: free trial then convert (longer retention)
# Cohort 2: direct paid (shorter retention but committed)
cohorts = {
    'Free Trial': np.random.exponential(18, 150).clip(0, 36),
    'Direct Paid': np.random.exponential(12, 100).clip(0, 36),
}
events = {k: (v < 36).astype(int) for k, v in cohorts.items()}

def km_curve(durations, ev):
    """Kaplan-Meier estimate; returns (times, survival) sequences."""
    S = 1.0
    pts = [(0, 1.0)]
    for t in sorted(set(durations[ev == 1])):
        # FIX: risk set at t = subjects with observed time >= t.
        ar = (durations >= t).sum()
        d = ((durations == t) & (ev == 1)).sum()
        S *= 1 - d / ar
        pts.append((t, S))
    return zip(*pts)

fig, ax = plt.subplots(figsize=(9, 5))
colors = ['#2196F3', '#F44336']
for (cohort, times_arr), color in zip(cohorts.items(), colors):
    t_pts, s_pts = km_curve(times_arr, events[cohort])
    ax.step(list(t_pts), list(s_pts), where='post', label=cohort, color=color, linewidth=2)
    churned = events[cohort].sum()
    print(f'{cohort}: n={len(times_arr)}, churned={churned} ({churned/len(times_arr):.0%}), avg tenure={times_arr.mean():.1f} months')
ax.axhline(0.5, color='gray', linestyle='--', linewidth=1)
ax.set_xlabel('Months Since Signup'); ax.set_ylabel('Retention Rate')
ax.set_title('Customer Retention by Acquisition Channel')
ax.legend(); ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('retention_cohorts.png', dpi=80); plt.close()
print('Saved retention_cohorts.png')
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
np.random.seed(42)
# TODO: generate failure times for A (mean=100) and B (mean=150), clip at 200
# TODO: compute event flags (1 if failed < 200, 0 if censored)
# TODO: implement km_curve function (or copy from examples)
# TODO: plot both KM curves on same figure
# TODO: compute and print log-rank test p-value
# TODO: save to 'equipment_survival.png'
Use bootstrapping, permutation tests, and Monte Carlo simulation to estimate uncertainty and test hypotheses without distributional assumptions.
import numpy as np

# Bootstrap CI for a skewed-sample mean vs the normal approximation.
np.random.seed(42)
data = np.random.exponential(2.0, 50)  # skewed data
print(f'Sample mean: {data.mean():.4f}')
# Bootstrap 95% CI for the mean
n_boot = 10000
boot_means = np.array([np.random.choice(data, len(data), replace=True).mean()
                       for _ in range(n_boot)])
ci_lower, ci_upper = np.percentile(boot_means, [2.5, 97.5])
print(f'Bootstrap 95% CI: ({ci_lower:.4f}, {ci_upper:.4f})')
print(f'Bootstrap SE: {boot_means.std():.4f}')
# Compare to normal approximation
from scipy.stats import sem
norm_ci = data.mean() + np.array([-1.96, 1.96]) * sem(data)
print(f'Normal approx 95% CI: ({norm_ci[0]:.4f}, {norm_ci[1]:.4f})')
import numpy as np

# Permutation test for a difference in group means.
np.random.seed(42)
# Two groups -- is the mean difference real or chance?
group_a = np.random.normal(10.0, 3.0, 40)
group_b = np.random.normal(11.5, 3.0, 40)
obs_diff = group_b.mean() - group_a.mean()
combined = np.concatenate([group_a, group_b])
n_a = len(group_a)
n_perm = 10000
# FIX: shuffle once per iteration and split into complementary halves.
# The original drew two *independent* permutations and compared their
# first n_a elements, which is not a valid permutation null.
perm_diffs = np.empty(n_perm)
for i in range(n_perm):
    shuffled = np.random.permutation(combined)
    perm_diffs[i] = shuffled[:n_a].mean() - shuffled[n_a:].mean()
p_value = (np.abs(perm_diffs) >= np.abs(obs_diff)).mean()
print(f'Observed difference: {obs_diff:.4f}')
print(f'Permutation p-value: {p_value:.4f}')
print(f'Significant at 0.05: {p_value < 0.05}')
import numpy as np

# Monte Carlo estimate of pi by sampling the unit square.
np.random.seed(42)
n_samples_list = [100, 1000, 10000, 100000, 1000000]
print(f'True pi: {np.pi:.6f}')
print(f"{'N':>10} {'Pi estimate':>12} {'Error':>10}")
for n in n_samples_list:
    x, y = np.random.uniform(-1, 1, (2, n))
    inside = (x**2 + y**2) <= 1.0
    pi_est = 4 * inside.mean()
    error = abs(pi_est - np.pi)
    print(f'{n:>10,} {pi_est:>12.6f} {error:>10.6f}')
import numpy as np
np.random.seed(42)
# Bootstrap CIs for several statistics of a small wage sample.
wages = np.array([35, 42, 28, 55, 38, 47, 31, 60, 44, 39,
                  52, 36, 29, 48, 41, 65, 33, 57, 43, 37]) * 1000

def bootstrap_ci(data, stat_fn, n_boot=10000, ci=95):
    """Percentile bootstrap CI for an arbitrary statistic."""
    boot_stats = [stat_fn(np.random.choice(data, len(data), replace=True))
                  for _ in range(n_boot)]
    lower = (100 - ci) / 2
    return np.percentile(boot_stats, [lower, 100-lower])

# FIX: renamed the dict from `stats` -- that name shadowed scipy's
# `stats` module imported earlier in this file.
stat_fns = {
    'mean': np.mean,
    'median': np.median,
    'std': np.std,
    'q75': lambda x: np.percentile(x, 75),
}
for name, fn in stat_fns.items():
    ci = bootstrap_ci(wages, fn)
    print(f'{name:6s}: {fn(wages):>8,.0f} 95% CI: (${ci[0]:,.0f}, ${ci[1]:,.0f})')
import numpy as np
np.random.seed(42)
# Bootstrap the mean and median of heavy-tailed revenue-per-user data.
# Simulated revenue per user (heavy-tailed)
revenue = np.random.lognormal(mean=3.5, sigma=1.2, size=500)
print(f'n={len(revenue)}')
print(f'Mean: ${revenue.mean():,.2f}')
print(f'Median: ${np.median(revenue):,.2f}')

def bootstrap_stat(data, stat_fn, n_boot=10000):
    """Return n_boot bootstrap replicates of stat_fn."""
    boots = [stat_fn(np.random.choice(data, len(data), replace=True)) for _ in range(n_boot)]
    return np.array(boots)

mean_boots = bootstrap_stat(revenue, np.mean)
median_boots = bootstrap_stat(revenue, np.median)
for name, boots, obs in [('mean', mean_boots, revenue.mean()), ('median', median_boots, np.median(revenue))]:
    ci = np.percentile(boots, [2.5, 97.5])
    print(f'\n{name.capitalize()} bootstrap:')
    print(f' Observed: ${obs:,.2f}')
    print(f' 95% CI: (${ci[0]:,.2f}, ${ci[1]:,.2f})')
    print(f' SE: ${boots.std():,.2f}')
import numpy as np
print(f' SE: ${boots.std():,.2f}')import numpy as np
np.random.seed(42)
group_a = np.random.normal(4.2, 1.5, 50)
group_b = np.random.normal(4.8, 1.5, 50)
# TODO: compute observed difference
# TODO: combine groups, run 10000 permutations
# TODO: compute permutation p-value (two-tailed)
# TODO: print resultsimport numpy as np
np.random.seed(42)
# Minimal percentile-bootstrap CI for the mean of skewed data.
data = np.random.exponential(scale=2, size=50)
B = 10000
boot_means = [np.mean(np.random.choice(data, size=len(data), replace=True)) for _ in range(B)]
ci = np.percentile(boot_means, [2.5, 97.5])
print(f"Sample mean: {np.mean(data):.4f}")
print(f"Bootstrap 95% CI: [{ci[0]:.4f}, {ci[1]:.4f}]")
import numpy as np
from scipy import stats

# BCa (bias-corrected and accelerated) bootstrap CI for the median.
np.random.seed(7)
data = np.random.lognormal(0, 0.5, 80)
stat_obs = np.median(data)
B = 5000
boot_stats = [np.median(np.random.choice(data, size=len(data), replace=True)) for _ in range(B)]
# Bias correction: z0 from the fraction of replicates below the observed stat.
z0 = stats.norm.ppf(np.mean(np.array(boot_stats) < stat_obs))
# Acceleration (jackknife)
jk = [np.median(np.delete(data, i)) for i in range(len(data))]
jk_mean = np.mean(jk)
a = np.sum((jk_mean - np.array(jk))**3) / (6*np.sum((jk_mean - np.array(jk))**2)**1.5)
alpha = 0.05
z_lo = stats.norm.ppf(alpha/2); z_hi = stats.norm.ppf(1 - alpha/2)
# Adjusted percentile endpoints per the BCa formula.
p_lo = stats.norm.cdf(z0 + (z0+z_lo)/(1-a*(z0+z_lo)))
p_hi = stats.norm.cdf(z0 + (z0+z_hi)/(1-a*(z0+z_hi)))
ci = np.percentile(boot_stats, [p_lo*100, p_hi*100])
print(f"BCa 95% CI for median: [{ci[0]:.4f}, {ci[1]:.4f}]")
import numpy as np
np.random.seed(0)
# Permutation test: one shuffle per iteration, split into two groups.
group_a = np.random.normal(5.0, 1.5, 40)
group_b = np.random.normal(5.6, 1.5, 40)
obs_diff = np.mean(group_b) - np.mean(group_a)
combined = np.concatenate([group_a, group_b])
B = 10000
perm_diffs = []
for _ in range(B):
    perm = np.random.permutation(combined)
    perm_diffs.append(np.mean(perm[40:]) - np.mean(perm[:40]))
p_value = np.mean(np.abs(perm_diffs) >= np.abs(obs_diff))
print(f"Observed difference: {obs_diff:.4f}")
print(f"Permutation p-value: {p_value:.4f}")
import numpy as np
np.random.seed(42)
# Bootstrap CI and one-sided p-value for an A/B conversion lift.
# Simulated checkout conversion: design A=0.05, design B=0.065
n = 300
conv_a = np.random.binomial(1, 0.05, n).astype(float)
conv_b = np.random.binomial(1, 0.065, n).astype(float)
obs_diff = conv_b.mean() - conv_a.mean()
B = 20000
boot_diffs = [
    np.random.choice(conv_b, n, replace=True).mean() -
    np.random.choice(conv_a, n, replace=True).mean()
    for _ in range(B)
]
ci = np.percentile(boot_diffs, [2.5, 97.5])
p_val = np.mean(np.array(boot_diffs) <= 0)  # P(lift <= 0) under the bootstrap
print(f"Observed lift: {obs_diff:.4f} ({obs_diff/conv_a.mean():.1%} relative)")
print(f"95% Bootstrap CI: [{ci[0]:.4f}, {ci[1]:.4f}]")
print(f"One-sided p-value: {p_val:.4f}")
print("Significant at alpha=0.05:", ci[0] > 0)
import numpy as np
np.random.seed(11)
# Exercise: resampling inference on heavy-tailed customer lifetime value.
# Pareto(2)-based LTV, scaled and shifted; first 30 = premium, rest = standard.
ltv = np.random.pareto(2, 60) * 100 + 50
premium = ltv[:30]
standard = ltv[30:]
B = 10000
# TODO: Bootstrap 90% and 95% CIs for mean LTV
# TODO: Bootstrap CI for 75th percentile LTV
# TODO: Permutation test: premium vs standard mean LTV
# TODO: Report Cohen's d effect size
import numpy as np
from scipy import stats

# Family-wise error control: Bonferroni vs Holm on 20 p-values.
np.random.seed(42)
k = 20
# 18 nulls (no effect) + 2 true positives
p_values = np.concatenate([
    np.random.uniform(0, 1, 18),
    np.array([0.003, 0.011])
])
alpha = 0.05
bonferroni = p_values * k
reject_bon = bonferroni < alpha
# Holm step-down correction: adjusted p = running max of p_(i)*(k-i) over
# the sorted order. FIX: the original skipped the monotonicity step, so a
# hypothesis could be rejected even after a smaller-rank test had failed.
order = np.argsort(p_values)
holm = np.zeros(k)
running_max = 0.0
for rank, idx in enumerate(order):
    running_max = max(running_max, p_values[idx] * (k - rank))
    holm[idx] = running_max
reject_holm = holm < alpha
print(f"Bonferroni rejects: {reject_bon.sum()} tests")
print(f"Holm rejects: {reject_holm.sum()} tests")
import numpy as np

# Benjamini-Hochberg FDR procedure on 50 p-values.
np.random.seed(5)
k = 50
# 40 nulls + 10 true effects (small p-values)
p_values = np.concatenate([
    np.random.uniform(0.05, 1.0, 40),
    np.random.uniform(0, 0.02, 10)
])
alpha = 0.05
order = np.argsort(p_values)
sorted_p = p_values[order]
bh_threshold = (np.arange(1, k+1) / k) * alpha
reject_sorted = sorted_p <= bh_threshold
# All tests up to last rejection are rejected
last_reject = np.where(reject_sorted)[0]
if len(last_reject):
    cutoff = last_reject[-1]
    reject_bh = np.zeros(k, dtype=bool)
    reject_bh[order[:cutoff+1]] = True
else:
    reject_bh = np.zeros(k, dtype=bool)
print(f"BH FDR rejects: {reject_bh.sum()} of {k} tests")
print(f"Expected FDR <= {alpha}")
import numpy as np

# Storey-style q-values with a polynomial pi0 estimate.
np.random.seed(2)
k = 100
p_vals = np.concatenate([np.random.uniform(0,1,75), np.random.beta(0.5,5,25)])
# Estimate pi0 (proportion of true nulls)
lambdas = np.arange(0.05, 0.95, 0.05)
pi0_hat = [(p_vals >= l).sum() / (k * (1-l)) for l in lambdas]
pi0 = min(1.0, np.polyfit(lambdas, pi0_hat, 2)[2])  # smoother estimate
# Compute q-values
order = np.argsort(p_vals)
sorted_p = p_vals[order]
q = pi0 * k * sorted_p / (np.arange(1, k+1))
# Enforce monotonicity
for i in range(k-2, -1, -1):
    q[i] = min(q[i], q[i+1])
q_vals = np.empty(k); q_vals[order] = np.minimum(q, 1)
print(f"pi0 estimate: {pi0:.3f}")
print(f"Discoveries at FDR=0.05: {(q_vals <= 0.05).sum()}")
import numpy as np

# Genomics-scale BH: sensitivity and realized FDR on 10k simulated genes.
np.random.seed(99)
n_genes = 10000
n_de = 200  # truly differentially expressed
# Simulate p-values: most null (uniform), some true effects (beta)
p_null = np.random.uniform(0, 1, n_genes - n_de)
p_de = np.random.beta(0.3, 10, n_de)
p_values = np.concatenate([p_null, p_de])
alpha = 0.05
# BH procedure
order = np.argsort(p_values)
sorted_p = p_values[order]
ranks = np.arange(1, n_genes+1)
bh_thresh = (ranks / n_genes) * alpha
last = np.where(sorted_p <= bh_thresh)[0]
if len(last):
    cutoff = last[-1]
    reject = np.zeros(n_genes, dtype=bool)
    reject[order[:cutoff+1]] = True
else:
    reject = np.zeros(n_genes, dtype=bool)
# True positive rate among real DE genes
tp = reject[-n_de:].sum()
fp = reject[:-n_de].sum()
print(f"Significant genes: {reject.sum()}")
print(f"True positives: {tp}/{n_de} ({tp/n_de:.1%} sensitivity)")
print(f"False positives: {fp} (FDR={fp/max(reject.sum(),1):.3f})")
import numpy as np

# Exercise: compare corrections on 12 clinical-trial endpoints.
# 12 endpoint p-values from clinical trial
p_values = np.array([0.001, 0.008, 0.023, 0.046, 0.052, 0.071,
                     0.12, 0.18, 0.21, 0.34, 0.52, 0.74])
k = len(p_values)
alpha = 0.05
# TODO: Bonferroni correction + reject/accept
# TODO: Holm step-down correction + reject/accept
# TODO: BH FDR correction + reject/accept
# TODO: Print comparison table
# TODO: Discuss which method to use and why
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# PCA reconstruction error as a multivariate outlier detector.
np.random.seed(0)
X = np.random.multivariate_normal([0,0,0], [[1,.7,.3],[.7,1,.2],[.3,.2,1]], 200)
X[5] += [4, 4, 4]  # inject outlier
scaler = StandardScaler()
Xs = scaler.fit_transform(X)
pca = PCA(n_components=2)
scores = pca.fit_transform(Xs)
reconstruction = pca.inverse_transform(scores)
residuals = np.sum((Xs - reconstruction)**2, axis=1)
threshold = np.percentile(residuals, 97.5)
outliers = np.where(residuals > threshold)[0]
print(f"Explained variance: {pca.explained_variance_ratio_.cumsum()[-1]:.2%}")
print(f"Reconstruction outliers (index): {outliers}")
import numpy as np
from sklearn.manifold import TSNE
from sklearn.datasets import make_blobs

# t-SNE embedding of synthetic blob clusters.
np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=4, n_features=8, cluster_std=1.5)
tsne = TSNE(n_components=2, perplexity=30, n_iter=300, random_state=42)
X_2d = tsne.fit_transform(X)
for cls in np.unique(y):
    mask = y == cls
    print(f"Cluster {cls}: centroid ({X_2d[mask,0].mean():.2f}, {X_2d[mask,1].mean():.2f}), n={mask.sum()}")
print(f"KL divergence: {tsne.kl_divergence_:.4f}")
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy import stats

# Group comparison in PCA space: per-component t-tests.
np.random.seed(1)
# Two groups with different multivariate structure
g1 = np.random.multivariate_normal([0]*5, np.eye(5), 100)
g2 = np.random.multivariate_normal([0.5]*5, np.eye(5)*1.5, 100)
X = np.vstack([g1, g2])
labels = np.array([0]*100 + [1]*100)
pca = PCA(n_components=3)
Xp = pca.fit_transform(StandardScaler().fit_transform(X))
for i in range(3):
    t, p = stats.ttest_ind(Xp[labels==0, i], Xp[labels==1, i])
    print(f"PC{i+1}: t={t:.3f}, p={p:.4f} ({'significant' if p<0.05 else 'not sig'})")
print(f"Total variance explained: {pca.explained_variance_ratio_.sum():.2%}")
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from scipy import stats

# Credit-default demo: PCA features feeding logistic regression.
np.random.seed(42)
n = 500
# 20 correlated features simulating financial metrics
cov = 0.4 * np.ones((20, 20)) + 0.6 * np.eye(20)
X_raw = np.random.multivariate_normal(np.zeros(20), cov, n)
# Default probability depends on first latent factor
latent = X_raw[:, :5].mean(axis=1)
prob = 1 / (1 + np.exp(-(latent - 0.3)))
y = np.random.binomial(1, prob, n)
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X)
print(f"Variance explained: {pca.explained_variance_ratio_.cumsum()[-1]:.2%}")
for i in range(5):
    t, p = stats.ttest_ind(X_pca[y==0, i], X_pca[y==1, i])
    print(f"PC{i+1}: defaulters vs non-defaulters t={t:.2f}, p={p:.4f}")
clf = LogisticRegression()
scores = cross_val_score(clf, X_pca, y, cv=5, scoring='roc_auc')
print(f"Logistic Regression AUC (PCA features): {scores.mean():.3f} +/- {scores.std():.3f}")
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy import stats

# Exercise: cluster structure recovered in PCA space.
np.random.seed(21)
X = np.random.randn(400, 15)
# Inject 4-cluster structure
for i in range(4):
    X[i*100:(i+1)*100, :5] += i * 1.2
scaler = StandardScaler()
Xs = scaler.fit_transform(X)
# TODO: PCA to 2D and 5D
# TODO: KMeans(k=4) on 5D PCA
# TODO: ANOVA on top 3 PCs and 3 original features per cluster
# TODO: Report eta-squared effect sizes
# TODO: Plot 2D PCA colored by cluster
Hypothesis testing decides whether observed data supports a null hypothesis H0. One-sample, two-sample, and paired t-tests cover most practical scenarios. Always check assumptions: normality, equal variance.
from scipy import stats
import numpy as np

# One-sample, two-sample, and Welch t-tests.
np.random.seed(42)
# One-sample t-test: is the mean different from a known value?
weights = np.random.normal(70, 8, 30)
t_stat, p_val = stats.ttest_1samp(weights, popmean=72)
print(f"One-sample t-test: t={t_stat:.3f}, p={p_val:.4f}")
print(f" Reject H0 (mean=72)? {p_val < 0.05}")
# Two-sample t-test: do two groups have different means?
group_a = np.random.normal(75, 10, 40)
group_b = np.random.normal(80, 10, 40)
t2, p2 = stats.ttest_ind(group_a, group_b)
print(f"Two-sample t-test: t={t2:.3f}, p={p2:.4f}")
print(f" Reject H0 (equal means)? {p2 < 0.05}")
# Welch's t-test (unequal variances -- safer default)
group_c = np.random.normal(80, 20, 40)  # larger std
t3, p3 = stats.ttest_ind(group_a, group_c, equal_var=False)
print(f"Welch's t-test: t={t3:.3f}, p={p3:.4f}")
print(f" Mean A={group_a.mean():.2f}, Mean C={group_c.mean():.2f}")
from scipy import stats
import numpy as np

# Paired t-test plus confidence intervals for the mean difference.
np.random.seed(0)
# Paired t-test: before/after measurements on same subjects
before = np.random.normal(130, 15, 25)  # blood pressure before treatment
after = before - np.random.normal(8, 5, 25)  # treatment lowers BP
t, p = stats.ttest_rel(before, after)
diff = before - after
ci = stats.t.interval(0.95, df=len(diff)-1,
                      loc=diff.mean(), scale=stats.sem(diff))
print(f"Paired t-test: t={t:.3f}, p={p:.4f}")
print(f"Mean reduction: {diff.mean():.2f} mmHg")
print(f"95% CI: ({ci[0]:.2f}, {ci[1]:.2f})")
print(f"Significant reduction? {p < 0.05}")
# Manual CI for a mean
n, mean, se = len(before), before.mean(), stats.sem(before)
ci_manual = stats.t.interval(0.95, df=n-1, loc=mean, scale=se)
print(f"\nCI for baseline BP mean {mean:.1f}: ({ci_manual[0]:.1f}, {ci_manual[1]:.1f})")
from scipy import stats
import numpy as np
# Chi-square test for independence
# Question: Is treatment type independent of recovery?
observed = np.array([[45, 15],  # Drug A: recovered, not recovered
                     [30, 30],  # Drug B
                     [38, 22]]) # Placebo
chi2, p, dof, expected = stats.chi2_contingency(observed)
print(f"Chi-square: chi2={chi2:.3f}, p={p:.4f}, dof={dof}")
print(f"Association between treatment and recovery? {p < 0.05}")
# Two-proportion z-test
# Are conversion rates different between landing pages?
n_a, conv_a = 1000, 120 # page A
n_b, conv_b = 1000, 145 # page B
count = np.array([conv_a, conv_b])
nobs = np.array([n_a, n_b])
# BUG FIX: proportions_ztest lives in statsmodels.stats.proportion, not in
# scipy.stats (the original call raised AttributeError). Compute the pooled
# two-proportion z-test directly with scipy.stats.norm instead.
p_pool = count.sum() / nobs.sum()                       # pooled rate under H0
se = np.sqrt(p_pool * (1 - p_pool) * (1/n_a + 1/n_b))   # pooled standard error
z = (conv_a/n_a - conv_b/n_b) / se                      # z statistic (A minus B)
p_prop = 2 * stats.norm.sf(abs(z))                      # two-sided p-value
print(f"\nProportions z-test: z={z:.3f}, p={p_prop:.4f}")
print(f"Rate A={conv_a/n_a:.1%}, Rate B={conv_b/n_b:.1%}")
print(f"Significantly different? {p_prop < 0.05}")from scipy import stats
import numpy as np
# Simulated clinical trial: within-group change (paired t-tests) plus a
# between-group comparison of the *changes* (independent t-test).
np.random.seed(7)
n = 50
drug_group = np.random.normal(200, 30, n) # baseline cholesterol
placebo_group = np.random.normal(200, 30, n)
drug_post = drug_group - np.random.normal(25, 10, n) # drug reduces ~25
placebo_post = placebo_group - np.random.normal(5, 10, n) # placebo reduces ~5
# Per-subject reductions; positive values mean cholesterol went down.
drug_diff = drug_group - drug_post
placebo_diff = placebo_group - placebo_post
# Paired tests: did each group change from its own baseline?
t_drug, p_drug = stats.ttest_rel(drug_group, drug_post)
t_placebo, p_placebo = stats.ttest_rel(placebo_group, placebo_post)
# Independent test on the reductions: is the drug's change bigger than placebo's?
t_compare, p_compare = stats.ttest_ind(drug_diff, placebo_diff)
print(f"Drug group: mean reduction={drug_diff.mean():.1f}, p={p_drug:.4f}")
print(f"Placebo group: mean reduction={placebo_diff.mean():.1f}, p={p_placebo:.4f}")
print(f"Drug vs Placebo: t={t_compare:.3f}, p={p_compare:.4f}")
print(f"Drug significantly better? {p_compare < 0.05}")from scipy import stats
import numpy as np
n_a, open_a = 800, 92
n_b, open_b = 800, 117
# (1) two-proportion z-test
# (2) 95% CI for difference
# (3) lift = (rate_b - rate_a) / rate_a
# (4) minimum detectable effect hint: use stats.norm.isf for z-scores
rate_a = open_a / n_a
rate_b = open_b / n_b
print(f"Rate A: {rate_a:.3f}, Rate B: {rate_b:.3f}")
One-way ANOVA tests whether means differ across 3+ groups in a single factor. Two-way ANOVA adds a second factor and interaction. Post-hoc tests (Tukey, Bonferroni) identify which pairs differ.
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np
np.random.seed(42)
# Crop yield under 4 fertilizer types
groups = {
    'Control': np.random.normal(50, 8, 20),
    'FertA': np.random.normal(58, 8, 20),
    'FertB': np.random.normal(55, 8, 20),
    'FertC': np.random.normal(62, 8, 20),
}
f_stat, p_val = stats.f_oneway(*groups.values())
print(f"One-way ANOVA: F={f_stat:.3f}, p={p_val:.4f}")
print(f"Significant difference exists? {p_val < 0.05}\n")
# Tukey HSD post-hoc
# BUG FIX: removed "import numpy as np as _np" — a double "as" is a
# SyntaxError, and numpy is already imported above as np.
all_data = np.concatenate(list(groups.values()))
all_labels = np.repeat(list(groups.keys()), 20)
tukey = pairwise_tukeyhsd(endog=all_data, groups=all_labels, alpha=0.05)
print(tukey.summary())import statsmodels.api as sm
from statsmodels.formula.api import ols
import numpy as np, pandas as pd
np.random.seed(42)
n = 120
# BUG FIX: the response column was named 'yield', a Python keyword. Patsy
# evaluates every formula term as a Python expression, so 'yield ~ ...'
# fails to parse. Renamed the column to 'crop_yield'.
df = pd.DataFrame({
    'fertilizer': np.repeat(['A','B','C'], n//3),
    'irrigation': np.tile(['Low','High'], n//2),
    # Base yield plus a +8 effect for fertilizer C and +5 for high irrigation.
    'crop_yield': (np.random.normal(55, 8, n) +
                   np.where(np.repeat(['A','B','C'], n//3)=='C', 8, 0) +
                   np.where(np.tile(['Low','High'], n//2)=='High', 5, 0)),
})
# Two-way ANOVA with interaction; Type II sums of squares.
model = ols('crop_yield ~ C(fertilizer) + C(irrigation) + C(fertilizer):C(irrigation)',
            data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table.round(4))
print(f"\nFertilizer effect: p={anova_table.loc['C(fertilizer)','PR(>F)']:.4f}")
print(f"Irrigation effect: p={anova_table.loc['C(irrigation)','PR(>F)']:.4f}")from scipy import stats
from statsmodels.stats.multitest import multipletests
import numpy as np
from itertools import combinations
np.random.seed(5)
# Non-normal data: response times across 3 UI designs
groups = {
'Design A': np.random.exponential(2.0, 30),
'Design B': np.random.exponential(1.5, 30),
'Design C': np.random.exponential(2.5, 30),
}
names = list(groups.keys())
data = list(groups.values())
# Kruskal-Wallis
H, p = stats.kruskal(*data)
print(f"Kruskal-Wallis: H={H:.3f}, p={p:.4f}")
# Pairwise Mann-Whitney U with Bonferroni correction
pairs = list(combinations(range(len(names)), 2))
raw_p = []
for i, j in pairs:
_, pv = stats.mannwhitneyu(data[i], data[j], alternative='two-sided')
raw_p.append(pv)
reject, p_adj, _, _ = multipletests(raw_p, method='bonferroni')
for (i,j), pv, pa, rej in zip(pairs, raw_p, p_adj, reject):
print(f" {names[i]} vs {names[j]}: raw_p={pv:.4f}, adj_p={pa:.4f}, reject={rej}")from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np, pandas as pd
np.random.seed(99)
flows = {'Flow 1': np.random.normal(45, 12, 20),
'Flow 2': np.random.normal(38, 10, 20),
'Flow 3': np.random.normal(52, 14, 20)}
f, p = stats.f_oneway(*flows.values())
print(f"ANOVA: F={f:.3f}, p={p:.4f}")
all_times = np.concatenate(list(flows.values()))
all_labels = np.repeat(list(flows.keys()), 20)
tukey = pairwise_tukeyhsd(all_times, all_labels, alpha=0.05)
print(tukey.summary())
for name, times in flows.items():
print(f"{name}: mean={times.mean():.1f}s, sd={times.std():.1f}s")from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np
np.random.seed(42)
channels = {'Email': np.random.normal(120, 25, 25),
'Social': np.random.normal(135, 25, 25),
'Search': np.random.normal(150, 25, 25),
'Display': np.random.normal(110, 25, 25)}
# (1) One-way ANOVA
# (2) Tukey HSD if significant
# (3) eta-squared = SS_between / SS_total
# SS_total = sum of (xi - grand_mean)^2 across all observations
Pearson measures linear association between continuous variables. Spearman and Kendall are rank-based (robust to outliers and non-linearity). Point-biserial handles binary-continuous pairs.
from scipy import stats
import numpy as np
np.random.seed(42)
n = 100
# Pearson: linear relationship
x = np.random.randn(n)
y_linear = 2 * x + np.random.randn(n) * 0.5
r_p, p_p = stats.pearsonr(x, y_linear)
r_s, p_s = stats.spearmanr(x, y_linear)
r_k, p_k = stats.kendalltau(x, y_linear)
print("Linear relationship:")
print(f" Pearson r={r_p:.4f} Spearman r={r_s:.4f} Kendall tau={r_k:.4f}")
# Spearman handles monotonic non-linear relationships
y_mono = np.exp(x) + np.random.randn(n) * 0.3
r_p2, _ = stats.pearsonr(x, y_mono)
r_s2, _ = stats.spearmanr(x, y_mono)
print(f"\nMonotonic (non-linear):")
print(f" Pearson r={r_p2:.4f} Spearman r={r_s2:.4f}")
# With outliers: Spearman is robust
y_outlier = y_linear.copy()
y_outlier[[5, 10, 15]] = 100 # inject outliers
r_p3, _ = stats.pearsonr(x, y_outlier)
r_s3, _ = stats.spearmanr(x, y_outlier)
print(f"\nWith outliers:")
print(f" Pearson r={r_p3:.4f} Spearman r={r_s3:.4f} (Spearman more robust)")from scipy import stats
import numpy as np, pandas as pd
np.random.seed(42)
n = 200
# Confounding variable Z drives both X and Y
z = np.random.randn(n)
x = 0.7 * z + np.random.randn(n) * 0.5
y = 0.8 * z + np.random.randn(n) * 0.5
w = np.random.randn(n) # unrelated
df = pd.DataFrame({'X': x, 'Y': y, 'Z': z, 'W': w})
print("Correlation matrix:")
print(df.corr(method='pearson').round(3))
# Partial correlation: X-Y controlling for Z
def partial_corr(x, y, z):
# Regress out z from both x and y
r_xz = np.corrcoef(x, z)[0,1]
r_yz = np.corrcoef(y, z)[0,1]
r_xy = np.corrcoef(x, y)[0,1]
return (r_xy - r_xz*r_yz) / (np.sqrt(1-r_xz**2) * np.sqrt(1-r_yz**2))
pc = partial_corr(x, y, z)
print(f"\nX-Y raw correlation: {np.corrcoef(x,y)[0,1]:.4f}")
print(f"X-Y partial (given Z): {pc:.4f} (much lower β Z was confounding)")from scipy import stats
import numpy as np
np.random.seed(3)
n = 150
# Point-biserial: continuous vs binary
scores = np.random.normal(70, 10, n)
passed = (scores + np.random.randn(n)*5 > 68).astype(int)
r_pb, p_pb = stats.pointbiserialr(passed, scores)
print(f"Point-biserial r={r_pb:.4f}, p={p_pb:.4f}")
# Phi coefficient: binary vs binary
vaccinated = np.random.choice([0,1], n, p=[0.4,0.6])
infected = np.where(vaccinated==1,
                    np.random.choice([0,1], n, p=[0.9,0.1]),
                    np.random.choice([0,1], n, p=[0.4,0.6]))
table = np.array([[((vaccinated==0)&(infected==0)).sum(),
                   ((vaccinated==0)&(infected==1)).sum()],
                  [((vaccinated==1)&(infected==0)).sum(),
                   ((vaccinated==1)&(infected==1)).sum()]])
# BUG FIX: chi2_contingency applies Yates' continuity correction to 2x2
# tables by default, which biases phi = sqrt(chi2/n) downward. With
# correction=False, phi equals |Pearson r| of the two binary variables.
chi2, p_chi, _, _ = stats.chi2_contingency(table, correction=False)
phi = np.sqrt(chi2 / n)
print(f"Phi coefficient={phi:.4f}, chi2_p={p_chi:.4f}")
# Testing significance of Pearson r
r = 0.35
n_test = 50
t = r * np.sqrt(n_test-2) / np.sqrt(1-r**2)
p = 2 * stats.t.sf(abs(t), df=n_test-2)
print(f"\nr=0.35, n=50: t={t:.3f}, p={p:.4f}, significant={p<0.05}")
print(f"\nr=0.35, n=50: t={t:.3f}, p={p:.4f}, significant={p<0.05}")from scipy import stats
import numpy as np, pandas as pd
np.random.seed(10)
n = 80
age = np.random.uniform(18, 35, n)
speed = -0.4*age + np.random.randn(n)*2 + 30
strength = 0.1*age + np.random.randn(n)*3 + 20
endurance = -0.3*age + np.random.randn(n)*2 + 25
performance= 0.5*speed + 0.3*strength + 0.4*endurance + np.random.randn(n)*2
df = pd.DataFrame({'age':age,'speed':speed,'strength':strength,
'endurance':endurance,'performance':performance})
print("Spearman correlations with performance:")
for col in ['age','speed','strength','endurance']:
r, p = stats.spearmanr(df[col], df['performance'])
print(f" {col:12s}: r={r:.3f}, p={p:.4f}")
# Partial: speed-performance controlling for age
r_sp = np.corrcoef(speed, performance)[0,1]
r_sa = np.corrcoef(speed, age)[0,1]
r_pa = np.corrcoef(performance, age)[0,1]
pc = (r_sp - r_sa*r_pa) / (np.sqrt(1-r_sa**2)*np.sqrt(1-r_pa**2))
print(f"\nSpeed-Perf raw r={r_sp:.3f}, partial (ctrl age)={pc:.3f}")from scipy import stats
import numpy as np, pandas as pd
np.random.seed(42)
n = 300
z = np.random.randn(n)
df = pd.DataFrame({
'f1': z + np.random.randn(n)*0.3,
'f2': -0.8*z + np.random.randn(n)*0.3, # correlated with f1
'f3': np.random.randn(n),
'f4': np.random.randn(n),
'f5': 0.9*z + np.random.randn(n)*0.1, # highly correlated
'f6': np.random.exponential(2, n),
'f7': np.random.randn(n),
'binary': (z > 0).astype(int),
})
# (1) correlation matrix
# (2) flag |r| > 0.7 pairs
# (3) point-biserial for binary vs continuous
# (4) test significance
Non-parametric tests make no assumptions about distributions. Use them when data is ordinal, heavily skewed, or sample sizes are too small to verify normality. Wilcoxon, Mann-Whitney, and Kolmogorov-Smirnov are the workhorses.
from scipy import stats
import numpy as np
np.random.seed(42)
datasets = {
'Normal': np.random.normal(50, 10, 50),
'Skewed': np.random.exponential(5, 50),
'Heavy-tailed':np.random.standard_t(df=3, size=50),
'Uniform': np.random.uniform(0, 100, 50),
}
print(f"{'Dataset':15s} {'Shapiro p':>10s} {'D\'Agostino p':>13s} {'Normal?'}")
print('-' * 60)
for name, data in datasets.items():
_, p_sw = stats.shapiro(data)
_, p_da = stats.normaltest(data)
is_norm = p_sw > 0.05 and p_da > 0.05
print(f"{name:15s} {p_sw:10.4f} {p_da:13.4f} {str(is_norm)}")
# Q-Q plot values (manual)
normal_data = datasets['Normal']
theoretical, observed = stats.probplot(normal_data, dist='norm')[0]
r = np.corrcoef(theoretical, observed)[0,1]
print(f"\nQ-Q correlation for Normal: {r:.4f} (close to 1 = normal)")from scipy import stats
import numpy as np
np.random.seed(7)
# Wilcoxon signed-rank: paired non-parametric alternative to paired t-test
before = np.random.exponential(2.0, 30)
after = before * 0.7 + np.random.exponential(0.3, 30)
stat_w, p_w = stats.wilcoxon(before, after)
stat_t, p_t = stats.ttest_rel(before, after)
print("Paired comparison (before vs after):")
print(f" Wilcoxon: stat={stat_w:.1f}, p={p_w:.4f}")
print(f" t-test: stat={stat_t:.3f}, p={p_t:.4f}")
# Mann-Whitney U: non-parametric two-sample test
group1 = np.random.exponential(3.0, 40)
group2 = np.random.exponential(4.5, 40)
u_stat, p_u = stats.mannwhitneyu(group1, group2, alternative='two-sided')
t_stat, p_t2 = stats.ttest_ind(group1, group2)
print(f"\nTwo-group comparison:")
print(f" Mann-Whitney: U={u_stat:.1f}, p={p_u:.4f}")
print(f" t-test: t={t_stat:.3f}, p={p_t2:.4f}")
# Effect size r = Z / sqrt(N) for Mann-Whitney
n1, n2 = len(group1), len(group2)
z = stats.norm.ppf(p_u/2) # approximate z
r_effect = abs(z) / np.sqrt(n1+n2)
print(f" Effect size r: {r_effect:.3f}")from scipy import stats
import numpy as np
np.random.seed(0)
# One-sample KS: does data come from a specified distribution?
data_normal = np.random.normal(0, 1, 100)
data_exp = np.random.exponential(1, 100)
for name, data in [('Normal', data_normal), ('Exponential', data_exp)]:
    ks_n, p_n = stats.kstest(data, 'norm', args=(data.mean(), data.std()))
    # BUG FIX: scipy's expon is parameterized as (loc, scale) with
    # scale == mean. The original passed 1/mean, which is the *rate*
    # lambda, not the scale, so the fitted exponential was wrong.
    ks_e, p_e = stats.kstest(data, 'expon', args=(0, data.mean()))
    print(f"{name}: KS vs Normal p={p_n:.4f}, KS vs Expon p={p_e:.4f}")
# Two-sample KS: do two samples come from the same distribution?
sample_a = np.random.normal(0, 1, 100)
sample_b = np.random.normal(0.5, 1, 100) # slightly shifted
ks2, p2 = stats.ks_2samp(sample_a, sample_b)
print(f"\n2-sample KS: stat={ks2:.4f}, p={p2:.4f}")
# Anderson-Darling (more powerful than KS for normal testing)
result = stats.anderson(data_normal, dist='norm')
print(f"\nAnderson-Darling statistic: {result.statistic:.4f}")
for sl, cv in zip(result.significance_level, result.critical_values):
print(f" {sl}% significance level: crit={cv:.4f}, reject={result.statistic > cv}")from scipy import stats
import numpy as np
np.random.seed(42)
# Pain scores: skewed, bounded 1-10
drug_scores = np.clip(np.random.exponential(2.5, 40) + 1, 1, 10).round()
control_scores = np.clip(np.random.exponential(4.0, 40) + 1, 1, 10).round()
# Test normality first
_, p_drug = stats.shapiro(drug_scores)
_, p_ctrl = stats.shapiro(control_scores)
print(f"Shapiro p-values: drug={p_drug:.4f}, control={p_ctrl:.4f}")
print(f"Use non-parametric: {p_drug < 0.05 or p_ctrl < 0.05}")
# Mann-Whitney U
u, p = stats.mannwhitneyu(drug_scores, control_scores, alternative='less')
print(f"Mann-Whitney: U={u:.1f}, p={p:.4f}")
print(f"Drug reduces pain? {p < 0.05}")
print(f"Median drug={np.median(drug_scores):.1f}, control={np.median(control_scores):.1f}")from scipy import stats
import numpy as np
def compare_groups(a, b, alpha=0.05):
result = {}
# (1) normality
_, p_a = stats.shapiro(a)
_, p_b = stats.shapiro(b)
both_normal = p_a > alpha and p_b > alpha
# (2) if normal: Levene then t-test
# (3) if not normal: Mann-Whitney
# (4) return dict
return result
np.random.seed(42)
normal_a = np.random.normal(10, 2, 30)
normal_b = np.random.normal(11, 2, 30)
skewed_a = np.random.exponential(2, 30)
skewed_b = np.random.exponential(3, 30)
print(compare_groups(normal_a, normal_b))
print(compare_groups(skewed_a, skewed_b))
Bayesian inference updates prior beliefs with observed data to produce a posterior distribution. Beta-Binomial and Normal-Normal conjugates let you compute posteriors analytically. Compare to frequentist interpretation of the same data.
from scipy import stats
import numpy as np
# Prior: Beta(alpha=2, beta=10) -- we expect ~17% conversion
prior_a, prior_b = 2, 10
# Observed data: 45 conversions out of 200 visits
obs_conv, obs_total = 45, 200
obs_failures = obs_total - obs_conv
# Posterior: Beta(alpha + successes, beta + failures) [conjugate]
post_a = prior_a + obs_conv
post_b = prior_b + obs_failures
prior = stats.beta(prior_a, prior_b)
posterior = stats.beta(post_a, post_b)
print(f"Prior: mean={prior.mean():.3f}, 95% CI: ({prior.ppf(0.025):.3f}, {prior.ppf(0.975):.3f})")
print(f"Posterior: mean={posterior.mean():.3f}, 95% CI: ({posterior.ppf(0.025):.3f}, {posterior.ppf(0.975):.3f})")
print(f"MLE (freq): {obs_conv/obs_total:.3f}")
# Probability that conversion > 25%
prob_above = 1 - posterior.cdf(0.25)
print(f"P(conversion > 25%): {prob_above:.4f}")
# Posterior predictive: expected conversions in next 100 visits follow a
# beta-binomial distribution.
m_next = 100
pp_mean = posterior.mean() * m_next
# BUG FIX: the predictive sd must include binomial sampling noise, not just
# the posterior variance of the rate (sqrt(posterior.var()*100) drastically
# understates it). Beta-binomial variance: m*mu*(1-mu)*(a+b+m)/(a+b+1).
mu = posterior.mean()
pp_var = m_next * mu * (1 - mu) * (post_a + post_b + m_next) / (post_a + post_b + 1)
pp_sd = np.sqrt(pp_var)
print(f"Expected conversions (next 100): {pp_mean:.1f} Β± {pp_sd:.1f}")
print(f"Expected conversions (next 100): {pp_mean:.1f} Β± {pp_sd:.1f}")from scipy import stats
import numpy as np
np.random.seed(42)
# A/B test with conjugate Beta posteriors
conv_a, n_a = 120, 1000 # page A
conv_b, n_b = 145, 1000 # page B
# Non-informative priors Beta(1,1) = Uniform
post_a = stats.beta(1 + conv_a, 1 + n_a - conv_a)
post_b = stats.beta(1 + conv_b, 1 + n_b - conv_b)
# Monte Carlo estimate P(B > A)
samples_a = post_a.rvs(100_000)
samples_b = post_b.rvs(100_000)
p_b_wins = (samples_b > samples_a).mean()
# Expected lift
lift_samples = (samples_b - samples_a) / samples_a
lift_mean = lift_samples.mean()
lift_ci = np.percentile(lift_samples, [2.5, 97.5])
print(f"P(B > A) = {p_b_wins:.4f}")
print(f"Expected lift: {lift_mean:.1%}")
print(f"95% credible interval for lift: ({lift_ci[0]:.1%}, {lift_ci[1]:.1%})")
print(f"Posterior A: mean={post_a.mean():.4f}")
print(f"Posterior B: mean={post_b.mean():.4f}")from scipy import stats
import numpy as np
np.random.seed(0)
# Estimating the true mean of a process
# Prior: Normal(mu0=100, sigma0=20) -- prior belief about mean
# Known measurement noise: sigma=15
mu0, sigma0 = 100.0, 20.0 # prior
sigma_obs = 15.0 # known observation noise
# Observe 30 measurements
true_mean = 112.0
data = np.random.normal(true_mean, sigma_obs, 30)
n, x_bar = len(data), data.mean()
# Conjugate update
precision_prior = 1 / sigma0**2
precision_data = n / sigma_obs**2
precision_post = precision_prior + precision_data
mu_post = (precision_prior*mu0 + precision_data*x_bar) / precision_post
sigma_post = np.sqrt(1 / precision_post)
prior_dist = stats.norm(mu0, sigma0)
post_dist = stats.norm(mu_post, sigma_post)
print(f"Prior: mean={mu0:.1f}, 95% CI: ({prior_dist.ppf(0.025):.1f}, {prior_dist.ppf(0.975):.1f})")
print(f"Likelihood: mean={x_bar:.2f}, (n={n} obs, noise={sigma_obs})")
print(f"Posterior: mean={mu_post:.2f}, 95% CI: ({post_dist.ppf(0.025):.2f}, {post_dist.ppf(0.975):.2f})")
print(f"True mean: {true_mean}")
print(f"Posterior pulls toward prior when data is limited.")from scipy import stats
import numpy as np
np.random.seed(42)
# Sequential Bayesian A/B test
true_rate_a, true_rate_b = 0.10, 0.13
n_per_day = 50
post_a = stats.beta(1, 1)
post_b = stats.beta(1, 1)
alpha_a, beta_a = 1, 1
alpha_b, beta_b = 1, 1
print(f"{'Day':>5s} {'n_a':>6s} {'n_b':>6s} {'Rate A':>8s} {'Rate B':>8s} {'P(B>A)':>8s}")
for day in range(1, 31):
# New daily data
conv_a = np.random.binomial(n_per_day, true_rate_a)
conv_b = np.random.binomial(n_per_day, true_rate_b)
alpha_a += conv_a; beta_a += n_per_day - conv_a
alpha_b += conv_b; beta_b += n_per_day - conv_b
post_a = stats.beta(alpha_a, beta_a)
post_b = stats.beta(alpha_b, beta_b)
p_b_wins = (post_b.rvs(50000) > post_a.rvs(50000)).mean()
n_total = (alpha_a + beta_a - 2) + (alpha_b + beta_b - 2)
if day % 5 == 0 or p_b_wins > 0.95:
print(f"{day:5d} {n_total//2:6d} {n_total//2:6d} {post_a.mean():8.4f} {post_b.mean():8.4f} {p_b_wins:8.4f}")
if p_b_wins > 0.95:
print(f" --> Stopping: P(B>A)={p_b_wins:.4f} > 0.95 on day {day}")
breakfrom scipy import stats
import numpy as np
np.random.seed(0)
n_flips, n_heads = 100, 62
flips = np.array([1]*n_heads + [0]*(n_flips - n_heads))
np.random.shuffle(flips)
a, b = 1, 1 # prior Beta(1,1)
print(f"{'Step':>6s} {'Heads':>6s} {'n':>5s} {'Post mean':>10s} {'95% Credible':>20s} {'Freq 95% CI':>20s}")
for step in range(10, n_flips+1, 10):
chunk = flips[:step]
h = chunk.sum(); n = len(chunk)
a_post = a + h; b_post = b + n - h
post = stats.beta(a_post, b_post)
# TODO: compute posterior 95% CI from post
# TODO: compute frequentist 95% CI using stats.proportion_confint or normal approx
# TODO: print row
pass
# (5) P(p > 0.5) from final posterior
Effect size quantifies the magnitude of a difference independently of sample size. Cohen's d, r, eta-squared measure effect for different tests. Power analysis determines the sample size needed to detect a given effect.
from scipy import stats
import numpy as np
def cohens_d(group1, group2):
n1, n2 = len(group1), len(group2)
s_pooled = np.sqrt(((n1-1)*group1.std()**2 + (n2-1)*group2.std()**2) / (n1+n2-2))
return (group1.mean() - group2.mean()) / s_pooled
np.random.seed(42)
thresholds = [('Small', 0.2), ('Medium', 0.5), ('Large', 0.8)]
print(f"{'Effect':8s} {'Cohen d':>8s} {'Interpretation':>15s} {'p-value':>8s}")
print('-' * 55)
for label, target_d in thresholds:
a = np.random.normal(0, 1, 50)
b = np.random.normal(target_d, 1, 50)
d = cohens_d(b, a)
_, p = stats.ttest_ind(a, b)
print(f"{label:8s} {d:8.3f} {label:>15s} {p:8.4f}")
# r from t-statistic
t_stat = 3.2
n_total = 80
r = np.sqrt(t_stat**2 / (t_stat**2 + n_total - 2))
print(f"\nt={t_stat}, n={n_total} -> r effect size = {r:.3f}")
# Eta-squared from F (ANOVA)
F, df_between, df_within = 8.5, 2, 60
eta2 = (F * df_between) / (F * df_between + df_within)
print(f"F={F}, df_b={df_between}, df_w={df_within} -> etaΒ²={eta2:.3f}")from scipy import stats
import numpy as np
def sample_size_ttest(effect_size_d, alpha=0.05, power=0.80, tail=2):
z_alpha = stats.norm.ppf(1 - alpha/tail)
z_beta = stats.norm.ppf(power)
n = 2 * ((z_alpha + z_beta) / effect_size_d) ** 2
return int(np.ceil(n))
print("Required sample size per group (two-sample t-test):")
print(f"{'Effect d':>9s} {'Ξ±=0.05,80%':>12s} {'Ξ±=0.05,90%':>12s} {'Ξ±=0.01,80%':>12s}")
print('-' * 52)
for d in [0.2, 0.3, 0.5, 0.8, 1.0]:
n1 = sample_size_ttest(d, 0.05, 0.80)
n2 = sample_size_ttest(d, 0.05, 0.90)
n3 = sample_size_ttest(d, 0.01, 0.80)
print(f"{d:9.1f} {n1:12d} {n2:12d} {n3:12d}")
# Using statsmodels TTestIndPower
from statsmodels.stats.power import TTestIndPower
analysis = TTestIndPower()
n = analysis.solve_power(effect_size=0.5, alpha=0.05, power=0.80)
print(f"\nstatsmodels: d=0.5, Ξ±=0.05, power=0.80 -> n={n:.1f} per group")
# Achieved power for given n and effect
power = analysis.solve_power(effect_size=0.3, alpha=0.05, nobs1=100)
print(f"d=0.3, n=100 per group -> achieved power={power:.3f}")from statsmodels.stats.power import TTestIndPower, ChiSquarePower
import numpy as np
analysis = TTestIndPower()
# Power curve: power vs sample size for different effect sizes
print("Power by sample size and effect size:")
print(f"{'n/grp':>7s}", end='')
for d in [0.2, 0.3, 0.5, 0.8]:
print(f" d={d}", end='')
print()
for n in [20, 50, 100, 200, 500]:
print(f"{n:7d}", end='')
for d in [0.2, 0.3, 0.5, 0.8]:
pwr = analysis.solve_power(effect_size=d, alpha=0.05, nobs1=n)
print(f" {pwr:.3f}", end='')
print()
# Minimum detectable effect (MDE) given n and desired power
mde = analysis.solve_power(alpha=0.05, power=0.80, nobs1=200)
print(f"\nMDE (d) for n=200/grp, power=80%: {mde:.3f}")
# Chi-square power
chi_power = ChiSquarePower()
n_chi = chi_power.solve_power(effect_size=0.3, alpha=0.05, power=0.80, k_bins=3)
print(f"Chi-square: effect=0.3, k=3, power=80% -> n={n_chi:.1f}")from statsmodels.stats.power import NormalIndPower
from scipy import stats
import numpy as np
# Rate improvement: 60% -> 66%
rate_a, rate_b = 0.60, 0.66
# Cohen's h for proportions (arcsine-transformed difference)
h = 2 * np.arcsin(np.sqrt(rate_b)) - 2 * np.arcsin(np.sqrt(rate_a))
print(f"Cohen's h = {h:.4f}")
# Required sample size
analysis = NormalIndPower()
for power in [0.80, 0.90, 0.95]:
    n = analysis.solve_power(effect_size=h, alpha=0.05, power=power)
    print(f"Power={power:.0%}: n={n:.0f} per group (total {2*n:.0f})")
# Simulate the test
np.random.seed(42)
n_per = int(analysis.solve_power(effect_size=h, alpha=0.05, power=0.80)) + 1
conv_a = np.random.binomial(n_per, rate_a)
conv_b = np.random.binomial(n_per, rate_b)
# BUG FIX: proportions_ztest is in statsmodels.stats.proportion, not in
# scipy.stats (the original call raised AttributeError). Compute the pooled
# two-proportion z-test with scipy.stats.norm instead.
p_pool = (conv_a + conv_b) / (2 * n_per)
se = np.sqrt(p_pool * (1 - p_pool) * (2 / n_per))
z_stat = (conv_a - conv_b) / n_per / se
p = 2 * stats.norm.sf(abs(z_stat))
print(f"\nSimulated test: A={conv_a/n_per:.3f}, B={conv_b/n_per:.3f}, p={p:.4f}")
print(f"\nSimulated test: A={conv_a/n_per:.3f}, B={conv_b/n_per:.3f}, p={p:.4f}")from statsmodels.stats.power import TTestIndPower
import numpy as np
analysis = TTestIndPower()
# (1) n grid
print("Required n per group:")
effects = [0.2, 0.5, 0.8]
powers = [0.70, 0.80, 0.90]
# TODO: print header and rows
# (2) power table for n=150
print("\nAchieved power for n=150 per group:")
# TODO
# (3) MDE
mde = analysis.solve_power(alpha=0.05, power=0.80, nobs1=300)
print(f"\nMDE for n=300: d={mde:.4f}")
Running many tests inflates the false positive rate. Bonferroni controls family-wise error rate (conservative). Benjamini-Hochberg (BH) controls the false discovery rate β preferred for exploratory studies with many hypotheses.
from statsmodels.stats.multitest import multipletests
from scipy import stats
import numpy as np
np.random.seed(42)
n_tests = 20
n_per = 50
# 20 tests: 3 truly different (true positives), 17 null
true_effect = [0, 0, 0, 0.8, 0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0.6, 0, 0, 0, 0]
raw_p = []
for d in true_effect:
a = np.random.normal(0, 1, n_per)
b = np.random.normal(d, 1, n_per)
_, p = stats.ttest_ind(a, b)
raw_p.append(p)
methods = ['bonferroni', 'holm', 'fdr_bh', 'fdr_by']
print(f"{'Method':12s} {'Rejections':>11s} {'True Pos':>9s} {'False Pos':>10s}")
for method in methods:
reject, p_adj, _, _ = multipletests(raw_p, alpha=0.05, method=method)
n_reject = reject.sum()
true_pos = sum(1 for r, d in zip(reject, true_effect) if r and d != 0)
false_pos = sum(1 for r, d in zip(reject, true_effect) if r and d == 0)
print(f"{method:12s} {n_reject:11d} {true_pos:9d} {false_pos:10d}")
print(f"\nUncorrected (p<0.05): {sum(p < 0.05 for p in raw_p)} rejections")from scipy import stats
import numpy as np
np.random.seed(1)
n_hypotheses = 15
alpha = 0.05
# Simulate p-values (mix of true positives and nulls)
true_effects = [0]*10 + [0.7, 0.9, 0.5, 1.2, 0.6]
raw_p = []
for d in true_effects:
a = np.random.normal(0, 1, 40)
b = np.random.normal(d, 1, 40)
_, p = stats.ttest_ind(a, b)
raw_p.append(p)
# BH procedure manually
sorted_idx = np.argsort(raw_p)
sorted_p = np.array(raw_p)[sorted_idx]
print(f"BH procedure (alpha={alpha}, m={n_hypotheses} tests):")
print(f"{'Rank':>5s} {'p-value':>8s} {'BH thresh':>10s} {'Reject?'}")
bh_threshold = alpha * np.arange(1, n_hypotheses+1) / n_hypotheses
last_reject = -1
for rank, (p, thr) in enumerate(zip(sorted_p, bh_threshold), 1):
if p <= thr:
last_reject = rank
for rank, (p, thr) in enumerate(zip(sorted_p[:8], bh_threshold[:8]), 1):
reject = rank <= last_reject
print(f"{rank:5d} {p:8.4f} {thr:10.4f} {reject}")
print(f" ... ({n_hypotheses} total)")
print(f"\nAll hypotheses with rank <= {last_reject} are rejected ({last_reject} total)")from statsmodels.stats.multitest import multipletests
from scipy import stats
import numpy as np
np.random.seed(7)
m = 1000 # large-scale testing (genomics-like)
n_true = 50
# Generate p-values: 50 true effects + 950 nulls
p_true = np.array([stats.ttest_ind(
np.random.normal(0,1,30), np.random.normal(0.6,1,30))[1]
for _ in range(n_true)])
p_null = np.random.uniform(0, 1, m - n_true) # null p-values ~ Uniform
all_p = np.concatenate([p_true, p_null])
true_labels = np.array([True]*n_true + [False]*(m-n_true))
results = {}
for method in ['bonferroni', 'fdr_bh']:
reject, _, _, _ = multipletests(all_p, alpha=0.05, method=method)
tp = (reject & true_labels).sum()
fp = (reject & ~true_labels).sum()
fn = (~reject & true_labels).sum()
fdr = fp / max(reject.sum(), 1)
power = tp / n_true
results[method] = (reject.sum(), tp, fp, fdr, power)
print(f"{method:12s}: rejections={reject.sum():4d} TP={tp:3d} FP={fp:3d} FDR={fdr:.3f} Power={power:.3f}")
print(f"\nSummary: Bonferroni is conservative; BH balances FDR and power")from statsmodels.stats.multitest import multipletests
from scipy import stats
import numpy as np
np.random.seed(42)
n_genes = 5000
n_de = 150 # truly differentially expressed
# Group A and B expression levels
expr_a = np.random.normal(0, 1, (n_genes, 30))
expr_b = np.random.normal(0, 1, (n_genes, 30))
# Inject true effects
effects = np.random.normal(0.8, 0.3, n_de)
expr_b[:n_de] += effects[:, np.newaxis]
# Compute p-values
p_vals = np.array([stats.ttest_ind(expr_a[i], expr_b[i])[1]
for i in range(n_genes)])
for method in ['none', 'bonferroni', 'fdr_bh']:
if method == 'none':
reject = p_vals < 0.05
adj_p = p_vals
else:
reject, adj_p, _, _ = multipletests(p_vals, alpha=0.05, method=method)
tp = reject[:n_de].sum()
fp = reject[n_de:].sum()
print(f"{method:12s}: significant={reject.sum():5d} TP={tp:4d} FP={fp:4d} FDR={fp/max(reject.sum(),1):.3f}")from statsmodels.stats.multitest import multipletests
import numpy as np
from scipy import stats
np.random.seed(42)
metrics = ['CTR','BounceRate','TimeOnPage','Conversions','PageViews',
'ScrollDepth','CartAdds','Checkout','Revenue','SignUps','ReturnVisits','NPS']
# Simulate p-values: some metrics truly affected, most not
true_effects = [0.4, 0, 0, 0.6, 0, 0, 0.3, 0.5, 0, 0, 0, 0]
p_values = []
for d in true_effects:
a = np.random.normal(0, 1, 200)
b = np.random.normal(d, 1, 200)
_, p = stats.ttest_ind(a, b)
p_values.append(p)
# TODO: apply all three corrections
# TODO: print table showing which metrics are significant under each method
# TODO: comment on when to prefer BH vs Bonferroni
Bootstrap resampling estimates confidence intervals for any statistic without distributional assumptions. Permutation tests provide exact p-values for two-sample comparisons by shuffling group labels.
import numpy as np
from scipy import stats
np.random.seed(42)
data = np.random.exponential(scale=5, size=50) # skewed data
def bootstrap_ci(data, statistic, n_boot=5000, ci=0.95):
boots = [statistic(np.random.choice(data, len(data), replace=True))
for _ in range(n_boot)]
alpha = (1 - ci) / 2
return np.percentile(boots, [alpha*100, (1-alpha)*100]), np.array(boots)
# Bootstrap CI for mean, median, and std
for stat_name, fn in [('mean', np.mean), ('median', np.median), ('std', np.std)]:
ci_boot, boots = bootstrap_ci(data, fn)
# Compare to analytical (where available)
print(f"{stat_name:7s}: obs={fn(data):.3f} boot_CI=({ci_boot[0]:.3f}, {ci_boot[1]:.3f})")
# Analytical CI for mean (t-interval)
ci_t = stats.t.interval(0.95, df=len(data)-1, loc=data.mean(), scale=stats.sem(data))
print(f"\nt-interval CI for mean: ({ci_t[0]:.3f}, {ci_t[1]:.3f})")
print(f"Boot CI for mean: above (similar for large n, more robust for skewed)")import numpy as np
from scipy import stats
np.random.seed(0)
n = 80
x = np.random.randn(n)
y = 2.5 * x + 1.0 + np.random.randn(n) * 1.5
def ols(x, y):
slope, intercept, *_ = stats.linregress(x, y)
return slope, intercept
# Bootstrap CI for slope
n_boot = 5000
slopes = []
for _ in range(n_boot):
idx = np.random.choice(n, n, replace=True)
s, _ = ols(x[idx], y[idx])
slopes.append(s)
slopes = np.array(slopes)
ci_slope = np.percentile(slopes, [2.5, 97.5])
obs_slope, obs_int = ols(x, y)
print(f"Observed slope: {obs_slope:.4f}")
print(f"Bootstrap CI slope: ({ci_slope[0]:.4f}, {ci_slope[1]:.4f})")
# Analytical CI from linregress
result = stats.linregress(x, y)
t_crit = stats.t.ppf(0.975, df=n-2)
se_slope = result.stderr
print(f"Analytical CI slope: ({obs_slope - t_crit*se_slope:.4f}, {obs_slope + t_crit*se_slope:.4f})")
print(f"Bootstrap SE of slope: {slopes.std():.4f}")import numpy as np
from scipy import stats

np.random.seed(42)


def permutation_test(a, b, n_perm=10000, stat='mean_diff'):
    """Two-sided Monte Carlo permutation test for a difference in group means.

    a, b: 1-D numeric arrays (the two groups).
    n_perm: number of random label shuffles.
    stat: reserved for future statistics; currently only the mean
        difference is computed regardless of this value.
    Returns (obs_stat, p_val, perm_stats): observed mean(a)-mean(b), the
    two-sided p-value, and the permutation null distribution.
    """
    obs_stat = a.mean() - b.mean()
    combined = np.concatenate([a, b])
    n_a = len(a)
    perm_stats = np.empty(n_perm)
    for i in range(n_perm):
        shuffled = np.random.permutation(combined)
        # Re-split at the original group size and recompute the statistic.
        perm_stats[i] = shuffled[:n_a].mean() - shuffled[n_a:].mean()
    p_val = (np.abs(perm_stats) >= np.abs(obs_stat)).mean()
    return obs_stat, p_val, perm_stats


# Compare against rank-based and parametric tests on skewed data.
group_a = np.random.exponential(3, 25)
group_b = np.random.exponential(4, 25)
obs, p_perm, perms = permutation_test(group_a, group_b)
_, p_mann = stats.mannwhitneyu(group_a, group_b, alternative='two-sided')
_, p_t = stats.ttest_ind(group_a, group_b)
print(f"Observed mean diff: {obs:.3f}")
print(f"Permutation p-val: {p_perm:.4f}")
print(f"Mann-Whitney p-val: {p_mann:.4f}")
print(f"t-test p-val: {p_t:.4f}")
print(f"Percentile of obs in perm dist: {(perms < obs).mean():.3f}")

import numpy as np
from scipy import stats

np.random.seed(99)
# Simulated API response times; Pareto draws give a heavy right tail.
version_a = np.random.pareto(2.0, 200) * 100 + 50
version_b = np.random.pareto(2.5, 200) * 80 + 45

# Bootstrap the difference in medians: resample each version independently
# and record the median gap per replicate.
n_boot = 10000
median_diffs = [
    np.median(np.random.choice(version_a, version_a.size, replace=True))
    - np.median(np.random.choice(version_b, version_b.size, replace=True))
    for _ in range(n_boot)
]
ci = np.percentile(median_diffs, [2.5, 97.5])
obs_diff = np.median(version_a) - np.median(version_b)
print(f"Median A: {np.median(version_a):.2f}ms")
print(f"Median B: {np.median(version_b):.2f}ms")
print(f"Observed difference: {obs_diff:.2f}ms")
print(f"Bootstrap 95% CI: ({ci[0]:.2f}, {ci[1]:.2f})ms")
print(f"CI excludes 0: {ci[0] > 0 or ci[1] < 0} -> B is {'faster' if obs_diff > 0 else 'slower'}")

import numpy as np
from scipy import stats

np.random.seed(42)
# Three size-30 samples with very different tail behavior.
datasets = {
    'Normal': np.random.normal(10, 3, 30),
    'Exponential': np.random.exponential(5, 30),
    'Pareto': np.random.pareto(1.5, 30) * 5 + 5,
}


def bootstrap_ci(data, fn, n_boot=5000):
    """95% percentile-bootstrap CI for statistic fn over 1-D sample data."""
    estimates = []
    for _ in range(n_boot):
        resample = np.random.choice(data, len(data), replace=True)
        estimates.append(fn(resample))
    return np.percentile(estimates, [2.5, 97.5])
for name, data in datasets.items():
boot_mean = bootstrap_ci(data, np.mean)
t_ci = stats.t.interval(0.95, df=len(data)-1, loc=data.mean(), scale=stats.sem(data))
boot_median = bootstrap_ci(data, np.median)
print(f"{name}:")
print(f" Bootstrap mean CI: ({boot_mean[0]:.3f}, {boot_mean[1]:.3f})")
# TODO: print t-interval and bootstrap median CI