🎨 Seaborn
32 topics • Click any card to expand
Seaborn works with pandas DataFrames. Set a theme once at the top of your notebook and all plots inherit consistent styling.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
# Set global theme — do this once at the top
sns.set_theme(style='whitegrid', palette='muted', font_scale=1.1)
# Seaborn ships with practice datasets
tips = sns.load_dataset('tips')
iris = sns.load_dataset('iris')
titanic = sns.load_dataset('titanic')
print(tips.head())
print(f"\ntips shape: {tips.shape}")
print(tips.dtypes)import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: three axes styles (top row) and three colour palettes (bottom row),
# saved together as styles_palettes.png.
styles = ['darkgrid', 'whitegrid', 'dark', 'white', 'ticks']
palettes = ['deep', 'muted', 'pastel', 'bright', 'dark', 'colorblind']
categories = ['A', 'B', 'C']
heights = [3, 5, 4]

fig = plt.figure(figsize=(12, 6))

# Top row: an axes style is captured when the Axes is created, so each
# subplot has to be added inside the axes_style() context. Creating all
# axes up front with plt.subplots() would give every panel the same look
# regardless of what its title claims.
for position, style in enumerate(styles[:3], start=1):
    with sns.axes_style(style):
        ax = fig.add_subplot(2, 3, position)
    sns.barplot(x=categories, y=heights, ax=ax, palette='deep')
    ax.set_title(f'style={style}')

# Bottom row: same bars, different palette per panel (current global style).
for position, pal in enumerate(palettes[:3], start=4):
    ax = fig.add_subplot(2, 3, position)
    sns.barplot(x=categories, y=heights, ax=ax, palette=pal)
    ax.set_title(f'palette={pal}')

fig.tight_layout()
fig.savefig('styles_palettes.png', dpi=80)
plt.close(fig)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Demo: build three palettes, draw each as a row of colour swatches, and
# save the result as palettes_custom.png.
blues = sns.color_palette("Blues", 6)
custom = sns.color_palette(["#e74c3c", "#3498db", "#2ecc71", "#f39c12"])
husl = sns.color_palette("husl", 8)

fig, axes = plt.subplots(1, 3, figsize=(12, 2))

# The swatches are drawn by hand: sns.palplot(pal, size=1) has no ax=
# parameter (passing one raises TypeError) and always opens its own figure,
# so it cannot draw into this subplot grid.
named_palettes = [(blues, 'Blues-6'), (custom, 'Custom-4'), (husl, 'HUSL-8')]
for ax, (pal, name) in zip(axes, named_palettes):
    for j, c in enumerate(pal):
        ax.add_patch(plt.Rectangle((j, 0), 1, 1, color=c))
    ax.set_xlim(0, len(pal))
    ax.set_ylim(0, 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(name)

fig.suptitle('Color Palette Examples', y=1.05)
fig.tight_layout()
# y=1.05 places the suptitle above the figure canvas; bbox_inches='tight'
# expands the saved area so the title is not cropped.
fig.savefig('palettes_custom.png', dpi=80, bbox_inches='tight')
plt.close(fig)
print("Blues palette (RGB):", [tuple(round(v, 2) for v in c) for c in blues])
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Demo: contrast axes-level functions (draw into an Axes you supply) with
# figure-level functions (create and own their Figure / FacetGrid).
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# --- axes-level function: returns an Axes, fits into any subplot grid ---
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
sns.boxplot(data=tips, x='day', y='total_bill', palette='pastel', ax=axes[0])
axes[0].set_title('Axes-level: sns.boxplot (axes[0])')
# sns.despine removes top/right spines for a cleaner look
sns.despine(ax=axes[0], offset=8, trim=True)

# A second axes-level call on the other subplot
sns.stripplot(data=tips, x='day', y='total_bill',
              color='steelblue', alpha=0.4, jitter=True, ax=axes[1])
axes[1].set_title('Axes-level: sns.stripplot (axes[1])')
sns.despine(ax=axes[1])

fig.suptitle('Axes-level functions — full subplot control', fontsize=13, y=1.02)
fig.tight_layout()
# The suptitle sits above y=1, so save with a tight bounding box to keep it.
fig.savefig('setup_despine_axes_level.png', dpi=80, bbox_inches='tight')
plt.close(fig)

# --- figure-level function: creates its OWN figure / FacetGrid ---
# You cannot pass ax= to figure-level functions (displot, catplot, relplot, etc.)
# No palette= here: without a hue variable seaborn ignores it (with a warning).
g = sns.displot(data=tips, x='total_bill', col='time',
                kind='hist', kde=True, bins=20,
                height=3.5, aspect=1.1)
g.set_titles('{col_name}')
g.figure.suptitle('Figure-level: sns.displot (owns its own Figure)', y=1.04)
# Use the grid's own layout/save helpers: g.savefig defaults to
# bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('setup_despine_figure_level.png', dpi=80)
plt.close(g.figure)
print("despine + figure-level vs axes-level demo saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Team-wide style configuration
sns.set_theme(
style='whitegrid',
palette='colorblind', # accessible to color-blind viewers
font_scale=1.0,
rc={
'figure.figsize': (9, 4),
'axes.spines.top': False,
'axes.spines.right': False,
'grid.linewidth': 0.5,
}
)
# Quick sanity-check plot
np.random.seed(42)
df = pd.DataFrame({
'quarter': ['Q1','Q2','Q3','Q4'] * 3,
'region': ['North']*4 + ['South']*4 + ['East']*4,
'revenue': np.random.uniform(100, 500, 12).round(1),
})
sns.barplot(data=df, x='quarter', y='revenue', hue='region')
plt.title('Revenue by Quarter & Region')
plt.ylabel('Revenue ($K)')
plt.tight_layout()
plt.savefig('rw_setup_themes.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
values = [3, 7, 5, 9, 4]
labels = ['A', 'B', 'C', 'D', 'E']
# 1. Apply three styles and save each
for style in ['darkgrid', 'white', 'ticks']:
with sns.axes_style(style):
fig, ax = plt.subplots(figsize=(6, 3))
# TODO: sns.barplot(x=labels, y=values, ax=ax, palette='muted')
# ax.set_title(f'Style: {style}')
# plt.tight_layout()
# plt.savefig(f'style_{style}.png', dpi=80)
# plt.close()
pass
# 2. Custom 5-color palette
my_colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6']
# TODO: fig, ax = plt.subplots(figsize=(6, 3))
# TODO: sns.barplot(x=labels, y=values, palette=my_colors, ax=ax)
# TODO: ax.set_title('Custom Palette')
# TODO: plt.tight_layout(); plt.savefig('custom_palette.png', dpi=80); plt.close()
# 3. sns.set_theme with rc overrides
# TODO: sns.set_theme(style='whitegrid', rc={
# TODO: 'figure.figsize': (8, 3),
# TODO: 'axes.spines.top': False,
# TODO: 'axes.spines.right': False,
# TODO: })
# TODO: fig, ax = plt.subplots()
# TODO: sns.barplot(x=labels, y=values, palette='deep', ax=ax)
# TODO: plt.tight_layout(); plt.savefig('rc_override.png', dpi=80); plt.close()
histplot and kdeplot show how data is distributed. displot combines both into a faceted, figure-level function.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
# Histogram with KDE overlay
sns.histplot(data=tips, x='total_bill', bins=25,
kde=True, ax=axes[0], color='steelblue')
axes[0].set_title('Total Bill Distribution')
# KDE only — compare two groups
sns.kdeplot(data=tips, x='tip', hue='sex',
fill=True, alpha=0.4, ax=axes[1])
axes[1].set_title('Tip Distribution by Gender')
plt.tight_layout()
plt.savefig('dist_hist_kde.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: figure-level displot with one histogram facet per day, coloured by
# sex; saved as dist_displot.png.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# Separate histogram per day, colored by sex
g = sns.displot(
    data=tips, x='total_bill', hue='sex',
    col='day', kind='hist', kde=True,
    bins=15, height=3.5, aspect=0.9,
    palette='Set2',
)
g.set_titles('{col_name}')
g.set_xlabels('Total Bill ($)')
# g.tight_layout() lays out the facets inside the area left of the outside
# legend; plt.tight_layout() would stretch them across the whole figure and
# under the legend.
g.tight_layout()
g.savefig('dist_displot.png', dpi=80)
plt.close(g.figure)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
# ECDF — empirical cumulative distribution
sns.ecdfplot(data=tips, x='total_bill', hue='time',
palette='Set1', linewidth=2, ax=axes[0])
axes[0].set_title('ECDF of Total Bill by Meal Time')
axes[0].set_xlabel('Total Bill ($)')
# Rug plot layered under KDE
sns.kdeplot(data=tips, x='total_bill', hue='smoker',
fill=True, alpha=0.3, ax=axes[1])
sns.rugplot(data=tips, x='total_bill', hue='smoker',
height=0.06, ax=axes[1])
axes[1].set_title('KDE + Rug: Total Bill by Smoker')
plt.tight_layout()
plt.savefig('dist_ecdf_rug.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Demo: (1) overlaid density-normalised histograms per customer tier,
# (2) a figure-level ECDF of the same data. Each is saved to its own PNG.
np.random.seed(42)
sns.set_theme(style='whitegrid')

# Synthetic spend data for three customer tiers
tiers = {'Bronze': np.random.normal(30, 10, 300),
         'Silver': np.random.normal(70, 15, 200),
         'Gold':   np.random.normal(130, 25, 100)}
# One colour per tier, shared by both figures so they stay in sync.
tier_colors = {'Bronze': '#cd7f32', 'Silver': '#aaa9ad', 'Gold': '#ffd700'}

# Long-form frame: one row per customer; negative draws are clipped to 0.
df = pd.DataFrame([
    {'tier': tier, 'spend': round(v, 2)}
    for tier, vals in tiers.items()
    for v in vals.clip(0)
])

# Figure 1: overlaid histograms normalised to density so groups of different
# size are comparable. A single Axes is used; the ECDF is figure-level and
# cannot be drawn into a second panel of this figure.
fig, ax = plt.subplots(figsize=(8, 4))
for tier, color in tier_colors.items():
    sns.histplot(data=df[df['tier'] == tier], x='spend', stat='density',
                 bins=25, alpha=0.45, color=color, label=tier,
                 kde=True, ax=ax)
ax.set_title('Spend Distribution by Tier (stat=density)')
ax.set_xlabel('Monthly Spend ($)')
ax.legend(title='Tier')
fig.tight_layout()
fig.savefig('dist_density_overlay.png', dpi=80)
plt.close(fig)

# Figure 2: displot with kind='ecdf' — figure-level, drawn into a fresh figure
g = sns.displot(data=df, x='spend', hue='tier',
                kind='ecdf', linewidth=2.5,
                palette=tier_colors,
                height=4, aspect=1.6)
g.set_axis_labels('Monthly Spend ($)', 'Proportion')
g.figure.suptitle('ECDF of Spend by Customer Tier (displot kind=ecdf)', y=1.03)
# Grid-aware layout keeps the outside legend clear of the axes; g.savefig
# defaults to bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('dist_ecdf_displot.png', dpi=80)
plt.close(g.figure)
print("Density overlay and ECDF displot saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(42)
segments = {
'VIP': np.random.normal(350, 80, 200),
'Regular': np.random.normal(150, 50, 500),
'Occasional':np.random.normal(60, 30, 300),
}
rows = []
for seg, vals in segments.items():
for v in vals.clip(0):
rows.append({'segment': seg, 'spend': round(v, 2)})
df = pd.DataFrame(rows)
sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(10, 4))
sns.kdeplot(data=df, x='spend', hue='segment',
fill=True, alpha=0.3, linewidth=2,
palette={'VIP':'#e74c3c','Regular':'#3498db','Occasional':'#2ecc71'})
# Threshold lines
for thresh, label in [(100,'Entry'), (250,'Mid'), (400,'Premium')]:
ax.axvline(thresh, color='gray', linestyle='--', linewidth=1, alpha=0.7)
ax.text(thresh+3, ax.get_ylim()[1]*0.9, label, fontsize=8, color='gray')
ax.set_xlabel('Monthly Spend ($)')
ax.set_title('Customer Spend Distribution by Segment')
plt.tight_layout()
plt.savefig('rw_dist_spend.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# 1. Overlaid KDE for total_bill by smoker
# TODO: sns.kdeplot(data=tips, x='total_bill', hue='smoker',
# TODO: fill=True, alpha=0.35, ax=axes[0])
# 2. Add vertical lines at each group's median
for smoker_val, color in [('Yes', '#e74c3c'), ('No', '#3498db')]:
grp = tips[tips['smoker'] == smoker_val]['total_bill']
# TODO: axes[0].axvline(grp.median(), color=color, linestyle='--', linewidth=1.5,
# TODO: label=f'Median ({smoker_val}): ${grp.median():.1f}')
pass
# TODO: axes[0].legend(); axes[0].set_title('Total Bill KDE by Smoker')
# 3. ECDF of tip by day
# TODO: sns.ecdfplot(data=tips, x='tip', hue='day', palette='tab10', ax=axes[1])
# TODO: axes[1].set_title('Tip ECDF by Day')
plt.tight_layout()
plt.savefig('practice_dist.png', dpi=80)
plt.close()
print("Saved practice_dist.png")
barplot shows the mean ± CI of a numeric variable by category. countplot shows frequency. Both accept hue for a third dimension.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
# Mean tip by day + confidence interval
sns.barplot(data=tips, x='day', y='tip',
hue='sex', palette='Set2', ax=axes[0])
axes[0].set_title('Avg Tip by Day & Gender')
axes[0].set_ylabel('Mean Tip ($)')
# Count of meals per day
sns.countplot(data=tips, x='day', hue='time',
palette='pastel', ax=axes[1])
axes[1].set_title('Meal Count by Day & Time')
plt.tight_layout()
plt.savefig('cat_bar_count.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
# Compute mean tip per day — sort for readability
order = tips.groupby('day')['tip'].mean().sort_values(ascending=False).index
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(data=tips, y='day', x='tip',
order=order, palette='Blues_d', orient='h', ax=ax)
ax.set_xlabel('Average Tip ($)')
ax.set_title('Average Tip by Day of Week (sorted)')
plt.tight_layout()
plt.savefig('cat_hbar.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Demo: grouped bar chart of mean quarterly sales per product with 95%
# bootstrap confidence intervals; saved as cat_grouped_bar.png.
np.random.seed(7)
sns.set_theme(style='whitegrid')

# Simulated quarterly sales by product. Several observations per
# (quarter, product) cell are needed: with a single row per cell there is
# nothing to bootstrap and the advertised CI can never be drawn.
products = ['Alpha', 'Beta', 'Gamma']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
base_sales = {'Alpha': 120, 'Beta': 85, 'Gamma': 160}
n_obs = 8  # observations per (quarter, product) cell
df = pd.DataFrame([
    {'quarter': q, 'product': p, 'sales': sale}
    for q in quarters
    for p in products
    for sale in np.random.normal(base_sales[p], 15, n_obs)
])

fig, ax = plt.subplots(figsize=(9, 4))
# seed= makes the bootstrap CI reproducible; np.random.seed() above only
# controls the simulated data.
sns.barplot(data=df, x='quarter', y='sales', hue='product',
            palette='Set2', capsize=0.08, seed=7, ax=ax)

# Value labels, centred inside each bar so they do not collide with the
# error bars. Zero-height patches are skipped.
for bar in ax.patches:
    bar_height = bar.get_height()
    if bar_height > 0:
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar_height / 2,
                f'{bar_height:.0f}',
                ha='center', va='center', fontsize=7)

ax.set_title('Quarterly Sales by Product (mean ± 95% CI)')
ax.set_ylabel('Sales ($K)')
fig.tight_layout()
fig.savefig('cat_grouped_bar.png', dpi=80)
plt.close(fig)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Demo: (1) figure-level catplot showing the median total bill per day and
# meal time, (2) axes-level countplot of diners by day and smoker status.
# NOTE: this seeds NumPy's legacy global RNG only; the catplot bootstrap is
# seeded separately through seed= below.
np.random.seed(21)
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
day_order = ['Thur', 'Fri', 'Sat', 'Sun']

# --- catplot: figure-level wrapper for all categorical plot kinds ---
# kind='bar' with estimator=median and dodge=True (default) for grouped bars
g = sns.catplot(
    data=tips, x='day', y='total_bill', hue='time',
    kind='bar',
    estimator=np.median,     # use median instead of default mean
    errorbar=('ci', 95),     # 95 % bootstrap CI
    seed=21,                 # reproducible bootstrap resampling
    dodge=True,              # bars side-by-side (not stacked)
    palette='Set1',
    capsize=0.10,
    height=4, aspect=1.4,
    order=day_order,
)
g.set_axis_labels('Day', 'Median Total Bill ($)')
# No g.set_titles() call: the grid has no row/col facets, so there are no
# facet titles to set; the figure title below is the only heading.
g.figure.suptitle('catplot: Median Total Bill by Day & Meal Time (dodge=True)', y=1.03)
# Grid-aware layout keeps the outside legend clear of the axes; g.savefig
# defaults to bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('cat_catplot_median_dodge.png', dpi=80)
plt.close(g.figure)

# --- second figure: countplot with dodge to compare two binary variables ---
fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(data=tips, x='day', hue='smoker',
              dodge=True, palette='Dark2',
              order=day_order, ax=ax)
ax.set_title('Diner Count by Day & Smoker Status (dodge=True)')
ax.set_ylabel('Count')
fig.tight_layout()
fig.savefig('cat_count_dodge.png', dpi=80)
plt.close(fig)
print("catplot (median, dodge) and countplot saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(7)
categories = ['Electronics','Clothing','Books','Toys','Sports','Home','Beauty']
data = []
for cat in categories:
base_rate = np.random.uniform(0.03, 0.18)
for _ in range(200):
data.append({
'category': cat,
'returned': int(np.random.random() < base_rate),
'channel': np.random.choice(['Online','In-store'], p=[0.7, 0.3]),
})
df = pd.DataFrame(data)
order = (df.groupby('category')['returned']
.mean()
.sort_values(ascending=False)
.index.tolist())
sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(9, 5))
sns.barplot(data=df, x='category', y='returned',
hue='channel', order=order,
palette={'Online':'#3498db','In-store':'#e67e22'},
capsize=0.08, ax=ax)
ax.axhline(df['returned'].mean(), color='red', linestyle='--',
linewidth=1.5, label=f"Overall avg: {df['returned'].mean():.1%}")
ax.set_ylabel('Return Rate'); ax.yaxis.set_major_formatter(
plt.FuncFormatter(lambda x, _: f'{x:.0%}'))
ax.set_xlabel(''); ax.set_title('Product Return Rate by Category & Channel')
ax.legend(); plt.xticks(rotation=20, ha='right')
plt.tight_layout()
plt.savefig('rw_cat_returns.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 4))
day_order = ['Thur', 'Fri', 'Sat', 'Sun']
# 1 & 2. Grouped barplot sorted by calendar order
# TODO: sns.barplot(data=tips, x='day', y='total_bill', hue='time',
# TODO: order=day_order, capsize=0.1, palette='Set2', ax=ax)
# 3. Overall mean reference line
overall_mean = tips['total_bill'].mean()
# TODO: ax.axhline(overall_mean, color='red', linestyle='--', linewidth=1.5,
# TODO: label=f'Overall mean: ${overall_mean:.2f}')
# TODO: ax.set_title('Total Bill by Day and Meal Time')
# TODO: ax.set_ylabel('Mean Total Bill ($)')
# TODO: ax.legend()
plt.tight_layout()
plt.savefig('practice_bar.png', dpi=80)
plt.close()
print("Saved practice_bar.png")
Box plots show the 5-number summary (min, Q1, median, Q3, max). Violin plots also show the distribution shape via KDE.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: box plot and split violin plot of total bill per day, grouped by
# meal time, side by side; saved as cat_box_violin.png.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (box_ax, violin_ax) = plt.subplots(1, 2, figsize=(12, 4))

sns.boxplot(data=tips, x='day', y='total_bill',
            hue='time', palette='Set3', ax=box_ax)
# Title text repaired: the dash had been mis-decoded.
box_ax.set_title('Total Bill — Box Plot')

# split=True draws the two hue levels as the two halves of one violin.
sns.violinplot(data=tips, x='day', y='total_bill',
               hue='time', split=True,
               palette='Set2', inner='quartile', ax=violin_ax)
violin_ax.set_title('Total Bill — Violin Plot')

fig.tight_layout()
fig.savefig('cat_box_violin.png', dpi=80)
plt.close(fig)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 5))
# Letter-value plot (boxenplot) for larger datasets
sns.boxenplot(data=tips, x='day', y='total_bill',
palette='muted', ax=ax)
# Overlay raw points
sns.stripplot(data=tips, x='day', y='total_bill',
color='black', size=3, alpha=0.3, ax=ax, jitter=True)
ax.set_title('Total Bill Distribution per Day (boxen + strip)')
ax.set_ylabel('Total Bill ($)')
plt.tight_layout()
plt.savefig('cat_boxen_strip.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(42)
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
# Left: violin per species for petal_length
sns.violinplot(data=iris, x='species', y='petal_length',
palette='Set2', inner='box', ax=axes[0])
axes[0].set_title('Petal Length by Species (inner=box)')
axes[0].set_ylabel('Petal Length (cm)')
# Right: layered β violin + box + strip
sns.violinplot(data=iris, x='species', y='sepal_width',
palette='pastel', inner=None, ax=axes[1])
sns.boxplot(data=iris, x='species', y='sepal_width',
width=0.12, fliersize=0,
boxprops=dict(facecolor='white', zorder=2), ax=axes[1])
sns.stripplot(data=iris, x='species', y='sepal_width',
color='black', size=2.5, alpha=0.4, jitter=True, ax=axes[1])
axes[1].set_title('Sepal Width: Violin + Box + Strip')
axes[1].set_ylabel('Sepal Width (cm)')
plt.tight_layout()
plt.savefig('cat_violin_layered.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(7)
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
# Left: swarmplot — all points plotted without overlap (small datasets)
sns.swarmplot(data=tips, x='day', y='total_bill',
hue='sex', dodge=True,
palette='Set2', size=4, ax=axes[0])
axes[0].set_title('swarmplot: Total Bill by Day & Sex')
axes[0].set_ylabel('Total Bill ($)')
axes[0].legend(title='Sex', loc='upper left')
# Right: pointplot — shows mean + CI as a connected dot plot
# great for showing trends across ordered categories
sns.pointplot(data=tips, x='day', y='tip', hue='sex',
dodge=0.3, linestyles=['--', '-'],
markers=['o', 's'], palette='Set1',
capsize=0.12, errorbar=('ci', 95),
order=['Thur', 'Fri', 'Sat', 'Sun'],
ax=axes[1])
axes[1].set_title('pointplot: Mean Tip by Day & Sex (95% CI)')
axes[1].set_ylabel('Mean Tip ($)')
axes[1].legend(title='Sex')
plt.tight_layout()
plt.savefig('cat_swarm_point.png', dpi=80)
plt.close()
print("swarmplot and pointplot saved.")import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(42)
depts = {
'Engineering': (120000, 25000, 80),
'Sales': (85000, 20000, 60),
'Marketing': (90000, 18000, 40),
'HR': (70000, 12000, 30),
'Data Science': (130000, 22000, 50),
'Operations': (75000, 15000, 55),
}
rows = []
for dept, (mean, std, n) in depts.items():
salaries = np.random.normal(mean, std, n).clip(50000, 200000)
for s in salaries:
rows.append({'dept': dept, 'salary': round(s, -2),
'level': np.random.choice(['Junior','Mid','Senior'],
p=[0.3,0.45,0.25])})
df = pd.DataFrame(rows)
order = df.groupby('dept')['salary'].median().sort_values(ascending=False).index
sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(11, 5))
sns.violinplot(data=df, x='dept', y='salary', order=order,
palette='muted', inner='quartile', ax=ax)
sns.stripplot(data=df, x='dept', y='salary', order=order,
hue='level', palette='dark:black', size=2.5,
alpha=0.35, dodge=False, ax=ax, legend=False)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x,_: f'${x/1000:.0f}K'))
ax.set_xlabel(''); ax.set_ylabel('Annual Salary')
ax.set_title('Salary Distribution by Department')
plt.xticks(rotation=15, ha='right')
plt.tight_layout()
plt.savefig('rw_cat_salary.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
# 1 & 2. Split violin by sex with quartile lines
# TODO: sns.violinplot(data=tips, x='day', y='total_bill',
# TODO: hue='sex', split=True, inner='quartile',
# TODO: palette='Set2', ax=axes[0])
# TODO: axes[0].set_title('Total Bill by Day & Sex (split violin)')
# 3. Violin + box + strip overlay on axes[1]
# TODO: sns.violinplot(data=tips, x='day', y='total_bill',
# TODO: palette='pastel', inner=None, ax=axes[1])
# TODO: sns.boxplot(data=tips, x='day', y='total_bill',
# TODO: width=0.15, fliersize=0,
# TODO: boxprops=dict(facecolor='white', zorder=2), ax=axes[1])
# TODO: sns.stripplot(data=tips, x='day', y='total_bill',
# TODO: color='black', size=2.5, alpha=0.35, jitter=True, ax=axes[1])
# TODO: axes[1].set_title('Total Bill: Violin + Box + Strip')
plt.tight_layout()
plt.savefig('practice_violin.png', dpi=80)
plt.close()
print("Saved practice_violin.png")
scatterplot visualizes two numeric variables. regplot / lmplot add a regression line with a confidence interval automatically.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
data=tips, x='total_bill', y='tip',
hue='time', size='size',
sizes=(40, 200), alpha=0.7,
palette='Set1', ax=ax
)
ax.set_title('Tip vs Total Bill (size = party size)')
ax.set_xlabel('Total Bill ($)'); ax.set_ylabel('Tip ($)')
plt.tight_layout()
plt.savefig('scatter_hue_size.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: axes-level regplot (one regression line) versus figure-level lmplot
# (one regression per hue group). Each is saved to its own PNG.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# regplot — single regression line
fig, ax = plt.subplots(figsize=(7, 4))
sns.regplot(data=tips, x='total_bill', y='tip',
            scatter_kws=dict(alpha=0.4, s=30),
            line_kws=dict(color='red', linewidth=2), ax=ax)
ax.set_title('regplot: Tip vs Bill')
fig.tight_layout()
fig.savefig('scatter_regplot.png', dpi=80)
plt.close(fig)

# lmplot — regression per group (returns FacetGrid)
g = sns.lmplot(data=tips, x='total_bill', y='tip',
               hue='smoker', palette='Set1',
               scatter_kws=dict(alpha=0.4, s=25),
               height=4, aspect=1.4)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.figure.suptitle('lmplot: Tip vs Bill by Smoker', y=1.02)
# Grid-aware layout keeps the outside legend clear of the axes; g.savefig
# defaults to bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('scatter_lmplot.png', dpi=80)
plt.close(g.figure)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(10)
sns.set_theme(style='whitegrid')
# Synthetic dataset with 3 categorical dimensions
n = 120
df = pd.DataFrame({
'spend': np.random.uniform(10, 500, n),
'revenue': np.random.uniform(50, 2000, n),
'channel': np.random.choice(['Search', 'Social', 'Email'], n),
'region': np.random.choice(['North', 'South', 'East'], n),
'budget': np.random.uniform(5, 50, n),
})
fig, ax = plt.subplots(figsize=(9, 5))
sns.scatterplot(
data=df, x='spend', y='revenue',
hue='channel', # color
size='budget', # marker area
style='region', # marker shape
sizes=(30, 250),
alpha=0.75,
palette='tab10',
ax=ax
)
ax.set_title('Revenue vs Spend β hue=channel, size=budget, style=region')
ax.set_xlabel('Ad Spend ($)')
ax.set_ylabel('Revenue ($)')
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('scatter_hue_size_style.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Demo: figure-level relplot with one facet per channel (wrapped at three
# columns) and a distinct colour + marker per region; saved as
# scatter_relplot_colwrap_markers.png.
np.random.seed(33)
sns.set_theme(style='whitegrid', font_scale=0.9)

# Synthetic multi-channel, multi-region dataset
channels = ['Search', 'Social', 'Email', 'Display', 'Affiliate']
regions = ['North', 'South', 'East']
channel_slope = {'Search': 4.0, 'Social': 2.5, 'Email': 5.5,
                 'Display': 1.5, 'Affiliate': 3.2}
points_per_cell = 30

rows = []
for ch in channels:
    slope = channel_slope[ch]
    for region in regions:
        spend = np.random.uniform(200, 8000, points_per_cell)
        # Noise grows with spend so the scatter fans out at higher budgets.
        rev = spend * slope + np.random.randn(points_per_cell) * spend * 0.25
        rows.extend(
            {'channel': ch, 'region': region,
             'spend': round(s, 0), 'revenue': round(r, 0)}
            for s, r in zip(spend, rev)
        )
df = pd.DataFrame(rows)

# relplot with col=channel, col_wrap=3, hue+style=region
# Each hue level gets a distinct marker AND color automatically
g = sns.relplot(
    data=df, x='spend', y='revenue',
    col='channel', col_wrap=3,
    hue='region', style='region',   # different marker per region
    markers=['o', 's', '^'],        # explicit marker list
    palette='Set2',
    alpha=0.65, s=40,
    height=3, aspect=1.2,
    kind='scatter',
)
g.set_titles('{col_name}')
g.set_axis_labels('Ad Spend ($)', 'Revenue ($)')
g.figure.suptitle('relplot: Spend vs Revenue — col_wrap=3, marker per region', y=1.03)
# relplot has already drawn the legend; retitle it rather than calling
# g.add_legend() again, which would stack a second legend on the figure.
if g.legend is not None:
    g.legend.set_title('Region')
# Grid-aware layout keeps the outside legend clear of the axes; g.savefig
# defaults to bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('scatter_relplot_colwrap_markers.png', dpi=80)
plt.close(g.figure)
print("relplot col_wrap + marker styles saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Demo: per-channel regression of revenue on ad spend (lmplot, one facet per
# channel), plus a printed revenue/spend ratio; saved as rw_scatter_ads.png.
np.random.seed(5)
channels = ['Search', 'Social', 'Display', 'Email']
channel_slope = {'Search': 4.5, 'Social': 3.0, 'Display': 1.8, 'Email': 6.0}
points_per_channel = 50

rows = []
for ch in channels:
    spend = np.random.uniform(500, 10000, points_per_channel)
    # Noise grows with spend so the scatter fans out at higher budgets.
    rev = spend * channel_slope[ch] + np.random.randn(points_per_channel) * spend * 0.3
    rows.extend({'channel': ch, 'spend': round(s, 0), 'revenue': round(r, 0)}
                for s, r in zip(spend, rev))
df = pd.DataFrame(rows)

sns.set_theme(style='whitegrid')
g = sns.lmplot(
    data=df, x='spend', y='revenue', hue='channel',
    col='channel', col_wrap=2,
    scatter_kws=dict(alpha=0.5, s=25),
    height=3.5, aspect=1.2,
    palette='tab10',
)
g.set_axis_labels('Ad Spend ($)', 'Revenue ($)')
g.set_titles('{col_name}')
g.figure.suptitle('Ad Spend vs Revenue by Channel', y=1.02, fontsize=13)

# Print ROI per channel
print("Estimated ROI (revenue/spend):")
for ch in channels:
    sub = df[df.channel == ch]
    roi = sub.revenue.sum() / sub.spend.sum()
    print(f"  {ch:8s}: {roi:.2f}x")

# Grid-aware layout keeps the outside legend clear of the axes; g.savefig
# defaults to bbox_inches='tight', so the raised suptitle is not cropped.
g.tight_layout()
g.savefig('rw_scatter_ads.png', dpi=80)
plt.close(g.figure)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# 1. Scatter with hue=day, size=size, style=smoker
# TODO: sns.scatterplot(
# TODO: data=tips, x='total_bill', y='tip',
# TODO: hue='day', size='size', style='smoker',
# TODO: sizes=(30, 200), alpha=0.7, palette='tab10', ax=axes[0]
# TODO: )
# TODO: axes[0].set_title('Tip vs Bill β day/size/smoker encoded')
# 2. Regression lines for smokers vs non-smokers
for smoker_val, color in [('Yes', '#e74c3c'), ('No', '#3498db')]:
subset = tips[tips['smoker'] == smoker_val]
# TODO: sns.regplot(data=subset, x='total_bill', y='tip',
# TODO: scatter_kws=dict(alpha=0.3, s=20, color=color),
# TODO: line_kws=dict(color=color, linewidth=2, label=f'Smoker={smoker_val}'),
# TODO: ax=axes[1])
pass
# TODO: axes[1].legend(); axes[1].set_title('Regression by Smoker Status')
plt.tight_layout()
plt.savefig('practice_scatter.png', dpi=80)
plt.close()
print("Saved practice_scatter.png")
sns.heatmap renders a matrix as color intensities. It is ideal for correlation matrices, confusion matrices, and pivot table results.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.set_theme(style='white')
df = sns.load_dataset('tips')
# Select numeric columns
corr = df[['total_bill','tip','size']].corr()
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='RdBu_r',
vmin=-1, vmax=1, linewidths=0.5,
square=True, ax=ax)
ax.set_title('Feature Correlation Matrix')
plt.tight_layout()
plt.savefig('heatmap_corr.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Demo: pivot long-form monthly revenue into a region x month matrix and
# render it as an annotated heatmap; saved as heatmap_pivot.png.
np.random.seed(9)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
regions = ['North', 'South', 'East', 'West']
data = pd.DataFrame({
    'month':   np.tile(months, len(regions)),
    'region':  np.repeat(regions, len(months)),
    'revenue': np.random.uniform(50, 200, len(months) * len(regions)).round(1),
})

pivot = data.pivot(index='region', columns='month', values='revenue')
# pivot() sorts the string column labels alphabetically (Apr, Feb, Jan, ...);
# reindex to restore calendar order on the x-axis.
pivot = pivot.reindex(columns=months)

fig, ax = plt.subplots(figsize=(9, 4))
sns.heatmap(pivot, annot=True, fmt='.0f', cmap='YlGnBu',
            linewidths=0.4, ax=ax, cbar_kws=dict(label='Revenue ($K)'))
ax.set_title('Monthly Revenue by Region ($K)')
ax.set_xlabel('')
ax.set_ylabel('')
fig.tight_layout()
fig.savefig('heatmap_pivot.png', dpi=80)
plt.close(fig)
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
# Simulate a 4-class confusion matrix
classes = ['Cat', 'Dog', 'Bird', 'Fish']
# True labels and predicted labels
true_labels = np.random.choice(classes, 200, p=[0.3, 0.3, 0.2, 0.2])
# Add some misclassification noise
pred_labels = true_labels.copy()
noise_idx = np.random.choice(len(true_labels), 40, replace=False)
pred_labels[noise_idx] = np.random.choice(classes, 40)
# Build confusion matrix manually
cm = pd.crosstab(pd.Series(true_labels, name='Actual'),
pd.Series(pred_labels, name='Predicted'))
# Ensure all classes present
cm = cm.reindex(index=classes, columns=classes, fill_value=0)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# Raw counts
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
linewidths=0.5, ax=axes[0])
axes[0].set_title('Confusion Matrix (counts)')
# Normalized (recall per class)
cm_norm = cm.div(cm.sum(axis=1), axis=0).round(2)
sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='YlOrRd',
vmin=0, vmax=1, linewidths=0.5, ax=axes[1])
axes[1].set_title('Confusion Matrix (row-normalized recall)')
plt.tight_layout()
plt.savefig('heatmap_confusion.png', dpi=80)
plt.close()import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
sns.set_theme(style='white')

# --- Part 1: lower-triangle heatmap using the mask parameter ---
iris = sns.load_dataset('iris')
corr = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].corr()
# heatmap hides every cell whose mask entry is True. np.triu(..., k=1) is
# True strictly above the diagonal, so the lower triangle and the diagonal
# stay visible.
mask_upper = np.triu(np.ones_like(corr, dtype=bool), k=1)
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr, mask=mask_upper, annot=True, fmt='.2f',
            cmap='coolwarm', vmin=-1, vmax=1,
            square=True, linewidths=0.5,
            cbar_kws=dict(shrink=0.7), ax=ax)
ax.set_title('Iris Correlation — lower triangle only (mask=triu)')
plt.tight_layout()
plt.savefig('heatmap_triangle_mask.png', dpi=80)
plt.close()

# --- Part 2: clustermap — hierarchically clusters rows AND columns ---
np.random.seed(7)
n_genes, n_samples = 20, 12
data = pd.DataFrame(
    np.random.randn(n_genes, n_samples),
    index=[f'Gene_{i:02d}' for i in range(n_genes)],
    columns=[f'S{j:02d}' for j in range(n_samples)]
)
# Add block structure so clustering is visible
data.iloc[:8, :6] += 2    # high block top-left
data.iloc[12:, 6:] -= 2   # low block bottom-right
g = sns.clustermap(
    data,
    cmap='RdBu_r', center=0,
    figsize=(9, 7),
    dendrogram_ratio=(0.12, 0.12),
    cbar_pos=(0.02, 0.85, 0.03, 0.12),
    linewidths=0.3,
    method='ward'
)
g.figure.suptitle('clustermap: Hierarchical Clustering of Gene Expression', y=1.01)
# bbox_inches='tight' keeps the title, which sits above the figure (y > 1).
plt.savefig('heatmap_clustermap.png', dpi=80, bbox_inches='tight')
plt.close()
print("Triangle mask heatmap and clustermap saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Correlation heatmap over synthetic customer features plus a churn flag
# derived from three of those features.
np.random.seed(3)
n = 500
features = {
    'tenure_months': np.random.exponential(24, n).clip(1, 72),
    'monthly_spend': np.random.normal(120, 40, n).clip(20),
    'support_tickets': np.random.poisson(2, n),
    'logins_per_week': np.random.normal(5, 3, n).clip(0),
    'nps_score': np.random.normal(7, 2, n).clip(1, 10),
    # Placeholder draw; overwritten by the rule-based flag below.
    'churned': np.random.binomial(1, 0.25, n),
}
df = pd.DataFrame(features)

# Churn score: weighted risk indicators plus uniform noise, thresholded at 0.5.
many_tickets = (df['support_tickets'] > 4).astype(int)
rare_logins = (df['logins_per_week'] < 2).astype(int)
low_nps = (df['nps_score'] < 5).astype(int)
noise = np.random.rand(n)
churn_score = many_tickets * 0.4 + rare_logins * 0.3 + low_nps * 0.3 + noise * 0.3
df['churned'] = churn_score > 0.5

corr = df.corr(numeric_only=True)

sns.set_theme(style='white')
fig, ax = plt.subplots(figsize=(7, 6))
heatmap_opts = dict(
    annot=True, fmt='.2f', cmap='coolwarm',
    vmin=-1, vmax=1, square=True, linewidths=0.5,
    annot_kws=dict(size=9),
)
sns.heatmap(corr, ax=ax, **heatmap_opts)
ax.set_title('Customer Feature Correlation (churn focus)', pad=12)
plt.tight_layout()
plt.savefig('rw_heatmap_churn.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Practice skeleton: the "# TODO:" lines hold the intended solution as
# comments. Until they are filled in, the script saves an empty set of axes.
sns.set_theme(style='white')
iris = sns.load_dataset('iris')
# 1. Compute correlation matrix
# TODO: corr = iris[['sepal_length','sepal_width','petal_length','petal_width']].corr()
# 2. Mask upper triangle
# (np.triu with its default k=0 is also True on the diagonal, so the
# diagonal cells are hidden as well.)
# TODO: mask = np.triu(np.ones_like(corr, dtype=bool))
fig, ax = plt.subplots(figsize=(6, 5))
# 3. Plot annotated heatmap with mask
# TODO: sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm',
# TODO: vmin=-1, vmax=1, square=True, linewidths=0.5,
# TODO: mask=mask, ax=ax)
# TODO: ax.set_title('Iris Feature Correlation (lower triangle)')
plt.tight_layout()
plt.savefig('practice_heatmap.png', dpi=80)
plt.close()
print("Saved practice_heatmap.png")
pairplot creates a grid of scatter plots and distributions for all numeric column pairs — fast exploratory data analysis.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Pair plot of the iris measurements, colored by species, with KDE curves
# on the diagonal.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
g = sns.pairplot(
    iris, hue='species',
    palette='Set2',
    plot_kws=dict(alpha=0.5, s=25),
    diag_kind='kde'
)
g.figure.suptitle('Iris Dataset — Pair Plot', y=1.02)
# The title sits above the figure (y > 1), so the saved image is cropped to
# the drawn content with bbox_inches='tight'; otherwise the title is cut off.
# plt.tight_layout() is not called because it re-packs the axes without
# reserving room for the figure-level legend.
plt.savefig('pairplot_iris.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Regression pair plot of three numeric tips columns, split by meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
cols = ['total_bill', 'tip', 'size']
g = sns.pairplot(
    tips[cols + ['time']], hue='time',
    kind='reg',          # scatter + regression line
    diag_kind='hist',    # histogram on diagonal
    palette='Set1',
    plot_kws=dict(scatter_kws=dict(alpha=0.3, s=20),
                  line_kws=dict(linewidth=1.5))
)
g.figure.suptitle('Tips Dataset — Regression Pair Plot', y=1.02)
# Crop to the drawn content so the title above the figure (y > 1) is kept.
# plt.tight_layout() is not called because it re-packs the axes without
# reserving room for the figure-level legend.
plt.savefig('pairplot_reg.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# PairGrid with a different plot type in each region of the grid.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
cols = ['sepal_length', 'sepal_width', 'petal_length']
g = sns.PairGrid(iris[cols + ['species']], hue='species',
                 palette='tab10', diag_sharey=False)
# Upper triangle: scatter
g.map_upper(sns.scatterplot, alpha=0.4, s=20)
# Lower triangle: KDE contours
g.map_lower(sns.kdeplot, fill=True, alpha=0.25, levels=4)
# Diagonal: histogram
g.map_diag(sns.histplot, kde=True, alpha=0.5)
g.add_legend()
g.figure.suptitle('PairGrid: Custom Upper/Lower/Diagonal', y=1.02)
# Crop to the drawn content so the title above the figure (y > 1) is kept.
# plt.tight_layout() is not called because it re-packs the axes without
# reserving room for the legend added above.
plt.savefig('pairgrid_custom.png', dpi=80, bbox_inches='tight')
plt.close()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd
from scipy import stats

# 2x2 diagnostics panel on synthetic stock features: regression residuals,
# a normal Q-Q plot, and a raw vs. log-transformed histogram.
sns.set_theme(style='whitegrid', font_scale=1.1)
rng = np.random.default_rng(7)
n = 200
df = pd.DataFrame({
    'market_cap': rng.lognormal(8, 1.5, n),
    'pe_ratio': rng.lognormal(3, 0.5, n),
    'debt_equity': rng.exponential(1.2, n),
    'revenue_growth': rng.normal(0.15, 0.12, n),
    'return_1y': rng.normal(0.08, 0.22, n),
})
# Add mild relationship
df['return_1y'] += 0.03 * np.log(df['market_cap']) - 0.02 * df['pe_ratio'] + rng.normal(0, 0.05, n)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. Residual plot: return minus the fitted line of return on P/E.
slope, intercept, r, p, se = stats.linregress(df['pe_ratio'], df['return_1y'])
residuals = df['return_1y'] - (slope * df['pe_ratio'] + intercept)
axes[0,0].scatter(df['pe_ratio'], residuals, alpha=0.5, color='steelblue', s=25)
axes[0,0].axhline(0, color='red', ls='--', lw=1.5)
axes[0,0].set(title='Residual Plot (PE vs Return)', xlabel='P/E Ratio', ylabel='Residual')
axes[0,0].text(0.05, 0.95, f'r={r:.3f}', transform=axes[0,0].transAxes,
               va='top', fontsize=10)

# 2. Q-Q plot for normality: sample quantiles vs. theoretical quantiles,
# with the least-squares line returned by probplot.
(osm, osr), (slope2, intercept2, r2) = stats.probplot(df['return_1y'])
axes[0,1].scatter(osm, osr, alpha=0.5, color='coral', s=20)
axes[0,1].plot(osm, slope2*np.array(osm)+intercept2, 'r-', lw=2)
axes[0,1].set(title='Q-Q Plot: Return (1Y)', xlabel='Theoretical Quantiles', ylabel='Sample Quantiles')

# 3. Log transform effect
axes[1,0].hist(df['market_cap'], bins=30, color='seagreen', alpha=0.7, edgecolor='white')
axes[1,0].set(title='Market Cap (Raw)', xlabel='Market Cap')
axes[1,1].hist(np.log(df['market_cap']), bins=30, color='seagreen', alpha=0.7, edgecolor='white')
axes[1,1].set(title='Market Cap (Log-Transformed)', xlabel='log(Market Cap)')

plt.suptitle('Financial Feature Diagnostics', fontsize=14, y=1.01)
plt.tight_layout()
# The suptitle is anchored above the figure (y > 1); crop to the drawn
# content so it is not cut off in the saved file.
plt.savefig('financial_diagnostics.png', dpi=100, bbox_inches='tight')
plt.show()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Pair plot of four synthetic financial metrics, colored by a randomly
# assigned sector.
np.random.seed(42)
n = 200
pe = np.random.lognormal(2.8, 0.5, n)
eps = np.random.normal(5, 2, n).clip(0.5)
rev_g = np.random.normal(0.12, 0.08, n)
roe = np.random.normal(0.15, 0.07, n).clip(0)
df = pd.DataFrame({
    'P/E Ratio': pe.round(2),
    'EPS ($)': eps.round(2),
    'Rev Growth': rev_g.round(3),
    'ROE': roe.round(3),
    'sector': np.random.choice(['Tech','Finance','Healthcare','Energy'], n),
})
sns.set_theme(style='whitegrid', font_scale=0.85)
g = sns.pairplot(
    df, hue='sector',
    vars=['P/E Ratio','EPS ($)','Rev Growth','ROE'],
    palette='tab10',
    plot_kws=dict(alpha=0.4, s=20),
    diag_kind='kde'
)
g.figure.suptitle('Financial Metrics — Pairplot by Sector', y=1.02)
# Crop to the drawn content so the title above the figure (y > 1) is kept.
# plt.tight_layout() is not called because it re-packs the axes without
# reserving room for the figure-level legend.
plt.savefig('rw_pairplot_finance.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
# Practice skeleton: the "# TODO:" lines hold the intended solution as
# comments. As written, only the dataset is loaded and no figure is saved.
sns.set_theme(style='whitegrid', font_scale=0.9)
# Rows with any missing value are dropped before plotting.
penguins = sns.load_dataset('penguins').dropna()
num_cols = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
# 1. pairplot with hue='species'
# TODO: g = sns.pairplot(
# TODO: penguins[num_cols + ['species']], hue='species',
# TODO: diag_kind='kde', palette='Set2',
# TODO: plot_kws=dict(alpha=0.4, s=20)
# TODO: )
# TODO: g.figure.suptitle('Penguins Pairplot', y=1.02)
# TODO: plt.tight_layout(); plt.savefig('practice_pairplot.png', dpi=80); plt.close()
# 2. PairGrid: scatter upper, KDE lower
# TODO: g2 = sns.PairGrid(penguins[num_cols + ['species']], hue='species',
# TODO: palette='tab10', diag_sharey=False)
# TODO: g2.map_upper(sns.scatterplot, alpha=0.4, s=15)
# TODO: g2.map_lower(sns.kdeplot, fill=True, alpha=0.2, levels=3)
# TODO: g2.map_diag(sns.histplot, kde=True, alpha=0.5)
# TODO: g2.add_legend()
# TODO: g2.figure.suptitle('Penguins PairGrid', y=1.02)
# TODO: plt.tight_layout(); plt.savefig('practice_pairgrid.png', dpi=80); plt.close()
print("Practice pairplot complete")
FacetGrid tiles the same plot across subsets of data defined by row, col, and hue — the most powerful Seaborn layout tool.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram of total bill in a grid: one column per meal time, one row per
# smoker status.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, col='time', row='smoker',
                  height=3, aspect=1.2, margin_titles=True)
g.map_dataframe(sns.histplot, x='total_bill', bins=15, kde=True)
g.set_axis_labels('Total Bill ($)', 'Count')
g.set_titles(row_template='{row_name}', col_template='{col_name}')
g.figure.suptitle('Total Bill by Time & Smoker', y=1.03)
plt.tight_layout()
# The title sits above the figure (y > 1); crop to the drawn content so it
# is not cut off in the saved file.
plt.savefig('facet_grid.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Box plots of tip by day, one column per meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
# catplot wraps barplot/boxplot/etc into a FacetGrid
g = sns.catplot(
    data=tips, x='day', y='tip',
    col='time', kind='box',
    palette='Set2',
    height=4, aspect=0.9
)
g.set_titles('{col_name}')
g.set_axis_labels('Day', 'Tip ($)')
g.figure.suptitle('Tip Distribution by Day & Meal Time', y=1.03)
plt.tight_layout()
# The title sits above the figure (y > 1); crop to the drawn content so it
# is not cut off in the saved file.
plt.savefig('facet_catplot.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Scatter plus regression line in every facet of a group x condition grid.
np.random.seed(5)
sns.set_theme(style='whitegrid', font_scale=0.85)

# Synthetic multi-group data: each group has its own slope and each
# condition its own vertical offset.
groups = ['A', 'B', 'C']
conditions = ['Low', 'High']
slopes = {'A': 1.5, 'B': 0.8, 'C': 2.2}
offsets = {'Low': 0, 'High': 50}
rows = []
for grp in groups:
    for cond in conditions:
        n = 40
        x = np.random.uniform(0, 100, n)
        y = x * slopes[grp] + offsets[cond] + np.random.randn(n) * 20
        rows.extend({'group': grp, 'condition': cond,
                     'x': round(xi, 1), 'y': round(yi, 1)}
                    for xi, yi in zip(x, y))
df = pd.DataFrame(rows)

g = sns.FacetGrid(df, col='group', row='condition',
                  height=3, aspect=1.2,
                  margin_titles=True, sharey=False)
g.map_dataframe(sns.scatterplot, x='x', y='y', alpha=0.5, s=20)
# scatter=False: only the fitted line is drawn over the existing points.
g.map_dataframe(sns.regplot, x='x', y='y',
                scatter=False,
                line_kws=dict(color='red', linewidth=1.5))
g.set_axis_labels('X', 'Y')
g.set_titles(col_template='{col_name}', row_template='{row_name}')
g.figure.suptitle('Faceted Scatter with Regression', y=1.03)
plt.tight_layout()
# The title sits above the figure (y > 1); crop to the drawn content so it
# is not cut off in the saved file.
plt.savefig('facet_scatter_reg.png', dpi=80, bbox_inches='tight')
plt.close()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd
from scipy import stats

# Two heatmaps side by side: monthly revenue per region, and month-over-month
# growth annotated with a marker for large gains and declines.
sns.set_theme(style='white', font_scale=1.1)
rng = np.random.default_rng(42)
regions = ['North','South','East','West','Central']
products = ['Widget','Gadget','Gizmo','Doohickey']
months = ['Jan','Feb','Mar','Apr','May','Jun']

# Simulate monthly revenue per region
data = rng.uniform(50, 300, (len(regions), len(months)))
df = pd.DataFrame(data, index=regions, columns=months)

# Compute growth vs prior month
growth = df.pct_change(axis=1) * 100
growth.iloc[:, 0] = rng.uniform(-5, 20, len(regions))  # fill first month

# Threshold markers (not a statistical test): growth above 15% is flagged as
# high, growth below -5% as a decline.
# NOTE(review): the high-growth glyph was unreadable in this copy of the
# file; '★' is a reconstruction — confirm against the original.
sig_markers = pd.DataFrame('', index=regions, columns=months)
for region in regions:
    for month in months:
        pct = growth.loc[region, month]
        sig_markers.loc[region, month] = '★' if pct > 15 else ('▼' if pct < -5 else '')

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
sns.heatmap(df, annot=True, fmt='.0f', cmap='YlOrRd',
            linewidths=0.5, ax=axes[0], cbar_kws={'label': 'Revenue ($K)'})
axes[0].set_title('Monthly Revenue by Region', fontsize=12)

# Growth heatmap with markers: each cell shows the percentage on the first
# line and the marker (if any) on the second. fmt='' is required because the
# annotations are already strings.
annot = growth.round(1).astype(str) + '%\n' + sig_markers
sns.heatmap(growth, annot=annot, fmt='', cmap='RdYlGn', center=0,
            linewidths=0.5, ax=axes[1], cbar_kws={'label': 'MoM Growth (%)'},
            annot_kws={'size': 9})
axes[1].set_title('Month-over-Month Growth\n★=High Growth ▼=Decline', fontsize=12)

plt.suptitle('Regional Sales Performance Analysis', fontsize=13, y=1.02)
plt.tight_layout()
# The suptitle is anchored above the figure (y > 1); crop to the drawn
# content so it is not cut off in the saved file.
plt.savefig('grouped_heatmap.png', dpi=100, bbox_inches='tight')
plt.show()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Spend vs. margin scatter, one facet per region, colored by category.
np.random.seed(11)
regions = ['North','South','East','West']
categories = ['Electronics','Clothing','Food','Sports']
rows = []
for region in regions:
    for cat in categories:
        n = 40
        spend = np.random.uniform(100, 5000, n)
        margin = np.random.uniform(0.05, 0.45, n)
        rows.extend({'region': region, 'category': cat,
                     'spend': round(s, 0), 'margin': round(m, 3)}
                    for s, m in zip(spend, margin))
df = pd.DataFrame(rows)

sns.set_theme(style='whitegrid', font_scale=0.85)
g = sns.FacetGrid(df, col='region', col_wrap=2,
                  height=3.5, aspect=1.3, sharey=True)
g.map_dataframe(sns.scatterplot, x='spend', y='margin',
                hue='category', palette='tab10', alpha=0.6, s=25)
g.add_legend(title='Category')
g.set_axis_labels('Spend ($)', 'Margin')
g.set_titles('{col_name} Region')
g.figure.suptitle('Spend vs Margin by Region & Category', y=1.03)
# Crop to the drawn content so the title above the figure (y > 1) is kept.
# plt.tight_layout() is not called because it re-packs the axes without
# reserving room for the legend added above.
plt.savefig('rw_facet_sales.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
# Practice skeleton: the "# TODO:" lines hold the intended solution as
# comments. Until they are filled in, no grid is created before the save
# calls at the bottom run.
sns.set_theme(style='whitegrid', font_scale=0.9)
tips = sns.load_dataset('tips')
# 1 & 2. FacetGrid: col=day, scatter + regression
# TODO: g = sns.FacetGrid(tips, col='day', col_wrap=2,
# TODO: height=3.5, aspect=1.2, sharey=True)
# TODO: g.map_dataframe(sns.scatterplot, x='total_bill', y='tip',
# TODO: hue='sex', palette='Set1', alpha=0.6, s=30)
# TODO: g.map_dataframe(sns.regplot, x='total_bill', y='tip',
# TODO: scatter=False, line_kws=dict(color='black', linewidth=1.5))
# 3. Labels and title
# TODO: g.set_axis_labels('Total Bill ($)', 'Tip ($)')
# TODO: g.set_titles('{col_name}')
# TODO: g.figure.suptitle('Tip vs Bill by Day (colored by Sex)', y=1.03)
# TODO: g.add_legend(title='Sex')
# NOTE(review): the suggested title uses y=1.03, above the figure; saving
# without bbox_inches='tight' will likely crop it — confirm when solving.
plt.tight_layout()
plt.savefig('practice_facet.png', dpi=80)
plt.close()
print("Saved practice_facet.png")
sns.lineplot handles time series naturally — it aggregates multiple observations per x-value and draws confidence intervals.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Weekly sales per region with five noisy observations per (region, week),
# so lineplot has something to aggregate and a spread to draw a band from.
np.random.seed(5)
weeks = list(range(1, 13))
regions = ['North','South','East']
rows = []
for region in regions:
    # Each region gets its own starting level and weekly trend.
    base = np.random.uniform(80, 120)
    trend = np.random.uniform(1, 5)
    rows.extend(
        {
            'week': w,
            'region': region,
            'sales': base + trend * w + np.random.randn() * 15,
        }
        for w in weeks
        for _ in range(5)   # 5 reps per point, so a CI makes sense
    )
df = pd.DataFrame(rows)

sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=df, x='week', y='sales', hue='region',
             palette='Set2', linewidth=2.5, ax=ax)
ax.set_title('Weekly Sales by Region (with 95% CI)')
ax.set_xlabel('Week')
ax.set_ylabel('Sales ($K)')
plt.tight_layout()
plt.savefig('line_hue_ci.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# One line chart per product: twelve months of revenue following a random
# walk around a product-specific base, floored at 10.
np.random.seed(9)
products = ['Widget','Gadget','Doohickey']
months = pd.date_range('2024-01-01', periods=12, freq='MS')
rows = []
for prod in products:
    base = np.random.uniform(50, 200)
    vals = base + np.cumsum(np.random.randn(12) * 10)
    for m, v in zip(months, vals):
        rows.append({'month': m, 'product': prod, 'revenue': max(v, 10)})
df = pd.DataFrame(rows)

g = sns.relplot(data=df, x='month', y='revenue',
                col='product', kind='line',
                height=3, aspect=1.3,
                marker='o', markersize=5)
g.set_titles('{col_name}')
for ax in g.axes.flat:
    # Show abbreviated month names on the date axis.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax.tick_params(axis='x', rotation=45)
g.figure.suptitle('Monthly Revenue by Product', y=1.03)
plt.tight_layout()
# The title sits above the figure (y > 1); crop to the drawn content so it
# is not cut off in the saved file.
plt.savefig('line_relplot.png', dpi=80, bbox_inches='tight')
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Multi-product revenue lines with a shaded promotion window and an arrow
# annotation on the single highest revenue point.
np.random.seed(15)
sns.set_theme(style='whitegrid')
months = list(range(1, 13))
products = ['Alpha', 'Beta', 'Gamma']
rows = []
for prod in products:
    base = np.random.uniform(100, 300)
    vals = base + np.cumsum(np.random.randn(12) * 20)
    for m, v in zip(months, vals):
        rows.append({'month': m, 'product': prod, 'revenue': max(v, 10)})
df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(11, 4))
sns.lineplot(data=df, x='month', y='revenue', hue='product',
             marker='o', markersize=6, linewidth=2,
             palette='tab10', ax=ax)
# Shade a promotion period
ax.axvspan(5, 7, alpha=0.12, color='green', label='Promo period')
# Annotate peak revenue: the row with the largest revenue across all products.
peak = df.loc[df['revenue'].idxmax()]
ax.annotate(f"Peak: {peak['product']}\n${peak['revenue']:.0f}K",
            xy=(peak['month'], peak['revenue']),
            xytext=(peak['month'] + 0.5, peak['revenue'] - 30),
            arrowprops=dict(arrowstyle='->', color='red'),
            fontsize=8, color='red')
ax.set_title('Monthly Revenue with Event Shading')
ax.set_xlabel('Month'); ax.set_ylabel('Revenue ($K)')
ax.set_xticks(months)
ax.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun',
                    'Jul','Aug','Sep','Oct','Nov','Dec'], rotation=30)
ax.legend(loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('line_annotated.png', dpi=80)
plt.close()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd

# One week of hourly CPU and memory readings with injected spikes; points
# more than 2.5 standard deviations from the mean are flagged and marked.
sns.set_theme(style='darkgrid', font_scale=1.0)
rng = np.random.default_rng(42)
n = 168  # 1 week of hourly data
t = pd.date_range('2024-01-01', periods=n, freq='h')
cpu = 40 + 20*np.sin(np.linspace(0, 4*np.pi, n)) + rng.normal(0, 5, n)
memory = 60 + 10*np.sin(np.linspace(0, 2*np.pi, n)) + rng.normal(0, 3, n)
# Inject anomalies
cpu[[24, 72, 120, 145]] += rng.uniform(35, 50, 4)
memory[[36, 96, 130]] += rng.uniform(25, 35, 3)
df = pd.DataFrame({'time': t, 'CPU': cpu.clip(0,100), 'Memory': memory.clip(0,100)})

# Detect anomalies via z-score (two-sided: |value - mean| > 2.5 * std)
for col in ['CPU', 'Memory']:
    mu, sigma = df[col].mean(), df[col].std()
    df[f'{col}_anomaly'] = (df[col] - mu).abs() > 2.5 * sigma

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
for ax, metric, color in zip(axes, ['CPU', 'Memory'], ['steelblue', 'seagreen']):
    sns.lineplot(data=df, x='time', y=metric, ax=ax, color=color, lw=1.5, label=metric)
    # Mark the flagged points
    anomalies = df[df[f'{metric}_anomaly']]
    ax.scatter(anomalies['time'], anomalies[metric],
               color='red', s=60, zorder=5, label='Anomaly')
    # Only the upper threshold is drawn.
    ax.axhline(df[metric].mean() + 2.5*df[metric].std(),
               color='red', ls='--', lw=1, alpha=0.6, label='2.5sigma threshold')
    ax.set(ylabel=f'{metric} Usage (%)', title=f'{metric} Usage — Anomaly Detection')
    ax.legend(loc='upper right')
axes[-1].set_xlabel('Time')
plt.suptitle('Server Metrics Anomaly Dashboard', fontsize=13, y=1.01)
plt.tight_layout()
# The suptitle is anchored above the figure (y > 1); crop to the drawn
# content so it is not cut off in the saved file.
plt.savefig('anomaly_dashboard.png', dpi=100, bbox_inches='tight')
plt.show()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# 24-hour CPU and memory usage for five servers, with a load bump during
# business hours (09:00-17:00).
np.random.seed(20)
hours = np.arange(24)
servers = [f'srv-{i:02d}' for i in range(1, 6)]
rows = []
for srv in servers:
    cpu_base = np.random.uniform(20, 60)
    mem_base = np.random.uniform(40, 70)
    for h in hours:
        spike = 30 if 9 <= h <= 17 else 0  # business hours spike
        # The readings are plain Python floats (scalar draws from np.random),
        # which have no .clip() method, so np.clip is used to bound them.
        cpu = cpu_base + spike + np.random.randn() * 8
        memory = mem_base + spike * 0.3 + np.random.randn() * 5
        rows.append({
            'hour': h,
            'server': srv,
            'cpu': float(np.clip(cpu, 5, 100)),
            'memory': float(np.clip(memory, 10, 95)),
        })
df = pd.DataFrame(rows)

sns.set_theme(style='darkgrid')
fig, axes = plt.subplots(1, 2, figsize=(13, 4), sharey=False)
sns.lineplot(data=df, x='hour', y='cpu', hue='server',
             palette='tab10', linewidth=1.5, alpha=0.7, ax=axes[0])
axes[0].set_title('CPU Usage % (24h)'); axes[0].set_xlabel('Hour')
sns.lineplot(data=df, x='hour', y='memory', hue='server',
             palette='tab10', linewidth=1.5, alpha=0.7, ax=axes[1])
axes[1].set_title('Memory Usage % (24h)'); axes[1].set_xlabel('Hour')
# Both panels share the same server colors, so one legend is enough.
axes[1].get_legend().remove()
# Highlight business hours
for ax in axes:
    ax.axvspan(9, 17, alpha=0.07, color='yellow', label='Business hours')
plt.tight_layout()
plt.savefig('rw_line_servers.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Practice skeleton: the data set and empty axes are prepared here; the
# "# TODO:" lines hold the intended plotting calls as comments.
np.random.seed(99)
sns.set_theme(style='whitegrid')
products = ['Alpha', 'Beta', 'Gamma', 'Delta']
months = list(range(1, 25))

# 24 months of revenue per product: random walk around a base, floored at 5.
rows = []
for prod in products:
    base = np.random.uniform(80, 250)
    vals = base + np.cumsum(np.random.randn(24) * 15)
    rows.extend(
        {'month': m, 'product': prod, 'revenue': max(v, 5)}
        for m, v in zip(months, vals)
    )
df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(12, 4))
# 1. Line plot with hue and markers
# TODO: sns.lineplot(data=df, x='month', y='revenue', hue='product',
# TODO: marker='o', markersize=5, linewidth=2, palette='tab10', ax=ax)
# 2. Shade recession period (months 10-14)
# TODO: ax.axvspan(10, 14, alpha=0.12, color='red', label='Recession')
# 3. Annotate peak
peak = df.loc[df['revenue'].idxmax()]
# TODO: ax.annotate(f"Peak: {peak['product']}\n${peak['revenue']:.0f}K",
# TODO: xy=(peak['month'], peak['revenue']),
# TODO: xytext=(peak['month']+1, peak['revenue']-30),
# TODO: arrowprops=dict(arrowstyle='->', color='darkred'),
# TODO: fontsize=8, color='darkred')
# TODO: ax.set_title('Product Revenue Over 24 Months')
# TODO: ax.set_xlabel('Month'); ax.set_ylabel('Revenue ($K)')
# TODO: ax.legend(loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('practice_lineplot.png', dpi=80)
plt.close()
print("Saved practice_lineplot.png")
Seaborn returns Axes objects you can modify with any Matplotlib method. Combine both libraries for full control.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

# Seaborn box plot refined with plain Matplotlib calls: dollar-formatted
# y axis, hidden top/right spines, and the median written on each box.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(data=tips, x='day', y='total_bill', palette='pastel', ax=ax)

# Matplotlib customizations on top
ax.set_title('Total Bill by Day', fontsize=14, fontweight='bold', pad=12)
ax.set_xlabel('')
ax.set_ylabel('Total Bill', fontsize=11)
dollar_fmt = mticker.StrMethodFormatter('${x:.0f}')
ax.yaxis.set_major_formatter(dollar_fmt)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# Add median annotations: one label per day, placed at the box position
# (0, 1, 2, ...) slightly above the median value.
medians = tips.groupby('day')['total_bill'].median()
for pos, med in enumerate(medians):
    ax.text(pos, med + 0.5, f'${med:.0f}', ha='center', fontsize=9, color='darkred')

plt.tight_layout()
plt.savefig('custom_axes.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Three plot types stacked on one Axes: violin (shape), narrow box
# (quartiles), and jittered points (raw observations).
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
fig, ax = plt.subplots(figsize=(7, 5))

# All layers share the same data and x/y mapping.
shared = dict(data=iris, x='species', y='petal_length', ax=ax)

# Layer 1: violin, with its inner marks turned off
sns.violinplot(palette='pastel', inner=None, **shared)
# Layer 2: box inside violin, white-filled, without outlier markers
sns.boxplot(width=0.15, fliersize=0,
            boxprops=dict(facecolor='white', zorder=2), **shared)
# Layer 3: points
sns.stripplot(color='black', size=2.5, alpha=0.4, jitter=True, **shared)

ax.set_title('Petal Length by Species')
ax.set_ylabel('Petal Length (cm)')
plt.tight_layout()
plt.savefig('custom_layered.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd

# Publication-style violin plot comparing the accuracy of four methods,
# ordered from lowest to highest mean.
np.random.seed(42)
sns.set_theme(style='ticks', palette='colorblind', font_scale=1.0)

# Synthetic dataset: 50 accuracy samples per method.
samples = [
    np.random.normal(0.72, 0.04, 50),
    np.random.normal(0.81, 0.03, 50),
    np.random.normal(0.78, 0.05, 50),
    np.random.normal(0.85, 0.03, 50),
]
df = pd.DataFrame({
    'method': np.repeat(['Baseline', 'Model A', 'Model B', 'Model C'], 50),
    'accuracy': np.concatenate(samples),
})

fig, ax = plt.subplots(figsize=(8, 4))
mean_by_method = df.groupby('method')['accuracy'].mean()
order = mean_by_method.sort_values().index
sns.violinplot(data=df, x='method', y='accuracy', order=order,
               palette='colorblind', inner='box', ax=ax)

# Accuracy values are fractions in [0, 1]; show them as whole percentages.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.set_xlabel('')
ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Comparison', fontsize=13, fontweight='bold')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# Headline annotation: difference between the Model C and Baseline means.
is_best = df['method'] == 'Model C'
is_base = df['method'] == 'Baseline'
best_mean = df[is_best]['accuracy'].mean()
base_mean = df[is_base]['accuracy'].mean()
ax.annotate(f'+{(best_mean-base_mean)*100:.1f}% vs Baseline',
            xy=(0.5, 0.96), xycoords='axes fraction',
            ha='center', fontsize=9, color='darkgreen',
            fontweight='bold')

plt.tight_layout()
plt.savefig('custom_publication.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd

rng = np.random.default_rng(7)
tips = sns.load_dataset('tips')

# --- Figure 1: the same kind of histogram under three plotting contexts ---
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
ax_default, ax_paper, ax_talk = axes

# Default style
sns.histplot(tips['total_bill'], kde=True, ax=ax_default)
ax_default.set_title('Default Style')

# Paper context — smaller for publications
with sns.plotting_context('paper', font_scale=1.2):
    sns.histplot(tips['total_bill'], kde=True, color='seagreen', ax=ax_paper)
    ax_paper.set_title('Paper Context')

# Talk context — larger for presentations
with sns.plotting_context('talk', font_scale=0.9):
    sns.histplot(tips['tip'], kde=True, color='coral', ax=ax_talk)
    ax_talk.set_title('Talk Context')

plt.suptitle('Seaborn Context Comparison', fontsize=12)
plt.tight_layout()
plt.savefig('context_comparison.png', dpi=100)
plt.close()

# --- Figure 2: override with rc_context — dark background for a single plot ---
dark_rc = {
    'figure.facecolor': '#1e1e2e',
    'axes.facecolor': '#1e1e2e',
    'axes.labelcolor': 'white',
    'xtick.color': 'white',
    'ytick.color': 'white',
    'text.color': 'white',
}
with sns.axes_style('darkgrid'):
    with plt.rc_context(dark_rc):
        fig2, ax2 = plt.subplots(figsize=(6, 4))
        sns.scatterplot(data=tips, x='total_bill', y='tip',
                        hue='time', palette=['#ffa600','#58508d'],
                        alpha=0.8, s=60, ax=ax2)
        ax2.set_title('Dark Theme Override', color='white', fontsize=13)
        ax2.legend(labelcolor='white', facecolor='#2e2e3e')
        plt.tight_layout()
        # facecolor is passed explicitly so the saved file keeps the dark
        # background.
        plt.savefig('dark_theme.png', dpi=100, facecolor='#1e1e2e')
        plt.close()
print("Context comparison and dark theme override saved.")
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats

# A/B test summary: revenue distributions on the left, group means with a
# 1.96 * SEM error bar and the t-test p-value on the right.
np.random.seed(42)
control = np.random.normal(45.0, 12, 500)
variant = np.random.normal(48.5, 11, 500)
df = pd.DataFrame({
    'group': ['Control']*500 + ['Variant']*500,
    'revenue': np.concatenate([control, variant]),
})
t_stat, p_val = stats.ttest_ind(control, variant)

sns.set_theme(style='whitegrid')
fig, axes = plt.subplots(1, 2, figsize=(11, 4))

# Left: violin with a narrow white box on top
sns.violinplot(data=df, x='group', y='revenue',
               palette={'Control':'#3498db','Variant':'#e74c3c'},
               inner=None, ax=axes[0])
sns.boxplot(data=df, x='group', y='revenue',
            width=0.1, fliersize=0,
            boxprops=dict(facecolor='white', zorder=2), ax=axes[0])
axes[0].set_title('Revenue Distribution'); axes[0].set_ylabel('Revenue ($)')

# Right: mean + CI bar chart
summary = df.groupby('group')['revenue'].agg(['mean','sem']).reset_index()
colors = ['#3498db','#e74c3c']
bars = axes[1].bar(summary['group'], summary['mean'],
                   yerr=summary['sem']*1.96, capsize=6,
                   color=colors, alpha=0.8, edgecolor='white', linewidth=1.5)
# Write each mean just above its bar.
for bar, (_, row) in zip(bars, summary.iterrows()):
    axes[1].text(bar.get_x()+bar.get_width()/2, bar.get_height()+1,
                 f"${row['mean']:.1f}", ha='center', fontsize=10, fontweight='bold')
# Conventional star notation for the p-value.
sig = '***' if p_val < 0.001 else '**' if p_val < 0.01 else '*' if p_val < 0.05 else 'ns'
axes[1].annotate(f'p={p_val:.4f} {sig}', xy=(0.5,0.95), xycoords='axes fraction',
                 ha='center', fontsize=10, color='darkgreen' if p_val<0.05 else 'gray')
axes[1].set_title('Mean Revenue ± 95% CI'); axes[1].set_ylabel('Mean Revenue ($)')

fig.suptitle('A/B Test Results — Revenue', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('rw_custom_ab_test.png', dpi=80)
plt.close()
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
# Practice skeleton: the "# TODO:" lines hold the intended solution as
# comments. Until they are filled in, the script saves an empty set of axes.
# 1. Custom theme
# TODO: sns.set_theme(style='ticks', font_scale=1.1, rc={
# TODO: 'axes.spines.top': False,
# TODO: 'axes.spines.right': False,
# TODO: })
iris = sns.load_dataset('iris')
fig, ax = plt.subplots(figsize=(7, 5))
# 2. Violin + strip with husl palette
# Three husl colors, for use as the violin palette in the TODO below.
my_palette = sns.color_palette('husl', 3)
# TODO: sns.violinplot(data=iris, x='species', y='petal_length',
# TODO: palette=my_palette, inner=None, ax=ax)
# TODO: sns.stripplot(data=iris, x='species', y='petal_length',
# TODO: color='black', size=2.5, alpha=0.4, jitter=True, ax=ax)
# 3. Gridlines, axis label, bold title
# TODO: ax.yaxis.grid(True, linewidth=0.7, alpha=0.7)
# TODO: ax.set_ylabel('Petal Length (cm)', fontsize=11)
# TODO: ax.set_xlabel('Species', fontsize=11)
# TODO: ax.set_title('Iris Petal Length by Species', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('practice_custom.png', dpi=80)
plt.close()
print("Saved practice_custom.png")
Create small multiples — the same visualization across data subsets — with FacetGrid, PairGrid, catplot, and relplot.
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Tip vs. total bill, one column per meal time, colored by smoker status.
tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, col='time', hue='smoker', height=4, aspect=0.8)
g.map(sns.scatterplot, 'total_bill', 'tip', alpha=0.7)
g.add_legend()
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.set_titles(col_template='{col_name}')
g.figure.suptitle('Tips by Time and Smoker Status', y=1.02)

# bbox_inches='tight' keeps the title, which sits above the figure (y > 1).
out_file = 'facetgrid_tips.png'
plt.savefig(out_file, dpi=80, bbox_inches='tight')
plt.close()
print('Saved facetgrid_tips.png')
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# PairGrid over the iris data: histograms on the diagonal, scatter plots
# above it, KDE contours below it, all colored by species.
iris = sns.load_dataset('iris')
g = sns.PairGrid(iris, hue='species')
g.map_diag(sns.histplot, alpha=0.6)
g.map_upper(sns.scatterplot, alpha=0.6)
g.map_lower(sns.kdeplot)
g.add_legend()

out_file = 'pairgrid_iris.png'
plt.savefig(out_file, dpi=80, bbox_inches='tight')
plt.close()
print('Saved pairgrid_iris.png')
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Box plots of total bill by day, one column per meal time, split by sex.
tips = sns.load_dataset('tips')
facet_opts = dict(col='time', hue='sex', kind='box', height=4, aspect=0.8)
g = sns.catplot(data=tips, x='day', y='total_bill', **facet_opts)
g.set_axis_labels('Day', 'Total Bill ($)')
g.set_titles(col_template='{col_name}')
g.figure.suptitle('Bills by Day, Time, and Sex', y=1.02)

# bbox_inches='tight' keeps the title, which sits above the figure (y > 1).
out_file = 'catplot_tips.png'
plt.savefig(out_file, dpi=80, bbox_inches='tight')
plt.close()
print('Saved catplot_tips.png')
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Line plots of signal over timepoint, one column per region, one line per
# event, with errorbar='se' bands.
fmri = sns.load_dataset('fmri')
facet_opts = dict(col='region', hue='event', kind='line',
                  height=4, aspect=0.9, errorbar='se')
g = sns.relplot(data=fmri, x='timepoint', y='signal', **facet_opts)
g.set_titles(col_template='Region: {col_name}')
g.figure.suptitle('fMRI Signal by Region and Event', y=1.02)

# bbox_inches='tight' keeps the title, which sits above the figure (y > 1).
out_file = 'relplot_fmri.png'
plt.savefig(out_file, dpi=80, bbox_inches='tight')
plt.close()
print('Saved relplot_fmri.png')
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
flights = sns.load_dataset('flights')
# TODO: add 'season' column: 'Summer' if month in ['June','July','August'] else 'Other'
# TODO: relplot with col='month', x='year', y='passengers', hue='season', kind='line'
# TODO: save to 'flights_facet.png'
Add significance stars, confidence intervals, and comparison markers to seaborn plots for publication-ready statistical charts.
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=tips, x='day', y='total_bill', hue='sex',
errorbar='ci', capsize=0.1, alpha=0.85, ax=ax)
ax.set_title('Mean Total Bill with 95% CI')
ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
plt.tight_layout()
plt.savefig('barplot_ci.png', dpi=80); plt.close()
print('Saved barplot_ci.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
np.random.seed(42)
group_a = np.random.normal(42, 8, 50)
group_b = np.random.normal(48, 9, 50)
import pandas as pd
df = pd.DataFrame({'value': np.concatenate([group_a, group_b]), 'group': ['A']*50+['B']*50})
fig, ax = plt.subplots(figsize=(6, 5))
sns.boxplot(data=df, x='group', y='value', ax=ax, palette='Set2')
_, p = stats.ttest_ind(group_a, group_b)
stars = '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else 'ns'
y_max = df['value'].max() + 2
ax.plot([0, 0, 1, 1], [y_max, y_max+1, y_max+1, y_max], lw=1.5, color='black')
ax.text(0.5, y_max+1.2, stars, ha='center', va='bottom', fontsize=14)
ax.set_title(f'Group Comparison (p={p:.4f})')
plt.tight_layout()
plt.savefig('significance.png', dpi=80); plt.close()
print(f'Saved significance.png ({stars})')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Left panel: group means joined by lines; error bars span one standard deviation.
sns.pointplot(data=tips, x='day', y='total_bill', hue='sex',
              errorbar='sd', capsize=0.1, dodge=True, ax=axes[0])
axes[0].set_title('Pointplot with SD error bars')
# Right panel: markers only. `join=False` is deprecated as of seaborn 0.13, so
# the connecting lines are hidden with a 'none' line style instead.
sns.pointplot(data=tips, x='day', y='tip', hue='smoker',
              errorbar='se', linestyles='none', dodge=0.4, ax=axes[1])
axes[1].set_title('Pointplot (no join) with SE')
plt.tight_layout()
plt.savefig('pointplot.png', dpi=80); plt.close()
print('Saved pointplot.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from itertools import combinations
np.random.seed(42)
groups = {'Control': np.random.normal(10,2,40), 'Drug A': np.random.normal(12,2,40), 'Drug B': np.random.normal(14,3,40)}
df = pd.DataFrame([(v, g) for g, vals in groups.items() for v in vals], columns=['response','group'])
fig, ax = plt.subplots(figsize=(7, 6))
sns.boxplot(data=df, x='group', y='response', ax=ax, palette='pastel')
pairs = list(combinations(groups.keys(), 2))
y_max = df['response'].max()
for i, (g1, g2) in enumerate(pairs):
_, p = stats.ttest_ind(groups[g1], groups[g2])
stars = '***' if p<0.001 else '**' if p<0.01 else '*' if p<0.05 else 'ns'
x1 = list(groups.keys()).index(g1)
x2 = list(groups.keys()).index(g2)
y = y_max + 1.5*(i+1)
ax.plot([x1, x1, x2, x2], [y, y+0.3, y+0.3, y], lw=1.2, color='black')
ax.text((x1+x2)/2, y+0.4, stars, ha='center', fontsize=12)
ax.set_title('Multi-Group Significance Test')
plt.tight_layout()
plt.savefig('multi_sig.png', dpi=80); plt.close()
print('Saved multi_sig.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
np.random.seed(42)
group_a = np.random.normal(45, 12, 80)
group_b = np.random.normal(52, 14, 80)
df = pd.DataFrame({'amount': np.concatenate([group_a, group_b]), 'group': ['A']*80+['B']*80})
# TODO: barplot with ci errorbar
# TODO: t-test
# TODO: significance annotation if p<0.05
# TODO: save to 'ab_barplot.png'
Visualize statistical relationships and distributions with lmplot, residplot, jointplot, and ecdfplot for thorough exploratory analysis.
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
g = sns.lmplot(data=tips, x='total_bill', y='tip', hue='smoker',
ci=95, scatter_kws={'alpha':0.5}, height=5, aspect=1.2)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.axes[0,0].set_title('Tip vs Bill by Smoker Status (95% CI)')
plt.savefig('lmplot.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved lmplot.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
sns.residplot(data=tips, x='total_bill', y='tip', lowess=True, ax=axes[0],
scatter_kws={'alpha':0.5}, line_kws={'color':'red','linewidth':2})
axes[0].set_title('Residual Plot (lowess trend)')
axes[0].axhline(0, color='gray', linestyle='--')
sns.residplot(data=tips, x='size', y='total_bill', ax=axes[1],
scatter_kws={'alpha':0.5})
axes[1].set_title('Residual Plot: size vs bill')
axes[1].axhline(0, color='gray', linestyle='--')
plt.tight_layout()
plt.savefig('residplot.png', dpi=80); plt.close()
print('Saved residplot.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
# jointplot() is figure-level: every call builds its own figure, so no
# plt.subplots() grid is created up front (it would stay empty and never be closed).
for kind in ['scatter', 'kde', 'hex']:
    g = sns.jointplot(data=tips, x='total_bill', y='tip', kind=kind, height=4)
    g.set_axis_labels('Total Bill ($)', 'Tip ($)')
    g.figure.suptitle(f'Joint Plot: {kind}', y=1.02)
    g.savefig(f'joint_{kind}.png', dpi=80)
    plt.close(g.figure)  # release this figure before building the next one
    print(f'Saved joint_{kind}.png')
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Polynomial regression
sns.regplot(data=tips, x='total_bill', y='tip', order=2, ax=axes[0],
scatter_kws={'alpha':0.4}, line_kws={'color':'red','linewidth':2})
axes[0].set_title('Polynomial Regression (degree=2)')
# ECDF for comparing distributions
for day in ['Thur','Fri','Sat','Sun']:
subset = tips[tips['day']==day]
sns.ecdfplot(data=subset, x='total_bill', ax=axes[1], label=day)
axes[1].set_title('ECDF of Total Bill by Day')
axes[1].legend(); axes[1].set_xlabel('Total Bill ($)')
plt.tight_layout()
plt.savefig('regplot_ecdf.png', dpi=80); plt.close()
print('Saved regplot_ecdf.png')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
mpg = sns.load_dataset('mpg').dropna()
# TODO: lmplot x='horsepower', y='mpg', hue='origin', ci=90 -> save 'mpg_lm.png'
# TODO: residplot x='horsepower', y='mpg' -> save 'mpg_resid.png'
# TODO: jointplot kind='kde' -> save 'mpg_joint.png'import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({'value': np.random.randn(300), 'group': np.repeat(['A','B','C'], 100)})
g = sns.FacetGrid(df, col='group', height=3.5, aspect=0.9)
g.map(sns.histplot, 'value', kde=True, bins=20)
g.set_titles(col_template='{col_name}')
g.figure.suptitle('FacetGrid by Group', y=1.02)
g.savefig('facetgrid_hist.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_hist.png')
plt.close('all')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, row='sex', col='time', height=3, aspect=1.2, margin_titles=True)
g.map(sns.scatterplot, 'total_bill', 'tip', alpha=0.6)
g.add_legend()
g.savefig('facetgrid_2d.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_2d.png')
plt.close('all')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.PairGrid(iris, hue='species', vars=['sepal_length','petal_length','petal_width'])
g.map_upper(sns.scatterplot, alpha=0.6)
g.map_lower(sns.kdeplot, fill=True, alpha=0.4)
g.map_diag(sns.histplot, kde=True)
g.add_legend()
g.savefig('pairgrid_mixed.png', dpi=80, bbox_inches='tight')
print('Saved pairgrid_mixed.png')
plt.close('all')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def scatter_means(x, y, **kw):
    """Scatter ``x`` against ``y`` on the current axes and mark both means.

    Written to be passed to ``FacetGrid.map``. Any ``label`` keyword is
    dropped before the remaining keywords are forwarded to ``plt.scatter``.
    A red dashed horizontal line marks the mean of ``y`` and a blue dashed
    vertical line marks the mean of ``x``.
    """
    scatter_kw = dict(kw)
    scatter_kw.pop('label', None)
    plt.scatter(x, y, alpha=0.5, **scatter_kw)
    mean_x, mean_y = x.mean(), y.mean()
    plt.axhline(mean_y, color='red', ls='--', lw=1.5)
    plt.axvline(mean_x, color='blue', ls='--', lw=1.5)
np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(150),'y':np.random.randn(150),
'group':np.repeat(['G1','G2','G3'],50)})
g = sns.FacetGrid(df, col='group', height=3)
g.map(scatter_means, 'x', 'y')
g.figure.suptitle('Scatter with Group Means', y=1.02)
g.savefig('facetgrid_custom.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_custom.png')
plt.close('all')import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic').dropna(subset=['age'])
# TODO: FacetGrid col='pclass', hue='survived', map histplot 'age'
# TODO: add_legend(), suptitle, save 'titanic_facet.png'import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(42)
df = pd.DataFrame({
'score': np.concatenate([np.random.normal(70,10,80), np.random.normal(75,8,80), np.random.normal(65,12,80)]),
'class': ['A']*80 + ['B']*80 + ['C']*80
})
fig, ax = plt.subplots(figsize=(7, 5))
sns.violinplot(data=df, x='class', y='score', inner=None, palette='muted', alpha=0.7, ax=ax)
sns.swarmplot(data=df, x='class', y='score', color='black', size=2.5, alpha=0.6, ax=ax)
ax.set_title('Score Distribution by Class (Violin + Swarm)')
fig.savefig('violin_swarm.png', dpi=100, bbox_inches='tight')
print('Saved violin_swarm.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({
'response_ms': np.concatenate([np.random.exponential(200,300),
np.random.exponential(150,300),
np.random.exponential(300,300)]),
'server': ['A']*300 + ['B']*300 + ['C']*300
})
fig, ax = plt.subplots(figsize=(8, 5))
sns.ecdfplot(data=df, x='response_ms', hue='server', ax=ax)
target_line = ax.axvline(200, color='gray', ls=':')
ax.set_title('ECDF: Response Time by Server')
ax.set_xlabel('Response Time (ms)')
# A bare ax.legend() rebuilds the legend from labelled artists only, which
# drops the per-server entries seaborn created. Extend seaborn's legend with
# the target line instead.
hue_legend = ax.get_legend()
handles = list(hue_legend.get_lines()) + [target_line]
labels = [text.get_text() for text in hue_legend.get_texts()] + ['200ms target']
ax.legend(handles=handles, labels=labels,
          title=hue_legend.get_title().get_text())
fig.savefig('ecdf_comparison.png', dpi=100, bbox_inches='tight')
print('Saved ecdf_comparison.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
x = np.linspace(0, 10, 100)
y = 2*x + np.random.normal(0, 2, 100)
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
sns.regplot(x=x, y=y, ax=axes[0])
axes[0].set_title('Regression with CI')
sns.residplot(x=x, y=y, ax=axes[1])
axes[1].axhline(0, color='red', ls='--')
axes[1].set_title('Residuals vs Fitted')
fig.tight_layout()
fig.savefig('residual_diagnostic.png', dpi=100, bbox_inches='tight')
print('Saved residual_diagnostic.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({
'value': np.concatenate([np.random.normal(5,1,50), np.random.normal(7,1.5,50), np.random.normal(6.2,1.2,50)]),
'group': ['Control']*50 + ['Drug A']*50 + ['Drug B']*50
})
fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=df, x='group', y='value', palette='Set2', ax=ax)
y_max = df['value'].max() + 0.5
ax.plot([0, 1], [y_max, y_max], 'k-', lw=1.5)
ax.text(0.5, y_max+0.1, '***', ha='center', fontsize=14)
ax.set_title('Drug Effect with Significance Bracket')
fig.savefig('boxplot_sig.png', dpi=100, bbox_inches='tight')
print('Saved boxplot_sig.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({'score':np.random.normal(70,15,150), 'group':np.repeat(['A','B','C'],50), 'study_hours':np.random.uniform(1,8,150)})
# TODO: 3-panel: stripplot, ecdfplot, residplot
# TODO: whitegrid style, save 'multi_stat.png'import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(100),'y':np.random.randn(100)})
fig = plt.figure(figsize=(12, 9))
# plotting_context() changes rcParams, and axes read them when they are
# created, so each subplot is added inside its own context block. Axes made
# up front with plt.subplots() would keep the default label sizes in every panel.
for idx, ctx in enumerate(['paper', 'notebook', 'talk', 'poster'], start=1):
    with sns.plotting_context(ctx):
        ax = fig.add_subplot(2, 2, idx)
        sns.scatterplot(data=df, x='x', y='y', ax=ax, alpha=0.6)
        ax.set_title(f'Context: {ctx}')
fig.suptitle('Seaborn Contexts', fontsize=14)
fig.tight_layout()
fig.savefig('contexts.png', dpi=100, bbox_inches='tight')
print('Saved contexts.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='darkgrid', palette='bright', font_scale=1.1,
rc={'axes.facecolor':'#1e1e2e','figure.facecolor':'#1e1e2e',
'text.color':'white','axes.labelcolor':'white',
'xtick.color':'white','ytick.color':'white'})
np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(200),'y':np.random.randn(200),'g':np.random.choice(['A','B','C'],200)})
fig, ax = plt.subplots(figsize=(7,5))
sns.scatterplot(data=df, x='x', y='y', hue='g', alpha=0.7, ax=ax)
ax.set_title('Dark Mode Plot')
fig.savefig('dark_theme.png', dpi=100, bbox_inches='tight')
print('Saved dark_theme.png')
sns.set_theme()
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
pals = [('Blues',4), ('husl',4), ('Set1',4), (['#FF6B6B','#4ECDC4','#45B7D1','#96CEB4'],4)]
titles = ['Blues (seq)','HUSL (qual)','Set1 (qual)','Custom hex']
# axes_style() only affects axes created while the context is active, so the
# grid is built inside it; wrapping just the plotting calls would leave the
# panels in the default style.
with sns.axes_style('whitegrid'):
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
# One boxplot per palette; the second tuple element (colour count) is unused here.
for ax, (pal, _), title in zip(axes.flat, pals, titles):
    sns.boxplot(data=tips, x='day', y='total_bill', palette=pal, ax=ax)
    ax.set_title(title)
fig.tight_layout()
fig.savefig('palettes.png', dpi=100, bbox_inches='tight')
print('Saved palettes.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(42)
df = pd.DataFrame({'x':np.repeat(range(5),30),'y':np.random.randn(150)+np.repeat([1,2,3,4,5],30)})
fig = plt.figure(figsize=(12, 5))
# axes_style() only styles axes that are created while the context is active,
# so each subplot is added inside its own `with` block rather than up front.
with sns.axes_style('ticks'):
    ax_ticks = fig.add_subplot(1, 2, 1)
    sns.boxplot(data=df, x='x', y='y', ax=ax_ticks, palette='pastel')
    sns.despine(ax=ax_ticks, trim=True)
    ax_ticks.set_title('Despined + trimmed')
with sns.axes_style('whitegrid'):
    ax_grid = fig.add_subplot(1, 2, 2)
    sns.violinplot(data=df, x='x', y='y', ax=ax_grid, palette='muted', inner='box')
    sns.despine(ax=ax_grid, left=False, bottom=False, top=True, right=True)
    ax_grid.set_title('Whitegrid + partial despine')
fig.tight_layout()
fig.savefig('despine_styles.png', dpi=100, bbox_inches='tight')
print('Saved despine_styles.png')
plt.close()import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# TODO: set_theme dark background rc params
# TODO: 3-panel: histplot, scatterplot, barplot
# TODO: suptitle, tight_layout, save 'night_report.png' 150 DPI
# TODO: sns.set_theme() at end
Use stripplot() to show individual data points by category and swarmplot() to avoid overplotting by spacing points. Layer them over box or violin plots for richer displays.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 5))
sns.stripplot(data=tips, x='day', y='total_bill', ax=ax1,
palette='Set2', jitter=True, alpha=0.7, size=5)
ax1.set_title('Strip Plot (jitter)')
sns.swarmplot(data=tips, x='day', y='total_bill', ax=ax2,
palette='Set2', size=4)
ax2.set_title('Swarm Plot (no overlap)')
for ax in (ax1, ax2):
ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
fig.tight_layout()
fig.savefig('strip_swarm.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved strip_swarm.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=iris, x='species', y='sepal_length', ax=ax,
palette='pastel', width=0.5,
boxprops=dict(alpha=0.6))
sns.stripplot(data=iris, x='species', y='sepal_length', ax=ax,
palette='Set2', size=5, jitter=True, alpha=0.7,
linewidth=0.5, edgecolor='gray')
ax.set_title('Box + Strip Plot Overlay')
ax.set_xlabel('Species'); ax.set_ylabel('Sepal Length (cm)')
fig.tight_layout()
fig.savefig('box_strip.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved box_strip.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
'score': np.concatenate([np.random.normal(m, 1, 60) for m in [5, 6.5, 8]]),
'group': np.repeat(['Control', 'Low Dose', 'High Dose'], 60)
})
fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(data=df, x='group', y='score', ax=ax,
palette='muted', inner=None, alpha=0.6)
sns.swarmplot(data=df, x='group', y='score', ax=ax,
color='black', size=3, alpha=0.6)
ax.set_title('Violin + Swarm: Treatment Groups')
ax.set_xlabel('Group'); ax.set_ylabel('Score')
fig.tight_layout()
fig.savefig('violin_swarm.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved violin_swarm.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(9, 5))
# dodge=True separates the two sexes side by side within each day.
sns.stripplot(data=tips, x='day', y='total_bill', hue='sex',
              dodge=True, jitter=True, alpha=0.7, size=5,
              palette='Set1', ax=ax)
# The y variable is total_bill, so the title names the bill rather than the tip.
ax.set_title('Strip Plot: Total Bill by Day and Sex (Dodged)')
ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
ax.legend(title='Sex', bbox_to_anchor=(1, 1))
fig.tight_layout()
fig.savefig('strip_hue.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved strip_hue.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(7)
depts = ['Eng', 'Sales', 'Mktg', 'HR']
dept_params = [(95, 15), (70, 12), (75, 13), (65, 10)]  # (mean, sd) per dept, same order as depts
genders = ['Female', 'Male']
n_per_cell = 60
df = pd.DataFrame({
    # Draw a fresh sample for every (gender, dept) cell. Repeating one list of
    # arrays with `* 2` would give both genders exactly the same salaries.
    'salary': np.concatenate([
        np.random.normal(mean, sd, n_per_cell)
        for _ in genders
        for mean, sd in dept_params
    ]),
    'dept': np.tile(np.repeat(depts, n_per_cell), len(genders)),
    'gender': np.repeat(genders, n_per_cell * len(depts)),
})
fig, ax = plt.subplots(figsize=(11, 6))
sns.violinplot(data=df, x='dept', y='salary', hue='gender',
split=True, inner=None, palette='pastel', alpha=0.6, ax=ax)
sns.stripplot(data=df, x='dept', y='salary', hue='gender',
dodge=True, jitter=True, alpha=0.5, size=3,
palette='dark:#333333', ax=ax, legend=False)
ax.set_title('Salary Distribution by Department & Gender', fontweight='bold')
ax.set_xlabel('Department'); ax.set_ylabel('Salary ($K)')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:2], labels[:2], title='Gender')
fig.tight_layout()
fig.savefig('salary_audit.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved salary_audit.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# TODO: swarmplot body_mass_g by species, hue=sex
# TODO: boxplot + stripplot flipper_length_mm by island
# TODO: colorblind palette, titles, labels
fig.tight_layout()
fig.savefig('penguin_points.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved penguin_points.png')
Use pointplot() to display means with confidence intervals for categorical variables. Use lineplot() with hue for multi-group time series with automatic CI shading.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 5))
sns.pointplot(data=tips, x='day', y='total_bill', ax=ax1,
palette='Set2', capsize=0.1, errwidth=2,
markers='o', linestyles='-')
ax1.set_title('Point Plot: Mean Bill by Day')
sns.pointplot(data=tips, x='day', y='total_bill', hue='sex',
ax=ax2, palette='Set1', dodge=True,
capsize=0.08, errwidth=1.5)
ax2.set_title('Point Plot: By Day and Sex')
for ax in (ax1, ax2):
ax.set_xlabel('Day'); ax.set_ylabel('Mean Total Bill ($)')
fig.tight_layout()
fig.savefig('pointplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved pointplot.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='darkgrid')
np.random.seed(0)
n = 50
df = pd.DataFrame({
't': np.tile(np.arange(n), 3),
'value': np.concatenate([
np.cumsum(np.random.randn(n)) + 5,
np.cumsum(np.random.randn(n)) + 3,
np.cumsum(np.random.randn(n)) + 7,
]),
'model': np.repeat(['Model A', 'Model B', 'Model C'], n)
})
# Add repeated measurements for CI
df_rep = pd.concat([df.assign(value=df.value + np.random.randn(len(df))*0.5)
for _ in range(5)], ignore_index=True)
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df_rep, x='t', y='value', hue='model',
palette='Set2', linewidth=2, ax=ax)
ax.set_title('lineplot with 95% CI Shading')
ax.set_xlabel('Time Step'); ax.set_ylabel('Value')
fig.tight_layout()
fig.savefig('lineplot_ci.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved lineplot_ci.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(1)
months = list(range(1, 13))
df = pd.DataFrame({
'month': months * 3,
'revenue': (
[100 + i*8 + np.random.randn()*5 for i in range(12)] +
[80 + i*6 + np.random.randn()*4 for i in range(12)] +
[60 + i*10 + np.random.randn()*6 for i in range(12)]
),
'region': ['North']*12 + ['South']*12 + ['East']*12
})
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x='month', y='revenue', hue='region',
style='region', markers=True, dashes=False,
palette='Set1', linewidth=2, ax=ax)
ax.set_title('Monthly Revenue by Region')
ax.set_xlabel('Month'); ax.set_ylabel('Revenue ($K)')
ax.legend(title='Region')
fig.tight_layout()
fig.savefig('lineplot_markers.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved lineplot_markers.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')
fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# Bar: shows aggregated height
sns.barplot(data=titanic, x='class', y='survived', hue='sex',
palette='Set2', ax=axes[0])
axes[0].set_title('barplot: Mean Survival Rate')
# Point: cleaner for comparison
sns.pointplot(data=titanic, x='class', y='survived', hue='sex',
palette='Set1', dodge=True, capsize=0.1, ax=axes[1])
axes[1].set_title('pointplot: Same Data')
# Precompute the mean survival rate for each class/sex group
surv = titanic.groupby(['class','sex'])['survived'].mean().reset_index()
sns.pointplot(data=surv, x='class', y='survived', hue='sex',
palette='Set1', dodge=True, capsize=0.1, ax=axes[2],
markers=['o', 's'], linestyles=['-','--'])
axes[2].set_title('pointplot: Precomputed')
for ax in axes:
ax.set_ylim(0, 1); ax.set_ylabel('Survival Rate')
fig.tight_layout()
fig.savefig('point_vs_bar.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved point_vs_bar.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(42)
subjects = ['Math', 'Science', 'English', 'History']
terms = ['T1', 'T2', 'T3']
rows = []
for subj in subjects:
base = np.random.uniform(60, 85)
for term in terms:
for gender in ['Female', 'Male']:
n = 30
offset = {'T1': 0, 'T2': 3, 'T3': 6}[term]
g_offset = 2 if gender == 'Female' else 0
scores = np.random.normal(base + offset + g_offset, 8, n)
for s in scores:
rows.append({'subject': subj, 'term': term, 'gender': gender, 'score': s})
df = pd.DataFrame(rows)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
sns.pointplot(data=df, x='subject', y='score', hue='gender',
dodge=True, capsize=0.08, palette='Set1',
markers=['o','s'], linestyles=['-','--'], ax=ax1)
ax1.set_title('Mean Score by Subject & Gender', fontweight='bold')
ax1.set_xlabel('Subject'); ax1.set_ylabel('Mean Score')
# Plot the raw per-student scores so lineplot aggregates them per term itself.
# `markers=True` only takes effect together with a `style` variable, so style
# is mapped to the same column as hue (dashes=False keeps every line solid).
sns.lineplot(data=df, x='term', y='score', hue='subject', style='subject',
             markers=True, dashes=False, palette='tab10', linewidth=2, ax=ax2)
ax2.set_title('Score Trend by Term', fontweight='bold')
ax2.set_xlabel('Term'); ax2.set_ylabel('Score')
ax2.legend(title='Subject', bbox_to_anchor=(1,1))
fig.tight_layout()
fig.savefig('student_performance.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved student_performance.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='darkgrid')
fmri = sns.load_dataset('fmri')
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(11, 8), sharex=False)
# TODO: lineplot signal by timepoint, hue=event, style=region
# TODO: pointplot mean signal by region with capsize
# TODO: titles, labels, tight_layout
fig.savefig('fmri_plots.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved fmri_plots.png')
Use ecdfplot() for empirical CDFs, histplot() with multiple groups, and kdeplot() to compare distributions across categories without assuming a parametric form.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
'value': np.concatenate([
np.random.normal(5, 1.5, 200),
np.random.normal(7, 1.0, 200),
np.random.exponential(2, 200) + 3,
]),
'group': np.repeat(['Normal(5,1.5)', 'Normal(7,1)', 'Exp+3'], 200)
})
fig, ax = plt.subplots(figsize=(9, 5))
# Pass ax explicitly rather than relying on the implicit "current axes".
sns.ecdfplot(data=df, x='value', hue='group', palette='Set2', linewidth=2, ax=ax)
median_line = ax.axhline(0.5, color='gray', linestyle='--', linewidth=1)
ax.set_title('ECDF: Distribution Comparison')
ax.set_xlabel('Value'); ax.set_ylabel('Cumulative Proportion')
# ax.legend(title=...) on its own would keep only labelled artists (the median
# line) and drop the per-group entries seaborn created. Merge the two instead.
hue_legend = ax.get_legend()
handles = list(hue_legend.get_lines()) + [median_line]
labels = [text.get_text() for text in hue_legend.get_texts()] + ['Median']
ax.legend(handles=handles, labels=labels, title='Group')
fig.tight_layout()
fig.savefig('ecdf.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved ecdf.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Separate feature distributions per species
for feat, ax in zip(['sepal_length', 'petal_length'], axes):
sns.kdeplot(data=iris, x=feat, hue='species',
fill=True, alpha=0.35, linewidth=2, palette='Set2', ax=ax)
ax.set_title(f'KDE: {feat.replace("_"," ").title()}')
ax.set_xlabel(feat.replace('_',' ').title())
fig.tight_layout()
fig.savefig('kde_compare.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved kde_compare.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(1)
df = pd.DataFrame({
'response_ms': np.concatenate([
np.random.lognormal(5, 0.5, 300),
np.random.lognormal(5.5, 0.4, 200),
]),
'endpoint': np.repeat(['/api/search', '/api/checkout'], [300, 200])
})
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# Left: densities on a linear x axis. common_norm=False normalises each
# endpoint separately, so the two differently sized groups are comparable.
sns.histplot(data=df, x='response_ms', hue='endpoint',
             stat='density', kde=True, alpha=0.4, palette='Set1',
             common_norm=False, ax=ax1)
ax1.set_title('Histogram with KDE — Linear scale')
ax1.set_xlabel('Response Time (ms)')
# Right: same data with a log-scaled x axis.
sns.histplot(data=df, x='response_ms', hue='endpoint',
             stat='density', kde=True, alpha=0.4, palette='Set1',
             common_norm=False, log_scale=True, ax=ax2)
ax2.set_title('Histogram with KDE — Log scale')
ax2.set_xlabel('Response Time (ms)')
fig.tight_layout()
fig.savefig('hist_kde_compare.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved hist_kde_compare.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(2)
data = pd.DataFrame({
'latency_ms': np.random.lognormal(5, 0.7, 1000),
'server': np.random.choice(['US-East', 'EU-West', 'AP-South'], 1000)
})
fig, ax = plt.subplots(figsize=(9, 5))
sns.ecdfplot(data=data, x='latency_ms', hue='server',
palette='tab10', linewidth=2, ax=ax)
# Mark p50, p95, p99
for pct, label, color in [(50,'p50','gray'),(95,'p95','orange'),(99,'p99','red')]:
val = np.percentile(data['latency_ms'], pct)
ax.axvline(val, color=color, linestyle='--', linewidth=1.2, alpha=0.8)
ax.axhline(pct/100, color=color, linestyle=':', linewidth=0.8, alpha=0.5)
ax.text(val+50, pct/100-0.04, f'{label}: {val:.0f}ms',
fontsize=8, color=color)
ax.set_title('API Latency ECDF by Server Region')
ax.set_xlabel('Latency (ms)'); ax.set_ylabel('Cumulative Proportion')
fig.tight_layout()
fig.savefig('ecdf_percentiles.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved ecdf_percentiles.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(99)
variants = {'Control': (45, 12), 'Variant A': (52, 14), 'Variant B': (48, 10)}
dfs = [pd.DataFrame({'order_value': np.random.normal(mu, sd, 300).clip(5), 'variant': name})
for name, (mu, sd) in variants.items()]
df = pd.concat(dfs, ignore_index=True)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
sns.ecdfplot(data=df, x='order_value', hue='variant',
palette='Set1', linewidth=2, ax=ax1)
for pct, ls in [(50,'--'),(90,':')]:
ax1.axhline(pct/100, color='gray', linestyle=ls, linewidth=1)
ax1.text(ax1.get_xlim()[0]+1, pct/100+0.01, f'p{pct}', fontsize=8, color='gray')
ax1.set_title('ECDF of Order Value by Variant', fontweight='bold')
ax1.set_xlabel('Order Value ($)')
sns.kdeplot(data=df, x='order_value', hue='variant',
fill=True, alpha=0.3, linewidth=2, palette='Set1',
common_norm=False, ax=ax2)
ax2.set_title('KDE: Order Value Distribution', fontweight='bold')
ax2.set_xlabel('Order Value ($)')
fig.tight_layout()
fig.savefig('ab_distribution.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved ab_distribution.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic').dropna(subset=['age','fare'])
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# TODO: ecdfplot fare by class
# TODO: kdeplot age by survived
# TODO: histplot fare log scale by class
fig.tight_layout()
fig.savefig('titanic_dist.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved titanic_dist.png')
Use clustermap() to apply hierarchical clustering to rows and columns of a matrix, revealing natural groupings in correlation matrices, gene expression, or feature similarity data.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Hierarchically clustered correlation matrix of the four iris measurements.
sns.set_theme(style='white')
iris = sns.load_dataset('iris')

# Correlations are computed on the numeric columns only.
numeric = iris.drop(columns='species')
corr = numeric.corr()

# Diverging colormap pinned to the full [-1, 1] correlation range.
heat_style = dict(cmap='RdBu_r', vmin=-1, vmax=1,
                  annot=True, fmt='.2f', linewidths=0.5)
g = sns.clustermap(corr, figsize=(7, 7), **heat_style)
g.ax_heatmap.set_title('Iris Feature Correlation Clustermap', pad=50)
plt.savefig('clustermap_iris.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved clustermap_iris.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Synthetic gene-expression clustermap with a colour strip marking sample groups.
sns.set_theme(style='white')
np.random.seed(0)
n_genes, n_samples = 20, 12
groups = [label for label in 'ABC' for _ in range(4)]

# Each gene follows a shared sine wave across samples, scaled by -1, 0 or +1
# depending on its index modulo 3, plus standard-normal noise.
noise = np.random.randn(n_genes, n_samples)
wave = np.sin(np.linspace(0, 3, n_samples))
signs = np.arange(n_genes) % 3 - 1
gene_names = [f'Gene_{i:02d}' for i in range(n_genes)]
sample_names = [f'S{i+1:02d}' for i in range(n_samples)]
data = pd.DataFrame(noise + signs[:, None] * wave[None, :],
                    index=gene_names, columns=sample_names)

# One colour per sample, indexed by column name so seaborn can align it.
palette = {'A':'#4c72b0', 'B':'#dd8452', 'C':'#55a868'}
col_colors = pd.Series([palette[grp] for grp in groups], index=data.columns)

# z_score=0 standardizes each row (gene) before clustering.
g = sns.clustermap(data, cmap='vlag', figsize=(10, 8),
                   col_colors=col_colors,
                   row_cluster=True, col_cluster=True,
                   z_score=0, linewidths=0.3)
g.ax_heatmap.set_title('Gene Expression Clustermap', pad=50)
plt.savefig('clustermap_genes.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved clustermap_genes.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Clustermap of synthetic company KPIs, rescaled per column before clustering.
sns.set_theme(style='white')
np.random.seed(1)
features = ['Revenue', 'Growth', 'Margin', 'CAC', 'LTV', 'Churn']
companies = [f'Co{i:02d}' for i in range(1, 16)]
data = pd.DataFrame(
    np.random.randn(len(companies), len(features)),
    index=companies, columns=features
)

# Add cluster structure: shift three groups of five companies on different KPIs.
data.iloc[:5, :3] += 2
data.iloc[5:10, 3:] -= 2
data.iloc[10:, [0, 2, 4]] += 1.5

g = sns.clustermap(data, cmap='coolwarm', figsize=(8, 10),
                   standard_scale=1,  # rescale each column (axis 1)
                   linewidths=0.4, annot=False,
                   dendrogram_ratio=(0.15, 0.2))
# The two-line title needs an explicit '\n' escape; a literal line break
# inside a single-quoted string is a syntax error.
g.ax_heatmap.set_title('Company KPI Clustermap\n(column-standardized)', pad=50)
plt.savefig('clustermap_kpi.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved clustermap_kpi.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Compare two linkage method / distance metric combinations on the same matrix.
sns.set_theme(style='white')
np.random.seed(2)
data = pd.DataFrame(
    np.random.randn(15, 10),
    index=[f'R{i}' for i in range(15)],
    columns=[f'C{j}' for j in range(10)]
)
# Plant two blocks of elevated values for the clustering to recover.
data.iloc[:5, :4] += 2.5
data.iloc[10:, 6:] += 2.5

# clustermap is figure-level: it always creates its own figure and cannot draw
# into existing axes, so no plt.subplots() call is made here (the extra figure
# would stay empty and never be closed). Each combination is saved separately.
linkage_settings = [('ward', 'euclidean'), ('average', 'correlation')]
for method, metric in linkage_settings:
    g = sns.clustermap(data, cmap='RdYlBu_r', figsize=(6, 5),
                       method=method, metric=metric,
                       linewidths=0.3)
    g.fig.suptitle(f'method={method}, metric={metric}', y=1.01, fontsize=10)
    g.fig.savefig(f'clustermap_{method}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)
print('Saved clustermap_ward.png and clustermap_average.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Clustermap of synthetic customer behaviour, with a colour strip showing the
# segment each customer was generated from.
sns.set_theme(style='white')
np.random.seed(55)
features = ['Frequency','Avg Order','Recency','CLV','Tickets']
n_each = 15

# Segment -> centre of its feature distribution (same order as `features`).
centroids = {
    'Loyal':   [8, 120, 5, 960, 1],
    'At-Risk': [2, 80, 45, 160, 4],
    'New':     [1, 95, 15, 95, 0],
    'Churned': [0, 60, 120, 0, 6],
    'VIP':     [12, 400, 3, 4800, 2],
}
# Per-feature noise scale applied to standard-normal draws.
noise_scale = np.array([1, 20, 10, 200, 1])

blocks = []
for seg, center in centroids.items():
    noise = np.random.randn(n_each, len(features)) * noise_scale
    # Label every row '<segment>_<i>' so the heatmap rows are identifiable.
    labels = [f'{seg}_{i}' for i in range(n_each)]
    blocks.append(pd.DataFrame(np.array(center) + noise,
                               columns=features, index=labels))
data_df = pd.concat(blocks)

palette_seg = {'Loyal':'#55a868','At-Risk':'#dd8452','New':'#4c72b0',
               'Churned':'#c44e52','VIP':'#9467bd'}
# Index the colours by the same row labels as the data so seaborn aligns them
# by label rather than by position.
segment_of_row = pd.Series(np.repeat(list(centroids), n_each),
                           index=data_df.index, name='Segment')
row_colors = segment_of_row.map(palette_seg)

# z_score=0 standardizes each row before clustering.
g = sns.clustermap(data_df,
                   cmap='vlag', z_score=0, figsize=(9, 11),
                   row_colors=row_colors,
                   linewidths=0.2, dendrogram_ratio=(0.2, 0.15))
g.ax_heatmap.set_title('Customer Segment Behavioral Clustermap', pad=50, fontsize=12)
plt.savefig('customer_clustermap.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved customer_clustermap.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: a 12x8 random matrix with three planted blocks to cluster.
sns.set_theme(style='white')
np.random.seed(3)
data = np.random.randn(12, 8)
# Shift three overlapping row/column blocks so the matrix has visible structure.
data[:4, :3] += 2.5
data[4:8, 3:6] -= 2.5
data[8:, 5:] += 1.5
# Label rows R0..R11 and columns C0..C7.
df = pd.DataFrame(data,
index=[f'R{i}' for i in range(12)],
columns=[f'C{j}' for j in range(8)])
# TODO: clustermap method='ward', cmap='RdBu_r', z_score=0, annot=True
# TODO: save 'block_clustermap.png'Use jointplot() to show the bivariate relationship alongside marginal univariate distributions. Explore kind='hex', 'kde', 'reg', and custom marginal plots with JointGrid.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Draw the same bivariate relationship with four different jointplot kinds.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# Marginal options per kind. 'scatter', 'hex' and 'reg' draw histogram
# marginals, which accept `bins`; 'kde' draws KDE marginals, which do not, so
# it gets no extra options. No `palette` is passed because there is no `hue`
# variable for it to colour.
marginal_options = {
    'scatter': dict(bins=25),
    'hex': dict(bins=25),
    'kde': {},
    'reg': dict(bins=25),
}
for kind, marginal_kws in marginal_options.items():
    g = sns.jointplot(data=tips, x='total_bill', y='tip',
                      kind=kind, height=5,
                      marginal_kws=marginal_kws)
    g.fig.suptitle(f"kind='{kind}'", y=1.02)
    g.fig.savefig(f'joint_{kind}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)
print('Saved joint_scatter/hex/kde/reg.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Joint plot of two iris measurements, coloured by species.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

# Styling forwarded to the marginal plots.
marginal_style = {'fill': True, 'alpha': 0.4}
g = sns.jointplot(data=iris, x='sepal_length', y='petal_length',
                  hue='species', palette='Set2',
                  height=6, marginal_kws=marginal_style)

figure = g.fig
figure.suptitle('Iris: Sepal vs Petal Length (hue=species)', y=1.02)
figure.savefig('joint_hue.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved joint_hue.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Build a joint plot layer by layer with JointGrid: scatter + KDE contours in
# the centre, histograms with KDE curves on the margins.
sns.set_theme(style='whitegrid')
np.random.seed(42)

# x is drawn before y so the random stream is consumed in a fixed order.
x_vals = np.random.lognormal(2, 0.5, 300)
y_vals = np.random.lognormal(1.5, 0.6, 300)
df = pd.DataFrame({'x': x_vals, 'y': y_vals})

g = sns.JointGrid(data=df, x='x', y='y', height=6)
# Two layers on the joint axes: points first, contour lines on top.
g.plot_joint(sns.scatterplot, alpha=0.4, color='steelblue', s=25)
g.plot_joint(sns.kdeplot, levels=5, color='navy', linewidths=1.0)
g.plot_marginals(sns.histplot, kde=True, bins=25, color='steelblue', alpha=0.6)

joint_ax = g.ax_joint
joint_ax.set_xlabel('X (lognormal)')
joint_ax.set_ylabel('Y (lognormal)')

figure = g.fig
figure.suptitle('JointGrid: Scatter + KDE + Histogram Marginals', y=1.01)
figure.savefig('joint_grid_custom.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved joint_grid_custom.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Regression joint plot of a noisy linear relationship, annotated with Pearson r.
sns.set_theme(style='whitegrid')
np.random.seed(5)
n = 150
x = np.random.uniform(10, 100, n)
# y = 0.5*x + 5 plus Gaussian noise with standard deviation 8.
y = 0.5*x + np.random.randn(n)*8 + 5
df = pd.DataFrame({'x': x, 'y': y})

point_style = dict(alpha=0.5, s=30, color='steelblue')
fit_style = dict(color='red', linewidth=2)
margin_style = dict(bins=20, kde=True)
g = sns.jointplot(data=df, x='x', y='y',
                  kind='reg', height=6,
                  scatter_kws=point_style,
                  line_kws=fit_style,
                  marginal_kws=margin_style)

# Correlation between the two columns, written in the top-left of the joint axes.
r = df.corr().loc['x', 'y']
joint_ax = g.ax_joint
joint_ax.text(0.05, 0.92, f'r = {r:.3f}', transform=joint_ax.transAxes,
              fontsize=11, fontweight='bold', color='red')

figure = g.fig
figure.suptitle('Joint Regression Plot with Correlation', y=1.02)
figure.savefig('joint_reg.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved joint_reg.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Height/weight joint distribution drawn per gender on a bare JointGrid.
sns.set_theme(style='whitegrid')
np.random.seed(77)

# Draw order is fixed for reproducibility: both height samples, then both
# weight samples (Female first, Male second in each pair).
heights = np.concatenate([
    np.random.normal(165, 7, 200),
    np.random.normal(178, 8, 200)])
weights = np.concatenate([
    np.random.normal(62, 9, 200),
    np.random.normal(78, 11, 200)])
df = pd.DataFrame({
    'height_cm': heights,
    'weight_kg': weights,
    'gender': np.repeat(['Female','Male'], 200)
})

g = sns.JointGrid(data=df, x='height_cm', y='weight_kg', height=7)
palette = {'Female':'#dd8452','Male':'#4c72b0'}
joint_ax, top_ax, side_ax = g.ax_joint, g.ax_marg_x, g.ax_marg_y

# One scatter layer and one KDE per margin for each gender, in matching colours.
for gender, grp in df.groupby('gender'):
    colour = palette[gender]
    kde_style = dict(fill=True, alpha=0.4, color=colour, linewidth=1.5)
    joint_ax.scatter(grp['height_cm'], grp['weight_kg'],
                     alpha=0.4, s=20, color=colour, label=gender)
    sns.kdeplot(data=grp, x='height_cm', ax=top_ax, **kde_style)
    sns.kdeplot(data=grp, y='weight_kg', ax=side_ax, **kde_style)
joint_ax.legend(title='Gender')

# Correlation over all rows (both genders pooled).
r = df[['height_cm','weight_kg']].corr().loc['height_cm', 'weight_kg']
joint_ax.text(0.05, 0.92, f'r = {r:.3f}', transform=joint_ax.transAxes,
              fontsize=11, fontweight='bold', color='darkred')
joint_ax.set_xlabel('Height (cm)')
joint_ax.set_ylabel('Weight (kg)')

figure = g.fig
figure.suptitle('Height-Weight Joint Distribution by Gender', y=1.02, fontweight='bold')
figure.savefig('height_weight_joint.png', dpi=150, bbox_inches='tight')
plt.close(figure)
print('Saved height_weight_joint.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: JointGrid of penguin bill length vs bill depth.
sns.set_theme(style='whitegrid')
# Drop every row that has any missing value.
penguins = sns.load_dataset('penguins').dropna()
# Empty grid; the TODOs below add the joint and marginal layers.
g = sns.JointGrid(data=penguins, x='bill_length_mm', y='bill_depth_mm', height=6)
# TODO: scatterplot joint with hue=species, s=30
# TODO: boxplot marginals for x and y
# TODO: legend, labels, title
# TODO: save 'penguin_joint.png'Use catplot() as a unified interface for all categorical plots. Control kind='strip'|'swarm'|'box'|'violin'|'bar'|'count'|'point' and use col/row to create FacetGrid-powered small multiples.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Box plots of the total bill per day, split by sex, one column per meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

day_order = ['Thur','Fri','Sat','Sun']
grid_size = dict(height=5, aspect=0.85)
g = sns.catplot(data=tips, x='day', y='total_bill',
                hue='sex', col='time',
                kind='box', palette='Set2',
                order=day_order, **grid_size)
g.set_axis_labels('Day', 'Total Bill ($)')
# Show only the facet value in each panel title.
g.set_titles('{col_name}')

figure = g.fig
figure.suptitle('Bill by Day, Sex, and Time (catplot col)', y=1.02, fontweight='bold')
figure.savefig('catplot_col.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved catplot_col.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Split violins of passenger age by class and sex, one column per survival value.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

# split=True draws the two hue levels as the two halves of each violin;
# inner='quartile' marks the quartiles inside it.
violin_style = dict(kind='violin', split=True, inner='quartile', palette='pastel')
g = sns.catplot(data=titanic, x='class', y='age',
                col='survived', hue='sex',
                height=5, aspect=0.9, **violin_style)
g.set_axis_labels('Class', 'Age')
g.set_titles(col_template='Survived: {col_name}')

figure = g.fig
figure.suptitle('Titanic Age by Class, Gender & Survival', y=1.02, fontweight='bold')
figure.savefig('catplot_violin.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved catplot_violin.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Axes-level count and bar plots placed side by side in one matplotlib figure.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
count_ax, bar_ax = axes

# Left: number of rows per day and sex.
sns.countplot(data=tips, x='day', hue='sex', palette='Set1', ax=count_ax)
count_ax.set_title("countplot: Visits by Day")

# Right: tip per day and sex as bars, with capped error bars.
sns.barplot(data=tips, x='day', y='tip', hue='sex',
            palette='Set2', capsize=0.08, ax=bar_ax)
bar_ax.set_title("barplot: Mean Tip by Day")

for panel in (count_ax, bar_ax):
    panel.set_xlabel('Day')

fig.tight_layout()
fig.savefig('catplot_count_bar.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved catplot_count_bar.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Point plots of survival by class and sex, one column per embarkation port.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

class_order = ['First','Second','Third']
# One marker and one line style per hue level; dodge separates the two levels.
point_style = dict(kind='point', dodge=True, capsize=0.1,
                   markers=['o','s'], linestyles=['-','--'],
                   palette='Set1')
g = sns.catplot(data=titanic, x='class', y='survived',
                hue='sex', col='embarked',
                height=4, aspect=0.85,
                order=class_order, **point_style)
g.set_axis_labels('Class', 'Survival Rate')
g.set_titles(col_template='Embarked: {col_name}')

figure = g.fig
figure.suptitle('Survival Rate by Class, Sex & Port', y=1.02, fontweight='bold')
figure.savefig('catplot_point.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved catplot_point.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Box plots of simulated NPS scores per customer segment, one panel per product.
sns.set_theme(style='whitegrid')
np.random.seed(42)
categories = ['Mobile App','Web Platform','API','Support']
segments = ['Enterprise','SMB','Startup']

# Mean score = product baseline + segment offset.
bases = {'Mobile App': 7.0,'Web Platform': 6.5,'API': 8.0,'Support': 5.5}
seg_offsets = {'Enterprise': 0.5,'SMB': 0.0,'Startup': -0.3}

# 40 scores per (category, segment) pair, clipped to the 0-10 range. The draws
# happen in category-major order, one normal() call per pair.
frames = []
for cat in categories:
    for seg in segments:
        mu = bases[cat] + seg_offsets[seg]
        scores = np.random.normal(mu, 1.5, 40).clip(0, 10)
        frames.append(pd.DataFrame({'category': cat,'segment': seg,'nps': scores}))
df = pd.concat(frames, ignore_index=True)

# hue duplicates x only to colour the boxes, so the legend is switched off.
g = sns.catplot(data=df, x='segment', y='nps', col='category',
                kind='box', hue='segment', palette='Set2',
                height=4, aspect=0.85, col_wrap=2,
                order=segments, legend=False)
g.set_axis_labels('Segment', 'NPS Score')
g.set_titles('{col_name}')

figure = g.fig
figure.suptitle('Product NPS by Category & Segment', y=1.02, fontweight='bold')
figure.savefig('nps_catplot.png', dpi=150, bbox_inches='tight')
plt.close(figure)
print('Saved nps_catplot.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: categorical plots of seaborn's 'exercise' practice dataset.
sns.set_theme(style='whitegrid')
exercise = sns.load_dataset('exercise')
# TODO: catplot pointplot: x=time, y=pulse, hue=kind, col=diet
# TODO: catplot boxplot: same variables
# TODO: save 'exercise_catplot.png'Use displot() as the figure-level interface for histplot, kdeplot, and ecdfplot. Add col/row faceting for small-multiple distribution comparisons.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Show the same two-group sample with each displot kind: hist, kde and ecdf.
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
    'value': np.concatenate([np.random.normal(0, 1, 300), np.random.normal(4, 1.5, 300)]),
    'group': np.repeat(['A', 'B'], 300)
})

for kind in ['hist', 'kde', 'ecdf']:
    # Options shared by every kind.
    kind_kws = dict(alpha=0.5, linewidth=2)
    # `fill` only exists for histograms and KDEs. An ECDF is drawn as plain
    # lines, and passing fill (even fill=False) would be forwarded to the line
    # artist and rejected, so the key is left out for that kind.
    if kind != 'ecdf':
        kind_kws['fill'] = True
    g = sns.displot(data=df, x='value', hue='group',
                    kind=kind, height=4, aspect=1.5, palette='Set2',
                    **kind_kws)
    g.fig.suptitle(f"displot kind='{kind}'", y=1.02)
    g.fig.savefig(f'displot_{kind}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)
print('Saved displot_hist/kde/ecdf.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Age histograms with KDE overlays, coloured by survival, one panel per class.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')
# Only rows with a recorded age can be binned.
titanic = titanic.dropna(subset=['age'])

class_order = ['First','Second','Third']
hist_style = dict(kind='hist', stat='density', kde=True, alpha=0.5, palette='Set1')
g = sns.displot(data=titanic, x='age', col='class',
                hue='survived',
                height=4, aspect=0.85, col_order=class_order,
                **hist_style)
g.set_axis_labels('Age', 'Density')
g.set_titles(col_template='{col_name} Class')

figure = g.fig
figure.suptitle('Age Distribution by Class and Survival', y=1.02, fontweight='bold')
figure.savefig('displot_col.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved displot_col.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# KDE of simulated sales on a region (rows) x quarter (columns) grid.
sns.set_theme(style='whitegrid')
np.random.seed(1)
regions = ['North', 'South']
periods = ['Q1', 'Q2', 'Q3', 'Q4']

# 100 sales values per (region, quarter); the mean rises by 5 each quarter.
frames = []
for region in regions:
    base = 50 if region == 'North' else 35
    for quarter_idx, period in enumerate(periods):
        vals = np.random.normal(base + quarter_idx * 5, 12, 100)
        frames.append(pd.DataFrame({'region': region, 'period': period, 'sales': vals}))
df = pd.concat(frames, ignore_index=True)

# A palette only takes effect when a hue variable is assigned; without one
# seaborn ignores it and warns. Mapping hue to the row variable colours each
# region's row. common_norm=False keeps every panel's density normalized on
# its own (as it is without hue), and the legend is dropped because the row
# titles already name the region.
g = sns.displot(data=df, x='sales', row='region', col='period',
                hue='region', palette='Set2', common_norm=False, legend=False,
                kind='kde', fill=True, alpha=0.5,
                height=3, aspect=1.1, col_order=periods)
g.set_axis_labels('Sales ($K)', 'Density')
g.fig.suptitle('Sales Distribution by Region & Quarter', y=1.02, fontweight='bold')
g.fig.savefig('displot_grid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved displot_grid.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Log-scale latency histograms for two endpoints, with KDE curves and a rug.
sns.set_theme(style='whitegrid')
np.random.seed(2)

# 500 samples for '/fast' are drawn first, then 200 for '/slow'.
fast_latency = np.random.lognormal(3, 0.4, 500)
slow_latency = np.random.lognormal(4, 0.5, 200)
df = pd.DataFrame({
    'latency': np.concatenate([fast_latency, slow_latency]),
    'endpoint': np.repeat(['/fast','/slow'], [500,200])
})

rug_style = dict(height=0.05, alpha=0.3)
g = sns.displot(data=df, x='latency', hue='endpoint',
                kind='hist', stat='density', kde=True,
                rug=True, rug_kws=rug_style,
                log_scale=True, alpha=0.4, palette='Set1',
                height=5, aspect=1.6, binwidth=0.05)
g.set_axis_labels('Latency (ms, log scale)', 'Density')

figure = g.fig
figure.suptitle('Endpoint Latency Distribution with Rug', y=1.02, fontweight='bold')
figure.savefig('displot_rug.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved displot_rug.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# KDEs of simulated model accuracy per experiment, one panel per dataset.
sns.set_theme(style='whitegrid')
np.random.seed(88)
experiments = ['Baseline','DropoutReg','AugData']
datasets = ['CIFAR-10','ImageNet']

# experiment -> mean accuracy on each dataset, in the order of `datasets`
means = {'Baseline': (72,80), 'DropoutReg': (75,83), 'AugData': (77,85)}

# 150 accuracy samples per (experiment, dataset), standard deviation 3.
frames = []
for exp, dataset_means in means.items():
    for ds, mu in zip(datasets, dataset_means):
        vals = np.random.normal(mu, 3, 150)
        frames.append(pd.DataFrame({'experiment': exp, 'dataset': ds, 'accuracy': vals}))
df = pd.concat(frames, ignore_index=True)

kde_style = dict(kind='kde', fill=True, alpha=0.4, linewidth=2,
                 palette='Set2', common_norm=False)
g = sns.displot(data=df, x='accuracy', hue='experiment', col='dataset',
                height=4, aspect=1.2, **kde_style)
g.set_axis_labels('Accuracy (%)', 'Density')
g.set_titles(col_template='{col_name}')

figure = g.fig
figure.suptitle('Model Accuracy Distribution by Experiment & Dataset', y=1.02, fontweight='bold')
figure.savefig('experiment_displot.png', dpi=150, bbox_inches='tight')
plt.close(figure)
print('Saved experiment_displot.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: figure-level distribution plots of the penguins dataset.
sns.set_theme(style='whitegrid')
# Drop every row that has any missing value.
penguins = sns.load_dataset('penguins').dropna()
# TODO: displot hist kind, col=species, x=body_mass_g, hue=sex, kde=True
# TODO: save 'penguin_hist.png'
# TODO: displot ecdf kind, same grouping
# TODO: save 'penguin_ecdf.png'Use relplot() as the figure-level interface for scatterplot and lineplot. Use col, row, and hue to create multi-panel relational grids with consistent scaling.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Scatter of tip against total bill, one panel per meal time; sex is encoded
# by both colour and marker style.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

marker_style = dict(palette='Set1', s=60, alpha=0.7)
g = sns.relplot(data=tips, x='total_bill', y='tip',
                hue='sex', col='time', style='sex',
                height=4, aspect=1.0, **marker_style)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.set_titles(col_template='{col_name}')

figure = g.fig
figure.suptitle('Tips: Scatter by Time and Sex (relplot)', y=1.02, fontweight='bold')
figure.savefig('relplot_scatter.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved relplot_scatter.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# One small line panel per subject, wrapped three panels to a row.
sns.set_theme(style='darkgrid')
np.random.seed(0)
subjects = [f'Subject_{i:02d}' for i in range(1, 7)]

# Each subject gets a sine wave with a random phase plus Gaussian noise.
t = np.arange(20)
df_list = []
for subj in subjects:
    y = np.sin(t*0.3 + np.random.uniform(0, 3)) + np.random.randn(20)*0.2
    df_list.append(pd.DataFrame({'time': t, 'value': y, 'subject': subj}))
df = pd.concat(df_list, ignore_index=True)

# A palette needs a hue variable; without one seaborn ignores it and warns.
# Mapping hue to the facet variable gives each subject its own colour, and the
# legend is dropped because the panel titles already name the subject.
g = sns.relplot(data=df, x='time', y='value', col='subject',
                hue='subject', palette='tab10', legend=False,
                kind='line', col_wrap=3,
                height=3, aspect=1.3, linewidth=2)
g.set_axis_labels('Time', 'Signal')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Subject Time Series (relplot col_wrap=3)', y=1.02, fontweight='bold')
g.fig.savefig('relplot_line.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved relplot_line.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Single scatter panel that encodes three extra variables at once:
# colour (category), marker size (size_var) and marker style (quality).
sns.set_theme(style='whitegrid')
np.random.seed(1)
n = 200

# The columns are drawn in this exact order so the random stream is unchanged.
x_vals = np.random.randn(n)
y_vals = np.random.randn(n)
size_vals = np.random.uniform(50, 300, n)
category_vals = np.random.choice(['A','B','C'], n)
quality_vals = np.random.choice(['High','Low'], n)
df = pd.DataFrame({
    'x': x_vals,
    'y': y_vals,
    'size_var': size_vals,
    'category': category_vals,
    'quality': quality_vals
})

# sizes=(20, 300) sets the smallest and largest marker area used.
g = sns.relplot(data=df, x='x', y='y',
                hue='category', size='size_var', style='quality',
                palette='Set2', sizes=(20, 300), alpha=0.7,
                height=5, aspect=1.2)
g.ax.set_title('Multi-Encoding Scatter: hue + size + style', fontweight='bold')

figure = g.fig
figure.savefig('relplot_multi.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved relplot_multi.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Line plots of a simulated signal on a group (rows) x condition (columns) grid.
sns.set_theme(style='whitegrid')
np.random.seed(2)
conditions = ['Fast', 'Slow']
groups = ['Treatment', 'Control']

# Each (condition, group) series is a random walk plus a sine wave around a
# baseline that is higher for 'Treatment' and for 'Fast'.
t = np.arange(30)
rows = []
for cond in conditions:
    for grp in groups:
        mu = (4 if grp == 'Treatment' else 2) + (1 if cond == 'Fast' else 0)
        y = mu + np.cumsum(np.random.randn(30)*0.3) + np.sin(t*0.2)
        rows.append(pd.DataFrame({'time': t, 'signal': y,
                                  'condition': cond, 'group': grp}))
df = pd.concat(rows, ignore_index=True)

g = sns.relplot(data=df, x='time', y='signal',
                row='group', col='condition',
                kind='line', palette='Set1', hue='group',
                height=3.5, aspect=1.2, linewidth=2)
g.set_axis_labels('Time', 'Signal')
g.set_titles(row_template='{row_name}', col_template='{col_name}')
# The multiplication sign is written as an escape so the title cannot be
# garbled by a wrong file encoding.
g.fig.suptitle('Signal by Group \u00d7 Condition (relplot grid)', y=1.02, fontweight='bold')
g.fig.savefig('relplot_grid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved relplot_grid.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# CTR against spend per marketing channel, with a fitted trend line per panel.
sns.set_theme(style='whitegrid')
np.random.seed(66)
channels = ['Search', 'Social', 'Display']
campaigns = ['Brand', 'Retargeting', 'Prospecting', 'Seasonal']

# 40 simulated observations per (channel, campaign): CTR grows linearly with
# spend plus noise and is clipped to [0.001, 0.15].
n = 40
frames = []
for ch in channels:
    for camp in campaigns:
        spend = np.random.uniform(500, 5000, n)
        ctr = 0.02 + spend/100000 + np.random.randn(n)*0.005
        ctr = np.clip(ctr, 0.001, 0.15)
        impr = spend * np.random.uniform(100, 500, n)
        frames.append(pd.DataFrame({'channel': ch, 'campaign': camp,
                                    'spend': spend, 'ctr': ctr, 'impressions': impr}))
df = pd.concat(frames, ignore_index=True)

g = sns.relplot(data=df, x='spend', y='ctr',
                col='channel', hue='campaign', style='campaign',
                size='impressions', sizes=(20, 200),
                palette='tab10', alpha=0.7, height=4, aspect=1.1)

# Overlay a least-squares line on each panel. axes_dict maps every column value
# to its axes; matching on ax.get_title() does not work here because the
# default titles read 'channel = <name>', not '<name>'.
for channel, ax in g.axes_dict.items():
    ch_data = df[df.channel == channel]
    slope, intercept = np.polyfit(ch_data.spend, ch_data.ctr, 1)
    x_line = np.linspace(ch_data.spend.min(), ch_data.spend.max(), 50)
    ax.plot(x_line, slope*x_line + intercept, 'r--', linewidth=1.5, alpha=0.8)

g.set_axis_labels('Spend ($)', 'CTR')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Campaign CTR vs Spend by Channel', y=1.02, fontweight='bold')
g.fig.savefig('campaign_relplot.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved campaign_relplot.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: relational plots of seaborn's 'fmri' practice dataset.
sns.set_theme(style='darkgrid')
fmri = sns.load_dataset('fmri')
# TODO: relplot line, x=timepoint, y=signal, hue=event, col=region
# TODO: save 'fmri_line.png'
# TODO: relplot scatter, same grouping
# TODO: save 'fmri_scatter.png'Go beyond basic heatmaps: annotate with custom formats, apply diverging colormaps for signed values, mask upper triangles, and combine with matplotlib for multi-panel layouts.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Lower-triangle correlation heatmap of eight synthetic, correlated variables.
sns.set_theme(style='white')
np.random.seed(0)
n = 8
labels = [f'Var_{k}' for k in range(1, n+1)]
data = np.random.randn(200, n)

# Induce correlations: every column j receives a randomly weighted share of
# each later column i > j. One uniform draw per (i, j) pair, in this order.
for i in range(n):
    for j in range(i):
        weight = np.random.uniform(-0.5, 0.8)
        data[:, j] += data[:, i] * weight

corr = np.corrcoef(data.T)
df_corr = pd.DataFrame(corr, index=labels, columns=labels)

# True above the diagonal -> those cells are hidden; the diagonal stays visible.
mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(8, 7))
cell_style = dict(cmap='RdBu_r', vmin=-1, vmax=1,
                  annot=True, fmt='.2f', linewidths=0.5)
sns.heatmap(df_corr, mask=mask, ax=ax, square=True,
            cbar_kws={'label': 'Pearson r'}, **cell_style)
ax.set_title('Lower-Triangle Correlation Matrix', fontweight='bold')
fig.tight_layout()
fig.savefig('corr_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved corr_heatmap.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Annotated heatmap of simulated page views by hour of day and day of week.
sns.set_theme(style='white')
np.random.seed(1)
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
first_hour = 7
hours = [f'{h:02d}:00' for h in range(first_hour, 23)]  # 07:00 .. 22:00

# Baseline Poisson traffic, with a higher rate on the two weekend columns.
traffic = np.random.poisson(50, (len(hours), len(days)))
traffic[:, 5:] = np.random.poisson(80, (len(hours), 2))

# Peak periods, given as (start hour, end hour, extra views) in clock time.
# The matrix starts at 07:00, so a clock hour must be shifted by `first_hour`
# to get its row; using clock hours directly as row indices made the 17-20
# slice fall outside the 16 rows and silently do nothing.
# NOTE(review): the peaks are assumed to mean 08:00-12:00 and 17:00-20:00 —
# confirm against the intended scenario.
peak_periods = [(8, 12, 30), (17, 20, 40)]
for start, stop, boost in peak_periods:
    traffic[start - first_hour:stop - first_hour, :] += boost

df = pd.DataFrame(traffic, index=hours, columns=days)
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(df, cmap='YlOrRd', annot=True, fmt='d',
            linewidths=0.3, ax=ax,
            cbar_kws={'label': 'Page Views', 'shrink': 0.8})
ax.set_title('Website Traffic by Hour & Day', fontweight='bold')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Hour')
fig.tight_layout()
fig.savefig('traffic_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved traffic_heatmap.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Model x metric score heatmap in which the best model per metric is starred.
sns.set_theme(style='white')
np.random.seed(2)
models = ['LR', 'RF', 'XGB', 'SVM', 'NN']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1', 'AUC']
values = np.random.uniform(0.70, 0.98, (len(models), len(metrics)))
df = pd.DataFrame(values, index=models, columns=metrics)

# Cell labels: every score with three decimals, as strings.
annot = df.apply(lambda col: col.map('{:.3f}'.format))
# Prefix the top score of each metric with a star. The star is written as an
# escape (U+2605) so it cannot be garbled by a wrong file encoding.
for metric, best_model in df.idxmax(axis=0).items():
    annot.loc[best_model, metric] = '\u2605' + annot.loc[best_model, metric]

fig, ax = plt.subplots(figsize=(9, 5))
# fmt='' because the annotations are already formatted strings.
sns.heatmap(df, cmap='Blues', vmin=0.65, vmax=1.0, ax=ax,
            annot=annot, fmt='', linewidths=0.5)
ax.set_title('Model Comparison Heatmap (\u2605 = best per metric)', fontweight='bold')
fig.tight_layout()
fig.savefig('model_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved model_heatmap.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Diverging heatmap of simulated year-over-year KPI changes, centred on zero.
sns.set_theme(style='white')
np.random.seed(3)
products = [f'P{k:02d}' for k in range(1, 9)]
kpis = ['Revenue','Margin','Units','Traffic','Conv%','Retention']

# Standard-normal draws scaled by 15.
changes = 15 * np.random.randn(len(products), len(kpis))
df = pd.DataFrame(changes, index=products, columns=kpis)

fig, ax = plt.subplots(figsize=(9, 6))
# center=0 puts the midpoint of the colormap at "no change".
cell_style = dict(cmap='RdYlGn', center=0, annot=True, fmt='.1f', linewidths=0.5)
sns.heatmap(df, ax=ax, cbar_kws={'label': 'YoY Change (%)'}, **cell_style)
ax.set_title('Product KPI Year-over-Year Change (%)', fontweight='bold')
fig.tight_layout()
fig.savefig('diverging_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved diverging_heatmap.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Row-normalized confusion matrix annotated with raw counts and percentages.
sns.set_theme(style='white')
np.random.seed(42)
classes = ['Cat', 'Dog', 'Bird', 'Fish', 'Horse']
# Rows are true classes, columns are predicted classes (see the axis labels).
cm = np.array([
    [45, 3, 1, 0, 1],
    [2, 48, 0, 1, 0],
    [1, 0, 42, 3, 2],
    [0, 1, 4, 44, 1],
    [2, 0, 3, 1, 43]
])
# Divide each row by its total: the share of every true class that was
# predicted as each class.
cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)

# Two-line cell label: count on top, row percentage below. The line break
# must be the '\n' escape; a literal newline inside the f-string is a syntax
# error. The loop follows the matrix shape instead of a hard-coded size.
annot = np.empty(cm.shape, dtype=object)
for i, j in np.ndindex(cm.shape):
    annot[i, j] = f'{cm[i, j]}\n{cm_norm[i, j]*100:.1f}%'

fig, ax = plt.subplots(figsize=(8, 7))
# Normalizing over true-label rows puts per-class recall on the diagonal,
# so the colorbar is labelled as recall rather than precision.
sns.heatmap(cm_norm, annot=annot, fmt='', cmap='Blues',
            xticklabels=classes, yticklabels=classes,
            vmin=0, vmax=1, linewidths=0.5, ax=ax,
            cbar_kws={'label': 'Recall (row-normalized)'})
ax.set_title('5-Class Confusion Matrix', fontweight='bold', fontsize=13)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
fig.tight_layout()
fig.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved confusion_matrix.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Exercise skeleton: two heatmaps of the same month x region sales table.
sns.set_theme(style='white')
np.random.seed(9)
# Only the first ten months (Jan-Oct) are kept, matching the 10 data rows below.
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'][:10]
regions = ['North','South','East','West','Central','Pacific']
# Random integers from 100 to 499 in a 10 x 6 table.
df = pd.DataFrame(np.random.randint(100,500,(10,6)), index=months, columns=regions)
# Two panels side by side, one per TODO heatmap.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
# TODO: standard heatmap YlGn on ax1, annot=True
# TODO: diverging heatmap RdYlGn centered at mean on ax2
# TODO: titles, tight_layout, save 'sales_heatmap.png'
plt.close()Use PairGrid for full control over diagonal, upper, and lower triangle plots. Map different plot types per region and use hue for group-aware multi-variable comparison.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# PairGrid of the iris measurements: histograms on the diagonal, scatter elsewhere.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

measurements = ['sepal_length','sepal_width','petal_length','petal_width']
g = sns.PairGrid(iris, hue='species', palette='Set2', vars=measurements)
# Diagonal panels show one variable each; off-diagonal panels show pairs.
g.map_diag(sns.histplot, kde=True, alpha=0.6)
g.map_offdiag(sns.scatterplot, s=25, alpha=0.6)
g.add_legend(title='Species')

figure = g.fig
figure.suptitle('PairGrid: Histogram Diagonal + Scatter', y=1.01, fontweight='bold')
figure.savefig('pairgrid_hist.png', dpi=100, bbox_inches='tight')
plt.close(figure)
print('Saved pairgrid_hist.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# PairGrid with a different plot type in each region of the grid.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
vars_ = ['sepal_length','sepal_width','petal_length','petal_width']

g = sns.PairGrid(iris, hue='species', palette='Set1', vars=vars_)
# (mapping method, plot function, keyword arguments), applied in this order:
# filled KDEs on the diagonal, KDE contours above it, scatter points below it.
layers = [
    (g.map_diag, sns.kdeplot, dict(fill=True, alpha=0.4, linewidth=1.5)),
    (g.map_upper, sns.kdeplot, dict(levels=4, warn_singular=False)),
    (g.map_lower, sns.scatterplot, dict(s=20, alpha=0.5)),
]
for apply_layer, plot_func, plot_kws in layers:
    apply_layer(plot_func, **plot_kws)
g.add_legend(title='Species', fontsize=9)

figure = g.fig
figure.suptitle('PairGrid: KDE + Scatter Split', y=1.01, fontweight='bold')
figure.savefig('pairgrid_split.png', dpi=100, bbox_inches='tight')
plt.close(figure)
print('Saved pairgrid_split.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# PairGrid with KDEs on the diagonal, scatter above it and regression fits below.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
vars_ = ['sepal_length', 'petal_length', 'petal_width']
g = sns.PairGrid(iris, hue='species', palette='Set2', vars=vars_)

# The unused `diag_box` helper was removed: it was never mapped onto the grid
# and carried unused locals.
g.map_diag(sns.kdeplot, fill=True, alpha=0.5)
g.map_upper(sns.scatterplot, s=20, alpha=0.5)
# Small, faint points so the fitted lines stay readable.
g.map_lower(sns.regplot, scatter_kws=dict(s=10, alpha=0.4),
            line_kws=dict(linewidth=1.5))
g.add_legend()
g.fig.suptitle('PairGrid: KDE / Scatter / Regression', y=1.01, fontweight='bold')
g.fig.savefig('pairgrid_reg.png', dpi=100, bbox_inches='tight')
plt.close(g.fig)
print('Saved pairgrid_reg.png')import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
n = 150
df = pd.DataFrame({
'A': np.random.randn(n),
'B': np.random.randn(n),
'C': np.random.randn(n),
'group': np.random.choice(['X','Y'], n)
})
df['B'] += df['A'] * 0.7
df['C'] = df['A'] * (-0.5) + np.random.randn(n) * 0.7
def corrfunc(x, y, **kwargs):
    """Annotate the current axes with the Pearson correlation of ``x`` and ``y``.

    When the grid has a ``hue`` variable, PairGrid calls a plain function like
    this one once per hue level on the same axes and passes the level name as
    ``label``. Each call therefore writes its own labelled line below the
    previous ones instead of drawing every value on top of the others at the
    centre of the panel. Without ``label`` a single centred value is drawn.
    """
    r = np.corrcoef(x, y)[0, 1]
    ax = plt.gca()
    label = kwargs.get('label')
    if label is None:
        text, y_pos = f'r = {r:.2f}', 0.5
    else:
        # ax.texts already holds the annotations written for earlier hue
        # levels, so its length gives this level's row (0.8, 0.6, 0.4, ...).
        text, y_pos = f'{label}: r = {r:.2f}', 0.8 - 0.2 * len(ax.texts)
    strong = abs(r) > 0.4  # highlight correlations worth a second look
    ax.annotate(text, xy=(0.5, y_pos), xycoords='axes fraction',
                ha='center', va='center', fontsize=12,
                color='darkred' if strong else 'gray',
                fontweight='bold' if strong else 'normal')
g = sns.PairGrid(df[['A','B','C','group']], hue='group', palette='Set1',
vars=['A','B','C'])
g.map_diag(sns.kdeplot, fill=True, alpha=0.4)
g.map_lower(sns.scatterplot, s=20, alpha=0.5)
g.map_upper(corrfunc)
g.add_legend()
g.fig.suptitle('PairGrid: Lower=Scatter, Upper=Correlation', y=1.01, fontweight='bold')
g.fig.savefig('pairgrid_corr.png', dpi=100, bbox_inches='tight')
plt.close(g.fig)
print('Saved pairgrid_corr.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
vars_ = ['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g']
def upper_corr(x, y, **kwargs):
    """Write the Pearson correlation of ``x`` and ``y`` onto the current axes.

    With a ``hue`` variable, PairGrid invokes a plain function like this one
    once per hue level on the same axes and supplies the level name as
    ``label``. The values are stacked vertically, one labelled line per level,
    so they no longer overprint each other in the middle of the panel.
    Without ``label`` a single centred value is drawn.
    """
    r = np.corrcoef(x, y)[0, 1]
    ax = plt.gca()
    label = kwargs.get('label')
    if label is None:
        text, y_pos = f'r={r:.2f}', 0.5
    else:
        # Annotations from earlier hue levels are already in ax.texts;
        # use their count to pick the next row (0.8, 0.6, 0.4, ...).
        text, y_pos = f'{label}: r={r:.2f}', 0.8 - 0.2 * len(ax.texts)
    ax.annotate(text, xy=(0.5, y_pos), xycoords='axes fraction',
                ha='center', va='center', fontsize=10,
                color='darkred' if abs(r) > 0.5 else 'gray',
                fontweight='bold')
g = sns.PairGrid(penguins, hue='species', palette='Set2', vars=vars_)
g.map_diag(sns.kdeplot, fill=True, alpha=0.5, linewidth=1.5)
g.map_lower(sns.scatterplot, s=20, alpha=0.5)
g.map_upper(upper_corr)
g.add_legend(title='Species')
g.fig.suptitle('Penguins PairGrid EDA', y=1.01, fontweight='bold')
g.fig.savefig('penguins_pairgrid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved penguins_pairgrid.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()
vars_ = ['mpg','horsepower','weight','acceleration']
g = sns.PairGrid(mpg, hue='origin', palette='tab10', vars=vars_)
# TODO: map_diag histplot
# TODO: map_upper scatterplot
# TODO: map_lower regplot (scatter_kws, line_kws)
# TODO: add_legend, suptitle
# TODO: save 'mpg_pairgrid.png'
Use residplot() for visual residual checks, lmplot() for grouped regression, and regplot() with a custom order for polynomial fits. Combine them with matplotlib for full diagnostic panels.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
n = 150
x = np.linspace(0, 10, n)
y_linear = 2*x + 1 + np.random.randn(n)*2
y_nonlin = 2*x + 0.3*x**2 + np.random.randn(n)*3
df = pd.DataFrame({'x':x,'y_linear':y_linear,'y_nonlin':y_nonlin})
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
sns.residplot(data=df, x='x', y='y_linear', lowess=True,
scatter_kws=dict(alpha=0.5, s=20),
line_kws=dict(color='red', linewidth=2), ax=ax1)
ax1.set_title('Residuals: Linear Data (good fit)')
ax1.axhline(0, color='gray', linestyle='--', linewidth=1)
sns.residplot(data=df, x='x', y='y_nonlin', lowess=True,
scatter_kws=dict(alpha=0.5, s=20),
line_kws=dict(color='red', linewidth=2), ax=ax2)
ax2.set_title('Residuals: Nonlinear Data (pattern visible)')
ax2.axhline(0, color='gray', linestyle='--', linewidth=1)
# Title, layout and export for the two-panel residual figure.
# (The dash in the title had been mis-decoded to a stray Greek letter.)
fig.suptitle('residplot — Lowess Smoother', fontweight='bold')
fig.tight_layout()
fig.savefig('residplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved residplot.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
g = sns.lmplot(data=tips, x='total_bill', y='tip',
hue='sex', palette='Set1',
scatter_kws=dict(s=30, alpha=0.6),
line_kws=dict(linewidth=2),
height=5, aspect=1.3)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.fig.suptitle('lmplot: Regression by Sex', y=1.02, fontweight='bold')
g.fig.savefig('lmplot_hue.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved lmplot_hue.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
g = sns.lmplot(data=tips, x='total_bill', y='tip',
col='time', hue='smoker',
palette='Set2',
scatter_kws=dict(s=30, alpha=0.6),
height=4, aspect=1.1)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('lmplot: Regression by Time and Smoker', y=1.02, fontweight='bold')
g.fig.savefig('lmplot_col.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved lmplot_col.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(1)
x = np.linspace(-3, 3, 100)
y = 2*x**3 - x**2 + x + np.random.randn(100)*3
df = pd.DataFrame({'x':x,'y':y})
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
for ax, order, title in zip(axes, [1,2,3], ['Linear (order=1)','Quadratic (order=2)','Cubic (order=3)']):
sns.regplot(data=df, x='x', y='y', order=order,
scatter_kws=dict(s=15, alpha=0.5),
line_kws=dict(linewidth=2, color='tomato'),
ax=ax)
ax.set_title(title)
ax.grid(True, alpha=0.3)
fig.suptitle('Polynomial Regression Orders with regplot', fontweight='bold')
fig.tight_layout()
fig.savefig('poly_regplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved poly_regplot.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(33)
hoods = ['Downtown','Suburbs','Rural']
rows = []
for hood in hoods:
base = {'Downtown':300,'Suburbs':200,'Rural':120}[hood]
n = 80
size = np.random.uniform(50, 250, n)
price = base + 1.2*size + np.random.randn(n)*30
rows.append(pd.DataFrame({'size_sqm':size,'price_k':price,'neighborhood':hood}))
df = pd.concat(rows, ignore_index=True)
g = sns.lmplot(data=df, x='size_sqm', y='price_k', col='neighborhood',
hue='neighborhood', palette='Set2',
scatter_kws=dict(s=25, alpha=0.6),
line_kws=dict(linewidth=2),
height=4, aspect=1.0, legend=False)
g.set_axis_labels('Size (sqm)', 'Price ($K)')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Housing Price Regression by Neighborhood', y=1.02, fontweight='bold')
g.fig.savefig('housing_regression.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved housing_regression.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(4)
x = np.linspace(0, 10, 100)
y = np.sin(x) + np.random.randn(100)*0.4
df = pd.DataFrame({'x':x,'y':y})
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
# TODO: regplot order=1 on axes[0]
# TODO: regplot order=3 on axes[1]
# TODO: residplot order=3 on axes[2]
fig.suptitle('Regression & Residuals: sin(x)', fontweight='bold')
fig.tight_layout()
fig.savefig('sin_regression.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sin_regression.png')
Combine Seaborn plots with raw matplotlib artists: add reference lines, spans, custom patches, secondary axes, and annotations on top of Seaborn output.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
'score': np.random.normal(72, 15, 200),
'group': np.random.choice(['A','B','C','D'], 200)
})
fig, ax = plt.subplots(figsize=(9, 5))
sns.boxplot(data=df, x='group', y='score', palette='Set2', ax=ax)
# Matplotlib overlays
ax.axhline(70, color='red', linestyle='--', linewidth=2, label='Target')
ax.axhspan(0, 60, color='red', alpha=0.07, label='Fail zone')
ax.axhspan(90, 100, color='green', alpha=0.07, label='Excellent')
ax.set_title('Test Scores by Group with Reference Lines', fontweight='bold')
ax.legend()
ax.set_ylim(20, 110)
fig.tight_layout()
fig.savefig('sns_mpl_lines.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_mpl_lines.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(1)
df = pd.DataFrame({
'revenue': np.concatenate([np.random.normal(100, 20, 95), [220, 230, 240, 10, 5]]),
'region': np.random.choice(['East','West'], 100)
})
fig, ax = plt.subplots(figsize=(9, 5))
# Fix the category order explicitly so the tick positions used for the
# annotations below (East=0, West=1) are guaranteed to match the plot;
# otherwise the order follows the data rather than this mapping.
region_order = ['East', 'West']
sns.stripplot(data=df, x='region', y='revenue', order=region_order,
              jitter=True, palette='Set2', size=6, alpha=0.7, ax=ax)
# Annotate extreme outliers (points above the upper Tukey fence)
q1, q3 = df.revenue.quantile([0.25, 0.75])
iqr = q3 - q1
outliers = df[df.revenue > q3 + 1.5*iqr]
x_of_region = {name: pos for pos, name in enumerate(region_order)}
for _, row in outliers.iterrows():
    # Aim the arrow at the centre of the region's column. A freshly drawn
    # random offset would not coincide with the jitter seaborn applied to
    # the dot, so it only moved the arrow tip away from the data.
    x_mid = x_of_region[row.region]
    ax.annotate(f'${row.revenue:.0f}K',
                xy=(x_mid, row.revenue),
                xytext=(x_mid+0.2, row.revenue),
                fontsize=8, color='darkred',
                arrowprops=dict(arrowstyle='->', color='darkred', lw=1))
ax.axhline(q3+1.5*iqr, color='red', linestyle='--', linewidth=1.2, label='Upper fence')
ax.set_title('Revenue Distribution with Outlier Annotations', fontweight='bold')
ax.legend()
fig.tight_layout()
fig.savefig('sns_annotate_outliers.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_annotate_outliers.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.patches as mpatches
sns.set_theme(style='white')
np.random.seed(2)
data = np.random.randn(8, 8)
data[2:4, 5:7] += 3 # hot cluster
fig, ax = plt.subplots(figsize=(7, 6))
sns.heatmap(data, cmap='RdBu_r', center=0, ax=ax,
annot=True, fmt='.1f', linewidths=0.3)
# Highlight cluster with rectangle
rect = mpatches.Rectangle((5, 2), 2, 2,
linewidth=3, edgecolor='gold', facecolor='none',
transform=ax.transData)
ax.add_patch(rect)
ax.text(6.5, 1.7, 'Hot cluster', ha='center', fontsize=10,
color='gold', fontweight='bold')
ax.set_title('Heatmap with Cluster Highlight', fontweight='bold')
fig.tight_layout()
fig.savefig('sns_heatmap_patch.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_heatmap_patch.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(3)
months = list(range(1, 13))
df = pd.DataFrame({
'month': months,
'revenue': [80+i*5+np.random.randn()*4 for i in range(12)],
'users': [1000+i*80+np.random.randn()*50 for i in range(12)]
})
fig, ax1 = plt.subplots(figsize=(10, 5))
ax2 = ax1.twinx()
sns.lineplot(data=df, x='month', y='revenue', color='steelblue',
linewidth=2, marker='o', label='Revenue ($K)', ax=ax1)
sns.lineplot(data=df, x='month', y='users', color='tomato',
linewidth=2, marker='s', linestyle='--', label='Users', ax=ax2)
ax1.set_ylabel('Revenue ($K)', color='steelblue')
ax2.set_ylabel('Users', color='tomato')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax2.tick_params(axis='y', labelcolor='tomato')
ax1.set_xlabel('Month')
# Merge the handles of both y-axes into one legend drawn on ax1.
lines1, lab1 = ax1.get_legend_handles_labels()
lines2, lab2 = ax2.get_legend_handles_labels()
# Drop any separate legend on the twin axis so only the combined one remains.
# (The earlier code removed the combined legend from ax1 instead.)
if ax2.get_legend() is not None:
    ax2.get_legend().remove()
ax1.legend(lines1+lines2, lab1+lab2, loc='upper left')
ax1.set_title('Revenue & Users (seaborn + twinx)', fontweight='bold')
fig.tight_layout()
fig.savefig('sns_twinx.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_twinx.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(88)
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
revenue = [120+i*8+np.random.randn()*10 for i in range(12)]
conv_rate = [3.2+i*0.1+np.random.randn()*0.2 for i in range(12)]
target_rev = 160
df = pd.DataFrame({'month':months,'revenue':revenue,'conv_rate':conv_rate})
fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()
bar_colors = ['seagreen' if r >= target_rev else 'steelblue' for r in revenue]
bars = ax1.bar(months, revenue, color=bar_colors, alpha=0.7, width=0.6)
ax2.plot(months, conv_rate, 'ro-', linewidth=2, markersize=6, label='Conv Rate (%)')
ax1.axhline(target_rev, color='green', linestyle='--', linewidth=2, label='Revenue target')
ax1.axhspan(target_rev, max(revenue)*1.05, color='green', alpha=0.05)
# Annotate exceeding months
# Bars for string categories sit at x = 0, 1, 2, ..., so the enumerate index
# is the bar's x position. '\u2605' is the BLACK STAR glyph; it is written as
# an escape because the raw character had been mangled into a string literal
# broken across two lines.
for i, r in enumerate(revenue):
    if r >= target_rev:
        ax1.text(i, r+3, '\u2605', ha='center', fontsize=14, color='gold')
ax1.set_ylabel('Revenue ($K)', color='steelblue')
ax2.set_ylabel('Conversion Rate (%)', color='tomato')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax2.tick_params(axis='y', labelcolor='tomato')
lines1, lab1 = ax1.get_legend_handles_labels()
lines2, lab2 = ax2.get_legend_handles_labels()
ax1.legend(lines1+lines2, lab1+lab2, loc='upper left')
ax1.set_title('Marketing KPI: Revenue vs Conversion Rate', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('marketing_kpi.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved marketing_kpi.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(5)
subjects = ['Math','Science','English','History']
df = pd.DataFrame({
'score': np.concatenate([np.random.normal(m, 15, 80) for m in [65,70,75,60]]).clip(0,100),
'subject': np.repeat(subjects, 80)
})
fig, ax = plt.subplots(figsize=(9, 5))
# TODO: violinplot
# TODO: axhline at 60 (passing)
# TODO: axhspan 85-100 (distinction zone)
# TODO: annotate highest median subject
# TODO: save 'exam_violin.png'
plt.close()
Practice EDA workflows on Seaborn's built-in datasets: titanic, penguins, diamonds, mpg, and fmri. Apply multiple plot types to reveal patterns, relationships, and anomalies.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')
fig, axes = plt.subplots(2, 2, figsize=(12, 9))
# Survival by class
sns.barplot(data=titanic, x='class', y='survived', hue='sex',
palette='Set1', capsize=0.08, ax=axes[0,0])
axes[0,0].set_title('Survival Rate by Class & Sex')
axes[0,0].set_ylim(0,1)
# Age distribution by survival
sns.kdeplot(data=titanic.dropna(subset=['age']), x='age', hue='survived',
fill=True, alpha=0.4, palette='Set2', ax=axes[0,1])
axes[0,1].set_title('Age Distribution by Survival')
# Fare vs survival (box)
sns.boxplot(data=titanic, x='class', y='fare', hue='survived',
palette='pastel', ax=axes[1,0])
axes[1,0].set_title('Fare Distribution by Class & Survival')
# Count by embarkation port
sns.countplot(data=titanic, x='embarked', hue='survived',
palette='Set2', ax=axes[1,1])
axes[1,1].set_title('Counts by Port of Embarkation')
# Title, layout and export for the 2x2 Titanic dashboard.
# (The dash in the title had been mis-decoded to a stray Greek letter.)
fig.suptitle('Titanic Dataset — EDA Dashboard', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('titanic_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved titanic_eda.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
diamonds = sns.load_dataset('diamonds')
sample = diamonds.sample(2000, random_state=42)
fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# Price by cut
sns.violinplot(data=sample, x='cut', y='price', palette='Set2',
inner='quartile', ax=axes[0])
axes[0].set_title('Price by Cut')
# Carat vs price scatter
sns.scatterplot(data=sample, x='carat', y='price', hue='color',
palette='RdYlGn', s=15, alpha=0.5, ax=axes[1])
axes[1].set_title('Carat vs Price by Color')
# Correlation heatmap of numeric cols
corr = sample[['carat','depth','table','price','x','y','z']].corr()
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm',
center=0, linewidths=0.5, ax=axes[2])
axes[2].set_title('Feature Correlation')
fig.suptitle('Diamonds EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('diamonds_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved diamonds_eda.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
fig, axes = plt.subplots(2, 2, figsize=(12, 9))
sns.scatterplot(data=penguins, x='bill_length_mm', y='bill_depth_mm',
hue='species', style='island', s=60, alpha=0.7, ax=axes[0,0])
axes[0,0].set_title('Bill Length vs Depth by Species & Island')
sns.violinplot(data=penguins, x='species', y='body_mass_g',
hue='sex', split=True, palette='Set2', inner='box',
ax=axes[0,1])
axes[0,1].set_title('Body Mass by Species & Sex')
sns.ecdfplot(data=penguins, x='flipper_length_mm', hue='species',
palette='Set1', linewidth=2, ax=axes[1,0])
axes[1,0].set_title('Flipper Length ECDF by Species')
counts = penguins.groupby(['species','island']).size().reset_index(name='count')
sns.barplot(data=counts, x='species', y='count', hue='island',
palette='pastel', ax=axes[1,1])
axes[1,1].set_title('Population by Species & Island')
fig.suptitle('Penguins Complete EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('penguins_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved penguins_eda.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()
fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# MPG trend by model year and origin
# ``markers=True`` only takes effect together with a ``style`` variable, so it
# drew no markers here. Pass an explicit matplotlib marker instead so every
# yearly mean is marked on the line.
sns.lineplot(data=mpg, x='model_year', y='mpg', hue='origin',
             palette='Set1', linewidth=2, marker='o', ax=axes[0])
axes[0].set_title('MPG by Year & Origin')
# Weight vs MPG regression by cylinders
cyl_order = sorted(mpg['cylinders'].unique())
pal = sns.color_palette('coolwarm', len(cyl_order))
for i, cyl in enumerate(cyl_order):
sub = mpg[mpg.cylinders == cyl]
axes[1].scatter(sub.weight, sub.mpg, s=20, alpha=0.5, color=pal[i], label=f'{cyl}cyl')
axes[1].set_xlabel('Weight (lbs)'); axes[1].set_ylabel('MPG')
axes[1].legend(title='Cylinders', fontsize=8); axes[1].set_title('Weight vs MPG by Cylinders')
# Horsepower distribution by origin
sns.boxplot(data=mpg, x='origin', y='horsepower', palette='Set2', ax=axes[2])
axes[2].set_title('Horsepower by Origin')
# Title, layout and export for the three-panel MPG figure.
# (The dash in the title had been mis-decoded to a stray Greek letter.)
fig.suptitle('MPG Dataset — Fuel Efficiency EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('mpg_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved mpg_eda.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
diamonds = sns.load_dataset('diamonds')
sample = diamonds.sample(3000, random_state=7)
cut_order = ['Fair','Good','Very Good','Premium','Ideal']
fig, axes = plt.subplots(2, 2, figsize=(13, 10))
sns.violinplot(data=sample, x='cut', y='price', palette='YlOrRd',
inner='quartile', order=cut_order, ax=axes[0,0])
axes[0,0].set_title('Price by Cut', fontweight='bold')
axes[0,0].set_xlabel('Cut'); axes[0,0].set_ylabel('Price ($)')
clarity_order = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']
sns.scatterplot(data=sample, x='carat', y='price', hue='clarity',
palette='RdYlGn', s=15, alpha=0.5,
hue_order=clarity_order, ax=axes[0,1])
axes[0,1].set_title('Carat vs Price by Clarity', fontweight='bold')
sns.ecdfplot(data=sample, x='price', hue='cut', palette='YlOrRd',
linewidth=2, hue_order=cut_order, ax=axes[1,0])
axes[1,0].set_title('Price ECDF by Cut', fontweight='bold')
axes[1,0].set_xlabel('Price ($)')
pivot = diamonds.groupby(['cut','color'])['price'].median().unstack()
pivot = pivot.loc[cut_order]
sns.heatmap(pivot, cmap='YlOrRd', annot=True, fmt='.0f',
linewidths=0.3, ax=axes[1,1])
# Heatmap panel title, then figure title, layout and export.
# (The multiplication sign and the dash had been mis-decoded to stray
# Greek letters in these two titles.)
axes[1,1].set_title('Median Price: Cut × Color', fontweight='bold')
fig.suptitle('Diamond Price Analysis — Complete Story', fontweight='bold', fontsize=14)
fig.tight_layout()
fig.savefig('diamond_story.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved diamond_story.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()
# Pairplot separately (it creates its own figure)
g = sns.pairplot(mpg[['mpg','horsepower','weight','origin']],
hue='origin', palette='Set2', plot_kws=dict(s=15, alpha=0.5))
g.fig.suptitle('MPG Pairplot', y=1.01)
g.fig.savefig('mpg_pairplot.png', dpi=80, bbox_inches='tight')
plt.close(g.fig)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# TODO: boxplot mpg by cylinders
# TODO: lineplot mean mpg by model_year, hue=origin
# TODO: scatterplot weight vs mpg, hue=origin, regplot overlay
fig.tight_layout()
fig.savefig('mpg_dashboard.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved mpg_dashboard.png')
Use the modern seaborn.objects interface: create a Plot object, facet it into subplots with shared axes, and chain .add() calls to layer marks and statistical transforms. Use Plot.save() and Plot.show() to render the result. Available in Seaborn 0.12+.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
p = (
so.Plot(tips, x='total_bill', y='tip', color='sex')
.add(so.Dot(alpha=0.6, pointsize=5))
.add(so.Line(), so.PolyFit(order=1))
.label(x='Total Bill ($)', y='Tip ($)', color='Sex',
title='Tips: Scatter + Regression (objects API)')
)
p.save('so_scatter.png', dpi=120, bbox_inches='tight')
print('Saved so_scatter.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
p = (
so.Plot(tips, x='day', y='total_bill', color='sex')
.add(so.Bar(), so.Agg('mean'), so.Dodge())
.label(x='Day', y='Mean Total Bill ($)', color='Sex',
title='Mean Bill by Day & Sex (objects API)')
)
p.save('so_bar.png', dpi=120, bbox_inches='tight')
print('Saved so_bar.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
# Histogram of petal length per species with a KDE line layered on top.
# The percent scaling is requested from the Hist stat itself:
# ``so.Norm('percent')`` would set Norm's ``func`` to the string 'percent',
# which is not a valid aggregation, so the plot could not be built.
# NOTE: so.KDE() produces density values, so the line is on a different
# scale from the percent bars.
p = (
    so.Plot(iris, x='petal_length', color='species')
    .add(so.Bars(), so.Hist(stat='percent', binwidth=0.3))
    .add(so.Line(), so.KDE())
    .label(x='Petal Length (cm)', y='Percent',
           title='Petal Length Distribution (objects API)')
)
p.save('so_hist.png', dpi=120, bbox_inches='tight')
print('Saved so_hist.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
p = (
so.Plot(tips, x='total_bill', y='tip', color='sex')
.facet(col='time', row='smoker')
.add(so.Dot(alpha=0.5, pointsize=4))
.label(x='Total Bill ($)', y='Tip ($)',
title='Tips: Faceted Grid (objects API)')
.limit(x=(0, 55), y=(0, 11))
)
p.save('so_faceted.png', dpi=120, bbox_inches='tight')
print('Saved so_faceted.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
p1 = (
so.Plot(penguins, x='bill_length_mm', y='bill_depth_mm', color='species')
.add(so.Dot(alpha=0.6, pointsize=5))
.label(x='Bill Length (mm)', y='Bill Depth (mm)',
title='Bill Dimensions by Species')
)
p1.save('so_penguin_scatter.png', dpi=120, bbox_inches='tight')
p2 = (
so.Plot(penguins, x='species', y='body_mass_g', color='sex')
.add(so.Bar(), so.Agg('mean'), so.Dodge())
.label(x='Species', y='Mean Body Mass (g)',
title='Body Mass by Species & Sex')
)
p2.save('so_penguin_bar.png', dpi=120, bbox_inches='tight')
print('Saved so_penguin_scatter.png and so_penguin_bar.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn.objects as so
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
# TODO: so.Plot with x=total_bill, y=tip, color=day
# TODO: .add(so.Dot) + .add(so.Line, so.PolyFit(order=1))
# TODO: .facet(col='time')
# TODO: .save('so_practice.png', dpi=120)
Choose colorblind-safe palettes, use perceptually uniform colormaps, distinguish sequential vs. diverging vs. qualitative palettes, and apply Seaborn's color_palette utilities.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
'value': np.random.randn(200),
'group': np.random.choice(list('ABCDE'), 200)
})
qual_palettes = ['Set1','Set2','Set3','tab10','colorblind','deep']
fig, axes = plt.subplots(2, 3, figsize=(14, 8))
for ax, pal in zip(axes.flat, qual_palettes):
sns.boxplot(data=df, x='group', y='value', palette=pal, ax=ax)
ax.set_title(f'palette="{pal}"')
fig.suptitle('Qualitative Palettes Comparison', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('qual_palettes.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved qual_palettes.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='white')
np.random.seed(1)
x = y = np.linspace(-3, 3, 60)
X, Y = np.meshgrid(x, y)
Z = np.sin(X) * np.cos(Y)
seq_palettes = ['Blues','YlOrRd','viridis','magma']
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, pal in zip(axes, seq_palettes):
im = ax.pcolormesh(X, Y, Z, cmap=pal)
fig.colorbar(im, ax=ax, shrink=0.8)
ax.set_title(f'cmap="{pal}"')
ax.set_aspect('equal')
fig.suptitle('Sequential Palettes for Heatmap Data', fontweight='bold')
fig.tight_layout()
fig.savefig('seq_palettes.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved seq_palettes.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='white')
np.random.seed(2)
data = np.random.randn(10, 8) * 2
df = pd.DataFrame(data,
index=[f'Product {i}' for i in range(1,11)],
columns=[f'Q{q}' for q in range(1,9)])
div_palettes = ['RdBu_r','RdYlGn','coolwarm','PiYG']
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, pal in zip(axes, div_palettes):
sns.heatmap(df, cmap=pal, center=0, ax=ax,
cbar_kws={'shrink':0.8}, xticklabels=True, yticklabels=False)
ax.set_title(f'cmap="{pal}"')
fig.suptitle('Diverging Palettes for Change Data', fontweight='bold')
fig.tight_layout()
fig.savefig('div_palettes.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved div_palettes.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(3)
df = pd.DataFrame({
'x': np.linspace(0, 10, 80),
'A': np.sin(np.linspace(0, 10, 80)) + np.random.randn(80)*0.2,
'B': np.cos(np.linspace(0, 10, 80)) + np.random.randn(80)*0.2,
'C': np.sin(np.linspace(0, 10, 80)*1.5) + np.random.randn(80)*0.2,
})
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
# Non-safe: red-green confusion
colors_bad = ['#ff0000','#00aa00','#0000ff']
for col, c in zip(['A','B','C'], colors_bad):
axes[0].plot(df.x, df[col], color=c, linewidth=2, label=col,
linestyle='-')
axes[0].set_title('Non-colorblind-safe (avoid red/green)')
axes[0].legend()
# Colorblind-safe (Wong palette) with markers
cb_colors = ['#0072B2','#D55E00','#009E73']
markers = ['o','s','^']
for col, c, m in zip(['A','B','C'], cb_colors, markers):
axes[1].plot(df.x, df[col], color=c, linewidth=2,
label=col, marker=m, markevery=8, markersize=7)
axes[1].set_title('Colorblind-safe (Wong palette + markers)')
axes[1].legend()
for ax in axes:
ax.set_xlabel('x'); ax.set_ylabel('y')
ax.grid(True, alpha=0.3)
fig.suptitle('Accessibility: Color Choice Matters', fontweight='bold')
fig.tight_layout()
fig.savefig('colorblind_safe.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved colorblind_safe.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(42)
df_cat = pd.DataFrame({
'group': np.repeat(['A','B','C','D'],60),
'value': np.concatenate([np.random.normal(m,1.2,60) for m in [3,5,4,6]])
})
df_seq = pd.DataFrame(np.random.rand(8,6)*100,
columns=[f'M{i}' for i in range(1,7)],
index=[f'R{i}' for i in range(1,9)])
df_div = pd.DataFrame(np.random.randn(6,5)*20,
columns=['Q1','Q2','Q3','Q4','Q5'],
index=[f'P{i}' for i in range(1,7)])
fig, axes = plt.subplots(2, 2, figsize=(13, 10))
# Categorical: colorblind + hatch
bars = sns.barplot(data=df_cat, x='group', y='value',
palette='colorblind', capsize=0.1, ax=axes[0,0])
hatches = ['','///','xxx','...']
for bar, hatch in zip(axes[0,0].patches, hatches*10):
bar.set_hatch(hatch)
axes[0,0].set_title('Categorical: colorblind + hatch')
# Sequential heatmap
sns.heatmap(df_seq, cmap='Blues', annot=False, ax=axes[0,1],
cbar_kws={'label':'Value'})
axes[0,1].set_title('Sequential: Blues')
# Diverging heatmap
sns.heatmap(df_div, cmap='RdBu_r', center=0, annot=True, fmt='.0f',
linewidths=0.3, ax=axes[1,0])
axes[1,0].set_title('Diverging: RdBu_r')
# Line with markers
cb_colors = ['#0072B2','#D55E00','#009E73','#CC79A7']
x = np.arange(12)
for i,(col,m) in enumerate(zip(cb_colors,['o','s','^','D'])):
y = np.cumsum(np.random.randn(12)*0.5) + i
axes[1,1].plot(x, y, color=col, marker=m, linewidth=2,
markevery=3, markersize=7, label=f'Series {i+1}')
axes[1,1].legend(fontsize=8); axes[1,1].set_title('Lines: Wong palette + markers')
axes[1,1].grid(True, alpha=0.3)
# Title, layout and export for the 2x2 accessibility dashboard.
# (The dash in the title had been mis-decoded to a stray Greek letter.)
fig.suptitle('Accessible Dashboard — Colorblind-Safe Design', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('accessible_seaborn.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved accessible_seaborn.png')
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sns.set_theme(style='whitegrid')
np.random.seed(6)
groups = ['Control','TreatmentA','TreatmentB','TreatmentC']
df = pd.DataFrame({
'value': np.concatenate([np.random.normal(m,1.5,60) for m in [3,4.5,3.8,5.2]]),
'group': np.repeat(groups,60)
})
# TODO: barplot with non-safe red/green palette, save 'color_bad.png'
# TODO: barplot with colorblind palette + stripplot with markers, save 'color_good.png'
Combine multiple Seaborn plot types, matplotlib GridSpec, annotation overlays, and consistent theming to produce publication-quality, multi-panel analytical dashboards.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
# Marketing dashboard: scatter + violin on the top row, monthly trend below.
sns.set_theme(style='whitegrid', palette='Set2', font_scale=0.95)
np.random.seed(42)

# One fixed channel ordering shared by every panel. Without it each plot
# orders channels by first appearance in its own data, so the same channel
# can end up with a different colour in different panels.
channels = ['Search', 'Social', 'Email', 'Direct']

df = pd.DataFrame({
    # Placeholder draw, overwritten just below; kept so the random stream
    # (and therefore the generated data) is unchanged.
    'revenue': np.random.lognormal(5, 0.5, 300),
    'spend': np.random.uniform(1000, 10000, 300),
    'channel': np.random.choice(channels, 300),
    'month': np.random.choice(range(1, 13), 300),
})
# Revenue is 10-50 % of spend plus Gaussian noise, so it correlates with spend.
df['revenue'] = df['spend'] * np.random.uniform(0.1, 0.5, 300) + np.random.randn(300) * 50

fig = plt.figure(figsize=(14, 9))
gs = GridSpec(2, 3, figure=fig, hspace=0.4, wspace=0.35)
ax1 = fig.add_subplot(gs[0, :2])   # top-left, two columns wide
ax2 = fig.add_subplot(gs[0, 2])    # top-right
ax3 = fig.add_subplot(gs[1, :])    # full-width bottom row

sns.scatterplot(data=df, x='spend', y='revenue', hue='channel',
                style='channel', hue_order=channels, style_order=channels,
                s=50, alpha=0.6, ax=ax1)
ax1.set_title('Revenue vs Spend by Channel', fontweight='bold')

sns.violinplot(data=df, x='channel', y='revenue', palette='Set2',
               order=channels, inner='quartile', ax=ax2)
ax2.set_title('Revenue Distribution', fontweight='bold')
# Restyle the existing tick labels in place instead of re-setting them.
plt.setp(ax2.get_xticklabels(), rotation=20, ha='right', fontsize=8)

month_agg = df.groupby(['month', 'channel'])['revenue'].mean().reset_index()
# `markers=True` only takes effect together with a `style` variable; a plain
# matplotlib `marker` draws a marker on every line.
sns.lineplot(data=month_agg, x='month', y='revenue', hue='channel',
             hue_order=channels, palette='Set2', linewidth=2, marker='o',
             ax=ax3)
ax3.set_title('Monthly Average Revenue by Channel', fontweight='bold')
ax3.set_xlabel('Month')

fig.suptitle('Marketing Revenue Dashboard', fontsize=14, fontweight='bold')
fig.savefig('sns_dashboard.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved sns_dashboard.png')

import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
# A/B test dashboard: compare two simulated score samples three ways and
# report the t-test result and effect size in the figure title.
sns.set_theme(style='whitegrid')
np.random.seed(1)

control = np.random.normal(70, 12, 100)
treatment = np.random.normal(75, 11, 100)
t_stat, p_val = stats.ttest_ind(control, treatment)

# Cohen's d with a pooled standard deviation. The (n - 1) weights belong
# with *sample* variances, so ddof=1 is required (NumPy defaults to ddof=0).
n_control, n_treatment = len(control), len(treatment)
pooled_var = (
    (n_control - 1) * control.var(ddof=1)
    + (n_treatment - 1) * treatment.var(ddof=1)
) / (n_control + n_treatment - 2)
effect_size = (treatment.mean() - control.mean()) / np.sqrt(pooled_var)

df = pd.DataFrame({
    'score': np.concatenate([control, treatment]),
    'group': np.repeat(['Control', 'Treatment'], 100),
})

fig, axes = plt.subplots(1, 3, figsize=(14, 5))

sns.histplot(data=df, x='score', hue='group', kde=True, alpha=0.4,
             palette='Set1', stat='density', common_norm=False, ax=axes[0])
axes[0].set_title('Score Distributions')
axes[0].set_xlabel('Score')

sns.violinplot(data=df, x='group', y='score', palette='Set1',
               inner='box', ax=axes[1])
axes[1].set_title('Violin Comparison')
# The violin's x axis is the group and the score is on y, so label it
# accordingly rather than calling the x axis "Score".
axes[1].set_xlabel('Group')
axes[1].set_ylabel('Score')

sns.ecdfplot(data=df, x='score', hue='group', palette='Set1',
             linewidth=2, ax=axes[2])
axes[2].set_title('ECDF Comparison')
axes[2].set_xlabel('Score')

fig.suptitle(f'A/B Test: t={t_stat:.2f}, p={p_val:.4f}, d={effect_size:.2f}',
             fontsize=13, fontweight='bold')
fig.tight_layout()
fig.savefig('hypothesis_dashboard.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved hypothesis_dashboard.png')

import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
# Penguins EDA report: a text banner on top of four analytical panels.
sns.set_theme(style='whitegrid', palette='colorblind')
penguins = sns.load_dataset('penguins').dropna()

fig = plt.figure(figsize=(15, 10), facecolor='white')
gs = GridSpec(3, 3, figure=fig, hspace=0.5, wspace=0.4)

# Banner: an axes used purely as a text canvas (frame and ticks hidden).
ax0 = fig.add_subplot(gs[0, :])
ax0.text(0.5, 0.6, 'Palmer Penguins — Deep Dive EDA',
         ha='center', va='center', fontsize=16, fontweight='bold',
         transform=ax0.transAxes)
# Derive the counts from the data so the subtitle cannot drift out of sync
# with the rows that survive dropna().
n_species = penguins['species'].nunique()
n_islands = penguins['island'].nunique()
ax0.text(0.5, 0.1,
         f'n={len(penguins)} penguins | {n_species} species | {n_islands} islands',
         ha='center', va='center', fontsize=11, color='gray',
         transform=ax0.transAxes)
ax0.axis('off')

# Scatter bill dimensions
ax1 = fig.add_subplot(gs[1, :2])
sns.scatterplot(data=penguins, x='bill_length_mm', y='bill_depth_mm',
                hue='species', style='island', s=60, alpha=0.7, ax=ax1)
ax1.set_title('Bill Dimensions by Species & Island', fontweight='bold')

# Count by species
ax2 = fig.add_subplot(gs[1, 2])
sns.countplot(data=penguins, x='species', hue='sex',
              palette='Set2', ax=ax2)
ax2.set_title('Counts by Species & Sex', fontweight='bold')
ax2.set_xlabel('Species')

# Body mass violin, split by sex within each species
ax3 = fig.add_subplot(gs[2, :2])
sns.violinplot(data=penguins, x='species', y='body_mass_g',
               hue='sex', split=True, palette='pastel', inner='box', ax=ax3)
ax3.set_title('Body Mass Distribution', fontweight='bold')

# Flipper ECDF
ax4 = fig.add_subplot(gs[2, 2])
sns.ecdfplot(data=penguins, x='flipper_length_mm', hue='species',
             palette='colorblind', linewidth=2, ax=ax4)
ax4.set_title('Flipper Length ECDF', fontweight='bold')
ax4.set_xlabel('Flipper Length (mm)')

# Pass the figure's facecolor explicitly so the saved file keeps the white
# background requested above.
fig.savefig('penguins_deep_dive.png', dpi=150, bbox_inches='tight',
            facecolor=fig.get_facecolor())
plt.close()
print('Saved penguins_deep_dive.png')

import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
# Product analytics dashboard: daily trend, monthly means, sales vs returns.
sns.set_theme(style='darkgrid', palette='Set2')
np.random.seed(77)

n_days = 90
dates = pd.date_range('2024-01-01', periods=n_days)
products = ['Widget', 'Gadget', 'Doohickey']

# Simulate one random-walk sales series per product; returns are roughly
# 5 % of sales plus noise. Both are floored at zero.
rows = []
for prod in products:
    base = np.random.uniform(80, 150)
    trend = np.cumsum(np.random.randn(n_days) * 2) + base
    for d, v in zip(dates, trend):
        rows.append({'date': d, 'product': prod, 'sales': max(0, v),
                     'returns': max(0, v * 0.05 + np.random.randn())})
df = pd.DataFrame(rows)
df['month'] = df['date'].dt.month

# Explicit product -> colour mapping shared by every panel and by the
# hand-drawn regression lines. Relying on the axes colour cycle would colour
# the lines in groupby (alphabetical) order, which does not match the order
# seaborn assigns hues to the points.
product_colors = dict(zip(products, sns.color_palette('Set2', n_colors=len(products))))

fig = plt.figure(figsize=(14, 9))
gs = GridSpec(2, 2, figure=fig, hspace=0.4, wspace=0.35)

ax1 = fig.add_subplot(gs[0, :])
sns.lineplot(data=df, x='date', y='sales', hue='product',
             hue_order=products, palette=product_colors, linewidth=2, ax=ax1)
ax1.set_title('Daily Sales Trend by Product', fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Sales ($)')

ax2 = fig.add_subplot(gs[1, 0])
month_df = df.groupby(['month', 'product'])['sales'].mean().reset_index()
sns.barplot(data=month_df, x='month', y='sales', hue='product',
            hue_order=products, palette=product_colors, ax=ax2)
ax2.set_title('Mean Monthly Sales', fontweight='bold')
ax2.set_xlabel('Month')

ax3 = fig.add_subplot(gs[1, 1])
sns.scatterplot(data=df, x='sales', y='returns', hue='product',
                hue_order=products, palette=product_colors,
                s=20, alpha=0.4, ax=ax3)
# Add a least-squares line per product, coloured like that product's points
for prod, grp in df.groupby('product'):
    m, b = np.polyfit(grp['sales'], grp['returns'], 1)
    x_l = np.linspace(grp['sales'].min(), grp['sales'].max(), 50)
    ax3.plot(x_l, m * x_l + b, color=product_colors[prod],
             linewidth=1.5, alpha=0.8)
ax3.set_title('Sales vs Returns', fontweight='bold')

fig.suptitle('Product Analytics Dashboard', fontsize=14, fontweight='bold')
fig.savefig('time_series_dashboard.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved time_series_dashboard.png')

import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
# Diamonds report: text banner plus four panels. Distribution panels use a
# 2000-row sample; the banner figures and the count heatmap use the full data.
sns.set_theme(style='whitegrid', palette='Set2')
diamonds = sns.load_dataset('diamonds')
sample = diamonds.sample(2000, random_state=42)
cut_order = ['Fair', 'Good', 'Very Good', 'Premium', 'Ideal']

fig = plt.figure(figsize=(14, 11), facecolor='white')
gs = GridSpec(3, 3, figure=fig, hspace=0.5, wspace=0.4)

# Banner: an axes used purely as a text canvas (frame and ticks hidden).
ax0 = fig.add_subplot(gs[0, :])
ax0.text(0.5, 0.6, 'Diamonds Dataset — Comprehensive Report',
         ha='center', fontsize=15, fontweight='bold', transform=ax0.transAxes)
ax0.text(0.5, 0.1,
         f'{len(diamonds):,} diamonds | {diamonds.cut.nunique()} cuts | '
         f'Price: ${diamonds.price.min():,}–${diamonds.price.max():,}',
         ha='center', fontsize=10, color='gray', transform=ax0.transAxes)
ax0.axis('off')

ax1 = fig.add_subplot(gs[1, :2])
sns.violinplot(data=sample, x='cut', y='price', palette='YlOrRd',
               inner='quartile', order=cut_order, ax=ax1)
ax1.set_title('Price by Cut', fontweight='bold')

ax2 = fig.add_subplot(gs[1, 2])
clarity_order = ['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']
sns.scatterplot(data=sample, x='carat', y='price', hue='clarity',
                palette='RdYlGn', s=10, alpha=0.4,
                hue_order=clarity_order, ax=ax2, legend=False)
ax2.set_title('Carat vs Price', fontweight='bold')

ax3 = fig.add_subplot(gs[2, :2])
sns.ecdfplot(data=sample, x='price', hue='cut', palette='YlOrRd',
             linewidth=2, hue_order=cut_order, ax=ax3)
ax3.set_title('Price ECDF by Cut', fontweight='bold')

ax4 = fig.add_subplot(gs[2, 2])
# State `observed` explicitly: it keeps every cut/color combination in the
# table and avoids depending on pandas' changing default for categorical
# groupers.
counts = (
    diamonds.groupby(['cut', 'color'], observed=False)
    .size()
    .unstack()
    .loc[cut_order]
)
sns.heatmap(counts, cmap='Blues', annot=True, fmt='d',
            linewidths=0.3, ax=ax4, cbar=False)
ax4.set_title('Count: Cut × Color', fontweight='bold')

fig.suptitle('Diamond Market Analysis', fontsize=15, fontweight='bold', y=1.01)
fig.savefig('diamonds_report.png', dpi=150, bbox_inches='tight',
            facecolor=fig.get_facecolor())
plt.close()
print('Saved diamonds_report.png')

import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
# Exercise scaffold: theme, dataset and an empty 2x3 grid are prepared;
# the panels themselves are left for the reader to fill in.
sns.set_theme(style='whitegrid', palette='Set2')

# Choose your dataset
tips = sns.load_dataset('tips')

figure_size = (13, 9)
grid_spacing = {'hspace': 0.4, 'wspace': 0.35}
fig = plt.figure(figsize=figure_size)
gs = GridSpec(2, 3, figure=fig, **grid_spacing)

# TODO: Panel 1 — spanning top row (gs[0, :])
# TODO: Panel 2 — bottom-left (gs[1, :2])
# TODO: Panel 3 — bottom-right (gs[1, 2])
# Pick 4 different plot types across panels

# TODO: suptitle banner
# TODO: save 'my_sns_dashboard.png' at 150 DPI

plt.close()
print('Dashboard saved!')