Loading Module…

🎨 Seaborn

32 topics • Click any card to expand

1. Setup & Themes

Seaborn works with pandas DataFrames. Set a theme once at the top of your notebook and all plots inherit consistent styling.

sns.set_theme and built-in datasets
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Configure the global look once, near the top of the notebook; every plot
# drawn afterwards inherits it.
sns.set_theme(style='whitegrid', palette='muted', font_scale=1.1)

# Fetch three of seaborn's practice datasets in one pass.
tips, iris, titanic = (sns.load_dataset(name)
                       for name in ('tips', 'iris', 'titanic'))

# Inspect the tips table: first rows, dimensions, then column dtypes.
print(tips.head())
print(f"\ntips shape: {tips.shape}")
print(tips.dtypes)
Available styles and palettes
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Compare seaborn styles (top row) and colour palettes (bottom row) on the
# same three-bar chart.
styles   = ['darkgrid','whitegrid','dark','white','ticks']
palettes = ['deep','muted','pastel','bright','dark','colorblind']
labels, heights = ['A', 'B', 'C'], [3, 5, 4]

fig = plt.figure(figsize=(12, 6))

# Top row: style parameters are read when an Axes is created, so each subplot
# must be added inside the axes_style() context. Creating all six axes up front
# with plt.subplots() would give every panel the same look regardless of the
# name shown in its title.
for position, style in enumerate(styles[:3], start=1):
    with sns.axes_style(style):
        ax = fig.add_subplot(2, 3, position)
        sns.barplot(x=labels, y=heights, ax=ax, palette='deep')
    ax.set_title(f'style={style}')

# Bottom row: same chart, varying only the palette (uses the active style).
for position, pal in enumerate(palettes[:3], start=4):
    ax = fig.add_subplot(2, 3, position)
    sns.barplot(x=labels, y=heights, ax=ax, palette=pal)
    ax.set_title(f'palette={pal}')

fig.tight_layout()
fig.savefig('styles_palettes.png', dpi=80)
plt.close(fig)
Custom color palettes and sns.color_palette
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Build three palettes: a sequential one, one from explicit hex codes, and an
# evenly spaced HUSL one.
blues   = sns.color_palette("Blues", 6)
custom  = sns.color_palette(["#e74c3c","#3498db","#2ecc71","#f39c12"])
husl    = sns.color_palette("husl", 8)

fig, axes = plt.subplots(1, 3, figsize=(12, 2))

# NOTE: sns.palplot(pal, size=1) has no `ax` parameter and always opens its own
# figure, so it cannot draw into these subplots (passing ax= raises TypeError).
# The swatches are therefore drawn by hand, one unit-wide rectangle per colour.
for ax, (pal, name) in zip(axes, [(blues,'Blues-6'),(custom,'Custom-4'),(husl,'HUSL-8')]):
    for j, c in enumerate(pal):
        ax.add_patch(plt.Rectangle((j, 0), 1, 1, color=c))
    ax.set_xlim(0, len(pal)); ax.set_ylim(0, 1)
    ax.set_xticks([]); ax.set_yticks([])
    ax.set_title(name)

fig.suptitle('Color Palette Examples', y=1.05)
fig.tight_layout()
# The suptitle sits above the canvas (y > 1); a tight bounding box keeps it in
# the saved image.
fig.savefig('palettes_custom.png', dpi=80, bbox_inches='tight')
plt.close(fig)
print("Blues palette (RGB):", [tuple(round(v,2) for v in c) for c in blues])
sns.despine and figure-level vs axes-level functions
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# --- Axes-level functions: draw into the Axes passed via ax=, so they fit
# --- into any subplot grid you build yourself.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

sns.boxplot(data=tips, x='day', y='total_bill', palette='pastel', ax=axes[0])
axes[0].set_title('Axes-level: sns.boxplot (axes[0])')
# despine removes the top/right spines; offset moves the remaining spines away
# from the data and trim shortens them to the tick range.
sns.despine(ax=axes[0], offset=8, trim=True)

# A second axes-level call on the other subplot
sns.stripplot(data=tips, x='day', y='total_bill',
              color='steelblue', alpha=0.4, jitter=True, ax=axes[1])
axes[1].set_title('Axes-level: sns.stripplot (axes[1])')
sns.despine(ax=axes[1])

# Title text previously contained a mis-encoded em dash.
plt.suptitle('Axes-level functions — full subplot control', fontsize=13, y=1.02)
plt.tight_layout()
plt.savefig('setup_despine_axes_level.png', dpi=80)
plt.close()

# --- Figure-level functions: create their OWN figure / FacetGrid.
# You cannot pass ax= to figure-level functions (displot, catplot, relplot, etc.)
# A palette only takes effect when a hue variable is mapped, so hue='time' is
# set here; the legend is switched off because the facet titles already name
# each level.
g = sns.displot(data=tips, x='total_bill', col='time', hue='time',
                kind='hist', kde=True, bins=20,
                height=3.5, aspect=1.1, palette='Set2', legend=False)
g.set_titles('{col_name}')
g.figure.suptitle('Figure-level: sns.displot (owns its own Figure)', y=1.04)
plt.tight_layout()
plt.savefig('setup_despine_figure_level.png', dpi=80)
plt.close()
print("despine + figure-level vs axes-level demo saved.")
💼 Real-World: Setting Up a Project-Wide Visual Style
A data team standardizes all report charts with a single style config at the top of every notebook.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# House style shared by every report notebook: rc overrides are collected in
# one dict so the set_theme call stays short.
team_rc = {
    'figure.figsize':    (9, 4),
    'axes.spines.top':   False,
    'axes.spines.right': False,
    'grid.linewidth':    0.5,
}
# The 'colorblind' palette keeps charts readable for colour-blind viewers.
sns.set_theme(style='whitegrid', palette='colorblind',
              font_scale=1.0, rc=team_rc)

# Small synthetic revenue table used to sanity-check the theme.
np.random.seed(42)
quarter_names = ['Q1','Q2','Q3','Q4']
region_names = ['North', 'South', 'East']
df = pd.DataFrame({
    'quarter': quarter_names * len(region_names),
    'region':  [region for region in region_names for _ in quarter_names],
    'revenue': np.random.uniform(100, 500, 12).round(1),
})

# barplot returns the Axes it drew on; label it and save through its figure.
ax = sns.barplot(data=df, x='quarter', y='revenue', hue='region')
ax.set_title('Revenue by Quarter & Region')
ax.set_ylabel('Revenue ($K)')
ax.figure.tight_layout()
ax.figure.savefig('rw_setup_themes.png', dpi=80)
plt.close(ax.figure)
🏋️ Practice: Switch Themes and Build a Custom Palette
1) Apply three different Seaborn styles ('darkgrid', 'white', 'ticks') to the same bar chart of [3,7,5,9,4] and save each. 2) Create a custom 5-color palette using hex codes of your choice and pass it to a barplot. 3) Use sns.set_theme with rc overrides to remove top/right spines and set figure size to (8, 3).
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter scaffold for the theme/palette exercise. Only the data, the style
# loop and the custom colour list are live code; every plotting step is left
# as a commented-out TODO for the learner to enable.
values = [3, 7, 5, 9, 4]
labels = ['A', 'B', 'C', 'D', 'E']

# 1. Apply three styles and save each
for style in ['darkgrid', 'white', 'ticks']:
    # The figure is created inside the axes_style context so the temporary
    # style applies to its axes.
    with sns.axes_style(style):
        fig, ax = plt.subplots(figsize=(6, 3))
        # TODO: sns.barplot(x=labels, y=values, ax=ax, palette='muted')
        # ax.set_title(f'Style: {style}')
        # plt.tight_layout()
        # plt.savefig(f'style_{style}.png', dpi=80)
        # plt.close()
        # NOTE: until the plt.close() line above is enabled, each iteration
        # leaves its figure open.
        pass

# 2. Custom 5-color palette
my_colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6']
# TODO: fig, ax = plt.subplots(figsize=(6, 3))
# TODO: sns.barplot(x=labels, y=values, palette=my_colors, ax=ax)
# TODO: ax.set_title('Custom Palette')
# TODO: plt.tight_layout(); plt.savefig('custom_palette.png', dpi=80); plt.close()

# 3. sns.set_theme with rc overrides
# TODO: sns.set_theme(style='whitegrid', rc={
# TODO:     'figure.figsize': (8, 3),
# TODO:     'axes.spines.top': False,
# TODO:     'axes.spines.right': False,
# TODO: })
# TODO: fig, ax = plt.subplots()
# TODO: sns.barplot(x=labels, y=values, palette='deep', ax=ax)
# TODO: plt.tight_layout(); plt.savefig('rc_override.png', dpi=80); plt.close()
✅ Practice Checklist
2. Distribution Plots

histplot and kdeplot show how data is distributed. displot combines both into a faceted figure-level function.

histplot and kdeplot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (bill_ax, tip_ax) = plt.subplots(1, 2, figsize=(11, 4))

# Left panel: histogram of the bill with a KDE curve drawn over the bars.
sns.histplot(data=tips, x='total_bill', bins=25, kde=True,
             color='steelblue', ax=bill_ax)
bill_ax.set_title('Total Bill Distribution')

# Right panel: filled KDE curves only, one per level of `sex`, so the two
# groups can be compared directly.
sns.kdeplot(data=tips, x='tip', hue='sex', fill=True, alpha=0.4, ax=tip_ax)
tip_ax.set_title('Tip Distribution by Gender')

fig.tight_layout()
fig.savefig('dist_hist_kde.png', dpi=80)
plt.close(fig)
displot — faceted distributions
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# One histogram facet per day, bars coloured by sex, KDE curve overlaid.
g = sns.displot(
    data=tips, x='total_bill', hue='sex',
    col='day', kind='hist', kde=True,
    bins=15, height=3.5, aspect=0.9,
    palette='Set2'
)
g.set_titles('{col_name}')
g.set_xlabels('Total Bill ($)')
# Use the grid's own tight_layout(): it lays out the facets inside the area
# that excludes the outside legend, whereas plt.tight_layout() can stretch the
# facets underneath it.
g.tight_layout()
# FacetGrid.savefig defaults to bbox_inches='tight', so the legend is kept in
# the saved image.
g.savefig('dist_displot.png', dpi=80)
plt.close(g.figure)
ECDF and rug plot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (ecdf_ax, kde_ax) = plt.subplots(1, 2, figsize=(11, 4))

# Left panel: empirical cumulative distribution of the bill, one curve per
# meal time.
sns.ecdfplot(data=tips, x='total_bill', hue='time',
             palette='Set1', linewidth=2, ax=ecdf_ax)
ecdf_ax.set_title('ECDF of Total Bill by Meal Time')
ecdf_ax.set_xlabel('Total Bill ($)')

# Right panel: filled KDE per smoker group, then a rug of the individual
# observations added on the same axes with the same grouping.
by_smoker = dict(data=tips, x='total_bill', hue='smoker', ax=kde_ax)
sns.kdeplot(fill=True, alpha=0.3, **by_smoker)
sns.rugplot(height=0.06, **by_smoker)
kde_ax.set_title('KDE + Rug: Total Bill by Smoker')

fig.tight_layout()
fig.savefig('dist_ecdf_rug.png', dpi=80)
plt.close(fig)
Overlaid histograms with stat='density' and displot kind='ecdf'
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
sns.set_theme(style='whitegrid')

# Synthetic spend data for three customer tiers
tiers = {'Bronze': np.random.normal(30, 10, 300),
         'Silver': np.random.normal(70, 15, 200),
         'Gold':   np.random.normal(130, 25, 100)}
tier_colors = {'Bronze': '#cd7f32', 'Silver': '#aaa9ad', 'Gold': '#ffd700'}

# Long-format table built per tier in one vectorised step: negative draws are
# clipped to 0 and values rounded to cents.
df = pd.concat(
    [pd.DataFrame({'tier': tier, 'spend': vals.clip(0).round(2)})
     for tier, vals in tiers.items()],
    ignore_index=True,
)

# Figure 1: overlaid histograms normalised to density so tiers of different
# size are comparable. A single Axes is used: the ECDF below is a figure-level
# plot that owns its own figure, so a second subplot here would stay empty.
fig, ax = plt.subplots(figsize=(7, 4))
for tier, color in tier_colors.items():
    sns.histplot(data=df[df['tier'] == tier], x='spend', stat='density',
                 bins=25, alpha=0.45, color=color, label=tier,
                 kde=True, ax=ax)
ax.set_title('Spend Distribution by Tier (stat=density)')
ax.set_xlabel('Monthly Spend ($)')
ax.legend(title='Tier')
fig.tight_layout()
fig.savefig('dist_density_overlay.png', dpi=80)
plt.close(fig)

# Figure 2: displot with kind='ecdf' — figure-level, drawn into a fresh figure.
g = sns.displot(data=df, x='spend', hue='tier',
                kind='ecdf', linewidth=2.5,
                palette=tier_colors,
                height=4, aspect=1.6)
g.set_axis_labels('Monthly Spend ($)', 'Proportion')
g.figure.suptitle('ECDF of Spend by Customer Tier (displot kind=ecdf)', y=1.03)
# The grid's own tight_layout()/savefig() keep the outside legend clear of the
# axes and include the above-canvas suptitle (savefig defaults to a tight bbox).
g.tight_layout()
g.savefig('dist_ecdf_displot.png', dpi=80)
plt.close(g.figure)
print("Density overlay and ECDF displot saved.")
💼 Real-World: Customer Spend Distribution by Segment
A marketing analyst compares spend distributions across customer segments to set targeted promotional thresholds.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(42)
# Simulated monthly spend per segment; draws happen in VIP, Regular,
# Occasional order.
segments = {
    'VIP':       np.random.normal(350, 80,  200),
    'Regular':   np.random.normal(150, 50,  500),
    'Occasional':np.random.normal(60,  30,  300),
}

# Flatten to one row per customer; negative draws are clipped to zero.
df = pd.DataFrame([
    {'segment': seg, 'spend': round(v, 2)}
    for seg, vals in segments.items()
    for v in vals.clip(0)
])

sns.set_theme(style='whitegrid')
segment_colors = {'VIP':'#e74c3c','Regular':'#3498db','Occasional':'#2ecc71'}
fig, ax = plt.subplots(figsize=(10, 4))
# No ax= is given, so kdeplot draws on the current axes, i.e. the one just
# created.
sns.kdeplot(data=df, x='spend', hue='segment',
            fill=True, alpha=0.3, linewidth=2,
            palette=segment_colors)

# Dashed markers for the promotional thresholds, each labelled near the top
# of the plot.
thresholds = [(100,'Entry'), (250,'Mid'), (400,'Premium')]
for thresh, label in thresholds:
    ax.axvline(thresh, color='gray', linestyle='--', linewidth=1, alpha=0.7)
    label_y = ax.get_ylim()[1]*0.9
    ax.text(thresh+3, label_y, label, fontsize=8, color='gray')

ax.set_xlabel('Monthly Spend ($)')
ax.set_title('Customer Spend Distribution by Segment')
fig.tight_layout()
fig.savefig('rw_dist_spend.png', dpi=80)
plt.close(fig)
🏋️ Practice: Overlaid KDE for Two Groups
Load the 'tips' dataset. 1) Plot overlaid KDE curves for 'total_bill' split by 'smoker' (fill=True, alpha=0.35). 2) Add vertical lines at each group's median. 3) In a second axes, plot an ECDF of 'tip' split by 'day'. Save both as a single figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Starter scaffold for the overlaid-KDE exercise. All plotting calls are
# commented-out TODOs, so running this unchanged saves a figure with two empty
# axes.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# 1. Overlaid KDE for total_bill by smoker
# TODO: sns.kdeplot(data=tips, x='total_bill', hue='smoker',
# TODO:             fill=True, alpha=0.35, ax=axes[0])

# 2. Add vertical lines at each group's median
for smoker_val, color in [('Yes', '#e74c3c'), ('No', '#3498db')]:
    # total_bill values for this smoker group; the TODO below uses its median.
    grp = tips[tips['smoker'] == smoker_val]['total_bill']
    # TODO: axes[0].axvline(grp.median(), color=color, linestyle='--', linewidth=1.5,
    # TODO:                 label=f'Median ({smoker_val}): ${grp.median():.1f}')
    pass
# TODO: axes[0].legend(); axes[0].set_title('Total Bill KDE by Smoker')

# 3. ECDF of tip by day
# TODO: sns.ecdfplot(data=tips, x='tip', hue='day', palette='tab10', ax=axes[1])
# TODO: axes[1].set_title('Tip ECDF by Day')

plt.tight_layout()
plt.savefig('practice_dist.png', dpi=80)
plt.close()
print("Saved practice_dist.png")
✅ Practice Checklist
3. Categorical Plots — Bar & Count

barplot shows the mean ± confidence interval (CI) of a numeric variable for each category. countplot shows how often each category occurs. Both accept hue to encode a third dimension.

barplot and countplot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (mean_ax, count_ax) = plt.subplots(1, 2, figsize=(11, 4))

# Left panel: barplot aggregates `tip` per day (mean by default) and draws an
# error bar on each bar; hue splits every day by sex.
sns.barplot(data=tips, x='day', y='tip', hue='sex',
            palette='Set2', ax=mean_ax)
mean_ax.set_title('Avg Tip by Day & Gender')
mean_ax.set_ylabel('Mean Tip ($)')

# Right panel: countplot needs no y; it tallies rows per day, split by time.
sns.countplot(data=tips, x='day', hue='time',
              palette='pastel', ax=count_ax)
count_ax.set_title('Meal Count by Day & Time')

fig.tight_layout()
fig.savefig('cat_bar_count.png', dpi=80)
plt.close(fig)
Horizontal bar with order
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# Rank the days by mean tip (highest first) so the bars read top-down.
# `day` is a categorical column: observed=True groups only on levels that
# actually occur and avoids the pandas FutureWarning about the changing
# default for categorical groupers.
order = (tips.groupby('day', observed=True)['tip']
             .mean()
             .sort_values(ascending=False)
             .index)

fig, ax = plt.subplots(figsize=(7, 4))
# Category on y and value on x (with orient='h') gives horizontal bars;
# `order` fixes the bar sequence.
sns.barplot(data=tips, y='day', x='tip',
            order=order, palette='Blues_d', orient='h', ax=ax)
ax.set_xlabel('Average Tip ($)')
ax.set_title('Average Tip by Day of Week (sorted)')
plt.tight_layout()
plt.savefig('cat_hbar.png', dpi=80)
plt.close()
Grouped bar with error bars and value labels
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(7)
sns.set_theme(style='whitegrid')

# Simulated quarterly sales by product.
# Several observations are drawn per (quarter, product) cell: with a single
# row per cell the bar's "mean" is just that one value and the bootstrap CI
# collapses to zero width, so the error bars the title promises would show
# nothing.
products = ['Alpha', 'Beta', 'Gamma']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
base_sales = {'Alpha': 120, 'Beta': 85, 'Gamma': 160}
obs_per_cell = 8
rows = [
    {'quarter': q, 'product': p, 'sales': s}
    for q in quarters
    for p in products
    for s in np.random.normal(base_sales[p], 15, size=obs_per_cell)
]
df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(9, 4))
sns.barplot(data=df, x='quarter', y='sales', hue='product',
            palette='Set2', capsize=0.08, ax=ax)

# Value labels on bars; zero-height patches are skipped so only real bars get
# a label.
for bar in ax.patches:
    if bar.get_height() > 0:
        ax.text(bar.get_x() + bar.get_width()/2,
                bar.get_height() + 1,
                f'{bar.get_height():.0f}',
                ha='center', va='bottom', fontsize=7)

# Title text previously contained a mis-encoded plus-minus sign.
ax.set_title('Quarterly Sales by Product (mean ± 95% CI)')
ax.set_ylabel('Sales ($K)')
plt.tight_layout()
plt.savefig('cat_grouped_bar.png', dpi=80)
plt.close()
sns.catplot, dodge=True grouped bars, estimator=median
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(21)
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# --- catplot: figure-level wrapper for all categorical plot kinds ---
# kind='bar' with estimator=median and dodge=True (default) for grouped bars
g = sns.catplot(
    data=tips, x='day', y='total_bill', hue='time',
    kind='bar',
    estimator=np.median,       # use median instead of default mean
    errorbar=('ci', 95),       # 95 % bootstrap CI
    dodge=True,                # bars side-by-side (not stacked)
    palette='Set1',
    capsize=0.10,
    height=4, aspect=1.4,
    order=['Thur', 'Fri', 'Sat', 'Sun']
)
g.set_axis_labels('Day', 'Median Total Bill ($)')
# No set_titles() call here: the grid has no row/col facets, so it would title
# nothing, and the old template text ("Tip vs Bill") did not describe this
# chart anyway. The figure-level suptitle carries the title.
g.figure.suptitle('catplot: Median Total Bill by Day & Meal Time (dodge=True)', y=1.03)
# The grid's own tight_layout()/savefig() keep the outside legend clear of the
# axes and include the above-canvas suptitle (savefig defaults to a tight bbox).
g.tight_layout()
g.savefig('cat_catplot_median_dodge.png', dpi=80)
plt.close(g.figure)

# --- second figure: countplot with dodge to compare two binary variables ---
fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(data=tips, x='day', hue='smoker',
              dodge=True, palette='Dark2',
              order=['Thur', 'Fri', 'Sat', 'Sun'], ax=ax)
ax.set_title('Diner Count by Day & Smoker Status (dodge=True)')
ax.set_ylabel('Count')
plt.tight_layout()
plt.savefig('cat_count_dodge.png', dpi=80)
plt.close()
print("catplot (median, dodge) and countplot saved.")
💼 Real-World: Product Return Rate by Category
An operations analyst visualizes return rates across product categories to prioritize quality improvements.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(7)
categories = ['Electronics','Clothing','Books','Toys','Sports','Home','Beauty']

# Simulate 200 orders per category. Each category first draws its own return
# probability; every order then draws a returned flag and a sales channel, in
# that order.
data = []
for cat in categories:
    base_rate = np.random.uniform(0.03, 0.18)
    for _ in range(200):
        returned = int(np.random.random() < base_rate)
        channel = np.random.choice(['Online','In-store'], p=[0.7, 0.3])
        data.append({'category': cat, 'returned': returned, 'channel': channel})
df = pd.DataFrame(data)

# Categories sorted from highest to lowest observed return rate.
rate_by_category = df.groupby('category')['returned'].mean()
order = rate_by_category.sort_values(ascending=False).index.tolist()

sns.set_theme(style='whitegrid')
channel_colors = {'Online':'#3498db','In-store':'#e67e22'}
fig, ax = plt.subplots(figsize=(9, 5))
sns.barplot(data=df, x='category', y='returned',
            hue='channel', order=order,
            palette=channel_colors,
            capsize=0.08, ax=ax)

# Reference line at the overall return rate across all orders.
overall_rate = df['returned'].mean()
ax.axhline(overall_rate, color='red', linestyle='--',
           linewidth=1.5, label=f"Overall avg: {overall_rate:.1%}")

# Show the y axis as whole percentages.
ax.set_ylabel('Return Rate')
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:.0%}'))
ax.set_xlabel('')
ax.set_title('Product Return Rate by Category & Channel')
ax.legend()
plt.xticks(rotation=20, ha='right')
fig.tight_layout()
fig.savefig('rw_cat_returns.png', dpi=80)
plt.close(fig)
🏋️ Practice: Grouped Bar with Error Bars
Using the 'tips' dataset: 1) Create a grouped barplot of mean 'total_bill' by 'day' with 'time' as hue (Lunch vs Dinner), using capsize=0.1 to show confidence intervals. 2) Sort the x-axis days in calendar order (Thur, Fri, Sat, Sun). 3) Add a horizontal dashed line at the overall mean. Save the figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter scaffold for the grouped-bar exercise. The plotting calls are
# commented-out TODOs, so running this unchanged saves an empty axes.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(8, 4))

# Calendar order for the x axis; passed as order= in the TODO below.
day_order = ['Thur', 'Fri', 'Sat', 'Sun']

# 1 & 2. Grouped barplot sorted by calendar order
# TODO: sns.barplot(data=tips, x='day', y='total_bill', hue='time',
# TODO:             order=day_order, capsize=0.1, palette='Set2', ax=ax)

# 3. Overall mean reference line
# Mean of total_bill over the whole dataset (not per group).
overall_mean = tips['total_bill'].mean()
# TODO: ax.axhline(overall_mean, color='red', linestyle='--', linewidth=1.5,
# TODO:            label=f'Overall mean: ${overall_mean:.2f}')

# TODO: ax.set_title('Total Bill by Day and Meal Time')
# TODO: ax.set_ylabel('Mean Total Bill ($)')
# TODO: ax.legend()

plt.tight_layout()
plt.savefig('practice_bar.png', dpi=80)
plt.close()
print("Saved practice_bar.png")
✅ Practice Checklist
4. Box Plot & Violin Plot

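Box plots show the 5-number summary (min, Q1, median, Q3, max); by default seaborn draws the whiskers to the most extreme points within 1.5×IQR of the box and plots anything beyond them as individual outliers. Violin plots also show the distribution shape via KDE.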

boxplot and violinplot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left: one box per day, split by meal time via hue.
sns.boxplot(data=tips, x='day', y='total_bill',
            hue='time', palette='Set3', ax=axes[0])
# Title text previously contained a mis-encoded em dash.
axes[0].set_title('Total Bill — Box Plot')

# Right: split=True draws the two hue levels as the two halves of one violin;
# inner='quartile' marks the quartiles inside each half.
sns.violinplot(data=tips, x='day', y='total_bill',
               hue='time', split=True,
               palette='Set2', inner='quartile', ax=axes[1])
axes[1].set_title('Total Bill — Violin Plot')

plt.tight_layout()
plt.savefig('cat_box_violin.png', dpi=80)
plt.close()
boxenplot and stripplot overlay
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(8, 5))

# Both layers share the same data mapping and target axes.
per_day = dict(data=tips, x='day', y='total_bill', ax=ax)

# Base layer: letter-value (boxen) plot, suited to larger datasets.
sns.boxenplot(palette='muted', **per_day)

# Top layer: the raw observations as small, faint, jittered points.
sns.stripplot(color='black', size=3, alpha=0.3, jitter=True, **per_day)

ax.set_title('Total Bill Distribution per Day (boxen + strip)')
ax.set_ylabel('Total Bill ($)')
fig.tight_layout()
fig.savefig('cat_boxen_strip.png', dpi=80)
plt.close(fig)
Side-by-side violin plots with inner box
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(42)
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

fig, (petal_ax, sepal_ax) = plt.subplots(1, 2, figsize=(13, 5))

# Left panel: one violin per species with a miniature box plot inside.
sns.violinplot(data=iris, x='species', y='petal_length',
               palette='Set2', inner='box', ax=petal_ax)
petal_ax.set_title('Petal Length by Species (inner=box)')
petal_ax.set_ylabel('Petal Length (cm)')

# Right panel: three layers on one axes, drawn in order: an empty violin
# (inner=None), a narrow white box without fliers, then the raw points.
sepal_layer = dict(data=iris, x='species', y='sepal_width', ax=sepal_ax)
sns.violinplot(palette='pastel', inner=None, **sepal_layer)
sns.boxplot(width=0.12, fliersize=0,
            boxprops=dict(facecolor='white', zorder=2), **sepal_layer)
sns.stripplot(color='black', size=2.5, alpha=0.4, jitter=True, **sepal_layer)
sepal_ax.set_title('Sepal Width: Violin + Box + Strip')
sepal_ax.set_ylabel('Sepal Width (cm)')

fig.tight_layout()
fig.savefig('cat_violin_layered.png', dpi=80)
plt.close(fig)
swarmplot and pointplot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(7)
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (swarm_ax, point_ax) = plt.subplots(1, 2, figsize=(13, 5))

# Left panel: swarmplot places every observation without overlap (best for
# small datasets); dodge=True separates the two sexes within each day.
sns.swarmplot(data=tips, x='day', y='total_bill', hue='sex',
              dodge=True, palette='Set2', size=4, ax=swarm_ax)
swarm_ax.set_title('swarmplot: Total Bill by Day & Sex')
swarm_ax.set_ylabel('Total Bill ($)')
swarm_ax.legend(title='Sex', loc='upper left')

# Right panel: pointplot shows the per-group estimate with a 95% CI and joins
# the points, which makes trends across ordered categories easy to read.
calendar_days = ['Thur', 'Fri', 'Sat', 'Sun']
sns.pointplot(data=tips, x='day', y='tip', hue='sex',
              order=calendar_days,
              dodge=0.3,
              linestyles=['--', '-'], markers=['o', 's'],
              palette='Set1',
              capsize=0.12, errorbar=('ci', 95),
              ax=point_ax)
point_ax.set_title('pointplot: Mean Tip by Day & Sex (95% CI)')
point_ax.set_ylabel('Mean Tip ($)')
point_ax.legend(title='Sex')

fig.tight_layout()
fig.savefig('cat_swarm_point.png', dpi=80)
plt.close(fig)
print("swarmplot and pointplot saved.")
💼 Real-World: Employee Salary Distribution by Department
An HR analyst compares salary spreads across departments to detect pay equity issues before a compensation review.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(42)
# Per department: (mean salary, standard deviation, headcount).
depts = {
    'Engineering':  (120000, 25000, 80),
    'Sales':        (85000,  20000, 60),
    'Marketing':    (90000,  18000, 40),
    'HR':           (70000,  12000, 30),
    'Data Science': (130000, 22000, 50),
    'Operations':   (75000,  15000, 55),
}
level_names = ['Junior','Mid','Senior']
level_probs = [0.3,0.45,0.25]

# One row per employee. A department's salaries are drawn together and clipped
# to [50k, 200k]; each employee then gets a salary rounded to the nearest
# hundred and a randomly chosen level.
rows = []
for dept, (mean, std, n) in depts.items():
    for s in np.random.normal(mean, std, n).clip(50000, 200000):
        level = np.random.choice(level_names, p=level_probs)
        rows.append({'dept': dept, 'salary': round(s, -2), 'level': level})
df = pd.DataFrame(rows)

# Departments ordered from highest to lowest median salary.
median_by_dept = df.groupby('dept')['salary'].median()
order = median_by_dept.sort_values(ascending=False).index

sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(11, 5))
shared = dict(data=df, x='dept', y='salary', order=order, ax=ax)
# Violin with quartile lines shows each department's spread ...
sns.violinplot(palette='muted', inner='quartile', **shared)
# ... and the individual salaries are overlaid as faint points, with no dodge
# and no legend.
sns.stripplot(hue='level', palette='dark:black', size=2.5,
              alpha=0.35, dodge=False, legend=False, **shared)

# Render y ticks as thousands of dollars, e.g. $120K.
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x,_: f'${x/1000:.0f}K'))
ax.set_xlabel('')
ax.set_ylabel('Annual Salary')
ax.set_title('Salary Distribution by Department')
plt.xticks(rotation=15, ha='right')
fig.tight_layout()
fig.savefig('rw_cat_salary.png', dpi=80)
plt.close(fig)
🏋️ Practice: Side-by-Side Violin Plots
Using the 'tips' dataset: 1) Create side-by-side violin plots of 'total_bill' for each 'day', split by 'sex' (use split=True and hue='sex'). 2) Set inner='quartile' to show quartile lines inside the violins. 3) In a second subplot, overlay a boxplot (width=0.15) and stripplot on the violin. Save both.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter scaffold for the violin exercise. Every plotting call is a
# commented-out TODO, so running this unchanged saves a figure with two empty
# axes.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, axes = plt.subplots(1, 2, figsize=(13, 5))

# 1 & 2. Split violin by sex with quartile lines
# TODO: sns.violinplot(data=tips, x='day', y='total_bill',
# TODO:                hue='sex', split=True, inner='quartile',
# TODO:                palette='Set2', ax=axes[0])
# TODO: axes[0].set_title('Total Bill by Day & Sex (split violin)')

# 3. Violin + box + strip overlay on axes[1]
# TODO: sns.violinplot(data=tips, x='day', y='total_bill',
# TODO:                palette='pastel', inner=None, ax=axes[1])
# TODO: sns.boxplot(data=tips, x='day', y='total_bill',
# TODO:             width=0.15, fliersize=0,
# TODO:             boxprops=dict(facecolor='white', zorder=2), ax=axes[1])
# TODO: sns.stripplot(data=tips, x='day', y='total_bill',
# TODO:               color='black', size=2.5, alpha=0.35, jitter=True, ax=axes[1])
# TODO: axes[1].set_title('Total Bill: Violin + Box + Strip')

plt.tight_layout()
plt.savefig('practice_violin.png', dpi=80)
plt.close()
print("Saved practice_violin.png")
✅ Practice Checklist
5. Scatter & Regression Plots

scatterplot visualizes two numeric variables. regplot / lmplot adds a regression line with confidence interval automatically.

scatterplot with hue and size
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(8, 5))

# Colour encodes meal time; marker area encodes party size, scaled into the
# 40-200 range given by `sizes`.
encoding = dict(hue='time', size='size', sizes=(40, 200))
sns.scatterplot(data=tips, x='total_bill', y='tip',
                alpha=0.7, palette='Set1', ax=ax, **encoding)

ax.set_title('Tip vs Total Bill (size = party size)')
ax.set_xlabel('Total Bill ($)')
ax.set_ylabel('Tip ($)')
fig.tight_layout()
fig.savefig('scatter_hue_size.png', dpi=80)
plt.close(fig)
regplot and lmplot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(7, 4))

# regplot (axes-level): scatter plus a single fitted regression line.
sns.regplot(data=tips, x='total_bill', y='tip',
            scatter_kws=dict(alpha=0.4, s=30),
            line_kws=dict(color='red', linewidth=2), ax=ax)
ax.set_title('regplot: Tip vs Bill')
plt.tight_layout()
plt.savefig('scatter_regplot.png', dpi=80)
plt.close()

# lmplot (figure-level): one regression per hue group; returns a FacetGrid.
g = sns.lmplot(data=tips, x='total_bill', y='tip',
               hue='smoker', palette='Set1',
               scatter_kws=dict(alpha=0.4, s=25),
               height=4, aspect=1.4)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.figure.suptitle('lmplot: Tip vs Bill by Smoker', y=1.02)
# Use the grid's own tight_layout()/savefig(): they lay out the axes inside
# the area that excludes the outside legend, and savefig defaults to a tight
# bbox so the suptitle above the canvas is kept. plt.tight_layout() ignores
# the reserved legend space.
g.tight_layout()
g.savefig('scatter_lmplot.png', dpi=80)
plt.close(g.figure)
Scatter with hue, size, and style encoding
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(10)
sns.set_theme(style='whitegrid')

# Synthetic campaign data: two numeric axes, two categorical columns and a
# numeric budget column used for marker size.
n = 120
df = pd.DataFrame({
    'spend':    np.random.uniform(10, 500, n),
    'revenue':  np.random.uniform(50, 2000, n),
    'channel':  np.random.choice(['Search', 'Social', 'Email'], n),
    'region':   np.random.choice(['North', 'South', 'East'], n),
    'budget':   np.random.uniform(5, 50, n),
})

fig, ax = plt.subplots(figsize=(9, 5))
sns.scatterplot(
    data=df, x='spend', y='revenue',
    hue='channel',    # color
    size='budget',    # marker area
    style='region',   # marker shape
    sizes=(30, 250),
    alpha=0.75,
    palette='tab10',
    ax=ax
)
# Title text previously contained a mis-encoded em dash.
ax.set_title('Revenue vs Spend — hue=channel, size=budget, style=region')
ax.set_xlabel('Ad Spend ($)')
ax.set_ylabel('Revenue ($)')
# Anchor the legend just outside the right edge so it does not cover points.
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('scatter_hue_size_style.png', dpi=80)
plt.close()
sns.relplot with col_wrap and per-hue marker styles
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(33)
sns.set_theme(style='whitegrid', font_scale=0.9)

# Synthetic multi-channel, multi-region dataset: revenue is spend times a
# per-channel slope plus noise proportional to spend.
channels = ['Search', 'Social', 'Email', 'Display', 'Affiliate']
regions  = ['North', 'South', 'East']
rows = []
for ch in channels:
    slope = {'Search': 4.0, 'Social': 2.5, 'Email': 5.5,
             'Display': 1.5, 'Affiliate': 3.2}[ch]
    for region in regions:
        n = 30
        spend = np.random.uniform(200, 8000, n)
        rev   = spend * slope + np.random.randn(n) * spend * 0.25
        for s, r in zip(spend, rev):
            rows.append({'channel': ch, 'region': region,
                         'spend': round(s, 0), 'revenue': round(r, 0)})
df = pd.DataFrame(rows)

# relplot with col=channel, col_wrap=3, hue+style=region:
# each region gets its own colour AND its own marker.
g = sns.relplot(
    data=df, x='spend', y='revenue',
    col='channel', col_wrap=3,
    hue='region', style='region',     # different marker per region
    markers=['o', 's', '^'],          # explicit marker list
    palette='Set2',
    alpha=0.65, s=40,
    height=3, aspect=1.2,
    kind='scatter'
)
g.set_titles('{col_name}')
g.set_axis_labels('Ad Spend ($)', 'Revenue ($)')
g.figure.suptitle('relplot: Spend vs Revenue — col_wrap=3, marker per region', y=1.03)
# relplot has already drawn the legend; calling g.add_legend() again would add
# a second one and widen the figure a second time. Retitle the existing legend.
g.legend.set_title('Region')
# The grid's own tight_layout()/savefig() respect the space reserved for the
# outside legend and include the above-canvas suptitle (tight bbox by default).
g.tight_layout()
g.savefig('scatter_relplot_colwrap_markers.png', dpi=80)
plt.close(g.figure)
print("relplot col_wrap + marker styles saved.")
💼 Real-World: Advertising Spend vs Revenue
A marketing data scientist plots ad spend against revenue by channel to compare ROI and fit regression lines.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic ad data is reproducible.
np.random.seed(5)
channels = ['Search','Social','Display','Email']
# Revenue generated per dollar of spend, per channel.
slopes = {'Search':4.5,'Social':3.0,'Display':1.8,'Email':6.0}
rows = []
for ch in channels:
    n      = 50
    spend  = np.random.uniform(500, 10000, n)
    # Noise is proportional to spend, so larger budgets scatter more.
    rev    = spend * slopes[ch] + np.random.randn(n) * spend * 0.3
    for s, r in zip(spend, rev):
        rows.append({'channel':ch,'spend':round(s,0),'revenue':round(r,0)})
df = pd.DataFrame(rows)

# One regression panel per channel (2 per row), coloured by channel.
sns.set_theme(style='whitegrid')
g = sns.lmplot(
    data=df, x='spend', y='revenue', hue='channel',
    col='channel', col_wrap=2,
    scatter_kws=dict(alpha=0.5, s=25),
    height=3.5, aspect=1.2,
    palette='tab10'
)
g.set_axis_labels('Ad Spend ($)', 'Revenue ($)')
g.set_titles('{col_name}')
g.figure.suptitle('Ad Spend vs Revenue by Channel', y=1.02, fontsize=13)

# Print ROI per channel (total revenue divided by total spend)
print("Estimated ROI (revenue/spend):")
for ch in channels:
    sub = df[df.channel==ch]
    roi = sub.revenue.sum() / sub.spend.sum()
    print(f"  {ch:8s}: {roi:.2f}x")
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('rw_scatter_ads.png', dpi=80, bbox_inches='tight')
plt.close()
🏋️ Practice: Scatter with hue, size, and style
Using the 'tips' dataset: 1) Create a scatterplot of 'total_bill' vs 'tip' using hue='day', size='size' (sizes=(30,200)), and style='smoker'. 2) In a second subplot, add a regression line using regplot for smokers vs non-smokers separately (two calls). 3) Label axes clearly. Save as a single wide figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter code for the practice exercise: the TODO lines below are the
# intended solution, left commented out for the reader to fill in.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# One wide figure with two side-by-side panels.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# 1. Scatter with hue=day, size=size, style=smoker
# TODO: sns.scatterplot(
# TODO:     data=tips, x='total_bill', y='tip',
# TODO:     hue='day', size='size', style='smoker',
# TODO:     sizes=(30, 200), alpha=0.7, palette='tab10', ax=axes[0]
# TODO: )
# TODO: axes[0].set_title('Tip vs Bill — day/size/smoker encoded')

# 2. Regression lines for smokers vs non-smokers
# One regplot call per smoker group, each drawn in its own colour on axes[1].
for smoker_val, color in [('Yes', '#e74c3c'), ('No', '#3498db')]:
    subset = tips[tips['smoker'] == smoker_val]
    # TODO: sns.regplot(data=subset, x='total_bill', y='tip',
    # TODO:             scatter_kws=dict(alpha=0.3, s=20, color=color),
    # TODO:             line_kws=dict(color=color, linewidth=2, label=f'Smoker={smoker_val}'),
    # TODO:             ax=axes[1])
    pass
# TODO: axes[1].legend(); axes[1].set_title('Regression by Smoker Status')

# 3. TODO: label the axes of both panels clearly (set_xlabel / set_ylabel)

plt.tight_layout()
plt.savefig('practice_scatter.png', dpi=80)
plt.close()
print("Saved practice_scatter.png")
6. Heatmap

sns.heatmap renders a matrix as color intensities. Ideal for correlation matrices, confusion matrices, and pivot table results.

Correlation heatmap with annotations
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Annotated correlation heatmap of the numeric columns in the tips dataset.
sns.set_theme(style='white')
tips_df = sns.load_dataset('tips')

# Pairwise correlations of the three numeric columns
numeric_cols = ['total_bill', 'tip', 'size']
corr_matrix = tips_df[numeric_cols].corr()

# Diverging colormap pinned to [-1, 1] so that zero correlation sits at the midpoint.
heatmap_options = dict(
    annot=True, fmt='.2f',
    cmap='RdBu_r', vmin=-1, vmax=1,
    linewidths=0.5, square=True,
)

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr_matrix, ax=ax, **heatmap_options)
ax.set_title('Feature Correlation Matrix')
fig.tight_layout()
fig.savefig('heatmap_corr.png', dpi=80)
plt.close()
Pivot table heatmap
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic revenue table is reproducible.
np.random.seed(9)
months  = ['Jan','Feb','Mar','Apr','May','Jun']
regions = ['North','South','East','West']
# Long-format table: one row per (region, month) pair.
# Sizes are derived from the label lists so the two stay in sync.
data = pd.DataFrame({
    'month':   np.tile(months, len(regions)),
    'region':  np.repeat(regions, len(months)),
    'revenue': np.random.uniform(50, 200, len(regions) * len(months)).round(1)
})

# pivot() returns row/column labels in sorted (alphabetical) order, which would
# scramble the months (Apr, Feb, Jan, ...). Reindex to restore calendar order
# and the declared region order.
pivot = (
    data.pivot(index='region', columns='month', values='revenue')
        .reindex(index=regions, columns=months)
)

fig, ax = plt.subplots(figsize=(9, 4))
sns.heatmap(pivot, annot=True, fmt='.0f', cmap='YlGnBu',
            linewidths=0.4, ax=ax, cbar_kws=dict(label='Revenue ($K)'))
ax.set_title('Monthly Revenue by Region ($K)')
# Axis labels are redundant with the title, so blank them out.
ax.set_xlabel(''); ax.set_ylabel('')
plt.tight_layout()
plt.savefig('heatmap_pivot.png', dpi=80)
plt.close()
Annotated confusion matrix heatmap
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Simulated 4-class confusion matrix, shown as raw counts and as per-row rates.
np.random.seed(42)
classes = ['Cat', 'Dog', 'Bird', 'Fish']
n_samples, n_noisy = 200, 40

# Ground truth drawn with unequal class frequencies.
true_labels = np.random.choice(classes, n_samples, p=[0.3, 0.3, 0.2, 0.2])

# Predictions start as a copy of the truth; a random subset of positions is
# then overwritten with randomly chosen classes to simulate misclassification.
pred_labels = true_labels.copy()
noise_idx = np.random.choice(len(true_labels), n_noisy, replace=False)
pred_labels[noise_idx] = np.random.choice(classes, n_noisy)

# Cross-tabulate actual vs. predicted, then force every class onto both axes
# (in the declared order) even if one never occurs.
actual = pd.Series(true_labels, name='Actual')
predicted = pd.Series(pred_labels, name='Predicted')
cm = pd.crosstab(actual, predicted).reindex(
    index=classes, columns=classes, fill_value=0
)

fig, (ax_counts, ax_rates) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: raw counts
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            linewidths=0.5, ax=ax_counts)
ax_counts.set_title('Confusion Matrix (counts)')

# Right panel: each row divided by its total, i.e. recall per actual class
row_totals = cm.sum(axis=1)
cm_norm = cm.div(row_totals, axis=0).round(2)
sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='YlOrRd',
            vmin=0, vmax=1, linewidths=0.5, ax=ax_rates)
ax_rates.set_title('Confusion Matrix (row-normalized recall)')

fig.tight_layout()
fig.savefig('heatmap_confusion.png', dpi=80)
plt.close()
sns.clustermap and triangular mask heatmap
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
sns.set_theme(style='white')

# --- Part 1: lower-triangle heatmap using the mask parameter ---
iris = sns.load_dataset('iris')
corr = iris[['sepal_length','sepal_width','petal_length','petal_width']].corr()

# Cells where mask is True are hidden. np.triu(..., k=1) is True strictly above
# the diagonal, so the lower triangle and the diagonal remain visible.
mask_upper = np.triu(np.ones_like(corr, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr, mask=mask_upper, annot=True, fmt='.2f',
            cmap='coolwarm', vmin=-1, vmax=1,
            square=True, linewidths=0.5,
            cbar_kws=dict(shrink=0.7), ax=ax)
ax.set_title('Iris Correlation — lower triangle only (mask=triu)')
plt.tight_layout()
plt.savefig('heatmap_triangle_mask.png', dpi=80)
plt.close()

# --- Part 2: clustermap — hierarchically clusters rows AND columns ---
np.random.seed(7)
n_genes, n_samples = 20, 12
data = pd.DataFrame(
    np.random.randn(n_genes, n_samples),
    index=[f'Gene_{i:02d}' for i in range(n_genes)],
    columns=[f'S{j:02d}' for j in range(n_samples)]
)
# Add block structure so clustering is visible
data.iloc[:8,  :6]  += 2   # high block top-left
data.iloc[12:, 6:]  -= 2   # low block bottom-right

g = sns.clustermap(
    data,
    cmap='RdBu_r', center=0,             # diverging colormap centred on zero
    figsize=(9, 7),
    dendrogram_ratio=(0.12, 0.12),
    cbar_pos=(0.02, 0.85, 0.03, 0.12),   # small colorbar in the top-left corner
    linewidths=0.3,
    method='ward'
)
g.figure.suptitle('clustermap: Hierarchical Clustering of Gene Expression', y=1.01)
# bbox_inches='tight' keeps the suptitle (above y=1) inside the saved image.
plt.savefig('heatmap_clustermap.png', dpi=80, bbox_inches='tight')
plt.close()
print("Triangle mask heatmap and clustermap saved.")
💼 Real-World: Churn Risk Feature Correlation
A customer success team uses a heatmap to identify which features are most correlated with customer churn.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Synthetic customer table whose churn flag depends on support tickets,
# login frequency and NPS, visualised as a correlation heatmap.
np.random.seed(3)
n = 500

# Columns are drawn in this exact order so the random stream is unchanged.
df = pd.DataFrame({
    'tenure_months':    np.random.exponential(24, n).clip(1, 72),
    'monthly_spend':    np.random.normal(120, 40, n).clip(20),
    'support_tickets':  np.random.poisson(2, n),
    'logins_per_week':  np.random.normal(5, 3, n).clip(0),
    'nps_score':        np.random.normal(7, 2, n).clip(1, 10),
    # Placeholder draw; replaced below by the feature-driven flag.
    'churned':          np.random.binomial(1, 0.25, n),
})

# Weighted risk score: each warning sign contributes a fixed weight, plus
# uniform noise; customers scoring above 0.5 are marked as churned.
many_tickets = (df['support_tickets'] > 4).astype(int) * 0.4
few_logins   = (df['logins_per_week'] < 2).astype(int) * 0.3
low_nps      = (df['nps_score'] < 5).astype(int) * 0.3
noise        = np.random.rand(n) * 0.3
risk_score   = many_tickets + few_logins + low_nps + noise
df['churned'] = risk_score > 0.5

corr = df.corr(numeric_only=True)

sns.set_theme(style='white')
fig, ax = plt.subplots(figsize=(7, 6))
heatmap_options = dict(
    annot=True, fmt='.2f', cmap='coolwarm',
    vmin=-1, vmax=1, square=True, linewidths=0.5,
    annot_kws=dict(size=9),
)
sns.heatmap(corr, ax=ax, **heatmap_options)
ax.set_title('Customer Feature Correlation (churn focus)', pad=12)
fig.tight_layout()
fig.savefig('rw_heatmap_churn.png', dpi=80)
plt.close()
🏋️ Practice: Annotated Correlation Heatmap
Load the 'iris' dataset. 1) Compute the Pearson correlation matrix for all 4 numeric columns. 2) Plot a heatmap with annotations (fmt='.2f'), cmap='coolwarm', vmin=-1, vmax=1, and square=True. 3) Mask the upper triangle so only the lower triangle and diagonal are shown. Save the figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Starter code for the practice exercise: the TODO lines below are the
# intended solution, left commented out for the reader to fill in.
sns.set_theme(style='white')
iris = sns.load_dataset('iris')

# 1. Compute correlation matrix
# TODO: corr = iris[['sepal_length','sepal_width','petal_length','petal_width']].corr()

# 2. Mask upper triangle
# k=1 starts the mask one step above the diagonal, so the diagonal itself stays
# visible as the task requires (k=0 would hide it too).
# TODO: mask = np.triu(np.ones_like(corr, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(6, 5))

# 3. Plot annotated heatmap with mask
# TODO: sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm',
# TODO:             vmin=-1, vmax=1, square=True, linewidths=0.5,
# TODO:             mask=mask, ax=ax)
# TODO: ax.set_title('Iris Feature Correlation (lower triangle)')

plt.tight_layout()
plt.savefig('practice_heatmap.png', dpi=80)
plt.close()
print("Saved practice_heatmap.png")
7. Pair Plot

pairplot creates a grid of scatter plots and distributions for all numeric column pairs — fast exploratory data analysis.

Basic pairplot with hue
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Pair plot of the iris measurements, coloured by species.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

g = sns.pairplot(
    iris, hue='species',
    palette='Set2',
    plot_kws=dict(alpha=0.5, s=25),   # passed to the off-diagonal scatter plots
    diag_kind='kde'                   # density curves on the diagonal
)
g.figure.suptitle('Iris Dataset — Pair Plot', y=1.02)
# The grid's own tight_layout keeps the margin reserved for the outside legend.
g.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('pairplot_iris.png', dpi=80, bbox_inches='tight')
plt.close()
pairplot with regression and custom diag
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Regression pair plot of the numeric tips columns, split by meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')
cols = ['total_bill', 'tip', 'size']

g = sns.pairplot(
    tips[cols + ['time']], hue='time',
    kind='reg',          # scatter + regression line
    diag_kind='hist',    # histogram on diagonal
    palette='Set1',
    # With kind='reg', plot_kws is forwarded to regplot, which takes separate
    # keyword dicts for the points and for the fitted line.
    plot_kws=dict(scatter_kws=dict(alpha=0.3, s=20),
                  line_kws=dict(linewidth=1.5))
)
g.figure.suptitle('Tips Dataset — Regression Pair Plot', y=1.02)
# The grid's own tight_layout keeps the margin reserved for the outside legend.
g.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('pairplot_reg.png', dpi=80, bbox_inches='tight')
plt.close()
PairGrid with custom upper/lower/diagonal
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# PairGrid with a different plot type in each region of the grid.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
cols = ['sepal_length', 'sepal_width', 'petal_length']

# diag_sharey=False lets the diagonal panels use their own y scale.
g = sns.PairGrid(iris[cols + ['species']], hue='species',
                 palette='tab10', diag_sharey=False)

# Upper triangle: scatter
g.map_upper(sns.scatterplot, alpha=0.4, s=20)

# Lower triangle: KDE contours
g.map_lower(sns.kdeplot, fill=True, alpha=0.25, levels=4)

# Diagonal: histogram
g.map_diag(sns.histplot, kde=True, alpha=0.5)

g.add_legend()
g.figure.suptitle('PairGrid: Custom Upper/Lower/Diagonal', y=1.02)
# The grid's own tight_layout keeps the margin reserved for the outside legend;
# plt.tight_layout() would stretch the panels underneath it.
g.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('pairgrid_custom.png', dpi=80, bbox_inches='tight')
plt.close()
Residual Plot & Regression Diagnostics
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd
from scipy import stats

# Four-panel diagnostic figure for a synthetic table of financial features:
# regression residuals, a Q-Q plot, and a raw vs. log-transformed histogram.
sns.set_theme(style='whitegrid', font_scale=1.1)
rng = np.random.default_rng(7)

n = 200
# Columns are drawn in this exact order so the random stream is unchanged.
df = pd.DataFrame({
    'market_cap':   rng.lognormal(8, 1.5, n),
    'pe_ratio':     rng.lognormal(3, 0.5, n),
    'debt_equity':  rng.exponential(1.2, n),
    'revenue_growth': rng.normal(0.15, 0.12, n),
    'return_1y':    rng.normal(0.08, 0.22, n),
})
# Tie the return mildly to log(market cap) and P/E, plus a little extra noise.
df['return_1y'] += 0.03 * np.log(df['market_cap']) - 0.02 * df['pe_ratio'] + rng.normal(0, 0.05, n)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
(ax_resid, ax_qq), (ax_raw, ax_log) = axes

# Panel 1: residuals of a simple linear fit of return on P/E
slope, intercept, r, p, se = stats.linregress(df['pe_ratio'], df['return_1y'])
fitted = slope * df['pe_ratio'] + intercept
residuals = df['return_1y'] - fitted
ax_resid.scatter(df['pe_ratio'], residuals, alpha=0.5, color='steelblue', s=25)
ax_resid.axhline(0, color='red', ls='--', lw=1.5)
ax_resid.set(title='Residual Plot (PE vs Return)', xlabel='P/E Ratio', ylabel='Residual')
ax_resid.text(0.05, 0.95, f'r={r:.3f}', transform=ax_resid.transAxes,
              va='top', fontsize=10)

# Panel 2: Q-Q plot of the return against theoretical quantiles
(osm, osr), (slope2, intercept2, r2) = stats.probplot(df['return_1y'])
reference_line = slope2*np.array(osm)+intercept2
ax_qq.scatter(osm, osr, alpha=0.5, color='coral', s=20)
ax_qq.plot(osm, reference_line, 'r-', lw=2)
ax_qq.set(title='Q-Q Plot: Return (1Y)', xlabel='Theoretical Quantiles', ylabel='Sample Quantiles')

# Panels 3 and 4: the same variable before and after a log transform
hist_style = dict(bins=30, color='seagreen', alpha=0.7, edgecolor='white')
ax_raw.hist(df['market_cap'], **hist_style)
ax_raw.set(title='Market Cap (Raw)', xlabel='Market Cap')

ax_log.hist(np.log(df['market_cap']), **hist_style)
ax_log.set(title='Market Cap (Log-Transformed)', xlabel='log(Market Cap)')

plt.suptitle('Financial Feature Diagnostics', fontsize=14, y=1.01)
plt.tight_layout()
plt.savefig('financial_diagnostics.png', dpi=100)
plt.show()
💼 Real-World: Financial Feature Exploration Before Modeling
A quant analyst uses pairplot to explore relationships between financial metrics before selecting features for a predictive model.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic financial metrics are reproducible.
np.random.seed(42)
n = 200
# Clipping keeps EPS and ROE within the lower bounds chosen here
# (EPS >= 0.5, ROE >= 0).
pe    = np.random.lognormal(2.8, 0.5, n)
eps   = np.random.normal(5, 2, n).clip(0.5)
rev_g = np.random.normal(0.12, 0.08, n)
roe   = np.random.normal(0.15, 0.07, n).clip(0)

df = pd.DataFrame({
    'P/E Ratio':   pe.round(2),
    'EPS ($)':     eps.round(2),
    'Rev Growth':  rev_g.round(3),
    'ROE':         roe.round(3),
    'sector':      np.random.choice(['Tech','Finance','Healthcare','Energy'], n),
})

sns.set_theme(style='whitegrid', font_scale=0.85)
g = sns.pairplot(
    df, hue='sector',
    vars=['P/E Ratio','EPS ($)','Rev Growth','ROE'],   # numeric columns to pair up
    palette='tab10',
    plot_kws=dict(alpha=0.4, s=20),
    diag_kind='kde'
)
g.figure.suptitle('Financial Metrics — Pairplot by Sector', y=1.02)
# The grid's own tight_layout keeps the margin reserved for the outside legend.
g.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('rw_pairplot_finance.png', dpi=80, bbox_inches='tight')
plt.close()
🏋️ Practice: Pairplot with hue and custom diagonal
Load the 'penguins' dataset (sns.load_dataset('penguins')). Drop NaN rows. 1) Create a pairplot of bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g with hue='species', diag_kind='kde', kind='scatter'. 2) Then create a second PairGrid where the upper triangle shows scatterplots and the lower triangle shows KDE contours. Save both.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter code for the practice exercise: the TODO lines below are the
# intended solution, left commented out for the reader to fill in.
sns.set_theme(style='whitegrid', font_scale=0.9)
# dropna() removes rows with missing values, as the task requires.
penguins = sns.load_dataset('penguins').dropna()
num_cols = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']

# 1. pairplot with hue='species'
# TODO: g = sns.pairplot(
# TODO:     penguins[num_cols + ['species']], hue='species',
# TODO:     diag_kind='kde', palette='Set2',
# TODO:     plot_kws=dict(alpha=0.4, s=20)
# TODO: )
# TODO: g.figure.suptitle('Penguins Pairplot', y=1.02)
# TODO: plt.tight_layout(); plt.savefig('practice_pairplot.png', dpi=80); plt.close()

# 2. PairGrid: scatter upper, KDE lower
# TODO: g2 = sns.PairGrid(penguins[num_cols + ['species']], hue='species',
# TODO:                   palette='tab10', diag_sharey=False)
# TODO: g2.map_upper(sns.scatterplot, alpha=0.4, s=15)
# TODO: g2.map_lower(sns.kdeplot, fill=True, alpha=0.2, levels=3)
# TODO: g2.map_diag(sns.histplot, kde=True, alpha=0.5)
# TODO: g2.add_legend()
# TODO: g2.figure.suptitle('Penguins PairGrid', y=1.02)
# TODO: plt.tight_layout(); plt.savefig('practice_pairgrid.png', dpi=80); plt.close()

print("Practice pairplot complete")
8. FacetGrid

FacetGrid tiles the same plot across subsets of data defined by row, col, and hue — the most powerful Seaborn layout tool.

FacetGrid with map
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram of total bill in a 2x2 grid: columns = meal time, rows = smoker.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# margin_titles=True puts the row labels on the right margin of the grid.
g = sns.FacetGrid(tips, col='time', row='smoker',
                  height=3, aspect=1.2, margin_titles=True)
g.map_dataframe(sns.histplot, x='total_bill', bins=15, kde=True)
g.set_axis_labels('Total Bill ($)', 'Count')
g.set_titles(row_template='{row_name}', col_template='{col_name}')
g.figure.suptitle('Total Bill by Time & Smoker', y=1.03)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('facet_grid.png', dpi=80, bbox_inches='tight')
plt.close()
catplot — figure-level categorical plot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Box plots of tip by day, one panel per meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# catplot wraps barplot/boxplot/etc into a FacetGrid
g = sns.catplot(
    data=tips, x='day', y='tip',
    col='time', kind='box',
    palette='Set2',
    height=4, aspect=0.9
)
g.set_titles('{col_name}')
g.set_axis_labels('Day', 'Tip ($)')
g.figure.suptitle('Tip Distribution by Day & Meal Time', y=1.03)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('facet_catplot.png', dpi=80, bbox_inches='tight')
plt.close()
FacetGrid faceted scatter with custom formatting
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic groups are reproducible.
np.random.seed(5)
sns.set_theme(style='whitegrid', font_scale=0.85)

# Synthetic multi-group data: each group has its own slope and each
# condition adds a constant offset to y.
groups = ['A', 'B', 'C']
conditions = ['Low', 'High']
slopes  = {'A': 1.5, 'B': 0.8, 'C': 2.2}
offsets = {'Low': 0, 'High': 50}
rows = []
for grp in groups:
    for cond in conditions:
        n = 40
        x = np.random.uniform(0, 100, n)
        y = x * slopes[grp] + offsets[cond] + np.random.randn(n) * 20
        rows.extend({'group': grp, 'condition': cond,
                     'x': round(xi, 1), 'y': round(yi, 1)}
                    for xi, yi in zip(x, y))
df = pd.DataFrame(rows)

# sharey=False gives every panel its own y-axis scale.
g = sns.FacetGrid(df, col='group', row='condition',
                  height=3, aspect=1.2,
                  margin_titles=True, sharey=False)
# Two layers per panel: the points, then a regression line on top
# (scatter=False stops regplot from drawing the points a second time).
g.map_dataframe(sns.scatterplot, x='x', y='y', alpha=0.5, s=20)
g.map_dataframe(sns.regplot, x='x', y='y',
                scatter=False,
                line_kws=dict(color='red', linewidth=1.5))
g.set_axis_labels('X', 'Y')
g.set_titles(col_template='{col_name}', row_template='{row_name}')
g.figure.suptitle('Faceted Scatter with Regression', y=1.03)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('facet_scatter_reg.png', dpi=80, bbox_inches='tight')
plt.close()
Grouped Heatmap with Significance Markers
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd
from scipy import stats

# Two side-by-side heatmaps: monthly revenue per region, and month-over-month
# growth annotated with markers for unusually strong or weak months.
sns.set_theme(style='white', font_scale=1.1)
rng = np.random.default_rng(42)

regions  = ['North','South','East','West','Central']
months   = ['Jan','Feb','Mar','Apr','May','Jun']

# Simulate monthly revenue per region
data = rng.uniform(50, 300, (len(regions), len(months)))
df = pd.DataFrame(data, index=regions, columns=months)

# Compute growth vs prior month (in percent)
growth = df.pct_change(axis=1) * 100
# The first month has no prior month, so fill it with simulated values.
growth.iloc[:, 0] = rng.uniform(-5, 20, len(regions))  # fill first month

# Threshold markers (not a statistical test): star above +15% growth,
# down-triangle below -5%, blank otherwise.
growth_values = growth.to_numpy()
sig_markers = pd.DataFrame(
    np.where(growth_values > 15, '★',
             np.where(growth_values < -5, '▼', '')),
    index=regions, columns=months,
)

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

sns.heatmap(df, annot=True, fmt='.0f', cmap='YlOrRd',
            linewidths=0.5, ax=axes[0], cbar_kws={'label': 'Revenue ($K)'})
axes[0].set_title('Monthly Revenue by Region', fontsize=12)

# Growth heatmap with markers: each cell shows "<growth>%" on the first line
# and the marker (if any) on the second.
annot = growth.round(1).astype(str) + '%\n' + sig_markers
# fmt='' because the annotations are already formatted strings.
sns.heatmap(growth, annot=annot, fmt='', cmap='RdYlGn', center=0,
            linewidths=0.5, ax=axes[1], cbar_kws={'label': 'MoM Growth (%)'},
            annot_kws={'size': 9})
axes[1].set_title('Month-over-Month Growth\n★=High Growth ▼=Decline', fontsize=12)

plt.suptitle('Regional Sales Performance Analysis', fontsize=13, y=1.02)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('grouped_heatmap.png', dpi=100, bbox_inches='tight')
plt.show()
💼 Real-World: Multi-Region Sales Performance Tiles
A BI engineer uses FacetGrid to generate one scatter panel per sales region, colored by product category.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic sales data is reproducible.
np.random.seed(11)
regions    = ['North','South','East','West']
categories = ['Electronics','Clothing','Food','Sports']

# 40 random (spend, margin) points for every region/category combination.
rows = []
for region in regions:
    for cat in categories:
        n = 40
        spend   = np.random.uniform(100, 5000, n)
        margin  = np.random.uniform(0.05, 0.45, n)
        rows.extend({'region':region,'category':cat,
                     'spend':round(s,0),'margin':round(m,3)}
                    for s,m in zip(spend, margin))
df = pd.DataFrame(rows)

sns.set_theme(style='whitegrid', font_scale=0.85)
# One panel per region (2 per row); all panels share the y-axis.
g = sns.FacetGrid(df, col='region', col_wrap=2,
                  height=3.5, aspect=1.3, sharey=True)
g.map_dataframe(sns.scatterplot, x='spend', y='margin',
                hue='category', palette='tab10', alpha=0.6, s=25)

g.add_legend(title='Category')
g.set_axis_labels('Spend ($)', 'Margin')
g.set_titles('{col_name} Region')
g.figure.suptitle('Spend vs Margin by Region & Category', y=1.03)
# The grid's own tight_layout keeps the margin reserved for the outside legend;
# plt.tight_layout() would stretch the panels underneath it.
g.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('rw_facet_sales.png', dpi=80, bbox_inches='tight')
plt.close()
🏋️ Practice: Faceted Scatter Plots
Using the 'tips' dataset: 1) Build a FacetGrid with col='day' (4 panels) and map a scatterplot of 'total_bill' vs 'tip', colored by 'sex'. 2) Add a regression line to each panel using map_dataframe with sns.regplot (scatter=False). 3) Use sharey=True and set consistent axis labels. Save the figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# Starter code for the practice exercise: the TODO lines below are the
# intended solution, left commented out for the reader to fill in.
sns.set_theme(style='whitegrid', font_scale=0.9)
tips = sns.load_dataset('tips')

# 1 & 2. FacetGrid: col=day, scatter + regression
# The scatter layer is coloured by sex; the regression layer uses
# scatter=False so only the fitted line is added on top.
# TODO: g = sns.FacetGrid(tips, col='day', col_wrap=2,
# TODO:                   height=3.5, aspect=1.2, sharey=True)
# TODO: g.map_dataframe(sns.scatterplot, x='total_bill', y='tip',
# TODO:                 hue='sex', palette='Set1', alpha=0.6, s=30)
# TODO: g.map_dataframe(sns.regplot, x='total_bill', y='tip',
# TODO:                 scatter=False, line_kws=dict(color='black', linewidth=1.5))

# 3. Labels and title
# TODO: g.set_axis_labels('Total Bill ($)', 'Tip ($)')
# TODO: g.set_titles('{col_name}')
# TODO: g.figure.suptitle('Tip vs Bill by Day (colored by Sex)', y=1.03)
# TODO: g.add_legend(title='Sex')

plt.tight_layout()
plt.savefig('practice_facet.png', dpi=80)
plt.close()
print("Saved practice_facet.png")
9. Time Series & Line Plot

sns.lineplot handles time series naturally — it aggregates multiple observations per x-value and draws confidence intervals.

lineplot with hue and CI
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Weekly sales for three regions with several observations per week, so that
# lineplot has something to aggregate and can draw a confidence band.
np.random.seed(5)
weeks   = list(range(1, 13))
regions = ['North','South','East']
reps_per_week = 5   # 5 reps per point → CI makes sense

rows = []
for region in regions:
    # Each region gets its own starting level and weekly trend.
    base = np.random.uniform(80, 120)
    trend = np.random.uniform(1, 5)
    rows.extend(
        {
            'week':   w,
            'region': region,
            'sales':  base + trend * w + np.random.randn() * 15,
        }
        for w in weeks
        for _ in range(reps_per_week)
    )
df = pd.DataFrame(rows)

sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(10, 4))
line_options = dict(palette='Set2', linewidth=2.5)
sns.lineplot(data=df, x='week', y='sales', hue='region', ax=ax, **line_options)
ax.set_title('Weekly Sales by Region (with 95% CI)')
ax.set_xlabel('Week')
ax.set_ylabel('Sales ($K)')
fig.tight_layout()
fig.savefig('line_hue_ci.png', dpi=80)
plt.close()
relplot — faceted time series
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic revenue series are reproducible.
np.random.seed(9)
products = ['Widget','Gadget','Doohickey']
# Twelve month-start timestamps for 2024.
months   = pd.date_range('2024-01-01', periods=12, freq='MS')

rows = []
for prod in products:
    # Random walk around a per-product base level, floored at 10.
    base = np.random.uniform(50, 200)
    vals = base + np.cumsum(np.random.randn(12) * 10)
    for m, v in zip(months, vals):
        rows.append({'month':m,'product':prod,'revenue':max(v,10)})
df = pd.DataFrame(rows)

# One line panel per product.
g = sns.relplot(data=df, x='month', y='revenue',
                col='product', kind='line',
                height=3, aspect=1.3,
                marker='o', markersize=5)
g.set_titles('{col_name}')
# Show abbreviated month names on the x-axis, rotated so they do not collide.
month_formatter = matplotlib.dates.DateFormatter('%b')
for ax in g.axes.flat:
    ax.xaxis.set_major_formatter(month_formatter)
    ax.tick_params(axis='x', rotation=45)
g.figure.suptitle('Monthly Revenue by Product', y=1.03)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('line_relplot.png', dpi=80, bbox_inches='tight')
plt.close()
Multi-line with markers, annotations, and event shading
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic revenue series are reproducible.
np.random.seed(15)
sns.set_theme(style='whitegrid')

months = list(range(1, 13))
products = ['Alpha', 'Beta', 'Gamma']
rows = []
for prod in products:
    # Random walk around a per-product base level, floored at 10.
    base = np.random.uniform(100, 300)
    vals = base + np.cumsum(np.random.randn(12) * 20)
    for m, v in zip(months, vals):
        rows.append({'month': m, 'product': prod, 'revenue': max(v, 10)})
df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(11, 4))
sns.lineplot(data=df, x='month', y='revenue', hue='product',
             marker='o', markersize=6, linewidth=2,
             palette='tab10', ax=ax)

# Shade a promotion period
ax.axvspan(5, 7, alpha=0.12, color='green', label='Promo period')

# Annotate peak revenue: the single row with the highest revenue overall
peak = df.loc[df['revenue'].idxmax()]
# The label is two lines (product, then value), so the newline must be the
# escape sequence \n inside the f-string.
ax.annotate(f"Peak: {peak['product']}\n${peak['revenue']:.0f}K",
            xy=(peak['month'], peak['revenue']),
            xytext=(peak['month'] + 0.5, peak['revenue'] - 30),
            arrowprops=dict(arrowstyle='->', color='red'),
            fontsize=8, color='red')

ax.set_title('Monthly Revenue with Event Shading')
ax.set_xlabel('Month'); ax.set_ylabel('Revenue ($K)')
# Replace the numeric month ticks with month names.
ax.set_xticks(months)
ax.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun',
                    'Jul','Aug','Sep','Oct','Nov','Dec'], rotation=30)
ax.legend(loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('line_annotated.png', dpi=80)
plt.close()
Anomaly Detection Overlay with Seaborn
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd

# Hourly CPU and memory series with injected spikes; points further than
# Z_THRESHOLD standard deviations from the mean are highlighted.
sns.set_theme(style='darkgrid', font_scale=1.0)
rng = np.random.default_rng(42)

Z_THRESHOLD = 2.5   # number of standard deviations that counts as an anomaly

n = 168  # 1 week of hourly data
t = pd.date_range('2024-01-01', periods=n, freq='h')
cpu    = 40 + 20*np.sin(np.linspace(0, 4*np.pi, n)) + rng.normal(0, 5, n)
memory = 60 + 10*np.sin(np.linspace(0, 2*np.pi, n)) + rng.normal(0, 3, n)
# Inject anomalies
cpu[[24, 72, 120, 145]] += rng.uniform(35, 50, 4)
memory[[36, 96, 130]]    += rng.uniform(25, 35, 3)

# Usage is a percentage, so clamp both series to [0, 100].
df = pd.DataFrame({'time': t, 'CPU': cpu.clip(0,100), 'Memory': memory.clip(0,100)})

# Detect anomalies via z-score (two-sided: unusually high or unusually low)
for col in ['CPU', 'Memory']:
    mu, sigma = df[col].mean(), df[col].std()
    df[f'{col}_anomaly'] = (df[col] - mu).abs() > Z_THRESHOLD * sigma

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

for ax, metric, color in zip(axes, ['CPU', 'Memory'], ['steelblue', 'seagreen']):
    sns.lineplot(data=df, x='time', y=metric, ax=ax, color=color, lw=1.5, label=metric)
    # Mark the flagged points in red on top of the line
    anomalies = df[df[f'{metric}_anomaly']]
    ax.scatter(anomalies['time'], anomalies[metric],
               color='red', s=60, zorder=5, label='Anomaly')
    # Dashed line at the upper detection threshold
    upper_threshold = df[metric].mean() + Z_THRESHOLD*df[metric].std()
    ax.axhline(upper_threshold,
               color='red', ls='--', lw=1, alpha=0.6, label='2.5sigma threshold')
    ax.set(ylabel=f'{metric} Usage (%)', title=f'{metric} Usage — Anomaly Detection')
    ax.legend(loc='upper right')

axes[-1].set_xlabel('Time')
plt.suptitle('Server Metrics Anomaly Dashboard', fontsize=13, y=1.01)
plt.tight_layout()
# bbox_inches='tight' keeps the suptitle (placed above y=1) inside the saved image.
plt.savefig('anomaly_dashboard.png', dpi=100, bbox_inches='tight')
plt.show()
💼 Real-World: Server Metrics Time Series Dashboard
A DevOps engineer plots CPU and memory usage over 24 hours across multiple servers with confidence bands.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fixed seed so the synthetic server metrics are reproducible.
np.random.seed(20)
hours   = np.arange(24)
servers = [f'srv-{i:02d}' for i in range(1, 6)]

rows = []
for srv in servers:
    # Each server has its own baseline CPU and memory level.
    cpu_base  = np.random.uniform(20, 60)
    mem_base  = np.random.uniform(40, 70)
    for h in hours:
        spike = 30 if 9 <= h <= 17 else 0   # business hours spike
        # np.random.uniform()/randn() return plain Python floats when called
        # without a size, and floats have no .clip() method, so clamp with
        # np.clip() instead.
        cpu = np.clip(cpu_base + spike + np.random.randn()*8, 5, 100)
        memory = np.clip(mem_base + spike*0.3 + np.random.randn()*5, 10, 95)
        rows.append({
            'hour':   h,
            'server': srv,
            'cpu':    float(cpu),
            'memory': float(memory),
        })
df = pd.DataFrame(rows)

sns.set_theme(style='darkgrid')
fig, axes = plt.subplots(1, 2, figsize=(13, 4), sharey=False)

sns.lineplot(data=df, x='hour', y='cpu',    hue='server',
             palette='tab10', linewidth=1.5, alpha=0.7, ax=axes[0])
axes[0].set_title('CPU Usage % (24h)'); axes[0].set_xlabel('Hour')

sns.lineplot(data=df, x='hour', y='memory', hue='server',
             palette='tab10', linewidth=1.5, alpha=0.7, ax=axes[1])
axes[1].set_title('Memory Usage % (24h)'); axes[1].set_xlabel('Hour')
# Both panels use the same server colours, so one legend (left panel) is enough.
axes[1].get_legend().remove()

# Highlight business hours
for ax in axes:
    ax.axvspan(9, 17, alpha=0.07, color='yellow', label='Business hours')

plt.tight_layout()
plt.savefig('rw_line_servers.png', dpi=80)
plt.close()
🏋️ Practice: Multi-Line Time Series with Annotations
Create a synthetic dataset of 4 products over 24 months of revenue. 1) Plot all 4 as line series with markers on a single axes using sns.lineplot with hue='product'. 2) Shade a recession period (months 10-14) with axvspan. 3) Annotate the single highest revenue point with an arrow. Save the figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Starter code for the practice exercise: the data generation is complete;
# the TODO lines below are the intended solution, left commented out.
np.random.seed(99)
sns.set_theme(style='whitegrid')

products = ['Alpha', 'Beta', 'Gamma', 'Delta']
months = list(range(1, 25))
rows = []
for prod in products:
    # Random walk around a per-product base level, floored at 5.
    base = np.random.uniform(80, 250)
    vals = base + np.cumsum(np.random.randn(24) * 15)
    for m, v in zip(months, vals):
        rows.append({'month': m, 'product': prod, 'revenue': max(v, 5)})
df = pd.DataFrame(rows)

fig, ax = plt.subplots(figsize=(12, 4))

# 1. Line plot with hue and markers
# TODO: sns.lineplot(data=df, x='month', y='revenue', hue='product',
# TODO:              marker='o', markersize=5, linewidth=2, palette='tab10', ax=ax)

# 2. Shade recession period (months 10-14)
# TODO: ax.axvspan(10, 14, alpha=0.12, color='red', label='Recession')

# 3. Annotate peak
# peak is the single row with the highest revenue across all products.
peak = df.loc[df['revenue'].idxmax()]
# TODO: ax.annotate(f"Peak: {peak['product']}\n${peak['revenue']:.0f}K",
# TODO:             xy=(peak['month'], peak['revenue']),
# TODO:             xytext=(peak['month']+1, peak['revenue']-30),
# TODO:             arrowprops=dict(arrowstyle='->', color='darkred'),
# TODO:             fontsize=8, color='darkred')

# TODO: ax.set_title('Product Revenue Over 24 Months')
# TODO: ax.set_xlabel('Month'); ax.set_ylabel('Revenue ($K)')
# TODO: ax.legend(loc='upper left', fontsize=8)

plt.tight_layout()
plt.savefig('practice_lineplot.png', dpi=80)
plt.close()
print("Saved practice_lineplot.png")
10. Customization & Matplotlib Integration

Seaborn returns Axes objects you can modify with any Matplotlib method. Combine both libraries for full control.

Accessing and modifying the Axes
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(data=tips, x='day', y='total_bill', palette='pastel', ax=ax)

# Matplotlib customizations on top
ax.set_title('Total Bill by Day', fontsize=14, fontweight='bold', pad=12)
ax.set_xlabel('')
ax.set_ylabel('Total Bill', fontsize=11)
ax.yaxis.set_major_formatter(mticker.StrMethodFormatter('${x:.0f}'))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Add median annotations
medians = tips.groupby('day')['total_bill'].median()
for i, (day, med) in enumerate(medians.items()):
    ax.text(i, med + 0.5, f'${med:.0f}', ha='center', fontsize=9, color='darkred')

plt.tight_layout()
plt.savefig('custom_axes.png', dpi=80)
plt.close()
Combining multiple Seaborn plots
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

fig, ax = plt.subplots(figsize=(7, 5))

# Layer 1: violin
sns.violinplot(data=iris, x='species', y='petal_length',
               palette='pastel', inner=None, ax=ax)
# Layer 2: box inside violin
sns.boxplot(data=iris, x='species', y='petal_length',
            width=0.15, fliersize=0,
            boxprops=dict(facecolor='white', zorder=2), ax=ax)
# Layer 3: points
sns.stripplot(data=iris, x='species', y='petal_length',
              color='black', size=2.5, alpha=0.4, jitter=True, ax=ax)

ax.set_title('Petal Length by Species')
ax.set_ylabel('Petal Length (cm)')
plt.tight_layout()
plt.savefig('custom_layered.png', dpi=80)
plt.close()
Custom themes, palettes, and publication-ready styling
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd

np.random.seed(42)
sns.set_theme(style='ticks', palette='colorblind', font_scale=1.0)

# Synthetic dataset
df = pd.DataFrame({
    'method': np.repeat(['Baseline', 'Model A', 'Model B', 'Model C'], 50),
    'accuracy': np.concatenate([
        np.random.normal(0.72, 0.04, 50),
        np.random.normal(0.81, 0.03, 50),
        np.random.normal(0.78, 0.05, 50),
        np.random.normal(0.85, 0.03, 50),
    ])
})

fig, ax = plt.subplots(figsize=(8, 4))
order = df.groupby('method')['accuracy'].mean().sort_values().index

sns.violinplot(data=df, x='method', y='accuracy', order=order,
               palette='colorblind', inner='box', ax=ax)

ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.set_xlabel(''); ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Comparison', fontsize=13, fontweight='bold')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Significance bracket
best_mean  = df[df['method']=='Model C']['accuracy'].mean()
base_mean  = df[df['method']=='Baseline']['accuracy'].mean()
ax.annotate(f'+{(best_mean-base_mean)*100:.1f}% vs Baseline',
            xy=(0.5, 0.96), xycoords='axes fraction',
            ha='center', fontsize=9, color='darkgreen',
            fontweight='bold')

plt.tight_layout()
plt.savefig('custom_publication.png', dpi=80)
plt.close()
rc_context for One-Off Style Overrides
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np, pandas as pd

tips = sns.load_dataset('tips')

# Part 1: one figure with three histograms, each drawn under a different
# seaborn plotting context.
fig, axes = plt.subplots(1, 3, figsize=(14, 4))

# Default style
sns.histplot(tips['total_bill'], kde=True, ax=axes[0])
axes[0].set_title('Default Style')

# Paper context — smaller for publications
with sns.plotting_context('paper', font_scale=1.2):
    sns.histplot(tips['total_bill'], kde=True, color='seagreen', ax=axes[1])
    axes[1].set_title('Paper Context')

# Talk context — larger for presentations
with sns.plotting_context('talk', font_scale=0.9):
    sns.histplot(tips['tip'], kde=True, color='coral', ax=axes[2])
    axes[2].set_title('Talk Context')

plt.suptitle('Seaborn Context Comparison', fontsize=12)
plt.tight_layout()
plt.savefig('context_comparison.png', dpi=100)
plt.close()

# Part 2: override with rc_context — dark background for a single plot.
# Both context managers are entered together; the figure is created, saved
# and closed while they are active.
dark_rc = {
    'figure.facecolor': '#1e1e2e',
    'axes.facecolor':   '#1e1e2e',
    'axes.labelcolor':  'white',
    'xtick.color':      'white',
    'ytick.color':      'white',
    'text.color':       'white',
}
with sns.axes_style('darkgrid'), plt.rc_context(dark_rc):
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.scatterplot(data=tips, x='total_bill', y='tip',
                    hue='time', palette=['#ffa600','#58508d'],
                    alpha=0.8, s=60, ax=ax2)
    ax2.set_title('Dark Theme Override', color='white', fontsize=13)
    ax2.legend(labelcolor='white', facecolor='#2e2e3e')
    plt.tight_layout()
    # facecolor is passed explicitly so the saved file keeps the dark background
    plt.savefig('dark_theme.png', dpi=100, facecolor='#1e1e2e')
    plt.close()
print("Context comparison and dark theme override saved.")
💼 Real-World: Publication-Ready A/B Test Results
A product analyst creates a publication-ready figure comparing two experiment variants with statistical annotations.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats

np.random.seed(42)
control  = np.random.normal(45.0, 12, 500)
variant  = np.random.normal(48.5, 11, 500)
df = pd.DataFrame({
    'group':      ['Control']*500 + ['Variant']*500,
    'revenue':    np.concatenate([control, variant]),
})

t_stat, p_val = stats.ttest_ind(control, variant)

sns.set_theme(style='whitegrid')
fig, axes = plt.subplots(1, 2, figsize=(11, 4))

# Left: violin + box + strip
sns.violinplot(data=df, x='group', y='revenue',
               palette={'Control':'#3498db','Variant':'#e74c3c'},
               inner=None, ax=axes[0])
sns.boxplot(data=df, x='group', y='revenue',
            width=0.1, fliersize=0,
            boxprops=dict(facecolor='white', zorder=2), ax=axes[0])
axes[0].set_title('Revenue Distribution'); axes[0].set_ylabel('Revenue ($)')

# Right: mean + CI bar chart
summary = df.groupby('group')['revenue'].agg(['mean','sem']).reset_index()
colors  = ['#3498db','#e74c3c']
bars    = axes[1].bar(summary['group'], summary['mean'],
                      yerr=summary['sem']*1.96, capsize=6,
                      color=colors, alpha=0.8, edgecolor='white', linewidth=1.5)
for bar, (_, row) in zip(bars, summary.iterrows()):
    axes[1].text(bar.get_x()+bar.get_width()/2, bar.get_height()+1,
                 f"${row['mean']:.1f}", ha='center', fontsize=10, fontweight='bold')

# Map the t-test p-value onto the conventional star notation ('ns' = not significant).
sig = '***' if p_val < 0.001 else '**' if p_val < 0.01 else '*' if p_val < 0.05 else 'ns'
# Annotation is placed in axes-fraction coordinates so it stays near the top
# regardless of the data range; green when p < 0.05, gray otherwise.
axes[1].annotate(f'p={p_val:.4f} {sig}', xy=(0.5,0.95), xycoords='axes fraction',
                 ha='center', fontsize=10, color='darkgreen' if p_val<0.05 else 'gray')
axes[1].set_title('Mean Revenue ± 95% CI'); axes[1].set_ylabel('Mean Revenue ($)')

fig.suptitle('A/B Test Results — Revenue', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('rw_custom_ab_test.png', dpi=80)
plt.close()
🏋️ Practice: Switch Themes and Build a Custom Publication Plot
1) Apply sns.set_theme with style='ticks', font_scale=1.1, and a custom rc dict that removes the top/right spines. 2) Plot a violin+strip chart of the 'iris' petal_length by species. 3) Add y-axis gridlines only (ax.yaxis.grid(True)), label the y-axis in cm, and add a bold title. 4) Try swapping to a 'husl' palette with 3 colors. Save the final figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Custom theme
# TODO: sns.set_theme(style='ticks', font_scale=1.1, rc={
# TODO:     'axes.spines.top':   False,
# TODO:     'axes.spines.right': False,
# TODO: })

iris = sns.load_dataset('iris')

fig, ax = plt.subplots(figsize=(7, 5))

# 2. Violin + strip with husl palette
my_palette = sns.color_palette('husl', 3)
# TODO: sns.violinplot(data=iris, x='species', y='petal_length',
# TODO:                palette=my_palette, inner=None, ax=ax)
# TODO: sns.stripplot(data=iris, x='species', y='petal_length',
# TODO:               color='black', size=2.5, alpha=0.4, jitter=True, ax=ax)

# 3. Gridlines, axis label, bold title
# TODO: ax.yaxis.grid(True, linewidth=0.7, alpha=0.7)
# TODO: ax.set_ylabel('Petal Length (cm)', fontsize=11)
# TODO: ax.set_xlabel('Species', fontsize=11)
# TODO: ax.set_title('Iris Petal Length by Species', fontsize=13, fontweight='bold')

plt.tight_layout()
plt.savefig('practice_custom.png', dpi=80)
plt.close()
print("Saved practice_custom.png")
11. FacetGrid & Multi-Plot Grids

Create small multiples — the same visualization across data subsets — with FacetGrid, PairGrid, catplot, and relplot.

FacetGrid with col and hue
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, col='time', hue='smoker', height=4, aspect=0.8)
g.map(sns.scatterplot, 'total_bill', 'tip', alpha=0.7)
g.add_legend(); g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.set_titles(col_template='{col_name}')
g.figure.suptitle('Tips by Time and Smoker Status', y=1.02)
plt.savefig('facetgrid_tips.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved facetgrid_tips.png')
PairGrid for pairwise relationships
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

iris = sns.load_dataset('iris')
g = sns.PairGrid(iris, hue='species')
g.map_diag(sns.histplot, alpha=0.6)
g.map_upper(sns.scatterplot, alpha=0.6)
g.map_lower(sns.kdeplot)
g.add_legend()
plt.savefig('pairgrid_iris.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved pairgrid_iris.png')
catplot — FacetGrid wrapper for categorical plots
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
g = sns.catplot(data=tips, x='day', y='total_bill',
                col='time', hue='sex',
                kind='box', height=4, aspect=0.8)
g.set_axis_labels('Day', 'Total Bill ($)')
g.set_titles(col_template='{col_name}')
g.figure.suptitle('Bills by Day, Time, and Sex', y=1.02)
plt.savefig('catplot_tips.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved catplot_tips.png')
relplot — FacetGrid wrapper for relational plots
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

fmri = sns.load_dataset('fmri')
g = sns.relplot(data=fmri, x='timepoint', y='signal',
               col='region', hue='event',
               kind='line', height=4, aspect=0.9,
               errorbar='se')
g.set_titles(col_template='Region: {col_name}')
g.figure.suptitle('fMRI Signal by Region and Event', y=1.02)
plt.savefig('relplot_fmri.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved relplot_fmri.png')
🏋️ Practice: Multi-Segment Time Series
Load the 'flights' dataset. Use relplot to create a FacetGrid with col='month' showing passenger count over the years, with hue indicating whether the month is in summer (Jun-Aug) or not.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

flights = sns.load_dataset('flights')
# TODO: add 'season' column: 'Summer' if month in ['June','July','August'] else 'Other'
# TODO: relplot with col='month', x='year', y='passengers', hue='season', kind='line'
# TODO: save to 'flights_facet.png'
12. Statistical Annotations

Add significance stars, confidence intervals, and comparison markers to seaborn plots for publication-ready statistical charts.

Bootstrap CI with barplot errorbar
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=tips, x='day', y='total_bill', hue='sex',
            errorbar='ci', capsize=0.1, alpha=0.85, ax=ax)
ax.set_title('Mean Total Bill with 95% CI')
ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
plt.tight_layout()
plt.savefig('barplot_ci.png', dpi=80); plt.close()
print('Saved barplot_ci.png')
Adding significance stars between groups
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

np.random.seed(42)
group_a = np.random.normal(42, 8, 50)
group_b = np.random.normal(48, 9, 50)
import pandas as pd
df = pd.DataFrame({'value': np.concatenate([group_a, group_b]), 'group': ['A']*50+['B']*50})

fig, ax = plt.subplots(figsize=(6, 5))
sns.boxplot(data=df, x='group', y='value', ax=ax, palette='Set2')

_, p = stats.ttest_ind(group_a, group_b)
stars = '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else 'ns'
y_max = df['value'].max() + 2
ax.plot([0, 0, 1, 1], [y_max, y_max+1, y_max+1, y_max], lw=1.5, color='black')
ax.text(0.5, y_max+1.2, stars, ha='center', va='bottom', fontsize=14)
ax.set_title(f'Group Comparison (p={p:.4f})')
plt.tight_layout()
plt.savefig('significance.png', dpi=80); plt.close()
print(f'Saved significance.png ({stars})')
Pointplot with SD error bars
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

sns.pointplot(data=tips, x='day', y='total_bill', hue='sex',
              errorbar='sd', capsize=0.1, dodge=True, ax=axes[0])
axes[0].set_title('Pointplot with SD error bars')

sns.pointplot(data=tips, x='day', y='tip', hue='smoker',
              errorbar='se', join=False, dodge=0.4, ax=axes[1])
axes[1].set_title('Pointplot (no join) with SE')
plt.tight_layout()
plt.savefig('pointplot.png', dpi=80); plt.close()
print('Saved pointplot.png')
Multi-group significance annotations
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from itertools import combinations

np.random.seed(42)
groups = {'Control': np.random.normal(10,2,40), 'Drug A': np.random.normal(12,2,40), 'Drug B': np.random.normal(14,3,40)}
# Long-form frame: one row per observation, rows emitted in the dict's group order.
df = pd.DataFrame([(v, g) for g, vals in groups.items() for v in vals], columns=['response','group'])

fig, ax = plt.subplots(figsize=(7, 6))
sns.boxplot(data=df, x='group', y='response', ax=ax, palette='pastel')

# Index of each group in the dict, computed once instead of rebuilding a key
# list on every iteration. These indices are used as the bracket x positions
# (assumes the boxplot draws groups in the same order — they appear in df in
# dict order).
positions = {name: idx for idx, name in enumerate(groups)}
y_max = df['response'].max()
for i, (g1, g2) in enumerate(combinations(groups, 2)):
    # Independent two-sample t-test for this pair (no multiple-comparison correction).
    _, p = stats.ttest_ind(groups[g1], groups[g2])
    stars = '***' if p<0.001 else '**' if p<0.01 else '*' if p<0.05 else 'ns'
    x1, x2 = positions[g1], positions[g2]
    # Stack each bracket 1.5 units above the previous one so they do not overlap.
    y = y_max + 1.5*(i+1)
    ax.plot([x1, x1, x2, x2], [y, y+0.3, y+0.3, y], lw=1.2, color='black')
    ax.text((x1+x2)/2, y+0.4, stars, ha='center', fontsize=12)
ax.set_title('Multi-Group Significance Test')
plt.tight_layout()
plt.savefig('multi_sig.png', dpi=80); plt.close()
print('Saved multi_sig.png')
🏋️ Practice: A/B Test Visualization
Given two groups of purchase amounts (A: mean=$45, B: mean=$52), create a barplot with 95% CI. Run a t-test and add a significance star above the bars if p<0.05.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

np.random.seed(42)
group_a = np.random.normal(45, 12, 80)
group_b = np.random.normal(52, 14, 80)
df = pd.DataFrame({'amount': np.concatenate([group_a, group_b]), 'group': ['A']*80+['B']*80})
# TODO: barplot with ci errorbar
# TODO: t-test
# TODO: significance annotation if p<0.05
# TODO: save to 'ab_barplot.png'
13. Regression & Distribution Plots

Visualize statistical relationships and distributions with lmplot, residplot, jointplot, and ecdfplot for thorough exploratory analysis.

lmplot with grouping and confidence bands
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
g = sns.lmplot(data=tips, x='total_bill', y='tip', hue='smoker',
               ci=95, scatter_kws={'alpha':0.5}, height=5, aspect=1.2)
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.axes[0,0].set_title('Tip vs Bill by Smoker Status (95% CI)')
plt.savefig('lmplot.png', dpi=80, bbox_inches='tight'); plt.close()
print('Saved lmplot.png')
Residual plot for regression diagnostics
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
sns.residplot(data=tips, x='total_bill', y='tip', lowess=True, ax=axes[0],
              scatter_kws={'alpha':0.5}, line_kws={'color':'red','linewidth':2})
axes[0].set_title('Residual Plot (lowess trend)')
axes[0].axhline(0, color='gray', linestyle='--')

sns.residplot(data=tips, x='size', y='total_bill', ax=axes[1],
              scatter_kws={'alpha':0.5})
axes[1].set_title('Residual Plot: size vs bill')
axes[1].axhline(0, color='gray', linestyle='--')
plt.tight_layout()
plt.savefig('residplot.png', dpi=80); plt.close()
print('Saved residplot.png')
Joint distribution with marginals
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
# jointplot is a figure-level function: every call builds its own figure, so
# no shared plt.subplots() grid is created here (it would stay empty and never
# be closed). Each kind is saved to its own file and its figure closed.
for kind in ('scatter', 'kde', 'hex'):
    g = sns.jointplot(data=tips, x='total_bill', y='tip', kind=kind, height=4)
    g.set_axis_labels('Total Bill ($)', 'Tip ($)')
    g.figure.suptitle(f'Joint Plot: {kind}', y=1.02)
    g.savefig(f'joint_{kind}.png', dpi=80)
    plt.close(g.figure)
    print(f'Saved joint_{kind}.png')
Polynomial regression and ECDF
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Polynomial regression
sns.regplot(data=tips, x='total_bill', y='tip', order=2, ax=axes[0],
            scatter_kws={'alpha':0.4}, line_kws={'color':'red','linewidth':2})
axes[0].set_title('Polynomial Regression (degree=2)')

# ECDF for comparing distributions
for day in ['Thur','Fri','Sat','Sun']:
    subset = tips[tips['day']==day]
    sns.ecdfplot(data=subset, x='total_bill', ax=axes[1], label=day)
axes[1].set_title('ECDF of Total Bill by Day')
axes[1].legend(); axes[1].set_xlabel('Total Bill ($)')
plt.tight_layout()
plt.savefig('regplot_ecdf.png', dpi=80); plt.close()
print('Saved regplot_ecdf.png')
🏋️ Practice: MPG Regression Analysis
Using the mpg dataset: (1) create an lmplot of horsepower vs mpg grouped by origin with 90% CI. (2) Plot residuals. (3) Use jointplot(kind='kde') to show the joint distribution.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

mpg = sns.load_dataset('mpg').dropna()
# TODO: lmplot x='horsepower', y='mpg', hue='origin', ci=90 -> save 'mpg_lm.png'
# TODO: residplot x='horsepower', y='mpg' -> save 'mpg_resid.png'
# TODO: jointplot kind='kde' -> save 'mpg_joint.png'
14. FacetGrid & PairGrid

FacetGrid histogram per group
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
df = pd.DataFrame({'value': np.random.randn(300), 'group': np.repeat(['A','B','C'], 100)})
g = sns.FacetGrid(df, col='group', height=3.5, aspect=0.9)
g.map(sns.histplot, 'value', kde=True, bins=20)
g.set_titles(col_template='{col_name}')
g.figure.suptitle('FacetGrid by Group', y=1.02)
g.savefig('facetgrid_hist.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_hist.png')
plt.close('all')
FacetGrid row x col (2D grid)
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, row='sex', col='time', height=3, aspect=1.2, margin_titles=True)
g.map(sns.scatterplot, 'total_bill', 'tip', alpha=0.6)
g.add_legend()
g.savefig('facetgrid_2d.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_2d.png')
plt.close('all')
PairGrid with mixed plot types
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

iris = sns.load_dataset('iris')
g = sns.PairGrid(iris, hue='species', vars=['sepal_length','petal_length','petal_width'])
g.map_upper(sns.scatterplot, alpha=0.6)
g.map_lower(sns.kdeplot, fill=True, alpha=0.4)
g.map_diag(sns.histplot, kde=True)
g.add_legend()
g.savefig('pairgrid_mixed.png', dpi=80, bbox_inches='tight')
print('Saved pairgrid_mixed.png')
plt.close('all')
FacetGrid with custom mapping function
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def scatter_means(x, y, **kw):
    """Scatter ``x`` against ``y`` and mark both means with dashed lines.

    Draws through the pyplot state interface. Any ``label`` keyword is
    dropped; all other keywords are forwarded to ``plt.scatter``.
    A red horizontal line marks ``y.mean()`` and a blue vertical line marks
    ``x.mean()``.
    """
    scatter_kwargs = dict(kw)
    scatter_kwargs.pop('label', None)
    plt.scatter(x, y, alpha=0.5, **scatter_kwargs)

    mean_y = y.mean()
    plt.axhline(mean_y, color='red', ls='--', lw=1.5)
    mean_x = x.mean()
    plt.axvline(mean_x, color='blue', ls='--', lw=1.5)

np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(150),'y':np.random.randn(150),
                   'group':np.repeat(['G1','G2','G3'],50)})
g = sns.FacetGrid(df, col='group', height=3)
g.map(scatter_means, 'x', 'y')
g.figure.suptitle('Scatter with Group Means', y=1.02)
g.savefig('facetgrid_custom.png', dpi=100, bbox_inches='tight')
print('Saved facetgrid_custom.png')
plt.close('all')
🏋️ Practice: Titanic FacetGrid
Load titanic. Create a FacetGrid with pclass as columns (3 panels). Show age histplot, colored by survived. Add legend and suptitle. Save to titanic_facet.png.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

titanic = sns.load_dataset('titanic').dropna(subset=['age'])
# TODO: FacetGrid col='pclass', hue='survived', map histplot 'age'
# TODO: add_legend(), suptitle, save 'titanic_facet.png'
15. Statistical Visualization Deep Dive

Violin + swarmplot overlay
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(42)
df = pd.DataFrame({
    'score': np.concatenate([np.random.normal(70,10,80), np.random.normal(75,8,80), np.random.normal(65,12,80)]),
    'class': ['A']*80 + ['B']*80 + ['C']*80
})
fig, ax = plt.subplots(figsize=(7, 5))
sns.violinplot(data=df, x='class', y='score', inner=None, palette='muted', alpha=0.7, ax=ax)
sns.swarmplot(data=df, x='class', y='score', color='black', size=2.5, alpha=0.6, ax=ax)
ax.set_title('Score Distribution by Class (Violin + Swarm)')
fig.savefig('violin_swarm.png', dpi=100, bbox_inches='tight')
print('Saved violin_swarm.png')
plt.close()
ECDF comparison across groups
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
df = pd.DataFrame({
    'response_ms': np.concatenate([np.random.exponential(200,300),
                                   np.random.exponential(150,300),
                                   np.random.exponential(300,300)]),
    'server': ['A']*300 + ['B']*300 + ['C']*300
})
fig, ax = plt.subplots(figsize=(8, 5))
sns.ecdfplot(data=df, x='response_ms', hue='server', ax=ax)
ax.axvline(200, color='gray', ls=':', label='200ms target')
ax.set_title('ECDF: Response Time by Server')
ax.set_xlabel('Response Time (ms)')
ax.legend()
fig.savefig('ecdf_comparison.png', dpi=100, bbox_inches='tight')
print('Saved ecdf_comparison.png')
plt.close()
Residual plot for regression diagnostics
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(42)
x = np.linspace(0, 10, 100)
y = 2*x + np.random.normal(0, 2, 100)
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
sns.regplot(x=x, y=y, ax=axes[0])
axes[0].set_title('Regression with CI')
sns.residplot(x=x, y=y, ax=axes[1])
axes[1].axhline(0, color='red', ls='--')
axes[1].set_title('Residuals vs Fitted')
fig.tight_layout()
fig.savefig('residual_diagnostic.png', dpi=100, bbox_inches='tight')
print('Saved residual_diagnostic.png')
plt.close()
Box plot with significance bracket
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy import stats

np.random.seed(42)
df = pd.DataFrame({
    'value': np.concatenate([np.random.normal(5,1,50), np.random.normal(7,1.5,50), np.random.normal(6.2,1.2,50)]),
    'group': ['Control']*50 + ['Drug A']*50 + ['Drug B']*50
})
fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=df, x='group', y='value', palette='Set2', ax=ax)

# Derive the stars from an actual two-sample t-test instead of hard-coding
# them, so the annotation always reflects the data being plotted.
control = df.loc[df['group'] == 'Control', 'value']
drug_a = df.loc[df['group'] == 'Drug A', 'value']
_, p = stats.ttest_ind(control, drug_a)
stars = '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else 'ns'

# Line spanning x = 0..1 (the first two groups, Control and Drug A, in the
# frame), placed just above the tallest observation.
y_max = df['value'].max() + 0.5
ax.plot([0, 1], [y_max, y_max], 'k-', lw=1.5)
ax.text(0.5, y_max+0.1, stars, ha='center', fontsize=14)
ax.set_title('Drug Effect with Significance Bracket')
fig.savefig('boxplot_sig.png', dpi=100, bbox_inches='tight')
print('Saved boxplot_sig.png')
plt.close()
🏋️ Practice: Multi-Stat Figure
Create 3-panel figure: (1) stripplot of scores by group with means, (2) ECDF for 3 groups, (3) residual plot from regressing score on study_hours. Use whitegrid style.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
df = pd.DataFrame({'score':np.random.normal(70,15,150), 'group':np.repeat(['A','B','C'],50), 'study_hours':np.random.uniform(1,8,150)})
# TODO: 3-panel: stripplot, ecdfplot, residplot
# TODO: whitegrid style, save 'multi_stat.png'
16. Custom Seaborn Themes & Styling

Context comparison (paper/talk/poster)
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(100),'y':np.random.randn(100)})
fig, axes = plt.subplots(2, 2, figsize=(12, 9))
for ax, ctx in zip(axes.flat, ['paper','notebook','talk','poster']):
    with sns.plotting_context(ctx):
        sns.scatterplot(data=df, x='x', y='y', ax=ax, alpha=0.6)
        ax.set_title(f'Context: {ctx}')
fig.suptitle('Seaborn Contexts', fontsize=14)
fig.tight_layout()
fig.savefig('contexts.png', dpi=100, bbox_inches='tight')
print('Saved contexts.png')
plt.close()
Custom set_theme with dark background
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sns.set_theme(style='darkgrid', palette='bright', font_scale=1.1,
              rc={'axes.facecolor':'#1e1e2e','figure.facecolor':'#1e1e2e',
                  'text.color':'white','axes.labelcolor':'white',
                  'xtick.color':'white','ytick.color':'white'})
np.random.seed(42)
df = pd.DataFrame({'x':np.random.randn(200),'y':np.random.randn(200),'g':np.random.choice(['A','B','C'],200)})
fig, ax = plt.subplots(figsize=(7,5))
sns.scatterplot(data=df, x='x', y='y', hue='g', alpha=0.7, ax=ax)
ax.set_title('Dark Mode Plot')
fig.savefig('dark_theme.png', dpi=100, bbox_inches='tight')
print('Saved dark_theme.png')
sns.set_theme()
plt.close()
Custom color palettes
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tips = sns.load_dataset('tips')

# axes_style only styles axes that are created while the context is active,
# so the 2x2 grid is built inside it; entering the context after the axes
# exist leaves them unstyled.
with sns.axes_style('whitegrid'):
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Panel title -> palette spec (a named palette or an explicit list of hex colors).
palettes = {
    'Blues (seq)': 'Blues',
    'HUSL (qual)': 'husl',
    'Set1 (qual)': 'Set1',
    'Custom hex': ['#FF6B6B','#4ECDC4','#45B7D1','#96CEB4'],
}
for ax, (title, pal) in zip(axes.flat, palettes.items()):
    sns.boxplot(data=tips, x='day', y='total_bill', palette=pal, ax=ax)
    ax.set_title(title)
fig.tight_layout()
fig.savefig('palettes.png', dpi=100, bbox_inches='tight')
print('Saved palettes.png')
plt.close()
Despine and axis trimming
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(42)
# Five categories (0-4) with 30 samples each; category k is shifted up by k+1.
df = pd.DataFrame({'x':np.repeat(range(5),30),'y':np.random.randn(150)+np.repeat([1,2,3,4,5],30)})

# Each axes is created inside its own axes_style context: the style is only
# picked up by axes created while the context is active, so a shared
# plt.subplots() call made beforehand would leave both panels unstyled.
fig = plt.figure(figsize=(12, 5))
with sns.axes_style('ticks'):
    ax_ticks = fig.add_subplot(1, 2, 1)
    sns.boxplot(data=df, x='x', y='y', ax=ax_ticks, palette='pastel')
    sns.despine(ax=ax_ticks, trim=True)
    ax_ticks.set_title('Despined + trimmed')
with sns.axes_style('whitegrid'):
    ax_grid = fig.add_subplot(1, 2, 2)
    sns.violinplot(data=df, x='x', y='y', ax=ax_grid, palette='muted', inner='box')
    # Keep the left and bottom spines; remove only top and right.
    sns.despine(ax=ax_grid, left=False, bottom=False, top=True, right=True)
    ax_grid.set_title('Whitegrid + partial despine')
fig.tight_layout()
fig.savefig('despine_styles.png', dpi=100, bbox_inches='tight')
print('Saved despine_styles.png')
plt.close()
🏋️ Practice: Night Mode Report
Create a dark-themed 3-panel figure (histogram, scatter, bar). Use set_theme with dark facecolor and bright palette. Add suptitle and save at 150 DPI. Reset theme at the end.
Starter Code
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# TODO: set_theme dark background rc params
# TODO: 3-panel: histplot, scatterplot, barplot
# TODO: suptitle, tight_layout, save 'night_report.png' 150 DPI
# TODO: sns.set_theme() at end
17. Strip Plot & Swarm Plot

Use stripplot() to show individual data points by category and swarmplot() to avoid overplotting by spacing points. Layer them over box or violin plots for richer displays.

Basic strip and swarm comparison
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 5))
sns.stripplot(data=tips, x='day', y='total_bill', ax=ax1,
              palette='Set2', jitter=True, alpha=0.7, size=5)
ax1.set_title('Strip Plot (jitter)')

sns.swarmplot(data=tips, x='day', y='total_bill', ax=ax2,
              palette='Set2', size=4)
ax2.set_title('Swarm Plot (no overlap)')

for ax in (ax1, ax2):
    ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
fig.tight_layout()
fig.savefig('strip_swarm.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved strip_swarm.png')
Strip plot layered over box plot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=iris, x='species', y='sepal_length', ax=ax,
            palette='pastel', width=0.5,
            boxprops=dict(alpha=0.6))
sns.stripplot(data=iris, x='species', y='sepal_length', ax=ax,
              palette='Set2', size=5, jitter=True, alpha=0.7,
              linewidth=0.5, edgecolor='gray')
ax.set_title('Box + Strip Plot Overlay')
ax.set_xlabel('Species'); ax.set_ylabel('Sepal Length (cm)')
fig.tight_layout()
fig.savefig('box_strip.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved box_strip.png')
Swarm layered over violin
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
    'score': np.concatenate([np.random.normal(m, 1, 60) for m in [5, 6.5, 8]]),
    'group': np.repeat(['Control', 'Low Dose', 'High Dose'], 60)
})

fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(data=df, x='group', y='score', ax=ax,
               palette='muted', inner=None, alpha=0.6)
sns.swarmplot(data=df, x='group', y='score', ax=ax,
              color='black', size=3, alpha=0.6)
ax.set_title('Violin + Swarm: Treatment Groups')
ax.set_xlabel('Group'); ax.set_ylabel('Score')
fig.tight_layout()
fig.savefig('violin_swarm.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved violin_swarm.png')
Strip plot with hue and dodge
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Dodged strip plot: one jittered column of points per (day, sex) pair.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, ax = plt.subplots(figsize=(9, 5))
sns.stripplot(data=tips, x='day', y='total_bill', hue='sex',
              dodge=True, jitter=True, alpha=0.7, size=5,
              palette='Set1', ax=ax)
# The y variable is total_bill, so the title names the bill (not the tip).
ax.set_title('Strip Plot: Total Bill by Day and Sex (Dodged)')
ax.set_xlabel('Day'); ax.set_ylabel('Total Bill ($)')
# Anchor the legend's upper-left corner to the axes' upper-right corner so it
# sits outside the plotting area instead of at an auto-chosen location.
ax.legend(title='Sex', bbox_to_anchor=(1, 1), loc='upper left')
fig.tight_layout()
fig.savefig('strip_hue.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved strip_hue.png')
💼 Real-World: Employee Salary Audit
HR needs to show individual salaries by department and gender on top of a violin plot to reveal outliers and within-group spread. Use dodge=True so genders are side by side.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Split violins of salary by department and gender with individual salaries
# overlaid as dodged strip points.
sns.set_theme(style='whitegrid')
np.random.seed(7)  # reproducible synthetic salaries

# (mean, standard deviation) of salary in $K for each department.
dept_params = {'Eng': (95, 15), 'Sales': (70, 12), 'Mktg': (75, 13), 'HR': (65, 10)}
depts = list(dept_params)
genders = ['Female', 'Male']
n_per_group = 60

# Draw an independent sample for every (gender, department) cell. Repeating a
# single list of arrays would hand both genders the exact same salaries,
# making the two halves of every split violin identical.
frames = [
    pd.DataFrame({
        'salary': np.random.normal(mean, sd, n_per_group),
        'dept': dept,
        'gender': gender,
    })
    for gender in genders
    for dept, (mean, sd) in dept_params.items()
]
df = pd.concat(frames, ignore_index=True)

fig, ax = plt.subplots(figsize=(11, 6))
sns.violinplot(data=df, x='dept', y='salary', hue='gender', order=depts,
               split=True, inner=None, palette='pastel', alpha=0.6, ax=ax)
# legend=False: the violins already provide the gender legend entries.
sns.stripplot(data=df, x='dept', y='salary', hue='gender', order=depts,
              dodge=True, jitter=True, alpha=0.5, size=3,
              palette='dark:#333333', ax=ax, legend=False)
ax.set_title('Salary Distribution by Department & Gender', fontweight='bold')
ax.set_xlabel('Department'); ax.set_ylabel('Salary ($K)')
# Keep only the first two legend entries (one per gender).
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:2], labels[:2], title='Gender')
fig.tight_layout()
fig.savefig('salary_audit.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved salary_audit.png')
🏋️ Practice: Strip/Swarm Practice
Load the 'penguins' dataset. Create a 1x2 subplot: (1) swarmplot of body_mass_g by species with hue=sex, (2) stripplot of flipper_length_mm by island with box overlay. Use palette='colorblind' and whitegrid style.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: theme, cleaned penguins data and an empty 1x2 figure to fill in.
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
ax1, ax2 = axes  # left panel, right panel
# TODO: swarmplot body_mass_g by species, hue=sex
# TODO: boxplot + stripplot flipper_length_mm by island
# TODO: colorblind palette, titles, labels
fig.tight_layout()
out_path = 'penguin_points.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
18. Point Plot & Statistical Line Plots

Use pointplot() to display means with confidence intervals for categorical variables. Use lineplot() with hue for multi-group time series with automatic CI shading.

pointplot: means with CI by category
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Point plots of mean total bill per day: overall (left) and split by sex (right).
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 5))
# No hue variable here, so use one colour instead of a palette: `palette`
# without `hue` is deprecated in seaborn >= 0.13, and assigning hue='day'
# would put every point in its own group and remove the connecting line.
# `errwidth` is likewise deprecated; error-bar styling goes through err_kws.
sns.pointplot(data=tips, x='day', y='total_bill', ax=ax1,
              color=sns.color_palette('Set2')[0], capsize=0.1,
              err_kws={'linewidth': 2},
              markers='o', linestyles='-')
ax1.set_title('Point Plot: Mean Bill by Day')

# dodge=True offsets the two sexes horizontally so their error bars do not overlap.
sns.pointplot(data=tips, x='day', y='total_bill', hue='sex',
              ax=ax2, palette='Set1', dodge=True,
              capsize=0.08, err_kws={'linewidth': 1.5})
ax2.set_title('Point Plot: By Day and Sex')

for ax in (ax1, ax2):
    ax.set_xlabel('Day'); ax.set_ylabel('Mean Total Bill ($)')
fig.tight_layout()
fig.savefig('pointplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved pointplot.png')
lineplot with confidence interval shading
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Three random walks with noisy replicates, so lineplot has repeated
# observations per time step to aggregate into a mean line and CI band.
sns.set_theme(style='darkgrid')
np.random.seed(0)
n = 50
model_offsets = {'Model A': 5, 'Model B': 3, 'Model C': 7}

# One cumulative-sum walk per model, drawn in dictionary order.
walks = [np.cumsum(np.random.randn(n)) + offset
         for offset in model_offsets.values()]
df = pd.DataFrame({
    't': np.tile(np.arange(n), 3),
    'value': np.concatenate(walks),
    'model': np.repeat(list(model_offsets), n),
})

# Five jittered copies of the base series act as repeated measurements.
replicates = []
for _ in range(5):
    jitter = np.random.randn(len(df)) * 0.5
    replicates.append(df.assign(value=df.value + jitter))
df_rep = pd.concat(replicates, ignore_index=True)

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df_rep, x='t', y='value', hue='model',
             palette='Set2', linewidth=2, ax=ax)
ax.set(title='lineplot with 95% CI Shading',
       xlabel='Time Step', ylabel='Value')
fig.tight_layout()
out_path = 'lineplot_ci.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
lineplot with markers and styles
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Monthly revenue per region: one line per region, each with its own marker.
sns.set_theme(style='whitegrid')
np.random.seed(1)
months = list(range(1, 13))

# (region, starting revenue, monthly growth, noise scale); the order fixes
# both the row order and the order in which random numbers are consumed.
region_specs = [('North', 100, 8, 5), ('South', 80, 6, 4), ('East', 60, 10, 6)]
revenue = [
    start + i*growth + np.random.randn()*noise
    for _, start, growth, noise in region_specs
    for i in range(12)
]
region = [name for name, *_ in region_specs for _ in range(12)]
df = pd.DataFrame({
    'month': months * 3,
    'revenue': revenue,
    'region': region,
})

fig, ax = plt.subplots(figsize=(10, 5))
# style='region' is what enables per-region markers; dashes=False keeps
# every line solid.
line_opts = dict(hue='region', style='region', markers=True, dashes=False,
                 palette='Set1', linewidth=2)
sns.lineplot(data=df, x='month', y='revenue', ax=ax, **line_opts)
ax.set(title='Monthly Revenue by Region',
       xlabel='Month', ylabel='Revenue ($K)')
ax.legend(title='Region')
fig.tight_layout()
out_path = 'lineplot_markers.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
pointplot vs barplot comparison
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Compare barplot and pointplot on the same Titanic survival-rate data, plus
# a pointplot drawn from pre-aggregated means.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# Bar: shows aggregated height
sns.barplot(data=titanic, x='class', y='survived', hue='sex',
            palette='Set2', ax=axes[0])
axes[0].set_title('barplot: Mean Survival Rate')

# Point: cleaner for comparison
sns.pointplot(data=titanic, x='class', y='survived', hue='sex',
              palette='Set1', dodge=True, capsize=0.1, ax=axes[1])
axes[1].set_title('pointplot: Same Data')

# Precompute the mean survival rate per (class, sex). observed=True is passed
# explicitly because 'class' is categorical and pandas is changing the
# default for that argument.
surv = (titanic.groupby(['class', 'sex'], observed=True)['survived']
        .mean().reset_index())
# Each cell now holds a single value, so there is nothing to estimate an
# interval from: disable error bars instead of passing capsize.
sns.pointplot(data=surv, x='class', y='survived', hue='sex',
              palette='Set1', dodge=True, errorbar=None, ax=axes[2],
              markers=['o', 's'], linestyles=['-', '--'])
axes[2].set_title('pointplot: Precomputed')

for ax in axes:
    ax.set_ylim(0, 1); ax.set_ylabel('Survival Rate')
fig.tight_layout()
fig.savefig('point_vs_bar.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved point_vs_bar.png')
💼 Real-World: Student Performance Tracking
Plot mean exam scores with 95% CI for 4 subjects across 3 school terms. Use pointplot with dodge for gender comparison and a lineplot showing term-over-term trend with shaded uncertainty.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Synthetic exam scores for 4 subjects x 3 terms x 2 genders: a pointplot of
# subject means by gender and a lineplot of the term-over-term trend.
sns.set_theme(style='whitegrid')
np.random.seed(42)
subjects = ['Math', 'Science', 'English', 'History']
terms = ['T1', 'T2', 'T3']
term_offsets = {'T1': 0, 'T2': 3, 'T3': 6}  # scores drift upward each term
n = 30  # students per (subject, term, gender) cell
rows = []
for subj in subjects:
    base = np.random.uniform(60, 85)  # subject-specific baseline score
    for term in terms:
        for gender in ['Female', 'Male']:
            g_offset = 2 if gender == 'Female' else 0
            scores = np.random.normal(base + term_offsets[term] + g_offset, 8, n)
            rows.extend({'subject': subj, 'term': term, 'gender': gender, 'score': s}
                        for s in scores)
df = pd.DataFrame(rows)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
sns.pointplot(data=df, x='subject', y='score', hue='gender',
              dodge=True, capsize=0.08, palette='Set1',
              markers=['o','s'], linestyles=['-','--'], ax=ax1)
ax1.set_title('Mean Score by Subject & Gender', fontweight='bold')
ax1.set_xlabel('Subject'); ax1.set_ylabel('Mean Score')

# Plot the raw rows (not pre-aggregated means) so lineplot can draw the
# uncertainty band around each term mean. `markers=True` only takes effect
# together with a `style` variable, so the marker is set directly instead.
sns.lineplot(data=df, x='term', y='score', hue='subject',
             palette='tab10', linewidth=2, marker='o', ax=ax2)
ax2.set_title('Score Trend by Term', fontweight='bold')
ax2.set_xlabel('Term'); ax2.set_ylabel('Score')
# Anchor the legend's upper-left corner to the axes' upper-right corner.
ax2.legend(title='Subject', bbox_to_anchor=(1, 1), loc='upper left')
fig.tight_layout()
fig.savefig('student_performance.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved student_performance.png')
🏋️ Practice: Point/Line Plot Practice
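Load the 'fmri' dataset from seaborn. Plot a lineplot of signal by timepoint, with hue=event and style=region. Add a pointplot below it showing mean signal per region. Use a 2x1 subplot with independent x-axes (sharex=False), because the top panel's x variable is timepoint while the bottom panel's is region.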
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: theme, fmri data and an empty 2x1 figure with independent x-axes.
sns.set_theme(style='darkgrid')
fmri = sns.load_dataset('fmri')

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(11, 8), sharex=False)
ax1, ax2 = axes  # top panel, bottom panel
# TODO: lineplot signal by timepoint, hue=event, style=region
# TODO: pointplot mean signal by region with capsize
# TODO: titles, labels, tight_layout
out_path = 'fmri_plots.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
19. ECDF & Distribution Comparison

Use ecdfplot() for empirical CDFs, histplot() with multiple groups, and kdeplot() to compare distributions across categories without assuming a parametric form.

ECDF plot for group comparison
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Empirical CDFs of three synthetic distributions with a median reference line.
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
    'value': np.concatenate([
        np.random.normal(5, 1.5, 200),
        np.random.normal(7, 1.0, 200),
        np.random.exponential(2, 200) + 3,
    ]),
    'group': np.repeat(['Normal(5,1.5)', 'Normal(7,1)', 'Exp+3'], 200)
})

fig, ax = plt.subplots(figsize=(9, 5))
# Pass ax explicitly rather than relying on the current-axes state.
sns.ecdfplot(data=df, x='value', hue='group', palette='Set2', linewidth=2, ax=ax)
median_line = ax.axhline(0.5, color='gray', linestyle='--', linewidth=1)

# seaborn builds the hue legend from its own proxy handles, so a bare
# ax.legend(...) call would replace it with a legend holding only the
# labelled reference line. Rebuild the legend from the existing entries and
# append the median line to them.
hue_legend = ax.get_legend()
handles = list(hue_legend.get_lines()) + [median_line]
labels = [text.get_text() for text in hue_legend.get_texts()] + ['Median']
ax.legend(handles, labels, title='Group')

ax.set_title('ECDF: Distribution Comparison')
ax.set_xlabel('Value'); ax.set_ylabel('Cumulative Proportion')
fig.tight_layout()
fig.savefig('ecdf.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved ecdf.png')
Overlapping KDE comparison
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Filled per-species KDEs for two iris features, one feature per panel.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
kde_opts = dict(hue='species', fill=True, alpha=0.35,
                linewidth=2, palette='Set2')
for feature, panel in zip(('sepal_length', 'petal_length'), axes):
    pretty = feature.replace('_', ' ').title()  # e.g. 'Sepal Length'
    sns.kdeplot(data=iris, x=feature, ax=panel, **kde_opts)
    panel.set_title(f'KDE: {pretty}')
    panel.set_xlabel(pretty)
fig.tight_layout()
out_path = 'kde_compare.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
histplot with stat='density' and kde overlay
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Density histograms with KDE overlays of response times for two endpoints,
# on a linear x-axis (left) and a log x-axis (right).
sns.set_theme(style='whitegrid')
np.random.seed(1)
df = pd.DataFrame({
    'response_ms': np.concatenate([
        np.random.lognormal(5, 0.5, 300),
        np.random.lognormal(5.5, 0.4, 200),
    ]),
    'endpoint': np.repeat(['/api/search', '/api/checkout'], [300, 200])
})

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# (axes, log_scale argument, scale label); None keeps seaborn's default
# linear axis for the first panel.
panels = [(ax1, None, 'Linear scale'), (ax2, True, 'Log scale')]
for ax, log_scale, scale_label in panels:
    # common_norm=False normalises each endpoint's density separately, so the
    # groups stay comparable despite their different sample sizes (300 vs 200).
    sns.histplot(data=df, x='response_ms', hue='endpoint',
                 stat='density', kde=True, alpha=0.4, palette='Set1',
                 common_norm=False, log_scale=log_scale, ax=ax)
    # \u2014 is an em dash; the escape keeps the title intact even if the
    # file is re-saved with a different encoding.
    ax.set_title(f'Histogram with KDE \u2014 {scale_label}')
    ax.set_xlabel('Response Time (ms)')
fig.tight_layout()
fig.savefig('hist_kde_compare.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved hist_kde_compare.png')
ECDF with percentile markers
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Latency ECDF per server region, annotated with overall p50/p95/p99 guides.
sns.set_theme(style='whitegrid')
np.random.seed(2)
n_requests = 1000
# Latencies are drawn before the server labels (same draw order as a dict literal).
latency_ms = np.random.lognormal(5, 0.7, n_requests)
server = np.random.choice(['US-East', 'EU-West', 'AP-South'], n_requests)
data = pd.DataFrame({'latency_ms': latency_ms, 'server': server})

fig, ax = plt.subplots(figsize=(9, 5))
sns.ecdfplot(data=data, x='latency_ms', hue='server',
             palette='tab10', linewidth=2, ax=ax)

# Mark p50, p95, p99 (computed over all servers pooled together)
percentile_styles = {'p50': (50, 'gray'), 'p95': (95, 'orange'), 'p99': (99, 'red')}
for label, (pct, color) in percentile_styles.items():
    val = np.percentile(data['latency_ms'], pct)
    ax.axvline(val, color=color, linestyle='--', linewidth=1.2, alpha=0.8)
    ax.axhline(pct/100, color=color, linestyle=':', linewidth=0.8, alpha=0.5)
    # Label sits just right of the vertical guide and below the horizontal one.
    ax.text(val+50, pct/100-0.04, f'{label}: {val:.0f}ms',
            fontsize=8, color=color)

ax.set(title='API Latency ECDF by Server Region',
       xlabel='Latency (ms)', ylabel='Cumulative Proportion')
fig.tight_layout()
out_path = 'ecdf_percentiles.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
💼 Real-World: A/B Test Distribution Analysis
Compare three e-commerce funnel variants: show ECDF of conversion values, overlapping KDE of order sizes, and a combined histplot with p50/p90 markers for each variant.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# A/B test: ECDF (left) and KDE (right) of order value for three variants.
sns.set_theme(style='whitegrid')
np.random.seed(99)
variants = {'Control': (45, 12), 'Variant A': (52, 14), 'Variant B': (48, 10)}

# 300 simulated orders per variant, floored at $5.
dfs = []
for name, (mu, sd) in variants.items():
    order_value = np.random.normal(mu, sd, 300).clip(5)
    dfs.append(pd.DataFrame({'order_value': order_value, 'variant': name}))
df = pd.concat(dfs, ignore_index=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
sns.ecdfplot(data=df, x='order_value', hue='variant',
             palette='Set1', linewidth=2, ax=ax1)
# Horizontal guides at the 50th and 90th percentiles, labelled at the left edge.
for pct, ls in ((50, '--'), (90, ':')):
    level = pct/100
    ax1.axhline(level, color='gray', linestyle=ls, linewidth=1)
    ax1.text(ax1.get_xlim()[0]+1, level+0.01, f'p{pct}', fontsize=8, color='gray')
ax1.set_title('ECDF of Order Value by Variant', fontweight='bold')
ax1.set_xlabel('Order Value ($)')

kde_opts = dict(fill=True, alpha=0.3, linewidth=2, palette='Set1',
                common_norm=False)
sns.kdeplot(data=df, x='order_value', hue='variant', ax=ax2, **kde_opts)
ax2.set_title('KDE: Order Value Distribution', fontweight='bold')
ax2.set_xlabel('Order Value ($)')
fig.tight_layout()
out_path = 'ab_distribution.png'
fig.savefig(out_path, dpi=150, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
🏋️ Practice: Distribution Comparison Practice
Load the 'titanic' dataset. Create a 1x3 figure: (1) ecdfplot of 'fare' by 'class', (2) kdeplot of 'age' by 'survived' (use hue), (3) histplot of 'fare' with log_scale=True, hue='class'. Drop NaN rows first.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: theme, Titanic rows that have both age and fare, empty 1x3 figure.
sns.set_theme(style='whitegrid')
required_cols = ['age', 'fare']
titanic = sns.load_dataset('titanic').dropna(subset=required_cols)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
# TODO: ecdfplot fare by class
# TODO: kdeplot age by survived
# TODO: histplot fare log scale by class
fig.tight_layout()
out_path = 'titanic_dist.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
20. Cluster Map

Use clustermap() to apply hierarchical clustering to rows and columns of a matrix, revealing natural groupings in correlation matrices, gene expression, or feature similarity data.

Basic clustermap on iris correlation
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Hierarchically clustered heatmap of the iris feature correlation matrix.
sns.set_theme(style='white')
iris = sns.load_dataset('iris')
corr = iris.drop(columns='species').corr()  # correlations of the measurement columns

# Fixed [-1, 1] colour range so the diverging map is centred on zero.
heatmap_opts = dict(cmap='RdBu_r', vmin=-1, vmax=1,
                    annot=True, fmt='.2f', linewidths=0.5)
g = sns.clustermap(corr, figsize=(7, 7), **heatmap_opts)
g.ax_heatmap.set_title('Iris Feature Correlation Clustermap', pad=50)
out_path = 'clustermap_iris.png'
plt.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
Clustermap with row/col color bars
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Synthetic gene-expression matrix clustered on both axes, with a colour bar
# marking each sample's group.
sns.set_theme(style='white')
np.random.seed(0)
n_genes, n_samples = 20, 12
gene_ids = [f'Gene_{i:02d}' for i in range(n_genes)]
sample_ids = [f'S{i+1:02d}' for i in range(n_samples)]
groups = [label for label in 'ABC' for _ in range(4)]  # four samples per group

# Gaussian noise plus a sine trend whose sign (-1, 0, +1) cycles with the gene index.
noise = np.random.randn(n_genes, n_samples)
trend = np.sin(np.linspace(0, 3, n_samples))
signal = np.array([trend * (i % 3 - 1) for i in range(n_genes)])
data = pd.DataFrame(noise + signal, index=gene_ids, columns=sample_ids)

palette = {'A': '#4c72b0', 'B': '#dd8452', 'C': '#55a868'}
col_colors = pd.Series(groups, index=data.columns).map(palette)

cluster_opts = dict(row_cluster=True, col_cluster=True,
                    z_score=0, linewidths=0.3)
g = sns.clustermap(data, cmap='vlag', figsize=(10, 8),
                   col_colors=col_colors, **cluster_opts)
g.ax_heatmap.set_title('Gene Expression Clustermap', pad=50)
out_path = 'clustermap_genes.png'
plt.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
Clustermap with standard_scale
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Clustermap of synthetic company KPIs with every column rescaled to a
# common range before clustering.
sns.set_theme(style='white')
np.random.seed(1)
features = ['Revenue', 'Growth', 'Margin', 'CAC', 'LTV', 'Churn']
companies = [f'Co{i:02d}' for i in range(1, 16)]
data = pd.DataFrame(
    np.random.randn(len(companies), len(features)),
    index=companies, columns=features
)
# Add cluster structure: shift three blocks of companies on different KPI subsets
data.iloc[:5, :3] += 2
data.iloc[5:10, 3:] -= 2
data.iloc[10:, [0,2,4]] += 1.5

g = sns.clustermap(data, cmap='coolwarm', figsize=(8, 10),
                   # rescale each column: subtract its minimum, then divide
                   # by its maximum (min-max scaling, not a z-score)
                   standard_scale=1,
                   linewidths=0.4, annot=False,
                   dendrogram_ratio=(0.15, 0.2))
# The two-line title needs an escaped newline; a raw line break inside a
# single-quoted string literal is a SyntaxError.
g.ax_heatmap.set_title('Company KPI Clustermap\n(column-standardized)', pad=50)
plt.savefig('clustermap_kpi.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved clustermap_kpi.png')
Clustermap method and metric options
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Render the same matrix with two linkage method / distance metric pairs.
sns.set_theme(style='white')
np.random.seed(2)
data = pd.DataFrame(
    np.random.randn(15, 10),
    index=[f'R{i}' for i in range(15)],
    columns=[f'C{j}' for j in range(10)]
)
# Plant two high-valued blocks for the clustering to recover.
data.iloc[:5, :4] += 2.5
data.iloc[10:, 6:] += 2.5

# clustermap is figure-level: every call creates its own figure and cannot
# draw into existing Axes, so no shared plt.subplots() figure is created
# (it would stay empty and never be closed).
linkage_options = [('ward', 'euclidean'), ('average', 'correlation')]
for method, metric in linkage_options:
    g = sns.clustermap(data, cmap='RdYlBu_r', figsize=(6, 5),
                       method=method, metric=metric,
                       linewidths=0.3)
    g.fig.suptitle(f'method={method}, metric={metric}', y=1.01, fontsize=10)
    g.fig.savefig(f'clustermap_{method}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)  # release each figure once it has been saved
print('Saved clustermap_ward.png and clustermap_average.png')
💼 Real-World: Customer Segment Heatmap
Build a clustermap of customer segments vs. behavioral features (purchase frequency, avg order, recency, CLV, support tickets). Use z_score=0, vlag colormap, and annotate row colors by segment.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Clustermap of synthetic customers (rows) by behavioural features (columns),
# with a row colour bar showing each customer's segment.
sns.set_theme(style='white')
np.random.seed(55)
features = ['Frequency','Avg Order','Recency','CLV','Tickets']
n_each = 15  # customers per segment
# Segment centroids, one value per entry of `features`.
centroids = {
    'Loyal':   [8, 120, 5, 960, 1],
    'At-Risk': [2, 80, 45, 160, 4],
    'New':     [1, 95, 15, 95, 0],
    'Churned': [0, 60, 120, 0, 6],
    'VIP':     [12, 400, 3, 4800, 2],
}
segments = list(centroids)
feature_noise = np.array([1, 20, 10, 200, 1])  # per-feature noise scale

# Build one labelled block per segment; rows are named '<segment>_<i>'.
blocks = []
for seg, center in centroids.items():
    noise = np.random.randn(n_each, len(features)) * feature_noise
    blocks.append(pd.DataFrame(
        np.array(center) + noise,
        columns=features,
        index=[f'{seg}_{i}' for i in range(n_each)],
    ))
data_df = pd.concat(blocks)

palette_seg = {'Loyal':'#55a868','At-Risk':'#dd8452','New':'#4c72b0',
               'Churned':'#c44e52','VIP':'#9467bd'}
# Index the colours like the data so seaborn matches them to rows by label;
# the Series name becomes the label of the colour bar.
row_segment = pd.Series(np.repeat(segments, n_each),
                        index=data_df.index, name='Segment')
row_colors = row_segment.map(palette_seg)

# Plot the labelled frame (not an anonymous copy of its values) so the row
# tick labels identify the customers.
g = sns.clustermap(data_df,
                   cmap='vlag', z_score=0, figsize=(9, 11),
                   row_colors=row_colors,
                   linewidths=0.2, dendrogram_ratio=(0.2, 0.15))
g.ax_heatmap.set_title('Customer Segment Behavioral Clustermap', pad=50, fontsize=12)
plt.savefig('customer_clustermap.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved customer_clustermap.png')
🏋️ Practice: Clustermap Practice
Generate a 12x8 random matrix with 3 obvious block clusters (add +2.5 to block (0:4, 0:3), -2.5 to block (4:8, 3:6), and +1.5 to block (8:12, 5:8)). Plot a clustermap with method='ward', cmap='RdBu_r', and z_score=0. Annotate the values with annot=True and fmt='.1f'.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: 12x8 Gaussian matrix with three shifted blocks, wrapped in a
# labelled DataFrame ready for clustermap.
sns.set_theme(style='white')
np.random.seed(3)
data = np.random.randn(12, 8)

# (row slice, column slice, shift) for each planted block
block_shifts = [
    (slice(None, 4), slice(None, 3), 2.5),
    (slice(4, 8), slice(3, 6), -2.5),
    (slice(8, None), slice(5, None), 1.5),
]
for row_sel, col_sel, shift in block_shifts:
    data[row_sel, col_sel] += shift

row_names = [f'R{i}' for i in range(12)]
col_names = [f'C{j}' for j in range(8)]
df = pd.DataFrame(data, index=row_names, columns=col_names)
# TODO: clustermap method='ward', cmap='RdBu_r', z_score=0, annot=True
# TODO: save 'block_clustermap.png'
21. Joint Plot Advanced

Use jointplot() to show the bivariate relationship alongside marginal univariate distributions. Explore kind='hex', 'kde', 'reg', and custom marginal plots with JointGrid.

jointplot: hex, kde, reg, scatter kinds
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Render the same bill-vs-tip relationship with four jointplot kinds.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

kinds = ['scatter', 'hex', 'kde', 'reg']
# Only these kinds draw histogram marginals. kind='kde' draws KDE marginals,
# which have no `bins` option, so the histogram settings are not sent there.
hist_marginal_kinds = {'scatter', 'hex', 'reg'}
for kind in kinds:
    marginal_kws = dict(bins=25) if kind in hist_marginal_kinds else {}
    # No `palette`: there is no hue variable, so a palette would be ignored.
    g = sns.jointplot(data=tips, x='total_bill', y='tip',
                      kind=kind, height=5,
                      marginal_kws=marginal_kws)
    g.fig.suptitle(f"kind='{kind}'", y=1.02)
    g.fig.savefig(f'joint_{kind}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)
print('Saved joint_scatter/hex/kde/reg.png')
jointplot with hue
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Joint plot of sepal vs petal length coloured by species, with filled marginals.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

marginal_style = dict(fill=True, alpha=0.4)
g = sns.jointplot(data=iris, x='sepal_length', y='petal_length',
                  hue='species', palette='Set2',
                  height=6, marginal_kws=marginal_style)
fig = g.fig
fig.suptitle('Iris: Sepal vs Petal Length (hue=species)', y=1.02)
out_path = 'joint_hue.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
JointGrid: custom marginals
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# JointGrid with two stacked joint layers (scatter, then KDE contours) and
# histogram + KDE marginals.
sns.set_theme(style='whitegrid')
np.random.seed(42)
n_points = 300
x_vals = np.random.lognormal(2, 0.5, n_points)    # drawn first
y_vals = np.random.lognormal(1.5, 0.6, n_points)  # drawn second
df = pd.DataFrame({'x': x_vals, 'y': y_vals})

g = sns.JointGrid(data=df, x='x', y='y', height=6)
# Joint layers are drawn in list order, so the contours sit above the points.
joint_layers = [
    (sns.scatterplot, dict(alpha=0.4, color='steelblue', s=25)),
    (sns.kdeplot, dict(levels=5, color='navy', linewidths=1.0)),
]
for plot_func, opts in joint_layers:
    g.plot_joint(plot_func, **opts)
g.plot_marginals(sns.histplot, kde=True, bins=25, color='steelblue', alpha=0.6)

g.ax_joint.set(xlabel='X (lognormal)', ylabel='Y (lognormal)')
fig = g.fig
fig.suptitle('JointGrid: Scatter + KDE + Histogram Marginals', y=1.01)
out_path = 'joint_grid_custom.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
jointplot with regression and stats
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Regression joint plot on a noisy linear relationship, annotated with Pearson r.
sns.set_theme(style='whitegrid')
np.random.seed(5)
n = 150
x = np.random.uniform(10, 100, n)
y = 0.5*x + np.random.randn(n)*8 + 5  # slope 0.5, intercept 5, Gaussian noise

df = pd.DataFrame({'x': x, 'y': y})

reg_opts = dict(
    scatter_kws=dict(alpha=0.5, s=30, color='steelblue'),
    line_kws=dict(color='red', linewidth=2),
    marginal_kws=dict(bins=20, kde=True),
)
g = sns.jointplot(data=df, x='x', y='y', kind='reg', height=6, **reg_opts)

# Compute and annotate correlation (off-diagonal entry of the 2x2 matrix)
r = df.corr().loc['x', 'y']
# transAxes places the text in axes-fraction coordinates (top-left corner).
g.ax_joint.text(0.05, 0.92, f'r = {r:.3f}', transform=g.ax_joint.transAxes,
                fontsize=11, fontweight='bold', color='red')
fig = g.fig
fig.suptitle('Joint Regression Plot with Correlation', y=1.02)
out_path = 'joint_reg.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
💼 Real-World: Height-Weight Bivariate Analysis
Show the joint distribution of height and weight across gender using JointGrid: central scatter colored by gender, marginal KDEs filled by gender, and a correlation annotation in the joint panel.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Hand-built JointGrid: scatter coloured by gender in the joint panel and
# filled per-gender KDEs in both marginal panels.
sns.set_theme(style='whitegrid')
np.random.seed(77)
n_per_gender = 200
# Draw order matters for reproducibility: heights (F, M) before weights (F, M).
height_cm = np.concatenate([
    np.random.normal(165, 7, n_per_gender),
    np.random.normal(178, 8, n_per_gender)])
weight_kg = np.concatenate([
    np.random.normal(62, 9, n_per_gender),
    np.random.normal(78, 11, n_per_gender)])
df = pd.DataFrame({
    'height_cm': height_cm,
    'weight_kg': weight_kg,
    'gender': np.repeat(['Female','Male'], n_per_gender),
})

g = sns.JointGrid(data=df, x='height_cm', y='weight_kg', height=7)
palette = {'Female':'#dd8452','Male':'#4c72b0'}
joint_ax = g.ax_joint
for gender, grp in df.groupby('gender'):
    shade = palette[gender]
    kde_opts = dict(fill=True, alpha=0.4, color=shade, linewidth=1.5)
    joint_ax.scatter(grp.height_cm, grp.weight_kg,
                     alpha=0.4, s=20, color=shade, label=gender)
    sns.kdeplot(data=grp, x='height_cm', ax=g.ax_marg_x, **kde_opts)
    sns.kdeplot(data=grp, y='weight_kg', ax=g.ax_marg_y, **kde_opts)

joint_ax.legend(title='Gender')
# Correlation over the pooled data (both genders together).
r = df[['height_cm','weight_kg']].corr().iloc[0,1]
joint_ax.text(0.05,0.92,f'r = {r:.3f}',transform=joint_ax.transAxes,
              fontsize=11,fontweight='bold',color='darkred')
joint_ax.set(xlabel='Height (cm)', ylabel='Weight (kg)')
fig = g.fig
fig.suptitle('Height-Weight Joint Distribution by Gender', y=1.02, fontweight='bold')
out_path = 'height_weight_joint.png'
fig.savefig(out_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
🏋️ Practice: Joint Plot Practice
Load the 'penguins' dataset. Create a JointGrid for bill_length_mm vs bill_depth_mm: (1) scatter in the joint with hue=species and s=30, (2) boxplot in the marginals (ax_marg_x and ax_marg_y). Drop NaN rows first.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: theme, cleaned penguins data and an empty JointGrid for the two
# bill measurements.
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()

grid_opts = dict(x='bill_length_mm', y='bill_depth_mm', height=6)
g = sns.JointGrid(data=penguins, **grid_opts)
# TODO: scatterplot joint with hue=species, s=30
# TODO: boxplot marginals for x and y
# TODO: legend, labels, title
# TODO: save 'penguin_joint.png'
22. catplot — Figure-Level Categorical

Use catplot() as a unified interface for all categorical plots. Control kind='strip'|'swarm'|'box'|'violin'|'bar'|'count'|'point' and use col/row to create FacetGrid-powered small multiples.

catplot with col splitting
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Box plots of total bill by day and sex, one facet column per meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

day_order = ['Thur','Fri','Sat','Sun']
facet_opts = dict(kind='box', palette='Set2', height=5, aspect=0.85)
g = sns.catplot(data=tips, x='day', y='total_bill',
                hue='sex', col='time',
                order=day_order, **facet_opts)
g.set_axis_labels('Day', 'Total Bill ($)')
g.set_titles('{col_name}')
fig = g.fig
fig.suptitle('Bill by Day, Sex, and Time (catplot col)', y=1.02, fontweight='bold')
out_path = 'catplot_col.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
catplot kind='violin' with col faceting and split hue
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Split violins of passenger age by class and sex, faceted by survival.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

# split=True draws the two sexes as the two halves of a single violin.
violin_opts = dict(kind='violin', split=True, inner='quartile',
                   palette='pastel', height=5, aspect=0.9)
g = sns.catplot(data=titanic, x='class', y='age',
                col='survived', hue='sex', **violin_opts)
g.set_axis_labels('Class', 'Age')
g.set_titles(col_template='Survived: {col_name}')
fig = g.fig
fig.suptitle('Titanic Age by Class, Gender & Survival', y=1.02, fontweight='bold')
out_path = 'catplot_violin.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
countplot and barplot: axes-level equivalents of catplot kind='count' and 'bar'
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Side-by-side countplot and barplot of the tips data, both split by sex.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

fig, axes = plt.subplots(1, 2, figsize=(13, 5))
count_ax, bar_ax = axes
# Axes-level countplot/barplot are used here because both panels are drawn
# into one shared figure.
sns.countplot(data=tips, x='day', hue='sex', palette='Set1', ax=count_ax)
count_ax.set_title("countplot: Visits by Day")

sns.barplot(data=tips, x='day', y='tip', hue='sex',
            palette='Set2', capsize=0.08, ax=bar_ax)
bar_ax.set_title("barplot: Mean Tip by Day")
for panel in (count_ax, bar_ax):
    panel.set_xlabel('Day')
fig.tight_layout()
out_path = 'catplot_count_bar.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_path}')
catplot kind='point' with order control
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Point plots of survival rate by class and sex, one facet per embarkation port.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

class_order = ['First','Second','Third']
point_opts = dict(kind='point', dodge=True, capsize=0.1,
                  markers=['o','s'], linestyles=['-','--'],
                  palette='Set1', height=4, aspect=0.85)
g = sns.catplot(data=titanic, x='class', y='survived',
                hue='sex', col='embarked',
                order=class_order, **point_opts)
g.set_axis_labels('Class', 'Survival Rate')
g.set_titles(col_template='Embarked: {col_name}')
fig = g.fig
fig.suptitle('Survival Rate by Class, Sex & Port', y=1.02, fontweight='bold')
out_path = 'catplot_point.png'
fig.savefig(out_path, dpi=120, bbox_inches='tight')
plt.close(fig)
print(f'Saved {out_path}')
💼 Real-World: Product Feedback Dashboard
Use catplot to compare NPS scores across 4 product categories and 3 user segments. Show kind='box' with col=category, hue=segment, and a separate catplot kind='point' for mean scores.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Product feedback dashboard: box plots per category plus a point plot of
# mean scores, both split by user segment.
sns.set_theme(style='whitegrid')
np.random.seed(42)
categories = ['Mobile App','Web Platform','API','Support']
segments = ['Enterprise','SMB','Startup']
bases = {'Mobile App': 7.0,'Web Platform': 6.5,'API': 8.0,'Support': 5.5}
seg_offsets = {'Enterprise': 0.5,'SMB': 0.0,'Startup': -0.3}

# Simulate 40 scores per (category, segment) cell, clipped to the 0-10 scale.
# One frame per cell avoids appending a dict for every single score.
frames = []
for cat in categories:
    for seg in segments:
        mu = bases[cat] + seg_offsets[seg]
        scores = np.random.normal(mu, 1.5, 40).clip(0, 10)
        frames.append(pd.DataFrame({'category': cat, 'segment': seg, 'nps': scores}))
df = pd.concat(frames, ignore_index=True)

# Figure 1: score distribution per segment, one panel per category.
# hue duplicates x so the palette is applied; the legend would be redundant.
g = sns.catplot(data=df, x='segment', y='nps', col='category',
                kind='box', hue='segment', palette='Set2',
                height=4, aspect=0.85, col_wrap=2,
                order=segments, legend=False)
g.set_axis_labels('Segment', 'NPS Score')
g.set_titles('{col_name}')
g.fig.suptitle('Product NPS by Category & Segment', y=1.02, fontweight='bold')
g.fig.savefig('nps_catplot.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved nps_catplot.png')

# Figure 2: mean score per category and segment (the point plot the task asks for).
g_mean = sns.catplot(data=df, x='category', y='nps', hue='segment',
                     kind='point', dodge=True, capsize=0.1, palette='Set2',
                     order=categories, hue_order=segments,
                     height=4, aspect=1.6)
g_mean.set_axis_labels('Category', 'Mean NPS Score')
g_mean.fig.suptitle('Mean NPS by Category & Segment', y=1.02, fontweight='bold')
g_mean.fig.savefig('nps_pointplot.png', dpi=150, bbox_inches='tight')
plt.close(g_mean.fig)
print('Saved nps_pointplot.png')
🏋️ Practice: catplot Practice
Load the 'exercise' dataset from seaborn. Use catplot with x='time', y='pulse', hue='kind', col='diet', kind='point'. Then create a second catplot with kind='box'. Add appropriate titles and labels.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: the theme and dataset are ready; complete the TODO steps.
sns.set_theme(style='whitegrid')
exercise = sns.load_dataset('exercise')

# Each catplot call creates its own figure, so keep the returned grid object
# (e.g. g = sns.catplot(...)) to title and save it.
# TODO: catplot pointplot: x=time, y=pulse, hue=kind, col=diet
# TODO: catplot boxplot: same variables
# TODO: save 'exercise_catplot.png'
23. displot — Figure-Level Distributions

Use displot() as the figure-level interface for histplot, kdeplot, and ecdfplot. Add col/row faceting for small-multiple distribution comparisons.

displot: hist, kde, ecdf kinds
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Render the same two-group sample with each displot kind and save one PNG per kind.
sns.set_theme(style='whitegrid')
np.random.seed(0)
df = pd.DataFrame({
    'value': np.concatenate([np.random.normal(0,1,300), np.random.normal(4,1.5,300)]),
    'group': np.repeat(['A','B'], 300)
})

# Options every kind accepts.
shared = dict(height=4, aspect=1.5, palette='Set2', alpha=0.5, linewidth=2)

for kind in ['hist','kde','ecdf']:
    # 'fill' is a histplot/kdeplot option. For kind='ecdf' extra keywords are
    # forwarded to matplotlib's Axes.plot, whose Line2D has no 'fill' property,
    # so it must not be passed there at all (not even fill=False).
    extra = {} if kind == 'ecdf' else {'fill': True}
    g = sns.displot(data=df, x='value', hue='group', kind=kind, **shared, **extra)
    g.fig.suptitle(f"displot kind='{kind}'", y=1.02)
    g.fig.savefig(f'displot_{kind}.png', dpi=100, bbox_inches='tight')
    plt.close(g.fig)
print('Saved displot_hist/kde/ecdf.png')
displot with col faceting
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Age histograms with KDE overlay, one column per passenger class.
sns.set_theme(style='whitegrid')
passengers = sns.load_dataset('titanic')
# Keep only rows that have an age value.
titanic = passengers.dropna(subset=['age'])

hist_options = dict(kind='hist', stat='density', kde=True, alpha=0.5)
facet_options = dict(col='class', col_order=['First', 'Second', 'Third'],
                     height=4, aspect=0.85)

grid = sns.displot(data=titanic, x='age', hue='survived', palette='Set1',
                   **hist_options, **facet_options)
grid.set_axis_labels('Age', 'Density')
grid.set_titles(col_template='{col_name} Class')

figure = grid.fig
figure.suptitle('Age Distribution by Class and Survival', y=1.02, fontweight='bold')
figure.savefig('displot_col.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved displot_col.png')
displot with row and col
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# KDE of simulated sales on a region (rows) x quarter (columns) grid.
sns.set_theme(style='whitegrid')
np.random.seed(1)
regions = ['North','South']
periods = ['Q1','Q2','Q3','Q4']

# 100 samples per (region, quarter); the mean rises by 5 each quarter.
frames = []
for region in regions:
    base = 50 if region == 'North' else 35
    for quarter_idx, period in enumerate(periods):
        vals = np.random.normal(base + quarter_idx * 5, 12, 100)
        frames.append(pd.DataFrame({'region': region, 'period': period, 'sales': vals}))
df = pd.concat(frames, ignore_index=True)

# A palette only takes effect when a hue variable is assigned; without one
# seaborn ignores it with a warning. Colour by region (already the row facet)
# and drop the legend, which would just repeat the row titles.
g = sns.displot(data=df, x='sales', row='region', col='period',
                hue='region', hue_order=regions, legend=False,
                kind='kde', fill=True, alpha=0.5, palette='Set2',
                height=3, aspect=1.1, row_order=regions, col_order=periods)
g.set_axis_labels('Sales ($K)', 'Density')
g.fig.suptitle('Sales Distribution by Region & Quarter', y=1.02, fontweight='bold')
g.fig.savefig('displot_grid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved displot_grid.png')
displot with rug and binwidth
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Log-scaled latency histogram with KDE and a rug, split by endpoint.
sns.set_theme(style='whitegrid')
np.random.seed(2)

# Two log-normal populations: 500 '/fast' samples followed by 200 '/slow' ones.
fast_latency = np.random.lognormal(3, 0.4, 500)
slow_latency = np.random.lognormal(4, 0.5, 200)
df = pd.DataFrame({
    'latency': np.concatenate([fast_latency, slow_latency]),
    'endpoint': np.repeat(['/fast','/slow'], [500,200]),
})

hist_options = dict(kind='hist', stat='density', kde=True,
                    log_scale=True, binwidth=0.05, alpha=0.4)
rug_options = dict(rug=True, rug_kws=dict(height=0.05, alpha=0.3))

grid = sns.displot(data=df, x='latency', hue='endpoint', palette='Set1',
                   height=5, aspect=1.6, **hist_options, **rug_options)
grid.set_axis_labels('Latency (ms, log scale)', 'Density')

figure = grid.fig
figure.suptitle('Endpoint Latency Distribution with Rug', y=1.02, fontweight='bold')
figure.savefig('displot_rug.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved displot_rug.png')
💼 Real-World: Multi-Experiment Distribution Report
Use displot to compare test metric distributions across 3 ML experiments and 2 datasets (col=dataset, hue=experiment). Show kind='kde' with fill=True. Then show kind='ecdf' for cumulative comparison.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Compare accuracy distributions of three experiments on two datasets,
# first as filled KDEs and then as ECDFs.
sns.set_theme(style='whitegrid')
np.random.seed(88)
experiments = ['Baseline','DropoutReg','AugData']
datasets = ['CIFAR-10','ImageNet']
# Mean accuracy per experiment, in the same order as `datasets`.
means = {'Baseline': (72,80), 'DropoutReg': (75,83), 'AugData': (77,85)}

# 150 simulated runs per (experiment, dataset).
frames = []
for exp, dataset_means in means.items():
    for ds, mu in zip(datasets, dataset_means):
        vals = np.random.normal(mu, 3, 150)
        frames.append(pd.DataFrame({'experiment': exp, 'dataset': ds, 'accuracy': vals}))
df = pd.concat(frames, ignore_index=True)

# Faceting and colour settings shared by both figures so they stay comparable.
facet = dict(data=df, x='accuracy', hue='experiment', col='dataset',
             hue_order=experiments, col_order=datasets,
             palette='Set2', height=4, aspect=1.2)

# Figure 1: densities; common_norm=False normalises each experiment separately.
g = sns.displot(kind='kde', fill=True, alpha=0.4, linewidth=2,
                common_norm=False, **facet)
g.set_axis_labels('Accuracy (%)', 'Density')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Model Accuracy Distribution by Experiment & Dataset', y=1.02, fontweight='bold')
g.fig.savefig('experiment_displot.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved experiment_displot.png')

# Figure 2: cumulative comparison. No 'fill' here: ECDF keywords are
# forwarded to matplotlib line plotting.
g_ecdf = sns.displot(kind='ecdf', linewidth=2, **facet)
g_ecdf.set_axis_labels('Accuracy (%)', 'Proportion')
g_ecdf.set_titles(col_template='{col_name}')
g_ecdf.fig.suptitle('Cumulative Accuracy by Experiment & Dataset', y=1.02, fontweight='bold')
g_ecdf.fig.savefig('experiment_ecdf.png', dpi=150, bbox_inches='tight')
plt.close(g_ecdf.fig)
print('Saved experiment_ecdf.png')
🏋️ Practice: displot Practice
Load the 'penguins' dataset. Use displot to create: (1) hist kind with col=species, x=body_mass_g, hue=sex — set kde=True, (2) ecdf kind with same grouping on a new figure. Save both as separate PNGs.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: theme set and dataset loaded with incomplete rows dropped.
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()

# Each displot call returns its own grid; save and close one before starting the next.
# TODO: displot hist kind, col=species, x=body_mass_g, hue=sex, kde=True
# TODO: save 'penguin_hist.png'
# TODO: displot ecdf kind, same grouping
# TODO: save 'penguin_ecdf.png'
24. relplot — Figure-Level Relational

Use relplot() as the figure-level interface for scatterplot and lineplot. Use col, row, and hue to create multi-panel relational grids with consistent scaling.

relplot scatter with col and hue
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scatter of tip against total bill, one panel per meal time.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# Sex is encoded twice (colour and marker shape).
encoding = dict(hue='sex', style='sex', palette='Set1')
marker_options = dict(s=60, alpha=0.7)

grid = sns.relplot(data=tips, x='total_bill', y='tip', col='time',
                   height=4, aspect=1.0, **encoding, **marker_options)
grid.set_axis_labels('Total Bill ($)', 'Tip ($)')
grid.set_titles(col_template='{col_name}')

figure = grid.fig
figure.suptitle('Tips: Scatter by Time and Sex (relplot)', y=1.02, fontweight='bold')
figure.savefig('relplot_scatter.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved relplot_scatter.png')
relplot lineplot with col_wrap
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Six noisy sine time series, one small panel per subject (3 panels per row).
sns.set_theme(style='darkgrid')
np.random.seed(0)
subjects = [f'Subject_{i:02d}' for i in range(1, 7)]
df_list = []
for subj in subjects:
    t = np.arange(20)
    # Random phase shift per subject plus Gaussian noise.
    y = np.sin(t*0.3 + np.random.uniform(0,3)) + np.random.randn(20)*0.2
    df_list.append(pd.DataFrame({'time':t,'value':y,'subject':subj}))
df = pd.concat(df_list, ignore_index=True)

# A palette is only applied when hue is assigned; without it seaborn ignores
# the palette with a warning and every line gets the same colour. Colour by
# subject and drop the legend, since each panel title already names it.
g = sns.relplot(data=df, x='time', y='value', col='subject',
                hue='subject', hue_order=subjects, legend=False,
                kind='line', col_wrap=3, palette='tab10',
                height=3, aspect=1.3, linewidth=2)
g.set_axis_labels('Time', 'Signal')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Subject Time Series (relplot col_wrap=3)', y=1.02, fontweight='bold')
g.fig.savefig('relplot_line.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved relplot_line.png')
relplot with size and style encoding
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Single-panel scatter that encodes three extra variables: hue, size and style.
sns.set_theme(style='whitegrid')
np.random.seed(1)
n = 200

# Columns are drawn in this order so the random stream is consumed consistently.
x_vals = np.random.randn(n)
y_vals = np.random.randn(n)
size_vals = np.random.uniform(50, 300, n)
category_vals = np.random.choice(['A','B','C'], n)
quality_vals = np.random.choice(['High','Low'], n)
df = pd.DataFrame({
    'x': x_vals,
    'y': y_vals,
    'size_var': size_vals,
    'category': category_vals,
    'quality': quality_vals,
})

encodings = dict(hue='category', size='size_var', style='quality')
grid = sns.relplot(data=df, x='x', y='y', palette='Set2',
                   sizes=(20, 300), alpha=0.7,
                   height=5, aspect=1.2, **encodings)
# No faceting is used, so the grid exposes its single Axes as .ax.
grid.ax.set_title('Multi-Encoding Scatter: hue + size + style', fontweight='bold')

figure = grid.fig
figure.savefig('relplot_multi.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved relplot_multi.png')
relplot row and col grid
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Line plots of a simulated signal on a group (rows) x condition (columns) grid.
sns.set_theme(style='whitegrid')
np.random.seed(2)
conditions = ['Fast','Slow']
groups = ['Treatment','Control']
rows = []
for cond in conditions:
    for grp in groups:
        # Baseline level: Treatment is 2 above Control, Fast adds 1 more.
        mu = (4 if grp=='Treatment' else 2) + (1 if cond=='Fast' else 0)
        t = np.arange(30)
        # Random walk plus a slow sine wave around the baseline.
        y = mu + np.cumsum(np.random.randn(30)*0.3) + np.sin(t*0.2)
        df_part = pd.DataFrame({'time':t,'signal':y,
                                 'condition':cond,'group':grp})
        rows.append(df_part)
df = pd.concat(rows, ignore_index=True)

g = sns.relplot(data=df, x='time', y='signal',
                row='group', col='condition',
                kind='line', palette='Set1', hue='group',
                height=3.5, aspect=1.2, linewidth=2)
g.set_axis_labels('Time', 'Signal')
g.set_titles(row_template='{row_name}', col_template='{col_name}')
# The title previously contained a mis-encoded multiplication sign.
g.fig.suptitle('Signal by Group × Condition (relplot grid)', y=1.02, fontweight='bold')
g.fig.savefig('relplot_grid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved relplot_grid.png')
💼 Real-World: Marketing Campaign Performance
Plot click-through rate vs spend for 4 campaign types across 3 channels (col=channel). Use relplot kind='scatter', size=impressions (scaled), hue=campaign_type. Add a regression line for each panel.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# CTR vs spend per channel, with one pooled regression line per panel.
sns.set_theme(style='whitegrid')
np.random.seed(66)
channels = ['Search','Social','Display']
campaigns = ['Brand','Retargeting','Prospecting','Seasonal']

# 40 simulated campaigns per (channel, campaign type).
frames = []
for ch in channels:
    for camp in campaigns:
        n = 40
        spend = np.random.uniform(500, 5000, n)
        ctr   = 0.02 + spend/100000 + np.random.randn(n)*0.005
        ctr   = np.clip(ctr, 0.001, 0.15)
        impr  = spend * np.random.uniform(100, 500, n)
        frames.append(pd.DataFrame({'channel': ch, 'campaign': camp,
                                    'spend': spend, 'ctr': ctr, 'impressions': impr}))
df = pd.concat(frames, ignore_index=True)

g = sns.relplot(data=df, x='spend', y='ctr',
                col='channel', hue='campaign', style='campaign',
                size='impressions', sizes=(20, 200),
                col_order=channels,
                palette='tab10', alpha=0.7, height=4, aspect=1.1)

# Overlay a least-squares line on each panel. Look the channel up through
# axes_dict (facet value -> Axes) rather than the panel title: at this point
# the titles still use seaborn's default 'channel = Search' form, so matching
# on the title text finds no rows and draws nothing.
for channel, ax in g.axes_dict.items():
    ch_data = df[df['channel'] == channel]
    if ch_data.empty:
        continue
    slope, intercept = np.polyfit(ch_data['spend'], ch_data['ctr'], 1)
    x_line = np.linspace(ch_data['spend'].min(), ch_data['spend'].max(), 50)
    ax.plot(x_line, slope*x_line + intercept, 'r--', linewidth=1.5, alpha=0.8)

g.set_axis_labels('Spend ($)', 'CTR')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Campaign CTR vs Spend by Channel', y=1.02, fontweight='bold')
g.fig.savefig('campaign_relplot.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved campaign_relplot.png')
🏋️ Practice: relplot Practice
Load the 'fmri' dataset. Use relplot with kind='line', x='timepoint', y='signal', hue='event', col='region'. Then create a second relplot kind='scatter' with the same groupings and size=0.5. Save both.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: theme set and dataset loaded; complete the TODO steps.
sns.set_theme(style='darkgrid')
fmri = sns.load_dataset('fmri')

# Each relplot call returns its own grid; save and close one before starting the next.
# TODO: relplot line, x=timepoint, y=signal, hue=event, col=region
# TODO: save 'fmri_line.png'
# TODO: relplot scatter, same grouping
# TODO: save 'fmri_scatter.png'
25. Heatmap Advanced

Go beyond basic heatmaps: annotate with custom formats, apply diverging colormaps for signed values, mask upper triangles, and combine with matplotlib for multi-panel layouts.

Masked upper-triangle correlation heatmap
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Correlation heatmap that shows only the lower triangle and the diagonal.
sns.set_theme(style='white')
np.random.seed(0)
n = 8
labels = [f'Var_{i}' for i in range(1, n+1)]
data = np.random.randn(200, n)
# Mix later columns into earlier ones with random weights.
# The update is in place, so the loop order must not change.
for i in range(n):
    for j in range(i):
        weight = np.random.uniform(-0.5, 0.8)
        data[:, j] += data[:, i] * weight
corr = np.corrcoef(data.T)
df_corr = pd.DataFrame(corr, index=labels, columns=labels)

# True marks a hidden cell: everything strictly above the diagonal.
mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(8, 7))
color_options = dict(cmap='RdBu_r', vmin=-1, vmax=1,
                     cbar_kws={'label': 'Pearson r'})
cell_options = dict(annot=True, fmt='.2f', linewidths=0.5, square=True)
sns.heatmap(df_corr, mask=mask, ax=ax, **color_options, **cell_options)
ax.set_title('Lower-Triangle Correlation Matrix', fontweight='bold')

fig.tight_layout()
fig.savefig('corr_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved corr_heatmap.png')
Heatmap with custom fmt and colorbar
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Hour-by-day traffic heatmap with integer annotations.
sns.set_theme(style='white')
np.random.seed(1)
days = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
# Rows cover 07:00 through 22:00, so row index 0 is hour 7, not hour 0.
hour_values = np.arange(7, 23)
hours = [f'{int(h):02d}:00' for h in hour_values]
traffic = np.random.poisson(50, (len(hours), len(days)))
# Weekend columns (Sat, Sun) get a higher base rate.
traffic[:, 5:] = np.random.poisson(80, (len(hours), 2))

# Select peak rows by clock hour. Using the hour numbers directly as row
# indices shifts the morning peak to the afternoon and puts the evening peak
# (17-19) past the last row, where the slice is empty and nothing is added.
morning_peak = (hour_values >= 8) & (hour_values < 12)
evening_peak = (hour_values >= 17) & (hour_values < 20)
traffic[morning_peak, :] += 30
traffic[evening_peak, :] += 40

df = pd.DataFrame(traffic, index=hours, columns=days)
fig, ax = plt.subplots(figsize=(10, 7))
# fmt='d' prints the integer counts without decimals.
sns.heatmap(df, cmap='YlOrRd', annot=True, fmt='d',
            linewidths=0.3, ax=ax,
            cbar_kws={'label': 'Page Views', 'shrink': 0.8})
ax.set_title('Website Traffic by Hour & Day', fontweight='bold')
ax.set_xlabel('Day of Week'); ax.set_ylabel('Hour')
fig.tight_layout()
fig.savefig('traffic_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved traffic_heatmap.png')
Heatmap with custom annotation text
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Model-vs-metric heatmap whose cell text marks the best model per metric.
sns.set_theme(style='white')
np.random.seed(2)
models = ['LR','RF','XGB','SVM','NN']
metrics = ['Accuracy','Precision','Recall','F1','AUC']
values = np.random.uniform(0.70, 0.98, (len(models), len(metrics)))
df = pd.DataFrame(values, index=models, columns=metrics)

# Cell text: every score with three decimals, built with a comprehension
# rather than the deprecated DataFrame.applymap.
annot = pd.DataFrame(
    [[f'{v:.3f}' for v in row] for row in df.to_numpy()],
    index=df.index, columns=df.columns,
)
# Prefix the top score in each metric column with a star.
best_rows = df.idxmax(axis=0)
for metric, best_model in best_rows.items():
    annot.loc[best_model, metric] = '★' + annot.loc[best_model, metric]

fig, ax = plt.subplots(figsize=(9, 5))
# Pass the prepared strings as annot; fmt='' tells heatmap to print them
# unchanged. Passing the numeric frame here would drop the stars.
sns.heatmap(df, cmap='Blues', vmin=0.65, vmax=1.0, ax=ax,
            annot=annot, fmt='', linewidths=0.5)
ax.set_title('Model Comparison Heatmap (★ = best per metric)', fontweight='bold')
fig.tight_layout()
fig.savefig('model_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved model_heatmap.png')
Diverging heatmap for signed changes
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Diverging heatmap of signed KPI changes, with the colormap centred on zero.
sns.set_theme(style='white')
np.random.seed(3)
products = [f'P{i:02d}' for i in range(1, 9)]
kpis = ['Revenue','Margin','Units','Traffic','Conv%','Retention']
shape = (len(products), len(kpis))
changes = np.random.randn(*shape) * 15
df = pd.DataFrame(changes, index=products, columns=kpis)

fig, ax = plt.subplots(figsize=(9, 6))
heatmap_options = dict(
    cmap='RdYlGn', center=0,
    annot=True, fmt='.1f', linewidths=0.5,
    cbar_kws={'label': 'YoY Change (%)'},
)
sns.heatmap(df, ax=ax, **heatmap_options)
ax.set_title('Product KPI Year-over-Year Change (%)', fontweight='bold')

fig.tight_layout()
fig.savefig('diverging_heatmap.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved diverging_heatmap.png')
💼 Real-World: Confusion Matrix Dashboard
Create a styled confusion matrix heatmap for a 5-class classifier. Normalize by true class (row), show counts in top-left and percentages in bottom-right of each cell, use Blues cmap.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Row-normalised confusion matrix; each cell shows the count and the row percentage.
sns.set_theme(style='white')
np.random.seed(42)
classes = ['Cat','Dog','Bird','Fish','Horse']
# Rows are true classes, columns are predicted classes.
cm = np.array([
    [45, 3, 1, 0, 1],
    [2, 48, 0, 1, 0],
    [1, 0, 42, 3, 2],
    [0, 1, 4, 44, 1],
    [2, 0, 3, 1, 43]
])
# Divide each row by its total so every row sums to 1.
cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)

# Annotation text: count on the first line, percentage on the second.
# The line break must be the escape sequence \n inside the literal; a raw
# newline inside a single-quoted f-string is a syntax error.
annot = np.array(
    [[f'{count}\n{frac*100:.1f}%' for count, frac in zip(count_row, frac_row)]
     for count_row, frac_row in zip(cm, cm_norm)],
    dtype=object,
)

fig, ax = plt.subplots(figsize=(8, 7))
# Colours show the share of each true class assigned to each prediction.
# That is not precision, so the colourbar label says what is plotted.
sns.heatmap(cm_norm, annot=annot, fmt='', cmap='Blues',
            xticklabels=classes, yticklabels=classes,
            vmin=0, vmax=1, linewidths=0.5, ax=ax,
            cbar_kws={'label': 'Fraction of true class (row-normalized)'})
ax.set_title('5-Class Confusion Matrix', fontweight='bold', fontsize=13)
ax.set_xlabel('Predicted'); ax.set_ylabel('True')
fig.tight_layout()
fig.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved confusion_matrix.png')
🏋️ Practice: Heatmap Practice
Generate a 10x6 DataFrame of monthly sales by region (random integers 100-500). Create: (1) a standard heatmap with annot=True and YlGn cmap, (2) the same data as a diverging heatmap centered on the mean (RdYlGn), side by side in a 1x2 figure.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: a 10x6 table of random monthly sales and an empty 1x2 figure.
sns.set_theme(style='white')
np.random.seed(9)
# Only the first ten month names are used, to match the 10-row table.
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'][:10]
regions = ['North','South','East','West','Central','Pacific']
# randint's upper bound is exclusive, so values fall in 100..499.
df = pd.DataFrame(np.random.randint(100,500,(10,6)), index=months, columns=regions)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
# TODO: standard heatmap YlGn on ax1, annot=True
# TODO: diverging heatmap RdYlGn centered at mean on ax2
# TODO: titles, tight_layout, save 'sales_heatmap.png'
plt.close()
26. PairGrid Advanced

Use PairGrid for full control over diagonal, upper, and lower triangle plots. Map different plot types per region and use hue for group-aware multi-variable comparison.

PairGrid: hist diagonal, scatter off-diag
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# PairGrid on iris: histograms with KDE on the diagonal, scatter plots elsewhere.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

measurements = ['sepal_length','sepal_width','petal_length','petal_width']
grid = sns.PairGrid(iris, hue='species', palette='Set2', vars=measurements)

# Diagonal cells: per-species distribution of one variable.
grid.map_diag(sns.histplot, kde=True, alpha=0.6)
# All off-diagonal cells: pairwise scatter.
grid.map_offdiag(sns.scatterplot, s=25, alpha=0.6)
grid.add_legend(title='Species')

figure = grid.fig
figure.suptitle('PairGrid: Histogram Diagonal + Scatter', y=1.01, fontweight='bold')
figure.savefig('pairgrid_hist.png', dpi=100, bbox_inches='tight')
plt.close(figure)
print('Saved pairgrid_hist.png')
PairGrid: KDE diagonal, upper KDE, lower scatter
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# PairGrid on iris with a different plot type in each region of the grid.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
vars_ = ['sepal_length','sepal_width','petal_length','petal_width']

grid = sns.PairGrid(iris, hue='species', palette='Set1', vars=vars_)

# Diagonal: filled univariate KDE per species.
diag_options = dict(fill=True, alpha=0.4, linewidth=1.5)
grid.map_diag(sns.kdeplot, **diag_options)
# Upper triangle: bivariate KDE contours.
grid.map_upper(sns.kdeplot, levels=4, warn_singular=False)
# Lower triangle: raw points.
grid.map_lower(sns.scatterplot, s=20, alpha=0.5)
grid.add_legend(title='Species', fontsize=9)

figure = grid.fig
figure.suptitle('PairGrid: KDE + Scatter Split', y=1.01, fontweight='bold')
figure.savefig('pairgrid_split.png', dpi=100, bbox_inches='tight')
plt.close(figure)
print('Saved pairgrid_split.png')
PairGrid: KDE diagonal, scatter upper, regression lower
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# PairGrid on iris: KDE on the diagonal, scatter above it, regression fits below.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')
vars_ = ['sepal_length','petal_length','petal_width']

g = sns.PairGrid(iris, hue='species', palette='Set2', vars=vars_)

# NOTE: an unused `diag_box` helper was removed from this example. It was
# never mapped onto the grid, and it could not have worked: it compared the
# Series index against the Series values to build its box groups.

# Diagonal: filled KDE per species.
g.map_diag(sns.kdeplot, fill=True, alpha=0.5)
# Upper triangle: scatter.
g.map_upper(sns.scatterplot, s=20, alpha=0.5)
# Lower triangle: scatter plus a linear fit per species.
g.map_lower(sns.regplot, scatter_kws=dict(s=10,alpha=0.4),
            line_kws=dict(linewidth=1.5))
g.add_legend()
g.fig.suptitle('PairGrid: KDE / Scatter / Regression', y=1.01, fontweight='bold')
g.fig.savefig('pairgrid_reg.png', dpi=100, bbox_inches='tight')
plt.close(g.fig)
print('Saved pairgrid_reg.png')
PairGrid with correlation annotation
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# PairGrid with scatter plots below the diagonal and correlation labels above it.
sns.set_theme(style='whitegrid')
np.random.seed(0)
n = 150
df = pd.DataFrame({
    'A': np.random.randn(n),
    'B': np.random.randn(n),
    'C': np.random.randn(n),
    'group': np.random.choice(['X','Y'], n)
})
# Make B positively and C negatively correlated with A.
df['B'] += df['A'] * 0.7
df['C'] = df['A'] * (-0.5) + np.random.randn(n) * 0.7

def corrfunc(x, y, **kwargs):
    """Write the Pearson correlation of x and y onto the current axes.

    With a hue variable PairGrid calls this once per hue level on the same
    axes and passes the level name as ``label``. Each call is placed one
    line below the labels already drawn, so the values do not overlap.
    """
    r = np.corrcoef(x, y)[0,1]
    ax = plt.gca()
    label = kwargs.get('label')
    text = f'r = {r:.2f}' if label is None else f'{label}: r = {r:.2f}'
    # Number of labels already on this panel decides the vertical slot.
    slot = len(ax.texts)
    ax.annotate(text, xy=(0.5, 0.75 - 0.2 * slot), xycoords='axes fraction',
                ha='center', va='center', fontsize=12,
                color='darkred' if abs(r) > 0.4 else 'gray',
                fontweight='bold' if abs(r) > 0.4 else 'normal')

g = sns.PairGrid(df[['A','B','C','group']], hue='group', palette='Set1',
                 vars=['A','B','C'])
g.map_diag(sns.kdeplot, fill=True, alpha=0.4)
g.map_lower(sns.scatterplot, s=20, alpha=0.5)
g.map_upper(corrfunc)
g.add_legend()
g.fig.suptitle('PairGrid: Lower=Scatter, Upper=Correlation', y=1.01, fontweight='bold')
g.fig.savefig('pairgrid_corr.png', dpi=100, bbox_inches='tight')
plt.close(g.fig)
print('Saved pairgrid_corr.png')
💼 Real-World: Penguins Multi-Variable EDA
Create a PairGrid on the penguins dataset (4 numeric features, hue=species): diagonal = KDE filled, upper = KDE contour, lower = scatter. Annotate correlation values in each upper cell.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Penguins EDA grid: filled KDE on the diagonal, scatter below it, KDE
# contours plus per-species correlation labels above it.
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
vars_ = ['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g']

def upper_corr(x, y, **kwargs):
    """Write the Pearson correlation of x and y onto the current axes.

    PairGrid calls this once per species on the same axes and passes the
    species name as ``label``. Each call is placed one line below the labels
    already drawn, so the three values do not overlap.
    """
    r = np.corrcoef(x, y)[0,1]
    ax = plt.gca()
    label = kwargs.get('label')
    text = f'r={r:.2f}' if label is None else f'{label}: r={r:.2f}'
    # Number of labels already on this panel decides the vertical slot.
    slot = len(ax.texts)
    ax.annotate(text, xy=(0.5, 0.75 - 0.2 * slot), xycoords='axes fraction',
                ha='center', va='center', fontsize=10,
                color='darkred' if abs(r) > 0.5 else 'gray',
                fontweight='bold')

g = sns.PairGrid(penguins, hue='species', palette='Set2', vars=vars_)
g.map_diag(sns.kdeplot, fill=True, alpha=0.5, linewidth=1.5)
g.map_lower(sns.scatterplot, s=20, alpha=0.5)
# Upper cells: contours first, then the correlation text on top of them.
g.map_upper(sns.kdeplot, levels=4, warn_singular=False)
g.map_upper(upper_corr)
g.add_legend(title='Species')
g.fig.suptitle('Penguins PairGrid EDA', y=1.01, fontweight='bold')
g.fig.savefig('penguins_pairgrid.png', dpi=120, bbox_inches='tight')
plt.close(g.fig)
print('Saved penguins_pairgrid.png')
🏋️ Practice: PairGrid Practice
Load the 'mpg' dataset. Create a PairGrid with vars=['mpg','horsepower','weight','acceleration'], hue='origin'. Map: diagonal=histplot, upper=scatterplot, lower=regplot. Add legend and a suptitle.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: dataset loaded (incomplete rows dropped) and an empty
# PairGrid created; map the plot functions in the TODO steps.
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()
vars_ = ['mpg','horsepower','weight','acceleration']

# The grid starts blank; nothing is drawn until a map_* method is called.
g = sns.PairGrid(mpg, hue='origin', palette='tab10', vars=vars_)
# TODO: map_diag histplot
# TODO: map_upper scatterplot
# TODO: map_lower regplot (scatter_kws, line_kws)
# TODO: add_legend, suptitle
# TODO: save 'mpg_pairgrid.png'
27. Residual & Regression Diagnostics

Use residplot() for visual residual checks, lmplot() for grouped regression, and regplot() with custom order for polynomial fits. Combine with matplotlib for full diagnostic panels.

residplot: detect non-linearity
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Residual plots for a truly linear and a quadratic relationship, side by side.
sns.set_theme(style='whitegrid')
np.random.seed(0)
n = 150
x = np.linspace(0, 10, n)
y_linear  = 2*x + 1 + np.random.randn(n)*2
y_nonlin  = 2*x + 0.3*x**2 + np.random.randn(n)*3

df = pd.DataFrame({'x':x,'y_linear':y_linear,'y_nonlin':y_nonlin})

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Both panels share every setting except the response column and the title.
panels = [
    (ax1, 'y_linear', 'Residuals: Linear Data (good fit)'),
    (ax2, 'y_nonlin', 'Residuals: Nonlinear Data (pattern visible)'),
]
for ax, y_col, title in panels:
    # lowess=True adds a smoothed trend through the residuals.
    sns.residplot(data=df, x='x', y=y_col, lowess=True,
                  scatter_kws=dict(alpha=0.5, s=20),
                  line_kws=dict(color='red', linewidth=2), ax=ax)
    ax.set_title(title)
    # Zero line for reference.
    ax.axhline(0, color='gray', linestyle='--', linewidth=1)

# The title previously contained a mis-encoded em dash.
fig.suptitle('residplot — Lowess Smoother', fontweight='bold')
fig.tight_layout()
fig.savefig('residplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved residplot.png')
lmplot: grouped regression per hue
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One regression fit of tip on total bill per sex, drawn on a single panel.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

point_style = dict(s=30, alpha=0.6)
fit_style = dict(linewidth=2)
grid = sns.lmplot(
    data=tips, x='total_bill', y='tip', hue='sex', palette='Set1',
    scatter_kws=point_style, line_kws=fit_style,
    height=5, aspect=1.3
)
grid.set_axis_labels('Total Bill ($)', 'Tip ($)')

figure = grid.fig
figure.suptitle('lmplot: Regression by Sex', y=1.02, fontweight='bold')
figure.savefig('lmplot_hue.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved lmplot_hue.png')
lmplot with col faceting
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Regression of tip on total bill: one panel per meal time, one fit per smoker status.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

facet_options = dict(col='time', hue='smoker', palette='Set2')
grid = sns.lmplot(
    data=tips, x='total_bill', y='tip',
    scatter_kws=dict(s=30, alpha=0.6),
    height=4, aspect=1.1, **facet_options
)
grid.set_axis_labels('Total Bill ($)', 'Tip ($)')
grid.set_titles(col_template='{col_name}')

figure = grid.fig
figure.suptitle('lmplot: Regression by Time and Smoker', y=1.02, fontweight='bold')
figure.savefig('lmplot_col.png', dpi=120, bbox_inches='tight')
plt.close(figure)
print('Saved lmplot_col.png')
Polynomial regression with regplot order
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fit the same cubic data with polynomial orders 1, 2 and 3 and compare the panels.
sns.set_theme(style='whitegrid')
np.random.seed(1)
x = np.linspace(-3, 3, 100)
noise = np.random.randn(100)*3
y = 2*x**3 - x**2 + x + noise
df = pd.DataFrame({'x':x,'y':y})

# Panel title for each polynomial order, in plotting order.
titles_by_order = {
    1: 'Linear (order=1)',
    2: 'Quadratic (order=2)',
    3: 'Cubic (order=3)',
}

fig, axes = plt.subplots(1, 3, figsize=(13, 4))
point_style = dict(s=15, alpha=0.5)
fit_style = dict(linewidth=2, color='tomato')
for panel, (degree, panel_title) in zip(axes, titles_by_order.items()):
    sns.regplot(data=df, x='x', y='y', order=degree,
                scatter_kws=point_style, line_kws=fit_style, ax=panel)
    panel.set_title(panel_title)
    panel.grid(True, alpha=0.3)

fig.suptitle('Polynomial Regression Orders with regplot', fontweight='bold')
fig.tight_layout()
fig.savefig('poly_regplot.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved poly_regplot.png')
💼 Real-World: Housing Price Regression Diagnostics
Fit a linear regression of house price on size and show: (1) lmplot by neighborhood, (2) residplot of residuals, (3) regplot with CI band.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Housing price diagnostics: (1) lmplot per neighborhood, then a second
# figure with (2) residuals and (3) regression fits with confidence bands.
sns.set_theme(style='whitegrid')
np.random.seed(33)
hoods = ['Downtown','Suburbs','Rural']
# Base price per neighborhood; price rises linearly with size plus noise.
base_price = {'Downtown':300,'Suburbs':200,'Rural':120}
rows = []
for hood in hoods:
    base = base_price[hood]
    n = 80
    size = np.random.uniform(50, 250, n)
    price = base + 1.2*size + np.random.randn(n)*30
    rows.append(pd.DataFrame({'size_sqm':size,'price_k':price,'neighborhood':hood}))
df = pd.concat(rows, ignore_index=True)

# (1) One regression panel per neighborhood. hue duplicates col only to
# apply the palette, so the legend is switched off.
g = sns.lmplot(data=df, x='size_sqm', y='price_k', col='neighborhood',
               hue='neighborhood', palette='Set2',
               scatter_kws=dict(s=25, alpha=0.6),
               line_kws=dict(linewidth=2),
               height=4, aspect=1.0, legend=False)
g.set_axis_labels('Size (sqm)', 'Price ($K)')
g.set_titles(col_template='{col_name}')
g.fig.suptitle('Housing Price Regression by Neighborhood', y=1.02, fontweight='bold')
g.fig.savefig('housing_regression.png', dpi=150, bbox_inches='tight')
plt.close(g.fig)
print('Saved housing_regression.png')

# (2) + (3) Diagnostics. Each neighborhood is fitted separately, because a
# single pooled fit would show the base-price offsets as banded residuals.
fig, (ax_resid, ax_fit) = plt.subplots(1, 2, figsize=(13, 5))
colors = sns.color_palette('Set2', len(hoods))
for hood, color in zip(hoods, colors):
    subset = df[df['neighborhood'] == hood]
    sns.residplot(data=subset, x='size_sqm', y='price_k', color=color, label=hood,
                  scatter_kws=dict(s=20, alpha=0.6), ax=ax_resid)
    # ci=95 draws the 95% confidence band around each fitted line.
    sns.regplot(data=subset, x='size_sqm', y='price_k', ci=95, color=color, label=hood,
                scatter_kws=dict(s=20, alpha=0.5), line_kws=dict(linewidth=2), ax=ax_fit)
ax_resid.axhline(0, color='gray', linestyle='--', linewidth=1)
ax_resid.set_title('Residuals by Neighborhood')
ax_resid.set_xlabel('Size (sqm)'); ax_resid.set_ylabel('Residual ($K)')
ax_fit.set_title('Linear Fit with 95% CI')
ax_fit.set_xlabel('Size (sqm)'); ax_fit.set_ylabel('Price ($K)')
ax_fit.legend(title='Neighborhood')
fig.tight_layout()
fig.savefig('housing_diagnostics.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print('Saved housing_diagnostics.png')
🏋️ Practice: Regression Diagnostics Practice
Generate 100 data points: x = linspace(0,10), y = sin(x) + noise. Use a 1x3 panel: (1) regplot order=1, (2) regplot order=3, (3) residplot for the order=3 fit. Use whitegrid style.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter scaffold: noisy sin(x) data and an empty 1x3 figure. Saving and
# closing are already wired up; fill in the three panels.
sns.set_theme(style='whitegrid')
np.random.seed(4)
x = np.linspace(0, 10, 100)
y = np.sin(x) + np.random.randn(100)*0.4
df = pd.DataFrame({'x':x,'y':y})

fig, axes = plt.subplots(1, 3, figsize=(14, 4))
# Pass ax=axes[i] to each seaborn call so it draws into the intended panel.
# TODO: regplot order=1 on axes[0]
# TODO: regplot order=3 on axes[1]
# TODO: residplot order=3 on axes[2]
fig.suptitle('Regression & Residuals: sin(x)', fontweight='bold')
fig.tight_layout()
fig.savefig('sin_regression.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sin_regression.png')
28. Mixed Seaborn + Matplotlib

Combine Seaborn plots with raw matplotlib artists: add reference lines, spans, custom patches, secondary axes, and annotations on top of seaborn outputs.

Add reference line and span to seaborn plot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Box plot of simulated test scores, decorated with plain matplotlib overlays.
sns.set_theme(style='whitegrid')
np.random.seed(0)

n_students = 200
scores = np.random.normal(72, 15, n_students)
groups = np.random.choice(['A','B','C','D'], n_students)
df = pd.DataFrame({'score': scores, 'group': groups})

fig, ax = plt.subplots(figsize=(9, 5))
sns.boxplot(data=df, x='group', y='score', palette='Set2', ax=ax)

# Target line first, then the shaded bands, so the legend lists them in this order.
ax.axhline(70, color='red', linestyle='--', linewidth=2, label='Target')
bands = [
    (0, 60, 'red', 'Fail zone'),
    (90, 100, 'green', 'Excellent'),
]
for low, high, shade, name in bands:
    ax.axhspan(low, high, color=shade, alpha=0.07, label=name)

ax.set_title('Test Scores by Group with Reference Lines', fontweight='bold')
ax.legend()
ax.set_ylim(20, 110)

out_file = 'sns_mpl_lines.png'
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Annotate outliers on seaborn plot
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Strip plot of revenue by region with arrows labelling points above the
# upper Tukey fence (Q3 + 1.5 * IQR).
sns.set_theme(style='whitegrid')
np.random.seed(1)
df = pd.DataFrame({
    'revenue': np.concatenate([np.random.normal(100, 20, 95), [220, 230, 240, 10, 5]]),
    'region': np.random.choice(['East','West'], 100)
})

fig, ax = plt.subplots(figsize=(9, 5))
sns.stripplot(data=df, x='region', y='revenue', jitter=True,
              palette='Set2', size=6, alpha=0.7, ax=ax)

# Upper fence for flagging extreme values
q1, q3 = df.revenue.quantile([0.25, 0.75])
iqr = q3 - q1
upper_fence = q3 + 1.5*iqr

# Annotate extreme outliers.
# Read the point positions back from the drawn collections instead of
# re-drawing random jitter: the jitter applied by stripplot is not known here,
# and the category-to-x mapping depends on the order regions appear in the data.
for strip in ax.collections:
    for x_pt, y_pt in strip.get_offsets():
        x_pt, y_pt = float(x_pt), float(y_pt)
        if y_pt > upper_fence:
            ax.annotate(f'${y_pt:.0f}K',
                        xy=(x_pt, y_pt),
                        xytext=(x_pt+0.2, y_pt),
                        fontsize=8, color='darkred',
                        arrowprops=dict(arrowstyle='->', color='darkred', lw=1))

ax.axhline(upper_fence, color='red', linestyle='--', linewidth=1.2, label='Upper fence')
ax.set_title('Revenue Distribution with Outlier Annotations', fontweight='bold')
ax.legend()
fig.tight_layout()
fig.savefig('sns_annotate_outliers.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_annotate_outliers.png')
seaborn heatmap + matplotlib patch overlay
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import matplotlib.patches as mpatches

# Annotated heatmap of random data with a gold outline around an injected hot spot.
sns.set_theme(style='white')
np.random.seed(2)
data = np.random.randn(8, 8)
hot_rows, hot_cols = slice(2, 4), slice(5, 7)
data[hot_rows, hot_cols] += 3  # hot cluster

fig, ax = plt.subplots(figsize=(7, 6))
sns.heatmap(data, ax=ax, cmap='RdBu_r', center=0,
            annot=True, fmt='.1f', linewidths=0.3)

# Outline covers the same rows/columns that were boosted above.
outline = mpatches.Rectangle(
    (hot_cols.start, hot_rows.start),
    hot_cols.stop - hot_cols.start,
    hot_rows.stop - hot_rows.start,
    linewidth=3, edgecolor='gold', facecolor='none',
    transform=ax.transData,
)
ax.add_patch(outline)
ax.text(6.5, 1.7, 'Hot cluster', ha='center', fontsize=10,
        color='gold', fontweight='bold')
ax.set_title('Heatmap with Cluster Highlight', fontweight='bold')

out_file = 'sns_heatmap_patch.png'
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
twinx on seaborn axes
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Two seaborn line plots sharing an x-axis: revenue on the left y-axis and
# users on a twinned right y-axis, with one combined legend.
sns.set_theme(style='whitegrid')
np.random.seed(3)
months = list(range(1, 13))
df = pd.DataFrame({
    'month': months,
    'revenue': [80+i*5+np.random.randn()*4 for i in range(12)],
    'users': [1000+i*80+np.random.randn()*50 for i in range(12)]
})

fig, ax1 = plt.subplots(figsize=(10, 5))
ax2 = ax1.twinx()

sns.lineplot(data=df, x='month', y='revenue', color='steelblue',
             linewidth=2, marker='o', label='Revenue ($K)', ax=ax1)
sns.lineplot(data=df, x='month', y='users', color='tomato',
             linewidth=2, marker='s', linestyle='--', label='Users', ax=ax2)

ax1.set_ylabel('Revenue ($K)', color='steelblue')
ax2.set_ylabel('Users', color='tomato')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax2.tick_params(axis='y', labelcolor='tomato')
ax1.set_xlabel('Month')

# Merge the handles of both axes into a single legend on ax1.
lines1, lab1 = ax1.get_legend_handles_labels()
lines2, lab2 = ax2.get_legend_handles_labels()
ax1.legend(lines1+lines2, lab1+lab2, loc='upper left')
# Drop the per-axes legend left on ax2 (if any); removing ax1's legend here
# would delete the combined legend that was just created.
ax2_legend = ax2.get_legend()
if ax2_legend is not None:
    ax2_legend.remove()
ax1.set_title('Revenue & Users (seaborn + twinx)', fontweight='bold')
fig.tight_layout()
fig.savefig('sns_twinx.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved sns_twinx.png')
💼 Real-World: Marketing KPI Report
Combine a seaborn barplot for monthly revenue with twinx for conversion rate, axhspan for target zones, and annotate bars that exceed the target with a star marker.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Marketing KPI chart: monthly revenue bars (left axis) with conversion rate
# as a line (right axis), a revenue target line/zone, and a star above every
# month that meets the target.
sns.set_theme(style='whitegrid')
np.random.seed(88)
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
revenue = [120+i*8+np.random.randn()*10 for i in range(12)]
conv_rate = [3.2+i*0.1+np.random.randn()*0.2 for i in range(12)]
target_rev = 160

df = pd.DataFrame({'month':months,'revenue':revenue,'conv_rate':conv_rate})

fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()

# Months at or above target are drawn in green, the rest in blue.
bar_colors = ['seagreen' if r >= target_rev else 'steelblue' for r in revenue]
ax1.bar(months, revenue, color=bar_colors, alpha=0.7, width=0.6)
ax2.plot(months, conv_rate, 'ro-', linewidth=2, markersize=6, label='Conv Rate (%)')

ax1.axhline(target_rev, color='green', linestyle='--', linewidth=2, label='Revenue target')
ax1.axhspan(target_rev, max(revenue)*1.05, color='green', alpha=0.05)

# Annotate exceeding months; the bar index is used as the x position.
for i, r in enumerate(revenue):
    if r >= target_rev:
        ax1.text(i, r+3, '★', ha='center', fontsize=14, color='gold')

ax1.set_ylabel('Revenue ($K)', color='steelblue')
ax2.set_ylabel('Conversion Rate (%)', color='tomato')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax2.tick_params(axis='y', labelcolor='tomato')
# One legend holding the entries of both axes.
lines1, lab1 = ax1.get_legend_handles_labels()
lines2, lab2 = ax2.get_legend_handles_labels()
ax1.legend(lines1+lines2, lab1+lab2, loc='upper left')
ax1.set_title('Marketing KPI: Revenue vs Conversion Rate', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('marketing_kpi.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved marketing_kpi.png')
🏋️ Practice: Mixed Plot Practice
Create a seaborn violinplot of exam scores by subject (4 subjects). Overlay a horizontal line at the passing grade (60), a shaded band for distinction (85-100), and annotate the subject with highest median. Use whitegrid style.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: simulated exam scores (80 per subject, clipped to 0-100) and an
# empty canvas for the learner to complete.
sns.set_theme(style='whitegrid')
np.random.seed(5)

subject_means = {'Math': 65, 'Science': 70, 'English': 75, 'History': 60}
subjects = list(subject_means)
per_subject = [np.random.normal(mu, 15, 80) for mu in subject_means.values()]
df = pd.DataFrame({
    'score': np.concatenate(per_subject).clip(0,100),
    'subject': np.repeat(subjects, 80),
})

fig, ax = plt.subplots(figsize=(9, 5))
# TODO: violinplot
# TODO: axhline at 60 (passing)
# TODO: axhspan 85-100 (distinction zone)
# TODO: annotate highest median subject
# TODO: save 'exam_violin.png'
plt.close()
29. Seaborn with Real Datasets

Practice EDA workflows on seaborn's built-in datasets: titanic, penguins, diamonds, mpg, and fmri. Apply multiple plot types to reveal patterns, relationships, and anomalies.

Titanic survival EDA
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 2x2 EDA dashboard for seaborn's built-in 'titanic' dataset.
sns.set_theme(style='whitegrid')
titanic = sns.load_dataset('titanic')

fig, axes = plt.subplots(2, 2, figsize=(12, 9))
# Survival by class
sns.barplot(data=titanic, x='class', y='survived', hue='sex',
            palette='Set1', capsize=0.08, ax=axes[0,0])
axes[0,0].set_title('Survival Rate by Class & Sex')
axes[0,0].set_ylim(0,1)

# Age distribution by survival (rows with missing age are dropped first)
sns.kdeplot(data=titanic.dropna(subset=['age']), x='age', hue='survived',
            fill=True, alpha=0.4, palette='Set2', ax=axes[0,1])
axes[0,1].set_title('Age Distribution by Survival')

# Fare vs survival (box)
sns.boxplot(data=titanic, x='class', y='fare', hue='survived',
            palette='pastel', ax=axes[1,0])
axes[1,0].set_title('Fare Distribution by Class & Survival')

# Count by embarkation port
sns.countplot(data=titanic, x='embarked', hue='survived',
              palette='Set2', ax=axes[1,1])
axes[1,1].set_title('Counts by Port of Embarkation')

# Title uses a real em dash (the previous text was mis-encoded).
fig.suptitle('Titanic Dataset — EDA Dashboard', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('titanic_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved titanic_eda.png')
Diamonds dataset analysis
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Three-panel look at a 2000-row sample of the 'diamonds' dataset.
sns.set_theme(style='whitegrid')
diamonds = sns.load_dataset('diamonds')
sample = diamonds.sample(n=2000, random_state=42)

fig, axes = plt.subplots(1, 3, figsize=(14, 5))
ax_cut, ax_carat, ax_corr = axes

# Panel 1: price spread within each cut grade
sns.violinplot(data=sample, x='cut', y='price', ax=ax_cut,
               palette='Set2', inner='quartile')
ax_cut.set_title('Price by Cut')

# Panel 2: carat against price, coloured by colour grade
sns.scatterplot(data=sample, x='carat', y='price', hue='color', ax=ax_carat,
                palette='RdYlGn', s=15, alpha=0.5)
ax_carat.set_title('Carat vs Price by Color')

# Panel 3: pairwise correlations of the numeric columns
numeric_cols = ['carat','depth','table','price','x','y','z']
corr = sample[numeric_cols].corr()
sns.heatmap(corr, ax=ax_corr, annot=True, fmt='.2f', cmap='coolwarm',
            center=0, linewidths=0.5)
ax_corr.set_title('Feature Correlation')

out_file = 'diamonds_eda.png'
fig.suptitle('Diamonds EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Penguins complete EDA
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 2x2 overview of the 'penguins' dataset (rows with missing values removed).
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()

fig, axes = plt.subplots(2, 2, figsize=(12, 9))
ax_bill, ax_mass = axes[0]
ax_ecdf, ax_pop = axes[1]

# Top-left: bill geometry, colour = species, marker = island
sns.scatterplot(data=penguins, x='bill_length_mm', y='bill_depth_mm',
                hue='species', style='island', s=60, alpha=0.7, ax=ax_bill)
ax_bill.set_title('Bill Length vs Depth by Species & Island')

# Top-right: split violins of body mass
sns.violinplot(data=penguins, x='species', y='body_mass_g',
               hue='sex', split=True, palette='Set2', inner='box',
               ax=ax_mass)
ax_mass.set_title('Body Mass by Species & Sex')

# Bottom-left: cumulative distribution of flipper length
sns.ecdfplot(data=penguins, x='flipper_length_mm', hue='species',
             palette='Set1', linewidth=2, ax=ax_ecdf)
ax_ecdf.set_title('Flipper Length ECDF by Species')

# Bottom-right: number of rows per species/island pair
group_sizes = penguins.groupby(['species','island']).size()
counts = group_sizes.reset_index(name='count')
sns.barplot(data=counts, x='species', y='count', hue='island',
            palette='pastel', ax=ax_pop)
ax_pop.set_title('Population by Species & Island')

out_file = 'penguins_eda.png'
fig.suptitle('Penguins Complete EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
MPG fuel efficiency analysis
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Three-panel EDA of seaborn's 'mpg' dataset (rows with missing values dropped).
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()

fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# MPG trend by model year and origin.
# `markers=True` only takes effect when a `style` variable is mapped, so
# origin is mapped to style as well; dashes are disabled to keep solid lines.
sns.lineplot(data=mpg, x='model_year', y='mpg', hue='origin', style='origin',
             palette='Set1', linewidth=2, markers=True, dashes=False, ax=axes[0])
axes[0].set_title('MPG by Year & Origin')

# Weight vs MPG scatter, one colour per cylinder count
cyl_order = sorted(mpg['cylinders'].unique())
pal = sns.color_palette('coolwarm', len(cyl_order))
for i, cyl in enumerate(cyl_order):
    sub = mpg[mpg.cylinders == cyl]
    axes[1].scatter(sub.weight, sub.mpg, s=20, alpha=0.5, color=pal[i], label=f'{cyl}cyl')
axes[1].set_xlabel('Weight (lbs)')
axes[1].set_ylabel('MPG')
axes[1].legend(title='Cylinders', fontsize=8)
axes[1].set_title('Weight vs MPG by Cylinders')

# Horsepower distribution by origin
sns.boxplot(data=mpg, x='origin', y='horsepower', palette='Set2', ax=axes[2])
axes[2].set_title('Horsepower by Origin')

# Title uses a real em dash (the previous text was mis-encoded).
fig.suptitle('MPG Dataset — Fuel Efficiency EDA', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('mpg_eda.png', dpi=120, bbox_inches='tight')
plt.close()
print('Saved mpg_eda.png')
💼 Real-World: Complete Data Story
Using the 'diamonds' dataset, tell a 4-panel data story: (1) price distribution by cut (violin), (2) price vs carat regression by clarity, (3) ECDF of price by cut, (4) heatmap of median price by cut × color.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Four-panel "data story" on the 'diamonds' dataset. Panels 1-3 use a
# 3000-row sample; the median-price heatmap uses the full dataset.
sns.set_theme(style='whitegrid')
diamonds = sns.load_dataset('diamonds')
sample = diamonds.sample(3000, random_state=7)
cut_order = ['Fair','Good','Very Good','Premium','Ideal']

fig, axes = plt.subplots(2, 2, figsize=(13, 10))

# (1) Price distribution per cut
sns.violinplot(data=sample, x='cut', y='price', palette='YlOrRd',
               inner='quartile', order=cut_order, ax=axes[0,0])
axes[0,0].set_title('Price by Cut', fontweight='bold')
axes[0,0].set_xlabel('Cut')
axes[0,0].set_ylabel('Price ($)')

# (2) Carat vs price, coloured by clarity
clarity_order = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']
sns.scatterplot(data=sample, x='carat', y='price', hue='clarity',
                palette='RdYlGn', s=15, alpha=0.5,
                hue_order=clarity_order, ax=axes[0,1])
axes[0,1].set_title('Carat vs Price by Clarity', fontweight='bold')

# (3) Cumulative price distribution per cut
sns.ecdfplot(data=sample, x='price', hue='cut', palette='YlOrRd',
             linewidth=2, hue_order=cut_order, ax=axes[1,0])
axes[1,0].set_title('Price ECDF by Cut', fontweight='bold')
axes[1,0].set_xlabel('Price ($)')

# (4) Median price for every cut/color combination.
# `observed=False` is spelled out so grouping on categorical columns keeps
# every category combination and does not depend on the pandas default.
pivot = diamonds.groupby(['cut','color'], observed=False)['price'].median().unstack()
pivot = pivot.loc[cut_order]
sns.heatmap(pivot, cmap='YlOrRd', annot=True, fmt='.0f',
            linewidths=0.3, ax=axes[1,1])
# Titles use real '×' and em-dash characters (previously mis-encoded).
axes[1,1].set_title('Median Price: Cut × Color', fontweight='bold')

fig.suptitle('Diamond Price Analysis — Complete Story', fontweight='bold', fontsize=14)
fig.tight_layout()
fig.savefig('diamond_story.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved diamond_story.png')
🏋️ Practice: Real Dataset EDA Practice
Load the 'mpg' dataset. Create a 2x2 EDA dashboard: (1) lmplot of mpg vs weight by origin, (2) pairplot of [mpg, horsepower, weight] with hue=origin (quick version), (3) boxplot of mpg by cylinders, (4) lineplot of mean mpg by model_year. Drop NaN first.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: saves a pairplot as its own image, then opens a 1x3 canvas for the
# learner's remaining panels.
sns.set_theme(style='whitegrid')
mpg = sns.load_dataset('mpg').dropna()

# Pairplot separately (it creates its own figure)
pair_cols = ['mpg','horsepower','weight','origin']
point_style = dict(s=15, alpha=0.5)
g = sns.pairplot(mpg[pair_cols], hue='origin', palette='Set2',
                 plot_kws=point_style)
g.fig.suptitle('MPG Pairplot', y=1.01)
g.fig.savefig('mpg_pairplot.png', dpi=80, bbox_inches='tight')
plt.close(g.fig)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
# TODO: boxplot mpg by cylinders
# TODO: lineplot mean mpg by model_year, hue=origin
# TODO: scatterplot weight vs mpg, hue=origin, regplot overlay

out_file = 'mpg_dashboard.png'
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
30. Object-Oriented Seaborn

Use the modern seaborn.objects API: create a so.Plot object, add layers with .add(), facet into subplots with shared axes, and chain the method calls. Use Plot.save() and Plot.show() to output the result. Available in Seaborn 0.12+.

Seaborn objects API: basic scatter
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Objects-API scatter of tip against total bill with a linear fit, coloured by sex.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

# Build the plot one layer at a time.
p = so.Plot(tips, x='total_bill', y='tip', color='sex')
p = p.add(so.Dot(alpha=0.6, pointsize=5))       # raw observations
p = p.add(so.Line(), so.PolyFit(order=1))       # first-order polynomial fit
p = p.label(
    x='Total Bill ($)',
    y='Tip ($)',
    color='Sex',
    title='Tips: Scatter + Regression (objects API)',
)

out_file = 'so_scatter.png'
p.save(out_file, dpi=120, bbox_inches='tight')
print(f'Saved {out_file}')
Seaborn objects: Bar plot with grouping
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Objects-API bar chart: mean total bill per day, dodged by sex.
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

axis_labels = dict(
    x='Day',
    y='Mean Total Bill ($)',
    color='Sex',
    title='Mean Bill by Day & Sex (objects API)',
)
p = so.Plot(tips, x='day', y='total_bill', color='sex')
p = p.add(so.Bar(), so.Agg('mean'), so.Dodge())
p = p.label(**axis_labels)

out_file = 'so_bar.png'
p.save(out_file, dpi=120, bbox_inches='tight')
print(f'Saved {out_file}')
Seaborn objects: histogram with KDE
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Objects-API histogram of iris petal length per species with a KDE overlay.
sns.set_theme(style='whitegrid')
iris = sns.load_dataset('iris')

# The normalisation is requested from so.Hist itself via `stat=`.
# so.Norm's first positional argument is the aggregation function used as the
# denominator, so so.Norm('percent') would request a non-existent 'percent'
# aggregation instead of percent scaling.
# 'density' is used so the bars share a scale with the so.KDE() curve.
p = (
    so.Plot(iris, x='petal_length', color='species')
    .add(so.Bars(), so.Hist(stat='density', binwidth=0.3))
    .add(so.Line(), so.KDE())
    .label(x='Petal Length (cm)', y='Density',
           title='Petal Length Distribution (objects API)')
)
p.save('so_hist.png', dpi=120, bbox_inches='tight')
print('Saved so_hist.png')
Seaborn objects: faceted grid
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Objects-API scatter of tips, faceted by time (columns) and smoker (rows).
sns.set_theme(style='whitegrid')
tips = sns.load_dataset('tips')

p = so.Plot(tips, x='total_bill', y='tip', color='sex')
p = p.facet(col='time', row='smoker')
p = p.add(so.Dot(alpha=0.5, pointsize=4))
p = p.label(
    x='Total Bill ($)',
    y='Tip ($)',
    title='Tips: Faceted Grid (objects API)',
)
# Same axis limits are requested for the whole plot.
p = p.limit(x=(0, 55), y=(0, 11))

out_file = 'so_faceted.png'
p.save(out_file, dpi=120, bbox_inches='tight')
print(f'Saved {out_file}')
💼 Real-World: Modern API Visualization
Use the seaborn.objects API to create a 3-part analysis of the 'penguins' dataset: (1) scatter bill_length vs bill_depth with color=species, (2) bar mean body_mass by species+sex with dodge, (3) line flipper_length trend — all using so.Plot.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Two objects-API figures for the 'penguins' dataset: a bill-dimension
# scatter and a dodged bar chart of mean body mass.
sns.set_theme(style='whitegrid')
penguins = sns.load_dataset('penguins').dropna()
save_opts = dict(dpi=120, bbox_inches='tight')

# Figure 1: bill length vs depth, coloured by species
p1 = so.Plot(penguins, x='bill_length_mm', y='bill_depth_mm', color='species')
p1 = p1.add(so.Dot(alpha=0.6, pointsize=5))
p1 = p1.label(x='Bill Length (mm)', y='Bill Depth (mm)',
              title='Bill Dimensions by Species')
p1.save('so_penguin_scatter.png', **save_opts)

# Figure 2: mean body mass per species, side-by-side bars per sex
p2 = so.Plot(penguins, x='species', y='body_mass_g', color='sex')
p2 = p2.add(so.Bar(), so.Agg('mean'), so.Dodge())
p2 = p2.label(x='Species', y='Mean Body Mass (g)',
              title='Body Mass by Species & Sex')
p2.save('so_penguin_bar.png', **save_opts)

print('Saved so_penguin_scatter.png and so_penguin_bar.png')
🏋️ Practice: Objects API Practice
Using the seaborn objects API (so.Plot), recreate a regression scatter of tips total_bill vs tip, color by day, with a PolyFit(order=1) line. Facet by time (col='time'). Save as 'so_practice.png'.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn.objects as so
# Starter: theme and data are prepared; the plot itself is left to the learner.
theme_style = 'whitegrid'
sns.set_theme(style=theme_style)
tips = sns.load_dataset(name='tips')

# TODO: so.Plot with x=total_bill, y=tip, color=day
# TODO: .add(so.Dot) + .add(so.Line, so.PolyFit(order=1))
# TODO: .facet(col='time')
# TODO: .save('so_practice.png', dpi=120)
31. Color Systems & Accessibility

Choose colorblind-safe palettes, use perceptually uniform colormaps, distinguish sequential vs. diverging vs. qualitative palettes, and apply Seaborn color_palette utilities.

Qualitative palettes for categorical data
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Same box plot drawn six times, once per qualitative palette, for comparison.
sns.set_theme(style='whitegrid')
np.random.seed(0)
values = np.random.randn(200)
labels = np.random.choice(list('ABCDE'), 200)
df = pd.DataFrame({'value': values, 'group': labels})

qual_palettes = ['Set1','Set2','Set3','tab10','colorblind','deep']
fig, axes = plt.subplots(2, 3, figsize=(14, 8))
panels = axes.ravel()
for idx, pal in enumerate(qual_palettes):
    panel = panels[idx]
    sns.boxplot(data=df, x='group', y='value', palette=pal, ax=panel)
    panel.set_title(f'palette="{pal}"')

out_file = 'qual_palettes.png'
fig.suptitle('Qualitative Palettes Comparison', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Sequential palettes for ordered data
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One smooth surface, sin(X) * cos(Y), rendered with four sequential colormaps.
sns.set_theme(style='white')
np.random.seed(1)
x = y = np.linspace(-3, 3, 60)
X, Y = np.meshgrid(x, y)
Z = np.sin(X) * np.cos(Y)

seq_palettes = ['Blues','YlOrRd','viridis','magma']
fig, axes = plt.subplots(nrows=1, ncols=len(seq_palettes), figsize=(16, 4))
for panel, cmap_name in zip(axes, seq_palettes):
    mesh = panel.pcolormesh(X, Y, Z, cmap=cmap_name)
    fig.colorbar(mesh, ax=panel, shrink=0.8)
    panel.set_title(f'cmap="{cmap_name}"')
    panel.set_aspect('equal')

out_file = 'seq_palettes.png'
fig.suptitle('Sequential Palettes for Heatmap Data', fontweight='bold')
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Diverging palettes for signed values
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One signed 10x8 table shown with four diverging colormaps centred on zero.
sns.set_theme(style='white')
np.random.seed(2)
data = np.random.randn(10, 8) * 2
row_names = [f'Product {i}' for i in range(1,11)]
col_names = [f'Q{q}' for q in range(1,9)]
df = pd.DataFrame(data, index=row_names, columns=col_names)

div_palettes = ['RdBu_r','RdYlGn','coolwarm','PiYG']
fig, axes = plt.subplots(nrows=1, ncols=len(div_palettes), figsize=(16, 4))
colorbar_opts = {'shrink':0.8}
for panel, cmap_name in zip(axes, div_palettes):
    sns.heatmap(df, ax=panel, cmap=cmap_name, center=0,
                cbar_kws=colorbar_opts, xticklabels=True, yticklabels=False)
    panel.set_title(f'cmap="{cmap_name}"')

out_file = 'div_palettes.png'
fig.suptitle('Diverging Palettes for Change Data', fontweight='bold')
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Colorblind-safe vs non-safe comparison
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Side-by-side line charts of the same three noisy series: pure red/green/blue
# on the left, Wong-palette colours plus distinct markers on the right.
sns.set_theme(style='whitegrid')
np.random.seed(3)
t = np.linspace(0, 10, 80)
df = pd.DataFrame({
    'x': t,
    'A': np.sin(t) + np.random.randn(80)*0.2,
    'B': np.cos(t) + np.random.randn(80)*0.2,
    'C': np.sin(t*1.5) + np.random.randn(80)*0.2,
})
series = ['A','B','C']

fig, axes = plt.subplots(1, 2, figsize=(13, 5))
ax_bad, ax_good = axes

# Non-safe: red-green confusion
colors_bad = ['#ff0000','#00aa00','#0000ff']
for name, shade in zip(series, colors_bad):
    ax_bad.plot(df['x'], df[name], color=shade, linewidth=2, label=name,
                linestyle='-')
ax_bad.set_title('Non-colorblind-safe (avoid red/green)')
ax_bad.legend()

# Colorblind-safe (Wong palette) with markers
cb_colors = ['#0072B2','#D55E00','#009E73']
markers = ['o','s','^']
for name, shade, mark in zip(series, cb_colors, markers):
    ax_good.plot(df['x'], df[name], color=shade, linewidth=2,
                 label=name, marker=mark, markevery=8, markersize=7)
ax_good.set_title('Colorblind-safe (Wong palette + markers)')
ax_good.legend()

for panel in (ax_bad, ax_good):
    panel.set_xlabel('x')
    panel.set_ylabel('y')
    panel.grid(True, alpha=0.3)

out_file = 'colorblind_safe.png'
fig.suptitle('Accessibility: Color Choice Matters', fontweight='bold')
fig.tight_layout()
fig.savefig(out_file, dpi=120, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
💼 Real-World: Accessible Dashboard Design
Redesign a 4-panel report using only colorblind-safe palettes: use 'colorblind' for categorical, 'Blues' for sequential, 'RdBu_r' for diverging, and add hatch patterns or markers as secondary encodings.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Four-panel dashboard built only from colorblind-friendly encodings:
# 'colorblind' palette + hatching, 'Blues' (sequential), 'RdBu_r' (diverging),
# and Wong-palette lines with distinct markers.
sns.set_theme(style='whitegrid')
np.random.seed(42)
df_cat = pd.DataFrame({
    'group': np.repeat(['A','B','C','D'],60),
    'value': np.concatenate([np.random.normal(m,1.2,60) for m in [3,5,4,6]])
})
df_seq = pd.DataFrame(np.random.rand(8,6)*100,
                       columns=[f'M{i}' for i in range(1,7)],
                       index=[f'R{i}' for i in range(1,9)])
df_div = pd.DataFrame(np.random.randn(6,5)*20,
                       columns=['Q1','Q2','Q3','Q4','Q5'],
                       index=[f'P{i}' for i in range(1,7)])

fig, axes = plt.subplots(2, 2, figsize=(13, 10))

# Categorical: colorblind + hatch
sns.barplot(data=df_cat, x='group', y='value',
            palette='colorblind', capsize=0.1, ax=axes[0,0])
hatches = ['','///','xxx','...']
# Hatch is a second encoding besides colour; the pattern list repeats if the
# axes holds more patches than there are patterns.
for i, bar in enumerate(axes[0,0].patches):
    bar.set_hatch(hatches[i % len(hatches)])
axes[0,0].set_title('Categorical: colorblind + hatch')

# Sequential heatmap
sns.heatmap(df_seq, cmap='Blues', annot=False, ax=axes[0,1],
            cbar_kws={'label':'Value'})
axes[0,1].set_title('Sequential: Blues')

# Diverging heatmap
sns.heatmap(df_div, cmap='RdBu_r', center=0, annot=True, fmt='.0f',
            linewidths=0.3, ax=axes[1,0])
axes[1,0].set_title('Diverging: RdBu_r')

# Line with markers: each series is a random walk offset by its index
cb_colors = ['#0072B2','#D55E00','#009E73','#CC79A7']
x = np.arange(12)
for i,(col,m) in enumerate(zip(cb_colors,['o','s','^','D'])):
    y = np.cumsum(np.random.randn(12)*0.5) + i
    axes[1,1].plot(x, y, color=col, marker=m, linewidth=2,
                   markevery=3, markersize=7, label=f'Series {i+1}')
axes[1,1].legend(fontsize=8)
axes[1,1].set_title('Lines: Wong palette + markers')
axes[1,1].grid(True, alpha=0.3)

# Title uses a real em dash (the previous text was mis-encoded).
fig.suptitle('Accessible Dashboard — Colorblind-Safe Design', fontweight='bold', fontsize=13)
fig.tight_layout()
fig.savefig('accessible_seaborn.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved accessible_seaborn.png')
🏋️ Practice: Color Accessibility Practice
Create a 4-group comparison plot (your choice of data). First plot with the red/green palette (unsafe). Then create a duplicate using the 'colorblind' palette with different markers per group. Save both as 'color_bad.png' and 'color_good.png'.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Starter: four simulated groups (60 samples each); both plots are left as TODOs.
sns.set_theme(style='whitegrid')
np.random.seed(6)

groups = ['Control','TreatmentA','TreatmentB','TreatmentC']
group_means = [3,4.5,3.8,5.2]
samples = [np.random.normal(mu,1.5,60) for mu in group_means]
df = pd.DataFrame({
    'value': np.concatenate(samples),
    'group': np.repeat(groups,60),
})

# TODO: barplot with non-safe red/green palette, save 'color_bad.png'
# TODO: barplot with colorblind palette + stripplot with markers, save 'color_good.png'
32. Seaborn Dashboard Composition

Combine multiple seaborn plot types, matplotlib GridSpec, annotation overlays, and consistent theming to produce publication-quality, multi-panel analytical dashboards.

4-panel EDA dashboard
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.gridspec import GridSpec

# GridSpec dashboard on simulated marketing data: wide scatter and narrow
# violin on the top row, full-width monthly line chart on the bottom row.
sns.set_theme(style='whitegrid', palette='Set2', font_scale=0.95)
np.random.seed(42)
n_rows = 300
df = pd.DataFrame({
    'revenue': np.random.lognormal(5, 0.5, n_rows),
    'spend':   np.random.uniform(1000, 10000, n_rows),
    'channel': np.random.choice(['Search','Social','Email','Direct'], n_rows),
    'month':   np.random.choice(range(1,13), n_rows),
})
# Revenue is then replaced by a spend-driven value with additive noise.
return_rate = np.random.uniform(0.1, 0.5, n_rows)
noise = np.random.randn(n_rows)*50
df['revenue'] = df['spend'] * return_rate + noise

fig = plt.figure(figsize=(14, 9))
gs = GridSpec(2, 3, figure=fig, hspace=0.4, wspace=0.35)
ax1 = fig.add_subplot(gs[0, :2])
ax2 = fig.add_subplot(gs[0, 2])
ax3 = fig.add_subplot(gs[1, :])

# Top-left: spend against revenue per channel
sns.scatterplot(data=df, x='spend', y='revenue', hue='channel',
                style='channel', s=50, alpha=0.6, ax=ax1)
ax1.set_title('Revenue vs Spend by Channel', fontweight='bold')

# Top-right: revenue distribution per channel
sns.violinplot(data=df, x='channel', y='revenue', palette='Set2',
               inner='quartile', ax=ax2)
ax2.set_title('Revenue Distribution', fontweight='bold')
tick_labels = ax2.get_xticklabels()
ax2.set_xticklabels(tick_labels, rotation=20, ha='right', fontsize=8)

# Bottom: mean revenue per month and channel
monthly_mean = df.groupby(['month','channel'])['revenue'].mean()
month_agg = monthly_mean.reset_index()
sns.lineplot(data=month_agg, x='month', y='revenue', hue='channel',
             palette='Set2', linewidth=2, markers=True, ax=ax3)
ax3.set_title('Monthly Average Revenue by Channel', fontweight='bold')
ax3.set_xlabel('Month')

out_file = 'sns_dashboard.png'
fig.suptitle('Marketing Revenue Dashboard', fontsize=14, fontweight='bold')
fig.savefig(out_file, dpi=150, bbox_inches='tight')
plt.close()
print(f'Saved {out_file}')
Statistical report: hypothesis testing visual
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy import stats

# A/B-test report: simulate control and treatment scores, run a two-sample
# t-test, compute Cohen's d, and show the two groups in three views.
sns.set_theme(style='whitegrid')
np.random.seed(1)
control    = np.random.normal(70, 12, 100)
treatment  = np.random.normal(75, 11, 100)

# Treatment is passed first so the sign of t matches the sign of d below
# (both measure treatment minus control); the p-value does not depend on order.
t_stat, p_val = stats.ttest_ind(treatment, control)

# Cohen's d with the pooled standard deviation. The (n - 1) weights and the
# (n1 + n2 - 2) denominator go with sample variances, hence ddof=1
# (numpy's default ddof=0 is the population variance).
n_c, n_t = len(control), len(treatment)
pooled_var = ((n_c-1)*control.var(ddof=1) + (n_t-1)*treatment.var(ddof=1)) / (n_c+n_t-2)
effect_size = (treatment.mean() - control.mean()) / np.sqrt(pooled_var)

df = pd.DataFrame({
    'score': np.concatenate([control, treatment]),
    'group': np.repeat(['Control','Treatment'], 100)
})

fig, axes = plt.subplots(1, 3, figsize=(14, 5))
sns.histplot(data=df, x='score', hue='group', kde=True, alpha=0.4,
             palette='Set1', stat='density', common_norm=False, ax=axes[0])
axes[0].set_title('Score Distributions')

sns.violinplot(data=df, x='group', y='score', palette='Set1',
               inner='box', ax=axes[1])
axes[1].set_title('Violin Comparison')

sns.ecdfplot(data=df, x='score', hue='group', palette='Set1',
             linewidth=2, ax=axes[2])
axes[2].set_title('ECDF Comparison')

# Score is on the x-axis of the histogram and the ECDF only; the violin plot
# has the groups on x and the score on y.
axes[0].set_xlabel('Score')
axes[2].set_xlabel('Score')
axes[1].set_xlabel('Group')
axes[1].set_ylabel('Score')

fig.suptitle(f'A/B Test: t={t_stat:.2f}, p={p_val:.4f}, d={effect_size:.2f}',
             fontsize=13, fontweight='bold')
fig.tight_layout()
fig.savefig('hypothesis_dashboard.png', dpi=150, bbox_inches='tight')
plt.close()
print('Saved hypothesis_dashboard.png')
Full EDA report: penguins deep dive
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.gridspec import GridSpec

# Report-style figure for the 'penguins' dataset: a text banner on the top row
# and four plots on a 3x3 GridSpec below it.
sns.set_theme(style='whitegrid', palette='colorblind')
penguins = sns.load_dataset('penguins').dropna()

fig = plt.figure(figsize=(15, 10), facecolor='white')
gs = GridSpec(3, 3, figure=fig, hspace=0.5, wspace=0.4)

# Banner: title uses a real em dash (previously mis-encoded); the species and
# island counts are computed from the data that is actually plotted.
n_species = penguins['species'].nunique()
n_islands = penguins['island'].nunique()
ax0 = fig.add_subplot(gs[0, :])
ax0.text(0.5, 0.6, 'Palmer Penguins — Deep Dive EDA',
         ha='center', va='center', fontsize=16, fontweight='bold',
         transform=ax0.transAxes)
ax0.text(0.5, 0.1,
         f'n={len(penguins)} penguins | {n_species} species | {n_islands} islands',
         ha='center', va='center', fontsize=11, color='gray',
         transform=ax0.transAxes)
ax0.axis('off')

# Scatter bill dimensions
ax1 = fig.add_subplot(gs[1, :2])
sns.scatterplot(data=penguins, x='bill_length_mm', y='bill_depth_mm',
                hue='species', style='island', s=60, alpha=0.7, ax=ax1)
ax1.set_title('Bill Dimensions by Species & Island', fontweight='bold')

# Count by species
ax2 = fig.add_subplot(gs[1, 2])
sns.countplot(data=penguins, x='species', hue='sex',
              palette='Set2', ax=ax2)
ax2.set_title('Counts by Species & Sex', fontweight='bold')
ax2.set_xlabel('Species')

# Body mass violin
ax3 = fig.add_subplot(gs[2, :2])
sns.violinplot(data=penguins, x='species', y='body_mass_g',
               hue='sex', split=True, palette='pastel', inner='box', ax=ax3)
ax3.set_title('Body Mass Distribution', fontweight='bold')

# Flipper ECDF
ax4 = fig.add_subplot(gs[2, 2])
sns.ecdfplot(data=penguins, x='flipper_length_mm', hue='species',
             palette='colorblind', linewidth=2, ax=ax4)
ax4.set_title('Flipper Length ECDF', fontweight='bold')
ax4.set_xlabel('Flipper Length (mm)')

# Save with the figure's own face colour so the white background is kept.
fig.savefig('penguins_deep_dive.png', dpi=150, bbox_inches='tight',
            facecolor=fig.get_facecolor())
plt.close()
print('Saved penguins_deep_dive.png')
Time series analytics dashboard
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.gridspec import GridSpec

# Three-panel dashboard on synthetic daily sales data: a full-width trend line
# on top, mean monthly bars bottom-left, and a sales-vs-returns scatter with a
# per-product linear fit bottom-right. Writes 'time_series_dashboard.png'.

sns.set_theme(style='darkgrid', palette='Set2')
np.random.seed(77)  # fixed seed so the synthetic data is reproducible
n_days = 90
dates = pd.date_range('2024-01-01', periods=n_days)
products = ['Widget','Gadget','Doohickey']

# One explicit color per product, shared by every panel AND by the fit lines.
# Without this the fit lines take their colors from the axes color cycle in
# groupby (alphabetical) order, which does not match the order in which the
# hue colors are assigned to the scatter points.
product_colors = dict(zip(products,
                          sns.color_palette('Set2', n_colors=len(products))))

# Build a long-format table: one row per (date, product).
rows = []
for prod in products:
    base = np.random.uniform(80, 150)
    # Random walk around the product's base level.
    trend = np.cumsum(np.random.randn(n_days)*2) + base
    for d, v in zip(dates, trend):
        # Both measures are clipped at zero; returns are ~5% of the raw
        # trend value plus unit-variance noise.
        rows.append({'date':d,'product':prod,'sales':max(0,v),
                     'returns':max(0,v*0.05+np.random.randn())})
df = pd.DataFrame(rows)
df['month'] = df['date'].dt.month

fig = plt.figure(figsize=(14, 9))
gs = GridSpec(2, 2, figure=fig, hspace=0.4, wspace=0.35)

# Top row (spanning both columns): daily sales per product.
ax1 = fig.add_subplot(gs[0, :])
sns.lineplot(data=df, x='date', y='sales', hue='product',
             hue_order=products, palette=product_colors, linewidth=2, ax=ax1)
ax1.set_title('Daily Sales Trend by Product', fontweight='bold')
ax1.set_xlabel('Date'); ax1.set_ylabel('Sales ($)')

# Bottom-left: mean sales per calendar month and product.
ax2 = fig.add_subplot(gs[1, 0])
month_df = df.groupby(['month','product'])['sales'].mean().reset_index()
sns.barplot(data=month_df, x='month', y='sales', hue='product',
            hue_order=products, palette=product_colors, ax=ax2)
ax2.set_title('Mean Monthly Sales', fontweight='bold')
ax2.set_xlabel('Month')

# Bottom-right: sales vs returns, with a least-squares line per product.
ax3 = fig.add_subplot(gs[1, 1])
sns.scatterplot(data=df, x='sales', y='returns', hue='product',
                hue_order=products, palette=product_colors,
                s=20, alpha=0.4, ax=ax3)
for prod, grp in df.groupby('product'):
    # Degree-1 polyfit returns (slope, intercept).
    slope, intercept = np.polyfit(grp['sales'], grp['returns'], 1)
    x_line = np.linspace(grp['sales'].min(), grp['sales'].max(), 50)
    # Color is looked up by product name so each line matches its points.
    ax3.plot(x_line, slope*x_line + intercept,
             color=product_colors[prod], linewidth=1.5, alpha=0.8)
ax3.set_title('Sales vs Returns', fontweight='bold')

fig.suptitle('Product Analytics Dashboard', fontsize=14, fontweight='bold')
fig.savefig('time_series_dashboard.png', dpi=150, bbox_inches='tight')
# Close this specific figure rather than whatever happens to be current.
plt.close(fig)
print('Saved time_series_dashboard.png')
💼 Real-World: Comprehensive Analysis Report
Design a 5-panel Seaborn+GridSpec report on the 'diamonds' dataset: (1) banner with dataset stats, (2) price by cut violin, (3) carat vs price scatter by clarity, (4) price ECDF by cut, (5) heatmap of counts cut × color.
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.gridspec import GridSpec

# Five-panel report on the 'diamonds' dataset: text banner, price-by-cut
# violins, carat-vs-price scatter colored by clarity, price ECDF by cut, and
# a cut x color count heatmap. Writes 'diamonds_report.png'.

sns.set_theme(style='whitegrid', palette='Set2')
diamonds = sns.load_dataset('diamonds')
# The distribution/scatter panels use a fixed 2000-row sample to keep
# rendering light; the banner and the heatmap use the full table.
sample = diamonds.sample(2000, random_state=42)
cut_order = ['Fair','Good','Very Good','Premium','Ideal']

fig = plt.figure(figsize=(14, 11), facecolor='white')
gs = GridSpec(3, 3, figure=fig, hspace=0.5, wspace=0.4)

# Banner: a text-only axes spanning the whole first row.
ax0 = fig.add_subplot(gs[0, :])
# '\u2014' is an em dash; written as an escape so the title cannot be mangled
# by a mis-decoded source file.
ax0.text(0.5, 0.6, 'Diamonds Dataset \u2014 Comprehensive Report',
         ha='center', fontsize=15, fontweight='bold', transform=ax0.transAxes)
ax0.text(0.5, 0.1, f'{len(diamonds):,} diamonds | {diamonds.cut.nunique()} cuts | '
                   f'Price: ${diamonds.price.min():,}–${diamonds.price.max():,}',
         ha='center', fontsize=10, color='gray', transform=ax0.transAxes)
ax0.axis('off')

# Price distribution per cut. The x variable is also assigned to `hue`
# because passing `palette` without `hue` is deprecated in seaborn 0.13;
# dodge=False keeps each violin centered on its category.
ax1 = fig.add_subplot(gs[1, :2])
sns.violinplot(data=sample, x='cut', y='price', hue='cut', palette='YlOrRd',
               inner='quartile', order=cut_order, hue_order=cut_order,
               dodge=False, ax=ax1)
# The hue legend would only repeat the x tick labels, so drop it if one
# was created.
violin_legend = ax1.get_legend()
if violin_legend is not None:
    violin_legend.remove()
ax1.set_title('Price by Cut', fontweight='bold')

# Carat vs price, colored by clarity in the order given by `clarity_order`.
ax2 = fig.add_subplot(gs[1, 2])
clarity_order = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']
sns.scatterplot(data=sample, x='carat', y='price', hue='clarity',
                palette='RdYlGn', s=10, alpha=0.4,
                hue_order=clarity_order, ax=ax2, legend=False)
ax2.set_title('Carat vs Price', fontweight='bold')

# Price ECDF, one curve per cut, colored consistently with the violins.
ax3 = fig.add_subplot(gs[2, :2])
sns.ecdfplot(data=sample, x='price', hue='cut', palette='YlOrRd',
             linewidth=2, hue_order=cut_order, ax=ax3)
ax3.set_title('Price ECDF by Cut', fontweight='bold')

# Count of diamonds for every (cut, color) pair, rows ordered by `cut_order`.
# observed=False is spelled out so grouping on categorical columns keeps the
# full grid regardless of the pandas default; fill_value=0 keeps the table
# integer-typed, which the fmt='d' annotation format requires.
ax4 = fig.add_subplot(gs[2, 2])
counts = (diamonds.groupby(['cut','color'], observed=False)
                  .size()
                  .unstack(fill_value=0)
                  .loc[cut_order])
sns.heatmap(counts, cmap='Blues', annot=True, fmt='d',
            linewidths=0.3, ax=ax4, cbar=False)
# '\u00d7' is the multiplication sign.
ax4.set_title('Count: Cut \u00d7 Color', fontweight='bold')

fig.suptitle('Diamond Market Analysis', fontsize=15, fontweight='bold', y=1.01)
# Pass the figure's own facecolor so the saved PNG keeps the white background.
fig.savefig('diamonds_report.png', dpi=150, bbox_inches='tight',
            facecolor=fig.get_facecolor())
# Close this specific figure rather than whatever happens to be current.
plt.close(fig)
print('Saved diamonds_report.png')
🏋️ Practice: Dashboard Practice
Build your own 4-panel seaborn dashboard using any dataset you prefer (tips, penguins, titanic, mpg, etc.). Requirements: (1) GridSpec layout with at least one spanning panel, (2) 4 different plot types, (3) consistent palette and theme, (4) suptitle banner, (5) save at 150 DPI.
Starter Code
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.gridspec import GridSpec

# Starter scaffold for the dashboard exercise: sets the theme, loads a dataset,
# and creates an empty figure with a 2x3 GridSpec. The panels, title, and save
# call are left as TODOs for the learner.
sns.set_theme(style='whitegrid', palette='Set2')
# Choose your dataset ('tips' is loaded here as a default)
tips = sns.load_dataset('tips')

fig = plt.figure(figsize=(13, 9))
# 2 rows x 3 columns; slice `gs` to make a panel span several cells.
gs = GridSpec(2, 3, figure=fig, hspace=0.4, wspace=0.35)

# TODO: Panel 1 - spanning top row (gs[0, :])
# TODO: Panel 2 - bottom-left (gs[1, :2])
# TODO: Panel 3 - bottom-right (gs[1, 2])
# Pick 4 different plot types across panels
# NOTE(review): the three slots above already cover the whole 2x3 grid; to fit
# a fourth plot type, split one of them (e.g. gs[1, :2] into gs[1, 0] and
# gs[1, 1]) - confirm the intended layout.
# TODO: suptitle banner
# TODO: save 'my_sns_dashboard.png' at 150 DPI
# The save call must come before plt.close(), which closes the current figure.
plt.close()
# Printed unconditionally: no file is written until the save TODO is implemented.
print('Dashboard saved!')