Combined Visualizations¶
Combine box plots with other chart types for comprehensive data analysis and presentation.
Mental Model
Box plots summarize but hide detail; histograms reveal shape but are hard to compare across groups. Combining them -- a box plot beside a histogram, or overlaid with a strip plot -- gives readers both the quick summary and the full picture. Use subplots side by side or overlay techniques to get the best of both worlds.
The core principle: different plots reveal different aspects of the same distribution. A box plot shows center and spread; a histogram shows shape; a scatter/strip shows individual points. Combining them is not decoration — it is choosing the right resolution for each aspect of the data:
Combine at most 2–3 layers. Too many overlays reduce clarity — each additional layer should add information the reader cannot get from the existing ones.
| Combination | What you gain |
|---|---|
| Box + histogram | Summary + shape |
| Box + scatter/strip | Summary + raw data |
| Violin + box | Density shape + quartile markers |
Box Plot with Histogram¶
Pair box plots with histograms to show both summary statistics and distributional shape.
1. Side by Side Layout¶
```python import matplotlib.pyplot as plt import numpy as np
np.random.seed(42) data = np.random.normal(100, 15, 500)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.hist(data, bins=25, edgecolor='black', alpha=0.7) ax1.axvline(np.mean(data), color='red', linestyle='--', label='Mean') ax1.axvline(np.median(data), color='blue', linestyle='--', label='Median') ax1.legend() ax1.set_title('Histogram')
ax2.boxplot(data) ax2.set_title('Box Plot')
plt.tight_layout() plt.show() ```
2. Stacked Vertically¶
```python fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6), gridspec_kw={'height_ratios': [3, 1]})
ax1.hist(data, bins=25, edgecolor='black', alpha=0.7) ax1.set_ylabel('Frequency')
ax2.boxplot(data, vert=False) ax2.set_xlabel('Value')
plt.tight_layout() plt.show() ```
3. Reusable Function¶
```python def plot_distribution(data, title='Distribution Analysis', bins=20): mean = np.mean(data) median = np.median(data)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
fig.suptitle(title, fontsize=14)
ax1.hist(data, bins=bins, density=True, alpha=0.7, edgecolor='black')
ax1.axvline(mean, color='blue', linestyle='--', label=f'Mean: {mean:.1f}')
ax1.axvline(median, color='red', linestyle='--', label=f'Median: {median:.1f}')
ax1.legend()
ax1.set_title('Histogram')
ax2.boxplot(data)
ax2.set_title('Box Plot')
plt.tight_layout()
plt.show()
```
Box Plot with Reference Lines¶
Add horizontal reference lines to compare distributions against benchmarks.
1. Single Reference Line¶
```python fig, ax = plt.subplots()
data = [np.random.normal(100, 15, 100) for _ in range(4)] ax.boxplot(data, labels=['Q1', 'Q2', 'Q3', 'Q4']) ax.axhline(y=100, color='red', linestyle='--', label='Target', alpha=0.7) ax.legend() ax.set_ylabel('Performance')
plt.show() ```
2. Multiple Reference Lines¶
```python fig, ax = plt.subplots()
ax.boxplot(data, labels=['Q1', 'Q2', 'Q3', 'Q4']) ax.axhline(y=90, color='red', linestyle='--', label='Minimum', alpha=0.7) ax.axhline(y=100, color='green', linestyle='--', label='Target', alpha=0.7) ax.axhline(y=110, color='blue', linestyle='--', label='Stretch', alpha=0.7) ax.legend()
plt.show() ```
3. Shaded Region¶
```python fig, ax = plt.subplots()
ax.boxplot(data, labels=['Q1', 'Q2', 'Q3', 'Q4']) ax.axhspan(95, 105, color='green', alpha=0.2, label='Acceptable Range') ax.legend()
plt.show() ```
Box Plot with Scatter Overlay¶
Show individual data points alongside the box plot summary.
1. Jittered Points¶
```python import matplotlib.pyplot as plt import numpy as np
np.random.seed(42) data = [np.random.normal(0, std, 50) for std in range(1, 5)]
fig, ax = plt.subplots()
bp = ax.boxplot(data, patch_artist=True, showfliers=False) for patch in bp['boxes']: patch.set_facecolor('lightblue') patch.set_alpha(0.5)
for i, d in enumerate(data, 1): jitter = np.random.normal(0, 0.04, len(d)) ax.scatter(np.full_like(d, i) + jitter, d, alpha=0.5, s=20, c='steelblue')
plt.show() ```
2. Swarm-Like Layout¶
```python def add_points(ax, data, positions, width=0.2): for pos, d in zip(positions, data): n = len(d) offsets = np.linspace(-width/2, width/2, n) ax.scatter(np.full(n, pos) + offsets, np.sort(d), alpha=0.4, s=15, c='darkblue')
fig, ax = plt.subplots() bp = ax.boxplot(data, showfliers=False) add_points(ax, data, range(1, len(data) + 1)) plt.show() ```
3. Strip Plot Style¶
```python fig, ax = plt.subplots()
ax.boxplot(data, showfliers=False, widths=0.3)
for i, d in enumerate(data, 1): y = d x = np.random.uniform(i - 0.15, i + 0.15, len(d)) ax.scatter(x, y, alpha=0.3, s=10, c='black')
plt.show() ```
Grouped Box Plots¶
Compare multiple categories across groups.
1. Manual Positioning¶
```python import matplotlib.pyplot as plt import numpy as np
np.random.seed(42)
group1_a = np.random.normal(100, 10, 50) group1_b = np.random.normal(110, 15, 50) group2_a = np.random.normal(90, 12, 50) group2_b = np.random.normal(95, 18, 50)
fig, ax = plt.subplots()
positions_a = [1, 3] positions_b = [1.6, 3.6]
bp1 = ax.boxplot([group1_a, group2_a], positions=positions_a, widths=0.5, patch_artist=True) bp2 = ax.boxplot([group1_b, group2_b], positions=positions_b, widths=0.5, patch_artist=True)
for patch in bp1['boxes']: patch.set_facecolor('lightblue') for patch in bp2['boxes']: patch.set_facecolor('lightcoral')
ax.set_xticks([1.3, 3.3]) ax.set_xticklabels(['Group 1', 'Group 2']) ax.legend([bp1['boxes'][0], bp2['boxes'][0]], ['Method A', 'Method B'])
plt.show() ```
2. Color by Category¶
```python fig, ax = plt.subplots(figsize=(10, 5))
data_dict = { 'Control': [np.random.normal(100, 10, 50) for _ in range(3)], 'Treatment': [np.random.normal(110, 12, 50) for _ in range(3)] }
colors = {'Control': 'lightblue', 'Treatment': 'lightcoral'} positions = {'Control': [1, 4, 7], 'Treatment': [2, 5, 8]}
for label, datasets in data_dict.items(): bp = ax.boxplot(datasets, positions=positions[label], widths=0.8, patch_artist=True) for patch in bp['boxes']: patch.set_facecolor(colors[label])
ax.set_xticks([1.5, 4.5, 7.5]) ax.set_xticklabels(['Week 1', 'Week 2', 'Week 3'])
plt.show() ```
3. Legend for Groups¶
```python from matplotlib.patches import Patch
legend_elements = [Patch(facecolor='lightblue', label='Control'), Patch(facecolor='lightcoral', label='Treatment')] ax.legend(handles=legend_elements, loc='upper right') ```
Box Plot with Violin Overlay¶
Combine box plots with violin plots for complete distribution visualization.
1. Overlay Approach¶
```python import matplotlib.pyplot as plt import numpy as np
np.random.seed(42) data = [np.random.normal(0, std, 200) for std in range(1, 5)]
fig, ax = plt.subplots()
vp = ax.violinplot(data, showextrema=False) for body in vp['bodies']: body.set_alpha(0.3)
ax.boxplot(data, widths=0.1)
plt.show() ```
2. Side by Side¶
```python fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
ax1.violinplot(data) ax1.set_title('Violin Plot')
ax2.boxplot(data) ax2.set_title('Box Plot')
plt.tight_layout() plt.show() ```
3. Hybrid Visualization¶
```python fig, ax = plt.subplots()
vp = ax.violinplot(data, showmedians=False, showextrema=False) for body in vp['bodies']: body.set_facecolor('lightblue') body.set_alpha(0.5)
bp = ax.boxplot(data, widths=0.15, patch_artist=True, boxprops=dict(facecolor='white', edgecolor='black'), medianprops=dict(color='red', linewidth=2))
plt.show() ```
Exercises¶
Exercise 1.
Create a figure with a box plot on the left and a histogram on the right showing the same dataset (500 samples from a skewed distribution using np.random.exponential). Use the box plot to identify the median and quartiles, then mark those same values on the histogram with vertical lines.
Solution to Exercise 1
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
data = np.random.exponential(2, 500)
q1, median, q3 = np.percentile(data, [25, 50, 75])
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
ax1.boxplot(data, vert=True)
ax1.set_title('Box Plot')
ax2.hist(data, bins=30, color='steelblue', edgecolor='white', alpha=0.7)
ax2.axvline(q1, color='orange', linestyle='--', label=f'Q1={q1:.2f}')
ax2.axvline(median, color='red', linestyle='-', linewidth=2, label=f'Median={median:.2f}')
ax2.axvline(q3, color='orange', linestyle='--', label=f'Q3={q3:.2f}')
ax2.legend()
ax2.set_title('Histogram with Quartile Lines')
plt.tight_layout()
plt.show()
Exercise 2.
Overlay a strip plot (individual points with jitter) on top of a box plot. Generate 4 groups of 50 samples from normal distributions with different means. Show the box plot with patch_artist=True and alpha=0.5, then scatter the actual points on top.
Solution to Exercise 2
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
data = [np.random.normal(loc=m, scale=1, size=50) for m in [2, 4, 6, 8]]
fig, ax = plt.subplots(figsize=(8, 5))
bp = ax.boxplot(data, patch_artist=True, showfliers=False)
colors = ['#a6cee3', '#b2df8a', '#fb9a99', '#fdbf6f']
for patch, color in zip(bp['boxes'], colors):
patch.set_facecolor(color)
patch.set_alpha(0.5)
for i, d in enumerate(data, 1):
jitter = np.random.uniform(-0.15, 0.15, len(d))
ax.scatter(np.full_like(d, i) + jitter, d, alpha=0.6, s=20, color='black', zorder=3)
ax.set_xticklabels(['Group 1', 'Group 2', 'Group 3', 'Group 4'])
ax.set_title('Box Plot with Strip Plot Overlay')
plt.show()
Exercise 3. Create a combined visualization with three panels stacked vertically: a box plot at the top showing distribution summary, a histogram in the middle showing frequency, and a rug plot (short vertical lines at each data point) at the bottom. Use 300 samples from a bimodal distribution (mix of two normals).
Solution to Exercise 3
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
data = np.concatenate([np.random.normal(-2, 0.8, 150),
np.random.normal(2, 0.8, 150)])
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8),
gridspec_kw={'height_ratios': [1, 3, 0.5]},
sharex=True)
ax1.boxplot(data, vert=False, widths=0.6)
ax1.set_yticks([])
ax1.set_title('Distribution Summary')
ax2.hist(data, bins=40, color='steelblue', edgecolor='white')
ax2.set_ylabel('Frequency')
ax3.eventplot(data, orientation='horizontal', lineoffsets=0.5,
linelengths=0.8, color='black', linewidths=0.5)
ax3.set_yticks([])
ax3.set_xlabel('Value')
ax3.set_ylabel('Rug')
plt.tight_layout()
plt.show()