Bar Plots¶
Bar plots visualize categorical data by displaying rectangular bars with heights proportional to the values they represent. Pandas provides multiple ways to create bar plots.
Basic Bar Plot¶
Using plot(kind='bar')¶
import pandas as pd
import matplotlib.pyplot as plt
# Count categories
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv'
df = pd.read_csv(url)
fig, ax = plt.subplots(figsize=(10, 4))
df['continent'].value_counts().plot(kind='bar', ax=ax)
ax.set_title('Countries by Continent')
ax.set_ylabel('Count')
plt.tight_layout()
plt.show()
Horizontal Bar (kind='barh')¶
fig, ax = plt.subplots(figsize=(8, 5))
df['continent'].value_counts().plot(kind='barh', ax=ax)
ax.set_title('Countries by Continent')
ax.set_xlabel('Count')
plt.tight_layout()
plt.show()
Single Bar Plot with Matplotlib¶
For more control, use matplotlib's ax.bar():
import matplotlib.pyplot as plt
import pandas as pd
def load_teachers_data():
data = {
'Courses': ('Language', 'History', 'Geometry', 'Chemistry', 'Physics'),
'Number of Teachers': (7, 3, 9, 1, 2)
}
return pd.DataFrame(data).set_index('Courses')
df = load_teachers_data()
fig, ax = plt.subplots(figsize=(10, 4))
teacher_counts = df['Number of Teachers']
ax.bar(
x=range(len(teacher_counts)),
height=teacher_counts,
tick_label=df.index,
width=0.5
)
ax.set_xlabel('Courses')
ax.set_ylabel('Number of Teachers')
ax.set_title('Favorite Courses of Teachers')
ax.spines[['right', 'top']].set_visible(False)
plt.tight_layout()
plt.show()
Grouped Bar Plot¶
Compare multiple metrics across categories:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def load_student_scores():
data = {
'Student': ['Brandon', 'Vanessa', 'Daniel', 'Kevin', 'William'],
'Midterm': [85, 60, 60, 65, 100],
'Final': [90, 90, 65, 80, 95]
}
return pd.DataFrame(data).set_index('Student')
df = load_student_scores()
# Set up positions
positions = np.arange(len(df))
width = 0.35
fig, ax = plt.subplots(figsize=(10, 5))
# Plot bars side by side
ax.bar(positions - width/2, df['Midterm'], width, label='Midterm')
ax.bar(positions + width/2, df['Final'], width, label='Final')
# Customize
ax.set_xticks(positions)
ax.set_xticklabels(df.index)
ax.set_xlabel('Student')
ax.set_ylabel('Score')
ax.set_title('Midterm and Final Scores')
ax.legend()
ax.spines[['right', 'top']].set_visible(False)
plt.tight_layout()
plt.show()
Using pandas plot() for Grouped Bars¶
fig, ax = plt.subplots(figsize=(10, 5))
df.plot(kind='bar', ax=ax)
ax.set_title('Student Scores')
ax.set_ylabel('Score')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
Stacked Bar Plot¶
Show composition within categories:
# Using pandas
fig, ax = plt.subplots(figsize=(10, 5))
df.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Student Scores (Stacked)')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
Segmented Bar Plot (100% Stacked)¶
Show proportions rather than absolute values:
import matplotlib.pyplot as plt
import numpy as np
# Data: Has Antibodies?
labels = ('Yes', 'No')
antibody_pcts = (
np.array([95, 90, 40]), # Yes percentages
np.array([5, 10, 60]) # No percentages
)
age_groups = ('Adults', 'Children', 'Infants')
fig, ax = plt.subplots(figsize=(8, 5))
# Initialize bottom for stacking
bottom = np.zeros(3)
# Stack bars
for label, pct in zip(labels, antibody_pcts):
ax.bar(
x=np.arange(3),
height=pct,
width=0.5,
bottom=bottom,
label=label
)
bottom += pct
ax.set_xticks(np.arange(3))
ax.set_xticklabels(age_groups)
ax.set_ylabel('Percentage')
ax.set_title('Has Antibodies?')
ax.spines[['top', 'right']].set_visible(False)
ax.legend(title='Response', loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.tight_layout()
plt.show()
Bar Plot from Value Counts¶
Common pattern for categorical data:
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
# Survival counts
df['Survived'].value_counts().plot(kind='bar', ax=axes[0])
axes[0].set_title('Survival')
axes[0].set_xticklabels(['Died', 'Survived'], rotation=0)
# Passenger class
df['Pclass'].value_counts().sort_index().plot(kind='bar', ax=axes[1])
axes[1].set_title('Passenger Class')
axes[1].set_xticklabels(['1st', '2nd', '3rd'], rotation=0)
# Embarkation port
df['Embarked'].value_counts().plot(kind='bar', ax=axes[2])
axes[2].set_title('Embarkation Port')
plt.tight_layout()
plt.show()
Customization Options¶
Colors¶
# Single color
df['continent'].value_counts().plot(kind='bar', color='steelblue')
# Multiple colors
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
df['continent'].value_counts().plot(kind='bar', color=colors)
Edge Color¶
df['continent'].value_counts().plot(kind='bar', edgecolor='black')
Bar Width¶
df['continent'].value_counts().plot(kind='bar', width=0.8) # 0-1 range
Rotation¶
# Rotate x-tick labels
df.plot(kind='bar', rot=45)
Grid¶
df.plot(kind='bar', grid=True)
Sorting Bars¶
# Sort by value (descending - default for value_counts)
df['continent'].value_counts().plot(kind='bar')
# Sort by value (ascending)
df['continent'].value_counts().sort_values().plot(kind='bar')
# Sort alphabetically
df['continent'].value_counts().sort_index().plot(kind='bar')
Adding Value Labels¶
fig, ax = plt.subplots(figsize=(10, 5))
counts = df['continent'].value_counts()
bars = ax.bar(range(len(counts)), counts.values)
# Add labels on bars
for bar, count in zip(bars, counts.values):
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
str(count), ha='center', va='bottom')
ax.set_xticks(range(len(counts)))
ax.set_xticklabels(counts.index)
ax.set_title('Countries by Continent')
plt.tight_layout()
plt.show()
Summary of Bar Plot Types¶
| Type | Code | Use Case |
|---|---|---|
| Vertical bar | plot(kind='bar') |
Category comparison |
| Horizontal bar | plot(kind='barh') |
Long category names |
| Grouped bar | Multiple ax.bar() calls |
Compare metrics |
| Stacked bar | plot(kind='bar', stacked=True) |
Show composition |
| 100% stacked | Manual with percentages | Show proportions |
Quick Reference¶
# Basic bar from value counts
series.value_counts().plot(kind='bar')
# Horizontal
series.value_counts().plot(kind='barh')
# Multiple columns grouped
df.plot(kind='bar')
# Stacked
df.plot(kind='bar', stacked=True)
# Customized
series.value_counts().plot(
kind='bar',
color='steelblue',
edgecolor='black',
width=0.7,
rot=45
)