defaultdict¶
A defaultdict is a dict subclass that automatically creates missing keys using a factory function.
The Problem¶
Regular dicts raise KeyError for missing keys:
# Regular dict: must check or use setdefault
groups = {}
for name, category in data:
if category not in groups:
groups[category] = []
groups[category].append(name)
# Or using setdefault (verbose)
groups = {}
for name, category in data:
groups.setdefault(category, []).append(name)
The Solution¶
from collections import defaultdict
groups = defaultdict(list)
for name, category in data:
groups[category].append(name) # Auto-creates empty list!
How It Works¶
from collections import defaultdict
d = defaultdict(list) # Factory function: list
# Accessing missing key:
# 1. Calls list() to create []
# 2. Assigns d['new_key'] = []
# 3. Returns the empty list
d['fruits'].append('apple')
print(d) # defaultdict(<class 'list'>, {'fruits': ['apple']})
Common Factory Functions¶
list — Grouping¶
from collections import defaultdict
data = [('apple', 'fruit'), ('carrot', 'vegetable'),
('banana', 'fruit'), ('broccoli', 'vegetable')]
groups = defaultdict(list)
for item, category in data:
groups[category].append(item)
print(dict(groups))
# {'fruit': ['apple', 'banana'], 'vegetable': ['carrot', 'broccoli']}
int — Counting¶
counts = defaultdict(int) # int() returns 0
for char in 'mississippi':
counts[char] += 1
print(dict(counts))
# {'m': 1, 'i': 4, 's': 4, 'p': 2}
set — Unique Grouping¶
tags = defaultdict(set)
data = [('doc1', 'python'), ('doc1', 'tutorial'),
('doc2', 'python'), ('doc1', 'python')] # duplicate
for doc, tag in data:
tags[doc].add(tag)
print(dict(tags))
# {'doc1': {'python', 'tutorial'}, 'doc2': {'python'}}
lambda — Custom Default¶
# Default value 'N/A'
d = defaultdict(lambda: 'N/A')
d['name'] = 'Alice'
print(d['name']) # Alice
print(d['age']) # N/A
# Default value 0.0
prices = defaultdict(lambda: 0.0)
prices['apple'] = 1.50
print(prices['banana']) # 0.0
Nested defaultdict¶
Two Levels¶
# year -> month -> count
stats = defaultdict(lambda: defaultdict(int))
stats['2024']['Jan'] += 100
stats['2024']['Feb'] += 200
stats['2025']['Jan'] += 150
print(stats['2024']['Jan']) # 100
print(stats['2024']['Mar']) # 0 (auto-created)
Three Levels¶
# country -> city -> category -> count
data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
data['USA']['NYC']['sales'] += 1000
data['USA']['NYC']['returns'] += 50
data['USA']['LA']['sales'] += 800
Converting to Regular Dict¶
d = defaultdict(list)
d['a'].append(1)
d['b'].append(2)
# Convert to regular dict
regular = dict(d)
print(regular) # {'a': [1], 'b': [2]}
# Nested conversion
import json
print(json.dumps(dict(d))) # Works after conversion
defaultdict vs setdefault¶
| Aspect | defaultdict |
setdefault |
|---|---|---|
| Syntax | d[key].append(x) |
d.setdefault(key, []).append(x) |
| Readability | ✅ Clean | ❌ Verbose |
| Creates on read | ✅ Yes | ❌ No |
| Regular dict | ❌ No | ✅ Yes |
# defaultdict: creates key even on read
d = defaultdict(list)
_ = d['key'] # Creates empty list
print('key' in d) # True
# setdefault: only creates on explicit call
d = {}
_ = d.get('key', []) # Does NOT create
print('key' in d) # False
Practical Examples¶
Word Index¶
from collections import defaultdict
text = "the quick brown fox jumps over the lazy dog"
index = defaultdict(list)
for pos, word in enumerate(text.split()):
index[word].append(pos)
print(dict(index))
# {'the': [0, 6], 'quick': [1], 'brown': [2], ...}
Graph Adjacency List¶
graph = defaultdict(list)
edges = [('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'D')]
for src, dst in edges:
graph[src].append(dst)
graph[dst].append(src) # Undirected
print(dict(graph))
# {'A': ['B', 'C'], 'B': ['A', 'C'], 'C': ['A', 'B', 'D'], 'D': ['C']}
Frequency Table¶
from collections import defaultdict
scores = [85, 90, 85, 78, 90, 90, 85]
freq = defaultdict(int)
for score in scores:
freq[score] += 1
print(dict(freq)) # {85: 3, 90: 3, 78: 1}
Key Takeaways¶
defaultdict(factory)auto-creates missing keys- Common factories:
list,int,set,lambda - Cleaner than
setdefault()for grouping/counting - Use
dict(d)to convert to regular dict - Accessing missing key creates it (unlike regular dict)