Race Conditions¶
A race condition occurs when the behavior of a program depends on the relative timing of events, such as the order in which threads execute.
Classic Race Condition: Check-Then-Act¶
The Problem¶
import threading
import time
balance = 100  # Shared mutable state — the source of the race below.

def withdraw(amount):
    """Naive withdrawal: the balance CHECK and the deduction (ACT) are not atomic.

    Returns True on success, False when funds are insufficient. Under
    concurrent callers, two threads can both pass the check before either
    writes, driving the balance negative.
    """
    global balance
    if balance < amount:
        print(f"Insufficient funds for {amount}")
        return False
    # Race window: another thread can pass its own check right here!
    time.sleep(0.001)  # Simulates processing delay
    balance -= amount
    print(f"Withdrew {amount}, balance: {balance}")
    return True
# Two threads race to withdraw 80 from a starting balance of 100.
t1 = threading.Thread(target=withdraw, args=(80,))
t2 = threading.Thread(target=withdraw, args=(80,))
for t in (t1, t2):
    t.start()
for t in (t1, t2):
    t.join()
print(f"Final balance: {balance}")
# PROBLEM: both checks can pass before either write — balance can go negative!
What Happens¶
Time Thread 1 Thread 2 Balance
──────────────────────────────────────────────────────────────
0 Check: 100 >= 80 ✓ 100
1 Check: 100 >= 80 ✓ 100
2 Withdraw 80 20
3 Withdraw 80 -60 ← BUG!
The Fix: Use Locks¶
import threading
balance = 100
lock = threading.Lock()  # Guards every access to `balance`.

def withdraw_safe(amount):
    """Withdraw under a lock so the check-then-act pair is atomic.

    Returns True on success, False when funds are insufficient.
    """
    global balance
    with lock:  # No other thread can interleave between check and act.
        if balance < amount:
            print(f"Insufficient funds for {amount}")
            return False
        balance -= amount
        print(f"Withdrew {amount}, balance: {balance}")
        return True
# With the lock, exactly one of the two withdrawals can succeed.
t1 = threading.Thread(target=withdraw_safe, args=(80,))
t2 = threading.Thread(target=withdraw_safe, args=(80,))
for t in (t1, t2):
    t.start()
for t in (t1, t2):
    t.join()
# Final balance: 20 (correct!)
Race Condition: Read-Modify-Write¶
The Problem¶
import threading
counter = 0  # Shared counter mutated by every thread.

def increment():
    """Bump the shared counter 100000 times.

    `counter += 1` is a read-modify-write, NOT an atomic operation, so
    concurrent callers lose updates.
    """
    global counter
    remaining = 100000
    while remaining:
        counter += 1  # Not atomic!
        remaining -= 1
t1 = threading.Thread(target=increment)
t2 = threading.Thread(target=increment)
for t in (t1, t2):
    t.start()
for t in (t1, t2):
    t.join()
print(f"Counter: {counter}")
# Expected: 200000
# Actual: often less (e.g., 134521) — increments from the two threads collide.
Why It Happens¶
counter += 1 is three operations:
# Illustration only: expanding `counter += 1` into its underlying steps.
# What looks atomic:
counter += 1
# Is actually:
temp = counter # 1. READ
temp = temp + 1 # 2. MODIFY
counter = temp # 3. WRITE
# Another thread can run between any two of these steps.
# Race condition timeline:
# Thread 1: READ counter (0)
# Thread 2: READ counter (0) ← Same value!
# Thread 1: WRITE counter (1)
# Thread 2: WRITE counter (1) ← Overwrites Thread 1's work!
The Fix¶
import threading
counter = 0
lock = threading.Lock()  # Serializes every increment.

def increment_safe():
    """Increment the shared counter 100000 times, one locked step at a time.

    Holding the lock around each `counter += 1` makes the read-modify-write
    atomic, so no updates are lost.
    """
    global counter
    for _ in range(100000):
        with lock:
            counter += 1
# Now counter always reaches the full expected total (200000 with two threads).
Race Condition: Lazy Initialization¶
The Problem (Singleton Pattern)¶
import threading
import time
class Singleton:
    """Lazily-created singleton WITHOUT locking — racy under threads.

    The `_instance is None` check and the assignment are separated by a
    deliberate sleep, so several threads can each construct an instance.
    """

    _instance = None  # Class-level cache of the single instance.

    def __new__(cls):
        if cls._instance is None:
            # Race window: a second thread can pass the None check here!
            time.sleep(0.001)
            cls._instance = super().__new__(cls)
            cls._instance.value = 0
        return cls._instance
def create_singleton():
    """Instantiate Singleton and report the identity of the object we got."""
    instance = Singleton()
    print(f"Got instance {id(instance)}")
    return instance
# Multiple threads may each see _instance is None and build their own instance!
threads = []
for _ in range(5):
    t = threading.Thread(target=create_singleton)
    threads.append(t)
    t.start()
for t in threads:
    t.join()
The Fix: Double-Checked Locking¶
import threading
class Singleton:
_instance = None
_lock = threading.Lock()
def __new__(cls):
if cls._instance is None: # First check (no lock)
with cls._lock:
if cls._instance is None: # Second check (with lock)
cls._instance = super().__new__(cls)
cls._instance.value = 0
return cls._instance
Race Condition: Lost Update¶
The Problem¶
import threading
class Account:
    """Bank account holding a mutable balance — no locking."""

    def __init__(self, balance):
        self.balance = balance

account = Account(1000)  # Shared between all worker threads.

def update_balance():
    """Add 100 via an unguarded read-modify-write.

    Concurrent callers can read the same starting value and overwrite each
    other's writes ("lost update").
    """
    current = account.balance        # READ
    new_balance = current + 100      # MODIFY (simulated computation)
    account.balance = new_balance    # WRITE
threads = []
for _ in range(10):
    t = threading.Thread(target=update_balance)
    threads.append(t)
    t.start()
for t in threads:
    t.join()
print(f"Balance: {account.balance}")
# Expected: 2000 (1000 + 10*100)
# Actual: Often 1100 or similar (updates lost)
The Fix¶
import threading
class Account:
    """Account whose balance is protected by a per-instance lock."""

    def __init__(self, balance):
        self.balance = balance
        self.lock = threading.Lock()  # Guards all balance mutations.

    def deposit(self, amount):
        """Atomically add *amount* to the balance."""
        with self.lock:
            self.balance += amount
Race Condition: File Operations¶
The Problem¶
import threading
import os
def write_if_not_exists(filename, content):
    """Create *filename* with *content* unless it exists (racy check-then-act).

    Returns True if this call wrote the file, False if it already existed.
    """
    if os.path.exists(filename):  # CHECK
        return False
    # Race window: another thread may create the file here!
    with open(filename, 'w') as f:  # ACT
        f.write(content)
    return True
# Both threads can pass the existence check, so both may write!
t1 = threading.Thread(
    target=write_if_not_exists, args=("data.txt", "from thread 1")
)
t2 = threading.Thread(
    target=write_if_not_exists, args=("data.txt", "from thread 2")
)
The Fix: Atomic File Operations¶
import os
import tempfile

def write_if_not_exists_safe(filename, content):
    """Atomically create *filename* with *content* unless it already exists.

    Writes to a temp file in the target directory, then hard-links it into
    place: os.link raises FileExistsError if the target exists, so exactly
    one concurrent caller can win.

    Returns True if this call created the file, False if it already existed.
    """
    # dirname('') fails in mkstemp for bare filenames — fall back to cwd.
    target_dir = os.path.dirname(filename) or "."
    fd, temp_path = tempfile.mkstemp(dir=target_dir)
    try:
        try:
            os.write(fd, content.encode())
        finally:
            # Close even if the write raises, so the descriptor never leaks.
            os.close(fd)
        # Atomic link-into-place (not a rename): FileExistsError loses the race.
        try:
            os.link(temp_path, filename)
            return True
        except FileExistsError:
            return False
    finally:
        os.unlink(temp_path)  # Always discard the temp file.
Race Condition: List Operations¶
The Problem¶
import threading
items = []

def append_items(start):
    """Append 100 ints starting at *start*, popping whenever the list tops 50.

    list.append is atomic in CPython, but the len() check plus pop() is a
    compound check-then-act operation and therefore racy under threads.
    """
    for i in range(start, start + 100):
        items.append(i)
        if len(items) > 50:  # Check-then-act race!
            # Another thread might append between the check and the pop,
            # so the popped item may not be the one just appended.
            items.pop()
# Lists are not thread-safe for compound operations.
The Fix: Use Queue or Lock¶
import threading
import queue
# Option 1: queue.Queue is internally locked — put/get are thread-safe.
q = queue.Queue()

def safe_append(item):
    """Thread-safe enqueue."""
    q.put(item)

def safe_pop():
    """Thread-safe dequeue (blocks until an item is available)."""
    return q.get()
# Option 2: guard the compound list operations with a single lock.
items = []
lock = threading.Lock()

def safe_list_operation():
    """Append then conditionally pop, all under one lock — atomic as a unit."""
    with lock:
        items.append("item")
        if len(items) > 50:
            items.pop()
Race Condition: Dictionary Operations¶
The Problem¶
import threading
cache = {}

def get_or_compute(key):
    """Memoize expensive_computation(key) — racy check-then-act.

    Two threads can both miss the cache and compute the same key twice.
    """
    if key not in cache:  # CHECK
        # Race: another thread may be computing the same value right now!
        cache[key] = expensive_computation(key)  # ACT
    return cache[key]
The Fix¶
import threading
cache = {}
cache_lock = threading.Lock()

def get_or_compute_safe(key):
    """Memoize with double-checked locking: at most one thread computes a key."""
    try:
        # Fast path: no lock needed once the value is cached.
        return cache[key]
    except KeyError:
        pass
    with cache_lock:
        # Re-check under the lock: another thread may have filled it.
        if key not in cache:
            cache[key] = expensive_computation(key)
        return cache[key]
Detecting Race Conditions¶
1. ThreadSanitizer (TSan)¶
For C extensions, or for CPython builds compiled with TSan support:
# Compile Python with TSan support
./configure --with-thread-sanitizer
2. Stress Testing¶
import threading
import time  # Was missing in the original — time.sleep raised NameError.
import random

def stress_test(target_func, num_threads=100, iterations=1000):
    """Run *target_func* from many threads to expose race conditions.

    Each of *num_threads* worker threads calls *target_func* *iterations*
    times, occasionally sleeping a millisecond to randomize interleavings.

    Returns the list of exceptions raised by target_func (empty if none).
    """
    errors = []  # list.append is atomic in CPython, safe to share here.

    def worker():
        for _ in range(iterations):
            try:
                target_func()
            except Exception as e:
                errors.append(e)
            # Random sleep to vary timing between threads.
            if random.random() < 0.01:
                time.sleep(0.001)

    threads = [threading.Thread(target=worker) for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return errors
3. Adding Delays to Expose Races¶
import threading
import time
import os
# Opt-in flag: set DEBUG_RACES in the environment to widen race windows.
# NOTE(review): any non-empty string (even "0") is truthy here — confirm intended.
DEBUG_RACES = os.environ.get('DEBUG_RACES', False)

def potentially_racy_operation():
    """Run a check-then-act pair, optionally stretching the race window."""
    check_condition()
    if DEBUG_RACES:
        # Deliberate pause between check and act so races fire reliably.
        time.sleep(0.1)
    perform_action()
Common Patterns to Avoid Races¶
1. Immutable Data¶
from dataclasses import dataclass

@dataclass(frozen=True)
class Config:
    """Immutable configuration — frozen instances are safe to share across threads."""
    host: str
    port: int

# No thread can mutate this, so sharing it needs no locking.
config = Config("localhost", 8080)
2. Thread Confinement¶
# Thread confinement: each thread works exclusively on its own data.
def worker(data):
    """Process a chunk that is private to this thread — nothing is shared."""
    # NOTE(review): `process` and `result` are placeholders; pseudo-code only.
    process(data)
    return result

# Divide the work up front instead of sharing mutable state.
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor() as executor:
    results = executor.map(worker, data_chunks)
3. Message Passing¶
import queue

# Message passing: threads communicate through queues instead of shared state.
work_queue = queue.Queue()
result_queue = queue.Queue()

def worker():
    """Drain work_queue until a None sentinel arrives, pushing results out."""
    while True:
        item = work_queue.get()
        if item is None:  # Sentinel: shut this worker down.
            break
        result_queue.put(process(item))
Key Takeaways¶
- Race conditions occur when timing affects correctness
- Common patterns: check-then-act, read-modify-write, lazy init
- Fix: Use locks to make compound operations atomic
- Prefer immutable data and message passing over shared mutable state
- Test with many threads and random delays to expose races
- Python's GIL doesn't prevent all race conditions
- When in doubt, use locks or thread-safe data structures