Binomial Distribution¶
The binomial distribution models the number of successes in a fixed number of independent trials, each with the same probability of success.
np.random.binomial¶
1. Basic Usage¶
import numpy as np
def main():
    """Draw five reproducible Binomial(10, 0.5) samples and print them."""
    # Seed the global NumPy RNG so the printed samples repeat on every run.
    np.random.seed(42)

    num_trials = 10      # trials per sample
    success_prob = 0.5   # chance of success on each trial
    draws = np.random.binomial(num_trials, success_prob, size=5)

    print(f"Samples: {draws}")


if __name__ == "__main__":
    main()
Output:
Samples: [4 8 6 5 3]
2. Parameters¶
- n: Number of trials (non-negative integer)
- p: Probability of success in each trial (0 ≤ p ≤ 1)
- size: Output shape
3. Mathematical Form¶
\[P(X = k) = \binom{n}{k} p^k (1-p)^{n-k}\]
Coin Flip Model¶
1. Fair Coin¶
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def main():
    """Plot simulated fair-coin success counts against the exact binomial PMF."""
    np.random.seed(0)

    n = 10
    p = 0.5
    draws = np.random.binomial(n, p, size=10_000)

    outcomes = np.arange(n + 1)        # every possible success count, 0..n
    edges = np.arange(-0.5, n + 1)     # unit-wide bins centred on each integer
    exact_pmf = stats.binom.pmf(outcomes, n, p)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(f"Binomial(n={n}, p={p})", fontsize=15)

    # Empirical histogram first, then the theoretical PMF drawn on top of it.
    ax.hist(draws, bins=edges, density=True, alpha=0.4, label='Samples')
    ax.stem(outcomes, exact_pmf, linefmt='r-', markerfmt='ro', basefmt=' ', label='PMF')

    ax.set_xlabel('Number of Successes')
    ax.set_ylabel('Probability')
    ax.set_xticks(outcomes)
    ax.legend()
    plt.show()


if __name__ == "__main__":
    main()
2. Biased Coin¶
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def main():
    """Plot simulated biased-coin success counts against the exact binomial PMF."""
    np.random.seed(0)

    n = 10
    p = 0.3  # biased toward tails
    simulated = np.random.binomial(n, p, size=10_000)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(f"Binomial(n={n}, p={p})", fontsize=15)

    # One unit-wide bin per integer outcome, centred on the integer.
    bin_edges = np.arange(-0.5, n + 1)
    ax.hist(simulated, bins=bin_edges, density=True, alpha=0.4, label='Samples')

    # Exact probabilities for each possible count, overlaid as a stem plot.
    counts = np.arange(n + 1)
    ax.stem(
        counts,
        stats.binom.pmf(counts, n, p),
        linefmt='r-',
        markerfmt='ro',
        basefmt=' ',
        label='PMF',
    )

    ax.set_xlabel('Number of Successes')
    ax.set_ylabel('Probability')
    ax.set_xticks(counts)
    ax.legend()
    plt.show()


if __name__ == "__main__":
    main()
3. Interpretation¶
Each sample counts successes in n independent Bernoulli trials with probability p.
Varying Parameters¶
1. Effect of n¶
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def main():
    """Show side by side how the binomial shape changes as n grows (p fixed)."""
    np.random.seed(0)
    p = 0.5

    def draw_panel(ax, n):
        """Fill one axis with the sample histogram and exact PMF for this n."""
        simulated = np.random.binomial(n, p, size=10_000)
        edges = np.arange(-0.5, n + 1)  # unit-wide bins centred on integers
        ax.hist(simulated, bins=edges, density=True, alpha=0.4)

        counts = np.arange(n + 1)
        ax.stem(counts, stats.binom.pmf(counts, n, p),
                linefmt='r-', markerfmt='ro', basefmt=' ')
        ax.set_title(f"n={n}, p={p}")
        ax.set_xlabel('Successes')

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    # Panels are filled left to right so the random draws happen in a fixed order.
    for panel, n in zip(axes, (5, 20, 50)):
        draw_panel(panel, n)

    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
2. Effect of p¶
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def main():
    """Show side by side how the binomial shape shifts as p changes (n fixed)."""
    np.random.seed(0)
    n = 20

    # These depend only on n, so they are shared by all three panels.
    counts = np.arange(n + 1)
    edges = np.arange(-0.5, n + 1)  # unit-wide bins centred on integers

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    probabilities = (0.2, 0.5, 0.8)
    for idx, p in enumerate(probabilities):
        ax = axes[idx]
        simulated = np.random.binomial(n, p, size=10_000)
        ax.hist(simulated, bins=edges, density=True, alpha=0.4)
        ax.stem(counts, stats.binom.pmf(counts, n, p),
                linefmt='r-', markerfmt='ro', basefmt=' ')
        ax.set_title(f"n={n}, p={p}")
        ax.set_xlabel('Successes')

    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
3. Mean and Variance¶
\[E[X] = np, \quad \text{Var}(X) = np(1-p)\]
import numpy as np
def main():
    """Compare theoretical and sample mean/variance of Binomial(100, 0.3).

    Prints the closed-form values E[X] = n*p and Var(X) = n*p*(1-p) next to
    the estimates computed from 100,000 simulated draws.
    """
    # Seed the global RNG so the printed sample statistics are reproducible,
    # matching the other sampling examples on this page.
    np.random.seed(42)

    n, p = 100, 0.3
    samples = np.random.binomial(n, p, size=100_000)

    print(f"Theoretical mean: {n * p}")
    print(f"Sample mean: {samples.mean():.2f}")
    print()
    print(f"Theoretical var: {n * p * (1 - p)}")
    # ndarray.var() defaults to ddof=0 (population variance); with 100,000
    # draws the difference from the ddof=1 estimate is negligible.
    print(f"Sample var: {samples.var():.2f}")


if __name__ == "__main__":
    main()
scipy.stats Alternative¶
1. Using rvs¶
import numpy as np
from scipy import stats
def main():
    """Draw five binomial samples with NumPy and five with scipy.stats, then print both."""
    np.random.seed(42)
    n, p = 10, 0.5
    how_many = 5

    # NumPy draws first, SciPy second: both consume the same global RNG
    # stream, so this order determines the values each one gets.
    from_numpy = np.random.binomial(n, p, size=how_many)
    from_scipy = stats.binom.rvs(n, p, size=how_many)

    print(f"NumPy: {from_numpy}")
    print(f"SciPy: {from_scipy}")


if __name__ == "__main__":
    main()
2. PMF and CDF¶
import numpy as np
from scipy import stats
def main():
    """Print the PMF and CDF of Binomial(10, 0.5) evaluated at k = 5."""
    n, p = 10, 0.5
    k = 5

    # Probability of exactly k successes.
    exactly_k = stats.binom.pmf(k, n, p)
    print(f"P(X = 5) = {exactly_k:.4f}")

    # Probability of at most k successes.
    at_most_k = stats.binom.cdf(k, n, p)
    print(f"P(X ≤ 5) = {at_most_k:.4f}")


if __name__ == "__main__":
    main()
3. When to Use Each¶
- np.random.binomial: Fast sampling, simple interface
- stats.binom: Full distribution object with PMF, CDF, quantiles
Applications¶
1. Quality Control¶
import numpy as np
def main():
    """Simulate defect counts for 1000 inspected batches and print summary statistics."""
    np.random.seed(42)

    # Each batch holds 100 items, and each item is defective with probability 2%.
    batch_size = 100
    p_defect = 0.02

    # One binomial draw per inspected batch.
    per_batch = np.random.binomial(batch_size, p_defect, size=1000)

    average = np.mean(per_batch)
    worst = np.max(per_batch)
    clean_batches = np.count_nonzero(per_batch == 0)

    print(f"Mean defects per batch: {average:.2f}")
    print(f"Max defects observed: {worst}")
    print(f"Batches with 0 defects: {clean_batches}")


if __name__ == "__main__":
    main()
2. A/B Testing¶
import numpy as np
def main():
    """Estimate how often a 12% treatment out-converts a 10% control over 1000 simulated tests."""
    np.random.seed(42)

    # Conversion rates and users per arm.
    n_users = 1000
    p_control, p_treatment = 0.10, 0.12
    n_experiments = 1000

    # Control is sampled before treatment; both draw from the same RNG stream.
    control_conversions = np.random.binomial(n_users, p_control, size=n_experiments)
    treatment_conversions = np.random.binomial(n_users, p_treatment, size=n_experiments)

    # Fraction of experiments in which treatment strictly beats control.
    treatment_ahead = treatment_conversions > control_conversions
    wins = np.mean(treatment_ahead)
    print(f"Treatment wins: {wins:.1%}")


if __name__ == "__main__":
    main()
3. Election Polling¶
import numpy as np
def main():
    """Simulate 1000 polls of 1000 voters at 52% true support and summarise the results."""
    np.random.seed(42)

    # True support is 52%, and each poll asks 1000 voters.
    true_support = 0.52
    n_polled = 1000

    # Supporter counts for 1000 polls, converted to each poll's observed share.
    supporter_counts = np.random.binomial(n_polled, true_support, size=1000)
    shares = supporter_counts / n_polled

    below_half = np.mean(shares < 0.5)
    print(f"Mean poll result: {np.mean(shares):.3f}")
    print(f"Std of polls: {np.std(shares):.3f}")
    print(f"Polls showing <50%: {below_half:.1%}")


if __name__ == "__main__":
    main()