Normal Distributions¶

NumPy provides multiple functions for generating samples from normal (Gaussian) distributions.

np.random.randn¶

Generates samples from the standard normal distribution \(\mathcal{N}(0, 1)\).

1. Basic Usage¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Overlay a histogram of ``np.random.randn`` draws with the N(0, 1) density."""
    np.random.seed(0)  # fixed seed so the figure is reproducible

    sample_count = 10_000
    draws = np.random.randn(sample_count)

    fig, ax = plt.subplots(figsize=(12, 3))

    # Only the bin edges (second item returned by hist) are needed afterwards.
    edges = ax.hist(draws, bins=100, density=True, alpha=0.3, label='Histogram')[1]

    # Evaluate the standard normal density at the bin edges and draw it on top.
    density = stats.norm().pdf(edges)
    ax.plot(edges, density, '--r', linewidth=2, label='Standard Normal PDF')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Shape Argument¶

Pass dimensions as separate arguments.

import numpy as np

def main():
    """Print the shapes of 1D, 2D and 3D arrays produced by ``np.random.randn``."""
    np.random.seed(42)

    # Each dimension is handed to randn as its own positional argument.
    a = np.random.randn(5)          # one dimension
    b = np.random.randn(3, 4)       # two dimensions
    c = np.random.randn(2, 3, 4)    # three dimensions

    print(f"1D: {a.shape}")
    print(f"2D: {b.shape}")
    print(f"3D: {c.shape}")

if __name__ == "__main__":
    main()

3. Quick Sampling¶

Use randn for quick standard normal samples with positional shape.

np.random.standard_normal¶

An alternative to randn for standard normal samples; the output shape is passed through the size keyword.

1. Size Keyword¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Plot ``np.random.standard_normal`` samples against the N(0, 1) density."""
    np.random.seed(0)

    n_draws = 10_000
    # The output shape is supplied as a tuple through the ``size`` keyword.
    samples = np.random.standard_normal(size=(n_draws,))

    fig, ax = plt.subplots(figsize=(12, 3))

    hist_result = ax.hist(samples, bins=100, density=True, alpha=0.3, label='Histogram')
    bin_edges = hist_result[1]

    # Reference curve: standard normal density evaluated at the bin edges.
    reference = stats.norm().pdf(bin_edges)
    ax.plot(bin_edges, reference, '--r', linewidth=2, label='Standard Normal PDF')

    for spine_name in ('top', 'right'):
        ax.spines[spine_name].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Difference from randn¶

standard_normal takes the shape as a tuple passed to the size keyword, whereas randn takes each dimension as a separate positional argument.

import numpy as np

def main():
    """Build a 3x4 array with ``randn`` and with ``standard_normal`` and print both shapes."""
    np.random.seed(42)

    shape = (3, 4)

    # randn wants the dimensions unpacked into positional arguments ...
    a = np.random.randn(*shape)

    # ... while standard_normal receives the same tuple via ``size``.
    b = np.random.standard_normal(size=shape)

    print(f"randn shape: {a.shape}")
    print(f"standard_normal shape: {b.shape}")

if __name__ == "__main__":
    main()

3. Equivalent Results¶

Both produce standard normal samples; choice is stylistic.

np.random.normal¶

Generates samples from a general normal distribution \(\mathcal{N}(\mu, \sigma^2)\).

1. Parameters¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Plot ``np.random.normal`` samples (mean 5, std 2) with the matching density."""
    np.random.seed(0)

    sample_count = 10_000
    loc, scale = 5, 2  # mean (μ) and standard deviation (σ)

    draws = np.random.normal(loc=loc, scale=scale, size=(sample_count,))

    fig, ax = plt.subplots(figsize=(12, 3))

    # Keep only the bin edges returned by hist; the curve is evaluated there.
    edges = ax.hist(draws, bins=100, density=True, alpha=0.3, label='Histogram')[1]

    # Frozen distribution with the same parameters used for sampling.
    target = stats.norm(loc=loc, scale=scale)
    ax.plot(edges, target.pdf(edges), '--r', linewidth=2, label=f'N({loc}, {scale}²) PDF')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Scaling Relation¶

\(X \sim \mathcal{N}(\mu, \sigma^2)\) is equivalent to \(X = \mu + \sigma Z\) where \(Z \sim \mathcal{N}(0, 1)\).

import numpy as np

def main():
    """Compare ``np.random.normal`` with a shifted and scaled ``randn`` sample.

    Both samples are drawn after seeding the global generator with 42, and
    the mean of each is printed to four decimals.
    """
    seed = 42
    mu, sigma = 5, 2
    sample_count = 10_000

    # Method 1: draw from N(mu, sigma^2) directly.
    np.random.seed(seed)
    x1 = np.random.normal(loc=mu, scale=sigma, size=sample_count)

    # Method 2: restart from the same seed, draw standard normals, then
    # apply X = mu + sigma * Z.
    np.random.seed(seed)
    standard_draws = np.random.randn(sample_count)
    x2 = mu + sigma * standard_draws

    print(f"Method 1 mean: {x1.mean():.4f}")
    print(f"Method 2 mean: {x2.mean():.4f}")

if __name__ == "__main__":
    main()

3. Use for Custom Mean/Std¶

Use normal when you need to specify mean and standard deviation.

scipy.stats.norm.rvs¶

The scipy.stats alternative for normal sampling.

1. Basic Usage¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Plot samples drawn with ``stats.norm(...).rvs`` against the N(0, 1) density."""
    # NOTE(review): rvs is called without random_state, so reproducibility
    # presumably relies on SciPy using NumPy's global generator — confirm.
    np.random.seed(0)

    sample_count = 10_000
    frozen = stats.norm(loc=0, scale=1)
    draws = frozen.rvs(sample_count)

    fig, ax = plt.subplots(figsize=(12, 3))

    # Only the bin edges from hist are reused for the density curve.
    edges = ax.hist(draws, bins=100, density=True, alpha=0.3, label='Histogram')[1]

    density = stats.norm().pdf(edges)
    ax.plot(edges, density, '--r', linewidth=2, label='Standard Normal PDF')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_xlabel('Value')
    ax.set_ylabel('Density')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Distribution Object¶

Create a frozen distribution for repeated use.

import numpy as np
from scipy import stats

def main():
    """Sample from a frozen ``stats.norm`` object and query its PDF and CDF at 10."""
    np.random.seed(42)

    center, spread = 10, 3

    # Freeze the parameters once; the same object serves every call below.
    dist = stats.norm(loc=center, scale=spread)

    # Random draws from the frozen distribution.
    samples = dist.rvs(size=5)
    print(f"Samples: {samples}")

    # Density and cumulative probability from the same object.
    print(f"PDF at 10: {dist.pdf(10):.4f}")
    print(f"CDF at 10: {dist.cdf(10):.4f}")

if __name__ == "__main__":
    main()

3. When to Use¶

Use stats.norm when you also need PDF, CDF, quantiles, or other distribution methods.

Method Comparison¶

1. All Four Methods¶

import numpy as np
from scipy import stats

def main():
    """Print five N(0, 1) draws from each of the four sampling APIs.

    The global NumPy generator is reseeded with the same value immediately
    before every draw, so each API starts from the same generator state and
    the printed arrays can be compared directly.
    """
    n = 5
    seed = 42

    # (label, sampler) pairs; each sampler is called right after reseeding.
    samplers = (
        (f"np.random.randn({n}):", lambda: np.random.randn(n)),
        (f"np.random.standard_normal(size=({n},)):",
         lambda: np.random.standard_normal(size=(n,))),
        (f"np.random.normal(0, 1, size={n}):",
         lambda: np.random.normal(0, 1, size=n)),
        (f"stats.norm(0, 1).rvs({n}):", lambda: stats.norm(0, 1).rvs(n)),
    )

    print("Standard Normal N(0,1) - 4 equivalent methods:")
    print()

    for label, draw in samplers:
        # Reseed per method; a single seed at the top would let each call
        # continue from where the previous one left the generator.
        np.random.seed(seed)
        print(label)
        print(f"  {draw()}")

if __name__ == "__main__":
    main()

2. Summary Table¶

Function	Standard Normal	General Normal	Shape Syntax
`randn`	✓	✗	Positional args
`standard_normal`	✓	✗	`size=` keyword
`normal`	✓	✓	`size=` keyword
`stats.norm.rvs`	✓	✓	Positional or `size=`

3. Recommendations¶

Quick standard normal: randn
Custom mean/std: normal
Need PDF/CDF too: stats.norm

Multivariate Normal¶

Generates samples from a multivariate normal distribution.

1. Covariance Matrix¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Scatter 1000 bivariate normal draws whose covariance has 0.8 off-diagonals."""
    np.random.seed(42)

    center = [0, 0]
    covariance = [[1, 0.8], [0.8, 1]]

    x = np.random.multivariate_normal(center, covariance, size=1000)
    print(f"Shape: {x.shape}")

    fig, ax = plt.subplots(figsize=(6, 6))

    # Column 0 goes on the horizontal axis, column 1 on the vertical axis.
    first, second = x[:, 0], x[:, 1]
    ax.scatter(first, second, alpha=0.3)

    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_title('Bivariate Normal (ρ=0.8)')
    ax.set_aspect('equal')
    plt.show()

if __name__ == "__main__":
    main()

2. Correlation Structure¶

The covariance matrix determines the shape and orientation.

import numpy as np
import matplotlib.pyplot as plt

def main():
    """Draw three scatter panels of bivariate normal samples for ρ = -0.8, 0 and 0.8."""
    np.random.seed(42)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    origin = [0, 0]
    axis_range = (-4, 4)  # shared limits so the panels are directly comparable

    for panel, rho in zip(axes, [-0.8, 0, 0.8]):
        # Unit variances, with rho as the off-diagonal covariance.
        covariance = [[1, rho], [rho, 1]]
        points = np.random.multivariate_normal(origin, covariance, size=500)

        panel.scatter(points[:, 0], points[:, 1], alpha=0.3)
        panel.set_title(f'ρ = {rho}')
        panel.set_xlim(*axis_range)
        panel.set_ylim(*axis_range)
        panel.set_aspect('equal')

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    main()

3. Higher Dimensions¶

import numpy as np

def main():
    """Draw 1000 samples from a 4D normal with identity covariance and print a summary."""
    np.random.seed(42)

    dim = 4
    zero_mean = [0] * dim
    identity_cov = np.eye(dim)  # identity covariance: independent components

    samples = np.random.multivariate_normal(zero_mean, identity_cov, size=1000)

    print(f"Shape: {samples.shape}")
    print(f"Sample mean: {samples.mean(axis=0)}")

if __name__ == "__main__":
    main()

Chi-Square Distribution¶

The distribution of a sum of squared independent standard normal random variables.

1. Degrees of Freedom¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Plot ``np.random.chisquare`` samples with 5 degrees of freedom and the χ² density."""
    np.random.seed(0)

    df = 5
    draws = np.random.chisquare(df=df, size=10_000)

    fig, ax = plt.subplots(figsize=(10, 4))

    # Only the bin edges from hist are reused for the density curve.
    edges = ax.hist(draws, bins=100, density=True, alpha=0.3)[1]

    chi2_dist = stats.chi2(df)
    ax.plot(edges, chi2_dist.pdf(edges), 'r-', linewidth=2, label=f'χ²({df}) PDF')

    ax.set_xlabel('Value')
    ax.set_ylabel('Density')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Relation to Normal¶

\(\chi^2_k = \sum_{i=1}^{k} Z_i^2\) where \(Z_i \sim \mathcal{N}(0, 1)\) are independent.

import numpy as np

def main():
    """Compare ``np.random.chisquare`` with row sums of squared ``randn`` draws.

    Prints the sample mean of each approach next to the value k.
    """
    np.random.seed(42)

    k = 5
    sample_count = 10_000

    # Method 1: draw chi-square variates directly.
    chi2_direct = np.random.chisquare(df=k, size=sample_count)

    # Method 2: one row of k standard normals per sample, squared and summed.
    normals = np.random.randn(sample_count, k)
    squared = normals ** 2
    chi2_manual = squared.sum(axis=1)

    print(f"Direct mean: {chi2_direct.mean():.2f} (expected: {k})")
    print(f"Manual mean: {chi2_manual.mean():.2f} (expected: {k})")

if __name__ == "__main__":
    main()

3. Varying df¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    """Plot chi-square densities for df = 2, 5, 10 and 15 on a shared axis."""
    grid = np.linspace(0, 30, 200)

    fig, ax = plt.subplots(figsize=(10, 4))

    # One curve per degrees-of-freedom value, all evaluated on the same grid.
    for df in [2, 5, 10, 15]:
        curve = stats.chi2(df).pdf(grid)
        ax.plot(grid, curve, linewidth=2, label=f'df={df}')

    ax.set_title('Chi-Square Distributions')
    ax.set_xlabel('x')
    ax.set_ylabel('f(x)')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()