Skip to content

Normal Distributions

NumPy provides multiple functions for generating samples from normal (Gaussian) distributions.

np.random.randn

Generates samples from the standard normal distribution \(\mathcal{N}(0, 1)\).

1. Basic Usage

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(0)

    n_samples = 10_000
    data = np.random.randn(n_samples)

    fig, ax = plt.subplots(figsize=(12, 3))

    _, bins, _ = ax.hist(data, bins=100, density=True, alpha=0.3, label='Histogram')

    pdf = stats.norm().pdf(bins)
    ax.plot(bins, pdf, '--r', linewidth=2, label='Standard Normal PDF')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Shape Argument

Pass dimensions as separate arguments.

import numpy as np

def main():
    np.random.seed(42)

    # 1D array
    a = np.random.randn(5)
    print(f"1D: {a.shape}")

    # 2D array
    b = np.random.randn(3, 4)
    print(f"2D: {b.shape}")

    # 3D array
    c = np.random.randn(2, 3, 4)
    print(f"3D: {c.shape}")

if __name__ == "__main__":
    main()

3. Quick Sampling

Use randn for quick standard normal samples with positional shape.

np.random.standard_normal

Alternative syntax for standard normal samples using size keyword.

1. Size Keyword

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(0)

    n_samples = 10_000
    data = np.random.standard_normal(size=(n_samples,))

    fig, ax = plt.subplots(figsize=(12, 3))

    _, bins, _ = ax.hist(data, bins=100, density=True, alpha=0.3, label='Histogram')

    pdf = stats.norm().pdf(bins)
    ax.plot(bins, pdf, '--r', linewidth=2, label='Standard Normal PDF')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Difference from randn

Uses size keyword tuple instead of positional dimension arguments.

import numpy as np

def main():
    np.random.seed(42)

    # randn: positional arguments
    a = np.random.randn(3, 4)

    # standard_normal: size keyword
    b = np.random.standard_normal(size=(3, 4))

    print(f"randn shape: {a.shape}")
    print(f"standard_normal shape: {b.shape}")

if __name__ == "__main__":
    main()

3. Equivalent Results

Both produce standard normal samples; choice is stylistic.

np.random.normal

Generates samples from a general normal distribution \(\mathcal{N}(\mu, \sigma^2)\).

1. Parameters

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(0)

    loc = 5      # mean (μ)
    scale = 2    # standard deviation (σ)
    n_samples = 10_000

    data = np.random.normal(loc=loc, scale=scale, size=(n_samples,))

    fig, ax = plt.subplots(figsize=(12, 3))

    _, bins, _ = ax.hist(data, bins=100, density=True, alpha=0.3, label='Histogram')

    pdf = stats.norm(loc=loc, scale=scale).pdf(bins)
    ax.plot(bins, pdf, '--r', linewidth=2, label=f'N({loc}, {scale}²) PDF')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Scaling Relation

\(X \sim \mathcal{N}(\mu, \sigma^2)\) is equivalent to \(X = \mu + \sigma Z\) where \(Z \sim \mathcal{N}(0, 1)\).

import numpy as np

def main():
    np.random.seed(42)

    mu, sigma = 5, 2
    n = 10_000

    # Method 1: np.random.normal
    x1 = np.random.normal(loc=mu, scale=sigma, size=n)

    # Method 2: transform standard normal
    np.random.seed(42)
    z = np.random.randn(n)
    x2 = mu + sigma * z

    print(f"Method 1 mean: {x1.mean():.4f}")
    print(f"Method 2 mean: {x2.mean():.4f}")

if __name__ == "__main__":
    main()

3. Use for Custom Mean/Std

Use normal when you need to specify mean and standard deviation.

scipy.stats.norm.rvs

The scipy.stats alternative for normal sampling.

1. Basic Usage

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(0)

    n_samples = 10_000
    data = stats.norm(loc=0, scale=1).rvs(n_samples)

    fig, ax = plt.subplots(figsize=(12, 3))

    _, bins, _ = ax.hist(data, bins=100, density=True, alpha=0.3, label='Histogram')

    pdf = stats.norm().pdf(bins)
    ax.plot(bins, pdf, '--r', linewidth=2, label='Standard Normal PDF')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_xlabel('Value')
    ax.set_ylabel('Density')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Distribution Object

Create a frozen distribution for repeated use.

import numpy as np
from scipy import stats

def main():
    np.random.seed(42)

    # Create distribution object
    dist = stats.norm(loc=10, scale=3)

    # Sample
    samples = dist.rvs(size=5)
    print(f"Samples: {samples}")

    # Also get PDF, CDF, etc.
    print(f"PDF at 10: {dist.pdf(10):.4f}")
    print(f"CDF at 10: {dist.cdf(10):.4f}")

if __name__ == "__main__":
    main()

3. When to Use

Use stats.norm when you also need PDF, CDF, quantiles, or other distribution methods.

Method Comparison

1. All Four Methods

import numpy as np
from scipy import stats

def main():
    np.random.seed(0)
    n = 5

    print("Standard Normal N(0,1) - 4 equivalent methods:")
    print()

    np.random.seed(42)
    print(f"np.random.randn({n}):")
    print(f"  {np.random.randn(n)}")

    np.random.seed(42)
    print(f"np.random.standard_normal(size=({n},)):")
    print(f"  {np.random.standard_normal(size=(n,))}")

    np.random.seed(42)
    print(f"np.random.normal(0, 1, size={n}):")
    print(f"  {np.random.normal(0, 1, size=n)}")

    np.random.seed(42)
    print(f"stats.norm(0, 1).rvs({n}):")
    print(f"  {stats.norm(0, 1).rvs(n)}")

if __name__ == "__main__":
    main()

2. Summary Table

Function Standard Normal General Normal Shape Syntax
randn Positional args
standard_normal size= keyword
normal size= keyword
stats.norm.rvs Positional or size=

3. Recommendations

  • Quick standard normal: randn
  • Custom mean/std: normal
  • Need PDF/CDF too: stats.norm

Multivariate Normal

Generates samples from a multivariate normal distribution.

1. Covariance Matrix

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(42)

    mean = [0, 0]
    cov = [[1, 0.8], [0.8, 1]]

    x = np.random.multivariate_normal(mean, cov, size=1000)
    print(f"Shape: {x.shape}")

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(x[:, 0], x[:, 1], alpha=0.3)
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_title('Bivariate Normal (ρ=0.8)')
    ax.set_aspect('equal')
    plt.show()

if __name__ == "__main__":
    main()

2. Correlation Structure

The covariance matrix determines the shape and orientation.

import numpy as np
import matplotlib.pyplot as plt

def main():
    np.random.seed(42)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    correlations = [-0.8, 0, 0.8]

    for ax, rho in zip(axes, correlations):
        cov = [[1, rho], [rho, 1]]
        x = np.random.multivariate_normal([0, 0], cov, size=500)
        ax.scatter(x[:, 0], x[:, 1], alpha=0.3)
        ax.set_title(f'ρ = {rho}')
        ax.set_xlim(-4, 4)
        ax.set_ylim(-4, 4)
        ax.set_aspect('equal')

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    main()

3. Higher Dimensions

import numpy as np

def main():
    np.random.seed(42)

    # 4D multivariate normal
    mean = [0, 0, 0, 0]
    cov = np.eye(4)  # independent components

    samples = np.random.multivariate_normal(mean, cov, size=1000)
    print(f"Shape: {samples.shape}")
    print(f"Sample mean: {samples.mean(axis=0)}")

if __name__ == "__main__":
    main()

Chi-Square Distribution

A distribution derived from squared normal random variables.

1. Degrees of Freedom

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    np.random.seed(0)

    df = 5
    data = np.random.chisquare(df=df, size=10_000)

    fig, ax = plt.subplots(figsize=(10, 4))

    _, bins, _ = ax.hist(data, bins=100, density=True, alpha=0.3)

    pdf = stats.chi2(df).pdf(bins)
    ax.plot(bins, pdf, 'r-', linewidth=2, label=f'χ²({df}) PDF')

    ax.set_xlabel('Value')
    ax.set_ylabel('Density')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()

2. Relation to Normal

\(\(\chi^2_k = \sum_{i=1}^{k} Z_i^2\)\) where \(Z_i \sim \mathcal{N}(0, 1)\).

import numpy as np

def main():
    np.random.seed(42)

    k = 5
    n_samples = 10_000

    # Method 1: np.random.chisquare
    chi2_direct = np.random.chisquare(df=k, size=n_samples)

    # Method 2: sum of squared normals
    z = np.random.randn(n_samples, k)
    chi2_manual = (z ** 2).sum(axis=1)

    print(f"Direct mean: {chi2_direct.mean():.2f} (expected: {k})")
    print(f"Manual mean: {chi2_manual.mean():.2f} (expected: {k})")

if __name__ == "__main__":
    main()

3. Varying df

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def main():
    x = np.linspace(0, 30, 200)

    fig, ax = plt.subplots(figsize=(10, 4))

    for df in [2, 5, 10, 15]:
        pdf = stats.chi2(df).pdf(x)
        ax.plot(x, pdf, linewidth=2, label=f'df={df}')

    ax.set_xlabel('x')
    ax.set_ylabel('f(x)')
    ax.set_title('Chi-Square Distributions')
    ax.legend()
    plt.show()

if __name__ == "__main__":
    main()