Covariance and Correlation¶
np.cov¶
1. Basic Usage¶
np.cov computes the covariance matrix. Note: it exists only as a module-level function; ndarray has no .cov() method, so calling a.cov() raises AttributeError.
import numpy as np
def main():
    """Print a random 2x5 sample and its covariance matrix.

    Also attempts ``a.cov()`` to show that covariance is exposed as the
    function ``np.cov`` and not as an array method; the resulting
    ``AttributeError`` is caught and printed instead of propagating.
    """
    samples = np.random.normal(size=(2, 5))
    print("a =", samples, "", sep="\n")

    # np.cov is a function, not a method: the attribute lookup below fails.
    try:
        method_result = samples.cov()
    except AttributeError as exc:
        print(f"Error: {exc}")
    else:
        print(method_result)
    print()

    print("np.cov(a) =")
    print(np.cov(samples))


if __name__ == "__main__":
    main()
2. Row Convention¶
By default (rowvar=True), np.cov treats each row as a variable and each column as an observation. Pass rowvar=False when the observations are stored in rows instead.
import numpy as np
def main():
    """Show np.cov's default layout: one variable per row, one observation per column.

    Builds two seeded variables (``y`` is derived from ``x`` plus noise),
    stacks them into a (2, 5) array, and prints the covariance matrix
    together with its individual entries.
    """
    np.random.seed(42)

    # 2 variables, 5 observations each; y is correlated with x by construction.
    x = np.random.randn(5)
    noise = np.random.randn(5)
    y = 2 * x + noise * 0.5

    data = np.vstack((x, y))  # shape (2, 5)
    print(f"data.shape = {data.shape}")
    print("data =", data, "", sep="\n")

    cov_matrix = np.cov(data)
    print("Covariance matrix:", cov_matrix, "", sep="\n")

    # Diagonal entries are the variances; the off-diagonal is the covariance.
    var_x = cov_matrix[0, 0]
    var_y = cov_matrix[1, 1]
    cov_xy = cov_matrix[0, 1]
    print(f"Var(x) = {var_x:.4f}")
    print(f"Var(y) = {var_y:.4f}")
    print(f"Cov(x, y) = {cov_xy:.4f}")


if __name__ == "__main__":
    main()
3. Single Variable¶
For a 1D array, np.cov returns the sample variance (normalized by N - 1, the same value as np.var with ddof=1) as a 0D array.
import numpy as np
def main():
    """Compare np.cov on a 1D array with the sample variance from np.var."""
    x = np.arange(1, 6)

    # For a single variable the covariance reduces to the variance;
    # ddof=1 makes np.var compute the sample variance.
    cov_result = np.cov(x)
    var_result = np.var(x, ddof=1)

    report = [
        f"x = {x}",
        f"np.cov(x) = {cov_result}",
        f"np.var(x, ddof=1) = {var_result}",
    ]
    print("\n".join(report))


if __name__ == "__main__":
    main()
np.corrcoef¶
1. Basic Usage¶
np.corrcoef computes the Pearson correlation coefficient matrix. Like np.cov, it is a function only, not an ndarray method.
import numpy as np
def main():
    """Print a random 2x5 sample and its correlation coefficient matrix.

    The call ``a.corrcoef()`` is attempted first to show that no such array
    method exists; the ``AttributeError`` is caught and its message printed.
    """
    matrix = np.random.normal(size=(2, 5))
    print("a =")
    print(matrix)
    print()

    # np.corrcoef is a function, not a method, so this lookup raises.
    try:
        via_method = matrix.corrcoef()
    except AttributeError as err:
        print(f"Error: {err}")
    else:
        print(via_method)
    print()

    correlation = np.corrcoef(matrix)
    print("np.corrcoef(a) =", correlation, sep="\n")


if __name__ == "__main__":
    main()
2. Interpretation¶
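Correlation coefficients range from -1 to 1: values near +1 indicate a strong positive linear relationship, values near -1 a strong negative one, and values near 0 little or no linear relationship.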
import numpy as np
def main():
    """Print correlations of x with positively, negatively and un-related series."""
    np.random.seed(42)

    # Create correlated variables. The draw order is kept fixed so the
    # seeded output is reproducible.
    x = np.random.randn(100)
    y_pos = 0.8 * x + 0.2 * np.random.randn(100)   # positive correlation
    y_neg = -0.8 * x + 0.2 * np.random.randn(100)  # negative correlation
    y_none = np.random.randn(100)                  # no correlation

    def corr_with_x(other):
        # Off-diagonal entry of the 2x2 correlation matrix of (x, other).
        return np.corrcoef(x, other)[0, 1]

    print(f"Corr(x, y_pos): {corr_with_x(y_pos):+.4f}")
    print(f"Corr(x, y_neg): {corr_with_x(y_neg):+.4f}")
    print(f"Corr(x, y_none): {corr_with_x(y_none):+.4f}")


if __name__ == "__main__":
    main()
3. Multiple Variables¶
import numpy as np
def main():
    """Print the 3x3 correlation matrix of three seeded variables and its pairwise entries."""
    np.random.seed(42)

    # 3 variables, 100 observations; x2 and x3 are built from earlier variables.
    x1 = np.random.randn(100)
    x2 = 0.5 * x1 + np.random.randn(100)
    x3 = -0.3 * x1 + 0.4 * x2 + np.random.randn(100)

    # One row per variable, which is the layout np.corrcoef expects by default.
    stacked = np.vstack((x1, x2, x3))
    corr_matrix = np.corrcoef(stacked)

    print("Correlation matrix:")
    print(np.round(corr_matrix, 3))
    print()

    r12 = corr_matrix[0, 1]
    r13 = corr_matrix[0, 2]
    r23 = corr_matrix[1, 2]
    print("Interpretation:")
    print(f" x1-x2: {r12:+.3f}")
    print(f" x1-x3: {r13:+.3f}")
    print(f" x2-x3: {r23:+.3f}")


if __name__ == "__main__":
    main()
cov vs corrcoef¶
1. Key Difference¶
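Correlation is covariance normalized by the standard deviations of both variables, so it is scale-independent: multiplying a variable by a positive constant changes the covariance but not the correlation.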
import numpy as np
def main():
    """Contrast covariance and correlation when one variable is multiplied by 100."""
    np.random.seed(42)
    x = np.random.randn(100)
    y = 2 * x + np.random.randn(100)

    # Scale y by 100
    y_scaled = 100 * y

    def pair_entry(matrix):
        # The (0, 1) entry holds the statistic between the two inputs.
        return matrix[0, 1]

    cov_plain = pair_entry(np.cov(x, y))
    cov_scaled = pair_entry(np.cov(x, y_scaled))
    corr_plain = pair_entry(np.corrcoef(x, y))
    corr_scaled = pair_entry(np.corrcoef(x, y_scaled))

    print("Covariance (scale-dependent):")
    print(f" Cov(x, y): {cov_plain:.4f}")
    print(f" Cov(x, y_scaled): {cov_scaled:.4f}")
    print()
    print("Correlation (scale-independent):")
    print(f" Corr(x, y): {corr_plain:.4f}")
    print(f" Corr(x, y_scaled): {corr_scaled:.4f}")


if __name__ == "__main__":
    main()
2. Formula Relation¶
\[\text{Corr}(X, Y) = \frac{\text{Cov}(X, Y)}{\sigma_X \cdot \sigma_Y}\]
import numpy as np
def main():
    """Recompute a correlation by hand as Cov(x, y) / (std_x * std_y) and compare with np.corrcoef."""
    np.random.seed(42)
    x = np.random.randn(100)
    y = 0.8 * x + np.random.randn(100) * 0.5

    # Manual correlation calculation. ddof=1 is used for the standard
    # deviations, the same setting the single-variable example uses to
    # match np.cov's result.
    cov_xy = np.cov(x, y)[0, 1]
    std_x = np.std(x, ddof=1)
    std_y = np.std(y, ddof=1)
    denominator = std_x * std_y
    manual_corr = cov_xy / denominator

    numpy_corr = np.corrcoef(x, y)[0, 1]

    print(f"Cov(x, y): {cov_xy:.4f}")
    print(f"Std(x): {std_x:.4f}")
    print(f"Std(y): {std_y:.4f}")
    print()
    print(f"Manual corr: {manual_corr:.4f}")
    print(f"np.corrcoef: {numpy_corr:.4f}")


if __name__ == "__main__":
    main()
3. Diagonal Elements¶
import numpy as np
def main():
    """Print the diagonals of the covariance and correlation matrices of two seeded series."""
    np.random.seed(42)
    x = np.random.randn(100) * 5  # std ≈ 5
    y = np.random.randn(100) * 2  # std ≈ 2
    data = np.vstack((x, y))

    # The covariance diagonal holds each variable's variance.
    covariance = np.cov(data)
    print("Covariance matrix diagonal (variances):")
    print(f" Var(x): {covariance[0, 0]:.4f}")
    print(f" Var(y): {covariance[1, 1]:.4f}")
    print()

    # The correlation diagonal relates each variable to itself.
    correlation = np.corrcoef(data)
    print("Correlation matrix diagonal (always 1):")
    print(f" Corr(x, x): {correlation[0, 0]:.4f}")
    print(f" Corr(y, y): {correlation[1, 1]:.4f}")


if __name__ == "__main__":
    main()
Practical Examples¶
1. Stock Returns¶
import numpy as np
def main():
    """Print an aligned correlation table for three simulated stock return series.

    Each stock's daily return is a multiple of a shared seeded "market"
    series plus its own noise. The table header and every row are built
    from ``labels`` with one fixed cell width, so the column titles line up
    with the numbers beneath them and the column count follows ``labels``
    rather than a hard-coded ``3``.
    """
    np.random.seed(42)

    # Simulated daily returns for 3 stocks (252 observations each).
    market = np.random.randn(252) * 0.01
    stock_a = 1.2 * market + np.random.randn(252) * 0.005
    stock_b = 0.8 * market + np.random.randn(252) * 0.008
    stock_c = -0.5 * market + np.random.randn(252) * 0.01

    labels = ['A', 'B', 'C']
    returns = np.vstack([stock_a, stock_b, stock_c])
    corr = np.corrcoef(returns)

    print("Correlation matrix of returns:")

    # A signed coefficient formatted with three decimals ("+0.000") is six
    # characters wide; header cells are right-aligned to that width.
    cell_width = 6
    label_width = max(len(label) for label in labels)

    header_cells = " ".join(f"{label:>{cell_width}}" for label in labels)
    print(f"{'':<{label_width}} {header_cells}")

    for label, row_values in zip(labels, corr):
        row = " ".join(f"{value:+{cell_width}.3f}" for value in row_values)
        print(f"{label:<{label_width}} {row}")


if __name__ == "__main__":
    main()
2. Portfolio Variance¶
import numpy as np
def main():
    """Compute and print the variance and standard deviation of a two-asset portfolio."""
    np.random.seed(42)

    # Returns for 2 assets (one row per asset), scaled to about 10% volatility.
    asset_returns = np.random.randn(2, 100) * 0.1

    # Portfolio weights
    weights = np.array([0.6, 0.4])

    # Covariance matrix of the two return series
    cov_matrix = np.cov(asset_returns)

    # Portfolio variance: w' * Cov * w, evaluated left to right.
    weighted_cov = weights @ cov_matrix
    portfolio_var = weighted_cov @ weights
    portfolio_std = np.sqrt(portfolio_var)

    print("Covariance matrix:")
    print(np.round(cov_matrix, 4))
    print()
    print(f"Weights: {weights}")
    print(f"Portfolio variance: {portfolio_var:.4f}")
    print(f"Portfolio std dev: {portfolio_std:.4f}")


if __name__ == "__main__":
    main()
3. Feature Selection¶
import numpy as np
def main():
    """Print each feature's correlation with a target, then the feature-feature correlation matrix."""
    np.random.seed(42)

    # Features and target
    n = 1000
    x1 = np.random.randn(n)
    x2 = 0.9 * x1 + np.random.randn(n) * 0.1  # highly correlated with x1
    x3 = np.random.randn(n)                   # independent
    target = 2 * x1 + 0.5 * x3 + np.random.randn(n) * 0.5

    features = np.vstack((x1, x2, x3))

    # Correlation with target, one feature (row) at a time.
    print("Correlation with target:")
    for position, feature in enumerate(features, start=1):
        coefficient = np.corrcoef(feature, target)[0, 1]
        print(f" x{position}: {coefficient:+.4f}")

    # Feature correlation matrix
    print()
    print("Feature correlation matrix:")
    feature_corr = np.corrcoef(features)
    print(np.round(feature_corr, 3))


if __name__ == "__main__":
    main()