SP500 Analysis
A comprehensive example demonstrating pandas operations for analyzing S&P 500 stock data.
SP500 Class Design
Build a class to download and analyze S&P 500 data. The methods shown in the following sections are all defined inside this class.
1. Class Structure
import pandas as pd
import yfinance as yf
class SP500:
    """
    Container for S&P 500 tickers plus their fundamental and price data.

    Every attribute starts out empty; the fetch and merge methods fill
    them in later.
    """

    def __init__(self):
        # Raw tables, one for fundamentals and one for closing prices.
        self.fundamental_data = pd.DataFrame()
        self.price_data = pd.DataFrame()
        # Combined table; stays empty until the data is merged.
        self.data = pd.DataFrame()
        # Ticker symbols to download.
        self.tickers = []
2. Fetch Tickers (method of `SP500`)
def fetch_sp500_tickers(self):
    """
    Load the S&P 500 constituent symbols into ``self.tickers``.

    Reads the first HTML table on the Wikipedia constituents page and
    takes its 'Symbol' column. Requires network access.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    constituents = pd.read_html(wiki_url)[0]
    self.tickers = constituents['Symbol'].tolist()
    # Swap dots for dashes (e.g. 'BRK.B' becomes 'BRK-B') before the
    # symbols are passed on to yfinance.
    self.tickers = [symbol.replace('.', '-') for symbol in self.tickers]
3. Fetch Data (method of `SP500`)
def fetch_data(self):
    """
    Download one year of closing prices and a few fundamentals per ticker.

    For every symbol in ``self.tickers`` this reads ``yf.Ticker(...).info``
    and ``.history(period='1y')`` and stores the results on the instance:

    * ``self.price_data`` - the 'Close' column of each ticker, renamed to
      the ticker symbol and concatenated column-wise.
    * ``self.fundamental_data`` - one row per ticker with the columns
      'Ticker', 'PER (Trailing)', 'PBR' and 'Market Cap'. A value is None
      when the corresponding key is missing from ``info``.

    A ticker whose download raises is reported with ``print`` and skipped.
    When nothing could be fetched, ``price_data`` is an empty DataFrame and
    ``fundamental_data`` is empty but still has its four columns, so the
    later 'Ticker' lookup keeps working.
    """
    fundamental_columns = ['Ticker', 'PER (Trailing)', 'PBR', 'Market Cap']
    infos = []
    closes = []
    for ticker in self.tickers:
        try:
            stock = yf.Ticker(ticker)
            info = stock.info
            df = stock.history(period='1y')
            close = df[['Close']].rename(columns={'Close': ticker})
            row = {
                'Ticker': ticker,
                'PER (Trailing)': info.get('trailingPE'),
                'PBR': info.get('priceToBook'),
                'Market Cap': info.get('marketCap'),
            }
        except Exception as e:
            # Deliberate best effort: one bad ticker must not abort the run.
            print(f"Failed to fetch {ticker}: {e}")
        else:
            # Append both pieces together so the price and fundamental
            # tables always describe the same set of tickers.
            closes.append(close)
            infos.append(row)
    # pd.concat raises ValueError on an empty list, so guard that case.
    self.price_data = pd.concat(closes, axis=1) if closes else pd.DataFrame()
    self.fundamental_data = pd.DataFrame(infos, columns=fundamental_columns)
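Data Merging
Combine price and fundamental data into a single table with one row per ticker. The methods in this section also belong to the `SP500` class.
1. Merge Method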
def merge_data(self):
    """
    Build ``self.data`` by joining prices onto the fundamentals.

    ``self.fundamental_data`` is indexed by its 'Ticker' column, and
    ``self.price_data`` is transposed so that each ticker becomes a row.
    The left join keeps every fundamentals row.
    """
    fundamentals = self.fundamental_data.set_index('Ticker')
    prices_by_ticker = self.price_data.transpose()
    self.data = fundamentals.join(prices_by_ticker, how='left')
2. Get Data
def get_data(self):
    """Return the table stored in ``self.data`` (the object itself, not a copy)."""
    merged = self.data
    return merged
3. Usage Example
# Example Usage
# The calls must run in this order: tickers first, then the per-ticker
# download, then the merge that fills `data`.
sp500 = SP500()
# Read the constituent symbols from Wikipedia (needs network access).
sp500.fetch_sp500_tickers()
# One yfinance lookup per ticker; failures are printed and skipped.
sp500.fetch_data()
# Join prices onto fundamentals, one row per ticker.
sp500.merge_data()
data = sp500.get_data()
# Show the first rows as a quick sanity check.
print(data.head())
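Analysis Operations
Common analysis patterns with the data. The sector examples need a 'Sector' column; `fetch_data` as written above does not collect one, so add it to `data` first.
1. Filter by Sector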
# Assumes `data` already has a 'Sector' column; keeps only the rows whose
# sector is exactly 'Finance'.
finance_df = data.loc[data['Sector'] == 'Finance']
2. Group Statistics
# Per-sector summary: total market cap, mean trailing P/E, and the mean and
# standard deviation of price-to-book. Needs a 'Sector' column in `data`.
# Because 'PBR' is given a list of functions, the result has two-level
# column labels such as ('PBR', 'mean') and ('PBR', 'std').
sector_stats = data.groupby('Sector').agg({
'Market Cap': 'sum',
'PER (Trailing)': 'mean',
'PBR': ['mean', 'std']
})
3. Top Performers
# The ten rows of `data` with the largest 'Market Cap', biggest first.
top_10 = data.nlargest(n=10, columns='Market Cap')
Visualization Integration
Combine pandas with matplotlib.
1. Sector Distribution
import matplotlib.pyplot as plt
# Total market cap per sector, drawn as a bar chart.
# Needs a 'Sector' column in `data`.
sector_caps = data.groupby('Sector')['Market Cap'].sum()
sector_caps.plot.bar(figsize=(12, 6))
plt.title('Market Cap by Sector')
plt.ylabel('Market Cap ($)')
plt.show()
2. Price Correlation
# Pairwise Pearson correlation between the tickers' closing-price columns.
# NOTE(review): this correlates price levels, not returns - confirm that is
# the intended measure.
price_corr = sp500.price_data.corr(method='pearson')
3. Returns Analysis
# Row-over-row percentage change of each ticker's closing price.
returns = sp500.price_data.pct_change()
# Plot the ten tickers with the highest mean return.
returns.mean().nlargest(10).plot.bar()
plt.title('Top 10 Average Daily Returns')
plt.show()
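Best Practices
Guidelines for large-scale data analysis. The snippets below are fragments: names such as `ticker` and `tickers` must be defined by the surrounding code.
1. Error Handling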
# Always handle API errors gracefully: report the failure instead of
# letting it stop the script. `ticker` must be defined beforehand.
try:
    data = yf.download(ticker)
except Exception as exc:
    print(f"Error: {exc}")
2. Incremental Loading
# For large datasets, process in batches: walk `tickers` in slices of at
# most `batch_size` items (the last slice may be shorter).
batch_size = 50
for start in range(0, len(tickers), batch_size):
    batch = tickers[start:start + batch_size]
    # Process batch
3. Caching Results
# Save intermediate results so the download does not have to be repeated.
data.to_pickle('sp500_data.pkl')
# Load later
# NOTE: unpickling can execute arbitrary code - only load pickle files you
# wrote yourself or otherwise trust.
data = pd.read_pickle('sp500_data.pkl')