DataFrame Attributes¶

DataFrame attributes provide information about the structure and properties of your data.

columns¶

Access column labels.

import numpy as np
import pandas as pd
import yfinance as yf

# Fetch 2020 price history for the WMT ticker via yfinance; the result is a DataFrame.
df = yf.Ticker('WMT').history(start='2020-01-01', end='2020-12-31')
# .columns is an Index holding the column labels.
print(df.columns)

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits'], dtype='object')

# Labels can be picked by position; here each label is a plain string.
print(df.columns[0])  # 'Open'
print(type(df.columns[0]))  # <class 'str'>

# Convert the Index of labels into a regular Python list.
col_list = df.columns.tolist()

Access the row labels with `df.index`.

# .index holds the row labels (the output below shows a DatetimeIndex named 'Date').
print(df.index)

DatetimeIndex(['2020-01-02', '2020-01-03', ...], dtype='datetime64[ns]', name='Date', freq=None)

# Each row label of this index is a pandas Timestamp.
print(df.index[0])  # Timestamp('2020-01-02 00:00:00')
print(type(df.index[0]))  # <class 'pandas._libs.tslibs.timestamps.Timestamp'>

# The index carries its own name and dtype.
print(df.index.name)  # 'Date'
print(df.index.dtype)  # datetime64[ns]

Get the DataFrame's dimensions as a `(rows, columns)` tuple with `df.shape`.

# Load the Titanic passenger dataset from a public CSV; this rebinds df.
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)
print(df.shape)  # (891, 12)

# A list of labels selects a DataFrame, so the shape stays two-dimensional.
df_subset = df[['Survived', 'Sex']]
print(df_subset.shape)  # (891, 2)

# Still a DataFrame even with a single label inside the list.
df_col = df[['Survived']]  # DataFrame
print(df_col.shape)  # (891, 1)

# A bare label selects a Series, whose shape is a 1-tuple.
series = df['Survived']  # Series
print(series.shape)  # (891,)

Get the underlying NumPy array with `df.values`.

# .values exposes the DataFrame's data as a NumPy array.
x = df.values
print(type(x))  # <class 'numpy.ndarray'>
print(x.shape)  # Same as df.shape

# Every slice keeps its axis; every integer index drops one.
print(x[1:2, 2:3].shape)  # (1, 1)
print(x[1:2, 2].shape)    # (1,)
# With mixed column types the array has object dtype, so x[1, 2] is a plain
# Python object without a .shape attribute; np.shape() works on any scalar.
print(np.shape(x[1, 2]))  # () scalar

# Modern pandas recommends to_numpy()
arr = df.to_numpy()

Get the data type of each column with `df.dtypes`.

# .dtypes lists the data type of every column (see the output below).
print(df.dtypes)

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
...

# A DataFrame has `dtypes` (plural): one entry per column.
print(df.dtypes)

# A single column is a Series and has `dtype` (singular).
print(df['Age'].dtype)  # float64

# A DataFrame has no `dtype` attribute, so this raises AttributeError;
# the handler prints the error message instead of letting it propagate.
try:
    print(df.dtype)  # Wrong! Use dtypes
except AttributeError as e:
    print(e)

Get the total number of elements with `df.size`.

# .size is the total element count of the DataFrame.
print(df.size)  # rows × columns

# Equivalent to multiplying the two entries of .shape
print(df.shape[0] * df.shape[1])

# len() counts rows only, whereas .size counts every element.
print(len(df))  # Number of rows only
print(df.size)  # Total elements

Get the number of dimensions with `df.ndim`.

# A DataFrame is two-dimensional.
print(df.ndim)  # 2

# A single column is a Series, which is one-dimensional.
print(df['Age'].ndim)  # 1

# ndim tells the two apart when the kind of object is not known in advance.
# `data` stands for any pandas object; a Series is bound here so the snippet runs.
data = df['Age']
if data.ndim == 1:
    print("Series")
else:
    print("DataFrame")

Check whether a DataFrame is empty with `df.empty`.

# A DataFrame that holds data reports empty as False.
print(df.empty)  # False

# A DataFrame constructed with no data is empty.
empty_df = pd.DataFrame()
print(empty_df.empty)  # True

# Guard so the downstream call only runs when there is data.
# NOTE(review): process_data is not defined in this snippet — presumably a
# placeholder for your own processing function; confirm before running.
if not df.empty:
    process_data(df)

Transpose rows and columns with `df.T`.

# .T returns the transpose: rows become columns and columns become rows.
df_t = df.T
print(df_t.shape)  # Swapped dimensions

# Useful for displaying wide DataFrames: the first rows are shown with one line per column.
print(df.head().T)

# transpose() is the method spelling of the same operation.
df_transposed = df.transpose()