dtype and Shape¶
Type System¶
1. Built-in dtypes¶
NumPy provides a rich type system:
import numpy as np
# Integer types (signed; the comment gives the representable range)
int8 = np.array([1, 2], dtype=np.int8) # -128 to 127
int16 = np.array([1, 2], dtype=np.int16) # -32768 to 32767
int32 = np.array([1, 2], dtype=np.int32) # -2**31 to 2**31 - 1
int64 = np.array([1, 2], dtype=np.int64) # -2**63 to 2**63 - 1; the usual default integer (platform-dependent)
# Unsigned integers
uint8 = np.array([1, 2], dtype=np.uint8) # 0 to 255
uint16 = np.array([1, 2], dtype=np.uint16) # 0 to 65535
# Floating point
float16 = np.array([1.0], dtype=np.float16) # Half precision (2 bytes)
float32 = np.array([1.0], dtype=np.float32) # Single precision (4 bytes)
float64 = np.array([1.0], dtype=np.float64) # Double precision (8 bytes, default float)
# Complex (real and imaginary parts stored as a pair of floats)
complex64 = np.array([1+2j], dtype=np.complex64) # two float32 components
complex128 = np.array([1+2j], dtype=np.complex128) # two float64 components
# Boolean
bool_arr = np.array([True, False], dtype=np.bool_) # one byte per element
# String
str_arr = np.array(['a', 'b'], dtype='U10') # Fixed-width Unicode, max 10 chars per element
2. Type Inspection¶
arr = np.array([1, 2, 3]) # dtype is inferred from the values (platform default integer)
print(arr.dtype) # int64 (repr(arr.dtype) would show dtype('int64'))
print(arr.dtype.name) # int64
print(arr.dtype.kind) # i (signed integer)
print(arr.dtype.itemsize) # 8 (bytes per element)
3. Type Casting¶
# Mixing integer and float operands: NumPy upcasts the result implicitly
arr1 = np.array([1, 2, 3])  # integer array
arr2 = np.array([1.0, 2.0, 3.0])  # float64 array
result = np.add(arr1, arr2)  # float64
# Explicit conversion with astype: the fractional part is discarded
arr_int = np.array([1.5, 2.7, 3.9])
arr_int_cast = arr_int.astype(np.int32)  # [1, 2, 3]
# Ask NumPy whether a cast preserves values (default rule is "safe")
can_cast = np.can_cast(np.float64, np.int32)
print(can_cast)  # False
Shape Metadata¶
1. Dimension Properties¶
# One sample array per rank
arr1d = np.array([1, 2, 3, 4])  # 1D
arr2d = np.array([[1, 2, 3], [4, 5, 6]])  # 2D
arr3d = np.arange(24).reshape((2, 3, 4))  # 3D
# Report shape then ndim for each:
# (4,) 1, then (2, 3) 2, then (2, 3, 4) 3
for sample in (arr1d, arr2d, arr3d):
    print(sample.shape)
    print(sample.ndim)
2. Reshaping¶
arr = np.arange(12)
# Target shape given in full
reshaped = arr.reshape((3, 4))
print(reshaped.shape)  # (3, 4)
# A single -1 lets NumPy work out that axis from the element count
reshaped = arr.reshape((3, -1))  # columns inferred -> (3, 4)
reshaped = arr.reshape((-1, 4))  # rows inferred -> (3, 4)
# Collapse back to one dimension
flat = reshaped.reshape((-1,))  # (12,)
3. Dimension Manipulation¶
arr = np.array([1, 2, 3])
# Add dimension: np.newaxis inserts a size-1 axis at that index position
arr_2d = arr[np.newaxis, :] # (1, 3) - row vector
arr_2d = arr[:, np.newaxis] # (3, 1) - column vector
# Using reshape: -1 lets NumPy infer the remaining axis length
arr_2d = arr.reshape(1, -1) # (1, 3)
arr_2d = arr.reshape(-1, 1) # (3, 1)
# Squeeze (remove size-1 dimensions)
# The input keeps its own name so that arr_squeezed really is the squeezed result
arr_3d = np.array([[[1, 2, 3]]]) # (1, 1, 3)
arr_squeezed = arr_3d.squeeze() # (3,)
print(arr_squeezed.shape) # (3,)
Structured dtypes¶
1. Record Arrays¶
# Record layout: 10-char Unicode name, 32-bit integer age, 32-bit float weight
dt = np.dtype([('name', 'U10'), ('age', np.int32), ('weight', np.float32)])
# One tuple per record, values given in field order
rows = [('Alice', 25, 55.5), ('Bob', 30, 75.0)]
data = np.array(rows, dtype=dt)
print(data['name'])  # ['Alice' 'Bob']
print(data['age'])  # [25 30]
print(data[0])  # ('Alice', 25, 55.5)
2. Field Access¶
# Access by field: indexing with a field name returns a view, not a copy
ages = data['age']
ages[0] = 26 # writes through to the underlying structured array
print(data[0]['age']) # 26 - the field view shares memory with data
3. Nested Structures¶
# Inner record: a 2D point stored as two 32-bit floats
position_dt = np.dtype([('x', 'f4'), ('y', 'f4')])
# Outer record embeds the point dtype as the type of its 'position' field
dt = np.dtype([('name', 'U10'), ('position', position_dt)])
# Nested fields are supplied as nested tuples
people = [('Alice', (1.0, 2.0)), ('Bob', (3.0, 4.0))]
data = np.array(people, dtype=dt)
print(data['position']['x'])  # [1. 3.]
Type Promotion¶
1. Automatic Promotion¶
# Integer operand combined with a float operand yields a float result
arr_int = np.array([1, 2, 3])
arr_float = np.array([1.0, 2.0, 3.0])
result = np.add(arr_int, arr_float)
print(result.dtype)  # float64
# Narrow integer combined with a wide integer yields the wide type
arr_8 = np.ones(1, dtype=np.int8)
arr_64 = np.ones(1, dtype=np.int64)
result = np.add(arr_8, arr_64)
print(result.dtype)  # int64
2. Result Type¶
# Predict the dtype an operation would produce without running it
int_and_float = (np.dtype(np.int8), np.dtype(np.float32))
result_dtype = np.result_type(*int_and_float)
print(result_dtype)  # float32
3. Promotion Rules¶
# Promotion ladder: bool < int < float < complex
# Each pair resolves to the type higher on the ladder:
# int32, then float64, then complex64
ladder_pairs = (
    (np.bool_, np.int32),
    (np.int32, np.float64),
    (np.float32, np.complex64),
)
for lower, higher in ladder_pairs:
    print(np.result_type(lower, higher))
Memory Efficiency¶
1. Choosing dtypes¶
# Values 0..99 fit in one byte each, so int8 is enough
small_ints = np.arange(0, 100, dtype='int8')  # 100 bytes
large_ints = np.arange(0, 100, dtype='int64')  # 800 bytes
# Fewer bytes per float means fewer significant digits
half = np.ones(1, dtype='float16')  # 2 bytes, less precision
double = np.ones(1, dtype='float64')  # 8 bytes, more precision
2. String Optimization¶
# Fixed-width Unicode: every element reserves the full width, 4 bytes per char
words = ['a', 'bb', 'ccc']
arr = np.array(words, dtype='U3')  # width matches the longest word
print(arr.dtype)  # <U3 (printed without quotes; '<' marks little-endian)
print(arr.nbytes)  # 36 bytes (3 items x 3 chars x 4 bytes/char)
# A width far beyond the longest word only pads each element
arr_big = np.array(words, dtype='U100')  # wasteful
print(arr_big.nbytes)  # 1200 bytes (3 items x 100 chars x 4 bytes/char)
3. Boolean Masking¶
# A comparison produces a bool array (1 byte per element) usable as a mask
arr = np.arange(1000000)
mask = np.greater(arr, 500000)  # True where the element exceeds 500000
filtered = arr[mask]  # keeps only the elements where mask is True
Shape Broadcasting¶
1. Dimension Compatibility¶
# Identical shapes combine element by element
arr1 = np.ones(shape=(3, 4))
arr2 = np.ones(shape=(3, 4))
result = np.add(arr1, arr2)  # ✅ (3, 4)
# Matching trailing axes: the 2D operand is reused for each of the 5 leading slices
arr1 = np.ones(shape=(5, 3, 4))
arr2 = np.ones(shape=(3, 4))
result = np.add(arr1, arr2)  # ✅ (5, 3, 4)
# Axes of length 1 are stretched to the other operand's length
arr1 = np.ones(shape=(3, 1))
arr2 = np.ones(shape=(1, 4))
result = np.add(arr1, arr2)  # ✅ (3, 4)
2. Shape Rules¶
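Shapes are compared dimension by dimension, starting from the trailing (rightmost) axis. Broadcasting works when, for every pair of dimensions: - the dimensions are equal, or - one of them is 1, or - one array has no such dimension (it is treated as size 1)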
# (3, 4) + (4,) → (3, 4) + (1, 4) → (3, 4) ✅
# (3, 4) + (3,) → (3, 4) + (1, 3) → trailing dims 4 vs 3 differ and neither is 1 → incompatible ❌
# (3, 4) + (3, 1) → (3, 4) ✅
3. Explicit Broadcasting¶
arr1 = np.array([[1], [2], [3]])  # column, shape (3, 1)
arr2 = np.array([10, 20, 30])  # flat, shape (3,)
# Spell out the leading size-1 axis instead of relying on the implicit rule
arr2_broadcast = arr2.reshape(1, -1)  # (1, 3)
result = np.add(arr1, arr2_broadcast)  # (3, 3)