Binary Files¶
Binary files store raw bytes rather than human-readable text. Working with binary mode is essential for images, audio, executables, and custom data formats.
Opening Binary Files¶
Use the 'b' mode flag to read and write files as raw bytes.
1. Mode Flags¶
Combine 'b' with read, write, or append modes.
# Read binary
with open("image.png", "rb") as f:
    data = f.read()
# Write binary
with open("output.bin", "wb") as f:
    f.write(data)
# Append binary
with open("log.bin", "ab") as f:
    f.write(b"\x00\x01\x02")
# Read and write binary ("r+b" requires the file to exist already)
with open("data.bin", "r+b") as f:
    content = f.read()
    # Example edit; the result has the same length as the original content
    modified = content.replace(b"\x00", b"\xff")
    f.seek(0)  # Rewind so the write overwrites from the start
    f.write(modified)
2. Bytes vs Strings¶
Binary mode works with bytes, not str.
# Text mode returns str
with open("text.txt", "r") as f:
    data = f.read()
print(type(data)) # <class 'str'>
# Binary mode returns bytes
with open("text.txt", "rb") as f:
    data = f.read()
print(type(data)) # <class 'bytes'>
3. No Encoding¶
Binary mode bypasses text encoding and newline translation entirely.
# Text mode uses encoding
with open("file.txt", "r", encoding="utf-8") as f:
    text = f.read()
# Binary mode: raw bytes, no encoding
with open("file.txt", "rb") as f:
    raw = f.read()
text = raw.decode("utf-8") # Manual decode
Reading Binary Data¶
Methods for reading raw bytes from files.
1. Read Entire File¶
Load complete file contents into memory.
with open("photo.jpg", "rb") as f:
    image_data = f.read()
print(len(image_data)) # File size in bytes
print(image_data[:10]) # First 10 bytes
2. Read Fixed Chunks¶
Read a specific number of bytes at a time.
with open("large_file.bin", "rb") as f:
    # Read first 1024 bytes
    header = f.read(1024)
    # Read next chunk (replaced by the loop below; handle it first if needed)
    chunk = f.read(4096)
    # Empty bytes means EOF, which ends the loop
    while chunk := f.read(4096):
        process(chunk)  # process() is a placeholder for your own handler
3. Read Into Buffer¶
Use readinto() for memory-efficient reading.
buffer = bytearray(4096)
with open("data.bin", "rb") as f:
    # Read into existing buffer; returns how many bytes were filled in
    bytes_read = f.readinto(buffer)
print(f"Read {bytes_read} bytes")
# Process buffer[:bytes_read]
Writing Binary Data¶
Methods for writing raw bytes to files.
1. Write Bytes¶
Write bytes objects directly to file.
data = b"\x89PNG\r\n\x1a\n" # PNG header
with open("header.bin", "wb") as f:
    f.write(data)
# Write bytearray
buffer = bytearray([0, 1, 2, 3, 4])
with open("buffer.bin", "wb") as f:
    f.write(buffer)
2. Write Multiple Chunks¶
Write data in segments for large files.
chunks = [b"chunk1", b"chunk2", b"chunk3"]
with open("output.bin", "wb") as f:
    for chunk in chunks:
        f.write(chunk)
# Using writelines (no separator added)
with open("output.bin", "wb") as f:
    f.writelines(chunks)
3. Buffered Writing¶
Control write buffering behavior.
# Unbuffered (immediate writes)
with open("log.bin", "wb", buffering=0) as f:
    f.write(b"immediate")
# Line buffered (not for binary)
# buffering=1 only works for text mode
# Custom buffer size
with open("data.bin", "wb", buffering=8192) as f:
    f.write(b"buffered")
File Position¶
Navigate within binary files using seek and tell.
1. Current Position¶
Use tell() to get the current byte position.
with open("data.bin", "rb") as f:
    print(f.tell()) # 0 (start)
    f.read(10)
    print(f.tell()) # 10
    f.read(5)
    print(f.tell()) # 15
2. Seek Absolute¶
Move to a specific byte position with seek().
with open("data.bin", "rb") as f:
    f.seek(100) # Go to byte 100
    chunk = f.read(50) # Read bytes 100-149
    f.seek(0) # Back to start
    header = f.read(10)
3. Seek Relative¶
Use the whence parameter for relative seeking.
import os
with open("data.bin", "rb") as f:
    # From start (default, whence=0)
    f.seek(10, os.SEEK_SET)
    # From current position (whence=1)
    f.seek(5, os.SEEK_CUR) # Now at 15
    # From end (whence=2)
    f.seek(-10, os.SEEK_END) # 10 bytes before end
Struct Module¶
Pack and unpack binary data with defined formats.
1. Basic Packing¶
Convert Python values to bytes.
import struct
# Pack integer and float
# "<" fixes the byte order to little-endian (see Byte Order below), so the
# bytes shown here are the same on every platform
data = struct.pack("<if", 42, 3.14)
print(data) # b'*\x00\x00\x00\xc3\xf5H@'
print(len(data)) # 8 bytes
# Format characters: i=int, f=float, d=double
# h=short, b=signed char, s=bytes (fixed-length char[])
2. Basic Unpacking¶
Convert bytes back to Python values.
import struct
data = b'*\x00\x00\x00\xc3\xf5H@'
# Unpack to tuple
# "<" matches the little-endian layout of the literal above, so the result
# does not depend on the machine's native byte order
values = struct.unpack("<if", data)
print(values) # (42, 3.140000104904175)
# Unpack single value (unpack always returns a tuple, hence the [0])
num = struct.unpack("<i", data[:4])[0]
print(num) # 42
3. Byte Order¶
Specify endianness in format string.
import struct
num = 0x1234_5678
# No prefix: native byte order, which depends on the machine
native = struct.pack("I", num)
# "<" prefix: little-endian, least significant byte first
little = struct.pack("<I", num)
print(little.hex()) # 78563412
# ">" prefix: big-endian (network order), most significant byte first
big = struct.pack(">I", num)
print(big.hex()) # 12345678
Common Patterns¶
Practical binary file operations.
1. File Header Reading¶
Parse structured file headers.
import struct
def read_bmp_header(filename):
    """Read BMP image header.

    Args:
        filename: Path of the BMP file to inspect.

    Returns:
        A dict with "size" (the file size field) and "offset" (the data
        offset field) taken from the 14-byte file header.

    Raises:
        ValueError: If the file does not start with the b"BM" signature,
            or ends before the 12 bytes that follow it.
    """
    with open(filename, "rb") as f:
        # BMP signature
        sig = f.read(2)
        if sig != b"BM":
            raise ValueError("Not a BMP file")
        # File size, reserved, data offset
        fields = f.read(12)
    # read() may return fewer bytes than requested at EOF; report that as a
    # bad file rather than letting struct.error escape.
    if len(fields) != 12:
        raise ValueError("Truncated BMP header")
    size, _, _, offset = struct.unpack("<IHHI", fields)
    return {"size": size, "offset": offset}
# header = read_bmp_header("image.bmp")
2. Copy Binary File¶
Efficiently copy large binary files.
def copy_binary(src, dst, chunk_size=8192):
    """Copy binary file in chunks.

    Args:
        src: Path of the file to read.
        dst: Path of the file to create or overwrite.
        chunk_size: Number of bytes requested from each read() call.

    Raises:
        ValueError: If chunk_size is 0.
    """
    if chunk_size == 0:
        # read(0) always returns b"", so the loop would stop immediately and
        # leave an empty destination file without reporting any error.
        raise ValueError("chunk_size must not be 0")
    with open(src, "rb") as fin, open(dst, "wb") as fout:
        # An empty bytes object is falsy and signals end of file
        while chunk := fin.read(chunk_size):
            fout.write(chunk)
# copy_binary("source.bin", "dest.bin")
3. Modify In Place¶
Update specific bytes within a file.
def patch_byte(filename, offset, value):
    """Change single byte at offset.

    Args:
        filename: Path of an existing file; it is opened in "r+b" mode.
        offset: Byte position, counted from the start of the file.
        value: New byte value as an int; bytes([value]) accepts 0-255.
    """
    with open(filename, "r+b") as f:
        f.seek(offset)
        # bytes([value]) builds a one-byte object from the integer
        f.write(bytes([value]))
# patch_byte("data.bin", 100, 0xFF)