Skip to content

Searching Arrays

np.searchsorted

1. Basic Usage

np.searchsorted finds insertion points to maintain sorted order.

import numpy as np

def main():
    a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    v = 3.14

    idx = np.searchsorted(a, v)

    print(f"Array: {a}")
    print(f"Value: {v}")
    print(f"Insert at index: {idx}")

if __name__ == "__main__":
    main()

Output:

Array: [0 1 2 3 4 5 6 7 8 9]
Value: 3.14
Insert at index: 4

2. Left vs Right

side parameter controls behavior for equal values.

import numpy as np

def main():
    a = np.array([1, 2, 2, 2, 3, 4, 5])
    v = 2

    left_idx = np.searchsorted(a, v, side='left')
    right_idx = np.searchsorted(a, v, side='right')

    print(f"Array: {a}")
    print(f"Value: {v}")
    print(f"Left index:  {left_idx}")
    print(f"Right index: {right_idx}")

if __name__ == "__main__":
    main()

Output:

Array: [1 2 2 2 3 4 5]
Value: 2
Left index:  1
Right index: 4

3. Multiple Values

import numpy as np

def main():
    a = np.array([0, 10, 20, 30, 40, 50])
    values = np.array([5, 15, 25, 35])

    indices = np.searchsorted(a, values)

    print(f"Array:   {a}")
    print(f"Values:  {values}")
    print(f"Indices: {indices}")

if __name__ == "__main__":
    main()

Practical Uses

1. Binning Data

Assign values to bins.

import numpy as np

def main():
    bins = np.array([0, 10, 20, 30, 40, 50])
    data = np.array([5, 15, 25, 35, 45, 55, -5])

    bin_indices = np.searchsorted(bins, data)

    print(f"Bins: {bins}")
    print(f"Data: {data}")
    print(f"Bin indices: {bin_indices}")
    print()

    for d, idx in zip(data, bin_indices):
        if idx == 0:
            print(f"  {d}: below first bin")
        elif idx == len(bins):
            print(f"  {d}: above last bin")
        else:
            print(f"  {d}: bin [{bins[idx-1]}, {bins[idx]})")

if __name__ == "__main__":
    main()

2. Interpolation Lookup

Find surrounding values for interpolation.

import numpy as np

def main():
    # Known data points
    x_known = np.array([0, 1, 2, 3, 4, 5])
    y_known = np.array([0, 1, 4, 9, 16, 25])  # y = x^2

    # Query point
    x_query = 2.5

    # Find position
    idx = np.searchsorted(x_known, x_query)

    # Linear interpolation
    x0, x1 = x_known[idx-1], x_known[idx]
    y0, y1 = y_known[idx-1], y_known[idx]

    t = (x_query - x0) / (x1 - x0)
    y_interp = y0 + t * (y1 - y0)

    print(f"Query x: {x_query}")
    print(f"Between x[{idx-1}]={x0} and x[{idx}]={x1}")
    print(f"Interpolated y: {y_interp}")
    print(f"Actual x^2: {x_query**2}")

if __name__ == "__main__":
    main()

3. Histogram Counts

import numpy as np

def main():
    np.random.seed(42)
    data = np.random.randn(1000)

    bin_edges = np.array([-3, -2, -1, 0, 1, 2, 3])

    # Find bin for each data point
    bin_idx = np.searchsorted(bin_edges, data)

    # Count per bin (excluding out of range)
    counts = np.bincount(bin_idx, minlength=len(bin_edges)+1)

    print("Bin edges:", bin_edges)
    print("Counts:", counts[1:-1])  # exclude under/over
    print()

    # Verify with np.histogram
    hist_counts, _ = np.histogram(data, bins=bin_edges)
    print("np.histogram:", hist_counts)

if __name__ == "__main__":
    main()

Requirements

1. Sorted Input

np.searchsorted requires the input array to be sorted.

import numpy as np

def main():
    # Correct: sorted array
    a_sorted = np.array([1, 3, 5, 7, 9])
    idx = np.searchsorted(a_sorted, 4)
    print(f"Sorted array: {a_sorted}")
    print(f"Index for 4: {idx}")
    print()

    # Incorrect: unsorted array gives wrong results
    a_unsorted = np.array([5, 1, 9, 3, 7])
    idx_wrong = np.searchsorted(a_unsorted, 4)
    print(f"Unsorted array: {a_unsorted}")
    print(f"Index for 4: {idx_wrong} (incorrect!)")

if __name__ == "__main__":
    main()

2. Sort First

import numpy as np

def main():
    data = np.array([5, 1, 9, 3, 7])

    # Sort first
    sorted_data = np.sort(data)

    # Now searchsorted works correctly
    idx = np.searchsorted(sorted_data, 4)

    print(f"Original: {data}")
    print(f"Sorted:   {sorted_data}")
    print(f"Index for 4: {idx}")

if __name__ == "__main__":
    main()

np.searchsorted uses binary search, so it's O(log n).

import numpy as np

def main():
    # Even with large arrays, searchsorted is fast
    large_array = np.arange(1_000_000)

    # Find insertion point for many values
    queries = np.array([100, 50000, 999999])

    indices = np.searchsorted(large_array, queries)

    print(f"Array size: {len(large_array):,}")
    print(f"Queries: {queries}")
    print(f"Indices: {indices}")

if __name__ == "__main__":
    main()