LeetCode DataFrame Problems

This document covers common LeetCode patterns involving pandas DataFrame operations including groupby, merge, pivot, and multi-table manipulations.

Pattern 1: GroupBy and Aggregation

LeetCode 586: Customer Placing the Largest Number of Orders

Problem: Find the customer who placed the most orders.

def largest_orders(orders: pd.DataFrame) -> pd.DataFrame:
    """Return the customer_number of the customer with the most orders.

    Counts orders per customer, then picks the label of the maximum count.
    """
    per_customer = orders.groupby('customer_number')['order_number'].count()
    # idxmax() gives the index label (customer_number) of the largest count
    return pd.DataFrame({'customer_number': [per_customer.idxmax()]})

With reset_index:

def largest_orders(orders: pd.DataFrame) -> pd.DataFrame:
    """Return every customer tied for the highest order count.

    Same result as the idxmax variant when the maximum is unique, but this
    version keeps all customers that share the top count.
    """
    tally = orders.groupby('customer_number')['order_number'].count().reset_index()
    tally.columns = ['customer_number', 'order_count']
    top = tally['order_count'].max()
    return tally.loc[tally['order_count'] == top, ['customer_number']]

LeetCode 182: Duplicate Emails

Problem: Find emails that appear more than once.

def duplicate_emails(person: pd.DataFrame) -> pd.DataFrame:
    """Return emails that appear on more than one person row."""
    # Series.reset_index(name=...) names the counts column directly
    per_email = person.groupby('email')['id'].count().reset_index(name='count')
    repeated = per_email.loc[per_email['count'] > 1, ['email']]
    return repeated.rename(columns={'email': 'Email'})

Pattern 2: Merge Operations

LeetCode 175: Combine Two Tables

Problem: Left join person and address tables.

def combine_two_tables(person: pd.DataFrame, address: pd.DataFrame) -> pd.DataFrame:
    """Left-join person onto address; people with no address get NaN city/state."""
    joined = pd.merge(person, address, how='left', on='personId')
    return joined[['firstName', 'lastName', 'city', 'state']]

LeetCode 577: Employee Bonus

Problem: Find employees with bonus < 1000 or no bonus.

def employee_bonus(employee: pd.DataFrame, bonus: pd.DataFrame) -> pd.DataFrame:
    """Return name/bonus for employees whose bonus is under 1000 or absent."""
    # Left join keeps every employee; missing bonus rows become NaN
    joined = pd.merge(employee, bonus, how='left', on='empId')
    low_or_missing = joined['bonus'].isnull() | (joined['bonus'] < 1000)
    return joined.loc[low_or_missing, ['name', 'bonus']]

Key Concepts:
- Left join preserves all employees, including those with no bonus record
- isnull() catches employees without bonus records (their bonus is NaN after the join)

LeetCode 181: Employees Earning More Than Their Managers

Problem: Self-join to compare employee and manager salaries.

def employees_earning_more(employee: pd.DataFrame) -> pd.DataFrame:
    """Return names of employees who earn strictly more than their manager.

    Self-join: each row is paired with its manager's row via managerId -> id.
    """
    joined = employee.merge(
        employee,
        left_on='managerId',
        right_on='id',
        suffixes=('_emp', '_mgr'),
    )
    earns_more = joined['salary_emp'] > joined['salary_mgr']
    return joined.loc[earns_more, ['name_emp']].rename(columns={'name_emp': 'Employee'})

Pattern 3: Pivot and Reshape

LeetCode 1179: Reformat Department Table

Problem: Pivot monthly revenue by department.

def reformat_table(department: pd.DataFrame) -> pd.DataFrame:
    """Pivot (id, revenue, month) rows into one row per id with a
    <Month>_Revenue column for every calendar month (NaN when absent).

    Bug fixed: pivot() orders its columns lexicographically ('Apr' before
    'Jan'), but the original rename list was built in calendar order and
    filtered by membership, so revenue values could be assigned the wrong
    month labels. Reindexing to the full calendar first makes the positional
    rename safe, guarantees all twelve columns exist, and returns them in
    calendar order instead of appending missing months at the end.
    """
    all_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # rows=id, columns=month, values=revenue
    pivoted = department.pivot(index='id', columns='month', values='revenue')

    # Force calendar order and create NaN columns for any missing month;
    # after this the positional rename below is unambiguous.
    pivoted = pivoted.reindex(columns=all_months)
    pivoted.columns = [f'{m}_Revenue' for m in all_months]

    return pivoted.reset_index()

LeetCode 626: Exchange Seats

Problem: Swap adjacent seat IDs.

def exchange_seats(seat: pd.DataFrame) -> pd.DataFrame:
    """Swap the ids of each adjacent pair of students; if the number of
    students is odd, the last student keeps their seat.

    Fix: work on a copy — the original added a 'new_id' column to the
    caller's DataFrame in place, mutating the input as a side effect.
    """
    result = seat.copy()
    n = len(result)

    def swapped_id(x: int) -> int:
        # Odd id with a partner after it -> shift down one seat.
        if x % 2 == 1 and x < n:
            return x + 1
        # Even id -> shift up one seat.
        if x % 2 == 0:
            return x - 1
        # Last odd id when n is odd -> unchanged.
        return x

    result['id'] = result['id'].apply(swapped_id)
    return result.sort_values('id').reset_index(drop=True)[['id', 'student']]

Pattern 4: Ranking and Window Functions

LeetCode 176: Second Highest Salary

Problem: Find the second highest distinct salary.

def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame:
    """Return the second highest distinct salary, or None if there isn't one."""
    distinct_desc = sorted(employee['salary'].unique(), reverse=True)
    second = distinct_desc[1] if len(distinct_desc) > 1 else None
    return pd.DataFrame({'SecondHighestSalary': [second]})

LeetCode 178: Rank Scores

Problem: Rank scores with dense ranking.

def rank_scores(scores: pd.DataFrame) -> pd.DataFrame:
    """Dense-rank scores descending (equal scores share a rank, no gaps)."""
    scores['rank'] = scores['score'].rank(method='dense', ascending=False)
    return scores.sort_values('score', ascending=False)[['score', 'rank']]

LeetCode 184: Department Highest Salary

Problem: Find highest paid employee(s) per department.

def department_highest_salary(
    employee: pd.DataFrame, 
    department: pd.DataFrame
) -> pd.DataFrame:
    """Return the top-paid employee(s) of each department.

    Both frames have 'name' and 'id' columns, so the merge suffixes them
    as name_x/name_y (employee/department respectively).
    """
    joined = employee.merge(department, left_on='departmentId', right_on='id')
    # transform('max') broadcasts each department's max back onto every row
    is_top = joined['salary'] == joined.groupby('departmentId')['salary'].transform('max')
    winners = joined.loc[is_top, ['name_y', 'name_x', 'salary']]
    return winners.rename(
        columns={'name_y': 'Department', 'name_x': 'Employee', 'salary': 'Salary'}
    )

Pattern 5: Date Operations

LeetCode 197: Rising Temperature

Problem: Find dates where temperature was higher than previous day.

def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    """Return ids of dates whose temperature exceeds the previous day's.

    Self-merge: each row is paired with the row exactly one day earlier,
    so non-consecutive dates simply never match.
    """
    ordered = weather.sort_values('recordDate').copy()
    ordered['prev_date'] = ordered['recordDate'] - pd.Timedelta(days=1)

    paired = ordered.merge(
        ordered,
        left_on='prev_date',
        right_on='recordDate',
        suffixes=('', '_prev'),
    )

    warmer = paired['temperature'] > paired['temperature_prev']
    return paired.loc[warmer, ['id']]

Alternative with shift:

def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    """Shift-based variant: compare each row to the previous row after
    sorting by date, keeping only gaps of exactly one day."""
    ordered = weather.sort_values('recordDate').copy()

    previous_temp = ordered['temperature'].shift(1)
    previous_date = ordered['recordDate'].shift(1)

    # Both conditions must hold: strictly warmer AND exactly one day apart
    warmer = ordered['temperature'] > previous_temp
    one_day_apart = (ordered['recordDate'] - previous_date) == pd.Timedelta(days=1)

    return ordered.loc[warmer & one_day_apart, ['id']]

Pattern 6: Multiple Aggregations

LeetCode 1193: Monthly Transactions I

Problem: Aggregate transaction counts and amounts by month and country.

def monthly_transactions(transactions: pd.DataFrame) -> pd.DataFrame:
    """Per (month, country): transaction count/total and approved count/total.

    NOTE: like the original, this adds a 'month' column to the input frame.
    """
    transactions['month'] = transactions['trans_date'].dt.strftime('%Y-%m')

    # Precompute the approval mask once; the amount lambda aligns it by index
    approved_mask = transactions['state'].eq('approved')

    return transactions.groupby(['month', 'country']).agg(
        trans_count=('id', 'count'),
        approved_count=('state', lambda s: s.eq('approved').sum()),
        trans_total_amount=('amount', 'sum'),
        approved_total_amount=('amount', lambda a: a[approved_mask.loc[a.index]].sum()),
    ).reset_index()

Alternative approach:

def monthly_transactions(transactions: pd.DataFrame) -> pd.DataFrame:
    """Helper-column variant: precompute approval flag and approved amount,
    then a single plain aggregation does all four summaries.

    NOTE: like the original, this adds helper columns to the input frame.
    """
    transactions['month'] = transactions['trans_date'].dt.strftime('%Y-%m')
    transactions['is_approved'] = transactions['state'] == 'approved'
    # amount * bool zeroes out non-approved rows, so a plain sum works
    transactions['approved_amount'] = transactions['amount'] * transactions['is_approved']

    summary = (
        transactions
        .groupby(['month', 'country'], as_index=False)
        .agg({'id': 'count', 'is_approved': 'sum',
              'amount': 'sum', 'approved_amount': 'sum'})
    )
    summary.columns = ['month', 'country', 'trans_count', 'approved_count',
                       'trans_total_amount', 'approved_total_amount']
    return summary

Pattern 7: Conditional Updates with loc

LeetCode 1873: Calculate Special Bonus

Problem: Assign bonus based on conditions.

def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Bonus equals salary for odd employee_id whose name doesn't start
    with 'M'; everyone else gets 0."""
    qualifies = (
        (employees['employee_id'] % 2 == 1)
        & ~employees['name'].str.startswith('M')
    )

    # where() keeps salary where the mask holds and substitutes 0 elsewhere
    employees['bonus'] = employees['salary'].where(qualifies, 0)

    return employees[['employee_id', 'bonus']].sort_values('employee_id')

Pattern 8: Outer Joins for Finding Missing Data

LeetCode 1148: Article Views I

Problem: Find authors who viewed their own articles.

def article_views(views: pd.DataFrame) -> pd.DataFrame:
    """Return distinct ids of authors who viewed at least one of their own
    articles, sorted ascending."""
    own_views = views.loc[views['author_id'] == views['viewer_id'], ['author_id']]
    authors = own_views.drop_duplicates().rename(columns={'author_id': 'id'})
    return authors.sort_values('id')

Pattern Summary

| Pattern | Methods Used | Common Use Case |
|---|---|---|
| GroupBy Aggregation | `groupby()`, `count()`, `sum()` | Counting, totaling |
| Merge/Join | `merge()`, `how='left'/'inner'/'outer'` | Combining tables |
| Self Merge | `merge()` with same DataFrame | Comparing rows |
| Pivot | `pivot()`, `pivot_table()` | Reshaping data |
| Ranking | `rank()`, `nlargest()` | Finding top N |
| Date Operations | `dt` accessor, `shift()`, `Timedelta` | Time comparisons |
| Conditional Update | `loc[]` with mask | Row-wise assignment |
| Multiple Aggregations | `agg()` with dict | Complex summaries |

Common DataFrame Operations Quick Reference

# Illustrative snippets only: `df`, `df1`, `df2`, `condition`, etc. are
# placeholders, and `np` refers to numpy.

# GroupBy with multiple aggregations (list of funcs -> MultiIndex columns)
df.groupby('col').agg({
    'value': ['sum', 'mean', 'count'],
    'other': 'max'
})

# Merge with different keys (both key columns are kept in the result)
df1.merge(df2, left_on='id1', right_on='id2', how='left')

# Self merge; empty left suffix keeps the child columns' original names
df.merge(df, left_on='parent_id', right_on='id', suffixes=('', '_parent'))

# Conditional column (vectorized if/else)
df['new_col'] = np.where(condition, value_if_true, value_if_false)

# Transform (broadcast aggregation back to the original row count)
df['group_mean'] = df.groupby('group')['value'].transform('mean')

# Rank within groups ('dense' leaves no gaps after ties)
df['rank'] = df.groupby('group')['value'].rank(method='dense', ascending=False)