-- Gaps and Islands: find consecutive date ranges from sparse event data. -- -- Given a table of user logins (one row per day a user logged in), -- find each user's "streaks" — consecutive days of activity. -- -- This is the classic gaps-and-islands problem. The trick: subtract -- a row_number from each date. Consecutive dates produce the same -- "island_id" because both the date and row_number increment by 1. WITH daily_logins AS ( -- Sample data: user activity log SELECT user_id, login_date FROM (VALUES ('alice', '2025-01-01'), ('alice', '2025-01-02'), ('alice', '2025-01-03'), ('alice', '2025-01-07'), ('alice', '2025-01-08'), ('alice', '2025-01-15'), ('bob', '2025-01-01'), ('bob', '2025-01-05'), ('bob', '2025-01-06'), ('bob', '2025-01-07'), ('bob', '2025-01-08'), ('bob', '2025-01-09') ) AS t(user_id, login_date) ), islands AS ( SELECT user_id, login_date, -- The magic: date minus row_number is constant within a streak DATE( login_date, '-' || ROW_NUMBER() OVER ( PARTITION BY user_id ORDER BY login_date ) || ' days' ) AS island_id FROM daily_logins ) SELECT user_id, MIN(login_date) AS streak_start, MAX(login_date) AS streak_end, COUNT(*) AS streak_days, CASE WHEN COUNT(*) >= 7 THEN 'weekly' WHEN COUNT(*) >= 3 THEN 'building' WHEN COUNT(*) = 1 THEN 'one-off' ELSE 'short' END AS streak_type FROM islands GROUP BY user_id, island_id ORDER BY user_id, streak_start; -- Result: -- alice | 2025-01-01 | 2025-01-03 | 3 | building -- alice | 2025-01-07 | 2025-01-08 | 2 | short -- alice | 2025-01-15 | 2025-01-15 | 1 | one-off -- bob | 2025-01-01 | 2025-01-01 | 1 | one-off -- bob | 2025-01-05 | 2025-01-09 | 5 | building
The classic gaps-and-islands problem solved with window functions. Find consecutive date ranges from sparse event data.
Log in to leave a comment.