-- Gaps and Islands: find consecutive date ranges from sparse event data.
--
-- Given a table of user logins (one row per day a user logged in),
-- find each user's "streaks" — consecutive days of activity.
--
-- This is the classic gaps-and-islands problem. The trick: subtract
-- a row_number from each date. Consecutive dates produce the same
-- "island_id" because both the date and row_number increment by 1.

WITH daily_logins AS (
    -- Sample data: user activity log
    SELECT user_id, login_date FROM (VALUES
        ('alice', '2025-01-01'), ('alice', '2025-01-02'), ('alice', '2025-01-03'),
        ('alice', '2025-01-07'), ('alice', '2025-01-08'),
        ('alice', '2025-01-15'),
        ('bob',   '2025-01-01'),
        ('bob',   '2025-01-05'), ('bob',   '2025-01-06'), ('bob',   '2025-01-07'),
        ('bob',   '2025-01-08'), ('bob',   '2025-01-09')
    ) AS t(user_id, login_date)
),

islands AS (
    SELECT
        user_id,
        login_date,
        -- The magic: date minus row_number is constant within a streak
        DATE(
            login_date,
            '-' || ROW_NUMBER() OVER (
                PARTITION BY user_id
                ORDER BY login_date
            ) || ' days'
        ) AS island_id
    FROM daily_logins
)

SELECT
    user_id,
    MIN(login_date)                             AS streak_start,
    MAX(login_date)                             AS streak_end,
    COUNT(*)                                    AS streak_days,
    CASE
        WHEN COUNT(*) >= 7  THEN 'weekly'
        WHEN COUNT(*) >= 3  THEN 'building'
        WHEN COUNT(*) = 1   THEN 'one-off'
        ELSE 'short'
    END                                         AS streak_type
FROM islands
GROUP BY user_id, island_id
ORDER BY user_id, streak_start;

-- Result:
-- alice | 2025-01-01 | 2025-01-03 | 3 | building
-- alice | 2025-01-07 | 2025-01-08 | 2 | short
-- alice | 2025-01-15 | 2025-01-15 | 1 | one-off
-- bob   | 2025-01-01 | 2025-01-01 | 1 | one-off
-- bob   | 2025-01-05 | 2025-01-09 | 5 | building
