Posted by Matthew, Oct. 11, 2023, 5:25 p.m.
Question about Top 5 content by number of watchers queries
I'm very confused as to how these queries differ:
with cte1 as (
SELECT nds.content_id, nds.account_id, SUM(duration) as watched_minutes
FROM netflix_content nc
JOIN netflix_daily_streaming nds
ON nc.content_id = nds.content_id
WHERE nds.date <= release_date + 28
GROUP BY 1, 2
),
cte2 as (
SELECT c1.content_id, COUNT(CASE WHEN watched_minutes > 600 THEN account_id ELSE NULL END) as uu_cnt
FROM cte1 c1
WHERE c1.watched_minutes > 600
GROUP BY 1
ORDER BY 2 DESC
)
SELECT *
FROM cte2
ORDER BY uu_cnt DESC
LIMIT 5
vs
with cte1 as (
SELECT nds.content_id, nds.account_id, SUM(duration) as watched_minutes
FROM netflix_content nc
JOIN netflix_daily_streaming nds
ON nc.content_id = nds.content_id
WHERE nds.date <= DATE_ADD(nc.release_date, INTERVAL 28 DAY)
GROUP BY 1, 2
),
cte2 as (
SELECT c1.content_id, COUNT(CASE WHEN watched_minutes > 600 THEN account_id ELSE NULL END) as uu_cnt
FROM cte1 c1
WHERE c1.watched_minutes > 600
GROUP BY 1
ORDER BY 2 DESC
)
SELECT *
FROM cte2
ORDER BY uu_cnt DESC
LIMIT 5
The first one passes while the second one doesn't. Does anyone have an explanation?