-- Give every row of the date dimension a sequential number, ordered by day ascending.
-- NOTE(review): downstream use as a calendar index assumes one row per calendar day
-- with no gaps or duplicates in dim_date — confirm.
select
    day,
    row_number() over (order by day) as daynum
from hive.ods.dim_date
接下來，我們需要將用戶登錄日誌按照 uid 分組，在每個分組內按日期排序，然後為每一行計算出行號（row number）。
-- Number each user's distinct login days in ascending day order.
-- Only rows with a non-empty uid and day >= 20190801 are considered
-- (day is presumably a yyyymmdd integer — confirm against the table schema).
with logins as (
    select
        uid,
        day
    from hive.traffic.access_user
    where uid <> ''
      and day >= 20190801
)
select
    uid,
    day,
    -- uid is constant inside its own partition, so ordering by day alone is sufficient
    row_number() over (partition by uid order by day) as rownum
from logins
-- grouping collapses repeated (uid, day) rows so each login day is numbered once
group by uid, day
接下來就是計算兩個行號的差值：差值相同的記錄屬於同一段連續登錄的日期。完整 SQL 如下：
-- Report, for every user, each run of consecutive login days: the first day of
-- the run and the number of days in it.
--
-- Approach: number the user's distinct login days (login_idx) and number the
-- calendar days (day_idx). Within one unbroken run both counters advance by one
-- per day, so day_idx - login_idx stays constant and can serve as the run's key.
-- NOTE(review): this relies on dim_date holding exactly one row per calendar day
-- with no gaps — confirm.
with user_days as (
    -- one row per (uid, day); day is presumably a yyyymmdd integer — confirm
    select
        uid,
        day
    from hive.traffic.access_user
    where uid <> ''
      and day >= 20190801
    group by uid, day
),
login_seq as (
    -- position of each login day within its user's own history
    select
        uid,
        day,
        row_number() over (partition by uid order by day) as login_idx
    from user_days
),
calendar_seq as (
    -- position of each day within the whole date dimension
    select
        day,
        row_number() over (order by day) as day_idx
    from hive.ods.dim_date
),
streaks as (
    -- login days missing from dim_date are dropped by the inner join
    select
        l.uid,
        l.day,
        c.day_idx - l.login_idx as streak_key
    from login_seq as l
    inner join calendar_seq as c
        on l.day = c.day
)
select
    uid,
    min(day) as "連續登錄開始日",
    count(*) as "連續登錄天數"
from streaks
group by uid, streak_key