--总访问量PV,总用户量UVcreatetableres_pv_uvcomment "page views and unique visitor"row format delimitedfields terminated by','lines terminated by'\n'STORED AS TEXTFILEasselect pv, uvfrom (selectcount(*) as pv from user_behavior1 where behavior_type='pv') t1join(selectcount(distinct user_id) as uv from user_behavior1) t2on1=1;select*from res_pv_uv;+-----------+---------+| pv | uv |+-----------+---------+| 88596886 | 987984 |+-----------+---------+
select sum(pv) as pv, count(distinct uv) as uv from (
select count(*) as pv, distinct user_id as uv from user_behavior1 where behavior_type='pv'
union all
select 0 as pv, distinct user_id as uv from user_behavior1 where behavior_type='buy'
union all
select 0 as pv, distinct user_id as uv from user_behavior1 where behavior_type='cart'
union all
select 0 as pv, distinct user_id as uv from user_behavior1 where behavior_type='fav'
) t;
select sum(case when behavior_type = 'pv' then 1 else 0 end) as pv,
count(distinct user_id) as uv
from user_behavior1;
--日均访问量,日均用户量
create table res_pv_uv_per_day
comment "page views and unique visitor each day"
row format delimited
fields terminated by ','
lines terminated by '\n'
STORED AS TEXTFILE
as
select t1.`date` as `date`, pv, uv
from(
select `date`, count(*) as pv from user_behavior1 group by `date` order by `date`
) t1
join(
select `date`, count(distinct user_id) as uv from user_behavior1 group by `date` order by `date`
) t2
on t1.`date`=t2.`date`
order by `date`;
+--------------------------+------------------------+------------------------+
| res_pv_uv_per_day1.date | res_pv_uv_per_day1.pv | res_pv_uv_per_day1.uv |
+--------------------------+------------------------+------------------------+
| 2017-11-25 | 10511597 | 705571 |
| 2017-11-26 | 10571039 | 713522 |
| 2017-11-27 | 10013455 | 709207 |
| 2017-11-28 | 9884185 | 708339 |
| 2017-11-29 | 10319060 | 719356 |
| 2017-11-30 | 10541695 | 730809 |
| 2017-12-01 | 11171505 | 753166 |
| 2017-12-02 | 13940942 | 941709 |
| 2017-12-03 | 11961006 | 917531 |
+--------------------------+------------------------+------------------------+
-- 一天的活跃时段分布
create table res_behavior_among_day
comment "page views and unique visitor each day"
row format delimited
fields terminated by ','
lines terminated by '\n'
STORED AS TEXTFILE
as
select t.hour as hour,
collect_list(map(t.behavior_type, t.count)) as tc
from(
SELECT hour(`timestamp`) AS hour,
behavior_type,
COUNT(*) AS count
FROM user_behavior1
GROUP BY behavior_type, hour(`timestamp`)
) t
group by hour
order by hour;
+---------+----------------------------------------------------+
| hour | tc |
+---------+----------------------------------------------------+
| 0 | [{"buy":64916},{"cart":192036},{"fav":103721},{"pv":3042342}] |
| 1 | [{"buy":96134},{"cart":229890},{"fav":127976},{"pv":3728498}] |
| 2 | [{"buy":127932},{"cart":266963},{"fav":147752},{"pv":4334810}] |
| 3 | [{"buy":122046},{"cart":260831},{"fav":145412},{"pv":4213518}] |
| 4 | [{"buy":118591},{"cart":255811},{"fav":140862},{"pv":4255794}] |
| 5 | [{"buy":123426},{"cart":279829},{"fav":150844},{"pv":4653933}] |
| 6 | [{"buy":122171},{"cart":277093},{"fav":148561},{"pv":4642054}] |
| 7 | [{"buy":122728},{"cart":284269},{"fav":151321},{"pv":4806704}] |
| 8 | [{"buy":116444},{"cart":279035},{"fav":148722},{"pv":4607743}] |
| 9 | [{"buy":101300},{"cart":255342},{"fav":137631},{"pv":4203395}] |
| 10 | [{"buy":95907},{"cart":253193},{"fav":133262},{"pv":4313516}] |
| 11 | [{"buy":115032},{"cart":314774},{"fav":161057},{"pv":5430878}] |
| 12 | [{"buy":133859},{"cart":393209},{"fav":191406},{"pv":6586331}] |
| 13 | [{"buy":145431},{"cart":465924},{"fav":219974},{"pv":7538382}] |
| 14 | [{"buy":138263},{"cart":486249},{"fav":232222},{"pv":7443069}] |
| 15 | [{"buy":100070},{"cart":395920},{"fav":195330},{"pv":5599901}] |
| 16 | [{"buy":52422},{"cart":164776},{"fav":94930},{"pv":2747149}] |
| 17 | [{"buy":20948},{"cart":76954},{"fav":46239},{"pv":1278813}] |
| 18 | [{"buy":10748},{"cart":41541},{"fav":25079},{"pv":692240}] |
| 19 | [{"buy":7212},{"cart":29333},{"fav":16791},{"pv":471981}] |
| 20 | [{"buy":6044},{"cart":25564},{"fav":13260},{"pv":403765}] |
| 21 | [{"buy":7351},{"cart":33462},{"fav":17158},{"pv":522063}] |
| 22 | [{"buy":16251},{"cart":73014},{"fav":36388},{"pv":1097628}] |
| 23 | [{"buy":33718},{"cart":131106},{"fav":66638},{"pv":1982379}] |
+---------+----------------------------------------------------+
select hour(`timestamp`) as hour,
sum(case when behavior_type = 'pv' then 1 else 0 end) as pv, --点击数
sum(case when behavior_type = 'fav' then 1 else 0 end) as fav, --收藏数
sum(case when behavior_type = 'cart' then 1 else 0 end) as cart, --加购物车数
sum(case when behavior_type = 'buy' then 1 else 0 end) as buy --购买数
from user_behavior1
group by hour(`timestamp`)
order by hour;
--一周用户的活跃分布
create table res_behavior_among_week
comment "page views and unique visitor each day"
row format delimited
fields terminated by ','
lines terminated by '\n'
STORED AS TEXTFILE
as
select t1.weekday as weekday ,
collect_list(map(t1.behavior_type ,
(case when weekday=6 then ceil(t1.count/2)
when weekday=7 then ceil(t1.count/2)
else t1.count end)
)) as ct
from(
select pmod(datediff(t.d, '2017-11-25')+5, 7)+1 as weekday,
t.behavior_type as behavior_type ,
sum(t.count) as count
from (
select `date` as d,
behavior_type,
COUNT(*) as count
from user_behavior1
group by behavior_type , `date`
) t
group by pmod(datediff(t.d, '2017-11-25')+5, 7)+1, t.behavior_type
order by weekday
) t1
group by t1.weekday
order by weekday;
+----------+----------------------------------------------------+
| weekday | tc |
+----------+----------------------------------------------------+
| 1 | [{"buy":218401},{"cart":539212},{"fav":289413},{"pv":8966429}] |
| 2 | [{"buy":211754},{"cart":533807},{"fav":289431},{"pv":8849193}] |
| 3 | [{"buy":223077},{"cart":554747},{"fav":299588},{"pv":9241648}] |
| 4 | [{"buy":222235},{"cart":573032},{"fav":304428},{"pv":9442000}] |
| 5 | [{"buy":212849},{"cart":642251},{"fav":314121},{"pv":10002284}] |
| 6 | [{"buy":230424},{"cart":685302},{"fav":355318},{"pv":10955227}] |
| 7 | [{"buy":224891},{"cart":626233},{"fav":322460},{"pv":10092439}] |
+----------+----------------------------------------------------+
select pmod(datediff(`date`, '2017-11-25') + 5, 7)+1 as weekday,
sum(case when behavior_type = 'pv' then 1 else 0 end) as pv, --点击数
sum(case when behavior_type = 'fav' then 1 else 0 end) as fav, --收藏数
sum(case when behavior_type = 'cart' then 1 else 0 end) as cart, --加购物车数
sum(case when behavior_type = 'buy' then 1 else 0 end) as buy --购买数
from user_behavior1
group by pmod(datediff(`date`, '2017-11-25')+5, 7)+1
order by weekday;
select t.weekday as weekday,
collect_list(map(t.behavior_type,
(case when weekday=6 then ceil(t.count/2)
when weekday=7 then ceil(t.count/2)
else t.count end)
)) as tc
from (
select pmod(datediff(`date`, '2017-11-25')+5, 7)+1 as weekday,
behavior_type,
COUNT(*) as count
from user_behavior1
group by behavior_type , pmod(datediff(`date`, '2017-11-25')+5, 7)+1
) t
group by weekday
order by weekday;
--点击/(加购物车+收藏)/购买 , 各环节转化率
create table res_conversion_rate
comment "page views and unique visitor each day"
row format delimited
fields terminated by ','
lines terminated by '\n'
STORED AS TEXTFILE
as
select pv, fav, cart, fav_cart, buy,
round(fav_cart/pv, 4) as pv2favcart,
round(buy/fav_cart, 4) as favcart2buy,
round(buy/pv, 4) as pv2buy
from(
select cast(gm['pv'] as int) as pv,
cast(gm['fav'] as int) as fav,
cast(gm['cart'] as int) as cart,
cast(gm['fav']+gm['cart'] as int) as fav_cart,
cast(gm['buy'] as int) as buy
from(
select collect(cast(behavior_type as string), cast(count as string)) as gm
from(
select behavior_type,
COUNT(*) as count
from user_behavior1
group by behavior_type
)t1
) t2
) t3;
+-----------+----------+----------+------------+----------+-------------+--------------+---------+
| pv | fav | cart | fav_cart | buy | pv2favcart | favcart2buy | pv2buy |
+-----------+----------+----------+------------+----------+-------------+--------------+---------+
| 88596886 | 2852536 | 5466118 | 8318654.0 | 1998944 | 0.0939 | 0.2403 | 0.0226 |
+-----------+----------+----------+------------+----------+-------------+--------------+---------+
--每个用户的购物情况,加工到 user_behavior_count
create table res_doublebuy_rate
comment "page views and unique visitor each day"
row format delimited
fields terminated by ','
lines terminated by '\n'
STORED AS TEXTFILE
as
select round(t2.count/uv,6) as rate
from(
select count(*) as count
from(
select count(*) as count
from user_behavior1
where behavior_type='buy'
group by user_id
) t1
where t1.count>1
) t2
join(
select uv from res_pv_uv
) t3;
+--------------------------+
| res_doublebuy_rate.rate |
+--------------------------+
| 0.446562 |
+--------------------------+