Hive留存計算

準備表

CREATE EXTERNAL TABLE ods_app_log (
    sdk_ver string
    ,time_zone string
    ,commit_id string
    ,commit_time string
    ,pid string
    ,app_token string
    ,app_id string
    ,device_id string
    ,device_id_type string
    ,release_channel string
    ,app_ver_name string
    ,app_ver_code string
    ,os_name string
    ,os_ver string
    ,LANGUAGE string
    ,country string
    ,manufacture string
    ,device_model string
    ,resolution string
    ,net_type string
    ,account string
    ,app_device_id string
    ,mac string
    ,android_id string
    ,imei string
    ,cid_sn string
    ,build_num string
    ,mobile_data_type string
    ,promotion_channel string
    ,carrier string
    ,city string
    ,user_id string
    ) partitioned BY (
    day string
    ,os string
    ) row format delimited fields terminated BY '\001' location '/app-log-data/clean';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'android') location '/app-log-data/clean/2017-09-21/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'ios') location '/app-log-data/clean/2017-09-21/ios';


ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'android') location '/app-log-data/clean/2017-09-22/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'ios') location '/app-log-data/clean/2017-09-22/ios';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'android') location '/app-log-data/clean/2017-09-23/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'ios') location '/app-log-data/clean/2017-09-23/ios';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-24',os = 'android') location '/app-log-data/clean/2017-09-24/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-25',os = 'ios') location '/app-log-data/clean/2017-09-25/ios';

--新增用戶表
CREATE EXTERNAL TABLE etl_user_new_day (
    sdk_ver string
    ,time_zone string
    ,commit_id string
    ,commit_time string
    ,pid string
    ,app_token string
    ,app_id string
    ,device_id string
    ,device_id_type string
    ,release_channel string
    ,app_ver_name string
    ,app_ver_code string
    ,os_name string
    ,os_ver string
    ,LANGUAGE string
    ,country string
    ,manufacture string
    ,device_model string
    ,resolution string
    ,net_type string
    ,account string
    ,app_device_id string
    ,mac string
    ,android_id string
    ,imei string
    ,cid_sn string
    ,build_num string
    ,mobile_data_type string
    ,promotion_channel string
    ,carrier string
    ,city string
    ,user_id string
    ) partitioned BY (
    day string
    ) row format delimited fields terminated BY '\001';
    
-- 1 歷史用戶表
create table etl_user_history(user_id string);
insert into etl_user_history(user_id)
select distinct user_id from ods_app_log;


-- 2 當日新增用戶表:存所有字段(每個人時間最早的一條),帶有一個分區字段:day string;
create table etl_user_new_day like etl_user_active_day;

中間處理

當日活躍-歷史用戶表 --> 新增用戶表的當日分區
insert  into etl_user_new_day partition(day='2017-09-21')
SELECT sdk_ver
    ,time_zone
    ,commit_id
    ,commit_time
    ,pid
    ,app_token
    ,app_id
    ,device_id
    ,device_id_type
    ,release_channel
    ,app_ver_name
    ,app_ver_code
    ,os_name
    ,os_ver
    ,LANGUAGE
    ,country
    ,manufacture
    ,device_model
    ,resolution
    ,net_type
    ,account
    ,app_device_id
    ,mac
    ,android_id
    ,imei
    ,cid_sn
    ,build_num
    ,mobile_data_type
    ,promotion_channel
    ,carrier
    ,city
    ,a.user_id
from  ods_app_log a left join  etl_user_history b on a.user_id = b.user_id
where a.day='2017-09-21' and b.user_id is null;

-- 2 將當日新增用戶的user_id追加到歷史表
insert into table etl_user_history
select distinct user_id from etl_user_new_day where day='2017-09-21';

-- 1 日新維度統計報表--數據建模
create table dim_user_new_day(os_name string,city string,release_channel string,app_ver_name string,cnts int)
partitioned by (day string, dim string);

計算留存

--次日留存用戶統計
--昨天的數據跟今天的數據關聯,yestoday=today-1天
select yestoday.day
,count(distinct yestoday.user_id) cnt1
,count(distinct today.user_id) cnt2 
from
(select * from ods_app_log where day>='2017-09-21' and day<='2017-09-24')  as yestoday
left  join 
(select * from  ods_app_log where day>='2017-09-21' and day<='2017-09-24')  as today 
on yestoday.day =date_add(today.day,-1) and yestoday.user_id=today.user_id
group by yestoday.day;


select yestoday.day
,count(distinct yestoday.user_id) as cnt1
,count(distinct today.user_id) as cnt2 
from
(select user_id,day from ods_app_log where day='2017-09-21')  as yestoday
left  join 
(select user_id,day from  ods_app_log where day='2017-09-22')  as today 
on yestoday.user_id=today.user_id
group by yestoday.day;

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章