Hive 螞蟻森林面試題訓練

背景說明:
以下表記錄了用戶每天的螞蟻森林低碳生活領取的記錄流水。
table_name:user_carbon
user_id data_dt low_carbon
用戶 日期 減少碳排放(g)

螞蟻森林植物換購表,用於記錄申領環保植物所需要減少的碳排放量
table_name: plant_carbon
plant_id plant_name low_carbon
植物編號 植物名 換購植物所需要的碳

----題目
1.螞蟻森林植物申領統計
問題:假設2017年1月1日開始記錄低碳數據(user_carbon),假設2017年10月1日之前滿足申領條件的用戶都申領了一顆p004-胡楊,
剩餘的能量全部用來領取“p002-沙柳” 。
統計在10月1日累計申領“p002-沙柳” 排名前10的用戶信息;以及他比後一名多領了幾顆沙柳。
得到的統計結果如下表樣式:
user_id plant_count less_count(比後一名多領了幾顆沙柳)
u_101 1000 100
u_088 900 400
u_103 500 …

1,將日期修改成date日期
-- Part 1, step 1 (referred to as t1 in later steps):
-- normalise data_dt to a zero-padded 'yyyy-MM-dd' string.
-- The sample rows show data_dt written like 2017/1/2, so '/' is first
-- replaced by '-' before formatting.
-- The pattern must be 'yyyy-MM-dd': in Java date patterns 'YYYY' is the
-- week-based year and 'DD' is the day of the year, which yields wrong
-- dates for anything after January.
select
    user_id,
    date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
    low_carbon
from user_carbon;

2,選擇使用者在2017-10-1以前,前11的總排放量,並排序.
-- Part 1, step 2: total low_carbon per user before 2017-10-01,
-- keeping the 11 largest totals.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
)
select
    user_id,
    sum(low_carbon) as sumcarbon
from normalized
-- dat is a zero-padded string, so the cutoff literal must be zero-padded
-- too; comparing against '2017-10-1' would also admit 2017-10-01..09.
where dat < '2017-10-01'
group by user_id
order by sumcarbon desc
limit 11;

3,在2的基礎上進行排序
-- Part 1, step 3 (referred to as t3 in later steps):
-- rank the per-user totals and keep the 11 largest.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
totals as (
    -- carbon accumulated per user before 2017-10-01
    select
        user_id,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat < '2017-10-01'
    group by user_id
)
select
    user_id,
    sumcarbon,
    rank() over (order by sumcarbon desc) as rown
from totals
-- the limit needs its own order by; ordering inside a subquery does not
-- decide which rows an outer limit keeps
order by sumcarbon desc
limit 11;

4,計算每人領取樹的數量

-- Part 1, step 4: number of trees claimed by each of the top 11 users.
-- NOTE(review): 215 and 19 are presumably the low_carbon costs of
-- p004 and p002 in plant_carbon -- confirm against that table.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
totals as (
    -- carbon accumulated per user before 2017-10-01
    select
        user_id,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat < '2017-10-01'
    group by user_id
),
top_users as (
    -- the 11 largest totals; order by and limit are kept together so the
    -- selected rows are well defined
    select
        user_id,
        sumcarbon
    from totals
    order by sumcarbon desc
    limit 11
)
select
    user_id,
    -- subtract 215 once, then count how many multiples of 19 remain
    floor((sumcarbon - 215) / 19) as treenum
from top_users
order by treenum desc;

5,計算輔助列
-- Part 1, step 5 (referred to as t5): attach the next-ranked user's tree
-- count to every row as helper column treenum2.
-- NOTE(review): 215 and 19 are presumably the low_carbon costs of
-- p004 and p002 in plant_carbon -- confirm against that table.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
totals as (
    -- carbon accumulated per user before 2017-10-01
    select
        user_id,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat < '2017-10-01'
    group by user_id
),
top_users as (
    -- the 11 largest totals
    select
        user_id,
        sumcarbon
    from totals
    order by sumcarbon desc
    limit 11
),
tree_counts as (
    select
        user_id,
        floor((sumcarbon - 215) / 19) as treenum
    from top_users
)
select
    user_id,
    treenum,
    -- no default value: the last row has no successor, so treenum2 is NULL
    -- there instead of an arbitrary sentinel such as '9999'
    lead(treenum, 1) over (order by treenum desc, user_id) as treenum2
from tree_counts
order by treenum desc, user_id;

6,計算所要結果
-- Part 1, step 6: top 10 users by tree count, with the difference to the
-- next-ranked user.
-- NOTE(review): 215 and 19 are presumably the low_carbon costs of
-- p004 and p002 in plant_carbon -- confirm against that table.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
totals as (
    -- carbon accumulated per user before 2017-10-01
    select
        user_id,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat < '2017-10-01'
    group by user_id
),
top_users as (
    -- 11 rows are kept so that the 10th user still has a successor
    select
        user_id,
        sumcarbon
    from totals
    order by sumcarbon desc
    limit 11
),
tree_counts as (
    select
        user_id,
        floor((sumcarbon - 215) / 19) as treenum
    from top_users
),
with_next as (
    select
        user_id,
        treenum,
        -- NULL when there is no next-ranked user
        lead(treenum, 1) over (order by treenum desc, user_id) as treenum2
    from tree_counts
)
select
    user_id,
    treenum,
    treenum - treenum2 as diff
from with_next
-- the limit needs its own order by; ordering inside a subquery does not
-- decide which 10 rows are returned
order by treenum desc, user_id
limit 10;

2、螞蟻森林低碳用戶排名分析
問題:查詢user_carbon表中每日流水記錄,條件爲:
用戶在2017年,連續三天(或以上)的天數裏,
每天減少碳排放(low_carbon)都超過100g的用戶低碳流水。
需要查詢返回滿足以上條件的user_carbon表中的記錄流水。
例如用戶u_002符合條件的記錄如下,因爲2017/1/2~2017/1/5連續四天的碳排放量之和都大於等於100g:
seq(key) user_id data_dt low_carbon
xxxxx10 u_002 2017/1/2 150
xxxxx11 u_002 2017/1/2 70
xxxxx12 u_002 2017/1/3 30
xxxxx13 u_002 2017/1/3 80
xxxxx14 u_002 2017/1/4 150
xxxxx15 u_002 2017/1/5 101
備註:統計方法不限於sql、procedure、python,java等

1,求出每個用戶每日碳排放量,並根據日期進行升序排列,利用上一題的t1;
-- Part 2, t1: normalise data_dt to a zero-padded 'yyyy-MM-dd' string.
-- The sample rows show data_dt written like 2017/1/2, so '/' is first
-- replaced by '-' before formatting.
-- The pattern must be 'yyyy-MM-dd': in Java date patterns 'YYYY' is the
-- week-based year and 'DD' is the day of the year.
select
    user_id,
    date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
    low_carbon
from user_carbon;

-- Part 2, t2: per-user daily totals in 2017 that reach the threshold.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
)
select
    user_id,
    dat,
    sum(low_carbon) as sumcarbon
from normalized
-- the problem statement restricts the analysis to the year 2017
where dat >= '2017-01-01'
  and dat < '2018-01-01'
group by
    user_id,
    dat
-- NOTE(review): the problem text says "more than 100g" while its example
-- says "at least 100g"; >= 100 is kept from the original -- confirm.
having sum(low_carbon) >= 100;

2,求出每個用戶有幾個日期達到要求
-- Part 2, step 2a: for every qualifying day, attach the number of
-- qualifying days the same user has in total.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
daily_totals as (
    -- days in 2017 on which a user's summed low_carbon is at least 100
    select
        user_id,
        dat,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat >= '2017-01-01'
      and dat < '2018-01-01'
    group by
        user_id,
        dat
    having sum(low_carbon) >= 100
)
select
    user_id,
    dat,
    count(user_id) over (partition by user_id) as cou
from daily_totals;

-- Part 2, step 2b: keep only users with at least three qualifying days;
-- fewer than three days can never form a three-day run.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
daily_totals as (
    -- days in 2017 on which a user's summed low_carbon is at least 100
    select
        user_id,
        dat,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat >= '2017-01-01'
      and dat < '2018-01-01'
    group by
        user_id,
        dat
    having sum(low_carbon) >= 100
),
day_counts as (
    select
        user_id,
        dat,
        count(user_id) over (partition by user_id) as cou
    from daily_totals
)
select
    user_id,
    dat,
    cou
from day_counts
where cou >= 3;

3, 計算出每一行的前兩行數值和後兩行數值

-- Part 2, step 3: for each qualifying day, fetch the user's two previous
-- and two following qualifying days.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
daily_totals as (
    -- days in 2017 on which a user's summed low_carbon is at least 100
    select
        user_id,
        dat,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat >= '2017-01-01'
      and dat < '2018-01-01'
    group by
        user_id,
        dat
    having sum(low_carbon) >= 100
),
day_counts as (
    select
        user_id,
        dat,
        count(user_id) over (partition by user_id) as cou
    from daily_totals
),
candidates as (
    -- users with at least three qualifying days
    select
        user_id,
        dat,
        cou
    from day_counts
    where cou >= 3
)
select
    user_id,
    dat,
    -- every window is ordered by dat; without an order by, lag/lead would
    -- pick neighbours in an arbitrary row order
    lag(dat, 2) over (partition by user_id order by dat) as a,
    lag(dat, 1) over (partition by user_id order by dat) as b,
    lead(dat, 1) over (partition by user_id order by dat) as c,
    lead(dat, 2) over (partition by user_id order by dat) as d
from candidates;

4,得出結果
-- Part 2, step 4: keep the qualifying days that belong to a run of at
-- least three consecutive calendar days.
-- NOTE(review): this returns one row per user and day, not the raw
-- user_carbon records the problem statement asks for -- joining back to
-- user_carbon on user_id and the normalised date would be needed for that.
with normalized as (
    -- data_dt normalised to a zero-padded 'yyyy-MM-dd' string
    select
        user_id,
        date_format(regexp_replace(data_dt, '/', '-'), 'yyyy-MM-dd') as dat,
        low_carbon
    from user_carbon
),
daily_totals as (
    -- days in 2017 on which a user's summed low_carbon is at least 100
    select
        user_id,
        dat,
        sum(low_carbon) as sumcarbon
    from normalized
    where dat >= '2017-01-01'
      and dat < '2018-01-01'
    group by
        user_id,
        dat
    having sum(low_carbon) >= 100
),
day_counts as (
    select
        user_id,
        dat,
        count(user_id) over (partition by user_id) as cou
    from daily_totals
),
candidates as (
    -- users with at least three qualifying days
    select
        user_id,
        dat,
        cou
    from day_counts
    where cou >= 3
),
neighbours as (
    -- a/b: the two previous qualifying days, c/d: the two following ones;
    -- the windows are ordered by dat so the neighbours are well defined
    select
        user_id,
        dat,
        lag(dat, 2) over (partition by user_id order by dat) as a,
        lag(dat, 1) over (partition by user_id order by dat) as b,
        lead(dat, 1) over (partition by user_id order by dat) as c,
        lead(dat, 2) over (partition by user_id order by dat) as d
    from candidates
)
select
    user_id,
    dat,
    datediff(dat, a) as diff_prev2,
    datediff(dat, b) as diff_prev1,
    datediff(dat, c) as diff_next1,
    datediff(dat, d) as diff_next2
from neighbours
where
    -- dat is the last day of a three-day run
    (datediff(dat, a) = 2 and datediff(dat, b) = 1)
    -- dat is the middle day of a three-day run
    or (datediff(dat, b) = 1 and datediff(dat, c) = -1)
    -- dat is the first day of a three-day run
    or (datediff(dat, c) = -1 and datediff(dat, d) = -2);

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章