-- Baseline (unoptimized) load of the play-event partition for ${DT}.
-- Each UNION ALL branch selects from ods.user_event_log_ex with its own
-- filter, so the source table is referenced three times in this statement.
-- NOTE(review): `select *` into a static partition assumes the selected column
-- list matches the target's non-partition columns — confirm against the DDL.
insert overwrite table dwd.dwd_user_play_event_1d_delta_daily partition (dt = '${DT}')
-- play_start: every event of the day is kept
select *
from ods.user_event_log_ex
where dt = '${DT}' and event_name = 'play_start'
union all
-- play_end: only events with a positive play_time
select *
from ods.user_event_log_ex
where dt = '${DT}' and event_name = 'play_end' and play_time > 0
union all
-- play_error: only events that carry an error_code
select *
from ods.user_event_log_ex
where dt = '${DT}' and event_name = 'play_error' and error_code is not null
從上面的 SQL 可以看出,三段查詢都是從同一張表中拉取數據,只是過濾條件不一樣。但這種寫法需要對同一張表掃描三次,每個 union all 分支都會產生額外的 MapReduce 任務,都需要消耗資源和時間。可對以上 SQL 做如下優化,減少對表的掃描次數和 MapReduce 任務的產生。
-- Single-scan load of the play-event partition for ${DT}.
-- The three event filters are OR-ed together so ods.user_event_log_ex is read
-- once and the partition is written by exactly one INSERT OVERWRITE.
-- The branches test different event_name literals, so a row can match at most
-- one of them; the result therefore has the same rows as a UNION ALL of three
-- separate selects (row order aside).
-- Several INSERT OVERWRITE clauses on the same partition are deliberately
-- avoided: OVERWRITE replaces the partition instead of appending to it.
-- NOTE(review): `select *` into a static partition assumes the selected column
-- list matches the target's non-partition columns — confirm against the DDL.
insert overwrite table dwd.dwd_user_play_event_1d_delta_daily partition (dt = '${DT}')
select *
from ods.user_event_log_ex
where dt = '${DT}'
  and (
        -- play_start: every event of the day is kept
        event_name = 'play_start'
        -- play_end: only events with a positive play_time
        or (event_name = 'play_end' and play_time > 0)
        -- play_error: only events that carry an error_code
        or (event_name = 'play_error' and error_code is not null)
      )