hive 中的二級分區表和動態分區表

二級分區表/管理表：
-- Managed table partitioned on two levels: day first, then hour.
-- Rows are stored as text with tab-separated fields.
create table emp_part1 (
    empno    int,
    empname  string,
    empjob   string,
    mgrno    int,
    birthday string,
    salary   float,
    bonus    float,
    deptno   int
)
partitioned by (
    day  string,
    hour string
)
row format delimited
    fields terminated by '\t';

增加分區
-- Register an empty (day, hour) partition on emp_part1.
alter table emp_part1
    add partition (day = '20170306', hour = '0');

刪除分區
-- Remove the (day, hour) partition from emp_part1.
alter table emp_part1
    drop partition (day = '20170306', hour = '0');

-- Load the same local file into four different (day, hour) partitions:
-- three hours of 20170308 and one hour of 20170309.
load data local inpath '/home/user01/emp.txt'
    into table emp_part1
    partition (day = '20170308', hour = '9');

load data local inpath '/home/user01/emp.txt'
    into table emp_part1
    partition (day = '20170308', hour = '10');

load data local inpath '/home/user01/emp.txt'
    into table emp_part1
    partition (day = '20170308', hour = '14');

load data local inpath '/home/user01/emp.txt'
    into table emp_part1
    partition (day = '20170309', hour = '10');

查詢分區數據
-- Filter on the first-level partition column only: reads every hour of that day.
select * from emp_part1 where day='20170308';

-- Filter on both partition columns: reads a single (day, hour) partition.
select * from emp_part1 where day='20170308' and hour='14';

查詢所有的分區信息
-- List every partition registered for emp_part1.
show partitions emp_part1;

分區可以理解爲分類，通過分類把不同類型，時間，地域的數據放到不同的目錄下。
分類的標準就是分區字段，可以一個，也可以多個。
分區表的意義在於優化查詢。查詢時儘量利用分區字段。如果不使用分區字段，就會全表掃描。
-
動態分區表：用於多維度數據的處理及查詢。
嚴格模式：static partitioned by (county string,states string)
非嚴格模式：partitioned by (county string,states string)
需要設置以下參數：
//是否開啓動態分區功能，0.13版本默認開啓
-- Turn on the dynamic-partition feature for this session.
set hive.exec.dynamic.partition=true;
動態分區的模式，默認strict，表示必須指定至少一個分區爲靜態分區，nonstrict模式表示允許所有的分區字段都可以使用動態分區
-- Allow every partition column to be dynamic. The documented values for this
-- setting are "strict" and "nonstrict"; the original "nostrict" was a typo.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Table with a single partition column (addr), used for the dynamic-partition example.
-- The field delimiter is a semicolon, written in escaped form.
create table dypart (
    id   int,
    name string
)
partitioned by (addr string)
row format delimited
    fields terminated by '\;';
//使用特殊字符作爲分隔符時需要轉義

//動態分區必須使用mapreduce才能完成，所以不能使用load方式加載
-- Dynamic-partition insert: no value is given for addr in the partition clause,
-- so each row's partition comes from the last selected column.
insert into table dypart partition (addr)
select
    deptno,
    deptname,
    addr as addr
from dept;

二級動態分區表
-- External table with two partition levels (country, then province).
-- Tab-delimited text stored under an explicit location.
create external table dypart2 (
    empno    int,
    empname  string,
    empjob   string,
    mgrno    int,
    birthday string,
    salary   int,
    bonus    float,
    deptno   int
)
partitioned by (
    country  string,
    province string
)
row format delimited
    fields terminated by '\t'
location '/hive/dynamic/dypart2';
//location之後的目錄可以不存在，創建表會自動創建，但作爲外部表推薦目錄和數據已經存在

開啓嚴格模式
-- Switch dynamic partitioning to strict mode for the statements that follow.
set hive.exec.dynamic.partition.mode=strict;

嚴格模式中，要求主分區必須爲靜態分區，輔助分區可以爲動態分區。
-- country is fixed to 'usa' (static); province is dynamic and takes its value
-- from the last selected column, here depno.
insert into table dypart2 partition (country='usa', province)
select
    empno,
    empname,
    empjob,
    mgno,
    birthday,
    salary,
    bonus,
    depno,
    depno as province
from emp;

通過子查詢方式：
-- Same static/dynamic split as the plain insert, but the rows come from a
-- subquery joining emp to dept; the department name becomes the province.
insert into table dypart2 partition (country='usa', province)
select
    c.empno,
    c.empname,
    c.empjob,
    c.mgno,
    c.birthday,
    c.salary,
    c.bonus,
    c.depno,
    c.deptname as province
from (
    select *
    from emp a
    join dept b
        on a.depno = b.deptno
) c;

覆蓋導入方式，此時overwrite和into不能連用
-- Overwrite variant: country is fixed to 'china', province is dynamic and
-- taken from the joined department name.
insert overwrite table dypart2 partition (country='china', province)
select
    a.empno,
    a.empname,
    a.empjob,
    a.mgno,
    a.birthday,
    a.salary,
    a.bonus,
    a.depno,
    b.deptname as province
from emp a
join dept b
    on a.depno = b.deptno;

桶表：將內部表、外部表和分區表進一步組織成桶表，可以將表的列通過Hash算法進一步分解成不同的文件存儲。
-- Bucketed table: rows are distributed into 4 buckets by id.
-- The field delimiter is a pipe character, written in escaped form.
create table test_bucket_table (
    id   int,
    name string,
    addr string
)
clustered by (id) into 4 buckets
row format delimited
    fields terminated by '\|';
//強制開啓分桶
-- Turn on enforced bucketing, then replace the table contents with the rows of dept.
set hive.enforce.bucketing=true;

insert overwrite table test_bucket_table
select *
from dept;
//若沒有使用hive.enforce.bucketing屬性，則需要設置和分桶個數相匹配的reducer個數，同時在SELECT後添加CLUSTER BY
-- Manual alternative: set the reducer count to 4, matching the table's
-- 4 buckets, and cluster the selected rows by deptno.
set mapred.reduce.tasks=4;

insert into table test_bucket_table
select *
from dept
cluster by deptno;

作者：我不是李尋歡
原文：https://blog.csdn.net/qq_39532946/article/details/77921039

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

hive 中的二級分區表和動態分區表

杭州的 IT 崩盤了麼？

開源高性能結構化日誌模塊NanoLog

Azure Virtual Network (22) 多訂閱使用Azure DNS解析問題 Windows Azure Platform 系列文章目錄

Python 潮流週刊#55：分享 9 個高質量的技術類信息源！

【簡寫Mybatis-02】註冊機的實現以及SqlSession處理

手繪二維碼

.NET藉助虛擬網卡實現一個簡單異地組網工具

shell腳本調用hive示例

shell中判斷hive表分區是否存在

awk統計文本里某一列重複出現的次數

Kettle性能調優彙總

Kettle性能優化

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結