hive練習:行列轉換相關

1

數據:

id userid subject score

1 001 語文 90
2 001 數學 92
3 001 英語 80
4 002 語文 88
5 002 數學 90
6 002 英語 75.5
7 003 語文 70
8 003 數學 85
9 003 英語 90
10 003 政治 82

目標輸出

userid 語文 數學 英語 政治 total
001 90 92 80 0 262
002 88 90 75.5 0 253.5
003 70 85 90 82 327
total 248 267 245.5 82 842.5

建表:

-- Exercise 1 source table: one score per row, keyed by user id and subject.
-- The text file uses a single space between fields.
create table if not exists score (
    id      int,
    uid     string,
    subject string,
    score   double
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/score.txt'
into table score;

執行

-- Pivot the score table to one row per user with one column per subject
-- (c = Chinese, m = maths, e = English, p = politics) plus a per-user total.
-- A final row labelled total carries the column sums, as the target output requires.
-- A subject the user did not take counts as 0.
-- Row order is not guaranteed by this query. Sort in the client if the total row must come last.
with per_user as (
    select
        uid,
        sum(case when subject = "語文" then score else 0 end) as c,
        sum(case when subject = "數學" then score else 0 end) as m,
        sum(case when subject = "英語" then score else 0 end) as e,
        sum(case when subject = "政治" then score else 0 end) as p
    from score
    group by uid
)
select
    report.uid,
    report.c,
    report.m,
    report.e,
    report.p,
    report.total
from (
    select
        uid,
        c,
        m,
        e,
        p,
        c + m + e + p as total
    from per_user
    union all
    select
        'total' as uid,
        coalesce(sum(c), 0) as c,
        coalesce(sum(m), 0) as m,
        coalesce(sum(e), 0) as e,
        coalesce(sum(p), 0) as p,
        coalesce(sum(c + m + e + p), 0) as total
    from per_user
) report
;

2

數據:
t1表
uid tags
1 1,2,3
2 2,3
3 1,2
編寫sql實現如下結果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2

建表

-- Exercise 2 source table: a user id and a comma-separated list of tags.
-- The text file uses a single space between fields.
create table if not exists table1 (
    uid  int,
    tags string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/table1.txt'
into table table1;

執行:

-- Emit one (uid, tag) row for every element of the comma-separated tags column.
select
    uid,
    tag
from table1
    lateral view explode(split(tags, ",")) tag_rows as tag;

3

數據:
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
根據T1和T2表的數據,編寫sql實現如下結果:
ids tags
1,2,3 A,B,C
1,2 A,B
2,3 B,C

建表:

-- Exercise 3, first table: each row is one comma-separated list of ids.
create table if not exists tag1 (
    tags string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to tag1 (no overwrite).
load data local inpath '/root/hivedata/tag1.txt'
into table tag1;

-- Exercise 3, second table: maps an id to its label.
create table if not exists tag2 (
    id  int,
    tag string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to tag2 (no overwrite).
load data local inpath '/root/hivedata/tag2.txt'
into table tag2;

執行:

-- Translate every comma-separated id list in tag1 into the matching list of tag2 labels.
-- Output columns: ids (the original list) and tags (the labels joined with commas).
-- Each list element is cast to int so the join compares values of the same type.
-- The left join keeps lists whose ids have no label. collect_list skips the resulting NULLs.
-- NOTE(review): collect_list does not promise element order after a join, so confirm
-- the label order on real data.
select
    id_rows.tags as ids,
    concat_ws(',', collect_list(labels.tag)) as tags
from (
    select
        tags,
        cast(id as int) as id
    from tag1
        lateral view explode(split(tags, ",")) exploded as id
) id_rows
left join tag2 labels
    on id_rows.id = labels.id
group by id_rows.tags
;

4用戶標籤組合

數據:
t1表:
id tag flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
編寫sql實現如下結果:
id tag flag
a b 1|2|3
c d 6|8

建表:

-- Exercise 4 source table: (id, tag) pairs with one flag value per row.
-- The text file uses a single space between fields.
create table if not exists utag (
    id   string,
    tag  string,
    flag string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/utag.txt'
into table utag;

執行:

-- Collapse the flags of each (id, tag) pair into one pipe-separated value.
-- collect_set removes duplicate flags but returns them in no particular order,
-- so sort_array is applied to get the ascending order shown in the target output.
-- flag is a string column, so the sort is lexicographic.
select
    id,
    tag,
    concat_ws('|', sort_array(collect_set(flag))) as flag
from utag
group by id, tag
;

5用戶標籤行列互換

數據:
t1表
uid name tags
1 goudan chihuo,huaci
2 mazi sleep
3 laotie play
編寫sql實現如下結果: 
uid name tag
1 goudan chihuo
1 goudan huaci
2 mazi sleep
3 laotie play

建表

-- Exercise 5 source table: user id, user name and a comma-separated list of tags.
-- The text file uses a single space between fields.
create table if not exists utag2 (
    uid  string,
    name string,
    tags string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/utag2.txt'
into table utag2;

執行:

-- Emit one (uid, name, tag) row for every element of the comma-separated tags column.
select
    uid,
    name,
    tag
from utag2
    lateral view explode(split(tags, ",")) tag_rows as tag;

6 hive實現詞頻統計

數據:
t1表:
uid contents 
1 i|love|china
2 china|is|good|i|i|like

統計結果如下,如果出現次數一樣,則按照content名稱排序:
content cnt
i 3
china 2
good 1
is 1
like 1
love 1

建表:

-- Exercise 6 source table: a user id and a pipe-separated list of words.
-- The text file uses a single space between fields.
create table if not exists content (
    uid      string,
    contents string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/content.txt'
into table content;

執行:

-- Word frequency over the pipe-separated contents column.
-- The pipe is escaped because split takes a regular expression.
-- Most frequent words come first and ties are broken by the word itself.
select
    con,
    count(con) as cnt
from content
    lateral view explode(split(contents, "\\|")) words as con
group by con
order by cnt desc, con asc;

7課程行轉列

數據:
t1表
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e
請編寫sql,得到如下結果(表中的1表示選修,表中的0表示未選修):
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

建表

-- Exercise 7 source table: one (student id, course) pair per row.
-- Unlike the other exercises, this text file is comma-separated.
create table if not exists course1 (
    id     string,
    course string
)
row format delimited
    fields terminated by ',';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/course1.txt'
into table course1;

執行:

-- Pivot course1 to one row per student with a 1/0 flag per course (a to f).
-- max is used instead of sum so the flag stays 1 when the same (id, course) row
-- appears more than once, for example after the file has been loaded twice.
-- Columns are named after the courses to match the target output header.
select
    enrol.id,
    max(case when enrol.course = "a" then 1 else 0 end) as a,
    max(case when enrol.course = "b" then 1 else 0 end) as b,
    max(case when enrol.course = "c" then 1 else 0 end) as c,
    max(case when enrol.course = "d" then 1 else 0 end) as d,
    max(case when enrol.course = "e" then 1 else 0 end) as e,
    max(case when enrol.course = "f" then 1 else 0 end) as f
from course1 enrol
group by enrol.id
;

8 興趣行轉列

t1表
name sex hobby
janson 男 打乒乓球、游泳、看電影
tom 男 打乒乓球、看電影

hobby最多3個值,使用hql實現結果如下:
name sex hobby1 hobby2 hobby3
janson 男 打乒乓球 游泳 看電影
tom 男 打乒乓球 看電影

建表

-- Exercise 8 source table: name, sex and a list of hobbies in one string column.
-- The text file uses a single space between fields.
create table if not exists hobby (
    name    string,
    sex     string,
    hobbies string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/hobby.txt'
into table hobby;

執行:

-- Split the hobbies string on the ideographic comma and spread the first three
-- entries over three columns. A missing entry is shown as a single space.
select
    name,
    sex,
    nvl(split(hobbies, "、")[0], " "),
    nvl(split(hobbies, "、")[1], " "),
    nvl(split(hobbies, "、")[2], " ")
from hobby
;

9 用戶商品行列互換

t1表:
用戶 商品
A P1
B P1
A P2
B P3

請你使用hql變成如下結果:
用戶 P1 P2 P3
A 1 1 0
B 1 0 1

建表

-- Exercise 9 source table: one (user, commodity) pair per row.
-- The text file uses a single space between fields.
create table if not exists comm (
    uid       string,
    commodity string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/comm.txt'
into table comm;

執行

-- Pivot comm to one row per user with a 1/0 flag per commodity (P1 to P3).
-- max is used instead of sum so the flag stays 1 when the same (uid, commodity)
-- row appears more than once.
-- Columns are named after the commodities to match the target output header.
select
    purchase.uid,
    max(case when purchase.commodity = 'P1' then 1 else 0 end) as p1,
    max(case when purchase.commodity = 'P2' then 1 else 0 end) as p2,
    max(case when purchase.commodity = 'P3' then 1 else 0 end) as p3
from comm purchase
group by purchase.uid
;

10成績課程行列互換

t1表:
name course score
aa English 75
bb math 85
aa math 90

使用hql輸出以下結果
name English math
aa 75 90
bb 0 85

建表

-- Exercise 10 source table: one (student name, course, score) triple per row.
-- score is declared as string here, so numeric use needs a conversion.
-- The text file uses a single space between fields.
create table if not exists score1 (
    sname  string,
    course string,
    score  string
)
row format delimited
    fields terminated by ' ';

-- Appends the local sample file to the table (no overwrite).
load data local inpath '/root/hivedata/score1.txt'
into table score1;

執行:

-- Pivot score1 to one row per student with one column per course.
-- score is stored as a string, so it is cast to double explicitly. This keeps
-- both CASE branches numeric instead of relying on implicit conversion.
-- A course the student did not take counts as 0.
-- Columns are named after the courses to match the target output header.
select
    s.sname,
    sum(case when s.course = 'English' then cast(s.score as double) else 0.0 end) as english,
    sum(case when s.course = 'math' then cast(s.score as double) else 0.0 end) as math
from score1 s
group by s.sname
;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章