安裝注意事項
# Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/app/bigdata/hadoop
# Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/app/bigdata/hadoop
# Set the path to where bin/hbase is available
export HBASE_HOME=/app/bigdata/hbase
# Set the path to where bin/hive is available
export HIVE_HOME=/app/bigdata/hive
# Set the path for where the ZooKeeper config dir is
export ZOOCFGDIR=/app/bigdata/zookeeper
連接mysql,前提是mysql允許遠程登錄,不允許的話需要授權:
--GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
# List all databases reachable through the given JDBC connection
./sqoop-list-databases --connect jdbc:mysql://192.168.10.107:3306 --username root --password root
# List all tables in the database `xlhdw`
./sqoop-list-tables --connect jdbc:mysql://192.168.10.107:3306/xlhdw --username root --password root
注意事項:
sqoop必須運行在yarn上
案例詳解
==================IMPORT 導入到hive=================
/** 將mysql的表導入 hdfs文件系統 **/
# Import the MySQL table `emp` into HDFS under /sqooptest/emp,
# comma-delimited, using 2 map tasks split on the `id` column.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--target-dir \
/sqooptest/emp \
--fields-terminated-by ',' \
--table emp \
--split-by id \
--m 2
結果數據:
[root@COLBY-NN-101 bigdata]# hdfs dfs -cat /sqooptest/emp/*
1201,gopal,manager,50000,TP
1202,manisha,Proof reader,50000,TP
1203,khalil,php dev,30000,AC
1204,prasanth,php dev,30000,AC
1205,kranthi,admin,20000,TP
/** 將mysql的表導入 hive表,sqoop會自動創建表結構 **/
# Import the MySQL table `emp` straight into Hive; with --hive-import
# Sqoop creates the Hive table schema automatically.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--hive-import \
--fields-terminated-by ',' \
--table emp \
--split-by id \
--m 2
結果數據:
hive> select * from emp;
OK
1201 gopal manager 50000 TP
1202 manisha Proof reader 50000 TP
1203 khalil php dev 30000 AC
1204 prasanth php dev 30000 AC
1205 kranthi admin 20000 TP
Time taken: 1.586 seconds, Fetched: 5 row(s)
# Same Hive import, this time for table cdm_ent_dto_business_change_d
# from database xlhdw, splitting map tasks on reg_credit_no.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/xlhdw \
--username root \
--password root \
--hive-import \
--fields-terminated-by ',' \
--table cdm_ent_dto_business_change_d \
--split-by reg_credit_no \
--m 2
/** 將mysql的表的增量數據導入 hdfs **/
# Incremental (append-mode) import into HDFS: only rows whose `id` is
# greater than 1205 — the last value imported previously — are pulled.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--target-dir /sqooptest \
--username root \
--password root \
--table emp \
--m 1 \
--incremental append \
--check-column id \
--last-value 1205
========================條件導入=================
/** 將mysql的表的條件數據導入到hive **/
# Conditional import into Hive: only the rows matching the --where
# predicate (here a single row, id = 1201) are imported.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--where "id='1201'" \
--hive-import \
--fields-terminated-by ',' \
--table emp \
--split-by id \
--m 2
/** 按需求條件導入從mysql到hdfs **/
# Free-form query import into HDFS. $CONDITIONS is a Sqoop placeholder
# each mapper replaces with its own split predicate; it is mandatory when
# --query is used. Note the single quotes so the shell does not expand it.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--target-dir /wherequery2 \
--query 'select id,name,deg from emp WHERE id<1207 and $CONDITIONS' \
--split-by id \
--fields-terminated-by '\t' \
--m 2
===============增量導入======================
增量導入是僅導入新添加的表中的行的技術。
sqoop支持兩種增量MySql導入到hive的模式,
一種是append,即通過指定一個遞增的列,比如:
--incremental append --check-column num_id --last-value 0
另種是可以根據時間戳,比如:
--incremental lastmodified --check-column created --last-value '2012-02-01 11:00:00'
就是只導入created 比'2012-02-01 11:00:00'更大的數據
1、append模式
它需要添加‘incremental’, ‘check-column’, 和 ‘last-value’選項來執行增量導入。
下面的語法用於Sqoop導入命令增量選項。
--incremental <mode>
--check-column <column name>
--last-value <last check column value>上次導到哪個值了
/* 向emp表增量導入數據,指定emp的目錄即可*/
# Append-mode incremental import directly into the emp table's Hive
# warehouse directory: only rows with id > 1205 are appended.
sqoop import \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--target-dir /user/hive/warehouse/emp \
--table emp --m 1 \
--incremental append \
--check-column id \
--last-value 1205
=====================EXPORT 導出到mysql==============
/** 將hdfs的文件數據導出到mysql **/
# Export the HDFS files under /sqooptest/ back into the MySQL table `emp`.
# --input-fields-terminated-by must match the delimiter used in the files.
bin/sqoop export \
--connect jdbc:mysql://192.168.10.107:3306/userdb \
--username root \
--password root \
--input-fields-terminated-by ',' \
--table emp \
--export-dir /sqooptest/
/** 將hive的表數據(hdfs的文件)導出到mysql **/
注意指定輸入文件的分隔符,而且不能直接指定hive表名,而是指定表名所在的路徑
# Export a Hive table's backing HDFS files to MySQL. Note: sqoop export
# takes the table's HDFS path via --export-dir, not the Hive table name.
sqoop export \
--connect jdbc:mysql://192.168.10.107:3306/xlhdw \
--username root \
--password root \
--input-fields-terminated-by ',' \
--table cdm_ent_dto_business_change_d \
--export-dir /user/hive/warehouse/cdm_ent_dto_business_change_d/
--TODO -----
將app數據倉庫中的 日新用戶維度統計報表:dim_user_new_day 導出到mysql的表中去
----------------------------------------
create table dim_user_new_day(os_name string,city string,release_channel string,app_ver_name string,cnts int)
partitioned by (day string, dim string);
-- 2 Daily new-user dimension report SQL (Hive multi-insert syntax:
--   one scan of etl_user_new_day feeds several dim partitions at once)
from etl_user_new_day
-- dim='0000': overall total, no dimension broken out
insert into table dim_user_new_day partition(day='2017-09-21',dim='0000')
select 'all','all','all','all',count(1)
where day='2017-09-21'
-- dim='0001': grouped by app version only
insert into table dim_user_new_day partition(day='2017-09-21',dim='0001')
select 'all','all','all',app_ver_name,count(1)
where day='2017-09-21'
group by app_ver_name
-- dim='0010': grouped by release channel only
insert into table dim_user_new_day partition(day='2017-09-21',dim='0010')
select 'all','all',release_channel,'all',count(1)
where day='2017-09-21'
group by release_channel
-- dim='0011': grouped by release channel and app version
insert into table dim_user_new_day partition(day='2017-09-21',dim='0011')
select 'all','all',release_channel,app_ver_name,count(1)
where day='2017-09-21'
group by release_channel,app_ver_name
-- dim='0100': grouped by city only
insert into table dim_user_new_day partition(day='2017-09-21',dim='0100')
select 'all',city,'all','all',count(1)
where day='2017-09-21'
group by city
----------------------------------------
-- 1 Create the target database and table in MySQL
create database app;
create table dim_user_new_day(
os_name varchar(20),city varchar(20),release_channel varchar(20),app_ver_name varchar(20),cnts int,dt varchar(20)
);
--注意:將庫和表的編碼集改成utf8,命令如下:
修改庫的編碼:
mysql> alter database db_name character set utf8;
修改表的編碼:
mysql> ALTER TABLE table_name CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;
-- 2 用sqoop將hive中的 dim_user_new_day 中的指定日分區的數據導出到mysql 的dim_user_new_day
sqoop導出hive多級分區表會出現異常,如下:
“Error: java.io.IOException: Can't export data, please check failed map task logs”
解決方式是:將數據存儲爲單分區,再導出到mysql或者其他RDBMS
命令模式:注意分隔符的使用
# Export the Hive report partition to MySQL. The JDBC URL forces UTF-8 so
# non-ASCII text survives the transfer; '\001' is Hive's default field
# delimiter (Ctrl-A).
sqoop export \
--connect "jdbc:mysql://192.168.10.107:3306/app?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password root \
--input-fields-terminated-by '\001' \
--table dim_user_new_day \
--export-dir /user/hive/warehouse/dim_user_new_day/day=2017-09-21/dim*/*
-----腳本模式
#!/bin/bash
# Daily export job: push yesterday's partition of the single-partition
# table dim_user_new_day_1p from the Hive warehouse into the MySQL table
# dim_user_new_day (database `app`, forced to UTF-8 via the JDBC URL).
set -euo pipefail

# Yesterday's date, formatted to match the Hive partition value (YYYY-MM-DD).
day=$(date -d '-1 day' +'%Y-%m-%d')

# '\001' (Ctrl-A) is Hive's default field delimiter.
# BUGFIX: the original command ended with a stray " /" (typo for the "\"
# line-continuation), which Sqoop would have received as an extra `/`
# argument; no continuation is needed on the final line.
/root/apps/sqoop/bin/sqoop export \
  --connect "jdbc:mysql://192.168.10.107:3306/app?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password root \
  --input-fields-terminated-by '\001' \
  --table dim_user_new_day \
  --export-dir "/user/hive/warehouse/app.db/dim_user_new_day_1p/day=${day}"