A Detailed Guide to Integrating hive-jdbc with Spring Boot

This article walks through a simple example of integrating hive-jdbc into a Spring Boot project.

Official documentation: https://cwiki.apache.org/confluence/display/Hive/HiveClient#HiveClient-JDBC

Adding Dependencies and Configuration

First, add the Maven dependencies required for hive-jdbc to the project's POM file.

		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>druid-spring-boot-starter</artifactId>
			<version>1.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-jdbc</artifactId>
		</dependency>
		<dependency>
			<groupId>org.springframework.data</groupId>
			<artifactId>spring-data-hadoop</artifactId>
			<version>2.5.0.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-jdbc</artifactId>
			<version>2.3.3</version>
			<exclusions>
				<exclusion>
					<groupId>org.eclipse.jetty.aggregate</groupId>
					<artifactId>*</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>org.apache.tomcat</groupId>
			<artifactId>tomcat-jdbc</artifactId>
		</dependency>
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>

Then, add the data source settings to the core configuration file, application.yml.

hive:
  url: jdbc:hive2://172.16.250.234:10000/hive
  driver-class-name: org.apache.hive.jdbc.HiveDriver
  type: com.alibaba.druid.pool.DruidDataSource
  user: hadoop
  password: Pure@123
  # The settings below are supplementary connection pool options, applied to the data source above
  # initial, minimum, and maximum pool size
  initialSize: 1
  minIdle: 3
  maxActive: 20
  # maximum time to wait when acquiring a connection, in milliseconds
  maxWait: 60000
  # how often the evictor runs to check for idle connections to close, in milliseconds
  timeBetweenEvictionRunsMillis: 60000
  # minimum time a connection must stay idle in the pool before it may be evicted, in milliseconds
  minEvictableIdleTimeMillis: 30000
  validationQuery: select 1
  testWhileIdle: true
  testOnBorrow: false
  testOnReturn: false
  # enable PSCache and set its size per connection
  poolPreparedStatements: true
  maxPoolPreparedStatementPerConnectionSize: 20
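
Before wiring the pool into Spring, it can help to confirm that the URL and credentials actually work with a plain JDBC smoke test. Below is a minimal sketch, reusing the host, port, and credentials from the application.yml above; it only needs hive-jdbc on the classpath.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveSmokeTest {

	public static void main(String[] args) throws Exception {
		// same URL and credentials as in application.yml above
		String url = "jdbc:hive2://172.16.250.234:10000/hive";
		// explicit driver load; JDBC 4 drivers usually self-register
		Class.forName("org.apache.hive.jdbc.HiveDriver");
		try (Connection conn = DriverManager.getConnection(url, "hadoop", "Pure@123");
				Statement stmt = conn.createStatement();
				ResultSet rs = stmt.executeQuery("show databases")) {
			while (rs.next()) {
				System.out.println(rs.getString(1));
			}
		}
	}
}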

Configuring the Data Source and JdbcTemplate

We can use Spring Boot's default org.apache.tomcat.jdbc.pool.DataSource and use it to assemble a JdbcTemplate.


import org.apache.tomcat.jdbc.pool.DataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.env.Environment;
import org.springframework.jdbc.core.JdbcTemplate;

@Configuration
public class HiveJdbcConfig {

	private static final Logger logger = LoggerFactory.getLogger(HiveJdbcConfig.class);

	@Autowired
	private Environment env;

	@Bean(name = "hiveJdbcDataSource")
	@Qualifier("hiveJdbcDataSource")
	public DataSource dataSource() {
		DataSource dataSource = new DataSource();
		dataSource.setUrl(env.getProperty("hive.url"));
		dataSource.setDriverClassName(env.getProperty("hive.driver-class-name"));
		dataSource.setUsername(env.getProperty("hive.user"));
		dataSource.setPassword(env.getProperty("hive.password"));
		logger.debug("Hive DataSource Inject Successfully...");
		return dataSource;
	}

	@Bean(name = "hiveJdbcTemplate")
	public JdbcTemplate hiveJdbcTemplate(@Qualifier("hiveJdbcDataSource") DataSource dataSource) {
		return new JdbcTemplate(dataSource);
	}

}
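
The bean above only sets the connection basics. If you want the tomcat-jdbc pool to honor the same pool settings as the hive.* keys in application.yml, the corresponding setters also exist on org.apache.tomcat.jdbc.pool.DataSource. Below is a sketch of extra calls that could be added inside dataSource() above; the fallback values are assumptions that simply mirror the YAML.

		// optional: apply the pool settings from application.yml to tomcat-jdbc as well
		dataSource.setInitialSize(env.getProperty("hive.initialSize", Integer.class, 1));
		dataSource.setMinIdle(env.getProperty("hive.minIdle", Integer.class, 3));
		dataSource.setMaxActive(env.getProperty("hive.maxActive", Integer.class, 20));
		dataSource.setMaxWait(env.getProperty("hive.maxWait", Integer.class, 60000));
		dataSource.setValidationQuery(env.getProperty("hive.validationQuery", "select 1"));
		dataSource.setTestWhileIdle(env.getProperty("hive.testWhileIdle", Boolean.class, true));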

Alternatively, we can use a different connection pool. This example uses the Druid data source, configured as follows.

import javax.sql.DataSource;

import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.core.JdbcTemplate;

import com.alibaba.druid.pool.DruidDataSource;

@Configuration
@ConfigurationProperties(prefix = "hive")
public class HiveDruidConfig {

	private String url;
	private String user;
	private String password;
	private String driverClassName;
	private int initialSize;
	private int minIdle;
	private int maxActive;
	private int maxWait;
	private int timeBetweenEvictionRunsMillis;
	private int minEvictableIdleTimeMillis;
	private String validationQuery;
	private boolean testWhileIdle;
	private boolean testOnBorrow;
	private boolean testOnReturn;
	private boolean poolPreparedStatements;
	private int maxPoolPreparedStatementPerConnectionSize;

	@Bean(name = "hiveDruidDataSource")
	@Qualifier("hiveDruidDataSource")
	public DataSource dataSource() {
		DruidDataSource datasource = new DruidDataSource();
		datasource.setUrl(url);
		datasource.setUsername(user);
		datasource.setPassword(password);
		datasource.setDriverClassName(driverClassName);

		// pool configuration
		datasource.setInitialSize(initialSize);
		datasource.setMinIdle(minIdle);
		datasource.setMaxActive(maxActive);
		datasource.setMaxWait(maxWait);
		datasource.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
		datasource.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);
		datasource.setValidationQuery(validationQuery);
		datasource.setTestWhileIdle(testWhileIdle);
		datasource.setTestOnBorrow(testOnBorrow);
		datasource.setTestOnReturn(testOnReturn);
		datasource.setPoolPreparedStatements(poolPreparedStatements);
		datasource.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);
		return datasource;
	}
	
	// Getters and setters for the fields above are omitted here for brevity;
	// they are required for the @ConfigurationProperties binding to work.

	@Bean(name = "hiveDruidTemplate")
	public JdbcTemplate hiveDruidTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
		return new JdbcTemplate(dataSource);
	}

}

Operating Hive with a DataSource

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import javax.sql.DataSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * Operate Hive using a DataSource.
 */
@RestController
@RequestMapping("/hive")
public class HiveDataSourceController {

	private static final Logger logger = LoggerFactory.getLogger(HiveDataSourceController.class);

	@Autowired
	@Qualifier("hiveJdbcDataSource")
	org.apache.tomcat.jdbc.pool.DataSource jdbcDataSource;

	@Autowired
	@Qualifier("hiveDruidDataSource")
	DataSource druidDataSource;

	/**
	 * List all tables in the current Hive database.
	 */
	@RequestMapping("/table/list")
	public List<String> listAllTables() throws SQLException {
		List<String> list = new ArrayList<String>();
		String sql = "show tables";
		logger.info("Running: " + sql);
		// try-with-resources returns the connection to the pool even on failure;
		// jdbcDataSource (tomcat-jdbc) can be swapped in for druidDataSource here
		try (Connection conn = druidDataSource.getConnection();
				Statement statement = conn.createStatement();
				ResultSet res = statement.executeQuery(sql)) {
			while (res.next()) {
				list.add(res.getString(1));
			}
		}
		return list;
	}

	/**
	 * Describe the columns of the given table in the Hive database.
	 */
	@RequestMapping("/table/describe")
	public List<String> describeTable(String tableName) throws SQLException {
		List<String> list = new ArrayList<String>();
		String sql = "describe " + tableName;
		logger.info("Running: " + sql);
		// jdbcDataSource can again be used in place of druidDataSource
		try (Connection conn = druidDataSource.getConnection();
				Statement statement = conn.createStatement();
				ResultSet res = statement.executeQuery(sql)) {
			while (res.next()) {
				list.add(res.getString(1));
			}
		}
		return list;
	}

	/**
	 * Query the rows of the given table.
	 */
	@RequestMapping("/table/select")
	public List<String> selectFromTable(String tableName) throws SQLException {
		String sql = "select * from " + tableName;
		logger.info("Running: " + sql);
		List<String> list = new ArrayList<String>();
		try (Connection conn = druidDataSource.getConnection();
				Statement statement = conn.createStatement();
				ResultSet res = statement.executeQuery(sql)) {
			int count = res.getMetaData().getColumnCount();
			while (res.next()) {
				// join all column values of the row with single spaces
				StringBuilder row = new StringBuilder();
				for (int i = 1; i <= count; i++) {
					if (i > 1) {
						row.append(" ");
					}
					row.append(res.getString(i));
				}
				logger.info(row.toString());
				list.add(row.toString());
			}
		}
		return list;
	}

}
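
The endpoints above interpolate tableName directly into the SQL string, which is fine for a demo but risky with untrusted input. For filtering on data values, the Hive JDBC driver supports PreparedStatement placeholders (table names themselves cannot be parameterized). Below is a sketch of such a method for the controller above, assuming the user_sample table created later in this article and an extra java.sql.PreparedStatement import:

	/**
	 * Example sketch: query rows with a parameterized value filter.
	 */
	@RequestMapping("/table/selectByGender")
	public List<String> selectByGender(String gender) throws SQLException {
		List<String> list = new ArrayList<String>();
		String sql = "select user_name from user_sample where user_gender = ?";
		try (Connection conn = druidDataSource.getConnection();
				PreparedStatement ps = conn.prepareStatement(sql)) {
			ps.setString(1, gender); // bound value, not string concatenation
			try (ResultSet res = ps.executeQuery()) {
				while (res.next()) {
					list.add(res.getString(1));
				}
			}
		}
		return list;
	}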

Operating Hive with JdbcTemplate

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * Operate Hive using a JdbcTemplate.
 */
@RestController
@RequestMapping("/hive2")
public class HiveJdbcTemplateController {

	private static final Logger logger = LoggerFactory.getLogger(HiveJdbcTemplateController.class);

	@Autowired
	@Qualifier("hiveDruidTemplate")
	private JdbcTemplate hiveDruidTemplate;

	@Autowired
	@Qualifier("hiveJdbcTemplate")
	private JdbcTemplate hiveJdbcTemplate;

	/**
	 * Example: create a new table.
	 */
	@RequestMapping("/table/create")
	public String createTable() {
		StringBuilder sql = new StringBuilder("CREATE TABLE IF NOT EXISTS ");
		sql.append("user_sample");
		sql.append("(user_num BIGINT, user_name STRING, user_gender STRING, user_age INT) ");
		sql.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' "); // field and line delimiters
		sql.append("STORED AS TEXTFILE"); // store as plain text

		logger.info("Running: " + sql);
		String result = "Table created successfully...";
		try {
			// hiveJdbcTemplate.execute(sql.toString());
			hiveDruidTemplate.execute(sql.toString());
		} catch (DataAccessException dae) {
			result = "Create table encountered an error: " + dae.getMessage();
			logger.error(result);
		}
		return result;
	}

	/**
	 * Example: load data from a local file on the Hive server into a Hive table.
	 */
	@RequestMapping("/table/load")
	public String loadIntoTable() {
		String filepath = "/home/hadoop/user_sample.txt";
		String sql = "load data local inpath '" + filepath + "' into table user_sample";
		String result = "Data loaded into table successfully...";
		try {
			// hiveJdbcTemplate.execute(sql);
			hiveDruidTemplate.execute(sql);
		} catch (DataAccessException dae) {
			result = "Load data into table encountered an error: " + dae.getMessage();
			logger.error(result);
		}
		return result;
	}

	/**
	 * Example: insert a row into a Hive table.
	 */
	@RequestMapping("/table/insert")
	public String insertIntoTable() {
		String sql = "INSERT INTO TABLE user_sample(user_num,user_name,user_gender,user_age) VALUES(888,'Plum','M',32)";
		String result = "Row inserted successfully...";
		try {
			// hiveJdbcTemplate.execute(sql);
			hiveDruidTemplate.execute(sql);
		} catch (DataAccessException dae) {
			result = "Insert into table encountered an error: " + dae.getMessage();
			logger.error(result);
		}
		return result;
	}

	/**
	 * Example: drop a table.
	 */
	@RequestMapping("/table/delete")
	public String delete(String tableName) {
		String sql = "DROP TABLE IF EXISTS " + tableName;
		String result = "Table dropped successfully...";
		logger.info("Running: " + sql);
		try {
			// hiveJdbcTemplate.execute(sql);
			hiveDruidTemplate.execute(sql);
		} catch (DataAccessException dae) {
			result = "Drop table encountered an error: " + dae.getMessage();
			logger.error(result);
		}
		return result;
	}
}
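
Besides execute(), JdbcTemplate's query helpers also work against Hive and remove the ResultSet boilerplate entirely. Below is a sketch of an extra endpoint that could be added to the controller above; queryForObject maps the single aggregate column straight to a Long.

	/**
	 * Example sketch: count rows via JdbcTemplate's mapping helpers.
	 */
	@RequestMapping("/table/count")
	public String countTable(String tableName) {
		// count(*) returns a Hive BIGINT, which maps to java.lang.Long
		Long count = hiveDruidTemplate.queryForObject("select count(*) from " + tableName, Long.class);
		return tableName + " has " + count + " rows";
	}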

Startup and Testing

Start the project by running the main method of the HiveApplication class; the following walks through each example endpoint.

Once the project has started, visit http://localhost:8080/hive2/table/create in a browser to create the user_sample test table.

The user_sample table corresponds to the following creation SQL:

    create table user_sample
    ( 
        user_num bigint, 
        user_name string, 
        user_gender string, 
        user_age int
    ) row format delimited fields terminated by ',';

After the test table has been created, visit http://localhost:8080/hive/table/list to see which tables exist in the hive database.

Running the show tables command in the Hive CLI lists the same tables as the browser.

Visit http://localhost:8080/hive/table/describe?tableName=user_sample to view the column information of the user_sample table.

Running the describe user_sample command in the Hive CLI shows the same columns as the browser.

Next, test loading data. First, create a user_sample.txt file in the /home/hadoop/ directory on the Hive server, with the following content:

622,Lee,M,25
633,Andy,F,27
644,Chow,M,25
655,Grace,F,24
666,Lily,F,29
677,Angle,F,23

Then visit the following address in a browser to load the contents of the /home/hadoop/user_sample.txt file into the user_sample table.

http://localhost:8080/hive2/table/load

After the data has been loaded, visit http://localhost:8080/hive/table/select?tableName=user_sample to list the rows.

Running select * from user_sample in the Hive CLI returns the same rows as the browser.

Next, visit http://localhost:8080/hive2/table/insert to insert one row into the user_sample table. On the server side, Hive executes this INSERT as a MapReduce job, whose progress is logged by the Hive client.

Visit http://localhost:8080/hive/table/select?tableName=user_sample again to confirm that the new row appears.

The full project source code has been uploaded to CSDN: https://download.csdn.net/download/pengjunlee/10613827
