使用java連接hive，並執行hive語句詳解

第一篇：

安裝hadoop 和 hive我就不多說了，網上太多文章自己看去

首先，在機器上啓動hiveserver服務

[html] view plain copy

hive --service hiveserver -p 50000 &

這樣hiveserver就在50000端口上啓動了，然後就可以用java連接了，需要的jar包我發個圖片

就這麼多jar包，都是必須的

不多說，直接上代碼

[java] view plain copy

package asia.wildfire.hive.service;
import java.sql.*;
import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Small demo of talking to Hive over JDBC: creates three external tables backed by the
 * DynamoDB storage handler and runs two parameterized queries against the tweet table.
 *
 * <p>All methods share one lazily created, process-wide {@link Connection}.
 *
 * <p>User: liuxiaochen, Date: 13-9-24, Time: 5:47 PM
 */
public class HiveService {
    /** JDBC URL of the HiveServer instance; "ip" is a placeholder for the real host. */
    private static final String URLHIVE = "jdbc:hive://ip:50000/default";

    /** Shared connection; volatile so the double-checked locking below publishes it safely. */
    private static volatile Connection connection = null;

    /**
     * Returns the shared Hive connection, opening it on first use.
     *
     * @return the shared connection, or {@code null} if the driver class could not be loaded
     *         or the connection attempt failed (the failure is printed to stderr)
     */
    public static Connection getHiveConnection() {
        if (null == connection) {
            synchronized (HiveService.class) {
                if (null == connection) {
                    try {
                        Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
                        connection = DriverManager.getConnection(URLHIVE, "", "");
                    } catch (SQLException e) {
                        e.printStackTrace();
                    } catch (ClassNotFoundException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
        return connection;
    }

    /**
     * Returns the shared connection or fails with a descriptive error, so that callers do
     * not hit a bare NullPointerException when the connection could not be opened.
     */
    private static Connection requireConnection() throws SQLException {
        Connection conn = getHiveConnection();
        if (conn == null) {
            throw new SQLException("Unable to open Hive connection to " + URLHIVE);
        }
        return conn;
    }

    /**
     * Drops (if present) and re-creates the three external tables
     * {@code hive_crm_tweet2222}, {@code hive_tweet_comment2222} and
     * {@code hive_tweet_retweet2222}.
     *
     * @throws SQLException if the connection is unavailable or any statement fails
     */
    public static void createTable() throws SQLException {
        String tweetTableSql = "DROP TABLE IF EXISTS hive_crm_tweet2222";
        String createTable1 = "CREATE EXTERNAL TABLE hive_crm_tweet2222(tweet_id string, cuser_id string, created_at bigint, year bigint, month bigint, day bigint, hour bigint, text string, comments_count bigint, reposts_count bigint, source string, retweeted_id string, post_type string, sentiment string, positive_tags_string string, predict_tags_string string, tags_string string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,cuser_id:cuser_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,retweeted_id:retweeted_id,post_type:post_type,sentiment:sentiment,positive_tags_string:positive_tags_string,predict_tags_string:predict_tags_string,tags_string:tags_string\")";
        String commentTableSql = "DROP TABLE IF EXISTS hive_tweet_comment2222";
        // NOTE(review): the mapping below maps topic_id to the attribute tweet_id
        // ("topic_id:tweet_id"); confirm this is intentional.
        String createTable2 = "CREATE EXTERNAL TABLE hive_tweet_comment2222(tweet_id string,comment_id string, cuser_id string, user_id string, created_at bigint, year bigint, month bigint, day bigint, hour bigint, text string, comments_count bigint, reposts_count bigint, source string, topic_id string, post_type string, sentiment string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet_comment\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,comment_id:comment_id,cuser_id:cuser_id,user_id:user_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,topic_id:tweet_id,post_type:post_type,sentiment:sentiment\")";
        String retweetTableSql = "DROP TABLE IF EXISTS hive_tweet_retweet2222";
        // NOTE(review): same "topic_id:tweet_id" mapping as the comment table; confirm.
        String createTable3 = "CREATE EXTERNAL TABLE hive_tweet_retweet2222(tweet_id string, cuser_id string, user_id string, retweet_id string, created_at BIGINT, year BIGINT, month BIGINT, day BIGINT, hour BIGINT, text string, comments_count BIGINT, reposts_count BIGINT, source string, topic_id string, verified_type BIGINT, post_type string, sentiment string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet_retweet\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,cuser_id:cuser_id,user_id:user_id,retweet_id:retweet_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,topic_id:tweet_id,verified_type:verified_type,post_type:post_type,sentiment:sentiment\")";

        // Each DROP must run before the CREATE of the same table, so order matters here.
        String[] ddlStatements = {
            tweetTableSql, createTable1,
            commentTableSql, createTable2,
            retweetTableSql, createTable3
        };

        Statement stmt = requireConnection().createStatement();
        try {
            for (String ddl : ddlStatements) {
                // DDL produces no result set, so execute() is the appropriate JDBC call.
                stmt.execute(ddl);
            }
        } finally {
            stmt.close();
        }
    }

    /**
     * Prints, for one hard-coded user, the tweet count per (year, month, day, hour) group
     * within the window computed by {@code DateUtils}, then the elapsed time in milliseconds.
     *
     * @throws SQLException if the connection is unavailable or the query fails
     */
    public static void selectTweet() throws SQLException {
        long startedAt = System.currentTimeMillis();
        long start = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 15).getTime().getTime();
        long end = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 13).getTime().getTime();
        String sql = "select cuser_id, count(*) as tw_hour, year, month, day from hive_crm_tweet2222 where created_at > ? and created_at < ? and cuser_id = ? group by cuser_id, year, month, day, hour";

        PreparedStatement pstm = requireConnection().prepareStatement(sql);
        try {
            pstm.setLong(1, start);
            pstm.setLong(2, end);
            pstm.setString(3, "2176270443");
            ResultSet rss = pstm.executeQuery();
            while (rss.next()) {
                System.out.println("1: " + rss.getString("cuser_id") + " 2: " + rss.getInt("tw_hour") + " 3: " + rss.getInt("year") + " 4: " + rss.getInt("month") + " 5: " + rss.getInt("day"));
            }
        } finally {
            // Closing the statement also releases its result set.
            pstm.close();
        }
        System.out.println(System.currentTimeMillis() - startedAt);
    }

    /**
     * Prints, for one hard-coded user, each tweet's creation hour followed by its user id
     * and tweet id, then the elapsed time in milliseconds.
     *
     * @throws SQLException if the connection is unavailable or the query fails
     */
    public static void selectTweet22() throws SQLException {
        long startedAt = System.currentTimeMillis();
        long start = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 15).getTime().getTime();
        long end = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 13).getTime().getTime();
        String sql = "select cuser_id, created_at, tweet_id from hive_crm_tweet2222 where created_at > ? and created_at < ? and cuser_id = ?";

        PreparedStatement pstm = requireConnection().prepareStatement(sql);
        try {
            pstm.setLong(1, start);
            pstm.setLong(2, end);
            pstm.setString(3, "2176270443");
            ResultSet rss = pstm.executeQuery();
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH");
            while (rss.next()) {
                // created_at is declared bigint, so read it as long; the stored value is
                // scaled by 1000 to get milliseconds (presumably it holds epoch seconds).
                long createdAtMillis = rss.getLong("created_at") * 1000L;
                java.util.Date date = new java.util.Date(createdAtMillis);
                System.out.println(dateFormat.format(date));
                System.out.println(rss.getString("cuser_id") + " " + rss.getString("tweet_id"));
            }
        } finally {
            // Closing the statement also releases its result set.
            pstm.close();
        }
        System.out.println(System.currentTimeMillis() - startedAt);
    }

    /**
     * Entry point: runs {@link #selectTweet22()}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        selectTweet22();
    }
}

第二篇：

我們可以通過CLI、Client、Web UI等Hive提供的用戶接口來和Hive通信，但這三種方式最常用的是CLI；Client 是Hive的客戶端，用戶連接至 Hive Server。在啓動 Client 模式的時候，需要指出Hive Server所在節點，並且在該節點啓動 Hive Server。 WUI 是通過瀏覽器訪問 Hive。今天我們來談談怎麼通過HiveServer來操作Hive。

　　Hive提供了jdbc驅動，使得我們可以用Java代碼來連接Hive並進行一些類關係型數據庫的sql語句查詢等操作。同關係型數據庫一樣，我們也需要將Hive的服務打開；在Hive 0.11.0版本之前，只有HiveServer服務可用，在程序操作Hive之前，必須先在安裝Hive的服務器上打開HiveServer服務，如下：


[wyp@localhost /home/q/hive-0.11.0]$ bin/hive --service hiveserver -p 10002
Starting Hive Thrift Server

上面代表你已經成功的在端口爲10002（默認的端口是10000）啓動了hiveserver服務。這時候，你就可以通過Java代碼來連接hiveserver，代碼如下：


package

com.wyp;

/**

 *
User: 過往記憶

 *
Blog: 
http://www.iteblog.com/

 *
Date: 13-11-27

 *
Time: 下午5:52

 */

import

java.sql.SQLException;

import

java.sql.Connection;

import

java.sql.ResultSet;

import

java.sql.Statement;

import

java.sql.DriverManager;

/**
 * Minimal HiveServer JDBC walkthrough: re-creates a table, then runs
 * "show tables", "describe", "select *" and "select count(1)" against it,
 * printing each result to standard output.
 */
public class HiveJdbcTest {

    /** Fully qualified class name of the HiveServer (v1) JDBC driver. */
    private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

    /**
     * Connects to HiveServer on localhost:10002 and runs the demo statements.
     * Exits with status 1 if the JDBC driver class is not on the classpath.
     *
     * @param args unused
     * @throws SQLException if connecting or any statement fails
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }

        Connection con = DriverManager.getConnection(
                "jdbc:hive://localhost:10002/default", "wyp", "");
        try {
            Statement stmt = con.createStatement();
            try {
                String tableName = "wyphao";

                // Start from a clean table.
                stmt.execute("drop table if exists " + tableName);
                stmt.execute("create table " + tableName + " (key int, value string)");
                System.out.println("Create table success!");

                // show tables
                String sql = "show tables '" + tableName + "'";
                System.out.println("Running: " + sql);
                ResultSet res = stmt.executeQuery(sql);
                if (res.next()) {
                    System.out.println(res.getString(1));
                }

                // describe table
                sql = "describe " + tableName;
                System.out.println("Running: " + sql);
                res = stmt.executeQuery(sql);
                while (res.next()) {
                    System.out.println(res.getString(1) + "\t" + res.getString(2));
                }

                // select all rows (the query text itself is not echoed)
                sql = "select * from " + tableName;
                res = stmt.executeQuery(sql);
                while (res.next()) {
                    System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
                }

                // count rows
                sql = "select count(1) from " + tableName;
                System.out.println("Running: " + sql);
                res = stmt.executeQuery(sql);
                while (res.next()) {
                    System.out.println(res.getString(1));
                }
            } finally {
                // Closing the statement also releases its current result set.
                stmt.close();
            }
        } finally {
            con.close();
        }
    }
}

編譯上面的代碼，之後就可以運行(我是在集成開發環境下面運行這個程序的)，結果如下：


Create table success!
Running: show tables 'wyphao'
wyphao
Running: describe wyphao
key	int
value	string
Running: select count(1) from wyphao
0

Process finished with exit code 0

　　如果你想在腳本里面運行，請將上面的程序打包成jar文件，並將上面的依賴庫放在/home/wyp/lib/（這個根據你自己的情況弄）中，同時加入到運行的環境變量，腳本如下：


#!/bin/bash

# Launches com.wyp.HiveJdbcTest with every jar under /home/wyp/lib on the classpath.

# Install locations; they are set here but not referenced further down in this script.
HADOOP_HOME=/home/q/hadoop-2.2.0
HIVE_HOME=/home/q/hive-0.11.0-bin

CLASSPATH=$CLASSPATH:

# Append each dependency jar to the classpath.
for i in /home/wyp/lib/*.jar ; do
    CLASSPATH=$CLASSPATH:$i
done

# Show the final classpath for troubleshooting.
echo "$CLASSPATH"

# Run the test class; OutputText.jar is the jar that contains it.
/home/q/java/jdk1.6.0_20/bin/java -cp \
   "$CLASSPATH:/export1/tmp/yangping.wu/OutputText.jar" \
   com.wyp.HiveJdbcTest

上面是用Java連接HiveServer，而HiveServer本身存在很多問題（比如：安全性、併發性等）；針對這些問題，Hive0.11.0版本提供了一個全新的服務：HiveServer2，這個很好的解決HiveServer存在的安全性、併發性等問題。這個服務啓動程序在${HIVE_HOME}/bin/hiveserver2裏面，你可以通過下面的方式來啓動HiveServer2服務：


$HIVE_HOME/bin/hiveserver2

也可以通過下面的方式啓動HiveServer2


$HIVE_HOME/bin/hive --service hiveserver2

兩種方式效果都一樣的。但是以前的程序需要修改兩個地方，如下所示：


private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

改爲

private static String driverName = "org.apache.hive.jdbc.HiveDriver";

Connection con = DriverManager.getConnection(
                           "jdbc:hive://localhost:10002/default", "wyp", "");

改爲

Connection con = DriverManager.getConnection(
                           "jdbc:hive2://localhost:10002/default", "wyp", "");

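其他的不變就可以了。注意：上面兩種啓動方式都沒有指定端口，HiveServer2 默認監聽 10000 端口；如果要繼續使用 URL 中的 10002 端口，啓動時需要加上 --hiveconf hive.server2.thrift.port=10002，否則請把 URL 中的端口改爲 10000。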

　　這裏順便說說本程序所依賴的jar包，一共有以下幾個：


hadoop-2.2.0/share/hadoop/common/hadoop-common-2.2.0.jar

$HIVE_HOME/lib/hive-exec-0.11.0.jar

$HIVE_HOME/lib/hive-jdbc-0.11.0.jar

$HIVE_HOME/lib/hive-metastore-0.11.0.jar 

$HIVE_HOME/lib/hive-service-0.11.0.jar  

$HIVE_HOME/lib/libfb303-0.9.0.jar  

$HIVE_HOME/lib/commons-logging-1.0.4.jar 

$HIVE_HOME/lib/slf4j-api-1.6.1.jar

　　如果你是用Maven，加入以下依賴


<!-- Hive JDBC driver, version 0.11.0 -->
<dependency>

        <groupId>org.apache.hive</groupId>

        <artifactId>hive-jdbc</artifactId>

        <version>0.11.0</version>

</dependency>

<!-- Hadoop common library, version 2.2.0 -->
<dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-common</artifactId>

        <version>2.2.0</version>

</dependency>

使用java連接hive，並執行hive語句詳解

關於遊戲付費的一點想法

我通過CKA和CKS啦！

《最新出爐》系列入門篇-Python+Playwright自動化測試-42-強大的可視化追蹤利器Trace Viewer

大數據怎麼學？對大數據開發領域及崗位的詳細解讀，完整理解大數據開發領域技術體系

ubuntu操作遇到問題

ubuntu下安裝eclipse，配置jdk環境變量仍然報錯

關於java的初始化順序的問題

HBase HA (多HMaster)

Hive:用Java代碼通過JDBC連接Hiveserver

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結