環境:centos7+hadoop2.5.2+hive1.2.1+mysql5.6.22+eclipse indigo service release 2
思路:hive加載日誌→hadoop分佈式執行→需求數據進入mysql
注意:hadoop日誌分析系統網上資料很多,但是大多都有些小問題,無法順利運行,但本文中都是經過親自驗證的,可以一氣呵成。另外還包括可能遇到的異常的詳細解釋及相關解決方案。
1) 日誌格式分析
首先分析 Hadoop 的日誌格式, 此種日誌格式是最簡單的,一行一條, 日誌格式可以依次描述爲:日期、時間、類別、相關類和提示信息。如下所示:
2) hive存儲表設計
rdate Time type rclass infor1 Infor2 Infor3
string array string string string string string
3) hive表定義
-- Hive staging table: one row per line of the loaded log file.
-- Each line is split on single spaces into the seven columns below.
-- The time column is additionally split on ',' into an array of strings.
-- No map column is declared, so the map-key delimiter on the last line has nothing to act on.
create table if not exists loginfo(
rdate string,
time array<string>,
type string,
relateclass string,
information1 string,
information2 string,
information3 string)
row format delimited fields terminated by ' '
collection items terminated by ','
map keys terminated by ':';
4) mysql表定義
-- Recreate the MySQL target table from scratch; an existing hadooplog table and its rows are dropped.
drop table if exists hadooplog;
-- id is an auto-increment surrogate primary key; every other column is nullable.
-- The log message is stored in the single longtext column "information".
create table hadooplog(
id int(11) not null auto_increment,
rdate varchar(50) null,
time varchar(50) default null,
type varchar(50) default null,
relateclass tinytext default null,
information longtext default null,
primary key (id)
) engine=innodb default charset=utf8;
5) mysql數據庫操作
[root@master /]# cd /usr/bin
[root@master bin]# service mysql start
Starting MySQL SUCCESS!
[root@master bin]# mysql -uroot -p
mysql> create database hive;
Query OK, 1 row affected (0.03 sec)
mysql> use hive
Database changed
mysql> source /usr/local/mysql/sql/hadooplog.sql
Query OK, 0 rows affected, 1 warning (0.05 sec)
Query OK, 0 rows affected (0.18 sec)
mysql> desc hadooplog;
6) DBHelper: 負責建立與 Hive 和 MySQL 的連接
package com.smq.hive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * Hands out lazily created, cached JDBC connections to Hive (HiveServer2) and MySQL.
 *
 * <p>Each connection is opened on first request and kept in a static field. A cached
 * connection that has been closed is replaced on the next request, so the getters never
 * return a connection this class knows to be closed. The class performs no synchronization.
 */
public class DBHelper {
    private static final String HIVE_DRIVER = "org.apache.hive.jdbc.HiveDriver";
    private static final String HIVE_URL = "jdbc:hive2://192.168.2.10:10000/default";
    private static final String MYSQL_DRIVER = "com.mysql.jdbc.Driver";
    private static final String MYSQL_URL = "jdbc:mysql://192.168.2.10:3306/hive";
    private static final String MYSQL_USER = "root";
    private static final String MYSQL_PASSWORD = "xxxx";

    private static Connection connToHive = null;
    private static Connection connToMySQL = null;

    private DBHelper() {
    }

    /**
     * Returns the shared Hive connection, opening it if there is none or the cached one is closed.
     *
     * @return an open connection to the Hive {@code default} database
     * @throws SQLException if the Hive JDBC driver is not on the classpath or the connection fails
     */
    public static Connection getHiveConn() throws SQLException {
        if (connToHive == null || connToHive.isClosed()) {
            loadDriver(HIVE_DRIVER);
            connToHive = DriverManager.getConnection(HIVE_URL, "", "");
        }
        return connToHive;
    }

    /**
     * Returns the shared MySQL connection, opening it if there is none or the cached one is closed.
     *
     * @return an open connection to the MySQL {@code hive} database
     * @throws SQLException if the MySQL JDBC driver is not on the classpath or the connection fails
     */
    public static Connection getMySQLConn() throws SQLException {
        if (connToMySQL == null || connToMySQL.isClosed()) {
            loadDriver(MYSQL_DRIVER);
            connToMySQL = DriverManager.getConnection(MYSQL_URL, MYSQL_USER, MYSQL_PASSWORD);
        }
        return connToMySQL;
    }

    /**
     * Closes the shared Hive connection, if one is open, and forgets it.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void closeHiveConn() throws SQLException {
        if (connToHive != null) {
            try {
                connToHive.close();
            } finally {
                // Drop the reference even when close() fails so the next getter reconnects.
                connToHive = null;
            }
        }
    }

    /**
     * Closes the shared MySQL connection, if one is open, and forgets it.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void closeMySQLConn() throws SQLException {
        if (connToMySQL != null) {
            try {
                connToMySQL.close();
            } finally {
                // Drop the reference even when close() fails so the next getter reconnects.
                connToMySQL = null;
            }
        }
    }

    /** Smoke test: opens the MySQL connection, prints it, and closes it again. */
    public static void main(String[] args) throws SQLException {
        try {
            System.out.println(getMySQLConn());
        } finally {
            closeMySQLConn();
        }
    }

    /**
     * Loads a JDBC driver class by name.
     *
     * <p>A missing driver is reported as an {@link SQLException} carrying the original
     * {@link ClassNotFoundException}, instead of terminating the JVM from inside a helper.
     */
    private static void loadDriver(String driverClass) throws SQLException {
        try {
            Class.forName(driverClass);
        } catch (ClassNotFoundException err) {
            throw new SQLException("JDBC driver not found on classpath: " + driverClass, err);
        }
    }
}
7) HiveUtil:工具類
package com.smq.hive;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Static helpers that run statements against Hive and copy query results into the MySQL
 * {@code hadooplog} table, using the shared connections handed out by {@code DBHelper}.
 */
public class HiveUtil {
    /** Parameterized insert; the auto-increment {@code id} column is left to MySQL. */
    private static final String INSERT_SQL =
            "insert into hadooplog (rdate, time, type, relateclass, information) "
                    + "values (?, ?, ?, ?, ?)";

    /**
     * Executes a table-creation statement on Hive.
     *
     * @param sql the DDL statement to run
     * @throws SQLException if the Hive connection cannot be obtained or the statement fails
     */
    public static void createTable(String sql) throws SQLException {
        executeOnHive(sql);
    }

    /**
     * Runs a query on Hive and returns its result set.
     *
     * <p>The statement behind the result set has to stay open while the caller reads the
     * rows, so it is not closed here; the caller should close the returned result set when
     * done. If the query itself fails, the statement is closed before the error propagates.
     *
     * @param sql the query to run
     * @return the open result set of the query
     * @throws SQLException if the Hive connection cannot be obtained or the query fails
     */
    public static ResultSet queryData(String sql) throws SQLException {
        Connection conn = DBHelper.getHiveConn();
        Statement stmt = conn.createStatement();
        try {
            return stmt.executeQuery(sql);
        } catch (SQLException err) {
            try {
                stmt.close();
            } catch (SQLException closeErr) {
                err.addSuppressed(closeErr);
            }
            throw err;
        }
    }

    /**
     * Executes a data-loading statement on Hive.
     *
     * @param sql the load statement to run
     * @throws SQLException if the Hive connection cannot be obtained or the statement fails
     */
    public static void loadData(String sql) throws SQLException {
        executeOnHive(sql);
    }

    /**
     * Copies every remaining row of {@code res} into the MySQL {@code hadooplog} table.
     *
     * <p>Columns 1-4 are stored as rdate, time, type and relateclass; columns 5-7 are
     * concatenated, without a separator, into the information column. Values are bound as
     * statement parameters, so quotes in the log text are stored unchanged and cannot break
     * the SQL. A null value among columns 5-7 contributes an empty string. The result set
     * is not closed by this method.
     *
     * @param res result set positioned before the first row to copy, with at least seven columns
     * @throws SQLException if reading from {@code res} or writing to MySQL fails
     */
    public static void hiveToMySQL(ResultSet res) throws SQLException {
        Connection conn = DBHelper.getMySQLConn();
        System.out.println(INSERT_SQL);
        try (PreparedStatement insert = conn.prepareStatement(INSERT_SQL)) {
            while (res.next()) {
                String information = nullToEmpty(res.getString(5))
                        + nullToEmpty(res.getString(6))
                        + nullToEmpty(res.getString(7));
                insert.setString(1, res.getString(1));
                insert.setString(2, res.getString(2));
                insert.setString(3, res.getString(3));
                insert.setString(4, res.getString(4));
                insert.setString(5, information);
                insert.executeUpdate();
            }
        }
    }

    /** Runs a statement that returns no rows on Hive and releases the statement afterwards. */
    private static void executeOnHive(String sql) throws SQLException {
        Connection conn = DBHelper.getHiveConn();
        try (Statement stmt = conn.createStatement()) {
            stmt.execute(sql);
        }
    }

    /** Maps {@code null} to the empty string so concatenation never yields the text "null". */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
8) AnalyszeHadoopLog:主類
package com.smq.hive;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Entry point of the log-analysis job: creates the Hive table, loads the Hadoop log file
 * into it, queries the INFO rows and stores them in MySQL.
 */
public class AnalyszeHadoopLog {
    /** DDL for the Hive staging table. */
    private static final String CREATE_TABLE_SQL =
            "create table if not exists loginfo( "
                    + "rdate string, "
                    + "time array<string>, "
                    + "type string, "
                    + "relateclass string, "
                    + "information1 string, "
                    + "information2 string, "
                    + "information3 string) "
                    + "row format delimited fields terminated by ' ' "
                    + "collection items terminated by ',' "
                    + "map keys terminated by ':'";

    /** Loads the local log file into the staging table, replacing its previous contents. */
    private static final String LOAD_DATA_SQL =
            "load data local inpath "
                    + "'/usr/local/mysql/sql/hadoop-root-namenode-master.log'"
                    + " overwrite into table loginfo";

    /** Selects the INFO rows, taking only the first element of the time array. */
    private static final String QUERY_INFO_SQL =
            "select rdate,time[0],type,relateclass,"
                    + "information1,information2,information3 "
                    + "from loginfo where type='INFO'";

    /**
     * Runs the whole pipeline once.
     *
     * <p>Both database connections are closed when the run ends, whether it succeeds or
     * fails part-way through.
     *
     * @param args unused
     * @throws SQLException if any Hive or MySQL operation fails
     */
    public static void main(String[] args) throws SQLException {
        try {
            // Step 1: create the table in Hive.
            System.out.println(CREATE_TABLE_SQL);
            HiveUtil.createTable(CREATE_TABLE_SQL);

            // Step 2: load the Hadoop log file.
            System.out.println(LOAD_DATA_SQL);
            HiveUtil.loadData(LOAD_DATA_SQL);

            // Step 3: query the useful rows; step 4: store them in MySQL.
            System.out.println(QUERY_INFO_SQL);
            try (ResultSet res = HiveUtil.queryData(QUERY_INFO_SQL)) {
                HiveUtil.hiveToMySQL(res);
            }
        } finally {
            // Steps 5 and 6: close the Hive connection, then the MySQL connection.
            // Nested so that a failure closing Hive does not skip closing MySQL.
            try {
                DBHelper.closeHiveConn();
            } finally {
                DBHelper.closeMySQLConn();
            }
        }
    }
}
9) eclipse操作步驟
a) 導入hive/lib下面的所有jar包
b) 導入mysql的jdbc驅動包
c) 啓動hadoop、mysql服務
d) 啓動hive遠程服務:
]# hive --service hiveserver &
發生異常:
Exception in thread "main" java.lang.ClassNotFoundException: org.apache.hadoop.hive.service.HiveServer
解決方案:
]# hive --service hiveserver2 &
e) 運行程序:run as → run on hadoop
發生異常1:
java.lang.ClassNotFoundException: org.apache.hadoop.hive.jdbc.HiveDriver
解決方案:
在hive1.2中,相關的目錄結構有變化
"org.apache.hadoop.hive.jdbc.HiveDriver";
改爲
"org.apache.hive.jdbc.HiveDriver";
如果採用hiveserver2的話,必須做如下改動:
"jdbc:hive://localhost:10002/default", "", "";
改爲
"jdbc:hive2://localhost:10002/default", "", ""
發生異常2:
Exception in thread "main" java.sql.SQLException: null, message from server: "Host '192.168.2.1' is not allowed to connect to this MySQL server"
解決方案:
mysql> GRANT ALL PRIVILEGES ON *.* TO 'myuser'@'%' IDENTIFIED BY 'mypassword' WITH GRANT OPTION;
mysql> FLUSH PRIVILEGES;
f) 登錄mysql查看:select * from hadooplog;
10)異常統計
從中看出在學習hadoop期間,異常發生比例較低,學習過程比較順利。
在下圖中可以發現,datanode比namenode更容易發生異常
11)工作量統計
上下圖都可以看出學習量最大的月份是6,7月
下圖反映出,學習的時間主要在中午至凌晨,多出現於週末