Preface
Spark's Web UI can show some job information, but the UI lives only as long as the job itself, so it is not convenient for looking up job information at any time.
This post therefore introduces two monitoring approaches: Spark's built-in monitoring, and building your own.
Official docs: http://spark.apache.org/docs/latest/monitoring.html
I. Spark's Built-in Monitoring
Spark ships with a history server script; once the relevant parameters are configured, job information is saved and can be reviewed after the fact through the history server.
1. Configure storage
# vi spark-defaults.conf: enable the event log and set where it is stored
spark.eventLog.enabled true
spark.eventLog.dir hdfs://mycluster/log/spark-events
# the directory must exist first: hdfs dfs -mkdir -p /log/spark-events
# vi spark-env.sh: point the history server at the same directory
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://mycluster/log/spark-events"
# default port is 18080; on a cloud host, change it to reduce the risk of crypto-mining attacks
# export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://mycluster/log/spark-events -Dspark.history.ui.port=23456"
2. Start the service
./sbin/start-history-server.sh
# verify with jps: you should see a HistoryServer process
3. View in the Web UI: master:18080
II. Building Your Own Monitoring
Spark also provides a REST API (a minimal sketch below), but here we build our monitoring on SparkListener.
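For reference, the history server exposes completed applications under /api/v1/applications. A minimal sketch of querying it from Scala (RestApiDemo is a hypothetical name; master:18080 matches the host and port from section I):
import scala.io.Source

object RestApiDemo {
  def main(args: Array[String]): Unit = {
    // GET the application list from the history server's monitoring API
    val json = Source.fromURL("http://master:18080/api/v1/applications", "UTF-8").mkString
    println(json) // a JSON array of application summaries (id, name, attempts, ...)
  }
}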
1. Extend SparkListener and override its methods; this exposes a lot of job information, which we then persist to a DB.
import com.wsd.spark.utils.DBUtils
import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.scheduler._
import org.json4s.DefaultFormats
import org.json4s.jackson.Json

import scala.collection.JavaConverters._
import scala.collection.mutable

/**
 * Collects appName, jobId, start time, end time, number of stages, number of tasks,
 * bytes read in, bytes shuffled, and bytes written out.
 * TODO: much more information is available; persist it all to HBase/Redis/RDBMS
 * TODO: how to get the Spark job's name? (see conf.get("spark.app.name") below)
 *
 * @author wsd
 * @date 2020/3/25 11:35
 */
class SparkListenerDemo(conf: SparkConf) extends SparkListener with Logging {

  // keys under which the collected job information is stored
  val appNameKey = "appName"
  val jobIdKey = "jobId"
  val jobStartTimeKey = "jobStartTime"
  val jobEndTimeKey = "jobEndTime"
  val stageNumKey = "stageNum"
  val taskNumKey = "taskNum"
  val inputBytesReadKey = "inputBytesRead"
  val shuffleBytesKey = "shuffleBytes"
  val bytesWrittenKey = "bytesWritten"

  // running task count
  var counter = 0

  // job information is accumulated in this map
  val map = new mutable.HashMap[String, String]()

  /**
   * Fired when a job starts
   * ==> appName
   * jobId
   * jobStartTime
   *
   * @param jobStart
   */
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    val appName = conf.get("spark.app.name")
    val jobId = jobStart.jobId
    val jobStartTime = jobStart.time
    map.put(appNameKey, appName)
    map.put(jobIdKey, jobId.toString)
    map.put(jobStartTimeKey, jobStartTime.toString)
    //logError(appName + " " + jobId + " " + jobStartTime)
  }

  /**
   * Fired when a task finishes
   * ==> taskNum
   * inputBytesRead
   * shuffleBytes
   * bytesWritten
   *
   * @param taskEnd
   */
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val metrics = taskEnd.taskMetrics
    /**
     * Metrics accumulate across tasks; sample values from a two-task run:
     * inputBytesRead: 19 + 37 = 56
     * shuffleReadMetrics: 51 + 119 = 170
     * shuffleWriteMetrics: 58 + 112 = 170
     * outputBytesWritten: 14 + 32 = 46
     */
    val inputBytesRead = map.getOrElse(inputBytesReadKey, "0").toLong + metrics.inputMetrics.bytesRead
    val shuffleBytes = map.getOrElse(shuffleBytesKey, "0").toLong + metrics.shuffleWriteMetrics.bytesWritten
    val bytesWritten = map.getOrElse(bytesWrittenKey, "0").toLong + metrics.outputMetrics.bytesWritten
    counter += 1
    map.put(taskNumKey, counter.toString)
    map.put(inputBytesReadKey, inputBytesRead.toString)
    map.put(shuffleBytesKey, shuffleBytes.toString)
    map.put(bytesWrittenKey, bytesWritten.toString)
  }

  /**
   * Fired when the job ends
   *
   * ==> jobEndTime
   * @param jobEnd
   */
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    val jobEndTime = jobEnd.time
    map.put(jobEndTimeKey, jobEndTime.toString)
    // logged at ERROR level only so the output stands out; lower it for production
    logError(Json(DefaultFormats).write(map))
    // Scala map ==> Java map, via the JavaConverters import above
    val javaMap = map.asJava
    DBUtils.add(javaMap)
  }
}
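Note that stageNumKey is declared but never filled above. One possible way to populate it (my sketch, not part of the original code) is to also override onStageCompleted inside SparkListenerDemo:
// Sketch: count completed stages so stageNum is persisted too.
override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
  val stageNum = map.getOrElse(stageNumKey, "0").toLong + 1
  map.put(stageNumKey, stageNum.toString)
}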
2. DBUtils
import java.sql.*;
import java.util.Map;

/**
 * MySQL utility class
 *
 * @author wsd
 * @date 2020/3/26 10:27
 */
public class DBUtils {

    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            // for MySQL Connector/J 8.x, load this driver instead:
            // Class.forName("com.mysql.cj.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    private static Connection getConnection() {
        try {
            return DriverManager.getConnection("jdbc:mysql://localhost:3306/ruozedata?serverTimezone=UTC"
                    , "root", "123456");
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Expected keys (the constants defined in SparkListenerDemo):
     * appName, jobId, jobStartTime, jobEndTime, stageNum, taskNum,
     * inputBytesRead, shuffleBytes, bytesWritten.
     * Assumes a spark_job_info table whose columns match the INSERT below.
     *
     * @param map
     */
    public static boolean add(Map<String, String> map) {
        Connection conn = null;
        PreparedStatement psmt = null;
        try {
            conn = getConnection();
            psmt = conn.prepareStatement("insert into spark_job_info (appname,jobid,jobstarttime,jobendtime,tasknum,inputsize,shufflesize,outputsize) values(?,?,?,?,?,?,?,?)");
            System.out.println(map.size());
            psmt.setString(1, map.getOrDefault("appName", "-"));
            psmt.setString(2, map.getOrDefault("jobId", "-"));
            psmt.setString(3, map.getOrDefault("jobStartTime", "-"));
            psmt.setString(4, map.getOrDefault("jobEndTime", "-"));
            psmt.setString(5, map.getOrDefault("taskNum", "-"));
            psmt.setString(6, map.getOrDefault("inputBytesRead", "-"));
            psmt.setString(7, map.getOrDefault("shuffleBytes", "-"));
            psmt.setString(8, map.getOrDefault("bytesWritten", "-"));
            // executeUpdate() returns the affected row count; execute() would
            // return false for an INSERT even when it succeeds
            return psmt.executeUpdate() > 0;
        } catch (SQLException e) {
            e.printStackTrace();
            return false;
        } finally {
            close(conn, psmt, null);
        }
    }

    /**
     * Close JDBC resources
     *
     * @param conn
     * @param st
     * @param set
     */
    private static void close(Connection conn, PreparedStatement st, ResultSet set) {
        try {
            if (set != null) {
                set.close();
            }
            if (st != null) {
                st.close();
            }
            if (conn != null) {
                conn.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // quick connectivity check
    public static void main(String[] args) {
        Connection connection = getConnection();
        System.out.println(connection);
    }
}
3. Usage: register the listener through sparkConf.set
val sparkConf = new SparkConf() //.setMaster("local").setAppName("SparkWC")
.set("spark.extraListeners", "com.wsd.sparkcore.listener.SparkListenerDemo")
III. Summary
The above is only a simple use of SparkListener to capture job information. Where to store the information once you have it, and how to alert through email or SMS, must be developed further around your own business; the sketch below shows one possible starting point.
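For instance, job duration is a natural alert trigger. A hedged sketch to drop into onJobEnd (sendAlert and the 60-second threshold are assumptions, not part of the original code):
// Inside onJobEnd, after jobEndTimeKey has been stored:
val durationMs = jobEnd.time - map.getOrElse(jobStartTimeKey, "0").toLong
if (durationMs > 60000) { // assumed threshold: 60s
  // sendAlert(s"Job ${map.getOrElse(jobIdKey, "-")} took $durationMs ms") // hypothetical helper
}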