本文介紹在 Spring Boot 下使用 Spark 2 操作 Elasticsearch 6 的方法。
1、環境
springboot 2.1.3.RELEASE
es: elasticsearch 6.4.3
spark : 2.4.3
scala: 2.11.12
本人MAC本相關環境變量配置如下:
JAVA_HOME="/Library/Java/JavaVirtualMachines/jdk1.8.0_152.jdk/Contents/Home"
CLASS_PATH="$JAVA_HOME/lib"
PATH=".:$PATH:$JAVA_HOME/bin"
export M2_HOME="/Applications/dev/apache-maven-3.5.3"
export SCALA_HOME="/Users/xxxx/Documents/programs/scala-2.11.12"
export SPARK_HOME="/Users/xxxx/Documents/programs/spark-2.4.3-bin-hadoop2.7"
export PATH="$SPARK_HOME/bin:$SCALA_HOME/bin:$M2_HOME/bin:$PATH"
2、springboot maven配置
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Versions aligned with the environment listed above: Spark 2.4.3, ES 6.4.3.
     (The original snippet declared 2.3.3 / 6.3.2, contradicting the stated setup.) -->
<spark.version>2.4.3</spark.version>
<hadoop.version>2.6.4</hadoop.version>
<es.hadoop.version>6.4.3</es.hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Use the Spark-specific connector below instead of the full
     org.elasticsearch:elasticsearch-hadoop artifact; it is sufficient
     when only the Spark integration is needed. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_2.11</artifactId>
<version>${es.hadoop.version}</version>
</dependency>
</dependencies>
3、demo
/**
 * Demo: pulls one row from Hive, maps it to {@link UserES} beans, and writes
 * the result to the "user/docs" ES index using "id" as the document id.
 * The session is always released in the finally block.
 */
private void demoSaveES() {
    String esResource = "user/docs";
    SparkSession session = null;
    IESDAC esDAC = SpringBootBeanUtils.getBean("esDAC", IESDAC.class);
    IDAC hiveDAC = SpringBootBeanUtils.getBean("hiveDAC", IDAC.class);
    // Sample fixture data (not written to ES in this demo).
    ArrayList<UserES> users = Lists.newArrayList(
            UserES.builder().id(1001L).userName("張三").userPhone("178133348888").build(),
            UserES.builder().id(1002L).userName("李四").userPhone("17979188888").build(),
            UserES.builder().id(1004L).userName("王五").userPhone("123003188888").build()
    );
    try {
        session = hiveDAC.createSparkSession();
        Dataset<Row> hiveRows = session.sql("SELECT * FROM aa LIMIT 1 ");
        // Cache before the two actions (show + save) to avoid recomputing the query.
        hiveRows.persist(StorageLevel.MEMORY_ONLY()).show();
        Dataset<UserES> esRows = hiveRows.map(new MapFunction<Row, UserES>() {
            @Override
            public UserES call(Row row) throws Exception {
                // assumes column "AA" holds a numeric string — TODO confirm schema
                Long id = Long.valueOf(row.getAs("AA"));
                return UserES.builder()
                        .id(id)
                        .userName(row.getAs("bb"))
                        .userPhone("13890909999")
                        .build();
            }
        }, Encoders.bean(UserES.class));
        esRows.show();
        esDAC.save2DbWithMappingId(esRows, esResource, "id");
    } finally {
        // Safe when session is still null — Stop() null-checks its argument.
        hiveDAC.Stop(session);
    }
}
@Service("esDAC")
public class EsDAC implements IESDAC {

    @Autowired
    private ESConfig esConfig;

    /**
     * Creates (or reuses) a Spark session pre-configured for the ES connector:
     * node host/port from {@link ESConfig} and automatic index creation.
     *
     * @return a {@link SparkSession}; getOrCreate() may return an existing one
     */
    @Override
    public SparkSession createSparkSession() {
        return SparkSession.builder()
                .appName("ESHadoop")
                // NOTE(review): hard-coded local master — externalize for cluster deploys.
                .master("local[3]")
                .config("es.nodes", esConfig.getHost())
                .config("es.port", esConfig.getPort())
                // Set "es.nodes.wan.only" to "true" if ES sits behind NAT / a cloud LB.
                .config("es.index.auto.create", "true")
                .getOrCreate();
    }

    /**
     * Executes the given SQL on the supplied session.
     * (Previously an unimplemented stub that returned null.)
     *
     * @param sparkSession active session to run the query on
     * @param sql          SQL text to execute
     * @return the query result as a Dataset of rows
     */
    @Override
    public Dataset<Row> execSql(SparkSession sparkSession, String sql) {
        return sparkSession.sql(sql);
    }

    /**
     * Stops the Spark session and releases its resources; a null session is a no-op.
     * (Name kept as-is: it overrides the IESDAC interface method.)
     *
     * @param sparkSession session to stop, may be null
     */
    @Override
    public void Stop(SparkSession sparkSession) {
        if (sparkSession == null) {
            return;
        }
        sparkSession.stop();
    }

    /**
     * Saves a dataset to ES. NOTE(review): saveMode is currently ignored by the
     * ES connector path — documented here rather than silently dropped.
     *
     * @return 1 on success (legacy convention of this DAC layer)
     */
    @Override
    public <T> int save2Db(Dataset<T> dataFrame, SaveMode saveMode, String tableName) {
        return save2ES(dataFrame, tableName, null);
    }

    /**
     * Single write path to ES; all public save methods funnel through here.
     *
     * @param mapParams optional per-write connector settings; null/empty means defaults
     */
    private <T> int save2ES(Dataset<T> dataFrame, String tableName, Map<String, String> mapParams) {
        if (mapParams == null || mapParams.isEmpty()) {
            JavaEsSparkSQL.saveToEs(dataFrame, tableName);
        } else {
            JavaEsSparkSQL.saveToEs(dataFrame, tableName, mapParams);
        }
        return 1;
    }

    @Override
    public <T> int save2DbWithAppend(Dataset<T> dataFrame, String tableName) {
        return save2Db(dataFrame, SaveMode.Append, tableName);
    }

    /**
     * Saves a dataset to ES using an existing field as the document id, so
     * repeated writes upsert instead of duplicating documents.
     *
     * @param mappingId name of the field to use as "_id"; blank means auto-generated ids
     */
    @Override
    public <T> int save2DbWithMappingId(Dataset<T> dataFrame, String tableName, String mappingId) {
        Map<String, String> params = new HashMap<>();
        if (StringUtils.isNotBlank(mappingId)) {
            params.put("es.mapping.id", mappingId);
        }
        params.put("es.nodes", esConfig.getClusterNodes());
        params.put("es.port", esConfig.getPort());
        // Delegate to the common write path instead of calling saveToEs directly.
        return save2ES(dataFrame, tableName, params);
    }
}
ES 問題解決:若 Spring Boot 啟動時出現「Elasticsearch health check failed」,可參考:
https://blog.csdn.net/CharlesYooSky/article/details/90405699