參考資料:https://blog.csdn.net/qq_38689769/article/details/79471332
參考資料:https://blog.csdn.net/Dr_Guo/article/details/77374403?locationNum=9&fps=1
pom.xml:
<!--讀取excel文件-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.10-FINAL</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.10-FINAL</version>
</dependency>
數據:
代碼:
import java.io.FileInputStream
import com.emg.join.model.{AA, BB}
import org.apache.poi.ss.usermodel.Cell
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import scala.collection.mutable.ListBuffer
object Excels {

  /**
   * Reads numeric ids from the first column of the first sheet of an .xlsx
   * file (skipping the header row) via Apache POI, wraps each value in a
   * `BB`, and writes the resulting dataset out as text through Spark SQL.
   *
   * Note: the original snippet put all statements directly in the object
   * body and referenced an undefined `outPath`; a proper `main` entry point
   * and an explicit output path are added here.
   *
   * @param args optional: args(0) overrides the default output path.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("join")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setMaster("local[*]")
      .registerKryoClasses(Array[Class[_]](AA.getClass, BB.getClass))
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    val filePath = "c:\\user\\id.xlsx"
    //val filePath1 = "hdfs://192.168.40.0:9000/user/id.xlsx"
    // Fix: `outPath` was used below but never defined (compile error in the original).
    val outPath = if (args.nonEmpty) args(0) else "c:\\user\\out"

    val data = new ListBuffer[BB]()
    val fs = new FileInputStream(filePath)
    try {
      val workbook: XSSFWorkbook = new XSSFWorkbook(fs)
      val sheet = workbook.getSheetAt(0)               // first sheet
      val rowCount = sheet.getPhysicalNumberOfRows()   // total physical rows
      // Row 0 is assumed to be the header; data rows start at index 1.
      for (i <- 1 until rowCount) {
        val row = sheet.getRow(i)
        val cellwellname: Cell = row.getCell(0)
        // Use the named POI constant instead of the magic number 0.
        // Non-numeric cells fall back to 0L, matching the original behavior.
        val wellname: Long =
          if (cellwellname.getCellType == Cell.CELL_TYPE_NUMERIC)
            cellwellname.getNumericCellValue.toLong
          else 0L
        data += BB(wellname)
      }
    } finally {
      // Fix: the stream was never closed in the original (resource leak).
      fs.close()
    }

    val data1 = spark.createDataset(data)
    data1.createTempView("data1")
    val result = spark.sql("select * from data1").coalesce(1)
    result.rdd.saveAsTextFile(outPath)
    spark.stop()
  }
}
注意:
當路徑爲本地文件系統時,程序可以正常運行;當路徑爲 HDFS(hdfs://…)時,會報「找不到路徑」的錯誤,查了查資料還是沒能解決!(注:代碼中用的是 java.io.FileInputStream,它只能讀取本地文件,無法直接打開 hdfs:// 路徑;要讀 HDFS,需改用 Hadoop 的 FileSystem API 獲取輸入流,再交給 XSSFWorkbook。)
有解決方法記得回覆哈。