Modify OrderInfoApp
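OrderInfoApp consumes order records from the ODS_T_ORDER_INFO Kafka topic, decides for each order whether it is the user's first order by checking the user_state table in Phoenix (with de-duplication for users who place several orders within the same batch), enriches each order with the province dimension broadcast from gmall_province_info, and finally writes the new first-order users back to user_state before committing the Kafka offsets.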
import com.alibaba.fastjson.{JSON, JSONObject}
import com.atguigu.gmall.realtime.bean.{OrderInfo, UserState}
import com.atguigu.gmall.realtime.utils.{MyKafkaUtil, OffsetManagerUtil, PhoenixUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
object OrderInfoApp {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("dw_order_info_app")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    val topic = "ODS_T_ORDER_INFO"
    val groupId = "base_order_info_group"

    // Resume from the offsets saved after the last successful batch, if any;
    // otherwise fall back to the consumer group's default starting position.
    val offset: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(groupId, topic)
    var inputDstream: InputDStream[ConsumerRecord[String, String]] = null
    if (offset != null && offset.nonEmpty) {
      inputDstream = MyKafkaUtil.getKafkaStream(topic, ssc, offset, groupId)
    } else {
      inputDstream = MyKafkaUtil.getKafkaStream(topic, ssc, groupId)
    }

    // Capture each batch's offset ranges on the driver so they can be
    // committed after the batch has been processed.
    var offsetRanges: Array[OffsetRange] = null
    val inputGetOffsetDstream: DStream[ConsumerRecord[String, String]] = inputDstream.transform { rdd =>
      offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd
    }
    // Parse each record into an OrderInfo bean and derive the date and hour
    // fields from create_time (format "yyyy-MM-dd HH:mm:ss").
    val orderInfoDstream: DStream[OrderInfo] = inputGetOffsetDstream.map { record =>
      val jsonString: String = record.value()
      val orderInfo: OrderInfo = JSON.parseObject(jsonString, classOf[OrderInfo])
      val datetimeArr: Array[String] = orderInfo.create_time.split(" ")
      orderInfo.create_date = datetimeArr(0)
      val timeArr: Array[String] = datetimeArr(1).split(":")
      orderInfo.create_hour = timeArr(0)
      orderInfo
    }
    // Query the user_state table in Phoenix once per partition (not per record)
    // to decide whether each order is the user's first order.
    val orderInfoWithFirstDstream: DStream[OrderInfo] = orderInfoDstream.mapPartitions { orderInfoItr =>
      val orderInfoList: List[OrderInfo] = orderInfoItr.toList
      if (orderInfoList.nonEmpty) {
        val userIdList: List[String] = orderInfoList.map(_.user_id.toString)
        val sql = "select user_id,if_consumed from user_state where user_id in ('" + userIdList.mkString("','") + "')"
        val userStateList: List[JSONObject] = PhoenixUtil.queryList(sql)
        val userStateMap: Map[String, String] = userStateList.map(userStateJsonObj =>
          (userStateJsonObj.getString("USER_ID"), userStateJsonObj.getString("IF_CONSUMED"))
        ).toMap
        for (orderInfo <- orderInfoList) {
          val userIfConsumed: String = userStateMap.getOrElse(orderInfo.user_id.toString, null)
          if (userIfConsumed != null && userIfConsumed == "1") {
            orderInfo.if_first_order = "0" // the user has consumed before: not a first order
          } else {
            orderInfo.if_first_order = "1" // no state found: tentatively a first order
          }
        }
      }
      orderInfoList.toIterator
    }
    // A user may place several orders within one batch; all of them were just
    // flagged "1" above, but only the earliest one should keep the flag.
    val orderInfoWithUidDstream: DStream[(Long, OrderInfo)] =
      orderInfoWithFirstDstream.map(orderInfo => (orderInfo.user_id, orderInfo))
    val orderInfoGroupbyUidDstream: DStream[(Long, Iterable[OrderInfo])] = orderInfoWithUidDstream.groupByKey()
    val orderInfoFinalFirstDstream: DStream[OrderInfo] = orderInfoGroupbyUidDstream.flatMap { case (userId, orderInfoItr) =>
      val orderInfoList: List[OrderInfo] = orderInfoItr.toList
      if (orderInfoList.head.if_first_order == "1" && orderInfoList.size > 1) {
        // Sort the user's orders by create_time and demote all but the first.
        val orderInfoSortedList: List[OrderInfo] = orderInfoList.sortWith { (orderInfo1, orderInfo2) =>
          orderInfo1.create_time < orderInfo2.create_time
        }
        for (i <- 1 until orderInfoSortedList.size) {
          orderInfoSortedList(i).if_first_order = "0"
        }
        orderInfoSortedList.toIterator
      } else {
        orderInfoList.toIterator
      }
    }
    // Join the province dimension: transform runs its closure on the driver once
    // per batch, so the dimension table is reloaded from Phoenix and re-broadcast
    // every batch interval, picking up dimension changes automatically.
    val orderInfoWithProvinceDstream: DStream[OrderInfo] = orderInfoFinalFirstDstream.transform { rdd =>
      val sql = "select id,name,region_id,area_code from gmall_province_info"
      val provinceJsonObjList: List[JSONObject] = PhoenixUtil.queryList(sql)
      val provinceJsonObjMap: Map[Long, JSONObject] = provinceJsonObjList.map { jsonObj =>
        (jsonObj.getLongValue("ID"), jsonObj)
      }.toMap
      val provinceJsonObjMapBc: Broadcast[Map[Long, JSONObject]] = ssc.sparkContext.broadcast(provinceJsonObjMap)
      val orderInfoWithProvinceRDD: RDD[OrderInfo] = rdd.mapPartitions { orderInfoItr =>
        val provinceJsonObjMap: Map[Long, JSONObject] = provinceJsonObjMapBc.value
        val orderInfoList: List[OrderInfo] = orderInfoItr.toList
        for (orderInfo <- orderInfoList) {
          val provinceJsonObj: JSONObject = provinceJsonObjMap.getOrElse(orderInfo.province_id, null)
          if (provinceJsonObj != null) {
            orderInfo.province_name = provinceJsonObj.getString("NAME")
            println(orderInfo.province_name + "-----------") // debug output
            orderInfo.province_area_code = provinceJsonObj.getString("AREA_CODE")
          }
        }
        orderInfoList.toIterator
      }
      orderInfoWithProvinceRDD
    }
    orderInfoWithProvinceDstream.cache()
    orderInfoWithProvinceDstream.print(1000)

    orderInfoWithProvinceDstream.foreachRDD { rdd =>
      // Persist the first-order users back to user_state so later batches see
      // them as already consumed, then commit the Kafka offsets for this batch.
      val userStateRDD: RDD[UserState] = rdd.filter(_.if_first_order == "1").map(orderInfo =>
        UserState(orderInfo.user_id.toString, orderInfo.if_first_order)
      )
      import org.apache.phoenix.spark._
      userStateRDD.saveToPhoenix("user_state",
        Seq("USER_ID", "IF_CONSUMED"),
        new Configuration,
        Some("hadoop102,hadoop103,hadoop104:2181"))
      OffsetManagerUtil.saveOffset(groupId, topic, offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
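PhoenixUtil.queryList is used above but its implementation is not shown here. A minimal sketch, assuming a plain JDBC connection to Phoenix with the same ZooKeeper quorum as the saveToPhoenix calls (the JDBC URL and driver setup below are assumptions, not necessarily the project's actual utility):

import java.sql.{Connection, DriverManager, ResultSet}
import com.alibaba.fastjson.JSONObject
import scala.collection.mutable.ListBuffer

object PhoenixUtil {
  // Runs a SQL query against Phoenix over JDBC and returns each row as a
  // JSONObject keyed by column name (Phoenix upper-cases unquoted identifiers,
  // which is why the callers read "USER_ID", "NAME", etc.).
  def queryList(sql: String): List[JSONObject] = {
    val resultList = new ListBuffer[JSONObject]()
    Class.forName("org.apache.phoenix.jdbc.PhoenixDriver")
    // ZooKeeper quorum is an assumption matching the cluster used above.
    val conn: Connection = DriverManager.getConnection("jdbc:phoenix:hadoop102,hadoop103,hadoop104:2181")
    val stmt = conn.createStatement()
    val rs: ResultSet = stmt.executeQuery(sql)
    val md = rs.getMetaData
    while (rs.next()) {
      val rowJson = new JSONObject()
      for (i <- 1 to md.getColumnCount) {
        rowJson.put(md.getColumnName(i), rs.getObject(i))
      }
      resultList += rowJson
    }
    rs.close()
    stmt.close()
    conn.close()
    resultList.toList
  }
}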
ProvinceInfoApp
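ProvinceInfoApp maintains the province dimension: it consumes change records from the ODS_T_BASE_PROVINCE topic, parses them into ProvinceInfo beans, and upserts them into the GMALL_PROVINCE_INFO table in Phoenix, committing the Kafka offsets only after the write succeeds.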
import com.alibaba.fastjson.JSON
import com.atguigu.gmall.realtime.bean.ProvinceInfo
import com.atguigu.gmall.realtime.utils.{MyKafkaUtil, OffsetManagerUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.phoenix.spark._
object ProvinceInfoApp {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setAppName("province_info_app").setMaster("local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    val groupId = "base_province_group"
    val topic = "ODS_T_BASE_PROVINCE"

    // Resume from saved offsets if this consumer group has consumed before.
    val offsets: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(groupId, topic)
    var inputDstream: InputDStream[ConsumerRecord[String, String]] = null
    if (offsets != null && offsets.nonEmpty) {
      inputDstream = MyKafkaUtil.getKafkaStream(topic, ssc, offsets, groupId)
    } else {
      inputDstream = MyKafkaUtil.getKafkaStream(topic, ssc, groupId)
    }

    // Capture each batch's offset ranges so they can be committed after the write.
    var offsetRanges: Array[OffsetRange] = Array.empty[OffsetRange]
    val inputGetOffsetDstream: DStream[ConsumerRecord[String, String]] = inputDstream.transform { rdd =>
      offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd
    }

    val provinceInfoDstream: DStream[ProvinceInfo] = inputGetOffsetDstream.map { record =>
      val jsonString: String = record.value()
      val provinceInfo: ProvinceInfo = JSON.parseObject(jsonString, classOf[ProvinceInfo])
      provinceInfo
    }

    provinceInfoDstream.cache()
    provinceInfoDstream.print(1000)

    provinceInfoDstream.foreachRDD { rdd =>
      // Upsert the province dimension into Phoenix, then commit the offsets.
      rdd.saveToPhoenix("GMALL_PROVINCE_INFO", Seq("ID", "NAME", "REGION_ID", "AREA_CODE"),
        new Configuration, Some("hadoop102,hadoop103,hadoop104:2181"))
      OffsetManagerUtil.saveOffset(groupId, topic, offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
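Both apps rely on OffsetManagerUtil to resume from and commit Kafka offsets manually instead of relying on auto-commit. A minimal sketch, assuming offsets are kept in Redis as one hash per topic and group (the Redis host, port, and key layout are assumptions):

import java.util
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.kafka010.OffsetRange
import redis.clients.jedis.Jedis
import scala.collection.JavaConverters._

object OffsetManagerUtil {
  // Reads the last committed offsets for a consumer group from a Redis hash.
  // Key layout offset:<topic>:<groupId> -> {partition: offset} is an assumption.
  def getOffset(groupId: String, topic: String): Map[TopicPartition, Long] = {
    val jedis = new Jedis("hadoop102", 6379) // host/port are assumptions
    val offsetMap: util.Map[String, String] = jedis.hgetAll("offset:" + topic + ":" + groupId)
    jedis.close()
    if (offsetMap == null || offsetMap.isEmpty) {
      null // caller falls back to the group's default starting position
    } else {
      offsetMap.asScala.map { case (partition, offset) =>
        (new TopicPartition(topic, partition.toInt), offset.toLong)
      }.toMap
    }
  }

  // Writes the end offsets of the just-processed batch back to Redis, so the
  // next start resumes where this batch left off (at-least-once semantics,
  // because the data write and the offset commit are not atomic).
  def saveOffset(groupId: String, topic: String, offsetRanges: Array[OffsetRange]): Unit = {
    if (offsetRanges != null && offsetRanges.nonEmpty) {
      val offsetMap = new util.HashMap[String, String]()
      for (offsetRange <- offsetRanges) {
        offsetMap.put(offsetRange.partition.toString, offsetRange.untilOffset.toString)
      }
      val jedis = new Jedis("hadoop102", 6379)
      jedis.hmset("offset:" + topic + ":" + groupId, offsetMap)
      jedis.close()
    }
  }
}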
Test
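A rough verification flow (exact startup steps depend on your cluster setup): start ZooKeeper, Kafka, HBase, and the offset store, run ProvinceInfoApp, change a row in the source base_province table so a record reaches ODS_T_BASE_PROVINCE, and confirm the row appears in GMALL_PROVINCE_INFO from the Phoenix shell. With the dimension in place, run OrderInfoApp, generate order data, and check the printed stream: a user's first order should show if_first_order = "1" (with province_name and province_area_code filled in), and any later order from the same user should show "0".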