需求
主訂單的應付金額【original_total_amount】一般是由所有訂單明細的「商品單價 × 數量」彙總【sku_price * sku_num】組成。
但是由於優惠、運費等都是以訂單爲單位進行計算的,所以減掉優惠、加上運費會得到一個最終實付金額【final_total_amount】。
但問題在於如果是以商品進行交易額分析,也要把優惠、運費的效果分攤到購買的每個商品中。
如何分攤
一般是由訂單明細每種商品的消費佔總訂單的比重進行分攤,比如總價1000元的商品,
分別由600元和400元的A、B兩種商品組成,但是經過打折和加運費後,實際付款金額變爲810元,那麼A的分攤實付金額爲486元,B的分攤實付金額爲324元。
複雜情況
由於明細的分攤是由佔比而得,那就會進行除法,除法就有可能出現除不盡的情況。
比如:原價90元 ,三種商品每件30元。沒有優惠但有10元運費,總實付金額爲100元。按佔比分攤各三分之一,就會出現三個33.33元。加起來就會出現99.99元。就會出現差一分錢的情況。
而我們要求所有訂單明細的實付分攤加總必須和訂單的總實付相等。
所以我們要的是100=33.33+33.33+33.34
解決思路
分攤計算分成兩種情況:
不是最後一筆: 按照佔比,用除法求分攤金額
最後一筆: 用減法,即用主表的總實付金額 - 之前所有明細已求得的分攤彙總值
如何判斷是最後一筆
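如果訂單明細有10條,且分佈在不同的批次中,就無法只憑當前批次的數據來判斷。判斷方法:把已經處理過的兄弟明細的【sku_price * sku_num】彙總值,加上當前明細的【sku_price * sku_num】;如果結果等於主訂單的應付金額【original_total_amount】,那麼當前明細就是最後一筆。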
雙流合併
由於兩個流的數據是獨立保存,獨立消費,很有可能同一業務的數據,分佈在不同的批次。因爲join算子只join同一批次的數據。如果只用簡單的join流方式,會丟失掉不同批次的數據。
1. 利用滑動窗口進行join 然後再進行去重
2. 把數據存入緩存,關聯時進行join後 再去查詢緩存中的數據,來彌補不同批次的問題。
添加OrderDetailWide
在scala\com\atguigu\gmall\realtime\bean\OrderDetailWide.scala
/**
 * Wide (denormalized) record combining one order-detail row with the
 * attributes of the order it belongs to.
 *
 * All fields are mutable and default to zero/null so that an empty instance
 * can be created first and then filled in by [[mergeOrderInfo]] and
 * [[mergeOrderDetail]]. Neither merge method sets `final_detail_amount`;
 * it keeps its default until a caller assigns it.
 */
case class OrderDetailWide(
  var order_detail_id: Long = 0L,
  var order_id: Long = 0L,
  var order_status: String = null,
  var create_time: String = null,
  var user_id: Long = 0L,
  var sku_id: Long = 0L,
  var sku_price: Double = 0D,
  var sku_num: Long = 0L,
  var sku_name: String = null,
  var benefit_reduce_amount: Double = 0D,
  // Order-level original amount = sum over all details of (unit price * quantity).
  var original_total_amount: Double = 0D,
  var feight_fee: Double = 0D,
  // Order-level amount actually paid.
  var final_total_amount: Double = 0D,
  // Detail-level share of the amount actually paid.
  var final_detail_amount: Double = 0D,
  var if_first_order: String = null,
  var province_name: String = null,
  var province_area_code: String = null,
  var user_age_group: String = null,
  var user_gender: String = null,
  var dt: String = null,
  var spu_id: Long = 0L,
  var tm_id: Long = 0L,
  var category3_id: Long = 0L,
  var spu_name: String = null,
  var tm_name: String = null,
  var category3_name: String = null
) {

  /**
   * Builds a wide record from an order and one of its detail rows.
   * Either argument may be null, in which case the corresponding fields
   * keep their defaults.
   */
  def this(orderInfo: OrderInfo, orderDetail: OrderDetail) = {
    this()
    mergeOrderInfo(orderInfo)
    mergeOrderDetail(orderDetail)
  }

  /**
   * Copies the order-level attributes from `orderInfo` into this record.
   * Does nothing when `orderInfo` is null.
   */
  def mergeOrderInfo(orderInfo: OrderInfo): Unit =
    Option(orderInfo).foreach { info =>
      // identity / status
      order_id = info.id
      user_id = info.user_id
      order_status = info.order_status
      if_first_order = info.if_first_order
      // time
      create_time = info.create_time
      dt = info.create_date
      // amounts
      original_total_amount = info.original_total_amount
      benefit_reduce_amount = info.benefit_reduce_amount
      feight_fee = info.feight_fee
      final_total_amount = info.final_total_amount
      // user / region dimensions
      province_name = info.province_name
      province_area_code = info.province_area_code
      user_age_group = info.user_age_group
      user_gender = info.user_gender
    }

  /**
   * Copies the detail-level attributes from `orderDetail` into this record.
   * Does nothing when `orderDetail` is null.
   */
  def mergeOrderDetail(orderDetail: OrderDetail): Unit =
    Option(orderDetail).foreach { detail =>
      order_detail_id = detail.id
      // SKU attributes; note that sku_price is taken from the detail's order_price
      sku_id = detail.sku_id
      sku_name = detail.sku_name
      sku_price = detail.order_price
      sku_num = detail.sku_num
      // product dimensions
      spu_id = detail.spu_id
      spu_name = detail.spu_name
      tm_id = detail.tm_id
      tm_name = detail.tm_name
      category3_id = detail.category3_id
      category3_name = detail.category3_name
    }
}
增加OrderDetailWideApp
在scala\com\atguigu\gmall\realtime\app\dws\OrderDetailWideApp.scala
import java.{lang, util}
import com.alibaba.fastjson.serializer.SerializeConfig
import com.alibaba.fastjson.{JSON, JSONObject}
import com.atguigu.gmall.realtime.bean.{OrderDetail, OrderDetailWide, OrderInfo}
import com.atguigu.gmall.realtime.utils.{MyKafkaSinkUtil, MyKafkaUtil, OffsetManagerUtil, RedisUtil}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import redis.clients.jedis.Jedis
import scala.collection.mutable.ListBuffer
/**
 * Spark Streaming job that joins the order stream with the order-detail
 * stream, de-duplicates the joined rows, splits the order's final paid amount
 * across its details, and publishes the resulting [[OrderDetailWide]] records
 * to Kafka.
 *
 * Pipeline:
 *  1. Load both Kafka topics, resuming from stored offsets when available.
 *  2. Parse the JSON payloads and key both streams by order id.
 *  3. Window both streams and join them, so that rows of the same order that
 *     arrive in different batches can still be matched.
 *  4. Drop joined rows that were already emitted by an earlier, overlapping
 *     window (Redis set per order).
 *  5. Compute `final_detail_amount` for each detail (Redis list per order
 *     holding the already-processed sibling details).
 *  6. Send the records to Kafka and store the consumed offsets.
 */
object OrderDetailWideApp {

  /** Time-to-live, in seconds, for the per-order helper keys kept in Redis. */
  private val RedisKeyTtlSeconds = 600

  /** Rounds an amount to two decimal places (cents). */
  private def roundToCents(amount: Double): Double = Math.round(amount * 100) / 100D

  /**
   * Opens the Kafka stream for `topic`: from the given offsets when some are
   * available, otherwise with the offset-less overload of
   * `MyKafkaUtil.getKafkaStream`.
   */
  private def openStream(topic: String,
                         ssc: StreamingContext,
                         offsets: Map[TopicPartition, Long],
                         groupId: String): InputDStream[ConsumerRecord[String, String]] =
    if (offsets != null && offsets.nonEmpty) {
      MyKafkaUtil.getKafkaStream(topic, ssc, offsets, groupId)
    } else {
      MyKafkaUtil.getKafkaStream(topic, ssc, groupId)
    }

  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("dws_order_wide_app")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    val topicOrderInfo = "DW_ORDER_INFO"
    val topicOrderDetail = "DW_ORDER_DETAIL"
    val groupId = "dws_order_wide_group"

    // ---------------- offset handling ----------------
    val offsetOrderInfo: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(groupId, topicOrderInfo)
    val offsetOrderDetail: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(groupId, topicOrderDetail)

    // Each stream is opened according to ITS OWN stored offsets. (Previously the
    // order-detail stream was guarded by the order-info offsets.)
    val inputOrderInfoDstream: InputDStream[ConsumerRecord[String, String]] =
      openStream(topicOrderInfo, ssc, offsetOrderInfo, groupId)
    val inputOrderDetailDstream: InputDStream[ConsumerRecord[String, String]] =
      openStream(topicOrderDetail, ssc, offsetOrderDetail, groupId)

    // Capture the offset ranges of every batch so they can be stored after the
    // batch has been written out. This must be done on the raw input stream,
    // before any other transformation, because only that RDD is a HasOffsetRanges.
    var orderInfoOffsetRanges: Array[OffsetRange] = null
    var orderDetailOffsetRanges: Array[OffsetRange] = null
    val inputOrderInfoGetOffsetDstream: DStream[ConsumerRecord[String, String]] =
      inputOrderInfoDstream.transform { rdd =>
        orderInfoOffsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        rdd
      }
    val inputOrderDetailGetOffsetDstream: DStream[ConsumerRecord[String, String]] =
      inputOrderDetailDstream.transform { rdd =>
        orderDetailOffsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        rdd
      }

    // ---------------- JSON -> case classes, keyed by order id ----------------
    val orderInfoDstream: DStream[(Long, OrderInfo)] = inputOrderInfoGetOffsetDstream.map { record =>
      val orderInfo: OrderInfo = JSON.parseObject(record.value(), classOf[OrderInfo])
      (orderInfo.id, orderInfo)
    }
    val orderDetailDstream: DStream[(Long, OrderDetail)] = inputOrderDetailGetOffsetDstream.map { record =>
      val orderDetail: OrderDetail = JSON.parseObject(record.value(), classOf[OrderDetail])
      (orderDetail.order_id, orderDetail)
    }

    // ---------------- windowing ----------------
    // Trade-off: a window that is too small still loses rows when the delay
    // between the two streams is large; a window that is too large produces a
    // lot of redundant (repeated) rows that have to be de-duplicated below.
    val orderInfoWindowDstream: DStream[(Long, OrderInfo)] = orderInfoDstream.window(Seconds(15), Seconds(5))
    val orderDetailWindowDstream: DStream[(Long, OrderDetail)] = orderDetailDstream.window(Seconds(15), Seconds(5))

    // ---------------- stream-stream join ----------------
    // To limit shuffling, keep all details of an order in one partition, e.g. by
    // using order_id as the key when the upstream job writes to Kafka.
    val orderJoinedDstream: DStream[(Long, (OrderInfo, OrderDetail))] =
      orderInfoWindowDstream.join(orderDetailWindowDstream)
    val orderDetailWideDstream: DStream[OrderDetailWide] = orderJoinedDstream.map {
      case (_, (orderInfo, orderDetail)) => new OrderDetailWide(orderInfo, orderDetail)
    }

    // ---------------- de-duplication ----------------
    // Overlapping windows emit the same joined row several times. A Redis set
    // per order remembers which detail ids were already emitted:
    //   type: set   key: order_wide:order_id:[order_id]   member: [order_detail_id]   ttl: 600s
    val orderDetailWideFilteredDstream: DStream[OrderDetailWide] = orderDetailWideDstream.mapPartitions { orderWideItr =>
      val jedis: Jedis = RedisUtil.getJedisClient
      try {
        val orderWideFilteredList: ListBuffer[OrderDetailWide] = ListBuffer[OrderDetailWide]()
        for (orderWide <- orderWideItr) {
          val orderWideKey = "order_wide:order_id:" + orderWide.order_id
          // SADD returns 1 only when the member was not in the set yet.
          val isNew: lang.Long = jedis.sadd(orderWideKey, orderWide.order_detail_id.toString)
          jedis.expire(orderWideKey, RedisKeyTtlSeconds)
          if (isNew == 1L) {
            orderWideFilteredList += orderWide
          }
        }
        orderWideFilteredList.toIterator
      } finally {
        // Always release the connection, even if a Redis call failed.
        jedis.close()
      }
    }

    // ---------------- amount splitting ----------------
    // final_total_amount    : amount actually paid for the whole order
    // original_total_amount : sum(sku_price * sku_num) over all details of the order
    // Goal: final_detail_amount, the detail's share of the paid amount, such that
    //   final_detail_amount / final_total_amount = (sku_price * sku_num) / original_total_amount
    // while guaranteeing sum(final_detail_amount) == final_total_amount.
    // Therefore every detail but the last is computed by proportion, and the
    // last one takes the remainder.
    val orderDetailWideWithSplitDsteam: DStream[OrderDetailWide] = orderDetailWideFilteredDstream.mapPartitions { orderWideItr =>
      import scala.collection.JavaConverters._
      val jedis: Jedis = RedisUtil.getJedisClient
      try {
        val orderDetailWideList: List[OrderDetailWide] = orderWideItr.toList
        for (orderWide <- orderDetailWideList) {
          // Sibling details of the same order that were already processed:
          //   type: list   key: order_wide:split:[order_id]
          //   value: json {sku_price, sku_num, final_detail_amount}   ttl: 600s
          val orderWideSplitKey = "order_wide:split:" + orderWide.order_id
          val orderWideSplitList: util.List[String] = jedis.lrange(orderWideSplitKey, 0, -1)

          // Totals over the siblings: original amount and already-assigned paid share.
          var originAmountSum = 0D
          var finalAmountSum = 0D
          if (orderWideSplitList != null) {
            for (splitJson <- orderWideSplitList.asScala) {
              val splitJsonObj: JSONObject = JSON.parseObject(splitJson)
              originAmountSum += splitJsonObj.getDouble("sku_price") * splitJsonObj.getDouble("sku_num")
              finalAmountSum += splitJsonObj.getDouble("final_detail_amount")
            }
          }

          val detailOriginAmount = orderWide.sku_price * orderWide.sku_num
          // This is the last detail when siblings + current add up to the order's
          // original total. Compare at cent precision: exact equality on summed
          // doubles can fail because of floating-point rounding.
          val isLastDetail =
            Math.round(orderWide.original_total_amount * 100) ==
              Math.round((originAmountSum + detailOriginAmount) * 100)

          orderWide.final_detail_amount =
            if (isLastDetail) {
              // Last detail: remainder = total paid - shares already assigned.
              roundToCents(orderWide.final_total_amount - finalAmountSum)
            } else {
              // Not the last detail: proportional share
              //   final_total_amount * (sku_price * sku_num) / original_total_amount
              roundToCents(orderWide.final_total_amount * detailOriginAmount / orderWide.original_total_amount)
            }

          // Remember this detail's result for the siblings still to come.
          val curObject = new JSONObject()
          curObject.put("sku_num", orderWide.sku_num)
          curObject.put("sku_price", orderWide.sku_price)
          curObject.put("final_detail_amount", orderWide.final_detail_amount)
          jedis.lpush(orderWideSplitKey, curObject.toJSONString)
          // Expire the list like the de-duplication set, so keys do not pile up
          // and stale amounts cannot outlive the de-duplication record.
          jedis.expire(orderWideSplitKey, RedisKeyTtlSeconds)
        }
        orderDetailWideList.toIterator
      } finally {
        // Always release the connection, even if a Redis call or JSON parse failed.
        jedis.close()
      }
    }

    // ---------------- output + offset commit ----------------
    orderDetailWideWithSplitDsteam.foreachRDD { rdd =>
      rdd.foreachPartition { orderWideItr =>
        for (orderWide <- orderWideItr) {
          MyKafkaSinkUtil.send("DWS_ORDER_DETAIL_WIDE", JSON.toJSONString(orderWide, new SerializeConfig(true)))
        }
      }
      // Offsets are stored only after the batch's records have been handed to the sink.
      OffsetManagerUtil.saveOffset(groupId, topicOrderInfo, orderInfoOffsetRanges)
      OffsetManagerUtil.saveOffset(groupId, topicOrderDetail, orderDetailOffsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}