代碼:
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.Map
/**
 * Minimal user-based collaborative-filtering demo: computes the cosine
 * similarity between the film ratings of one target user and every user.
 *
 * The ratings table lives in the driver-side mutable map [[source]] and is
 * only populated once [[getSource]] has been called.
 */
object CollaborativeFilteringSpark {
  // Spark configuration: single local worker.
  val conf: SparkConf = new SparkConf().setMaster("local").setAppName("CollaborativeFilteringSpark ")
  // Spark entry point built from the configuration above.
  val sc: SparkContext = new SparkContext(conf)

  // User names.
  val users = sc.parallelize(Array("aaa", "bbb", "ccc", "ddd", "eee"))
  // Film titles.
  val films = sc.parallelize(Array("smzdm", "ylxb", "znh", "nhsc", "fcwr"))

  // Nested map: user name -> (film title -> rating). Empty until getSource() runs.
  val source = Map[String, Map[String, Int]]()
  // Map intended to hold the ratings of films (not used by the code in this object).
  val filmSource = Map[String, Int]()

  /**
   * Fills [[source]] with the hard-coded ratings of the five users.
   * Calling it again simply overwrites the same entries.
   *
   * @return the populated nested map (the same instance as [[source]])
   */
  def getSource(): Map[String, Map[String, Int]] = {
    source += ("aaa" -> Map("smzdm" -> 2, "ylxb" -> 3, "znh" -> 1, "nhsc" -> 0, "fcwr" -> 1))
    source += ("bbb" -> Map("smzdm" -> 1, "ylxb" -> 2, "znh" -> 2, "nhsc" -> 1, "fcwr" -> 4))
    source += ("ccc" -> Map("smzdm" -> 2, "ylxb" -> 1, "znh" -> 0, "nhsc" -> 1, "fcwr" -> 4))
    source += ("ddd" -> Map("smzdm" -> 3, "ylxb" -> 2, "znh" -> 0, "nhsc" -> 5, "fcwr" -> 3))
    source += ("eee" -> Map("smzdm" -> 5, "ylxb" -> 3, "znh" -> 1, "nhsc" -> 1, "fcwr" -> 2))
    source
  }

  /** Looks up the ratings of `user`, failing with a descriptive message when absent. */
  private def ratingsOf(user: String): Map[String, Int] =
    source.getOrElse(
      user,
      throw new NoSuchElementException(
        s"no ratings stored for user '$user' (has getSource() been called?)"))

  /** Euclidean length of a rating vector. */
  private def norm(ratings: Map[String, Int]): Double =
    math.sqrt(ratings.valuesIterator.map(score => math.pow(score, 2)).sum)

  /**
   * Cosine similarity between the rating vectors of two users.
   *
   * Ratings are paired by film title, so the result does not depend on the
   * iteration order of the underlying hash maps; a film that only one of the
   * users rated contributes 0 to the dot product.
   *
   * @param user1 name of the first user
   * @param user2 name of the second user
   * @return the cosine of the angle between the two rating vectors, or 0.0
   *         when either vector has zero length (the cosine is undefined there)
   * @throws NoSuchElementException if either user has no entry in [[source]]
   */
  def getCollaborateSource(user1: String, user2: String): Double = {
    val ratings1 = ratingsOf(user1)
    val ratings2 = ratingsOf(user2)
    // Numerator: dot product, matched film by film.
    val member = ratings1.iterator.map { case (film, score) =>
      score * ratings2.getOrElse(film, 0)
    }.sum.toDouble
    // Denominator: product of the two vector lengths.
    val denominator = norm(ratings1) * norm(ratings2)
    // Guard against 0/0, which would otherwise produce NaN.
    if (denominator == 0.0) 0.0 else member / denominator
  }

  /** Prints the similarity of user "bbb" to every user, then stops Spark. */
  def main(args: Array[String]): Unit = {
    getSource() // initialise the ratings
    val name = "bbb" // target user
    try {
      // Bring the user names back to the driver: `source` is driver-side
      // mutable state that is not shipped to executors, and printing on the
      // driver keeps the output in one place and in a stable order.
      users.collect().foreach { user =>
        println(s"$name 相對於 ${user}的相似性分數是:${getCollaborateSource(name, user)}")
      }
    } finally {
      sc.stop() // release Spark resources even if a lookup fails
    }
  }
}
運行結果:
bbb 相對於 aaa的相似性分數是:0.7089175569585667
bbb 相對於 bbb的相似性分數是:1.0000000000000002
bbb 相對於 ccc的相似性分數是:0.8780541105074453
bbb 相對於 ddd的相似性分數是:0.6865554812287477
bbb 相對於 eee的相似性分數是:0.6821910402406466