第五章(餘弦相似度)

代碼:

import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.Map

/**
 * Toy user-based collaborative filtering example: computes the cosine
 * similarity between one target user and every known user, based on the
 * ratings each user gave to a fixed set of films.
 *
 * The rating table is small and lives on the driver; Spark is only used to
 * hold the user and film name lists.
 */
object CollaborativeFilteringSpark {
  // Spark configuration: single local worker.
  val conf = new SparkConf().setMaster("local").setAppName("CollaborativeFilteringSpark ")
  // Spark entry point; created when this object is first initialised.
  val sc = new SparkContext(conf)

  // Canonical film order. Every rating vector is built by walking this list,
  // so two users' vectors are always aligned position-by-position regardless
  // of how the underlying hash maps happen to iterate.
  private val filmNames = Vector("smzdm", "ylxb", "znh", "nhsc", "fcwr")

  // Known users.
  val users = sc.parallelize(Array("aaa", "bbb", "ccc", "ddd", "eee"))
  // Known film names.
  val films = sc.parallelize(filmNames)

  // Nested map: user name -> (film name -> rating). Filled by getSource().
  val source = Map[String, Map[String, Int]]()
  // Map intended to hold film ratings; not used by the code in this object.
  val filmSource = Map[String, Int]()

  /**
   * Populates [[source]] with the hard-coded ratings of the five users.
   * Calling it more than once simply overwrites the same entries.
   *
   * @return the populated nested rating map (the same instance as [[source]])
   */
  def getSource(): Map[String, Map[String, Int]] = {
    source ++= Seq(
      "aaa" -> Map("smzdm" -> 2, "ylxb" -> 3, "znh" -> 1, "nhsc" -> 0, "fcwr" -> 1),
      "bbb" -> Map("smzdm" -> 1, "ylxb" -> 2, "znh" -> 2, "nhsc" -> 1, "fcwr" -> 4),
      "ccc" -> Map("smzdm" -> 2, "ylxb" -> 1, "znh" -> 0, "nhsc" -> 1, "fcwr" -> 4),
      "ddd" -> Map("smzdm" -> 3, "ylxb" -> 2, "znh" -> 0, "nhsc" -> 5, "fcwr" -> 3),
      "eee" -> Map("smzdm" -> 5, "ylxb" -> 3, "znh" -> 1, "nhsc" -> 1, "fcwr" -> 2)
    )
    source
  }

  // Ratings of `user` in canonical film order; a film the user has no entry
  // for counts as 0.
  private def ratingVector(user: String): Vector[Int] = {
    val ratings = source.getOrElse(
      user,
      throw new NoSuchElementException(
        s"no ratings stored for user '$user' (was getSource() called?)"))
    filmNames.map(film => ratings.getOrElse(film, 0))
  }

  // Euclidean length of a rating vector.
  private def norm(ratings: Vector[Int]): Double =
    math.sqrt(ratings.map(r => math.pow(r, 2)).sum)

  /**
   * Cosine similarity between the rating vectors of two users:
   * dot(u1, u2) / (|u1| * |u2|).
   *
   * @param user1 name of the first user; must be a key of [[source]]
   * @param user2 name of the second user; must be a key of [[source]]
   * @return the cosine similarity, or 0.0 when either user's ratings are all
   *         zero (the quotient would otherwise be 0/0 = NaN)
   * @throws NoSuchElementException if either user has no entry in [[source]]
   */
  def getCollaborateSource(user1: String, user2: String): Double = {
    val first = ratingVector(user1)
    val second = ratingVector(user2)
    // Numerator: dot product. Widened to Long so large ratings cannot overflow.
    val member = first.zip(second).map { case (a, b) => a.toLong * b }.sum.toDouble
    // Denominator: product of the two vector lengths.
    val denominator = norm(first) * norm(second)
    if (denominator == 0.0) 0.0 else member / denominator
  }

  /**
   * Prints the similarity of user "bbb" to every known user, then stops the
   * Spark context.
   */
  def main(args: Array[String]): Unit = {
    try {
      getSource() // initialise the rating table
      val name = "bbb" // target user
      // Bring the user names back to the driver before iterating: the rating
      // table and the console both live on the driver, so running this loop
      // inside an RDD closure would only work in local mode.
      users.collect().foreach { user =>
        println(name + " 相對於 " + user + "的相似性分數是:" + getCollaborateSource(name, user))
      }
    } finally {
      sc.stop()
    }
  }
}

運行結果:

bbb 相對於 aaa的相似性分數是:0.7089175569585667
bbb 相對於 bbb的相似性分數是:1.0000000000000002
bbb 相對於 ccc的相似性分數是:0.8780541105074453
bbb 相對於 ddd的相似性分數是:0.6865554812287477
bbb 相對於 eee的相似性分數是:0.6821910402406466
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章