1. 背景
搜索排序中最簡單的模型算法是 pointwise(單點)方法:把每個文檔的相關性評分當作獨立的迴歸目標。這裏展示如何用簡單的線性迴歸模型,爲各個特徵(標題得分、簡介得分、評分)學習權重。
2. 數據集合(保存爲 rocky.csv,第一行爲表頭)
grade,titleScore,overviewScore,ratingScore,comment:# <docid> keywords@movietitle
4,10.65,8.41,7.40,# 1366 rocky@Rocky
3,0.00,6.75,7.00,# 12412 rocky@Creed
3,8.22,9.72,6.60,# 1246 rocky@Rocky Balboa
3,8.22,8.41,0.00,# 1374 rocky@Rocky IV
3,8.22,7.68,6.90,# 1367 rocky@Rocky II
3,8.22,7.15,0.00,# 1375 rocky@Rocky V
3,8.22,5.28,0.00,# 1371 rocky@Rocky III
2,0.00,0.00,7.60,# 154019 rocky@Belarmino
2,0.00,0.00,7.10,# 1368 rocky@First Blood
2,0.00,0.00,6.70,# 13258 rocky@Son of Rambow
2,0.00,0.00,0.00,# 70808 rocky@Klitschko
2,0.00,0.00,0.00,# 64807 rocky@Grudge Match
2,0.00,0.00,0.00,# 47059 rocky@Boxing Gym
3. 線性模型調參
from sklearn.linear_model import LinearRegression
from math import sin
import numpy as np
import csv
# Load the judgment list. np.genfromtxt treats '#' as a comment marker by
# default, so the trailing "# <docid> keywords@movietitle" text on each row is
# discarded; what remains after the last comma parses as an empty (NaN) column.
rockyData = np.genfromtxt('rocky.csv', delimiter=',')[1:] # Remove the CSV header
rockyGrades = rockyData[:,0] # Slice out column 0, where the grades are
rockySignals = rockyData[:,1:-1] # Features in columns 1...all but last column (the comment)
# Fit grade ~ intercept + weighted sum of (titleScore, overviewScore, ratingScore).
butIRegress = LinearRegression()
butIRegress.fit(rockySignals, rockyGrades)
butIRegress.coef_ #boost for title, boost for overview, boost for rating
# Output of the expression above, as recorded in the original session:
# array([ 0.04999419, 0.22958357, 0.00573909])
butIRegress.intercept_
def relevanceScore(intercept, titleCoef, overviewCoef, ratingCoef, titleScore, overviewScore, movieRating):
    """Return the linear-model relevance score for a single document.

    The score is the intercept plus each feature value multiplied by its
    learned coefficient.

    Args:
        intercept: Constant term of the linear model.
        titleCoef: Weight applied to ``titleScore``.
        overviewCoef: Weight applied to ``overviewScore``.
        ratingCoef: Weight applied to ``movieRating``.
        titleScore: Title-match feature value for the document.
        overviewScore: Overview-match feature value for the document.
        movieRating: Rating feature value for the document.

    Returns:
        The predicted relevance grade as a number.
    """
    return intercept + (titleCoef * titleScore) + (overviewCoef * overviewScore) + (ratingCoef * movieRating)
# Score Rambo
# Apply the fitted intercept and per-feature coefficients to hand-entered
# feature values for one movie.
relevanceScore(
    intercept=butIRegress.intercept_,
    titleCoef=butIRegress.coef_[0],
    overviewCoef=butIRegress.coef_[1],
    ratingCoef=butIRegress.coef_[2],
    titleScore=12.28,
    overviewScore=9.82,
    movieRating=6.40,
)
# Score First Blood
# Same model, different feature values (no title match for this movie).
relevanceScore(
    intercept=butIRegress.intercept_,
    titleCoef=butIRegress.coef_[0],
    overviewCoef=butIRegress.coef_[1],
    ratingCoef=butIRegress.coef_[2],
    titleScore=0.00,
    overviewScore=10.76,
    movieRating=7.10,
)