- #-*- coding: utf-8 -*-
'''
Created on 2012-9-3
@author: Jekey
Cosine similarity; consider using this algorithm when the data is sparse.
'''
- import codecs
- from math import sqrt
# Sample data: user name -> {band name -> rating given by that user}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5,
        "Broken Bells": 2.0,
        "Norah Jones": 4.5,
        "Phoenix": 5.0,
        "Slightly Stoopid": 1.5,
        "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0,
        "Broken Bells": 3.5,
        "Deadmau5": 4.0,
        "Phoenix": 2.0,
        "Slightly Stoopid": 3.5,
        "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0,
        "Broken Bells": 1.0,
        "Deadmau5": 1.0,
        "Norah Jones": 3.0,
        "Phoenix": 5,
        "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0,
        "Broken Bells": 4.0,
        "Deadmau5": 4.5,
        "Phoenix": 3.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0,
        "Deadmau5": 1.0,
        "Norah Jones": 4.0,
        "The Strokes": 4.0,
        "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5,
        "Deadmau5": 4.0,
        "Norah Jones": 5.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0,
        "Broken Bells": 2.0,
        "Norah Jones": 3.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.0,
        "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0,
        "Norah Jones": 5.0,
        "Phoenix": 4.0,
        "Slightly Stoopid": 2.5,
        "The Strokes": 3.0,
    },
}
# Cosine-based distance between two users' ratings.
def cosine(rate1, rate2):
    """Return a cosine-based distance between two rating dicts.

    Only items that appear in both dicts contribute.  The result is the
    product of the two co-rated vector norms divided by their dot product
    (the reciprocal of the cosine similarity), so smaller means closer:
    1.0 when the co-rated ratings are proportional, larger otherwise.

    Args:
        rate1: dict mapping item -> numeric rating.
        rate2: dict mapping item -> numeric rating.

    Returns:
        The distance as a float, or ``float('inf')`` when the dot product of
        the co-rated ratings is not positive (this includes the case where
        the two dicts share no items).
    """
    sum_xy = 0.0
    sum_x = 0.0
    sum_y = 0.0
    for key in rate1:
        if key in rate2:
            x = rate1[key]
            y = rate2[key]
            sum_xy += x * y
            sum_x += x * x
            sum_y += y * y
    # With no overlap (or a non-positive dot product) the ratio is undefined
    # or negative; report "infinitely far" so that an ascending sort never
    # ranks such a pair as the nearest neighbour.
    if sum_xy <= 0:
        return float('inf')
    # sqrt() instead of pow(v, 1/2): on Python 2 the expression 1/2 is
    # integer division and evaluates to 0, which made every norm equal 1.
    return sqrt(sum_x) * sqrt(sum_y) / sum_xy
# Return the other users ordered by their distance to the given user.
def computeNearestNeighbor(username, users):
    """Rank every other user by the value cosine() returns against `username`.

    Args:
        username: key in `users` identifying the reference user.
        users: dict mapping user name -> dict of item -> rating.

    Returns:
        A list of ``(distance, name)`` tuples for every user other than
        `username`, sorted ascending (ties on distance fall back to the
        name, because whole tuples are compared).

    Raises:
        KeyError: if `username` is not in `users` and another user exists.
    """
    # `!=` replaces the `<>` operator, which is a syntax error on Python 3.
    return sorted(
        (cosine(users[username], users[key]), key)
        for key in users
        if key != username
    )
# Recommend items for a user based on the nearest neighbour's ratings.
def recommend(username, users):
    """Return items the nearest neighbour rated that `username` has not.

    Args:
        username: key in `users` identifying the user to recommend for.
        users: dict mapping user name -> dict of item -> rating.

    Returns:
        A list of ``(item, rating)`` tuples taken from the first user returned
        by computeNearestNeighbor(), sorted by that user's rating from highest
        to lowest.  Empty when there is no other user to compare against.
    """
    neighbors = computeNearestNeighbor(username, users)
    # Without any other user there is nobody to borrow ratings from;
    # indexing [0] here used to raise IndexError.
    if not neighbors:
        return []
    # Name of the nearest user (tuples are (distance, name)).
    nearest = neighbors[0][1]
    ownRatings = users[username]
    # Keep only what the neighbour rated and the user has not rated yet.
    recommendations = [
        (item, rating)
        for item, rating in users[nearest].items()
        if item not in ownRatings
    ]
    recommendations.sort(key=lambda rat: rat[1], reverse=True)
    return recommendations
if __name__ == '__main__':
    # Demo run: print the recommendations for the sample user 'Hailey'.
    # A single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(recommend('Hailey', users))
# This snippet comes from http://www.codesnippet.cn/detail/140620134049.html
# Source: http://www.codesnippet.cn/detail/140620134049.html