读书人

基于用户的最近邻推荐

发布时间: 2013-10-23 11:39:13 作者: rapoo

基于用户的最近邻推荐


课程 Python代码:

# -*- coding: utf-8 -*-
"""User-user (nearest-neighbour) collaborative filtering.

Ratings are loaded into a dense user x movie matrix in which 0 means
"not rated".  A rating is predicted as the target user's mean rating plus
the similarity-weighted average of the mean-centred ratings given to the
movie by the most similar other users.

Run as a script, it reads ``ratings.csv``, ``movie-titles.csv`` and
``input.txt`` from the current directory and writes ``outfile.txt``.
"""
__author__ = 'LiFeiteng(Email: lifeiteng0422@gmail.com)'

import numpy as np


class UserUserRec(object):
    """User-based nearest-neighbour recommender over a dense rating matrix.

    Attributes are filled by :meth:`GetRatingData` and
    :meth:`GetMovieTitles`; until then they hold the empty defaults below.
    """

    def __init__(self):
        self.U = 0  # user number
        self.M = 0  # movie number
        self.user_dict = {}    # user id (str) -> row index in user_ratings
        self.movie_dict = {}   # movie id (str) -> column index in user_ratings
        self.movie_title = {}  # movie id (str) -> title
        # Kept as np.matrix so the attribute type callers see is unchanged;
        # all arithmetic below is done on plain ndarray views of it.
        self.user_ratings = np.matrix([])

    def GetRatingData(self, ratings_file):
        """Load ``user,movie,rating`` lines from *ratings_file*.

        New users/movies are appended to ``user_dict``/``movie_dict`` in
        order of first appearance, the totals are printed as ``"U M"``, and
        ``user_ratings`` is rebuilt as a U x M matrix holding the ratings
        found in this file (0 where no rating was given).  Blank lines are
        skipped.

        Raises:
            ValueError: if a non-blank line does not have exactly three
                comma-separated fields or the rating is not a number.
        """
        records = []
        # Read the file once and keep the parsed triples; the matrix can only
        # be allocated after every user and movie has been counted.
        with open(ratings_file) as handle:
            for line in handle:
                if not line.strip():
                    continue
                user, movie, rating = line.split(",")
                records.append((user, movie, float(rating)))
                if user not in self.user_dict:
                    self.user_dict[user] = self.U
                    self.U += 1
                if movie not in self.movie_dict:
                    self.movie_dict[movie] = self.M
                    self.M += 1
        print("%d %d" % (self.U, self.M))

        ratings = np.zeros((self.U, self.M))
        for user, movie, rating in records:
            ratings[self.user_dict[user], self.movie_dict[movie]] = rating
        self.user_ratings = np.matrix(ratings)

    def GetMovieTitles(self, movie_titles_file):
        """Load ``movie,title`` lines from *movie_titles_file*.

        Only the first comma separates the id from the title, so titles may
        themselves contain commas.  Line endings are stripped; blank lines
        are skipped.

        Raises:
            ValueError: if a non-blank line contains no comma.
        """
        with open(movie_titles_file) as handle:
            for line in handle:
                line = line.rstrip("\r\n")
                if not line:
                    continue
                movie, title = line.split(",", 1)
                self.movie_title[movie] = title

    @staticmethod
    def _mean_rating(row):
        """Return the mean of the positive entries of *row* (0.0 if none)."""
        rated = row[row > 0.0]
        return rated.mean() if rated.size else 0.0

    def _centered_ratings(self, row_index):
        """Return a user's ratings minus their mean, with unrated entries 0."""
        row = np.asarray(self.user_ratings, dtype=float)[row_index, :]
        rated = row > 0.0
        if not rated.any():
            # No ratings: an all-zero vector makes the similarity 0, not NaN.
            return np.zeros_like(row)
        return np.where(rated, row - row[rated].mean(), 0.0)

    def CosineUserSim(self, user1, user2):
        """Cosine similarity between two users' mean-centred rating vectors.

        *user1* and *user2* are row indices into ``user_ratings`` (not user
        ids).  Entries that are not positive count as unrated and contribute
        0.  Returns ``np.double``; the result is 0.0 when either vector has
        zero norm (no ratings, or all ratings equal).
        """
        u1 = self._centered_ratings(user1)
        u2 = self._centered_ratings(user2)
        denom = np.linalg.norm(u1) * np.linalg.norm(u2)
        if denom == 0:
            return np.double(0.0)
        return np.double(np.dot(u1, u2) / denom)

    def MovieScore4User(self, user, movie, neighbors=30):
        """Predict *user*'s rating of *movie* from the most similar raters.

        Among the other users with a non-zero rating for *movie*, the
        *neighbors* (default 30) with the highest similarity to *user* are
        used: the prediction is the user's mean rating plus the
        similarity-weighted average of each neighbour's mean-centred rating.
        If no neighbour has a non-zero similarity, the user's mean rating
        is returned unchanged.

        Prints ``user,movie,score,title`` (score to 4 decimals) and returns
        the score as ``np.double``.

        Raises:
            KeyError: if *user* or *movie* is unknown, or *movie* has no
                entry in ``movie_title``.
        """
        ratings = np.asarray(self.user_ratings, dtype=float)
        target = self.user_dict[user]
        column = ratings[:, self.movie_dict[movie]]

        candidates = [
            (other, column[other], self.CosineUserSim(target, other))
            for other in np.flatnonzero(column)
            if other != target
        ]
        # Stable sort: equally similar users keep their row order.
        candidates.sort(key=lambda item: item[2], reverse=True)

        weighted_sum = 0.0
        weight_total = 0.0
        for other, rating, sim in candidates[:max(neighbors, 0)]:
            weighted_sum += (rating - self._mean_rating(ratings[other])) * sim
            weight_total += abs(sim)

        score = self._mean_rating(ratings[target])
        if weight_total > 0.0:
            score += weighted_sum / weight_total
        score = np.double(score)
        print(",".join([user, movie, format(score, ".4f"),
                        self.movie_title[movie]]))
        return score
# end of class UserUserRec


def main():
    """Score every ``user:movie`` line of input.txt into outfile.txt."""
    #### PA3
    user_user_rec = UserUserRec()
    user_user_rec.GetRatingData("ratings.csv")
    user_user_rec.GetMovieTitles("movie-titles.csv")
    with open("outfile.txt", "w") as outfile:
        with open("input.txt") as queries:
            for line in queries:
                if not line.strip():
                    continue
                # input
                user, movie = line.split(":")
                # int() drops the trailing newline and any padding.
                movie = str(int(movie))
                score = user_user_rec.MovieScore4User(user, movie)
                str1 = ",".join([user, movie, format(score, ".4f"),
                                 user_user_rec.movie_title[movie]])
                outfile.write(str1 + "\n")


if __name__ == '__main__':
    main()

代码数据链接:https://www.dropbox.com/s/78ifrycp9x1238i/UserUserRec.rar





读书人网 >其他相关

热点推荐