User-based/Item-based实现
import pandas as pd
import numpy as np
def userCF(users, items):
    """Build the user-user cosine-similarity matrix.

    Args:
        users: Mapping of user id -> mapping of item id -> numeric rating.
        items: Mapping (or other iterable) whose keys are the item ids to
            take into account. Ratings for items not listed here are
            ignored.

    Returns:
        A square, symmetric ``pandas.DataFrame`` indexed and labelled by the
        user ids (in the iteration order of ``users``) holding the cosine
        similarity of every pair of users. A user without any rating on the
        given items has similarity 0.0 with everyone (including itself)
        instead of NaN.
    """
    user_ids = list(users)
    catalogue = set(items)

    # Keep only the ratings that refer to known items.
    ratings = {
        uid: {
            item: score
            for item, score in users[uid].items()
            if item in catalogue
        }
        for uid in user_ids
    }
    # The Euclidean norm of each rating vector is needed for every pair the
    # user takes part in, so compute it once per user.
    norms = {
        uid: np.sqrt(sum(score ** 2 for score in ratings[uid].values()))
        for uid in user_ids
    }

    sim_matrix_user = pd.DataFrame(
        np.zeros((len(user_ids), len(user_ids))),
        index=user_ids,
        columns=user_ids,
    )

    for i, ui in enumerate(user_ids):
        for uj in user_ids[i:]:
            denominator = norms[ui] * norms[uj]
            if denominator == 0:
                # At least one user has an all-zero / empty rating vector:
                # cosine similarity is undefined, report "not similar".
                similarity = 0.0
            else:
                # Only co-rated items contribute to the dot product; walk
                # the shorter rating dict and probe the longer one.
                shorter, longer = ratings[ui], ratings[uj]
                if len(shorter) > len(longer):
                    shorter, longer = longer, shorter
                dot_prod = sum(
                    score * longer[item]
                    for item, score in shorter.items()
                    if item in longer
                )
                similarity = dot_prod / denominator
            # .at writes into the frame itself; chained indexing
            # (frame[col][row] = ...) may write into a temporary copy.
            sim_matrix_user.at[ui, uj] = similarity
            sim_matrix_user.at[uj, ui] = similarity
    return sim_matrix_user
def user_Recommend(user, sim_matrix_user, users, items, k):
    """Score items for ``user`` from the ratings of its k most similar users.

    Args:
        user: Id of the user to recommend for; must be a column of
            ``sim_matrix_user``.
        sim_matrix_user: User-user similarity ``DataFrame`` (labels are user
            ids on both axes).
        users: Mapping of user id -> mapping of item id -> numeric rating.
        items: Unused; kept so existing callers keep working.
        k: Number of neighbours to use.

    Returns:
        A tuple ``(topk_users, rating_df)``:

        * ``topk_users`` - Series of the (at most) ``k`` highest
          similarities, indexed by neighbour id; never contains ``user``.
        * ``rating_df`` - Series indexed by item id holding the
          similarity-weighted average rating of the neighbours, sorted in
          descending order. Items a neighbour did not rate contribute 0 to
          the weighted sum. If the similarities sum to 0 every score is 0.0.
    """
    # Remove the user by label rather than assuming it sorts first: another
    # user with an equal similarity could otherwise take its place.
    neighbours = sim_matrix_user[user].drop(labels=user, errors="ignore")
    # mergesort is stable, so ties keep the matrix order.
    neighbours = neighbours.sort_values(ascending=False, kind="mergesort")
    topk_users = neighbours.iloc[:k]

    # Rows are items, columns are users; missing ratings become NaN.
    user_rating = pd.DataFrame(users)

    # Scale every neighbour's rating column by its similarity, then add the
    # columns up per item (NaN is skipped, i.e. counts as 0).
    weighted = user_rating[topk_users.index].mul(topk_users, axis="columns")
    weighted_sum = weighted.sum(axis="columns")

    total_weight = topk_users.sum()
    if total_weight == 0:
        # No usable neighbour: avoid dividing by zero.
        rating_df = weighted_sum * 0.0
    else:
        rating_df = weighted_sum / total_weight
    rating_df = rating_df.sort_values(ascending=False, kind="mergesort")
    return topk_users, rating_df

Last updated