数据分析作业四-基于用户及物品数据进行内容推荐

## 导入支持库
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import correlation
from sklearn.metrics.pairwise import pairwise_distances
import ipywidgets as widgets
from IPython.display import display, clear_output
from contextlib import contextmanager
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import os, sys
import re
import seaborn as sns
## 加载数据集并检查书籍,用户和评级数据集的形状
# Load the three Book-Crossing tables and give their columns readable names.
# sep=None asks pandas to sniff the delimiter; only the Python parsing engine
# supports that, so the engine is named explicitly instead of relying on the
# silent fallback from the C engine.
books = pd.read_csv('F:\\data\\bleeding_data\\BX-Books.csv', sep=None, engine='python', encoding="latin-1")
books.columns = ['ISBN', 'bookTitle', 'bookAuthor', 'yearOfPublication', 'publisher', 'imageUrlS', 'imageUrlM', 'imageUrlL']

users = pd.read_csv('F:\\data\\bleeding_data\\BX-Users.csv', sep=None, engine='python', encoding="latin-1")
users.columns = ['userID', 'Location', 'Age']

ratings = pd.read_csv('F:\\data\\bleeding_data\\BX-Book-Ratings.csv', sep=None, engine='python', encoding="latin-1")
ratings.columns = ['userID', 'ISBN', 'bookRating']

# Report (rows, columns) of each table as a quick sanity check.
print(books.shape)
print(users.shape)
print(ratings.shape)
(271360, 8)
(278858, 3)
(1149780, 3)
## 一、图书数据集
books.head()
ISBNbookTitlebookAuthoryearOfPublicationpublisherimageUrlSimageUrlMimageUrlL
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...
## url不需要分析,进行删除
# The three cover-image URL columns are not analysed; remove them in place.
books.drop(columns=['imageUrlS', 'imageUrlM', 'imageUrlL'], inplace=True)
books.head()
ISBNbookTitlebookAuthoryearOfPublicationpublisher
00195153448Classical MythologyMark P. O. Morford2002Oxford University Press
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canada
20060973129Decision in NormandyCarlo D'Este1991HarperPerennial
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Giroux
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Company
## books.dtypes
books.dtypes
ISBN                 object
bookTitle            object
bookAuthor           object
yearOfPublication    object
publisher            object
dtype: object
## 现在检查属性的唯一值
books.bookTitle.unique()
array(['Classical Mythology', 'Clara Callan', 'Decision in Normandy', ...,'Lily Dale : The True Story of the Town that Talks to the Dead',"Republic (World's Classics)","A Guided Tour of Rene Descartes' Meditations on First Philosophy with Complete Translations of the Meditations by Ronald Rubin"],dtype=object)
books.yearOfPublication.unique()
array(['2002', '2001', '1991', '1999', '2000', '1993', '1996', '1988','2004', '1998', '1994', '2003', '1997', '1983', '1979', '1995','1982', '1985', '1992', '1986', '1978', '1980', '1952', '1987','1990', '1981', '1989', '1984', '0', '1968', '1961', '1958','1974', '1976', '1971', '1977', '1975', '1965', '1941', '1970','1962', '1973', '1972', '1960', '1966', '1920', '1956', '1959','1953', '1951', '1942', '1963', '1964', '1969', '1954', '1950','1967', '2005', '1957', '1940', '1937', '1955', '1946', '1936','1930', '2011', '1925', '1948', '1943', '1947', '1945', '1923','2020', '1939', '1926', '1938', '2030', '1911', '1904', '1949','1932', '1928', '1929', '1927', '1931', '1914', '2050', '1934','1910', '1933', '1902', '1924', '1921', '1900', '2038', '2026','1944', '1917', '1901', '2010', '1908', '1906', '1935', '1806','2021', '2012', '2006', 'DK Publishing Inc', 'Gallimard', '1909','2008', '1378', '1919', '1922', '1897', '2024', '1376', '2037'],dtype=object)
books.loc[books.yearOfPublication == 'DK Publishing Inc',:]
books.yearOfPublication.unique()
array(['2002', '2001', '1991', '1999', '2000', '1993', '1996', '1988','2004', '1998', '1994', '2003', '1997', '1983', '1979', '1995','1982', '1985', '1992', '1986', '1978', '1980', '1952', '1987','1990', '1981', '1989', '1984', '0', '1968', '1961', '1958','1974', '1976', '1971', '1977', '1975', '1965', '1941', '1970','1962', '1973', '1972', '1960', '1966', '1920', '1956', '1959','1953', '1951', '1942', '1963', '1964', '1969', '1954', '1950','1967', '2005', '1957', '1940', '1937', '1955', '1946', '1936','1930', '2011', '1925', '1948', '1943', '1947', '1945', '1923','2020', '1939', '1926', '1938', '2030', '1911', '1904', '1949','1932', '1928', '1929', '1927', '1931', '1914', '2050', '1934','1910', '1933', '1902', '1924', '1921', '1900', '2038', '2026','1944', '1917', '1901', '2010', '1908', '1906', '1935', '1806','2021', '2012', '2006', 'DK Publishing Inc', 'Gallimard', '1909','2008', '1378', '1919', '1922', '1897', '2024', '1376', '2037'],dtype=object)
print(books.loc[books.yearOfPublication == 'DK Publishing Inc',:])
              ISBN                                          bookTitle  \
209538  078946697X  DK Readers: Creating the X-Men, How It All Beg...   
221678  0789466953  DK Readers: Creating the X-Men, How Comic Book...   bookAuthor  yearOfPublication  \
209538       2000  DK Publishing Inc   
221678       2000  DK Publishing Inc   publisher  
209538  http://images.amazon.com/images/P/078946697X.0...  
221678  http://images.amazon.com/images/P/0789466953.0...  
books.loc[books.yearOfPublication == 'DK Publishing Inc',:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
209538078946697XDK Readers: Creating the X-Men, How It All Beg...2000DK Publishing Inchttp://images.amazon.com/images/P/078946697X.0...
2216780789466953DK Readers: Creating the X-Men, How Comic Book...2000DK Publishing Inchttp://images.amazon.com/images/P/0789466953.0...
## 从上面可以看出,这两行的列发生了错位:bookAuthor 中装的是出版年份(2000),yearOfPublication 中装的是出版社,publisher 中装的是图片 URL,因此需要逐列修正。
# Overwrite year, author, publisher and title of the two "DK Readers" rows,
# each selected by its ISBN.
for isbn, author, title in (
    ('0789466953', "James Buckley",
     "DK Readers: Creating the X-Men, How Comic Books Come to Life (Level 4: Proficient Readers)"),
    ('078946697X', "Michael Teitelbaum",
     "DK Readers: Creating the X-Men, How It All Began (Level 4: Proficient Readers)"),
):
    row = books.ISBN == isbn
    books.loc[row, 'yearOfPublication'] = 2000
    books.loc[row, 'bookAuthor'] = author
    books.loc[row, 'publisher'] = "DK Publishing Inc"
    books.loc[row, 'bookTitle'] = title
# Show both corrected rows.
books.loc[books.ISBN.isin(['0789466953', '078946697X']), :]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
209538078946697XDK Readers: Creating the X-Men, How It All Beg...Michael Teitelbaum2000DK Publishing Inc
2216780789466953DK Readers: Creating the X-Men, How Comic Book...James Buckley2000DK Publishing Inc
## 继续纠正出版年鉴的类型
# Convert the publication year to a numeric dtype; errors='coerce' turns any
# value that cannot be parsed into NaN.
books['yearOfPublication'] = pd.to_numeric(books['yearOfPublication'], errors='coerce')
sorted(books['yearOfPublication'].unique())
[0.0,1376.0,1378.0,1806.0,1897.0,1900.0,1901.0,1902.0,1904.0,1906.0,1908.0,1909.0,1910.0,1911.0,1914.0,1917.0,1919.0,1920.0,1921.0,1922.0,1923.0,1924.0,1925.0,1926.0,1927.0,1928.0,1929.0,1930.0,1931.0,1932.0,1933.0,1934.0,1935.0,1936.0,1937.0,1938.0,1939.0,1940.0,1941.0,1942.0,1943.0,1944.0,1945.0,1946.0,1947.0,1948.0,1949.0,1950.0,1951.0,1952.0,1953.0,1954.0,1955.0,1956.0,1957.0,1958.0,1959.0,1960.0,1961.0,1962.0,1963.0,1964.0,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,1974.0,1975.0,1976.0,1977.0,1978.0,1979.0,1980.0,1981.0,1982.0,1983.0,1984.0,1985.0,1986.0,1987.0,1988.0,1989.0,1990.0,1991.0,1992.0,1993.0,1994.0,1995.0,1996.0,1997.0,1998.0,1999.0,2000.0,2001.0,2002.0,2003.0,2004.0,2005.0,2006.0,2008.0,2010.0,2011.0,2012.0,2020.0,2021.0,2024.0,2026.0,2030.0,2037.0,2038.0,2050.0,nan]
## 现在可以看出 yearOfPublication 已转换为数值类型(因含 NaN,实际为 float),取值范围为 0–2050。
## 由于该数据集建于 2004 年,我假设 2006 年之后的所有年份都无效(保留两年的余量,以防数据集曾被更新)。
## 对于所有无效条目(包括 0),先将其转换为 NaN,然后用其余年份的平均值替换。
# Years after 2006 and the value 0 are treated as invalid and blanked out.
# np.nan is used because the np.NAN alias no longer exists in NumPy 2.0.
books.loc[(books.yearOfPublication > 2006) | (books.yearOfPublication == 0), 'yearOfPublication'] = np.nan
# Replace the missing years with the rounded mean of the remaining years.
# The filled Series is assigned back to the column: calling
# fillna(inplace=True) on the Series obtained by attribute access is chained
# assignment, which does not update `books` under pandas Copy-on-Write.
books['yearOfPublication'] = books['yearOfPublication'].fillna(round(books['yearOfPublication'].mean()))
# Number of years that are still missing.
books.yearOfPublication.isnull().sum()
0
# Store the publication year as 32-bit integers.
books.yearOfPublication = books.yearOfPublication.astype(np.int32)
## publisher
books.loc[books.publisher.isnull(),:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
128890193169656XTyrant MoonElaine Corvidae2002NaN
1290371931696993Finders KeepersLinnea Sinclair2001NaN
## 按书名查找同名图书的其他记录,看看能否从中得到出版社的线索
books.loc[(books.bookTitle == 'Tyrant Moon'),:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
128890193169656XTyrant MoonElaine Corvidae2002NaN
books.loc[(books.bookTitle == 'Finders Keepers'),:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
10799082177364XFinders KeepersFern Michaels2002Zebra Books
420190070465037Finders KeepersBarbara Nickolae1989McGraw-Hill Companies
582640688118461Finders KeepersEmily Rodda1993Harpercollins Juvenile Books
666781575663236Finders KeepersFern Michaels1998Kensington Publishing Corporation
1290371931696993Finders KeepersLinnea Sinclair2001NaN
1343090156309505Finders KeepersWill1989Voyager Books
1734730973146907Finders KeepersSean M. Costello2002Red Tower Publications
1958850061083909Finders KeepersSharon Sala2003HarperTorch
2118740373261160Finders KeepersElizabeth Travis1993Worldwide Library
## 同名图书的出版社和作者各不相同,这里没有线索
## 再按图书作者检查,寻找规律
books.loc[(books.bookAuthor == 'Elaine Corvidae'),:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
1267621931696934Winter's OrphansElaine Corvidae2001Novelbooks
128890193169656XTyrant MoonElaine Corvidae2002NaN
1290010759901880WolfkinElaine Corvidae2001Hard Shell Word Factory
## 由图书作者检查以找到模式
books.loc[(books.bookAuthor == 'Linnea Sinclair'),:]
ISBNbookTitlebookAuthoryearOfPublicationpublisher
1290371931696993Finders KeepersLinnea Sinclair2001NaN
## 因为没有什么共同的东西可以推断出NaNs的发布者,将它们替换为“other”
# Label the publisher of the two books selected by ISBN as 'other'.
books.loc[books.ISBN.isin(['193169656X', '1931696993']), 'publisher'] = 'other'
## 二、用户数据集
print (users.shape)
users.head()
(278858, 3)
userIDLocationAge
01nyc, new york, usaNaN
12stockton, california, usa18.0
23moscow, yukon territory, russiaNaN
34porto, v.n.gaia, portugal17.0
45farnborough, hants, united kingdomNaN
users.dtypes
userID        int64
Location     object
Age         float64
dtype: object
users.userID.values
array([     1,      2,      3, ..., 278856, 278857, 278858], dtype=int64)
## Age 
sorted(users.Age.unique())
[nan,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0,75.0,76.0,77.0,78.0,79.0,80.0,81.0,82.0,83.0,84.0,85.0,86.0,87.0,88.0,89.0,90.0,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0,101.0,102.0,103.0,104.0,105.0,106.0,107.0,108.0,109.0,110.0,111.0,113.0,114.0,115.0,116.0,118.0,119.0,123.0,124.0,127.0,128.0,132.0,133.0,136.0,137.0,138.0,140.0,141.0,143.0,146.0,147.0,148.0,151.0,152.0,156.0,157.0,159.0,162.0,168.0,172.0,175.0,183.0,186.0,189.0,199.0,200.0,201.0,204.0,207.0,208.0,209.0,210.0,212.0,219.0,220.0,223.0,226.0,228.0,229.0,230.0,231.0,237.0,239.0,244.0]
## 年龄栏有一些无效的条目,比如 NaN、0,以及 100 及以上这样的极高值;下面把小于 5 岁或大于 90 岁的年龄都视为无效
# Ages below 5 or above 90 are blanked out.
users.loc[(users['Age'] < 5) | (users['Age'] > 90), 'Age'] = np.nan
# Replace every missing age with the mean age, then store the column as
# int32 (the integer cast drops the fractional part of the mean).
users['Age'] = users['Age'].fillna(users['Age'].mean()).astype(np.int32)
sorted(users['Age'].unique())
[5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90]
## 三、评级数据集
ratings.shape
(1149780, 3)
## 如果每个用户对每个条目进行评级,那么评级数据集将有nusers * nbooks条目,这表明数据集非常稀疏。
# Number of cells a fully dense user x book rating table would have.
n_users = len(users)
n_books = len(books)
print(n_users * n_books)
75670906880
ratings.head(5)
userIDISBNbookRating
0276725034545104X0
127672601550612245
227672704465208020
3276729052165615X3
427672905217950286
ratings.bookRating.unique()
array([ 0,  5,  3,  6,  8,  7, 10,  9,  4,  1,  2], dtype=int64)
# Keep only the ratings whose ISBN is present in the books table, and print
# the shape before and after the filter.
isbn_is_known = ratings['ISBN'].isin(books['ISBN'])
ratings_new = ratings.loc[isbn_is_known]
print(ratings.shape)
print(ratings_new.shape)
(1149780, 3)
(1031136, 3)
## 上面已剔除 ISBN 不在图书表中的评分;没有出现新用户,因此后续使用上面得到的 ratings_new 数据集 (1031136, 3)
# Report the user and book counts computed earlier.
for label, total in (("number of users: ", n_users), ("number of books: ", n_books)):
    print(label + str(total))
number of users: 278858
number of books: 271360
# Sparsity = 1 - (number of ratings) / (number of user x book cells).
total_cells = float(n_users * n_books)
sparsity = 1.0 - len(ratings_new) / total_cells
print('图书交叉数据集的稀疏级别是 ' + str(sparsity * 100) + ' %')
图书交叉数据集的稀疏级别是 99.99863734155898 %
ratings.bookRating.unique()
array([ 0,  5,  3,  6,  8,  7, 10,  9,  4,  1,  2], dtype=int64)
# Split the ratings: rows with bookRating 0 go to ratings_implicit, all other
# rows go to ratings_explicit.
is_zero_rating = ratings_new['bookRating'] == 0
ratings_explicit = ratings_new[~is_zero_rating]
ratings_implicit = ratings_new[is_zero_rating]
for frame in (ratings_new, ratings_explicit, ratings_implicit):
    print(frame.shape)
(1031136, 3)
(383842, 3)
(647294, 3)
## 统计
# Bar chart counting the rows of ratings_explicit for each bookRating value.
sns.countplot(data=ratings_explicit , x='bookRating')
plt.show()

在这里插入图片描述

## 基于简单流行度的推荐系统
# Popularity score of a book = sum of its explicit ratings.
rating_totals = ratings_explicit.groupby(['ISBN'])['bookRating'].sum()
ratings_count = rating_totals.to_frame()
# The ten highest-scoring ISBNs, joined with their book details.
top10 = ratings_count.sort_values('bookRating', ascending=False).head(10)
print("推荐下列书籍")
top10.merge(books, left_index=True, right_on='ISBN')
推荐下列书籍
bookRatingISBNbookTitlebookAuthoryearOfPublicationpublisher
40857870316666343The Lovely Bones: A NovelAlice Sebold2002Little, Brown
74841080385504209The Da Vinci CodeDan Brown2003Doubleday
52231340312195516The Red Tent (Bestselling Backlist)Anita Diamant1998Picador USA
21432798059035342XHarry Potter and the Sorcerer's Stone (Harry P...J. K. Rowling1999Arthur A. Levine Books
35625950142001740The Secret Life of BeesSue Monk Kidd2003Penguin Books
2625510971880107Wild AnimusRich Shapero2004Too Far
110525240060928336Divine Secrets of the Ya-Ya Sisterhood: A NovelRebecca Wells1997Perennial
70624020446672211Where the Heart Is (Oprah's Book Club (Paperba...Billie Letts1998Warner Books
23122190452282152Girl with a Pearl EarringTracy Chevalier2001Plume Books
11821790671027360Angels &amp; DemonsDan Brown2001Pocket Star
# Users that appear in the explicit ratings, and users that appear in the
# implicit ratings.
users_exp_ratings = users.loc[users['userID'].isin(ratings_explicit['userID'])]
users_imp_ratings = users.loc[users['userID'].isin(ratings_implicit['userID'])]
for frame in (users, users_exp_ratings, users_imp_ratings):
    print(frame.shape)
(278858, 3)
(68091, 3)
(52451, 3)
## 基于协同过滤的推荐系统
# Keep only the users who gave at least 100 explicit ratings.
counts1 = ratings_explicit['userID'].value_counts()
active_users = counts1[counts1 >= 100].index
ratings_explicit = ratings_explicit.loc[ratings_explicit['userID'].isin(active_users)]

# Keep only the rating values that still occur at least 100 times.
counts = ratings_explicit['bookRating'].value_counts()
frequent_values = counts[counts >= 100].index
ratings_explicit = ratings_explicit.loc[ratings_explicit['bookRating'].isin(frequent_values)]

# One row per user, one column per ISBN, each cell holding the rating.
ratings_matrix = ratings_explicit.pivot(index='userID', columns='ISBN', values='bookRating')
userID, ISBN = ratings_matrix.index, ratings_matrix.columns
print(ratings_matrix.shape)
ratings_matrix.head()
(449, 66574)
ISBN00009131540001046438000104687X00010472130001047973000104799X0001048082000105373600010537440001055607...B000092Q0AB00009EF82B00009NDANB0000DYXIDB0000T6KHIB0000VZEJQB0000X8HIEB00013AX9EB0001I1KOGB000234N3A
userID
2033NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2110NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2276NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4017NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4385NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 66574 columns

# From here on only the users (rows) and books (columns) of the rating
# matrix are counted.
n_users, n_books = ratings_matrix.shape
print(n_users, n_books)
449 66574
# Encode missing ratings as 0 and store the matrix as 32-bit integers.
ratings_matrix = ratings_matrix.fillna(0).astype(np.int32)
ratings_matrix.head(5)
ISBN00009131540001046438000104687X00010472130001047973000104799X0001048082000105373600010537440001055607...B000092Q0AB00009EF82B00009NDANB0000DYXIDB0000T6KHIB0000VZEJQB0000X8HIEB00013AX9EB0001I1KOGB000234N3A
userID
20330000000000...0000000000
21100000000000...0000000000
22760000000000...0000000000
40170000000000...0000000000
43850000000000...0000000000

5 rows × 66574 columns

# Sparsity of the user x book rating matrix: one minus the share of cells that
# hold a rating. The denominator is the matrix's own cell count;
# users_exp_ratings.shape[0] must not be used here because it counts users
# from before the ">= 100 ratings" filter, while ratings_explicit and the
# matrix columns are taken after it.
matrix_cells = float(ratings_matrix.shape[0] * ratings_matrix.shape[1])
sparsity = 1.0 - len(ratings_explicit) / matrix_cells
print('图书交叉数据集的稀疏级别是 ' + str(sparsity * 100) + ' %')
图书交叉数据集的稀疏级别是 99.99772184106935 %
## 基于用户的协同过滤
# Module-level defaults: distance metric and neighbourhood size.
metric = 'cosine'
k = 10
def findksimilarusers(user_id, ratings, metric = metric, k=k):
    """Find the k+1 nearest rows of ``ratings`` to the row of ``user_id``.

    Args:
        user_id: Label in ``ratings.index`` of the query user.
        ratings: DataFrame with one row per user; it is both the fitted data
            and the source of the query row.
        metric: Distance metric handed to ``NearestNeighbors``.
        k: Number of neighbours wanted; ``k + 1`` are requested because the
            query row is itself part of the fitted data.

    Returns:
        ``(similarities, indices)``: ``similarities`` is a 1-D array equal to
        ``1 - distance`` for each neighbour, and ``indices`` is the 2-D array
        of row positions returned by ``kneighbors``.
    """
    knn = NearestNeighbors(metric=metric, algorithm='brute')
    knn.fit(ratings)
    row_pos = ratings.index.get_loc(user_id)
    query = ratings.iloc[row_pos, :].values.reshape(1, -1)
    distances, indices = knn.kneighbors(query, n_neighbors=k + 1)
    return 1 - distances.flatten(), indices
def predict_userbased(user_id, item_id, ratings, metric = metric, k=k):
    """Predict the rating of ``item_id`` by ``user_id`` from similar users.

    The prediction is the user's own mean rating plus the similarity-weighted
    average of each neighbour's deviation from that neighbour's mean rating.

    Args:
        user_id: Label in ``ratings.index``.
        item_id: Label in ``ratings.columns``.
        ratings: User x item rating DataFrame.
        metric: Distance metric forwarded to ``findksimilarusers``.
        k: Neighbourhood size forwarded to ``findksimilarusers``.

    Returns:
        The predicted rating as an int clamped to the range 1..10. The value
        is also printed.
    """
    user_loc = ratings.index.get_loc(user_id)
    item_loc = ratings.columns.get_loc(item_id)
    similarities, indices = findksimilarusers(user_id, ratings, metric, k)
    mean_rating = ratings.iloc[user_loc, :].mean()

    wtd_sum = 0.0
    sum_wt = 0.0
    for neighbour_loc, similarity in zip(indices.flatten(), similarities):
        # The query user is part of the fitted data; skip it if returned.
        if neighbour_loc == user_loc:
            continue
        neighbour_row = ratings.iloc[neighbour_loc, :]
        ratings_diff = neighbour_row.iloc[item_loc] - neighbour_row.mean()
        wtd_sum += ratings_diff * similarity
        # Sum only the weights actually used, instead of assuming the query
        # user was returned with similarity exactly 1.
        sum_wt += similarity

    # With no usable neighbour weight, fall back to the user's own mean
    # rather than dividing by zero.
    if sum_wt == 0:
        raw_prediction = mean_rating
    else:
        raw_prediction = mean_rating + wtd_sum / sum_wt
    prediction = int(round(raw_prediction))

    # Clamp AFTER computing the prediction so the result stays on the 1..10
    # rating scale (very sparse data can push the raw value outside it).
    if prediction <= 0:
        prediction = 1
    elif prediction > 10:
        prediction = 10

    print ('用户预测等级 {0} -> item {1}: {2}'.format(user_id,item_id,prediction))
    return prediction
## 测试
predict_userbased(11676,'0001056107',ratings_matrix)
用户预测等级 11676 -> item 0001056107: 22
## 基于项目的协同过滤
def findksimilaritems(item_id, ratings, metric=metric, k=k):
    """Find the k+1 nearest columns of ``ratings`` to the column ``item_id``.

    Args:
        item_id: Label in ``ratings.columns`` of the query item.
        ratings: User x item rating DataFrame; it is transposed internally so
            that each item becomes one row.
        metric: Distance metric handed to ``NearestNeighbors``.
        k: Number of neighbours wanted; ``k + 1`` are requested because the
            query item is itself part of the fitted data.

    Returns:
        ``(similarities, indices)``: ``similarities`` is a 1-D array equal to
        ``1 - distance`` for each neighbour, and ``indices`` is the 2-D array
        of item positions returned by ``kneighbors``.
    """
    item_vectors = ratings.T
    item_pos = item_vectors.index.get_loc(item_id)
    knn = NearestNeighbors(metric=metric, algorithm='brute')
    knn.fit(item_vectors)
    query = item_vectors.iloc[item_pos, :].values.reshape(1, -1)
    distances, indices = knn.kneighbors(query, n_neighbors=k + 1)
    return 1 - distances.flatten(), indices
def predict_itembased(user_id, item_id, ratings, metric = metric, k=k):
    """Predict the rating of ``item_id`` by ``user_id`` from similar items.

    The prediction is the similarity-weighted average of the ratings the user
    gave to the items most similar to ``item_id``.

    Args:
        user_id: Label in ``ratings.index``.
        item_id: Label in ``ratings.columns``.
        ratings: User x item rating DataFrame.
        metric: Distance metric forwarded to ``findksimilaritems``.
        k: Neighbourhood size forwarded to ``findksimilaritems``.

    Returns:
        The predicted rating as an int clamped to the range 1..10. The value
        is also printed.
    """
    user_loc = ratings.index.get_loc(user_id)
    item_loc = ratings.columns.get_loc(item_id)
    # Forward metric and k so the caller's arguments are honoured instead of
    # silently falling back to the module-level defaults.
    similarities, indices = findksimilaritems(item_id, ratings, metric, k)

    wtd_sum = 0.0
    sum_wt = 0.0
    for neighbour_loc, similarity in zip(indices.flatten(), similarities):
        # The query item is part of the fitted data; skip it if returned.
        if neighbour_loc == item_loc:
            continue
        wtd_sum += ratings.iloc[user_loc, neighbour_loc] * similarity
        # Sum only the weights actually used, instead of assuming the query
        # item was returned with similarity exactly 1.
        sum_wt += similarity

    # With no usable neighbour weight there is nothing to average; use 0 so
    # the clamp below yields the minimum rating instead of dividing by zero.
    raw_prediction = wtd_sum / sum_wt if sum_wt != 0 else 0
    prediction = int(round(raw_prediction))

    # Keep the result on the 1..10 rating scale; very sparse data (or a
    # correlation metric) can push the raw value outside it.
    if prediction <= 0:
        prediction = 1
    elif prediction > 10:
        prediction = 10

    print ('用户预测等级 {0} -> item {1}: {2}'.format(user_id,item_id,prediction))
    return prediction
## 测试
prediction = predict_itembased(11676,'0001056107',ratings_matrix)
用户预测等级 11676 -> item 0001056107: 1

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.hqwc.cn/news/87639.html

如若内容造成侵权/违法违规/事实不符,请联系编程知识网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

AxureRP制作静态站点发布互联网,内网穿透实现公网访问

AxureRP制作静态站点发布互联网,内网穿透实现公网访问 文章目录 AxureRP制作静态站点发布互联网,内网穿透实现公网访问前言1.在AxureRP中生成HTML文件2.配置IIS服务3.添加防火墙安全策略4.使用cpolar内网穿透实现公网访问4.1 登录cpolar web ui管理界面4…

Android studio APK切换多个摄像头(Camera2)

1.先设置camera的权限 <uses-permission android:name"android.permission.CAMERA" /> 2.布局 <?xml version"1.0" encoding"utf-8"?> <LinearLayout xmlns:android"http://schemas.android.com/apk/res/android"and…

服务器部署前后端项目-SQL Father为例

hello~大家好哇,好久没更新博客了。现在来更新一波hhh 现在更新一下部署上的一些东西,因为其实有很多小伙伴跟我之前一样,很多时候只是开发了,本地前后端都能调通,也能用,但是没有部署到服务器试过…

新SDK平台下载开源全志V853的SDK

获取SDK SDK 使用 Repo 工具管理,拉取 SDK 需要配置安装 Repo 工具。 Repo is a tool built on top of Git. Repo helps manage many Git repositories, does the uploads to revision control systems, and automates parts of the development workflow. Repo is…

用NeRFMeshing精确提取NeRF网络中的3D网格

准确的 3D 场景和对象重建对于机器人、摄影测量和 AR/VR 等各种应用至关重要。 NeRF 在合成新颖视图方面取得了成功,但在准确表示底层几何方面存在不足。 推荐:用 NSDT编辑器 快速搭建可编程3D场景 我们已经看到了最新的进展,例如 NVIDIA 的…

【算法与数据结构】404、LeetCode左叶子之和

文章目录 一、题目二、解法三、完整代码 所有的LeetCode题解索引,可以看这篇文章——【算法和数据结构】LeetCode题解。 一、题目 二、解法 思路分析:思路比较简单,遍历所有节点然后判断该节点是否为左叶子节点,如果是,…

文件夹无法删除?简单3招,轻松解决问题!

“我电脑里有一个文件夹占用了很大的内存,我想将它删除来释放一些内存,但是根本没法删除,为什么会这样呢?文件夹无法删除应该怎么办呢?” 在日常电脑使用中,有时候会遇到文件夹无法删除的情况,这…

“互联网+”背景下燃气行业的数字化之路

文章来源:智慧美好生活 关键词:智慧燃气、智慧燃气场站、智慧燃气平台、设备设施数字化、数字孪生、工业互联网 近年来,随着互联网行业的发展,其影响力正在逐渐渗透到各个领域。在能源行业,各个互联网巨头与燃气企业…

STM32 CubeMX (H750)RGB屏幕 LTDC

STM32 CubeMX STM32 RGB888 LTDC STM32 CubeMX一、STM32 CubeMX 设置时钟树LTDC使能设置屏幕参数修改RGB888的GPIO 二、代码部分效果 RGB屏幕线束定义: 一、STM32 CubeMX 设置 时钟树 这里设置的时钟,关于刷新速度 举例子:LCD_CLK24MHz 时…

网易新财报:游戏稳、有道进、云音乐正爬坡

今年以来,AI大模型的火热程度屡屡攀升,越来越多的企业都加入到了AI大模型的赛场中,纷纷下场布局。而在众多参与者中,互联网企业的身影更是频频浮现,比如,百度、阿里巴巴、腾讯等等。值得一提的是,…

自然语言处理(三):基于跳元模型的word2vec实现

跳元模型 回顾一下第一节讲过的跳元模型 跳元模型(Skip-gram Model)是一种用于学习词向量的模型,属于Word2Vec算法中的一种。它的目标是通过给定一个中心词语来预测其周围的上下文词语。 这节我们以跳元模型为例,讲解word2vec的…

改进YOLO系列:6.添加ECA注意力机制

添加ECA注意力机制 1. ECA注意力机制论文2. ECA注意力机制原理3. ECA注意力机制的配置3.1common.py配置3.2yolo.py配置3.3yaml文件配置1. ECA注意力机制论文 论文题目:ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks 论文链接:ECA-N…