首先我们进入华为官方旗舰店,点击Mate60,得到:
找到存放评论的接口网址:
然后使用cookie模拟登录并请求该接口,得到返回的字典(JSON)数据,再从中筛选出我们想要的内容。
爬取1000条评论
同样可以对任意商品进行操作,得到16款手机共计16000条评论。
完整代码如下:
# 导包
import requests
import time
import random# 获取评论
# Comment-list endpoint captured from the JD product page. Only the product id
# and the page index are substituted per request; every other query parameter
# is replayed exactly as captured.
_COMMENT_API = (
    'https://api.m.jd.com/?appid=item-v3'
    '&functionId=pc_club_productPageComments&client=pc&clientVersion=1.0.0'
    '&t=1708081217616&loginType=3'
    '&uuid=181111935.17080674671511977691958.1708067467.1708067467.1708071360.2'
    '&productId={product_id}&score=0&sortType=5&page={page}'
    '&pageSize=10&isShadowSku=0&fold=1&bbtf=&shield='
)

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
)

# NOTE(review): this is a captured login-session cookie kept only so existing
# behaviour is unchanged. Session cookies are personal credentials and
# presumably expire; prefer passing a fresh one via the ``cookie`` argument
# (the script entry point reads it from the JD_COOKIE environment variable)
# instead of committing it to source.
_DEFAULT_COOKIE = (
    '__jdv=76161171|cn.bing.com|-|referral|-|1708067467152; '
    '__jdu=17080674671511977691958; '
    'areaId=18; '
    'shshshfpa=f79ca8f0-dbeb-483a-65d1-1903e4d092d7-1708067469; '
    'shshshfpx=f79ca8f0-dbeb-483a-65d1-1903e4d092d7-1708067469; '
    'pinId=KwTmTjb4JkIRQRTFN102vw; '
    'pin=jd_TbEqoALcQroG; '
    'unick=jd_TbEqoALcQroG; '
    '_tp=ymdIo7YzbPdf3lcwvjfR%2Bw%3D%3D; '
    '_pst=jd_TbEqoALcQroG; '
    'jsavif=1; '
    'user-key=6e806906-3180-4c42-a5e2-e31a1194ae77; '
    'ipLoc-djd=18-1511-29459-31137; '
    'cn=1; '
    'TrackID=1DA2qdWl8nrh1WmgmQF2S1usFqrqC78TDQvYzcjd8AZ4nbOCNjQbMxJU_1U9MaAgd49EQVTanG262mStqNYe5GK7crEh7KjvjwYq7sjviH7s; '
    'thor=4DF93E3A971F99490C488635E7F8D589A8196F54231ECCDB94DE8327335167EB93F82BED3BAFAA1381CFB2F61F2B9491709195640404A09F26521656E4AD10C5D60CA5E579D00521EE9F94F7B4F2FEC71D1F6D243FED2A6492E4157754799CFD69F1143F088E8187CDDD17156C425AB722FAC07C1593BD5CA30078A1A86B8F4D812FE7C612C99FA6BED546A10B210F57A09F60E3C6A4D8EBD3E43ED3FF0E2C6F; '
    'flash=2_up4fAwgFa9_MyKNlF4cN75sYhl63GvemBVDO6vAuyiI5HlAz40IWc60iy9d2TjDrgCpKEIeoPkjQTtvlWiokjxPStyb_uzTBbT9R3VsYTdq*; '
    'ceshi3.com=000; '
    'token=3ef9d1eaecaba20757d307e8f6496c8c,3,948931; '
    '__tk=nZbwWciMVxOOVMP0R0kzRUeCVxR0VxzzVMnKWZzzV0bwWlAFRlROiX,3,948931; '
    '__jda=181111935.17080674671511977691958.1708067467.1708067467.1708071360.2; '
    '__jdc=181111935; '
    '3AB9D23F7A4B3C9B=U5DLBKPWR7TZKL3D7XJZK3O2MYXSUJLW6I36XE4JBMU2Y3IG3C5RK2XG2PWQ7RV6NISS6NMHNWOW4KGDAMCFJYYMKQ; '
    '3AB9D23F7A4B3CSS=jdd03U5DLBKPWR7TZKL3D7XJZK3O2MYXSUJLW6I36XE4JBMU2Y3IG3C5RK2XG2PWQ7RV6NISS6NMHNWOW4KGDAMCFJYYMKQAAAAMNWFHPIOYAAAAACTP4Q7G4XZEE7EX; '
    '_gia_d=1; '
    'shshshsID=37a146fdd9c1b8a7af5bc40153a38575_34_1708076765078; '
    '__jdb=181111935.39.17080674671511977691958|2.1708071360; '
    'shshshfpb=BApXeM-dHsuhAagbgKaymjtcYcHRfFiMNBkpHc0Zf9xJ1Mhox0oO2'
)

# Seconds to wait for the API before giving up, so a stalled connection cannot
# hang the whole crawl.
_REQUEST_TIMEOUT = 10


def get_content(page, out=None, *, product_id='100077414769', cookie=None):
    """Fetch one page of product comments and append them to a text file.

    Each comment's ``content`` field has its newlines removed, is echoed to
    stdout, and is written to the output file on its own line.

    Args:
        page: Page index sent to the API. The script entry point starts at 0;
            the progress message prints ``page + 1``.
        out: Writable text file object. When omitted, the module-level ``f``
            opened by the script entry point is used, which keeps the original
            ``get_content(page)`` call working.
        product_id: JD product id to query. Defaults to the id that was
            hard-coded in the captured URL.
        cookie: Value for the ``cookie`` request header. Defaults to the
            captured session cookie.

    Returns:
        The number of comments written for this page.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload has no ``comments`` entry, or a comment
            has no ``content`` entry.
    """
    sink = out if out is not None else f
    url = _COMMENT_API.format(product_id=product_id, page=page)
    headers = {
        'user-agent': _USER_AGENT,
        'cookie': cookie if cookie is not None else _DEFAULT_COOKIE,
        'referer': 'https://item.jd.com/',
    }

    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    # Surface HTTP errors explicitly instead of failing later with an opaque
    # JSON or key error on an error page.
    response.raise_for_status()
    comments = response.json()['comments']

    for comment in comments:
        # Keep each comment on a single line of the output file.
        content = comment['content'].replace('\n', '')
        print(content)
        sink.write(content)
        sink.write('\n')
    # Flush once after the page is written so finished pages reach disk even
    # if a later page fails.
    sink.flush()

    print(f'============================第{page+1}页爬取完毕===============================')
    return len(comments)


if __name__ == '__main__':
    import os

    product = input('请输入要爬取的商品:')
    page_number = int(input('请输入要爬取的页数:'))
    # An empty or unset JD_COOKIE falls back to the built-in default cookie.
    session_cookie = os.environ.get('JD_COOKIE') or None

    with open(f'JD_comment_{product}.txt', 'a', encoding='utf-8') as f:
        for page in range(page_number):
            try:
                written = get_content(page, f, cookie=session_cookie)
            except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
                # Stop on the first failed page, as before, but report why
                # instead of silently swallowing the error.
                print(f'第{page+1}页爬取失败,已停止爬取:{exc!r}')
                break
            if not written:
                # An empty page means there is nothing further to collect.
                print(f'第{page+1}页没有评论,已停止爬取。')
                break
            # Randomised pause between pages to avoid hammering the API.
            time.sleep(1 + random.random())

    print(f'爬虫程序已结束!评论内容请在同目录下的 JD_comment_{product}.txt 查看!')
请大家关注一下我的公众号。