1. 背景信息
爬取安居客二手房源信息
URL地址:https://wuhan.anjuke.com/sale/?from=navigation
2. 代码实现
import requests
from lxml import etree

# Scrape the titles of second-hand housing listings from the Anjuke (Wuhan)
# sale page, print each title, and save them one per line to '58.txt'.
if __name__ == '__main__':
    # 1. Target URL.
    url = "https://wuhan.anjuke.com/sale/?from=navigation"

    # 2. Spoof the User-Agent so the request looks like it comes from a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
    }

    # 3. Send the request and read the response body as text.
    #    A timeout is set because requests waits indefinitely by default.
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    # 4. Parse the HTML into an element tree.
    tree = etree.HTML(page_text)

    # 5. Collect the <div> elements directly under <section class="list">.
    div_list = tree.xpath('//section[@class="list"]/div')

    # The context manager guarantees the file is flushed and closed even if
    # an exception is raised while iterating.
    with open('58.txt', 'w', encoding='utf-8') as fp:
        for div in div_list:
            # Relative XPath: look for the title only inside this <div>.
            titles = div.xpath('./a/div[2]/div[1]/div[1]/h3/text()')
            if not titles:
                # This <div> has no <h3> text at the expected path
                # (presumably a non-listing entry such as an ad - TODO confirm);
                # skip it instead of raising IndexError.
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')