import requests
from lxml import etree
class TianJiPlc:
def init(self):
self.url_home_page=“https://wap.yesky.com/pic/”
self.headers={‘User-Agent’: ‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36’}
def requests_get(self,url):req=requests.get(url=url,headers=self.headers) return req.textdef requests_get_plc(self,plc_url):for url in plc_url:req_plc=requests.get(url=url,headers=self.headers)return req_plc.content def lxml_etre(self,html_text,xpath):html=etree.HTML(html_text)list_html=html.xpath(xpath)return list_htmldef url_url(self,url_):list_url=[]http="https:"for i in url_:url_urls=http+ilist_url.append(url_urls)return list_url def dict_name(self,list_name,list_html):dict_type=dict(zip(list_name,list_html))return dict_typedef plc_files(self,plc_url_list,content):for plc_url in plc_url_list:name_plc=plc_url.split('/')[-1]with open(f"/storage/emulated/0/爬虫相册/{name_plc}","wb") as f:f.write(content)print("下载完成:",len(plc_url_list)) def one_url(self):req=self.requests_get(self.url_home_page)#print(req)nu='//div/div/ul/li//a/@href'list_nu=set(self.lxml_etre(req,nu))list_plc=self.url_url(list_nu)return list_plc
tianji=TianJiPlc()
one=tianji.one_url()
print(one,len(one))
for i in one:
two=tianji.requests_get(i)
src=’//div/ul/li/img/@data-src’
src_list=tianji.lxml_etre(two,src)
print(src_list)
cont=tianji.requests_get_plc(src_list)
tianji.plc_files(src_list,cont)
hre='//ul[@class="swiper-slide"]/li/a/img/@src'
list_hre=tianji.lxml_etre(two,hre)co=tianji.requests_get_plc(list_hre)
tianji.plc_files(list_hre,co)