我们现在具体讲解一下主要功能的实现
我们先定义一个接口类,名称为 MyObject,主要用于声明子类需要实现的方法。
代码如下:
from typing import Optional


class MyObject(object):
    """Interface base class: shared request defaults plus hooks to override.

    The class attributes below are default values for HTTP requests.  The
    four methods are placeholders that raise ``NotImplementedError`` until a
    subclass overrides them.
    """

    # Crawl configuration.
    start_urls: Optional[list] = None
    name: Optional[str] = None
    keywords = False

    # Request defaults.
    cookies: Optional[dict] = None
    encode: Optional[str] = None  # when set, forces the response text encoding
    timeout = 30
    method = "GET"
    params = None
    data = None
    files = None
    json = None
    verify = False
    proxies = None

    # NOTE: both containers are created once at class level, so they are
    # shared by every subclass/instance that does not rebind them.
    headers = {}
    _key_url = set()

    def to_excel(self):
        """Export hook; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # calling it fails with TypeError.  NotImplementedError is intended.
        raise NotImplementedError("to_excel must be overwrite")

    def to_database(self):
        """Persistence hook; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("to_database must be overwrite")

    def follow(self, url=None, callback=None, cb_kwargs=None, rq_kwargs=None):
        """Follow-up request hook; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("follow must be overwrite")

    def append(self, excel_item):
        """Item collection hook; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("append must be overwrite")
接下来实现 MyRequest 类,主要负责发送 HTTP 请求。
实现代码如下:
from config import setting
import requests
from common.log import loger
from common.myobject import MyObject
import time


class MyRequest(MyObject):
    """HTTP layer: sends requests through one shared ``requests.Session``."""

    # Created once at class level, so every call on this class and its
    # subclasses goes through the same session object.
    _request = requests.Session()

    @classmethod
    def request(cls, url, retry=3, **kwargs):
        """Send one HTTP request, retrying on any error.

        :param url: target URL.
        :param retry: number of additional attempts after a failure; the
            pause before each retry equals the number of retries left.
        :param kwargs: per-call overrides for ``headers``, ``method``,
            ``timeout``, ``params``, ``data``, ``files``, ``json``,
            ``cookies`` and ``proxies``.  A missing or falsy value falls back
            to the class attribute of the same name.  ``type`` selects default
            headers for POST requests (1: form submitted via XMLHttpRequest,
            2: JSON body).  Other keys are ignored; ``verify`` always comes
            from the class attribute.
        :return: ``(response, content)`` where ``content`` is the decoded body
            text, or ``None`` once every attempt has failed.
        """
        headers = kwargs.get("headers")
        if headers is None:
            headers = setting.DEFAULT_REQUEST_HEADERS
        # Work on a copy: the additions below must not leak into the global
        # defaults or into the dict owned by the caller.
        headers = dict(headers)

        # Per-call values are resolved into locals instead of being written
        # back to the class, so one call cannot change the next call.
        method = kwargs.get("method") or cls.method
        timeout = kwargs.get("timeout") or cls.timeout
        params = kwargs.get("params") or cls.params
        data = kwargs.get("data") or cls.data
        files = kwargs.get("files") or cls.files
        json = kwargs.get("json") or cls.json
        cookies = kwargs.get("cookies") or cls.cookies
        proxies = kwargs.get("proxies") or cls.proxies

        if isinstance(cookies, str):
            # A raw cookie string is only valid as a header value; requests'
            # ``cookies=`` argument expects a mapping or a CookieJar.
            headers.setdefault("Cookie", cookies)
            cookies = None

        if method.lower() == "post":
            body_type = kwargs.get("type")
            if body_type == 1:
                headers.setdefault("X-Requested-With", "XMLHttpRequest")
                headers.setdefault(
                    "Content-Type", "application/x-www-form-urlencoded"
                )
            if body_type == 2:
                headers.setdefault("Content-Type", "application/json")

        # Still published on the class, as before, for code that reads the
        # headers of the most recent request.
        cls.headers = headers

        remaining = retry
        while True:
            try:
                response = cls._request.request(
                    url=url,
                    method=method,
                    timeout=timeout,
                    params=params,
                    data=data,
                    files=files,
                    json=json,
                    cookies=cookies,
                    proxies=proxies,
                    headers=headers,
                    verify=cls.verify,
                )
                if response is None:
                    raise ValueError("response is none!!!")
                if cls.encode is not None:
                    # An explicit encoding wins, so detection is skipped.
                    encoding = cls.encode
                else:
                    encoding = response.apparent_encoding or "utf-8"
                content = response.content.decode(encoding, "ignore")
                cls._key_url.add(url)
                return response, content
            except Exception as e:
                loger.warn('request error:%s' % e)
                if remaining <= 0:
                    return None
                time.sleep(remaining)
                remaining -= 1