urllib.request.urlopen()源代码——urlopen()在干什么
返回opener.open(url, data, timeout)方法的结果
_opener = None  # module-level cache for the default opener; starts out as None


def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* and return the result of ``opener.open(url, data, timeout)``.

    The opener used depends on the keyword arguments:

    * If any of *cafile*, *capath* or *cadefault* is truthy, a
      DeprecationWarning is issued, an SSL context is created from
      *cafile*/*capath*, and a one-off opener is built around an
      HTTPSHandler using that context.
    * Otherwise, if *context* is truthy, a one-off opener is built around an
      HTTPSHandler using *context*.
    * Otherwise the module-level ``_opener`` is used; it is created with
      ``build_opener()`` on first use and reused afterwards.

    Raises:
        ValueError: if *context* is passed together with any of *cafile*,
            *capath* or *cadefault*, or if those are used while SSL support
            is not available.
    """
    global _opener
    if cafile or capath or cadefault:
        import warnings
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # Default case: build the shared opener once and cache it.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
urllib.request.build_opener()
build_opener()函数返回的是OpenerDirector类的实例
def build_opener(*handlers):
    """Create an OpenerDirector and register handlers on it.

    A fixed list of default handler classes is instantiated and added, except
    for any default class that one of *handlers* is an instance of or (when
    the handler is passed as a class) a subclass of. HTTPSHandler is part of
    the defaults only when ``http.client`` has an ``HTTPSConnection``
    attribute.

    Each item in *handlers* may be a handler instance or a handler class;
    classes are instantiated with no arguments before being added.

    Returns:
        The configured OpenerDirector instance.
    """
    opener = OpenerDirector()  # the OpenerDirector instance being configured
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Collect every default class that a caller-supplied handler replaces.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, type):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    # Remaining defaults are added first, then the caller's handlers.
    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isinstance(h, type):
            h = h()
        opener.add_handler(h)
    return opener  # hand the configured instance back to the caller
urllib.request.OpenerDirector().open()
由 `class OpenerDirector` 的定义可知,OpenerDirector 是一个类。由上面的代码 `opener = build_opener()` 可知,opener 是 OpenerDirector 类的一个实例,即根据 OpenerDirector 类创建的对象(build_opener() 将创建的实例赋值给 opener)。
所以urlopen返回的结果是opener.open()方法的结果——响应(response),而opener.open()是一个更底层的方法,它允许自定义opener对象发送特定的请求,获取响应结果。
自定义opener对象发送请求(添加网络代理Proxy)
在build_opener()函数的参数中添加一个或多个处理程序(handlers)
from urllib.request import Request, ProxyHandler, build_opener

# Address to request.
url = 'http://httpbin.org/get'

# Build the request object.
req = Request(url)

# Build a handler that sends requests through a proxy.
# To choose the proxy explicitly, pass a mapping:
# ProxyHandler({'type': 'ip: port'})
# NOTE(review): called without a mapping here; per the urllib.request docs
# the proxy settings are then taken from the environment.
handler = ProxyHandler()

# Build an opener object that uses the handler.
opener = build_opener(handler)

# Send the request; the with-block closes the response once it has been read.
with opener.open(req) as resp:
    # Print the decoded response body.
    print(resp.read().decode())
使用OpenerDirector类的add_handler()方法
爬虫设置代理就是让别的服务器代替自己的服务器去获取数据。
代理分类
代理网站
小象代理
快代理
云代理
66ip代理
站大爷
开心代理
讯代理