# POST request example
# -*- coding: utf-8 -*-
import requests
import sys
import warnings
import re
import urllib
# Suppress every warning message for the rest of the process.
warnings.filterwarnings('ignore')

# Change the interpreter's default string encoding to UTF-8.
# `reload` (as a builtin) and `sys.setdefaultencoding` exist only on
# Python 2; `reload(sys)` is needed there because site.py removes
# `setdefaultencoding` from `sys` at startup. Python 3 already defaults
# to UTF-8, so the hack is skipped instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf8')
# Earlier "simplified" variant of the login flow, kept for reference only.
# NOTE(review): both regex patterns below are effectively empty (their
# markup seems to have been lost), and `re=s.post(...)` would rebind the
# `re` module name -- do not re-enable without fixing these.
#
# proxies={'http':'http://username::8080',
#          'https': 'https://username::8080'}
# url='https://www.telegeography_provider-profiles.com/products/global-internet-geography/provider-rankings/provider-profiles/index.html'
# base_url='https://www.telegeography_provider-profiles.com/login/login'
# data={
# 'username':'username',
# 'password':'password',
# 'service':'https://www.telegeography_provider-profiles.com/products/global-internet-geography/provider-rankings/provider-profiles/index.html'
# }
# req=requests.get(base_url,proxies=proxies,verify=False).content
# pattern=re.compile('(.*?)',re.S)
# a=re.findall(pattern,req)[0]
# data['lt']=re.findall('',a,re.S)[0]
# s=requests.session()
# re=s.post(base_url,data=data,proxies=proxies,verify=False)
# res=s.get(url,proxies=proxies,verify=False).content
# print res

# Proxy settings passed to every request.
# NOTE(review): these URLs look like redacted placeholders
# ('username::8080' has no host) -- substitute real proxy addresses.
proxies = {'http': 'http://username::8080',
           'https': 'https://username::8080'}

# Login form fields; 'lt' and 'service' are added below.
data = dict(username='username', password='password')

# Page that is requested first; its final URL supplies the login URL.
base_url = 'https://www.telegeography_provider-profiles.com/products/global-internet-geography/provider-rankings/provider-profiles/index.html'

# The session is used for the login POST so cookies are handled for us.
s = requests.session()

# Request the initial page to obtain the fields needed for logging in:
# both the POST data and the URL to post to.
# NOTE: verify=False disables TLS certificate verification.
base_res = requests.get(base_url, proxies=proxies, verify=False)

# Brute-force regex for the 'lt' value. Taking the <input> under the form
# would require extracting the form's content first; mind how the spaces
# affect the () group. `.text` is used so the str pattern also works on
# Python 3, where `.content` is bytes. Raises IndexError if nothing matches.
data['lt'] = re.findall(r'"([A-Z]{2}-\w.+?)" ', base_res.text)[0]
# Alternative extraction via the form markup (note the slash closing the tag):
# pattern=re.compile('<td id="submit-container">(.*?)</td>',re.S)
# d= re.findall(pattern, base_res.content)[0]
# data['lt']=re.findall(r'<input type="hidden" name="lt" value="(.*?)" id="lt"/>',d,re.S)[0]

# The final URL is expected to look like '<login_url>?service=<service>';
# split it into the two parts (IndexError if '?service=' is absent).
data['service'] = base_res.url.split('?service=')[1]
# data['service'] ='https://www.telegeography.com/products/global-internet-geography/provider-rankings/provider-profiles/index.html'
login_url = base_res.url.split('?service=')[0]
# login_url = 'https://www.telegeography.com/login/login'

# A redirect automatically prepends the domain, so redirects are disabled
# (allow_redirects=False) and the redirect is then built by hand. Its
# target is the Location header of the response to this request.
# The URL posted to is really the login page's form action, and the
# session takes care of the cookies. (allow_redirects=False is not needed
# if the redirect resolves correctly.)
login_res = s.post(login_url, data=data, proxies=proxies, verify=False,
                   allow_redirects=False)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(login_res.headers['Location'])
'''
It also works without the following code:
# redir_url = urllib.unquote(login_res.headers['Location'])  # unquote only decodes the URL back to a plain string instead of leaving it URL-encoded.
# redir_res = s.get(redir_url, proxies=proxies, verify=False, allow_redirects=False)
# print redir_res
'''
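# With this logged-in session `s` you can now GET any page you need directly.
# The response you get back is the GET response; likewise use .content to obtain the page source.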
# from lxml import etree
# req=s.get(base_url,proxies=proxies, verify=False).content
# html=etree.HTML(req)
# urls=html.xpath('//*[@id="content"]/table/tbody/tr/td[1]')
# print urls
# for i in urls:
#     url='https://www.telegeography.com/products/global-internet-geography/provider-rankings/provider-profiles/'+i
#     html=etree.HTML(s.get(url,proxies=proxies, verify=False).content)
#     arrays=html.xpath('//*[@id="content"]/div[2]/div[@class="broadband-customers landscape wide"]/table/tbody/tr')
#     print arrays
# print 'True International Gateway (TIG)' in base_html
# print base_res.url
# print base_res.cookies
# print base_res.headers
# print 'XO Communications' in base_html
# print '1299, 3301, 5518, 1759, 3308' in base_html
# print 'Bosnia-Herzegovina' in base_html
# Source:
# Related articles:
#
# None yet -- be the first to comment!