alias pass spa .post windows chrome apr ror
import requests
from bs4 import BeautifulSoup
from PIL import Image
# Browser-like headers attached to every request made through `session`.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) ApplewebKit/537.36 (Khtml, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
    'Referer': 'https://accounts.douban.com/login?alias=&redir=https://www.douban.com/&source=index_nav&error=1001',
}

# A single shared session, so cookies received by the login POST are sent
# again by the later page fetches.
session = requests.Session()
session.headers.update(headers)

# Credentials are read interactively when the script starts.
# NOTE(review): input() echoes the password to the terminal.
username = input('请输入你的用户名:')
password = input('请输入你的密码:')

# Address used both to fetch the login form and to post the credentials.
url = 'https://accounts.douban.com/login'
def login(username, password, source='index_nav', redir='https://www.douban.com/', login='登录'):
    """Simulate a browser login by posting the login form through `session`.

    If the login page carries a captcha, the captcha image is downloaded to
    ``caprcha.jpg``, shown to the user, and the typed answer is added to the
    form data.

    Args:
        username: Value sent as the ``form_email`` field.
        password: Value sent as the ``form_password`` field.
        source: Value sent as the ``source`` field.
        redir: Value sent as the ``redir`` field.
        login: Value sent as the ``login`` field (the submit button label).

    Returns:
        The response object returned by the login POST.

    Side effects:
        May write ``caprcha.jpg`` in the working directory, may prompt on
        stdin for the captcha text, and prints the session cookies.
    """
    # Ask the login page whether a captcha is required.
    caprcha_id, caprcha_link = get_captcha(url)

    caprcha = None
    if caprcha_id:
        # Save the captcha image locally, then try to display it.
        img_html = session.get(caprcha_link)
        with open('caprcha.jpg', 'wb') as f:
            f.write(img_html.content)
        try:
            with Image.open('caprcha.jpg') as im:
                im.show()
        except Exception:
            # Best effort only: the user can still open the saved file by
            # hand. Unlike a bare `except:`, Ctrl-C is not swallowed here.
            print('打开错误')
        # The user types in what the captcha image shows.
        caprcha = input('请输入验证码:')

    # Form fields to submit.
    data = {
        'source': source,
        'redir': redir,
        'form_email': username,
        'form_password': password,
        'login': login,
    }
    if caprcha_id:
        # The captcha fields are only sent when a captcha was requested.
        data['captcha-id'] = caprcha_id
        data['captcha-solution'] = caprcha

    # The session already carries the default headers, so they are not
    # passed again here.
    html = session.post(url, data=data)
    print(session.cookies.items())
    return html
def get_captcha(url):
    """Parse the login page and extract the captcha id and captcha image link.

    Args:
        url: Address of the login page to fetch.

    Returns:
        A ``(caprcha_id, caprcha_link)`` tuple. Both items are ``None`` when
        the page does not contain the expected captcha elements, which lets
        the caller skip the captcha step instead of failing on a missing
        element.
    """
    # Fetch through the shared session so this request carries the same
    # headers and cookie jar as the login POST that follows.
    # NOTE(review): presumably the server ties the captcha to the visitor's
    # cookies - confirm.
    html = session.get(url)
    soup = BeautifulSoup(html.text, 'lxml')

    images = soup.select('#captcha_image')
    # The id is read from the second <input> inside the captcha block.
    # Original selector hint:
    # #lzform > div.item.item-captcha > div > div > input[type="hidden"]:nth-child(3)
    inputs = soup.select('div.captcha_block > input')
    if not images or len(inputs) < 2:
        return None, None

    caprcha_link = images[0]['src']
    caprcha_id = inputs[1]['value']
    return caprcha_id, caprcha_link
# Log in first so the session holds the cookies needed for the group page.
login(username, password)

# Fetch the groups page with the logged-in session and parse it.
login_url = 'https://www.douban.com/group/'
xiaozu_html = session.get(login_url)
soup = BeautifulSoup(xiaozu_html.text, 'lxml')

# Original selector hint:
# #content > div > div.article > div.topics > table > tbody > tr:nth-child(1) > td.td-subject > a
titles = soup.select('tr.pl > td.td-subject > a.title')

# Print each matched link's target and text.
for title in titles:
    print(title['href'], title.string)
Python 模拟登入豆瓣网,并爬取小组信息
来源: http://www.bubuko.com/infodetail-2055206.html