因为崔前辈给出的代码运行时会出错, 这里对其略作了修改和简化.
这是书上的例题, 此处不再详细介绍.
- import requests
- from lxml import etree
class Login:
    """Simulate a browser login to GitHub over one persistent session.

    The flow is: GET the login page, read the hidden ``authenticity_token``
    from the form, then POST the credentials together with that token.
    """

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Referer': 'https://github.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) ApplewebKit/537.36 (Khtml, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            'Host': 'github.com'
        }
        # Login page address (serves the form that carries the token).
        self.login_url = 'https://github.com/login'
        # Address the login form is POSTed to.
        self.post_url = 'https://github.com/session'
        # A Session keeps state and handles cookies automatically, so the
        # login persists when other pages are fetched through it later.
        self.session = requests.Session()

    def token(self):
        """Fetch the login page and extract the ``authenticity_token`` values.

        Returns:
            The XPath result list: the ``value`` attribute of every
            ``<input name="authenticity_token">`` on the page. The list is
            empty when the page contains no such input.
        """
        # Download the login page through the shared session.
        response = self.session.get(self.login_url, headers=self.headers)
        # Parse the HTML and pull out the token attribute(s).
        selector = etree.HTML(response.text)
        token = selector.xpath('//input[@name="authenticity_token"]/@value')
        return token

    def login(self, email, password, output_path='D:/github.txt'):
        """POST the login form; print and save the page returned on success.

        Args:
            email: Account name or e-mail address, sent as the ``login`` field.
            password: Account password.
            output_path: File the returned HTML is written to (UTF-8). The
                default is the location the original script used.

        Prints ``Error!!!`` when no token could be read from the login page
        or when the POST does not answer with HTTP 200.
        """
        tokens = self.token()
        if not tokens:
            # Without a token the POST cannot succeed, so do not send it.
            print("Error!!!")
            return

        post_data = {
            'commit': 'Sign in',
            # Form-encoding marker; the check mark had been garbled into '?'.
            'utf8': '✓',
            'authenticity_token': tokens[0],
            'login': email,
            'password': password
        }
        # Simulate the login with a POST request.
        response = self.session.post(self.post_url, data=post_data, headers=self.headers)
        # NOTE(review): a 200 status only shows that a page came back; it does
        # not by itself prove the credentials were accepted - confirm if needed.
        if response.status_code == 200:
            # Show the returned page and keep a copy on disk.
            print(response.text)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(response.text)
        else:
            print("Error!!!")
if __name__ == "__main__":
    # Replace the placeholders below with your own account and password.
    credentials = {'email': '[email protected]', 'password': 'password'}
    Login().login(**credentials)
也可以把保存的文件后缀改为 .html, 以网页形式在浏览器中查看.
来源: http://www.bubuko.com/infodetail-3043149.html