下载图片 (Download an image)
import requests

# Download one image and save it to the working directory.
response = requests.get(
    'http://www.51gis.com.cn/static/upload/3e223daf9df6216f/f3e187dfc0e4143a.jpg',
    timeout=10,  # don't hang forever on a dead host
)
# Fail loudly on HTTP errors instead of silently saving an error page as the .jpg.
response.raise_for_status()
with open('51gis.jpg', 'wb') as f:
    f.write(response.content)
- ==================================
获得天气 (Fetch the weather)
import requests


def getWeather(city):
    """Return a short forecast string ``"city:low,high"`` for *city*.

    Queries the etouch mini weather API (*city* is a Chinese city name;
    requests URL-encodes it automatically). Raises ``requests.HTTPError``
    on a bad status and ``KeyError`` if the response schema changes.
    """
    r = requests.get('http://wthrcdn.etouch.cn/weather_mini?city=' + city,
                     timeout=10)
    r.raise_for_status()
    # Bug fix: Response has .json(), not .JSON() — the original raised
    # AttributeError on every call.
    data = r.json()['data']['forecast'][0]
    return '%s:%s,%s' % (city, data['low'], data['high'])


print(getWeather('北京'))
- import requests
class TiebaSpider(object):
    """Crawl the first 10 listing pages of a Baidu Tieba forum and save each as an .htm file."""

    def __init__(self, tieba_name):
        self.tieba_name = tieba_name
        # pn= is the thread offset; each listing page shows 50 threads.
        self.url_temp = "http://tieba.baidu.com/f?kw=" + tieba_name + "&ie=utf-8&pn={}"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ApplewebKit/537.36 (Khtml, like Gecko) Chrome/77.0.3865.75 Safari/537.36'
        }

    def parse_url(self, url):
        """Fetch *url* and return the response body as text."""
        response = requests.get(url, headers=self.headers, timeout=10)
        # Surface HTTP errors instead of saving an error page to disk.
        response.raise_for_status()
        return response.text

    def save_html(self, url_html, page_num):
        """Write one page's HTML to 《name》- 第 N 页.htm in the working directory."""
        file_path = "《{}》- 第 {} 页".format(self.tieba_name, page_num)
        # Explicit UTF-8: the locale default (e.g. GBK on Chinese Windows)
        # would raise UnicodeEncodeError on arbitrary page content.
        with open(file_path + '.htm', 'w', encoding='utf-8') as f:
            f.write(url_html)

    def get_url_list(self):
        """Build the URLs of the first 10 listing pages (pn = 0, 50, ..., 450)."""
        return [self.url_temp.format(i * 50) for i in range(10)]

    def run(self):
        """Main logic: build URLs, fetch each page, save it with its page number."""
        url_list = self.get_url_list()
        # enumerate instead of the original url_list.index(url): index() is an
        # O(n) scan per page and returns the wrong number if a URL repeats.
        for page_num, url in enumerate(url_list, start=1):
            url_html = self.parse_url(url)
            self.save_html(url_html, page_num)
if __name__ == '__main__':
    # Entry point: ask which forum to crawl, then run the spider to completion.
    forum_name = input('请输入你想要爬取的论坛名称:')
    TiebaSpider(forum_name).run()
来源: http://www.bubuko.com/infodetail-3412692.html