python 爬虫
from bs4 import BeautifulSoup
import requests

# Page to scrape.
url = 'http://www.baidu.com/'

# Download the page and parse the response body with the lxml parser.
wb_data = requests.get(url)
soup = BeautifulSoup(wb_data.text, 'lxml')

# Collect the three parallel element lists via CSS selectors.
titles = soup.select('div.property_title> a[target="_blank"]')
imgs = soup.select('img[width="160"]')
cates = soup.select('div.pl3n_reasoning_v2')

# zip() pairs the lists positionally and stops at the shortest one, so any
# surplus elements in the longer lists are silently ignored.
for title, img, cate in zip(titles, imgs, cates):
    data = {
        'title': title.get_text(),
        'img': img.get('src'),
        # stripped_strings yields each text fragment inside the element
        # with surrounding whitespace removed.
        'cate': list(cate.stripped_strings),
    }
    print(data)
来源: http://www.bubuko.com/infodetail-2544820.html