ont toolbar req all sts pen 51cto 小程序 eve
爬虫小程序:获取主网页的内容,并下载该主网页中包含的各个链接所指向的页面
Python 爬虫小程序
#coding:utf-8
import re
import requests
# Small crawler: save the front page at `url`, then download every absolute
# (http/https) link found in its <a href="..."> attributes into a numbered
# file sharing the same path prefix.
url = 'http://ai.51cto.com/'
# Common path prefix of all output files; open() does not create the
# directory, so it must already exist.
out_prefix = r'D:\Python27\sevenot_test\curbug3\test'
# Seconds to wait on a server; without this requests.get can block forever.
timeout = 10

con = requests.get(url, timeout=timeout)
# `with` closes the file even if the write raises.
with open(out_prefix + '.txt', 'wb') as page_file:
    page_file.write(con.content)

# Bytes pattern: con.content is the raw body, which is `str` on Python 2
# (where br'' is an ordinary str literal) and `bytes` on Python 3.
# re.S lets '.' span newlines inside an attribute value.
href = re.findall(br'<a href="(http.*?)"', con.content, re.S)
for a, i in enumerate(href):
    # On Python 3 the captured link is bytes; turn it into native text so it
    # can be printed and concatenated. On Python 2 it is already `str`.
    if not isinstance(i, str):
        i = i.decode('utf-8', 'replace')
    print(str(a) + ' ' + i)
    try:
        cc = requests.get(i, timeout=timeout)
    except requests.RequestException as err:
        # One dead or malformed link should not abort the whole crawl.
        print('skipped %s: %r' % (i, err))
        continue
    # The index in the file name matches the index printed above.
    with open(out_prefix + str(a) + '.txt', 'wb') as link_file:
        link_file.write(cc.content)
来源: http://www.bubuko.com/infodetail-2464425.html