import urllib2
from contextlib import closing
from sgmllib import SGMLParser
class sgm(SGMLParser):
    """Collect ``<img>`` source URLs from fed HTML and download them.

    Images that appear while an "author" ``<div>`` is open are ignored.

    Attributes (both created by ``reset``):
        srcs: list of ``src`` attribute values collected so far, in the
            order the ``<img>`` tags were seen.
        ISTRUE: ``True`` while images should be collected; set to ``False``
            by ``start_div`` and back to ``True`` by ``end_div``.
    """

    def reset(self):
        """Reset the underlying parser and forget all collected URLs."""
        SGMLParser.reset(self)
        self.srcs = []
        self.ISTRUE = True

    def start_div(self, artts):
        """Stop collecting images if this ``<div>`` is an author block.

        artts: sequence of ``(name, value)`` attribute pairs of the tag.
        """
        # Only the value is compared, so any attribute whose value is
        # exactly "author" (class, id, ...) switches collection off.
        if any(value == "author" for _name, value in artts):
            self.ISTRUE = False

    def end_div(self):
        """Resume collecting images once a ``</div>`` is handled."""
        # NOTE(review): every closing div re-enables collection, so a div
        # nested inside the author block ends the skip early.
        self.ISTRUE = True

    def start_img(self, artts):
        """Record the ``src`` value(s) of an ``<img>`` unless skipping.

        artts: sequence of ``(name, value)`` attribute pairs of the tag.
        """
        if not self.ISTRUE:
            return
        self.srcs.extend(value for name, value in artts if name == "src")

    @staticmethod
    def _local_name(src):
        """Return the local file name for *src*.

        The name is the last 12 characters of the URL, cut after the final
        "/" so it can never point into a subdirectory. The result is an
        empty string when the URL ends with "/".
        """
        tail = src[-12:]
        return tail.rsplit("/", 1)[-1]

    def download(self):
        """Fetch every URL in ``srcs`` and save it in the working directory.

        Each URL is printed before it is fetched. URLs for which
        ``_local_name`` yields an empty name are skipped. Errors raised by
        ``urllib2.urlopen``, ``read`` or ``open`` propagate to the caller.
        """
        for src in self.srcs:
            name = self._local_name(src)
            if not name:
                continue
            # Parenthesised form prints the same text under Python 2.
            print(src)
            # Fetch first so a failed request leaves no empty file behind,
            # and close the response even when read() raises.
            with closing(urllib2.urlopen(src)) as img:
                payload = img.read()
            with open(name, "wb") as f:
                f.write(payload)
# Scrape listing pages 1..499 and save the images found on each page.
# The instance gets its own name so the class name ``sgm`` is not rebound.
parser = sgm()
for page in range(1, 500):
    url = "http://www.qiushibaike.com/late/page/%s?s=4622726" % page
    # Close the HTTP response as soon as the page body has been read.
    with closing(urllib2.urlopen(url)) as response:
        data = response.read()
    # Each page is an independent document: reset() clears parser state and
    # empties ``srcs`` (nothing else does), so download() below fetches only
    # this page's images instead of re-fetching every earlier one.
    parser.reset()
    parser.feed(data)
    # Flush any data the parser is still buffering for this page.
    parser.close()
    parser.download()
# This snippet comes from http://www.codesnippet.cn/detail/181220138126.html
# Source: http://www.codesnippet.cn/detail/181220138126.html