#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Sample image tag: src="http://ww2.sinaimg.cn/large/005Yan1vjw1erf95qkbfog307e08uu0y.gif" style="width:460px"
import contextlib
import re
import urllib
import urllib2
def gethtml(url, page):
    """Fetch one listing page and return its raw source.

    The page number is appended to ``url`` to build the request address,
    which is echoed to stdout before the request is made.

    Args:
        url: Base address that the page number is appended to.
        page: Page number; converted with ``str()`` before concatenation.

    Returns:
        The response body exactly as returned by the response's ``read()``.

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``read()`` propagates
        unchanged.
    """
    tempUrl = url + str(page)
    print(tempUrl)
    # closing() guarantees the response is released even if read() raises;
    # Python 2 urlopen() results are not context managers themselves.
    with contextlib.closing(urllib2.urlopen(tempUrl)) as response:
        return response.read()
def getImg(url, start_page, end_page):
    """Download every matching GIF from a range of listing pages.

    For each page number from ``start_page`` through ``end_page``
    (inclusive) the page source is fetched with ``gethtml``, GIF addresses
    are extracted with a regular expression, and each one is saved to the
    current working directory as ``P<page>0<index>.gif`` where ``index``
    counts the matches on that page from 0.

    Args:
        url: Base listing address passed through to ``gethtml``.
        start_page: First page number to process.
        end_page: Last page number to process (inclusive).
    """
    # Capture src="...gif" values that are followed on the same line by the
    # 460px width style. [^"] keeps the capture inside a single attribute
    # value, so an earlier non-GIF src on the same line is not swallowed
    # into the captured address.
    pattern = re.compile(r'src="([^"]*?\.gif)".*?style="width:460px"')
    for x in xrange(start_page, end_page + 1):
        # Call the helper by its defined name, gethtml (names are
        # case-sensitive).
        srcCode = gethtml(url, x)
        # findall() returns the captured addresses as a list.
        for num, img_url in enumerate(pattern.findall(srcCode)):
            urllib.urlretrieve(img_url, 'P%s0%s.gif' % (x, num))
            print("正则下载")
            print(img_url)
    print('全部任务完成!')
# ----------- Program entry point -----------
print(u"""
程序开始运行
#=========================================================
# 程序:暴漫爬虫
# 版本:V0.1
# 作者:江前云后
# 语言:Python 2.7
# 操作:输入要下载的暴漫gif的页码范围,自动下载所有gif图
#=========================================================
""")

# Listing address used for the run; the page number is appended to it.
myUrl = 'http://baozoumanhua.com/gif/hot/page/'

# Read the inclusive page range from stdin, then start the download.
start_page = int(raw_input(u"输入起始页\n"))
end_page = int(raw_input(u"输入结束页\n"))
getImg(myUrl, start_page, end_page)
# Source: http://www.phpxs.com/code/1005119/