#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib2
# Charset encoding conversion
def mdcode(str, encoding='utf-8'):
    """Convert text of unknown charset into ``encoding``.

    Args:
        str: The text to convert: either a unicode string or a byte string
            in one of the charsets probed below.  (The parameter name
            shadows the builtin; it is kept so existing callers still work.)
        encoding: Name of the target codec, or the special value
            ``'unicode'`` to get the decoded text back without re-encoding.

    Returns:
        A byte string in ``encoding``, or a unicode string when ``encoding``
        is ``'unicode'``.

    Raises:
        ValueError: If a byte string cannot be converted through any of the
            probed charsets.
    """
    want_text = encoding == 'unicode'
    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(str, type(u'')):
        # 'unicode' is not a real codec name, so already-decoded text is
        # returned untouched instead of being passed to .encode().
        return str if want_text else str.encode(encoding)
    # Probe order matters: the first charset that decodes (and, when a
    # target codec is given, re-encodes) without error wins.
    for charset in ('utf-8', 'gbk', 'gb2312', 'gb18030', 'utf-16'):
        try:
            text = str.decode(charset)
            return text if want_text else text.encode(encoding)
        except UnicodeError:
            continue
    raise ValueError('Unknown charset')
# Download an mp3 file; resuming an interrupted download is not supported
def downmp3(url, name):
    """Download ``url`` and save it to the file ``name``.

    The file name is converted to GBK with ``mdcode`` before opening.
    An interrupted download cannot be resumed.

    Args:
        url: Address of the file to fetch.
        name: Destination file name.
    """
    res = urllib2.urlopen(url)
    try:
        with open(mdcode(name, 'gbk'), "wb") as out:
            # Copy in chunks so the whole file is never held in memory.
            while True:
                chunk = res.read(64 * 1024)
                if not chunk:
                    break
                out.write(chunk)
    finally:
        # Release the connection even if the copy fails part-way.
        res.close()
# Request an HTML page
def gethtml(url):
    """Fetch ``url`` and return its body converted by ``mdcode``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body after ``mdcode`` conversion with its default
        target encoding.
    """
    res = urllib2.urlopen(url)
    try:
        context = res.read()
    finally:
        # Close the response explicitly instead of leaking the connection.
        res.close()
    return mdcode(context)
# Use regular expressions to extract the MP3 download URL and the MP3's performer
def feedmp3url(data):
    """Extract the mp3 download link and the performer from a download page.

    Args:
        data: HTML text of the page.

    Returns:
        A ``(mp3url, author)`` tuple.  ``mp3url`` is the value of the first
        ``downlink`` attribute matched in an ``<input ...>`` that is followed
        by ``id="bit128"``; ``author`` is the ``title`` value of the first
        ``<span class="author_list" ...>``.

    Raises:
        IndexError: If either piece of markup is not found in ``data``.
    """
    flags = re.S | re.I
    # The patterns use single regex escapes (\s) and plain quotes so they
    # match ordinary HTML; group numbering is unchanged (7 = link, 4 = author).
    link = re.search(
        r'''<input(\s*)(.*?)(\s*)downlink(\s*)=(\s*)(["\s]*)([^"']+?)(["\s]+)(.*?)id="bit128"(.*?)>''',
        data, flags)
    author = re.search(
        r'''<span class="author_list" title(\s*)=(\s*)(["\s]*)([^"']+?)(["\s]+)(\s*)>''',
        data, flags)
    if link is None or author is None:
        raise IndexError('download link or author not found in page')
    return link.group(7), author.group(4)
# Use regular expressions to extract the MP3 list of each episode
def feedurllist(data):
    """Collect download info for every song link found in an album page.

    Each ``<a ... href=... title=...>`` match in ``data`` is followed to its
    ``/download`` page on ting.baidu.com, from which the mp3 link and the
    performer are extracted.  A "title-author" line (GBK-encoded) is printed
    per song as progress output.

    Args:
        data: HTML text of the album page.

    Returns:
        A list of ``[mp3url, title, author]`` lists, one per matched link.
    """
    # The pattern uses single regex escapes (\s) and plain quotes so it
    # matches ordinary HTML; tuple index 6 is the href, index 12 the title.
    links = re.findall(
        r'''<a(\s*)(.*?)(\s*)href(\s*)=(\s*)(["\s]*)([^"']+?)(["\s]+)(\s*)title(\s*)=(\s*)(["\s]*)([^"']+?)(["\s]+)(\s*)>''',
        data, re.S | re.I)
    urls = []
    for groups in links:
        href, title = groups[6], groups[12]
        mp3url, author = feedmp3url(
            gethtml('http://ting.baidu.com' + href + '/download'))
        urls.append([mp3url, title, author])
        print(mdcode(title + '-' + author, 'gbk'))
    return urls
# Main function; takes the page URLs of the individual episodes of "The Voice" (好声音)
def main(urls):
    """Gather the download records of every album page in ``urls``.

    Args:
        urls: Iterable of album page URLs.

    Returns:
        One flat list holding the records ``feedurllist`` returns for each
        page, in page order.
    """
    records = []
    for page_url in urls:
        records.extend(feedurllist(gethtml(page_url)))
    return records
if __name__ == '__main__':
    # Album pages to crawl.
    album_pages = [
        'http://ting.baidu.com/album/23149328',
        'http://ting.baidu.com/album/23150394',
        'http://ting.baidu.com/album/23150523',
        'http://ting.baidu.com/album/23152500',
        'http://ting.baidu.com/album/23152435',
        'http://ting.baidu.com/album/23151786',
        'http://ting.baidu.com/album/23160050',
        'http://ting.baidu.com/album/23364352',
        'http://ting.baidu.com/album/23528761',
        'http://ting.baidu.com/album/24493381',
    ]
    urls = main(album_pages)
    print("Start down mp3 ...")
    for url in urls:
        # Each record is [mp3url, title, author].
        filename = url[1] + '-' + url[2] + '.mp3'
        print("%s Down ..." % mdcode(filename, 'gbk'))
        # downmp3 converts the name to GBK itself, so it gets the raw name;
        # converting twice would re-guess the charset of GBK bytes.
        downmp3("http://ting.baidu.com" + url[0], filename)
# This snippet comes from http://www.codesnippet.cn/detail/170620134070.html
# Source: http://www.codesnippet.cn/detail/170620134070.html