好声音mp3下载

 
# !/usr/bin/python
# -*- coding: UTF-8 -*-
 
import re
import urllib2
 
# Charset conversion helper.
def mdcode(str, encoding='utf-8'):
    """Convert text of unknown charset into ``encoding``.

    Args:
        str: The text to convert. Either already-decoded text or an encoded
            byte string whose charset is not known in advance. (The name
            shadows the builtin; it is kept so keyword callers keep working.)
        encoding: Target codec name. The special value ``'unicode'`` asks for
            decoded text instead of encoded bytes.

    Returns:
        The text encoded with ``encoding``, or decoded text when ``encoding``
        is ``'unicode'``.

    Raises:
        ValueError: If the input is a byte string that none of the candidate
            charsets can decode (and re-encode) without error.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u'')
    if isinstance(str, text_type):
        if encoding == 'unicode':
            # Already decoded; 'unicode' is not a real codec name.
            return str
        return str.encode(encoding)

    # Try the candidate charsets in order; the first one that both decodes
    # the input and re-encodes it to the target wins.
    for codec in ('utf-8', 'gbk', 'gb2312', 'gb18030', 'utf-16'):
        try:
            decoded = str.decode(codec)
            if encoding == 'unicode':
                return decoded
            return decoded.encode(encoding)
        except UnicodeError:
            continue
    raise ValueError('Unknown charset')
 
# Download an MP3 file; resuming an interrupted download is not supported.
def downmp3(url,name):
    """Fetch ``url`` and save the response body to a local file.

    Args:
        url: Address of the file to download.
        name: Local file name; it is converted to GBK with ``mdcode`` before
            being passed to ``open``.
    """
    res = urllib2.urlopen(url)
    try:
        with open(mdcode(name, 'gbk'), "wb") as out:
            # Copy in fixed-size chunks so the whole file is never held in
            # memory at once.
            while True:
                chunk = res.read(64 * 1024)
                if not chunk:
                    break
                out.write(chunk)
    finally:
        # Release the connection even if the copy fails part-way.
        res.close()
 
# Request an HTML page.
def gethtml(url):
    """Fetch ``url`` and return its body converted by ``mdcode``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body after ``mdcode`` with its default target encoding
        (UTF-8).
    """
    res = urllib2.urlopen(url)
    try:
        context = res.read()
    finally:
        # Close the connection whether or not the read succeeded.
        res.close()
    return mdcode(context)
 
# Extract the MP3 download path and the performer name with regular expressions.
# The escapes are single backslashes inside raw strings: a doubled backslash
# would make the pattern require a literal backslash in the HTML.
_DOWNLINK_RE = re.compile(
    r'''<input\s*.*?\s*downlink\s*=\s*["\s]*([^"']+?)["\s]+'''
    r'''.*?id="bit128".*?>''',
    re.S | re.I)
_AUTHOR_RE = re.compile(
    r'''<span class="author_list" title\s*=\s*["\s]*([^"']+?)["\s]+\s*>''',
    re.S | re.I)


def feedmp3url(data):
    """Pull the download path and performer out of a song download page.

    Args:
        data: HTML text of the page to scan.

    Returns:
        A ``(downlink, author)`` tuple: the value of the ``downlink``
        attribute of the first ``<input>`` tag that is followed by
        ``id="bit128"``, and the ``title`` of the first
        ``<span class="author_list">`` tag.

    Raises:
        IndexError: If either tag cannot be found in ``data``.
    """
    link = _DOWNLINK_RE.search(data)
    if link is None:
        raise IndexError('no <input downlink=... id="bit128"> tag found')
    artist = _AUTHOR_RE.search(data)
    if artist is None:
        raise IndexError('no <span class="author_list" title=...> tag found')
    return link.group(1), artist.group(1)
 
# Extract the MP3 list of one episode with a regular expression.
def feedurllist(data):
    """Collect download information for every song linked from a page.

    Every ``<a ... href="..." title="...">`` tag in ``data`` (with ``title``
    directly following ``href``) is treated as a song link: its download page
    is fetched with ``gethtml`` and parsed with ``feedmp3url``.

    Args:
        data: HTML text of the page to scan.

    Returns:
        A list of ``[mp3url, title, author]`` lists, one per matching link,
        in page order.
    """
    # Single backslashes inside the raw string: a doubled backslash would
    # make the pattern require a literal backslash in the HTML.
    anchor_re = re.compile(
        r'''<a\s*.*?\s*href\s*=\s*["\s]*([^"']+?)["\s]+\s*'''
        r'''title\s*=\s*["\s]*([^"']+?)["\s]+\s*>''',
        re.S | re.I)

    urls = []
    for href, title in anchor_re.findall(data):
        mp3url, author = feedmp3url(
            gethtml('http://ting.baidu.com' + href + '/download'))
        urls.append([mp3url, title, author])
        # Progress output, converted to GBK before printing.
        print(mdcode(title + '-' + author, 'gbk'))

    return urls
 
# Main routine: takes the page URLs of the individual episodes.
def main(urls):
    """Gather the song entries from every page in ``urls``.

    Args:
        urls: Iterable of page URLs; each is fetched with ``gethtml`` and
            scanned with ``feedurllist``.

    Returns:
        One flat list holding the entries of all pages, in input order.
    """
    collected = []
    for page_url in urls:
        page_html = gethtml(page_url)
        collected.extend(feedurllist(page_html))
    return collected
 
if __name__ == '__main__':
    # Album pages to scan for songs.
    album_urls = ['http://ting.baidu.com/album/23149328',
                  'http://ting.baidu.com/album/23150394',
                  'http://ting.baidu.com/album/23150523',
                  'http://ting.baidu.com/album/23152500',
                  'http://ting.baidu.com/album/23152435',
                  'http://ting.baidu.com/album/23151786',
                  'http://ting.baidu.com/album/23160050',
                  'http://ting.baidu.com/album/23364352',
                  'http://ting.baidu.com/album/23528761',
                  'http://ting.baidu.com/album/24493381']

    urls = main(album_urls)
    print("Start down mp3 ...")
    for url in urls:
        # Each entry is [download path, song title, performer].
        mp3name = url[1] + '-' + url[2] + '.mp3'
        # Convert to GBK only for display; downmp3 converts the file name
        # itself, so passing the unconverted name avoids converting twice.
        print(mdcode(mp3name, 'gbk') + " Down ...")
        downmp3("http://ting.baidu.com" + url[0], mp3name)
#该片段来自于http://www.codesnippet.cn/detail/170620134070.html
来源: http://www.codesnippet.cn/detail/170620134070.html
与本文相关文章

暂无,快来抢沙发吧！