#!/usr/bin/python
#coding:utf8
#date:2013.07.03
#author: Finy
import re
import urllib

# Chart page handed to music_download() by the script entry point.
# (The site root, http://music.baidu.com, was the previously configured page.)
url = 'http://music.baidu.com/top/new'

# Path intended for the file that records resolved download links.
# NOTE(review): nothing visible in this file reads this constant; the script
# entry point passes the literal 'download.txt' instead -- confirm which is wanted.
save_url_file = 'c:/down.txt'
class music_download(object):
    """Scrape a song listing page and download each song it lists.

    The page at ``site_url`` is fetched, the song ids, names and authors are
    pulled out of it with regular expressions, and for every song the
    per-song download page is fetched to find the actual file link.

    The network calls use ``urllib.URLopener`` and ``urllib.urlretrieve``,
    i.e. the Python 2 ``urllib`` API.

    Attributes:
        Music_site_url: address of the listing page.
        Music_id / Music_name / Music_author: lists filled in by ``main``.
        openurl: the ``urllib.URLopener`` created by ``obthin_data``
            (``None`` until then).
    """

    # Browser-like header sent with every request made through ``openurl``.
    _USER_AGENT = ('User-Agent',
                   'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')

    # Non-greedy captures so several entries on one line are not merged.
    _SONG_NAME_RE = re.compile(r"'sname': '(.*?)', 'author'")
    _SONG_AUTHOR_RE = re.compile(r"'author': '(.*?)' }")
    _SONG_ID_RE = re.compile(r"'sid': '(.*?)', 'sname'")

    # ``\?`` matches the literal question mark of ``file?link=``.
    _DOWNLOAD_LINK_RE = re.compile(r'href="/data/music/file\?link=(.*?)" id="')

    _DOWNLOAD_PAGE = 'http://music.baidu.com/song/%s/download'

    def __init__(self, site_url):
        """Remember the listing page address and start with empty results."""
        self.Music_site_url = site_url
        self.Music_id = []
        self.Music_name = []
        self.Music_author = []
        self.openurl = None

    def _fetch(self, address):
        """Return the raw body of ``address``, always closing the response."""
        response = self.openurl.open(address)
        try:
            return response.read()
        finally:
            response.close()

    def _find_download_url(self, page_data):
        """Return the second download link found in ``page_data``, or None.

        The second match (index 1) is the one the script has always used;
        ``None`` is returned when the page has fewer than two links.
        """
        links = self._DOWNLOAD_LINK_RE.findall(page_data)
        if len(links) < 2:
            return None
        return links[1]

    def obthin_data(self):
        """Fetch the listing page and return its content decoded as UTF-8.

        Also (re)creates ``self.openurl``, which ``main`` reuses for the
        per-song requests.
        """
        self.openurl = urllib.URLopener()
        self.openurl.addheaders = [self._USER_AGENT]
        return self._fetch(self.Music_site_url).decode('utf8')

    def data_re(self, data):
        """Extract song metadata from the listing page text.

        Args:
            data: text of the listing page.

        Returns:
            A tuple ``(names, authors, ids)`` of three lists, each in the
            order the entries appear in ``data``.
        """
        names = self._SONG_NAME_RE.findall(data)
        authors = self._SONG_AUTHOR_RE.findall(data)
        ids = self._SONG_ID_RE.findall(data)
        return names, authors, ids

    def main(self, save_url_file):
        """Download every listed song and log its link to ``save_url_file``.

        Args:
            save_url_file: path of the text file that receives one resolved
                download link per line (the file is overwritten).

        Songs whose download page exposes no usable link are reported and
        skipped instead of aborting the run or reusing a previous link.
        """
        data = self.obthin_data()
        self.Music_name, self.Music_author, self.Music_id = self.data_re(data)

        # zip() stops at the shortest list, so a partially matched entry
        # cannot cause an IndexError.
        songs = zip(self.Music_name, self.Music_author, self.Music_id)

        # The with-block closes the log even if a download raises.
        with open(save_url_file, 'w') as url_log:
            for index, (name, author, song_id) in enumerate(songs):
                print(str(index) + ': ' + name + '-' + author)
                page_data = self._fetch(self._DOWNLOAD_PAGE % str(song_id))
                downurl = self._find_download_url(page_data)
                if downurl is None:
                    print('这个无法下载')
                    print('-' * 50)
                    continue
                url_log.write(downurl + '\n')
                print('%s music file download Ing ..................' % name)
                urllib.urlretrieve(downurl, name + '-' + author + '.mp3')
                print('-' * 50)
if __name__ == '__main__':
    # Run the scraper against the module-level ``url`` and record the
    # resolved download links in 'download.txt' (relative to the current
    # working directory).
    baidu_music_down = music_download(url)
    baidu_music_down.main('download.txt')
# This snippet comes from http://www.codesnippet.cn/detail/100920135800.html
# Source: http://www.codesnippet.cn/detail/100920135800.html