- #! /usr/bin/python
- # -*- coding: utf-8 -*-
- import sys, os, urllib, urllib2, json
- from bs4 import BeautifulSoup
# User-Agent header value sent along with Xiami search requests.
ua = "Mozilla/5.0"

# Douban music search API endpoint; the query text is appended to it.
dbsch = "https://api.douban.com/v2/music/search?q="

# Xiami music search API endpoint; the search key is appended to it.
xmsch = "http://www.xiami.com/app/nineteen/search/key/"

# Leading part of a Xiami player URL; the album id is appended to it.
xmplay_font = "http://www.xiami.com/song/play?ids=/song/playlist/id/"

# Trailing part of a Xiami player URL; it follows the album id.
xmplay_end = "/type/1"
def get_content(url, user_agent=None):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request.
        user_agent: Optional value for the ``User-Agent`` request header.
            When it is falsy, no custom header is sent.

    Returns:
        The response body as a byte string, or ``None`` when the request
        fails or the body is empty.
    """
    headers = {'User-Agent': user_agent} if user_agent else {}
    try:
        response = urllib2.urlopen(urllib2.Request(url, headers=headers))
        try:
            body = response.read()
        finally:
            # Release the connection even when reading fails part-way.
            response.close()
    except Exception as err:
        # Best effort: callers treat None as "no data". Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate, so the script can still be interrupted.
        # %r keeps the message printable whatever the URL contains.
        sys.stderr.write('failed to fetch %r: %r\n' % (url, err))
        return None
    # An empty body is reported as None, like a failed request.
    return body or None
def _utf8(value):
    """Return ``value`` as a UTF-8 byte string; non-strings go through str()."""
    try:
        return value.encode('utf-8')
    except AttributeError:
        return str(value)


def _find_album_id(title, singer, max_pages):
    """Search Xiami for an album and return its id, or None when not found.

    ``title`` and ``singer`` are UTF-8 byte strings. At most ``max_pages``
    result pages are requested; the search also stops at the first page that
    cannot be decoded or that carries no results.
    """
    # Percent-encode both parts so spaces, slashes and non-ASCII bytes cannot
    # corrupt the request path; the literal '+' separator is kept as-is.
    query = '%s+%s' % (urllib.quote(title, safe=''),
                       urllib.quote(singer, safe=''))
    for page in range(1, max_pages + 1):
        search_url = '%s%s/page/%s' % (xmsch, query, page)
        # Xiami requests need a User-Agent header, otherwise the server
        # answers with a 403 error.
        try:
            reply = json.loads(get_content(search_url, ua))
        except (TypeError, ValueError):
            # TypeError: nothing was fetched (None); ValueError: invalid JSON.
            return None
        results = reply.get('results') if isinstance(reply, dict) else None
        if not results:
            return None
        for item in results:
            # Encode instead of repr(): repr() escapes non-ASCII characters
            # and quotes, which made such names impossible to match.
            ctitle = urllib.unquote(_utf8(item.get('album_name', ''))).strip()
            csinger = urllib.unquote(_utf8(item.get('artist_name', ''))).strip()
            # Crude matching: plain substring tests on both fields.
            if title in ctitle and singer in csinger:
                album_id = item.get('album_id')
                if album_id is not None:
                    return _utf8(album_id)
    return None


def work(url, max_pages=5):
    """Look up every album of a Douban list on Xiami and print the outcome.

    The page at ``url`` is parsed with BeautifulSoup: album titles are read
    from the ``<a>`` inside each ``div.pl2`` and artists from the text of each
    ``p.pl``. For every album a player URL is printed when a match is found,
    otherwise a "not found" line is printed.

    Args:
        url: Address of the Douban list page.
        max_pages: Upper bound on the Xiami result pages requested per album.
    """
    html = get_content(url)
    if html is None:
        sys.stderr.write('cannot read the Douban list at %r\n' % (url,))
        return
    soup = BeautifulSoup(html)
    title_nodes = soup.find_all('div', class_='pl2')
    artist_nodes = soup.find_all('p', class_='pl')
    for title_node, artist_node in zip(title_nodes, artist_nodes):
        anchor = title_node.a
        raw_title = anchor.string if anchor is not None else None
        artist_parts = artist_node.text.split(':')
        if raw_title is None or len(artist_parts) < 2:
            # The entry does not have the expected shape (no plain-text link,
            # or no ':' in the artist line); skip it instead of crashing.
            continue
        title = raw_title.strip().encode('utf-8')
        # Presumably the text after the first ':' is the artist followed by
        # four trailing characters of page decoration - TODO confirm against
        # the current Douban page layout.
        singer = artist_parts[1][:-4].strip().encode('utf-8')
        print('-' * 20)
        print('search %s-%s...' % (title, singer))
        album_id = _find_album_id(title, singer, max_pages)
        if album_id is None:
            print('%s-%s not found' % (title, singer))
        else:
            # Build the Xiami player URL for the matched album.
            print('%s-%s was found in album %s' % (title, singer, album_id))
            print('%s%s%s' % (xmplay_font, album_id, xmplay_end))
if __name__ == '__main__':
    # One argument is required: the URL of the Douban list page to process.
    if len(sys.argv) < 2:
        # A usage error goes to stderr with a non-zero status so that calling
        # shells and scripts can detect the failure.
        sys.stderr.write('Usage: python dl.py douban_list_url.\n')
        sys.exit(1)
    work(sys.argv[1])
# This snippet comes from http://www.codesnippet.cn/detail/220920136052.html
# Source: http://www.codesnippet.cn/detail/220920136052.html