import threading
from collections import deque

from selenium import webdriver
# URLs already visited, shared by every crawler thread so that no song or
# playlist page is processed twice.
songList = set()
playList = set()
# Playlist
def _collect_links(browser, selector, line_format):
    """Return a deque with the ``href`` of every element matching ``selector``.

    Each match is also printed using ``line_format % (text, href)``.
    Elements that fail while being read are skipped; a failure of the lookup
    itself is reported and yields whatever was collected so far.
    """
    links = deque()
    try:
        for each in browser.find_elements_by_css_selector(selector):
            try:
                href = each.get_property('href')
                print(line_format % (each.text, href))
                links.append(href)
            except Exception:
                # Best effort: one unreadable element must not drop the rest.
                continue
    except Exception as exc:
        print('someerror', exc)
    return links


def chrome_browser_songList(url, browser):
    """Crawl one playlist page, then the songs and playlists it links to.

    Loads ``url`` in ``browser``. When the page's ``play-count`` element is
    above 10000 a record is appended through ``save_file``. Every ``/song``
    link on the page is passed to ``song_queue`` and every ``/playlist`` link
    to ``play_list_queue``, which in turn calls back into this function.

    Args:
        url: Address of the playlist page.
        browser: Selenium WebDriver used to load the page. It is closed here
            before the linked playlists are crawled with a new driver.

    Exceptions raised by the page load, by the ``play-count`` / title lookups
    or by ``int()`` are not caught here and propagate to the caller.
    """
    browser.get(url)
    play_count = browser.find_element_by_id('play-count').text
    if int(play_count) > 10000:
        # NOTE(review): the label says "评论数" (comment count) but the value
        # written is the play count; kept unchanged because it is the
        # existing on-disk record format.
        title = browser.find_element_by_class_name('f-ff2').text
        data = '\n' + title + ' 评论数:' + str(play_count) + ' 地址:' + url
        save_file(data, 'D:\\songList.txt')

    # Songs linked from this playlist. Raw string keeps the CSS-escaped
    # slash without triggering Python's invalid-escape warning.
    songQueue = _collect_links(
        browser, r'a[href^=\/song]', "歌曲名字: %s 地址 %s")
    song_queue(songQueue, browser)

    # Other playlists linked from this page.
    playListQueue = _collect_links(
        browser, r'a[href^=\/playlist]', "歌单: %s 地址 %s")

    browser.close()
    browser = webdriver.Chrome(
        r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    # NOTE(review): this new driver is handed to play_list_queue and is not
    # closed in this function when the queue turns out to be empty.
    play_list_queue(playListQueue, browser)
# Song
def chrome_browser_song(url):
    """Open a song page and record it when it has more than 10000 comments.

    A dedicated Chrome driver is started for the page. The comment count is
    read from the ``cnt_comment_count`` element inside the ``g_iframe``
    frame; when it exceeds 10000 the song title, artist, count and URL are
    appended through ``save_file``.

    Args:
        url: Address of the song page.

    Exceptions from the driver calls or from ``int()`` propagate to the
    caller, but the driver is always shut down first.
    """
    browser = webdriver.Chrome(
        r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    try:
        browser.get(url)
        browser.switch_to_frame('g_iframe')
        comment_count = browser.find_element_by_id('cnt_comment_count').text
        if int(comment_count) > 10000:
            title = browser.find_element_by_class_name('f-ff2').text
            artist = browser.find_element_by_css_selector(
                r'a[href^=\/artist]').text
            data = ('\n歌曲名字:' + title + ' 歌手:' + artist
                    + ' 评论数:' + comment_count + ' 歌曲地址:' + url)
            save_file(data, 'D:\\song.txt')
    finally:
        # One driver is started per song, so it must be released even when a
        # lookup fails; quit() also stops the chromedriver process.
        browser.quit()
# Save to file
def save_file(data, file):
    """Append ``data`` to the text file at path ``file``.

    Args:
        data: Text to append; written as-is, no newline is added.
        file: Path of the target file; created if it does not exist.

    The file is written as UTF-8 so that titles containing characters
    outside the platform's default code page do not raise on write.
    """
    with open(file, 'a', encoding='utf-8') as f_obj:
        f_obj.write(data)
# Song queue
def song_queue(songQueue, browser):
    """Drain ``songQueue``, crawling every song URL not seen before.

    Args:
        songQueue: deque of song page URLs; it is emptied by this call.
        browser: Not used by this function; kept so existing callers that
            pass it keep working.

    URLs already present in the module-level ``songList`` set are skipped.
    A failure while crawling one song is reported and the loop moves on to
    the next URL.
    """
    while songQueue:
        current_url = songQueue.popleft()
        if current_url in songList:
            continue
        # Mark as visited before crawling so a failing page is not retried.
        songList.add(current_url)
        try:
            chrome_browser_song(current_url)
        except Exception as exc:
            # Best effort per song, but do not swallow KeyboardInterrupt /
            # SystemExit the way a bare ``except`` would.
            print('someerror', current_url, exc)
# Playlist queue
def play_list_queue(listQueue, browser):
    """Drain ``listQueue``, crawling every playlist URL not seen before.

    Args:
        listQueue: deque of playlist page URLs; it is emptied by this call.
        browser: Selenium WebDriver passed on to ``chrome_browser_songList``
            for each new playlist.

    URLs already present in the module-level ``playList`` set are skipped.
    A failure while crawling one playlist is reported and the loop moves on
    to the next URL.
    """
    while listQueue:
        current_url = listQueue.popleft()
        if current_url in playList:
            continue
        # Mark as visited before crawling so a failing page is not retried.
        playList.add(current_url)
        try:
            chrome_browser_songList(current_url, browser)
        except Exception as exc:
            # Best effort per playlist, but do not swallow KeyboardInterrupt /
            # SystemExit the way a bare ``except`` would.
            print('someerror', current_url, exc)
# Seed playlist pages; thread_1 starts from the first entry and thread_2
# from the second.
url_list = [
    'http://music.163.com/playlist?id=598057191',
    'http://music.163.com/#/playlist?id=144236857',
]
def thread_1():
    """Crawl the first seed playlist in ``url_list`` with its own Chrome driver."""
    url = url_list[0]
    # Raw string: the Windows path contains backslashes that are not valid
    # Python escape sequences.
    browser = webdriver.Chrome(
        r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    chrome_browser_songList(url, browser)
def thread_2():
    """Crawl the second seed playlist in ``url_list`` with its own Chrome driver."""
    url = url_list[1]
    # Raw string: the Windows path contains backslashes that are not valid
    # Python escape sequences.
    browser = webdriver.Chrome(
        r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    chrome_browser_songList(url, browser)
# Multithreading
def thread_song():
    """Build, without starting, one crawler thread per seed playlist.

    Returns:
        A list of two ``threading.Thread`` objects targeting ``thread_1``
        and ``thread_2``, in that order.
    """
    return [
        threading.Thread(target=thread_1),
        threading.Thread(target=thread_2),
    ]
if __name__ == '__main__':
    # Script entry point: run both seed crawlers concurrently.
    threads = thread_song()
    for t in threads:
        # Daemon threads do not keep the process alive on their own.
        t.daemon = True
        t.start()
    # Join only after every thread has been started: joining inside the start
    # loop would run the crawlers one after another, and joining just one
    # thread would let the process exit while the other is still working.
    for t in threads:
        t.join()
# Source: http://www.bubuko.com/infodetail-1956536.html