Python BeautifulSoup 抓取小说练习

 
#coding:utf-8
import urllib2
import urlparse

from bs4 import BeautifulSoup
 
root = 'http://www.biquge.com/'
urlList = [];
 
print u"正在解析章节列表..."
 
soup = BeautifulSoup(urllib2.urlopen('http://www.biquge.com/0_360/').read())
 
for result in soup.find(id="list").find("dt").find_next("dt").find_all_next("a"):
        urlList.append(result['href'])
 
fileHandle = open('test.txt','a')
#fileHandle.write("hello")
 
for result in urlList:
        temp = BeautifulSoup(urllib2.urlopen(root+result).read())
        #print temp.find(id="content").get_text()
        print u"正在下载:"+temp.title.text;
        content = temp.find(id="content").get_text().encode('gbk','ignore')
        fileHandle.write(content)
fileHandle.close()
#该片段来自于http://www.codesnippet.cn/detail/060120148432.html

来源: http://www.codesnippet.cn/detail/060120148432.html

与本文相关文章

暂无,快来抢沙发吧！