- #coding:utf-8
- import urllib2
- from bs4 import BeautifulSoup
- root = 'http://www.biquge.com/'
- urlList = [];
- print u"正在解析章节列表..."
- soup = BeautifulSoup(urllib2.urlopen('http://www.biquge.com/0_360/').read())
- for result in soup.find(id="list").find("dt").find_next("dt").find_all_next("a"):
- urlList.append(result['href'])
- fileHandle = open('test.txt','a')
- #fileHandle.write("hello")
- for result in urlList:
- temp = BeautifulSoup(urllib2.urlopen(root+result).read())
- #print temp.find(id="content").get_text()
- print u"正在下载:"+temp.title.text;
- content = temp.find(id="content").get_text().encode('gbk','ignore')
- fileHandle.write(content)
- fileHandle.close()
- #该片段来自于http://www.codesnippet.cn/detail/060120148432.html
# Source: http://www.codesnippet.cn/detail/060120148432.html