# coding=utf-8
# Feiku (飞库) novel downloader: scrapes a book's chapter list and saves the text.
- import bs4
- from bs4 import BeautifulSoup
- import urllib2
- import sys
- import re
# Pre-compiled patterns used by filter() to clean scraped chapter HTML.
# reobj1: a "<br/>" pair separated by a newline (paragraph break), or a lone "<br/>".
reobj1 = re.compile(r'(<br */> *\n *<br */>)|(<br */>)')
# reobj2: a literal space, an opening <div> tag, or a closing </div> tag.
reobj2 = re.compile(r'( )|(<div *>)|(< */div>)')
def getContents(url, title):
    """Fetch one chapter page and return its title followed by the raw chapter HTML.

    url   -- absolute URL of the chapter page.
    title -- chapter title, prepended to the returned text.
    Returns a str: "\n<title>\n<div id='chcontent'>...</div>".
    Raises whatever urllib2.urlopen raises on network failure.
    """
    f = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(f.read().decode('utf-8', 'ignore'))
        # Bug fix: the original concatenated the two-character literal "\n"
        # ('\\n') instead of a real newline separator.
        chapter = '\n' + title + '\n' + str(soup.find('div', id='chcontent'))
    finally:
        f.close()  # bug fix: the original leaked the HTTP response handle
    print(title)  # progress indicator (parenthesized form works on Py2 and Py3)
    return chapter
def filter(txt):
    """Clean scraped chapter HTML: collapse <br> tags into newlines, then
    replace spaces and <div>/</div> tags with commas.

    NOTE(review): shadows the builtin ``filter``; the name is kept because the
    __main__ section calls it by this name.
    """
    cleaned = reobj1.sub('\n', txt)
    return reobj2.sub(',', cleaned)
if __name__ == '__main__':
    # Ask for the book's table-of-contents URL and download the index page.
    print(u'输入目录网址:')
    url = raw_input()
    f = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(f.read().decode('utf-8', 'ignore').encode('utf-8'))
    finally:
        f.close()  # bug fix: original only closed on the success path
    cells = soup.findAll('td')

    # Ask for the book name; it becomes the output file name.
    print(u'输入书名:')
    bookname = raw_input().encode('utf-8')
    ff = open(bookname + '.txt', 'w')
    try:
        for cell in cells:
            try:
                href = cell.a['href']
                chapter = filter(getContents(href, str(cell.a.string)))
                ff.write(chapter)
            except Exception:
                # Best-effort: many <td> cells have no <a> link (AttributeError)
                # and individual chapter fetches may fail; skip and continue.
                pass
    finally:
        ff.close()  # bug fix: original leaked the file if the loop raised
# This snippet originally came from http://www.codesnippet.cn/detail/190820135209.html
# Source: http://www.codesnippet.cn/detail/190820135209.html