# -*- coding:utf-8 -*-
# Author: archingB
- import urllib
- import urllib2
- import re
# Usage hint printed once at startup. In English:
# "Press Enter to start reading, q to quit, w to write to save.txt".
# Kept as a plain (byte) string literal; main() decodes it from UTF-8.
readme = "按下回车键开始读取,q退出,w写入save.txt"
def getpage(page):
    """Download one listing page and extract the entries found on it.

    Args:
        page: Page number; it is converted with ``str`` and appended to the
            listing URL.

    Returns:
        A list with one unicode string per match of the entry pattern: the
        text between ``<div class="content">`` and the following
        ``<!--NNNNNNNNNN-->`` marker (ten digits). The strings are raw markup
        and may still contain tags such as ``<br/>``. An empty list is
        returned when the request fails with ``urllib2.URLError``, so callers
        can always treat the result as a sequence.
    """
    url = 'http://www.qiushibaike.com/textnew/page/' + str(page)
    # A browser-like User-Agent is sent instead of urllib2's default
    # (presumably the site rejects the default one; not verifiable here).
    headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            content = response.read().decode('utf-8')
        finally:
            # Release the connection even if reading or decoding fails.
            response.close()
    except urllib2.URLError as e:
        # HTTPError carries a status code; plain URLError carries a reason.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        return []
    # re.S lets "." span newlines, since an entry may cover several lines.
    pattern = re.compile(r'<div class="content">(.*?)<!--\d{10}-->', re.S)
    return pattern.findall(content)
def writes(item):
    """Append one entry to ``save.txt``, preceded by the running counter.

    The record written is the current value of the module-level counter ``M``
    on its own line, followed by the entry text on the next line(s).

    Args:
        item: Entry text; it is encoded to UTF-8 before being written.

    Raises:
        NameError: If the global ``M`` has not been created yet (``main()``
            initialises it).
        IOError: If ``save.txt`` cannot be opened or written.
    """
    encoded = item.encode('utf-8')
    # "with" closes the file on every path and, unlike a try/finally around
    # open(), does not touch an unbound name when open() itself fails.
    with open('save.txt', 'a') as f:
        # Real newlines separate the counter, the text and the next record.
        f.write(str(M) + '\n' + encoded + '\n')
def main():
    """Run the interactive reader.

    Prints the usage hint, fetches page 1, waits for Enter, then shows one
    entry per prompt. After each entry the user's input is read:

    * ``q`` / ``Q`` quits;
    * ``w`` / ``W`` appends the displayed entry to ``save.txt`` via
      ``writes()`` and increments the global counter ``M``;
    * anything else simply moves on to the next entry.

    When every entry of the current page has been shown, the next page is
    fetched. The loop ends if a page cannot be fetched or contains no entries.
    """
    global M
    M = 1  # label for the next saved entry; read by writes()
    page = 1
    index = 0
    print(readme.decode('utf-8'))
    items = getpage(page)
    raw_input()  # wait for Enter before showing the first entry
    while True:
        # Move on only after the last entry of the page has been displayed.
        if items and index >= len(items):
            page += 1
            index = 0
            items = getpage(page)
        if not items:
            # Fetch failed (None or empty list) or the page had no entries.
            break
        text = re.sub(r'<br/>', '\n', items[index])
        print("%d %s" % (index + 1, text))
        command = raw_input()
        if command in ('q', 'Q'):
            break
        if command in ('w', 'W'):
            writes(text)
            M += 1
        index += 1


main()
# This snippet comes from http://www.codesnippet.cn/detail/2412201514299.html
# Source: http://www.codesnippet.cn/detail/2412201514299.html