import os
import re
import urllib.request
# Base URL of the novel's table-of-contents directory. "index.html" and every
# chapter link found on the index page are appended to it.
home = "http://www.23zw.com/olread/9/9068/"

# The novel title is the text that follows the first "h1>" on the index page.
_TITLE_RE = re.compile(r'h1>(.+?)<')

# Chapter links are the href values that are exactly 37 characters long.
# [^"] keeps a match inside a single attribute value; a plain "." could run
# across the closing quote and swallow the attributes that follow.
# NOTE(review): 37 is presumably the length of a chapter link on this site --
# confirm against the live index page.
_LINK_RE = re.compile(r'href="([^"]{37})"')


def extract_title(page):
    """Return the novel title found in *page*, or None when there is none."""
    found = _TITLE_RE.search(page)
    return found.group(1) if found else None


def extract_links(page):
    """Return every 37-character href value in *page*, in document order."""
    return _LINK_RE.findall(page)


def is_plain_filename(name):
    """Return True if *name* is a bare file name without directory parts.

    Link names come from the remote page and are used as local file names,
    so anything containing a path separator (or "." / "..") is rejected to
    keep downloads inside the target directory.
    """
    return name not in ("", ".", "..") and os.path.basename(name) == name


def download_chapters(title, links):
    """Download each entry of *links* into the directory *title*.

    Files that already exist are skipped so an interrupted run can be
    resumed. A failure on one chapter is reported and the loop continues.
    """
    total = len(links)
    for position, name in enumerate(links, start=1):
        print("({}/{}) {}".format(position, total, name))
        if not is_plain_filename(name):
            print("error! unsafe link name skipped")
            continue
        target = os.path.join(title, name)
        if os.path.exists(target):
            continue
        try:
            urllib.request.urlretrieve(home + name, target)
        except Exception as exc:
            # Best effort: one bad chapter must not abort the whole run.
            # Unlike a bare "except:", Ctrl-C still stops the script.
            print("error!", exc)
            # Remove a partially written file, otherwise the "already
            # exists" check above would skip this chapter on the next run.
            try:
                os.remove(target)
            except OSError:
                pass


def main():
    """Save the novel's index page and all chapters it links to.

    The files are written to a directory named after the novel title. When
    no title can be found on the index page nothing is downloaded.
    """
    index_url = home + "index.html"
    with urllib.request.urlopen(index_url) as response:
        raw_index = response.read()
    # errors="replace" so a single invalid byte does not abort the run.
    page = raw_index.decode("gbk", errors="replace")
    print(len(page))

    title = extract_title(page)
    if title is not None:
        print(title)
        # NOTE(review): title is taken from the remote page and used as a
        # directory name unchanged.
        os.makedirs(title, exist_ok=True)
        # Reuse the bytes already fetched instead of requesting the index
        # page a second time.
        with open(os.path.join(title, "index.html"), "wb") as index_file:
            index_file.write(raw_index)
        download_chapters(title, extract_links(page))
    print("finish!")


if __name__ == '__main__':
    main()
# This snippet comes from http://www.codesnippet.cn/detail/2209201513723.html
# Source: http://www.codesnippet.cn/detail/2209201513723.html