简单爬虫返回列表
-
import re
import urllib.request
def getUrl(url):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    Args:
        url: Address passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Whatever ``urllib.request.urlopen`` raises for a bad or
        unreachable address is propagated unchanged.
    """
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(url) as page:
        return page.read().decode('utf-8')
def getHtnlList(html, pattern=r'正则表达式'):
    """Return every match of ``pattern`` found in ``html`` as a list.

    Args:
        html: Text to search.
        pattern: Regular expression to apply. The default is the literal
            placeholder text shipped with this snippet; pass a real
            expression to extract useful data.

    Returns:
        The list produced by ``re.findall``: whole-match strings when the
        pattern has no groups, otherwise the captured group(s). Empty when
        nothing matches.
    """
    # Run the pattern against the text so callers receive the matches
    # themselves rather than a compiled pattern object.
    return re.compile(pattern).findall(html)
# "网址" is a placeholder; substitute the real page address before running.
html = getUrl("网址")
# getHtnlList is the parser defined in this file; keep its result so the
# extracted data can be used afterwards.
matches = getHtnlList(html)
- #该片段来自于http://www.codesnippet.cn/detail/2907201513258.html
来源: http://www.codesnippet.cn/detail/2907201513258.html