- import urllib.request
- import re
def get_new_qq(url, pat, file):
    """Download a page, extract regex matches from it, and save them to a file.

    Args:
        url: Address handed to ``urllib.request.urlopen``.
        pat: Regular expression pattern searched for in the decoded page text.
        file: Path of the output file; it is overwritten on every call.

    Returns:
        The list returned by ``re.findall`` for ``pat`` over the page text.
        The same list, rendered with ``str()``, is what gets written to
        ``file``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        OSError: If the output file cannot be written.
        re.error: If ``pat`` is not a valid regular expression.
    """
    # Use the response as a context manager so the connection is released
    # even when reading fails part-way through.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # The payload is decoded as GBK; bytes that are not valid GBK are dropped
    # rather than aborting the whole scrape.
    data = raw.decode("gbk", "ignore").strip()
    res = re.findall(pat, data)

    # Pin the output encoding: the platform default may be unable to encode
    # the extracted text and would make the file contents locale-dependent.
    with open(file, "w", encoding="utf-8") as f:
        f.write(str(res))
    return res
if __name__ == "__main__":
    # Script entry point: fetch the QQ portal page, pull the text of every
    # list-item link, save the result, and echo it to stdout.
    target_url = "https://www.qq.com/?pgv_ref=1"
    link_pattern = "<li><a .*>(.*)</a></li>"
    output_path = "../ 腾讯新闻. txt"

    headlines = get_new_qq(target_url, link_pattern, output_path)
    print(headlines)
# Source (来源): http://www.bubuko.com/infodetail-3374009.html