- #!/usr/bin/env python
- # -*- coding:utf-8 -*-
- from gevent import monkey
- monkey.patch_all()
- from gevent.pool import Pool
- import requests
- import sys
- import os
def download(url, timeout=30):
    """Download ``url`` into the current working directory.

    The local file name is the last ``/``-separated segment of the URL
    (surrounding whitespace removed). The body is streamed to disk in
    1 KiB chunks so large files are never held in memory.

    Args:
        url: Address to fetch; leading/trailing whitespace is ignored.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block a worker forever.

    Raises:
        ValueError: If no file name can be derived from ``url``
            (for example when it ends with ``/``).
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a 4xx/5xx response status.
    """
    # Local import keeps this function self-contained with respect to the
    # file's top-level import list.
    from contextlib import closing

    chrome = ('Mozilla/5.0 (X11; Linux i86_64) ApplewebKit/537.36 '
              '(Khtml, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
    headers = {'User-Agent': chrome}

    url = url.strip()
    filename = url.split('/')[-1].strip()
    if not filename:
        # Fail before touching the network instead of at open('').
        raise ValueError('cannot derive a file name from URL %r' % url)

    response = requests.get(url, headers=headers, stream=True,
                            timeout=timeout)
    # closing() releases the streamed connection even if writing fails.
    with closing(response) as r:
        # Do not save an HTTP error page under the target file name.
        r.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)

    # Single-argument form prints identically on Python 2 and Python 3.
    print('%s is ok' % filename)
def removeLine(key, filename):
    """Delete every line of ``filename`` that contains ``key``.

    ``key`` is matched as a literal substring. The file is filtered in
    Python rather than by building a ``sed`` command line, so characters
    that are special to the shell or to regular expressions (``&``, ``;``,
    ``?``, ``.``, ``/`` ...) in ``key`` or ``filename`` are harmless.

    Args:
        key: Text identifying the lines to drop. An empty key is ignored,
            because it would otherwise match (and delete) every line.
        filename: Path of the text file to rewrite in place.

    Raises:
        IOError/OSError: If ``filename`` cannot be read or written.
    """
    if not key:
        return

    with open(filename, 'r') as src:
        lines = src.readlines()

    kept = [line for line in lines if key not in line]

    # Only rewrite the file when something was actually removed.
    if len(kept) != len(lines):
        with open(filename, 'w') as dst:
            dst.writelines(kept)
def _fetch_then_prune(url, list_path):
    """Download ``url`` and, only if that succeeded, drop it from the list.

    When ``download`` raises, ``removeLine`` is never reached, so the URL
    stays in ``list_path`` and is retried on the next run.
    """
    download(url)
    # Lines are identified by the URL's last path segment.
    removeLine(url.split('/')[-1].strip(), list_path)


if __name__ == "__main__":
    # Usage: python <script> urls.txt  -- one URL per line.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        # Read the whole list up front: the list file is rewritten while
        # the downloads are running.
        with open(filename, "r") as f:
            urls = [line.strip() for line in f]

        p = Pool(4)  # at most four downloads in flight
        for url in urls:
            if url:  # skip blank lines instead of requesting ''
                p.spawn(_fetch_then_prune, url, filename)
        p.join()
    else:
        print('Usage: python %s urls.txt' % sys.argv[0])
- # This snippet comes from http://www.codesnippet.cn/detail/1305201512562.html
Source: http://www.codesnippet.cn/detail/1305201512562.html