'''
Created on 2013-4-3
@author: zdh
'''
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import urllib

try:  # Python 3
    from urllib.request import build_opener
except ImportError:  # Python 2
    from urllib2 import build_opener
def main(output_path=r'E:/ip.txt', first_page=1, last_page=10):
    """Scrape proxy addresses from veryhuo.com and save them to a text file.

    Downloads the listing pages ``page_<n>.php`` for ``first_page`` through
    ``last_page`` (inclusive).  On each page the IP captures and the
    obfuscated port captures are paired by position, the letters of the
    substitution table are replaced by digits, and one entry per line is
    written to ``output_path``.  Every entry is also printed, followed by the
    number of IP captures found on that page.

    Args:
        output_path: File that is (over)written with the results.
        first_page: Number of the first listing page to fetch.
        last_page: Number of the last listing page to fetch (inclusive).

    Raises:
        IOError: (``OSError`` on Python 3) if a page cannot be downloaded or
            the output file cannot be written.
        UnicodeDecodeError: if a page is not valid GBK.
    """
    # Letter -> digit substitution applied to every output entry.
    die = {'d': '0', 'c': '1', 'k': '2', 'z': '3', 'm': '4',
           'b': '5', 'w': '6', 'i': '7', 'r': '8', 'l': '9'}
    headers = ('User-Agent',
               'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')

    # Compiled once instead of once per page.
    ip_pattern = re.compile(r'<td>(.*)<S')
    # Captures a '+'-joined run such as  "+r+d+r+d)  (quote and ')' excluded).
    port_pattern = re.compile(r'\"(\+.*?\+.)\)')

    opener = build_opener()
    opener.addheaders = [headers]

    # The context manager closes the file even if a download fails midway.
    with open(output_path, 'w') as out:
        for page in range(first_page, last_page + 1):
            url = "http://www.veryhuo.com/res/ip/page_" + str(page) + ".php"
            response = opener.open(url)
            try:
                data = response.read().decode('GBK')
            finally:
                response.close()

            ip_list = ip_pattern.findall(data)
            port_list = port_pattern.findall(data)

            # zip() stops at the shorter list, so a page with fewer port
            # captures than IP captures no longer raises IndexError.
            for ip, raw_port in zip(ip_list, port_list):
                port = raw_port.replace('+', '')
                # 'e', 'x', 'a' and 'f' are not keys of the substitution
                # table; a capture containing one of them is treated as not
                # being a port and only the IP is kept.
                if any(letter in port for letter in 'exaf'):
                    entry = ip
                else:
                    entry = ip + ':' + port
                for letter, digit in die.items():
                    entry = entry.replace(letter, digit)
                out.write(entry + '\n')
                print(entry)
            print(len(ip_list))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# This snippet comes from http://www.codesnippet.cn/detail/100920135779.html
# Source: http://www.codesnippet.cn/detail/100920135779.html