- #!/usr/bin/env python
- #-*- coding:utf-8-*-
- import urllib2
- import re
- import hashlib
- import json
#--------------------------------------------------- utilities start
def md5(str):
    '''
    Return the hexadecimal MD5 digest of the given data.

    Byte strings (and other bytes-like objects) are hashed as-is. Text
    input is encoded as UTF-8 first, because hashlib only accepts bytes.

    Note: the parameter name shadows the builtin ``str``; it is kept so
    that existing keyword callers keep working.
    '''
    data = str
    # Only text objects are neither ``bytes`` nor lacking ``encode``;
    # bytearray/memoryview fall through untouched.
    if not isinstance(data, bytes) and hasattr(data, 'encode'):
        data = data.encode('utf-8')
    return hashlib.md5(data).hexdigest()
def search(regex, content, group = 1):
    '''
    Return the text captured by ``group`` in the first match of ``regex``.

    The pattern is searched with re.DOTALL, so ``.`` also matches
    newlines.

    Returns '' when the pattern does not match, or when the requested
    group exists but did not take part in the match. Asking for a group
    the pattern does not define raises IndexError (from ``group()``).
    '''
    match = re.search(regex, content, re.DOTALL)
    if match is None:
        return ''
    captured = match.group(group)
    # group() yields None for an optional group that did not participate;
    # normalise that to '' so callers always get a string back.
    return '' if captured is None else captured
def findall(regex, content):
    '''
    Return every non-overlapping match of ``regex`` in ``content``.

    Matching uses re.DOTALL, so ``.`` also matches newlines. The result
    has the shape re.findall produces: a list of strings, or a list of
    tuples when the pattern has more than one group.
    '''
    compiled = re.compile(regex, re.DOTALL)
    return compiled.findall(content)
def cleanhtmlTag(content):
    '''
    Strip HTML tags from ``content`` and trim surrounding whitespace.

    Every ``<...>`` span is removed. Empty or None input yields ''.
    '''
    if not content:
        return ''
    return re.sub(r'<[^>]*?>', '', content).strip()


# camelCase spelling, in line with the naming of the other helpers;
# both names refer to the same function.
cleanHtmlTag = cleanhtmlTag
def cleanedSearch(regex, content, group = 1):
    '''
    Find the first match of ``regex`` in ``content`` and return the
    requested group with HTML tags stripped.

    The lookup is done by ``search`` and the tag removal by
    ``cleanhtmlTag``; ``group`` is passed through to ``search``.
    '''
    # The tag-stripping helper is defined as ``cleanhtmlTag`` (lowercase 'h').
    return cleanhtmlTag(search(regex, content, group))
def httpGet(url, encoding='gbk'):
    '''
    Send an HTTP GET request and return the response body as UTF-8.

    The raw body is decoded with ``encoding`` (default 'gbk'), silently
    dropping bytes that cannot be decoded, and then re-encoded as UTF-8.
    Errors raised by ``urllib2.urlopen`` are not caught here.
    '''
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    return raw.decode(encoding, 'ignore').encode('utf-8')
def toJson(dict):
    '''
    Serialise ``dict`` to a JSON string, indented by 4 spaces and with
    non-ASCII characters left unescaped.
    '''
    options = {'ensure_ascii': False, 'indent': 4}
    return json.dumps(dict, **options)
#--------------------------------------------------- utilities end
# This snippet comes from http://www.codesnippet.cn/detail/2007201513168.html
# Source: http://www.codesnippet.cn/detail/2007201513168.html