import re
from collections import Counter
class Database:
    """Collect hit counts and mean trailing numbers from a text log.

    Each log line is expected to contain a double-quoted field and to end
    with a decimal number (presumably the request line and the request time
    written by a web server -- confirm against the actual log format).
    ``start`` parses a file into ``all_list`` / ``all_time``; ``lists``
    prints the mean value for the ten most frequent quoted fields.
    """

    # Group 1: first double-quoted field of the line.
    # Group 2: decimal number the line ends with.
    # Compiled once here instead of on every start() call.
    _LINE_RE = re.compile(r'^.*?\"(.*?)\".*?(\d*\.\d*)$',
                          re.IGNORECASE | re.DOTALL)

    def __init__(self):
        # host, port and write_pool are not read by any method of this
        # class; they are kept so code that inspects them keeps working.
        self.host = "localhost"
        self.port = 6379
        self.write_pool = {}
        # Quoted field of every parsed line, in file order (with repeats).
        self.all_list = []
        # Quoted field -> sum of its trailing numbers, always a float.
        self.all_time = {}
        # Quoted field -> mean trailing number formatted with "%.4f".
        self.dicts = {}

    def start(self, paths, base_dir="/home/www"):
        """Parse one log file and accumulate its entries.

        Args:
            paths: File name (or relative path) of the log, joined onto
                ``base_dir`` with a ``/``.
            base_dir: Directory containing the log. Defaults to the
                previously hard-coded ``/home/www``.

        Raises:
            IOError / OSError: If the file cannot be opened.

        Lines that do not match the expected layout, or whose trailing
        token is not a valid number, are skipped.
        """
        log_path = "%s/%s" % (base_dir, paths)
        # The context manager closes the file even if parsing raises.
        with open(log_path, "r") as log_file:
            for line in log_file:
                match = self._LINE_RE.search(line)
                if match is None:
                    continue
                try:
                    elapsed = float(match.group(2))
                except ValueError:
                    # The pattern also accepts a bare "." (e.g. a line that
                    # ends with a full stop); that is not a number, so the
                    # line is ignored rather than aborting the whole run.
                    continue
                url = match.group(1)
                self.all_list.append(url)
                # Keep a float total; no per-sample rounding, so the mean
                # is only rounded once when it is formatted in lists().
                self.all_time[url] = self.all_time.get(url, 0.0) + elapsed

    def lists(self):
        """Print the mean trailing number of the ten most frequent entries.

        Updates ``self.dicts`` with ``entry -> "%.4f"``-formatted mean for
        each of the (at most) ten most common values in ``all_list`` and
        prints the resulting dictionary.
        """
        for url, hits in Counter(self.all_list).most_common(10):
            mean = float(self.all_time[url]) / float(hits)
            self.dicts[url] = "%.4f" % mean
        # Single-argument parenthesised print gives the same output on
        # Python 2 and Python 3.
        print(self.dicts)
if __name__ == "__main__":
    # Script entry point: parse the log file named "blog" and print the
    # mean value of its ten most frequent entries.
    log_stats = Database()
    log_stats.start("blog")
    log_stats.lists()
# This snippet comes from http://www.codesnippet.cn/detail/251120137434.html
# Source: http://www.codesnippet.cn/detail/251120137434.html