- #!/usr/bin/env python
- #coding: utf-8
- import md5
- import os
- from time import clock as now
def getmd5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    memory use stays bounded for large files and the file handle is always
    closed, even if reading fails part-way through.

    Args:
        filename: Path of the file to hash.

    Returns:
        The MD5 digest as a 32-character lowercase hexadecimal string.

    Raises:
        IOError or OSError: If the file cannot be opened or read.
    """
    # Imported locally so this function does not depend on the legacy
    # ``md5`` module, which no longer exists in Python 3.
    import hashlib

    digest = hashlib.md5()
    with open(filename, 'rb') as handle:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: handle.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()
def main(path=u'C:\\aa\\bb\\cc\\dd'):
    """Delete duplicate files found directly inside *path*.

    Only regular files at the top level of the directory are examined;
    sub-directories are skipped and not descended into. Files are first
    grouped by size, and MD5 digests are computed only when two or more
    files share a size. The first file seen with a given content is kept;
    every later file with the same size and digest is removed.

    A short report (files examined, files deleted, elapsed seconds) is
    printed at the end.

    Args:
        path: Directory to scan. Should be a unicode string so that file
            names are returned as unicode and can be printed. Defaults to
            the directory the script was originally written for.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be stat'ed.
            Failures while deleting a duplicate are reported and skipped.
    """
    # NOTE: every backslash in the default path is escaped. The previous
    # literal contained "\b", which Python turns into a backspace character.

    # Wall-clock timer that exists on both Python 2 and Python 3
    # (time.clock is CPU time on Unix and was removed in Python 3.8).
    from timeit import default_timer

    # size -> [path of the first file with that size (None once hashed),
    #          set of digests already seen for that size]
    seen_by_size = {}
    total_file = 0
    total_delete = 0
    start = default_timer()

    for name in os.listdir(path):
        real_path = os.path.join(path, name)
        if not os.path.isfile(real_path):
            continue
        # Count only regular files, so the report matches its label.
        total_file += 1

        size = os.stat(real_path).st_size
        if size not in seen_by_size:
            # First file of this size: defer hashing until a second one
            # with the same size actually shows up.
            seen_by_size[size] = [real_path, set()]
            continue

        entry = seen_by_size[size]
        if entry[0] is not None:
            # Lazily hash the first file of this size exactly once.
            entry[1].add(getmd5(entry[0]))
            entry[0] = None

        new_md5 = getmd5(real_path)
        if new_md5 not in entry[1]:
            entry[1].add(new_md5)
            continue

        print(u'删除 %s' % name)
        try:
            os.remove(real_path)
        except OSError as err:
            # Report the real cause (permissions, file in use, ...) rather
            # than assuming the file is missing; %r keeps the message
            # printable regardless of the platform's error encoding.
            print(u'Failed to delete %s: %r' % (name, err))
        else:
            # Only successful removals count as deleted.
            total_delete += 1

    time_last = default_timer() - start
    print(u'文件总数:  %d' % total_file)
    print(u'删除个数:  %d' % total_delete)
    # A single unicode string avoids mixing unicode and byte strings in
    # one line of output.
    print(u'耗时:  %s 秒' % time_last)
# Script entry point: run the clean-up only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
参考:http://developer.51cto.com/art/201205/334378.htm
原文没有执行删除文件的操作,这里补充了以下删除代码:
- try:
- os.remove(os.path.join(path, file))
- except:
- print 'No such file: %s' % file
还是看官方文档靠谱
https://docs.python.org/2/library/os.html?highlight=os.remove#os.remove
来源: http://www.phpxs.com/code/1005183/