#!/usr/bin/env python
"""Generate a text file of random lowercase words, one word per line.

Run as a script, this writes ``maxIter`` words to ``word.txt`` in the current
directory. Each word is 1 to 5 characters long and drawn uniformly from
'a'..'z'. The output is intended as sample input for a word-count pipeline.
"""
import random
import string

# 'abc..z'
alphaStr = string.ascii_lowercase
# Number of words written when the module is run as a script.
maxIter = 100000


def random_word(min_len=1, max_len=5):
    """Return a random lowercase word.

    The length is chosen uniformly from ``min_len``..``max_len``; both bounds
    are inclusive because ``random.randint`` includes its endpoints.
    """
    length = random.randint(min_len, max_len)
    # Join once instead of growing the string with repeated ``+=``.
    return "".join(random.choice(alphaStr) for _ in range(length))


def write_word_file(path="word.txt", count=maxIter):
    """Write ``count`` random words to ``path``, one per line.

    An existing file at ``path`` is overwritten. The ``with`` block ensures
    the file is closed even if writing fails part-way through.
    """
    with open(path, "w") as fp:
        fp.writelines(random_word() + "\n" for _ in range(count))


if __name__ == "__main__":
    write_word_file()
- Usage: cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py
- Word count reducer (Python):
#filename: wordcount_reducer.py
# Word-count reducer: reads "word<TAB>count" lines from standard input, sums
# the counts for each word, and prints one "word<TAB>total" line per word,
# sorted by word.
from operator import itemgetter
import sys


def reduce_counts(lines):
    """Sum the counts per word from an iterable of "word<TAB>count" lines.

    Lines that cannot be parsed are skipped rather than aborting the whole
    run: this covers blank lines, lines without a tab separator, and lines
    whose count is not an integer.

    Returns a dict mapping each word to its total count.
    """
    wordcount = {}
    for line in lines:
        try:
            # Both the unpacking (missing tab) and int() (bad count) raise
            # ValueError, so one handler covers every malformed line.
            word, count = line.strip().split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        wordcount[word] = wordcount.get(word, 0) + count
    return wordcount


def format_counts(wordcount):
    """Return "word<TAB>count" strings for ``wordcount``, sorted by word."""
    sorted_wordcount = sorted(wordcount.items(), key=itemgetter(0))
    return ["%s\t%s" % (word, count) for word, count in sorted_wordcount]


def run_reducer():
    """Reduce the lines on standard input and print the totals."""
    for output_line in format_counts(reduce_counts(sys.stdin)):
        print(output_line)


if __name__ == "__main__":
    run_reducer()
来源: http://www.phpxs.com/code/1004803/