#! /usr/bin/env python
"""Find sets of anagrams in a word list using an mrjob MapReduce job."""
from mrjob.job import MRJob


class Anagram(MRJob):
    """MapReduce job that reports groups of words that are anagrams.

    The mapper keys each word by its sorted letters; the reducer emits
    every group that contains at least two words.
    """

    def mapper(self, _, word):
        """Yield ``(sorted letters, word)`` for each non-empty input word.

        The input key is ignored. Empty words produce no output.
        """
        if word:
            # Anagrams contain exactly the same letters, so their sorted
            # letter lists are equal and serve as a shared key.
            yield sorted(word), word

    def reducer(self, _, words):
        """Yield ``(count, words)`` for groups holding two or more words.

        The key is ignored; only the words collected under it matter.
        """
        # ``words`` is consumed once here so it can be both counted and
        # emitted.
        anagrams = list(words)
        if len(anagrams) >= 2:
            yield len(anagrams), anagrams


if __name__ == '__main__':
    Anagram.run()

# Call it like this:
Sample output:
The output file consists of two columns: the first column is the number of words in the set, and the second column is the set itself.


No comments:
Post a Comment