Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from mrjob.job import MRJob
- from mrjob.step import MRStep
- from collections import Counter
class MR_Ex3(MRJob):
    """MapReduce job that reports each user's most frequently purchased brand.

    Every input line is treated as one comma-separated record with nine
    columns, in this order: event_time, event_type, product_id,
    category_id, category_code, brand, price, user_id, user_session.
    """

    def mapper_get_words(self, _, line):
        """Emit ``(user_id, brand)`` for each purchase event that has a brand.

        Args:
            _: Input key supplied by the framework; ignored.
            line: One raw comma-separated input record.

        Yields:
            ``(user_id, brand)`` when ``event_type`` is ``"purchase"`` and
            ``brand`` is non-empty. Records that do not split into exactly
            nine fields (blank or truncated lines) are skipped so that one
            malformed line cannot abort the whole job.
        """
        try:
            (_event_time, event_type, _product_id, _category_id,
             _category_code, brand, _price, user_id,
             _user_session) = line.split(',')
        except ValueError:
            # Wrong number of columns: not a usable record.
            return

        # Only purchases that actually name a brand contribute to the count.
        if brand and event_type == "purchase":
            yield user_id, brand

    def reducer_count_words(self, user, brands):
        """Emit the brand that ``user`` purchased most often.

        Args:
            user: The user id the brands were grouped under.
            brands: Iterable of brand names emitted by the mapper for
                this user.

        Yields:
            ``(user, brand)`` where ``brand`` is the most common value in
            ``brands`` according to ``Counter.most_common``.
        """
        yield user, Counter(brands).most_common(1)[0][0]

    def steps(self):
        """Return the single map/reduce step that makes up this job."""
        return [
            # The reducer must be referenced by its real name,
            # ``reducer_count_words``; ``reducer_count`` does not exist.
            MRStep(mapper=self.mapper_get_words,
                   reducer=self.reducer_count_words)
        ]
# Launch the job only when this file is executed as a script.
if '__main__' == __name__:
    MR_Ex3.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement