Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from mrjob.job import MRJob
- from mrjob.step import MRStep
- from collections import Counter
class MR_Ex3(MRJob):
    """For every user, report the brand they purchased most often.

    Each input line is expected to be a comma-separated record with nine
    fields, in this order: event_time, event_type, product_id, category_id,
    category_code, brand, price, user_id, user_session.

    The job emits one ``(user_id, brand)`` pair for each user that has at
    least one ``purchase`` event with a non-empty brand.
    """

    def mapper_get_words(self, _, line):
        """Emit ``(user_id, brand)`` for every purchase that names a brand.

        Args:
            _: Input key; ignored.
            line: One raw line of the input.

        Yields:
            ``(user_id, brand)`` tuples for purchase events whose brand
            field is non-empty.
        """
        fields = line.split(',')
        # A blank, truncated or over-long line cannot be mapped onto the
        # nine expected columns. Skip it (and count it) rather than letting
        # an unpacking error abort the whole job.
        if len(fields) != 9:
            self.increment_counter('MR_Ex3', 'malformed_lines', 1)
            return

        # Column positions: 1 = event_type, 5 = brand, 7 = user_id.
        event_type, brand, user_id = fields[1], fields[5], fields[7]

        # A header row, if present, is dropped here as well because its
        # event_type column is not "purchase".
        if brand and event_type == "purchase":
            yield user_id, brand

    def reducer_count_words(self, user, brands):
        """Emit the most frequently purchased brand for one user.

        Args:
            user: The user id that the brands were grouped under.
            brands: Iterable of brand names, one per purchase by ``user``.

        Yields:
            A single ``(user, brand)`` pair. When several brands share the
            highest count, ``Counter.most_common`` keeps the one seen first
            in ``brands``.
        """
        yield user, Counter(brands).most_common(1)[0][0]

    def steps(self):
        """Return the single map/reduce step that makes up this job."""
        return [
            MRStep(mapper=self.mapper_get_words,
                   reducer=self.reducer_count_words)
        ]
# Example invocation:
#   python3 Ex3a.py /home/adbs20/shared/ecommerce/2019-Oct-short.csv /home/adbs20/shared/ecommerce/2019-Nov-short.csv > out.csv
if __name__ == "__main__":
    # Start the job only when this file is executed as a script.
    MR_Ex3.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement