Advertisement
kieni17

Untitled

Apr 18th, 2020
394
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.07 KB | None | 0 0
  1. from mrjob.job import MRJob
  2. from mrjob.step import MRStep
  3. from collections import Counter
  4.  
  5. class MR_Ex3(MRJob):
  6.  
  7.     def mapper_get_words(self, _, line):
  8.         # split line into array
  9.         (event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session) = line.split(',')
  10.        
  11.         # check if brand has a value and if so yield user_id and brand
  12.         if brand and event_type == "purchase":
  13.             yield user_id, brand
  14.            
  15.     def reducer_count_words(self, user, brands):
  16.         # send all (num_occurrences, word) pairs to the same reducer.
  17.         # num_occurrences is so we can easily use Python's max() function.
  18.         yield  user,Counter(brands).most_common(1)[0][0]
  19.  
  20.     def steps(self):
  21.         return [
  22.             MRStep(mapper=self.mapper_get_words,
  23.                    reducer=self.reducer_count_words)
  24.         ]
  25.  
  26.  
# Script entry point: when this file is executed directly (not imported),
# start the job through run(), which MR_Ex3 inherits from MRJob.
if __name__ == '__main__':
    MR_Ex3.run()
  29.    
  30.    
  31. #  ! python3 Ex3a.py /home/adbs20/shared/ecommerce/2019-Oct-short.csv /home/adbs20/shared/ecommerce/2019-Nov-short.csv > out.csv
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement