Advertisement
desdemona

run_better_map_reduce

Jun 8th, 2016
519
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 3.17 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. set -x
  4.  
  5. cd /usr/local/hadoop/
  6. bin/hdfs namenode -format
  7. sbin/start-dfs.sh
  8. bin/hdfs dfs -mkdir /user
  9. bin/hdfs dfs -mkdir /user/domi
  10.  
  11. #bin/hdfs dfs -put /home/domi/Desktop/input_files/set01 wc_input
  12. bin/hdfs dfs -put /home/domi/Desktop/input_files/set03 wc_input
  13.  
  14. #wc map reduce
  15. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  16.  
  17. #bin/hdfs dfs -cat wc_output/*
  18. #read -p "Press [Enter] to confirm wc output..."
  19.  
  20. #all map reduce
  21. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  22. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  23. bin/hdfs dfs -put /home/domi/all_output.txt .
  24. bin/hdfs dfs -cat all_output/*
  25.  
  26. #freq map reduce
  27. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  28.  
  29. bin/hdfs dfs -cat freq_output/*
  30. read -p "Press [Enter] key to confirm freq output..."
  31.  
  32. #copy input files for freq - list of 200 most popular words in 6 languages
  33. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/en_200_most_frequent.txt .
  34. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/pl_200_most_frequent.txt .
  35. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/hu_200_most_frequent.txt .
  36. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/de_200_most_frequent.txt .
  37. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/sv_200_most_frequent.txt .
  38. bin/hdfs dfs -put /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/nl_200_most_frequent.txt .
  39.  
  40. #lang mp reduce
  41. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input freq_output -output lang_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/lang/lang_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/lang/lang_reducer.py -cacheFile hdfs://localhost:9000/user/domi/en_200_most_frequent.txt#en_200_most_frequent.txt,hdfs://localhost:9000/user/domi/pl_200_most_frequent.txt#pl_200_most_frequent.txt,hdfs://localhost:9000/user/domi/hu_200_most_frequent.txt#hu_200_most_frequent.txt,hdfs://localhost:9000/user/domi/de_200_most_frequent.txt#de_200_most_frequent.txt,hdfs://localhost:9000/user/domi/sv_200_most_frequent.txt#sv_200_most_frequent.txt,hdfs://localhost:9000/user/domi/nl_200_most_frequent.txt#nl_200_most_frequent.txt
  42.  
  43.  
  44. bin/hdfs dfs -cat lang_output/* > /home/domi/lang_output.txt
  45. bin/hdfs dfs -cat lang_output/*
  46.  
  47. sbin/stop-dfs.sh
  48. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  49. rm -rf /tmp/hadoop-domi/dfs/*
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement