desdemona

generate_word_frequency_data

Jun 8th, 2016
487
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.33 KB | None | 0 0
  1. #!/usr/bin/env bash
  2.  
  3.  
  4. set -x
  5.  
  6. #en
  7. cd /usr/local/hadoop/
  8. bin/hdfs namenode -format
  9. sbin/start-dfs.sh
  10. bin/hdfs dfs -mkdir /user
  11. bin/hdfs dfs -mkdir /user/domi
  12. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/en wc_input
  13.  
  14. #wc map reduce
  15. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  16. #bin/hdfs dfs -cat wc_output/*
  17.  
  18. #all map reduce
  19. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  20. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  21. bin/hdfs dfs -put /home/domi/all_output.txt .
  22.  
  23. #freq map reduce
  24. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  25.  
  26. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  27. bin/hdfs dfs -cat freq_output/*
  28. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/en_200_most_frequent.txt
  29. sbin/stop-dfs.sh
  30. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  31. rm -rf /tmp/hadoop-domi/dfs/*
  32.  
  33.  
  34.  
  35.  
  36. #pl
  37. cd /usr/local/hadoop/
  38. bin/hdfs namenode -format
  39. sbin/start-dfs.sh
  40. bin/hdfs dfs -mkdir /user
  41. bin/hdfs dfs -mkdir /user/domi
  42. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/pl wc_input
  43.  
  44. #wc map reduce
  45. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  46. #bin/hdfs dfs -cat wc_output/*
  47.  
  48. #all map reduce
  49. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  50. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  51. bin/hdfs dfs -put /home/domi/all_output.txt .
  52.  
  53. #freq map reduce
  54. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  55.  
  56. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  57. bin/hdfs dfs -cat freq_output/*
  58. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/pl_200_most_frequent.txt
  59. sbin/stop-dfs.sh
  60. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  61. rm -rf /tmp/hadoop-domi/dfs/*
  62.  
  63.  
  64.  
  65.  
  66. #hu
  67. cd /usr/local/hadoop/
  68. bin/hdfs namenode -format
  69. sbin/start-dfs.sh
  70. bin/hdfs dfs -mkdir /user
  71. bin/hdfs dfs -mkdir /user/domi
  72. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/hu wc_input
  73.  
  74. #wc map reduce
  75. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  76. #bin/hdfs dfs -cat wc_output/*
  77.  
  78. #all map reduce
  79. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  80. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  81. bin/hdfs dfs -put /home/domi/all_output.txt .
  82.  
  83. #freq map reduce
  84. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  85.  
  86. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  87. bin/hdfs dfs -cat freq_output/*
  88. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/hu_200_most_frequent.txt
  89. sbin/stop-dfs.sh
  90. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  91. rm -rf /tmp/hadoop-domi/dfs/*
  92.  
  93.  
  94.  
  95.  
  96.  
  97. #de
  98. cd /usr/local/hadoop/
  99. bin/hdfs namenode -format
  100. sbin/start-dfs.sh
  101. bin/hdfs dfs -mkdir /user
  102. bin/hdfs dfs -mkdir /user/domi
  103. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/de wc_input
  104.  
  105. #wc map reduce
  106. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  107. #bin/hdfs dfs -cat wc_output/*
  108.  
  109. #all map reduce
  110. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  111. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  112. bin/hdfs dfs -put /home/domi/all_output.txt .
  113.  
  114. #freq map reduce
  115. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  116.  
  117. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  118. bin/hdfs dfs -cat freq_output/*
  119. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/de_200_most_frequent.txt
  120. sbin/stop-dfs.sh
  121. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  122. rm -rf /tmp/hadoop-domi/dfs/*
  123.  
  124.  
  125.  
  126.  
  127.  
  128. #sv
  129. cd /usr/local/hadoop/
  130. bin/hdfs namenode -format
  131. sbin/start-dfs.sh
  132. bin/hdfs dfs -mkdir /user
  133. bin/hdfs dfs -mkdir /user/domi
  134. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/sv wc_input
  135.  
  136. #wc map reduce
  137. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  138. #bin/hdfs dfs -cat wc_output/*
  139.  
  140. #all map reduce
  141. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  142. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  143. bin/hdfs dfs -put /home/domi/all_output.txt .
  144.  
  145. #freq map reduce
  146. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  147.  
  148. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  149. bin/hdfs dfs -cat freq_output/*
  150. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/sv_200_most_frequent.txt
  151. sbin/stop-dfs.sh
  152. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  153. rm -rf /tmp/hadoop-domi/dfs/*
  154.  
  155.  
  156.  
  157.  
  158.  
  159.  
  160. #nl
  161. cd /usr/local/hadoop/
  162. bin/hdfs namenode -format
  163. sbin/start-dfs.sh
  164. bin/hdfs dfs -mkdir /user
  165. bin/hdfs dfs -mkdir /user/domi
  166. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/nl wc_input
  167.  
  168. #wc map reduce
  169. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/wc/wc_reducer.py
  170. #bin/hdfs dfs -cat wc_output/*
  171.  
  172. #all map reduce
  173. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output all_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/all/all_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/all/all_reducer.py
  174. bin/hdfs dfs -cat all_output/* > /home/domi/all_output.txt
  175. bin/hdfs dfs -put /home/domi/all_output.txt .
  176.  
  177. #freq map reduce
  178. bin/hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_output -output freq_output -mapper /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_mapper.py -reducer /home/domi/PycharmProjects/HadoopWordCount/better/freq/freq_reducer.py -cacheFile hdfs://localhost:9000/user/domi/all_output.txt#all_output.txt
  179.  
  180. bin/hdfs dfs -cat freq_output/* > /home/domi/freq_output.txt
  181. bin/hdfs dfs -cat freq_output/*
  182. head -200 /home/domi/freq_output.txt > /home/domi/PycharmProjects/HadoopWordCount/better/most_frequent/nl_200_most_frequent.txt
  183. sbin/stop-dfs.sh
  184. for x in `cd /etc/init.d ; ls hadoop*` ; do sudo service $x stop ; done
  185. rm -rf /tmp/hadoop-domi/dfs/*
Add Comment
Please, Sign In to add comment