Friday, May 30, 2014

CDH 5.0 & SPARK



Download the CDH 5 one-click-install repo RPM for RHEL/CentOS 6:

http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm

$ sudo yum --nogpgcheck localinstall cloudera-cdh-5-0.x86_64.rpm

$ sudo rpm --import http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
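
With the repository in place, install the pseudo-distributed configuration package; this is the package whose files are listed below (following the standard CDH 5 pseudo-distributed setup):

$ sudo yum install hadoop-conf-pseudo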

$ rpm -ql hadoop-conf-pseudo


$ sudo -u hdfs hdfs namenode -format

$ for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
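
This should bring up the NameNode, Secondary NameNode, and DataNode services that hadoop-conf-pseudo sets up; a quick way to confirm the daemons are running (jps ships with the JDK):

$ sudo jps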

$ sudo -u hdfs hadoop fs -rm -r /tmp

$ sudo -u hdfs hadoop fs -mkdir -p /tmp/hadoop-yarn/staging/history/done_intermediate
$ sudo -u hdfs hadoop fs -chown -R mapred:mapred /tmp/hadoop-yarn/staging
$ sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
$ sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn
$ sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn
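
An optional sanity check that the directories, owners, and permissions came out right:

$ sudo -u hdfs hadoop fs -ls -R /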

$ sudo service hadoop-yarn-resourcemanager start
$ sudo service hadoop-yarn-nodemanager start
$ sudo service hadoop-mapreduce-historyserver start
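
With the default ports, the ResourceManager web UI should come up at http://localhost:8088/ once these services are running.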

$ sudo -u hdfs hadoop fs -mkdir /user
$ sudo -u hdfs hadoop fs -mkdir /user/wylee
$ sudo -u hdfs hadoop fs -chown wylee /user/wylee

$ hadoop fs -mkdir input
$ hadoop fs -put /etc/hadoop/conf/*.xml input
$ hadoop fs -ls input

$ export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
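
The output23 directory listed below presumably comes from one of the bundled example jobs; the usual CDH tutorial step (the grep example from hadoop-mapreduce-examples, with the output directory name assumed to match the listing below) looks like:

$ hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep input output23 'dfs[a-z.]+'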

$ hadoop fs -ls
$ hadoop fs -ls output23

$ sudo yum install spark-core spark-master spark-worker spark-python

$ sudo service spark-master start
$ sudo service spark-worker start

The standalone configuration lives in /etc/spark/conf/spark-env.sh; edit it there if the defaults need changing.
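
A minimal spark-env.sh sketch, assuming the standalone defaults shipped with the CDH package (variable names are the stock Spark standalone ones; check the packaged file for what it actually expects):

export SPARK_MASTER_IP=localhost         # host the standalone master binds to
export SPARK_MASTER_WEBUI_PORT=18080     # master web UI port (matches the URL below)
export SPARK_WORKER_MEMORY=1g            # memory each worker is allowed to use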

Spark Master web UI: http://localhost:18080/

$ spark-shell
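
Inside the shell, a simple word count over text already sitting in HDFS (using the /tmp/input path and the default NameNode port 8020):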

val file = sc.textFile("hdfs://localhost:8020/tmp/input")
val counts = file.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://localhost:8020/tmp/output")
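
To peek at the result without leaving the shell (take returns an Array of (word, count) pairs):

counts.take(10).foreach(println)

or, from another terminal once the job finishes:

$ hadoop fs -cat /tmp/output/part-*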
