install
#!/bin/bash
# Install Java, Scala and Apache Spark on CentOS 7.
#
# Usage:
#   bash install.sh                       # installs Spark 1.6.0
#   SPARK_VERSION=2.2.0 bash install.sh   # installs Spark 2.2.0
#
# Choose the Spark version from https://spark.apache.org/downloads.html
# Exactly one Spark release is installed per run.
#
# Environment:
#   SPARK_VERSION   Spark release to install (default: 1.6.0)
#   SPARK_BASE_URL  download root for Spark tarballs
#   SCALA_BASE_URL  download root for Scala tarballs
#
# NOTE: the PATH / SPARK_HOME exports only last for this process. Add the
# same exports to ~/.bash_profile to make them permanent.
set -euo pipefail

readonly SCALA_VERSION="2.11.7"
readonly SPARK_VERSION="${SPARK_VERSION:-1.6.0}"
readonly HADOOP_PROFILE="hadoop2.6"
readonly SCALA_BASE_URL="${SCALA_BASE_URL:-http://downloads.typesafe.com/scala}"
# The CloudFront mirror used previously is no longer served; the Apache
# archive keeps every past release.
readonly SPARK_BASE_URL="${SPARK_BASE_URL:-https://archive.apache.org/dist/spark}"

readonly scala_dir="scala-${SCALA_VERSION}"
readonly spark_dir="spark-${SPARK_VERSION}-bin-${HADOOP_PROFILE}"

# Work from the home directory ($HOME, not the unset lowercase $home).
cd "$HOME"

# Java runtime and the download tool.
sudo yum install -y java wget
java -version

# Scala: unpack under /usr/lib and expose it through a stable symlink.
# Skipped when already installed so a re-run does not nest directories.
if [[ ! -d "/usr/lib/${scala_dir}" ]]; then
  wget "${SCALA_BASE_URL}/${SCALA_VERSION}/${scala_dir}.tgz"
  tar xvf "${scala_dir}.tgz"
  sudo mv "${scala_dir}" /usr/lib
fi
# -f/-n let the link be re-created safely on a second run.
sudo ln -sfn "/usr/lib/${scala_dir}" /usr/lib/scala
export PATH="$PATH:/usr/lib/scala/bin"
# Informational only: do not abort the install on the banner's exit status.
scala -version || true

# Spark: unpack the selected release into the home directory.
if [[ ! -d "$HOME/${spark_dir}" ]]; then
  wget "${SPARK_BASE_URL}/spark-${SPARK_VERSION}/${spark_dir}.tgz"
  tar xvf "${spark_dir}.tgz"
fi
export SPARK_HOME="$HOME/${spark_dir}"
export PATH="$PATH:$SPARK_HOME/bin"
접속 보안 허용
# Open the ports used by a Spark standalone cluster in firewalld.
# Must be run with root privileges.
#   6066/tcp      - REST job-submission endpoint
#   7077/tcp      - master RPC port
#   8080-8081/tcp - master / worker web UIs
for port in 6066/tcp 7077/tcp 8080-8081/tcp; do
  firewall-cmd --permanent --zone=public --add-port="${port}"
done

# --permanent only edits the saved configuration; reload to apply it now.
firewall-cmd --reload
환경 변수 등록
# Lines to append to ~/.bash_profile (open it with: vi ~/.bash_profile).

# Scala binaries.
export PATH="$PATH:/usr/lib/scala/bin"

# Spark home: use the directory of the version you installed,
# e.g. spark-2.2.0-bin-hadoop2.6 instead of spark-1.6.0-bin-hadoop2.6.
export SPARK_HOME="$HOME/spark-1.6.0-bin-hadoop2.6"
export PATH="$PATH:$SPARK_HOME/bin"

# After saving, re-apply the environment in the current shell with:
#   source ~/.bash_profile
실행
spark 구동
spark설치경로/sbin> bash start-all.sh
[psmon@localhost sbin]$ ls
slaves.sh                       start-slaves.sh
spark-config.sh                 start-thriftserver.sh
spark-daemon.sh                 stop-all.sh
spark-daemons.sh                stop-history-server.sh
start-all.sh                    stop-master.sh
start-history-server.sh         stop-mesos-dispatcher.sh
start-master.sh                 stop-mesos-shuffle-service.sh
start-mesos-dispatcher.sh       stop-shuffle-service.sh
start-mesos-shuffle-service.sh  stop-slave.sh
start-shuffle-service.sh        stop-slaves.sh
start-slave.sh                  stop-thriftserver.sh
[psmon@localhost sbin]$ bash start-all.sh
starting org.apache.spark.deploy.master.Master, logging to /home/psmon/spark-1.6.0-bin-hadoop2.6/logs/spark-psmon-org.apache.spark.deploy.master.Master-1-localhost.localdomain.out
localhost: Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
psmon@localhost's password:
localhost: starting org.apache.spark.deploy.worker.Worker, logging to /home/psmon/spark-1.6.0-bin-hadoop2.6/logs/spark-psmon-org.apache.spark.deploy.worker.Worker-1-localhost.localdomain.out
[psmon@localhost sbin]$
spark 끄는 법
spark설치경로/sbin> bash stop-all.sh
spark shell
- spark-shell : 스칼라로 이용
- sparkR : R로 이용
- pyspark : 파이썬으로 이용
- spark-sql : sql문으로 이용
spark/bin> spark-shell
shell 끄는 법
scala> :quit