¿Cómo puedo usar Apache Spark con un cuaderno (notebook) en Anaconda?
PYSPARK_PYTHON=/opt/continuum/anaconda/bin/python spark-submit pyspark_script.py
Lazy Lion
PYSPARK_PYTHON=/opt/continuum/anaconda/bin/python spark-submit pyspark_script.py
import os
import sys
# Export the interpreter, Java, and Spark locations as environment variables.
os.environ.update(
    {
        "PYSPARK_PYTHON": "/opt/continuum/anaconda/bin/python",
        "JAVA_HOME": "/usr/java/jdk1.7.0_67-cloudera/jre",
        "SPARK_HOME": "/opt/cloudera/parcels/CDH/lib/spark",
    }
)

# PYLIB is derived from SPARK_HOME and names the directory holding the
# bundled Python archives.
os.environ["PYLIB"] = os.environ["SPARK_HOME"] + "/python/lib"

# Prepend both archives to the module search path in one step; pyspark.zip
# ends up first, immediately followed by the py4j source archive.
sys.path[:0] = [
    os.environ["PYLIB"] + "/pyspark.zip",
    os.environ["PYLIB"] + "/py4j-0.9-src.zip",
]