# test_spark.py
#
# Diagnostic Spark job: run a trivial task across many partitions and
# collect, per distinct executor, a string describing the host it ran on
# (hostname + the worker's sys.path + its environment variable names).
# Useful for verifying which Python interpreter the executors picked up.
import os
import sys

from pyspark import SparkConf
from pyspark import SparkContext

# Driver-side context; setAppName returns the conf, so the calls chain.
sc = SparkContext(conf=SparkConf().setAppName("get-hosts"))


def noop(x):
    """Ignore *x*; return a string identifying this executor's host/env."""
    import socket
    import sys
    return socket.gethostname() + ' '.join(sys.path) + ' '.join(os.environ)


# 100 partitions over 1000 elements so tasks are spread over every
# executor; distinct() collapses identical host/env strings to one each.
executor_descriptions = (
    sc.parallelize(range(1000), 100)
      .map(noop)
      .distinct()
      .collect()
)
print(executor_descriptions)
PYSPARK_PYTHON=./ANACONDA/mlpy_env/bin/python spark-submit \
  --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./ANACONDA/mlpy_env/bin/python \
  --conf spark.executorEnv.PYSPARK_PYTHON=./ANACONDA/mlpy_env/bin/python \
  --master yarn-cluster \
  --archives /opt/mlpy_env.zip#ANACONDA \
  /opt/test_spark.py
executors运行情况如下图:
其中,agent1为x86服务器,agent2为ARM服务器。agent1上运行失败是由于独立Python包中打包的是ARM版本的Python,导致在x86服务器上运行失败。
运行失败executor的错误日志为: