Spark 2.2.0 action operations, Python version

The standalone script below demonstrates the common RDD actions: reduce, collect, count, take, saveAsTextFile, countByKey, and foreach.

import os
import sys

# Point the script at the local Spark installation so that pyspark is
# importable when the file is run with a plain Python interpreter.
os.environ['SPARK_HOME'] = '/opt/spark'
sys.path.append("/opt/spark/python")

from pyspark import SparkContext
from pyspark import SparkConf

def reducetest():
    # reduce aggregates all elements with the given function; here it sums 1..10.
    sc = SparkContext("spark://node0:7077", "reduce")
    nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    listRdd = sc.parallelize(nums)
    count = listRdd.reduce(lambda x, y: x + y)
    print(count)  # 55
    sc.stop()
def collecttest():
    # collect brings the whole RDD back to the driver as a Python list.
    sc = SparkContext("spark://node0:7077", "collect")
    nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    listRdd = sc.parallelize(nums)
    collected = listRdd.collect()
    print(collected)  # [1, 2, ..., 10]
    sc.stop()
def counttest():
    # count returns the number of elements in the RDD.
    sc = SparkContext("spark://node0:7077", "count")
    nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    listRdd = sc.parallelize(nums)
    count = listRdd.count()
    print(count)  # 10
    sc.stop()
def taketest():
    # take(n) returns the first n elements to the driver.
    sc = SparkContext("spark://node0:7077", "take")
    nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    listRdd = sc.parallelize(nums)
    three = listRdd.take(3)
    print(three)  # [1, 2, 3]
    sc.stop()
def saveAstextFiletest():
    # saveAsTextFile writes one part-* file per partition. The target directory
    # ("/count" on the default filesystem, typically HDFS when running against
    # a cluster) must not already exist, or the job fails.
    sc = SparkContext("spark://node0:7077", "saveAstextFile")
    nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    listRdd = sc.parallelize(nums)
    listRdd.saveAsTextFile("/count")
    sc.stop()
def countByKeytest():
    # countByKey counts how many elements there are for each key.
    sc = SparkContext("spark://node0:7077", "countByKey")
    listtest = [("class1", "elo"), ("class2", "jave"), ("class1", "tom"), ("class2", "smi")]
    listRDD = sc.parallelize(listtest)
    count = listRDD.countByKey()
    print(count)
    sc.stop()
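# countByKey returns its result to the driver as a collections.defaultdict,
# so for the sample data above the printed value is along the lines of
# defaultdict(<class 'int'>, {'class1': 2, 'class2': 2}).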
def f(x):
    print(x)
def foreachtest():
    # foreach applies f to every element; nothing is returned to the driver.
    sc = SparkContext("local", "foreach")
    sc.parallelize([1, 2, 3, 4, 5]).foreach(f)
    sc.stop()
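# Note: foreach runs f on the executors. With a cluster master such as
# spark://node0:7077 the printed lines would land in the executor stdout logs,
# not the driver console; the "local" master keeps everything in one process,
# so the numbers are printed where you can see them.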


if __name__ == '__main__':
    #reducetest()
    #collecttest()
    #counttest()
    #taketest()
    #saveAstextFiletest()
    #countByKeytest()
    foreachtest()
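
To try a different action, uncomment the corresponding call above and submit the script with spark-submit (a sketch, assuming the file is saved as actions.py; the name is arbitrary):

    /opt/spark/bin/spark-submit actions.py

Each function hard-codes its own master URL in the SparkContext constructor, so no --master flag is needed on the command line.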
