import os
import sys
os.environ['SPARK_HOME'] = '/opt/spark'
sys.path.append("/opt/spark/python")
from pyspark import SparkContext
from pyspark import SparkConf
def reducetest():
    """Sum 1..10 on the cluster with RDD.reduce() and print the total (55)."""
    sc = SparkContext("spark://node0:7077", "reduce")
    try:
        numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # renamed: don't shadow builtin `list`
        total = sc.parallelize(numbers).reduce(lambda x, y: x + y)
        print(total)
    finally:
        # Original wrote `sc.stop` (no call) — the context was never stopped.
        # Call it, and do so even if the job raises.
        sc.stop()
def collecttest():
    """Pull all elements of a 10-element RDD back to the driver and print them."""
    sc = SparkContext("spark://node0:7077", "collec")
    try:
        numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # renamed: don't shadow builtin `list`
        collected = sc.parallelize(numbers).collect()
        print(collected)
    finally:
        # Original wrote `sc.stop` (no call) — the context was never stopped.
        sc.stop()
def counttest():
    """Count the elements of a 10-element RDD with RDD.count() and print 10."""
    sc = SparkContext("spark://node0:7077", "count")
    try:
        numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # renamed: don't shadow builtin `list`
        count = sc.parallelize(numbers).count()
        print(count)
    finally:
        # Original wrote `sc.stop` (no call) — the context was never stopped.
        sc.stop()
def taketest():
    """Fetch the first three elements of the RDD with RDD.take(3) and print them."""
    sc = SparkContext("spark://node0:7077", "take")
    try:
        numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # renamed: don't shadow builtin `list`
        first_three = sc.parallelize(numbers).take(3)
        print(first_three)
    finally:
        # Original wrote `sc.stop` (no call) — the context was never stopped.
        sc.stop()
def saveAstextFiletest():
    """Write the RDD out as text files under "/count" (fails if the path exists)."""
    sc = SparkContext("spark://node0:7077", "saveAstextFile")
    try:
        numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # renamed: don't shadow builtin `list`
        sc.parallelize(numbers).saveAsTextFile("/count")
    finally:
        # Original wrote `sc.stop` (no call) — the context was never stopped.
        sc.stop()
def countByKeytest():
    """Count pairs per key with RDD.countByKey() and print the resulting dict.

    Expected output: {'class1': 2, 'class2': 2}.
    """
    sc = SparkContext("spark://node0:7077", "countByKey")
    try:
        pairs = [("class1", "elo"), ("class2", "jave"),
                 ("class1", "tom"), ("class2", "smi")]
        counts = sc.parallelize(pairs).countByKey()
        print(counts)
    finally:
        # Original never stopped this SparkContext at all — resource leak.
        sc.stop()
def f(x):
    """Print one RDD element; shipped to the workers by foreach()."""
    print(x)
def foreachtest():
    """Run f on every element of a local RDD via foreach (prints on executors)."""
    sc = SparkContext("local", "foreach")
    try:
        sc.parallelize([1, 2, 3, 4, 5]).foreach(f)
    finally:
        # Original never stopped this SparkContext — resource leak; stop it
        # even if the job raises.
        sc.stop()
if __name__ == '__main__':
    # Uncomment exactly one action demo to run it against the cluster.
    # reducetest()
    # collecttest()
    # counttest()
    # taketest()
    # saveAstextFiletest()
    # countByKeytest()
    foreachtest()
# Spark 2.2.0 action operations, Python version.
# (Trailing blog comment-widget boilerplate from the original paste removed —
#  it was not valid Python and carried no content.)