Building a DStream from a socket
package com.zhengkw.stu.day01

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SocketWordCount {
  def main(args: Array[String]): Unit = {
    // local[2]: one thread for the socket receiver, one for batch processing
    val conf: SparkConf = new SparkConf().setAppName("SocketWordCount").setMaster("local[2]")
    // Batch interval of 3 seconds
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(3))
    // Receive lines of text over TCP from hadoop102:9999
    val sourceStream: ReceiverInputDStream[String] = ssc.socketTextStream("hadoop102", 9999)
    // Word count within each batch
    val resultStream: DStream[(String, Int)] = sourceStream.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Print up to 100 records of each batch's result
    resultStream.print(100)
    ssc.start()
    ssc.awaitTermination()
  }
}
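To try this out, first start a plain text server on the listening host (hadoop102 in this code), for example with netcat: nc -lk 9999. Lines typed into netcat are split on spaces, and every 3-second batch prints the per-word counts to the console.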
Building a DStream from a queue of RDD[T]
package com.zhengkw.stu.day01

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable

object RDDQueue {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("RDDQueue")
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(3))
    // Queue that the streaming context polls for new RDDs every batch interval
    val queue: mutable.Queue[RDD[Int]] = mutable.Queue[RDD[Int]]()
    // oneAtATime = false: consume all RDDs currently in the queue per batch
    val stream: InputDStream[Int] = ssc.queueStream(queue, oneAtATime = false)
    val value: DStream[Int] = stream.reduce(_ + _)
    value.print()
    ssc.start()
    // Feed the queue from the driver; sleep so it is not flooded with RDDs
    while (true) {
      println(queue.size)
      val rdd = ssc.sparkContext.parallelize(1 to 100)
      queue.enqueue(rdd)
      Thread.sleep(2000)
    }
    ssc.awaitTermination() // never reached: the loop above runs forever
  }
}
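A note on the design: with oneAtATime = false, queueStream drains every RDD sitting in the queue at each batch, so with a 2-second enqueue interval and a 3-second batch a given batch may see one or two RDDs (each of which sums 1 to 100, i.e. 5050 per RDD). Setting oneAtATime = true would instead process exactly one queued RDD per batch interval. Because the feeding loop never exits, awaitTermination() is never actually reached; the queue-based source is intended for local testing of streaming logic, not for production use.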