Storm 對數據處理時,不同的數據交給不同的bolt來處理,然後處理好的數據傳給同個bolt來存儲到數據庫,這時就需要分流與合流,我們通過一個例子瞭解分流與合流。
我們通過Spout讀取文本,然後發送到第一個bolt對文本進行切割:如果是空格分隔的文本發給bolt(1),如果是逗號分隔的文本發給bolt(2),也就是分流;然後再對切割好的單詞進行處理,把相同的單詞發送給第二個bolt的同一個task來統計(合流),這些過程可以利用多臺服務器幫我們完成。
1、分流
1)首先在Bolt中通過declareOutputFields定義輸出的數據流
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("streamId1", new Fields("field"));
declarer.declareStream("streamId2", new Fields("field"));
}
2)然後發送時指定發送數據流ID collector.emit("streamId1",new Values(value));
3)最後在構建拓撲時聲明bolt對應的數據流ID
builder.setBolt("split1", new SplitSentence1Bolt(), 2).shuffleGrouping("spout","streamId1");
builder.setBolt("split2", new SplitSentence2Bolt(), 2).shuffleGrouping("spout","streamId2");
2、合流
在構建拓撲時,聲明該bolt同時訂閱多個上游bolt的輸出即可
builder.setBolt("count", new WordCountBolt(), 2).fieldsGrouping("split1", new Fields("word"))
.fieldsGrouping("split2", new Fields("word"));
接下來我們來看整個例子:
第一步:創建spout數據源
import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
/**
 * Spout that feeds sentences into the topology, alternating between two
 * named output streams: even-indexed sentences (space-separated) are emitted
 * on "streamId1", odd-indexed sentences (comma-separated) on "streamId2".
 *
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SentenceSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;

    // Alternates between space-separated (even index) and comma-separated
    // (odd index) sentences so each split bolt receives the format it expects.
    private String[] sentences = {
            "Apache Storm is a free and open source distributed realtime computation system",
            "Storm,makes,it,easy,to,reliably,process,unbounded,streams,of,data",
            "doing for realtime processing what Hadoop did for batch processing",
            "can,be,used,with,any,programming,language",
            "and is a lot of fun to use" };

    // Index of the next sentence to emit; once exhausted, nextTuple becomes a no-op.
    private int index = 0;

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare two separate output streams so each downstream bolt can
        // subscribe to only the sentence format it knows how to split.
        declarer.declareStream("streamId1", new Fields("sentence"));
        declarer.declareStream("streamId2", new Fields("sentence"));
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void nextTuple() {
        // Stop emitting once every sentence has been sent exactly once.
        if (index >= sentences.length) {
            return;
        }
        // Route even-indexed sentences to stream 1, odd-indexed to stream 2
        // (this is the "split" half of the example).
        if (index % 2 == 0) {
            collector.emit("streamId1", new Values(sentences[index]));
        } else {
            collector.emit("streamId2", new Values(sentences[index]));
        }
        index++;
        Utils.sleep(1);
    }
}
第二步:實現單詞切割bolt1
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
 * Splits a space-separated sentence into individual words and emits one
 * tuple per word on the default stream.
 *
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SplitSentence1Bolt extends BaseBasicBolt {

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Single output field carrying one word per tuple.
        declarer.declare(new Fields("word"));
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        final String line = input.getStringByField("sentence");
        // Tokenize on single spaces and forward each token downstream.
        for (final String token : line.split(" ")) {
            collector.emit(new Values(token));
        }
    }
}
第三步:實現單詞切割bolt2
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
 * Splits a comma-separated sentence into individual words and emits one
 * tuple per word on the default stream.
 *
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SplitSentence2Bolt extends BaseBasicBolt {

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Single output field carrying one word per tuple.
        declarer.declare(new Fields("word"));
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        final String line = input.getStringByField("sentence");
        // Tokenize on commas and forward each token downstream.
        for (final String token : line.split(",")) {
            collector.emit(new Values(token));
        }
    }
}
第四步:對單詞進行統計bolt
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
/**
 * Terminal bolt that keeps a running count per word. Because both upstream
 * split bolts use fieldsGrouping on "word", every occurrence of the same word
 * reaches the same task, so a plain in-memory map is sufficient (this is the
 * "merge" half of the example).
 *
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class WordCountBolt extends BaseBasicBolt {

    // Per-task word -> occurrence count; created in prepare().
    private Map<String, Long> counts = null;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        this.counts = new HashMap<String, Long>();
    }

    @Override
    public void cleanup() {
        // Print final tallies on shutdown. Iterate the entry set directly
        // instead of keySet() + get() to avoid a redundant lookup per key.
        // NOTE(review): cleanup is only reliably invoked in local mode.
        for (Map.Entry<String, Long> entry : counts.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String word = input.getStringByField("word");
        // First occurrence starts at 1; otherwise increment the stored count.
        Long count = counts.get(word);
        counts.put(word, count == null ? 1L : count + 1L);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: emits nothing downstream.
    }
}
第五步:創建Topology拓撲
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
/**
 * Wires the word-count topology together: one spout, two split bolts each fed
 * by its own named stream (the split), and a single counting bolt that merges
 * both word streams via fields grouping (the merge).
 *
 * @author zhengcy
 */
public class WordCountTopology {

    public static void main(String[] args) throws Exception {
        final TopologyBuilder topology = new TopologyBuilder();
        topology.setSpout("spout", new SentenceSpout(), 1);
        // Split: each bolt subscribes only to its own named stream.
        topology.setBolt("split1", new SplitSentence1Bolt(), 2).shuffleGrouping("spout", "streamId1");
        topology.setBolt("split2", new SplitSentence2Bolt(), 2).shuffleGrouping("spout", "streamId2");
        // Merge: the counter receives both split outputs, grouped by word so
        // the same word always lands on the same task.
        topology.setBolt("count", new WordCountBolt(), 2)
                .fieldsGrouping("split1", new Fields("word"))
                .fieldsGrouping("split2", new Fields("word"));

        final Config config = new Config();
        config.setDebug(false);

        if (args == null || args.length == 0) {
            // Local mode: run in-process for a short while, then shut down.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("word-count", config, topology.createTopology());
            Thread.sleep(10000);
            localCluster.shutdown();
        } else {
            // Cluster mode: submit under the name given on the command line.
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, topology.createTopology());
        }
    }
}