Storm(六):數據流的分流與合流

   

          Storm 對數據處理時,不同的數據交給不同的bolt來處理,然後處理好的數據傳給同個bolt來存儲到數據庫,這時就需要分流與合流,我們通過一個例子瞭解分流與合流。

  

    我們通過Spout讀取文本,然後發送到第一個bolt對文本進行切割:如果是空格分隔的文本發給bolt(1),如果是逗號分隔的文本發給bolt(2),也就是分流;然後再把切割好的單詞中相同的單詞發送給第二個bolt的同一個task來統計(合流)。這些過程可以利用多臺服務器幫我們完成。

  1、分流 

   1)首先在Bolt中通過declareOutputFields定義輸出的數據流

    

        @Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Declare two named output streams so that downstream bolts can each
		// subscribe to one stream independently (this is the split point).
		declarer.declareStream("streamId1", new Fields("field"));
		declarer.declareStream("streamId2", new Fields("field"));
	}
   2)然後發送時指定發送數據流ID

	collector.emit("streamId1",new Values(value));
  

   3)最後在構建拓撲時聲明bolt對應的數據流ID

   

       builder.setBolt("split1", new SplitSentence1Bolt(), 2).shuffleGrouping("spout","streamId1");		
       builder.setBolt("split2", new SplitSentence2Bolt(), 2).shuffleGrouping("spout","streamId2");

 2、合流

    在構建拓撲時聲明該bolt接收哪幾個bolt的輸出即可

    

       builder.setBolt("count", new WordCountBolt(), 2).fieldsGrouping("split1", new Fields("word"))
		.fieldsGrouping("split2", new Fields("word"));

 接下來我們來看整個例子:

 

    第一步:創建spout數據源

      

import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

/**
 * Data-source spout: emits sentences alternately on two named streams,
 * "streamId1" (space-separated sentences) and "streamId2" (comma-separated
 * sentences), to demonstrate stream splitting.
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SentenceSpout extends  BaseRichSpout {
	
	private SpoutOutputCollector collector;
	// Even indices hold space-separated sentences, odd indices comma-separated ones.
	private String[] sentences = {
			"Apache Storm is a free and open source distributed realtime computation system",
			"Storm,makes,it,easy,to,reliably,process,unbounded,streams,of,data",
			"doing for realtime processing what Hadoop did for batch processing",
			 "can,be,used,with,any,programming,language",
			"and is a lot of fun to use" };
	private int index = 0;

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Declare both output streams; each downstream bolt subscribes to one.
		declarer.declareStream("streamId1", new Fields("sentence"));
		declarer.declareStream("streamId2", new Fields("sentence"));
	}

	@SuppressWarnings("rawtypes")
	@Override
	public void open(Map config, TopologyContext context,SpoutOutputCollector collector) {
		this.collector = collector;
	}

	@Override
	public void nextTuple() {
		if (index >= sentences.length) {
			// All sentences emitted. Sleep before returning so we don't
			// busy-spin: Storm keeps calling nextTuple() in a tight loop,
			// and an immediate return would burn a full CPU core.
			Utils.sleep(1000);
			return;
		}
		// Route even-indexed (space-separated) sentences to streamId1 and
		// odd-indexed (comma-separated) ones to streamId2.
		if (index % 2 == 0) {
			collector.emit("streamId1", new Values(sentences[index]));
		} else {
			collector.emit("streamId2", new Values(sentences[index]));
		}
		index++;
		Utils.sleep(1);
	}
}


   第二步:實現單詞切割bolt1

    

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Splits space-separated sentences into individual words and emits one
 * tuple per word on the default stream.
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SplitSentence1Bolt extends BaseBasicBolt {

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Single output field: one word per emitted tuple.
		declarer.declare(new Fields("word"));
	}

	@Override
	public void execute(Tuple input, BasicOutputCollector collector) {
		// Tokenize the incoming sentence on single spaces and forward
		// every token downstream.
		final String[] tokens = input.getStringByField("sentence").split(" ");
		for (int i = 0; i < tokens.length; i++) {
			collector.emit(new Values(tokens[i]));
		}
	}
}


   第三步:實現單詞切割bolt2

    

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Splits comma-separated sentences into individual words and emits one
 * tuple per word on the default stream.
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class SplitSentence2Bolt extends BaseBasicBolt {

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Single output field: one word per emitted tuple.
		declarer.declare(new Fields("word"));
	}

	@Override
	public void execute(Tuple input, BasicOutputCollector collector) {
		// Tokenize the incoming sentence on commas and forward every
		// token downstream.
		final String[] tokens = input.getStringByField("sentence").split(",");
		for (int i = 0; i < tokens.length; i++) {
			collector.emit(new Values(tokens[i]));
		}
	}
}

 

   第四步:對單詞進行統計bolt

     

import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

/**
 * Terminal bolt that tallies word frequencies. Words arrive from both
 * splitter bolts via fieldsGrouping, so every occurrence of a given word
 * reaches the same task (this is the merge point). Accumulated counts are
 * printed when the topology shuts down.
 * @author zhengcy
 */
@SuppressWarnings("serial")
public class WordCountBolt extends BaseBasicBolt {
	
	// Per-task word -> occurrence count; initialized in prepare().
	private  Map<String, Long> counts = null;
	

    @SuppressWarnings("rawtypes")
	@Override
    public void prepare(Map stormConf, TopologyContext context) {
		this.counts = new HashMap<String, Long>();
    }

	@Override
	public void cleanup() {
		// Iterate entries directly instead of keySet() + get(): avoids one
		// extra map lookup per word when dumping the results.
		for (Map.Entry<String, Long> entry : this.counts.entrySet()) {
			System.out.println(entry.getKey() + " : " + entry.getValue());
		}
	}

	@Override
	public void execute(Tuple input, BasicOutputCollector collector) {
		String word = input.getStringByField("word");
		// First occurrence starts at 0, then increment.
		Long count = this.counts.get(word);
		if (count == null) {
			count = 0L;
		}
		count++;
		this.counts.put(word, count);
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Terminal bolt: emits nothing, so no output fields are declared.
	}

}


    第五步:創建Topology拓撲

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

/**
 * Word-count topology wiring: one spout feeds two splitter bolts over two
 * named streams (stream splitting), and both splitters feed a single
 * counting bolt via fieldsGrouping (stream merging).
 * @author zhengcy
 */
public class WordCountTopology {

	public static void main(String[] args) throws Exception {
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("spout", new SentenceSpout(), 1);

		// Each splitter subscribes only to its own named stream from the spout.
		builder.setBolt("split1", new SplitSentence1Bolt(), 2).shuffleGrouping("spout", "streamId1");
		builder.setBolt("split2", new SplitSentence2Bolt(), 2).shuffleGrouping("spout", "streamId2");

		// fieldsGrouping on "word" routes identical words from both splitters
		// to the same counter task.
		builder.setBolt("count", new WordCountBolt(), 2)
				.fieldsGrouping("split1", new Fields("word"))
				.fieldsGrouping("split2", new Fields("word"));

		Config conf = new Config();
		conf.setDebug(false);

		boolean runOnCluster = args != null && args.length > 0;
		if (runOnCluster) {
			// Cluster mode: submit under the name given on the command line.
			conf.setNumWorkers(2);
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		} else {
			// Local mode: run for ten seconds, then shut down.
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("word-count", conf, builder.createTopology());
			Thread.sleep(10000);
			cluster.shutdown();
		}
	}
}



 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章