Setting up KAFKA + STORM locally on WINDOWS

Preparing the installation tools

JDK 1.8
For the download and installation steps, see my previous post.
Kafka 0.9.0.1
Same as above; see the previous post.
Storm 1.0.2
https://storm.apache.org/downloads.html
Just download and unzip.
Python 2.7
Download it from the official site.

The version combination above is not the only workable one (it is just one combination that I have personally verified).
The JDK must be version 1.7 or above (this is Storm's JDK requirement); a quick way to check is shown below these notes.
The Kafka package already ships with ZooKeeper, so there is no need to install ZooKeeper separately.
Note: it is best to keep spaces out of all installation folder names, to avoid having to deal with folder-name errors when running the startup commands later.
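A quick sanity check from a cmd window that the JDK and Python are installed and visible on the PATH (the exact version numbers printed will of course depend on your own installation):

java -version
python --version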

Kafka setup and testing

Setup

Just unzip it into a folder; see the previous post for the details. No extra configuration is needed.

Testing

Example: the extracted folder is kafka0.9.0.1 (all of the following commands are run from inside that folder).

Start the ZooKeeper service (it supports and coordinates Kafka; ZooKeeper must be running before Kafka, Storm, and the other services can be started).
bin\windows\zookeeper-server-start.bat config\zookeeper.properties

[screenshot: zookeeper-server-start output]

Start the Kafka service
bin\windows\kafka-server-start.bat config\server.properties

[screenshot: kafka-server-start output]
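With Kafka's default settings the test topic is created automatically the first time it is used, but if auto-creation is disabled in server.properties you can create it explicitly first (run from the Kafka folder):

bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test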

Create a consumer
bin\windows\kafka-console-consumer.bat --zookeeper localhost:2181 --topic test --from-beginning

[screenshot: console consumer output]

Create a producer
bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic test

[screenshot: console producer output]

At this point, if the producer can send messages and the consumer can receive them, the Kafka setup is working.
In the screenshot the consumer shows one more "new" message than the producer because I had previously sent a test message on this topic; since the consumer was started with --from-beginning, it picked up all of the earlier messages as well.

Storm setup and testing

Setup

For the Storm setup I followed this post:
https://github.com/BVLC/caffe/wiki/Model-Zoo
(mainly just for the various Python settings and the STORM_HOME setting; installing Storm itself is really simple, and a sketch of the environment variables is shown below)
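For reference, a minimal sketch of the environment variables involved, set for the current cmd session only (the folder paths are just examples; point them at your own, space-free install locations, or set them permanently via the system environment variables dialog):

set JAVA_HOME=C:\Java\jdk1.8.0_144
set STORM_HOME=C:\storm1.0.2
set PATH=%PATH%;%JAVA_HOME%\bin;%STORM_HOME%\bin;C:\Python27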

Testing

Example: the extracted folder is storm1.0.2 (all of the following commands are run from the bin subfolder of that folder).

Start ZooKeeper (as in the Kafka section above).

Start nimbus, supervisor, and the UI (each in its own cmd window):
storm nimbus
storm supervisor
storm ui

[screenshot: storm nimbus / supervisor / ui startup]

Open localhost:8080 in the browser's address bar; if the STORM UI page appears, the setup succeeded.
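To double-check that the three daemons are really up, you can also list the running JVMs with the JDK's jps tool (the exact names it prints for the Storm processes may vary):

jps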

Connecting Kafka and Storm, and testing

(Emmm, I am new to Java, so if I have misunderstood or poorly explained anything here, feel free to contact me and discuss.)
Create a new project in Eclipse with the four .java files below. They implement a Kafka producer, a Kafka consumer, the Storm spout, and a Storm bolt, and together show the full path of the data from the producer into Storm, where it is received, processed and printed. (The Maven dependencies I assume for this project are sketched after the file list.)
KafkaProducer.java
KafkaConsumer.java
LogProcessTopology.java ----contains the KafkaSpout component that reads the data from Kafka, plus simple topology-building code
ParseLogBolt.java ----the bolt's processing logic; this code only demonstrates how to get data out of Kafka, so it does no real processing and simply receives the input and prints it
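
The code below assumes storm-core, storm-kafka, the Kafka libraries and fastjson are on the classpath. If the Eclipse project is a Maven project, a minimal sketch of the dependencies might look like the following; the artifact ids and version numbers are my assumption, chosen to match the versions used above, so adjust them to your own setup:

<dependencies>
  <!-- storm core; switch the scope to "provided" when submitting to a real cluster -->
  <dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.0.2</version>
  </dependency>
  <!-- KafkaSpout and the other org.apache.storm.kafka.* classes -->
  <dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-kafka</artifactId>
    <version>1.0.2</version>
  </dependency>
  <!-- kafka broker/client classes, including kafka.api.OffsetRequest used by the topology;
       excluding slf4j-log4j12 avoids a logging-binding clash with storm-core -->
  <dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.9.0.1</version>
    <exclusions>
      <exclusion>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
      </exclusion>
    </exclusions>
  </dependency>
  <!-- fastjson, imported by ParseLogBolt for parsing the JSON log in a fuller version of the bolt -->
  <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.47</version>
  </dependency>
</dependencies>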

KafkaProducer.java

package example.Demo;

import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaProducer {
	
	public static void main(String[] args) throws Exception {
		Properties props = new Properties();  
		props.put("bootstrap.servers","localhost:9092");
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");  
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");  
		String topic = "gold";
		
		Producer<String, String> producer = new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props);  
		int count = 0;
		//{"id":"865456863256326","vid":"1495267869123456","uid":"965406863256326","gold":150,"platform":"ios","timestamp":1495267869}
		//simulated gift-sender ids
		String[] idArr = {"865456863256320","865456863256321","865456863256322","865456863256323","865456863256324","865456863256325","865456863256326","865456863256327","865456863256328","865456863256329"};
		//simulated live-stream video ids
		String[] vidArr = {"1495267869123450","1495267869123451","1495267869123452","1495267869123453","1495267869123454"};
		//simulated streamer (broadcaster) user ids
		String[] uidArr = {"964226522333220","964226522333221","964226522333222","964226522333223","964226522333224"};
		//simulated client platforms
		String[] platformArr = {"android","ios"};
		Random random = new Random();
		while(true){
			int rint1 = random.nextInt(10);
			int rint2 = random.nextInt(5);
			int rint3 = random.nextInt(2);
			String log = "{\"id\":\""+idArr[rint1]+"\",\"vid\":\""+vidArr[rint2]+"\",\"uid\":\""+uidArr[rint2]+"\",\"gold\":"+random.nextInt(10000)+",\"platform\":\""+platformArr[rint3]+"\",\"timestamp\":"+System.currentTimeMillis()+"}";
			producer.send(new ProducerRecord<String, String>(topic, log));  
			System.out.println(log);
			count++;
			Thread.sleep(1000*5); //send one event every 5 seconds
			if(count%10 == 0){
				//break;
				Thread.sleep(1000*60); //pause for an extra minute after every 10 events
			}
		}
	}

}

KafkaConsumer.java

package example.Demo;

import java.util.ArrayList;
import java.util.Properties;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

public class KafkaConsumer {
	
	public static void main(String[] args) {
		Properties props = new Properties();  
		props.put("bootstrap.servers", "localhost:9092");
		props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  
		props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  
		props.setProperty("group.id", "1");
		String topic = "gold";
		
		Consumer<String, String> consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);  
		ArrayList<String> topicList = new ArrayList<String>();
		topicList.add(topic);
		consumer.subscribe(topicList);  
		
		while(true){
		    ConsumerRecords<String, String> records = consumer.poll(1);  //poll with a 1 ms timeout; the loop keeps polling
		    for (ConsumerRecord<String, String> record : records) {  
		        System.out.println(record);  
		    }  
		}
	}

}

LogProcessTopology.java

package example.Demo;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;

//ParseLogBolt lives in the same package (example.Demo), so no bolt import is needed

public class LogProcessTopology {	
	public static void main(String[] args) {
		TopologyBuilder topologyBuilder = new TopologyBuilder();
		
		//spout that connects to kafka
		BrokerHosts hosts = new ZkHosts("localhost:2181"); //ZooKeeper address
		String topic = "gold";
		String zkRoot = "/kafkaspout0709";//this node is created in ZooKeeper and the consumption state is stored under it
		//it is used to store the consumer's offsets
		String id = "consumer123456";//group id, the spout's unique identifier
		
		SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
		spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());//the format in which the spout emits its data
		//on the first start there is no offset stored in ZooKeeper yet; LatestTime() means start consuming from the newest data in the kafka topic,
		//EarliestTime() means start consuming from the oldest data in the topic
		spoutConf.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
		//spoutConf.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		
		
		String SPOUT_ID = KafkaSpout.class.getSimpleName();
		String BOLT_ID_1 = ParseLogBolt.class.getSimpleName();
		//wire the spout and bolt together (parallelism hints can be tuned here)
		topologyBuilder.setSpout(SPOUT_ID, new KafkaSpout(spoutConf));
		topologyBuilder.setBolt(BOLT_ID_1, new ParseLogBolt()).shuffleGrouping(SPOUT_ID);
		
		StormTopology createTopology = topologyBuilder.createTopology(); 
		String topology_name = LogProcessTopology.class.getSimpleName();
		Config config = new Config();
		if(args.length==0){
			//no program arguments: run in a local in-process cluster
			LocalCluster localCluster = new LocalCluster();
			localCluster.submitTopology(topology_name, config, createTopology);
		}else{
			try {
				StormSubmitter.submitTopology(topology_name, config, createTopology);
			} catch (AlreadyAliveException e) {
				e.printStackTrace();
			} catch (InvalidTopologyException e) {
				e.printStackTrace();
			} catch (AuthorizationException e) {
				e.printStackTrace();
			}
		}
	}

}

ParseLogBolt.java

package example.Demo;

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.Constants;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

public class ParseLogBolt extends BaseRichBolt {
	private OutputCollector collector;
	private Map<String, String> idCountryMap;

	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
		// load the full set of user information here:
		// at initialisation time, load the gift-sender id / province mapping from redis; later, a storm timed task
		// (driven by the tick tuples configured below) re-syncs it every half hour to pick up newly registered users
	}

	@Override
	public void execute(Tuple input) {
		// ignore storm's system tick tuples (configured below to arrive every 30 minutes);
		// a full implementation would use them to refresh the cached user information
		if (Constants.SYSTEM_COMPONENT_ID.equals(input.getSourceComponent())) {
			this.collector.ack(input);
			return;
		}
		// "str" is the field name used by the KafkaSpout's StringScheme
		String log = input.getStringByField("str");
		System.out.println(log);
		this.collector.ack(input);
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// fields a fuller implementation would emit; this demo bolt only prints and does not emit anything
		declarer.declare(new Fields("province", "uid", "gold"));
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		// ask storm to deliver a tick tuple to this bolt every 30 minutes (used for the periodic user-info refresh)
		HashMap<String, Object> hashMap = new HashMap<String, Object>();
		hashMap.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 30 * 60);
		return hashMap;
	}

}
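
One note before running: with Kafka's default settings the gold topic is auto-created the first time the producer sends to it, but if auto-creation is disabled in your server.properties it has to be created by hand before starting the programs (run from the Kafka folder):

bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic gold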

Start Java files 1-3 above (KafkaProducer, KafkaConsumer and LogProcessTopology) in Eclipse.
You should see the Kafka producer steadily sending messages,
and Storm receiving them (whatever processing logic you need can then be added to the bolt).

With that, the local Kafka + Storm setup is complete and the whole pipeline runs end to end.
