WINDOWS本機環境下搭建KAFKA+STORM
安裝工具準備
JDK1.8
具體下載及安裝方法可參考博主前一篇博文
kafka0.9.0.1
同上,可參考上一篇
storm1.0.2
https://storm.apache.org/downloads.html
下載之後解壓即可
python2.7
官網下載即可
以上的版本搭配不是唯一方案(只是博主本人體驗可以通過的方案之一)
JDK一定要下載版本1.7及以上(這是storm對JDK的版本要求)
kafka安裝包內自帶zookeeper,所以此處不需要再安裝zookeeper
注意點:所有的安裝文件夾名稱最好不要帶空格,避免後續的在啓動命令過程中對於文件夾名稱的報錯處理
kafka搭建及測試
搭建
解壓到當前文件夾即可,具體過程參照上一篇博文,不需要過多的配置
測試
舉例:解壓文件夾爲kafka0.9.0.1(以下命令行位置均爲該文件夾下)
開啓zookeeper服務(對於kafka的支撐管理,必須先開啓zookeeper服務才能後續開啓kafka以及storm等服務)
bin\windows\zookeeper-server-start.bat config\zookeeper.properties
開啓kafka服務
bin\windows\kafka-server-start.bat config\server.properties
創建一個消費者
bin\windows\kafka-console-consumer.bat --zookeeper localhost:2181 --topic test --from-beginning
創建一個生產者
bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic test
到此,producer和consumer能實現發消息和收消息說明kafka搭建成功
圖中consumer比producer多一個new是因爲我之前用這個topic發過一個測試消息,而我consumer時候的設置參數是--from-beginning所以連同之前所有的消息一起接收了
storm搭建及測試
搭建
storm的搭建我是參考的這篇博主的
https://storm.apache.org/releases/1.0.2/Setting-up-a-Storm-cluster.html (原文此處鏈接指向Caffe Model Zoo,疑似貼錯,已替換爲Storm官方搭建文檔)
(其實主要也就參考了一下python的各種設置,STORM_HOME的設置,storm的安裝其實很簡單)
測試
舉例:解壓文件夾爲storm1.0.2(以下命令均在解壓文件夾\bin下完成)
啓動zookeeper
啓動nimbus,supervisor以及ui
storm nimbus
storm supervisor
storm ui
瀏覽器地址欄輸入localhost:8080能看到STORM UI界面即爲成功
kafka+storm連接及測試
(emmm本人JAVA新手入坑,有些東西理解表達解釋的不對可以聯繫我大家交流交流)
eclipse下新建項目,及四個.java文件,這四個文件分別表示kafka producer , kafka consumer , storm spout 以及 storm bolt 藉此展示完整的數據從producer到被storm接收後並且處理輸出
KafkaProducer.java
KafkaConsumer.java
LogProcessTopology.java ----其中包含kafkaspout組件負責從kafka接收數據,幷包含簡單的topology搭建代碼
LogProcessBolt.java ----bolt的邏輯處理代碼,本代碼只負責簡單演示如何從kafka中取到數據,因此並無處理只是從input中接收並展示數據
KafkaProducer.java
package example.Demo;
import java.util.Properties;
import java.util.Random;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
public class KafkaProducer {

    /**
     * Continuously produces simulated "gift" log events (JSON strings) to the
     * Kafka topic {@code gold} on localhost:9092, one record every 5 seconds,
     * with an extra 60-second pause after every 10th record.
     *
     * <p>Fix over the original: the producer is now closed via
     * try-with-resources, so buffered records are flushed and the network
     * resources released even if the send loop throws.
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        String topic = "gold";
        // Sample record:
        // {"id":"865456863256326","vid":"1495267869123456","uid":"965406863256326","gold":150,"platform":"ios","timestamp":1495267869}
        // Simulated gift-sender ids
        String[] idArr = {"865456863256320","865456863256321","865456863256322","865456863256323","865456863256324","865456863256325","865456863256326","865456863256327","865456863256328","865456863256329"};
        // Simulated live-stream video ids
        String[] vidArr = {"1495267869123450","1495267869123451","1495267869123452","1495267869123453","1495267869123454"};
        // Simulated streamer user ids
        String[] uidArr = {"964226522333220","964226522333221","964226522333222","964226522333223","964226522333224"};
        // Simulated client platform
        String[] platformArr = {"android","ios"};
        Random random = new Random();
        // try-with-resources: KafkaProducer implements Closeable; closing flushes pending sends.
        try (Producer<String, String> producer =
                new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props)) {
            int count = 0;
            while (true) {
                int rint1 = random.nextInt(10);
                int rint2 = random.nextInt(5);
                int rint3 = random.nextInt(2);
                // NOTE(review): uid reuses the vid index (rint2) — presumably intentional
                // for this demo (streamer tied to the room); confirm if reused elsewhere.
                String log = "{\"id\":\""+idArr[rint1]+"\",\"vid\":\""+vidArr[rint2]+"\",\"uid\":\""+uidArr[rint2]+"\",\"gold\":"+random.nextInt(10000)+",\"platform\":\""+platformArr[rint3]+"\",\"timestamp\":"+System.currentTimeMillis()+"}";
                producer.send(new ProducerRecord<String, String>(topic, log));
                System.out.println(log);
                count++;
                Thread.sleep(1000*5);
                if (count % 10 == 0) {
                    //break;
                    Thread.sleep(1000*60); // longer pause every 10 records
                }
            }
        }
    }
}
KafkaConsumer.java
package example.Demo;
import java.util.ArrayList;
import java.util.Properties;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
public class KafkaConsumer {

    /**
     * Consumes records from the Kafka topic {@code gold} on localhost:9092
     * (consumer group "1") and prints each record to stdout, forever.
     *
     * <p>Fix over the original: {@code poll(1)} with a 1 ms timeout inside a
     * {@code while(true)} loop is a tight busy-wait that burns CPU; a 100 ms
     * timeout blocks in the client instead, with identical output.
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.setProperty("group.id", "1");
        String topic = "gold";
        Consumer<String, String> consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);
        ArrayList<String> topicList = new ArrayList<String>();
        topicList.add(topic);
        consumer.subscribe(topicList);
        while (true) {
            // Block up to 100 ms waiting for records instead of spinning with a 1 ms poll.
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record);
            }
        }
    }
}
LogProcessTopology.java
package example.Demo;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import com.storm0709.stormProject0709.bolt.LogProcessBolt1;
import com.storm0709.stormProject0709.bolt.LogProcessBolt2;
import com.storm0709.stormProject0709.bolt.ParseLogBolt;
public class LogProcessTopology {
public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
//鏈接kafka的spout
BrokerHosts hosts = new ZkHosts("localhost:2181"); //ZK地址
String topic = "gold";
String zkRoot = "/kafkaspout0709";//會在zk中創建這個節點,再節點裏面保存消費信息
//用於保存消費者的偏離值
String id = "consumer123456";//groupid spout的唯一標誌
SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());//表示spout發送數據的格式
//第一次啓動的時候在zk中並沒有保存消費的位置信息,可以這樣設置LatestTime,表示次用kafka topic的最新數據開始消費
// EarliestTime() 表示從這個topic的最老的數據開始消費
spoutConf.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
//spoutConf.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
String SPOUT_ID = KafkaSpout.class.getSimpleName();
String BOLT_ID_1 = ParseLogBolt.class.getSimpleName();
//調憂參數
topologyBuilder.setSpout(SPOUT_ID, new KafkaSpout(spoutConf));
topologyBuilder.setBolt(BOLT_ID_1, new ParseLogBolt()).shuffleGrouping(SPOUT_ID);
StormTopology createTopology = topologyBuilder.createTopology();
String topology_name = LogProcessTopology.class.getSimpleName();
Config config = new Config();
if(args.length==0){
//創建本地集羣
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, createTopology);
}else{
try {
StormSubmitter.submitTopology(topology_name, config, createTopology);
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
}
}
}
LogProcessBolt.java
package example.Demo;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.Constants;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
public class ParseLogBolt extends BaseRichBolt {
private OutputCollector collector;
private Map<String, String> idCountryMap;
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
// 在這吧全量的用戶信息加載過來
// 在初始化的時候從redis中把送禮人id和省份信息加載過來,後期在storm的定時任務中每半個小時同步一次,把新註冊用戶的信息拉取過來
}
@Override
public void execute(Tuple input) {
System.out.println("6..be here~"); String log =
input.getStringByField("str"); System.out.println("7..be here~");
System.out.println(log); this.collector.ack(input);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("province", "uid", "gold"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
HashMap<String, Object> hashMap = new HashMap<String, Object>();
hashMap.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 30 * 60);
return hashMap;
}
}
在eclipse中啓動上述1-3java文件
可以看到kafka producer在源源不斷的發送文件
並且在storm中可以接收到文件(需要的邏輯處理功能可以在bolt中繼續修改)
至此,KAFKA+STORM的本地搭建及流程跑通已完成