Building a Log Collection System with Flume + Kafka + Storm
1. Environment
192.168.0.2 hadoop1
192.168.0.3 hadoop2
192.168.0.4 hadoop3
JDK 1.7 is already installed on all three nodes and the environment variables are configured.
2. Software Versions
Flume: apache-flume-1.5.0-bin.tar.gz (http://mirrors.cnnic.cn/apache/flume/)
Storm: apache-storm-0.9.4.tar.gz
Kafka: kafka_2.11-0.8.2.0.tgz
ZooKeeper: zookeeper-3.4.6.tar.gz
Kafka install directory: /home/hadoop/kafka
Storm install directory: /home/hadoop/storm
Flume install directory: /home/hadoop/flume
ZooKeeper install directory: /home/hadoop/zookeeper
Installing each component on its own is not covered here; see my earlier posts:
Storm cluster: http://blog.csdn.net/mapengbo521521/article/details/50051997
ZooKeeper cluster: http://blog.csdn.net/mapengbo521521/article/details/41777721
Kafka cluster: http://blog.csdn.net/mapengbo521521/article/details/43732377
3. Integrating Flume with Kafka
Verifying the Kafka installation
Start Kafka:
cd /home/hadoop/kafka/bin
./kafka-server-start.sh ../config/server.properties &
Create a topic:
./kafka-topics.sh --create --topic test_topic --replication-factor 3 --partitions 2 --zookeeper hadoop1:2181
List the topics:
./kafka-topics.sh --list --zookeeper hadoop1:2181
Delete a topic if needed (on 0.8.2 this requires delete.topic.enable=true in server.properties):
./kafka-topics.sh --delete --topic idoall --zookeeper hadoop2:2181
Open the ZooKeeper client to confirm the topic was registered:
/home/hadoop/zookeeper/bin/zkCli.sh
ls /
ls /brokers
ls /brokers/topics
Send a message: run the following command on hadoop1 and type "hello kafka":
./kafka-console-producer.sh --broker-list hadoop1:9092 --sync --topic test_topic
Receive the message: run the following command on hadoop2 to consume what hadoop1 produced:
./bin/kafka-console-consumer.sh --zookeeper hadoop2:2181 --topic test_topic --from-beginning
If hadoop2 receives the message sent from hadoop1, the Kafka environment is working correctly.
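The same send can also be checked from code. Below is a minimal sketch using the kafka.javaapi.producer API shipped with Kafka 0.8.2, which is the same API the custom Flume sink below relies on; the class name ProducerCheck is a placeholder of my own.

import java.util.Properties;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Minimal sketch: send a single message to test_topic and exit.
public class ProducerCheck {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("metadata.broker.list", "hadoop1:9092,hadoop2:9092,hadoop3:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("request.required.acks", "1");

        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        producer.send(new KeyedMessage<String, String>("test_topic", "hello kafka"));
        producer.close();
    }
}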
Flume configuration
Integrating Flume with Kafka means using Flume as a Kafka producer: Flume collects log events and publishes them to Kafka.
cd /home/hadoop/flume/conf
Create the file flume-kafka-conf.properties:
vi flume-kafka-conf.properties
with the following content:
#agent configuration
agent1.sources=source1
agent1.sinks=sink1
agent1.channels=channel1
#source configuration; /home/hadoop/flume/temp/log is the spooling directory being watched
agent1.sources.source1.type=spooldir
agent1.sources.source1.spoolDir=/home/hadoop/flume/temp/log
agent1.sources.source1.channels=channel1
agent1.sources.source1.fileHeader=false
#sink configuration; the org.apache.kafka.sink.KafkaSink class is written by hand below
agent1.sinks.sink1.type=org.apache.kafka.sink.KafkaSink
agent1.sinks.sink1.metadata.broker.list=hadoop1:9092,hadoop2:9092,hadoop3:9092
agent1.sinks.sink1.serializer.class=kafka.serializer.StringEncoder
agent1.sinks.sink1.request.required.acks=1
agent1.sinks.sink1.max.message.size=1000000
agent1.sinks.sink1.channel=channel1
agent1.sinks.sink1.custom.topic.name=test_topic
#channel configuration
agent1.channels.channel1.type=memory
agent1.channels.channel1.capacity=1000
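Optionally (not part of the configuration above), the memory channel's per-transaction batch size can also be set explicitly; Flume's MemoryChannel defaults it to 100:
#agent1.channels.channel1.transactionCapacity=100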
Create a Java project in Eclipse and add all of the jars under /home/hadoop/flume/lib and /home/hadoop/kafka/libs to its build path.
Write the Flume KafkaSink class with the following content:
package org.apache.kafka.sink; // must match agent1.sinks.sink1.type in the Flume config

import java.util.Map;
import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;

public class KafkaSink extends AbstractSink implements Configurable {

    private Context context;
    private Properties parameters;
    private Producer<String, String> producer;

    private static final String PARTITION_KEY_NAME = "custom.partition.key";
    private static final String CUSTOM_TOPIC_KEY_NAME = "custom.topic.name";
    private static final String DEFAULT_ENCODING = "UTF-8";
    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaSink.class);

    public void configure(Context context) {
        // Copy every sink property from the Flume config into the Kafka producer properties.
        this.context = context;
        ImmutableMap<String, String> props = context.getParameters();
        this.parameters = new Properties();
        for (Map.Entry<String, String> entry : props.entrySet()) {
            this.parameters.put(entry.getKey(), entry.getValue());
        }
    }

    @Override
    public synchronized void start() {
        super.start();
        ProducerConfig config = new ProducerConfig(this.parameters);
        this.producer = new Producer<String, String>(config);
    }

    public Status process() {
        Status status = null;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();

        try {
            transaction.begin();
            Event event = channel.take();
            if (event != null) {
                String partitionKey = (String) parameters.get(PARTITION_KEY_NAME);
                String topic = Preconditions.checkNotNull((String) this.parameters.get(CUSTOM_TOPIC_KEY_NAME),
                        "topic name is required");
                String eventData = new String(event.getBody(), DEFAULT_ENCODING);
                // Use a keyed message only when a partition key is actually configured.
                KeyedMessage<String, String> data = (partitionKey == null || partitionKey.isEmpty())
                        ? new KeyedMessage<String, String>(topic, eventData)
                        : new KeyedMessage<String, String>(topic, partitionKey, eventData);
                LOGGER.info("Sending message to Kafka : [" + topic + ":" + eventData + "]");
                producer.send(data);
                transaction.commit();
                LOGGER.info("Send message success");
                status = Status.READY;
            } else {
                transaction.rollback();
                status = Status.BACKOFF;
            }
        } catch (Exception e) {
            LOGGER.error("Send message failed!", e);
            transaction.rollback();
            status = Status.BACKOFF;
        } finally {
            transaction.close();
        }
        return status;
    }

    @Override
    public void stop() {
        producer.close();
    }
}
Export the project as a jar, e.g. flume-kafka-plugin.jar (the name is arbitrary).
Copy flume-kafka-plugin.jar to /home/hadoop/flume/lib,
and copy the Kafka-related jars to /home/hadoop/flume/lib as well:
cp /home/hadoop/kafka/libs/kafka_2.11-0.8.2.0.jar /home/hadoop/flume/lib
cp /home/hadoop/kafka/libs/scala-library-2.11.5.jar /home/hadoop/flume/lib
cp /home/hadoop/kafka/libs/metrics-core-2.2.0.jar /home/hadoop/flume/lib
cp /home/hadoop/kafka/libs/kafka-clients-0.8.2.0.jar /home/hadoop/flume/lib
Verifying the Flume-Kafka integration
Start Flume on hadoop1:
cd /home/hadoop/flume/bin
./flume-ng agent -c /home/hadoop/flume/conf -n agent1 -f /home/hadoop/flume/conf/flume-kafka-conf.properties -Dflume.root.logger=INFO,console
Create a file under /home/hadoop/flume/temp/log and add some content:
vi test.txt
123
456
Start a Kafka consumer on hadoop2 and check whether the data arrives:
./bin/kafka-console-consumer.sh --zookeeper hadoop2:2181 --topic test_topic --from-beginning
If the console now prints the content written on hadoop1, Flume and Kafka are integrated successfully.
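For a programmatic version of this check, a minimal consumer sketch using the high-level kafka.javaapi.consumer API (the same API the KafkaSpout in the next section builds on) might look like the following; the class name ConsumerCheck and the group id are assumptions of my own.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

// Minimal sketch: print every message that arrives on test_topic.
public class ConsumerCheck {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("zookeeper.connect", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        props.put("group.id", "flume-kafka-check"); // assumed group id

        ConsumerConnector consumer =
                kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("test_topic", 1);

        Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
        ConsumerIterator<byte[], byte[]> it = streams.get("test_topic").get(0).iterator();
        while (it.hasNext()) {
            System.out.println(new String(it.next().message()));
        }
    }
}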
4. Integrating Kafka with Storm
Create a Java project in Eclipse and add all of the jars under /home/hadoop/kafka/libs and /home/hadoop/storm/lib to its build path.
# Write KafkaSpout.java
package kafka.with.storm;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class KafkaSpout implements IRichSpout {

    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private String topic;

    public KafkaSpout() {
    }

    public KafkaSpout(String topic) {
        this.topic = topic;
    }

    public void nextTuple() {
    }

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    public void ack(Object msgId) {
    }

    public void activate() {
        // The Kafka consumer loop runs here rather than in nextTuple().
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, 1);

        System.out.println("*********Results******** topic: " + topic);

        Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            String value = new String(it.next().message());
            SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
            Date curDate = new Date(System.currentTimeMillis()); // current time
            String str = formatter.format(curDate);

            System.out.println("Storm received a message from Kafka -------> " + value);

            collector.emit(new Values(value, 1, str), value);
        }
    }

    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        // ZooKeeper connection string
        props.put("zookeeper.connect", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        // consumer group id
        props.put("group.id", "1");
        props.put("auto.commit.interval.ms", "1000000");
        props.put("auto.commit.enable", "true");
        props.put("zookeeper.session.timeout.ms", "1000000");
        return new ConsumerConfig(props);
    }

    public void close() {
    }

    public void deactivate() {
    }

    public void fail(Object msgId) {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "id", "time"));
    }

    public Map<String, Object> getComponentConfiguration() {
        System.out.println("getComponentConfiguration called");
        topic = "test_topic";
        return null;
    }
}
# Write KafkaTopology.java
package kafka.with.storm;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

public class KafkaTopology {

    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();

        builder.setSpout("spout", new KafkaSpout("test_topic"), 1);
        builder.setBolt("bolt1", new Bolt1(), 2).shuffleGrouping("spout");
        builder.setBolt("bolt2", new Bolt2(), 2).fieldsGrouping("bolt1", new Fields("word"));

        Map conf = new HashMap();
        conf.put(Config.TOPOLOGY_WORKERS, 1);
        conf.put(Config.TOPOLOGY_DEBUG, true);

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("flume-kafka-storm-topology-integration", conf, builder.createTopology());

        Utils.sleep(1000 * 60 * 5); // local cluster test ...
        cluster.shutdown();
    }

    public static class Bolt1 extends BaseBasicBolt {

        public void execute(Tuple input, BasicOutputCollector collector) {
            try {
                String msg = input.getString(0);
                int id = input.getInteger(1);
                String time = input.getString(2);
                msg = msg + "bolt1";
                System.out.println("First pass over the message -------[arg0]: " + msg
                        + " ---[arg1]: " + id + " ---[arg2]: " + time);
                if (msg != null) {
                    collector.emit(new Values(msg));
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }

    public static class Bolt2 extends BaseBasicBolt {
        Map<String, Integer> counts = new HashMap<String, Integer>();

        public void execute(Tuple tuple, BasicOutputCollector collector) {
            String msg = tuple.getString(0);
            msg = msg + "bolt2";
            System.out.println("Second pass over the message ----------> " + msg);
            collector.emit(new Values(msg, 1));
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word", "count"));
        }
    }
}
Run KafkaTopology directly from Eclipse in Storm local mode (it can also be packaged and submitted to a Storm cluster, as sketched below).
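Note that a main() built around LocalCluster still runs the topology locally even when launched with storm jar; for a real cluster submission the topology is handed to StormSubmitter instead. A minimal sketch of such a variant (the class name KafkaTopologyCluster is my own placeholder, reusing the spout and bolts above):

package kafka.with.storm;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class KafkaTopologyCluster {

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new KafkaSpout("test_topic"), 1);
        builder.setBolt("bolt1", new KafkaTopology.Bolt1(), 2).shuffleGrouping("spout");
        builder.setBolt("bolt2", new KafkaTopology.Bolt2(), 2).fieldsGrouping("bolt1", new Fields("word"));

        Map conf = new HashMap();
        conf.put(Config.TOPOLOGY_WORKERS, 1);

        // Submits to the running nimbus instead of an in-process LocalCluster.
        StormSubmitter.submitTopology("flume-kafka-storm-topology-integration", conf, builder.createTopology());
    }
}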
Create a file under /home/hadoop/flume/temp/log and add some content:
vi test2.txt
123
456
If the file's content appears in the Storm console output, Kafka and Storm are configured correctly.
5. Integrating Flume, Kafka, and Storm end to end
As the previous two sections show, Flume and Kafka are already deployed and communicating, and Kafka and Storm can communicate as well; all that remains is to package the Storm-side classes into a jar and deploy it to Storm to connect all three.
Copy the Kafka jars into Storm's lib directory (as explained above, the Kafka-Storm integration works by rewriting a Storm spout that calls the Kafka consumer to receive and print messages, so these jars are needed at runtime). Do this on every Storm node:
cp /home/hadoop/kafka/libs/* /home/hadoop/storm/lib
rm -rf /home/hadoop/storm/lib/slf4j-log4j12-1.6.1.jar   # remove the conflicting SLF4J binding
# Start the Storm nimbus on hadoop1
cd /home/hadoop/storm
./bin/storm nimbus &
# Start the Storm supervisors on hadoop1 and hadoop2
./bin/storm supervisor &
# Start the Storm UI on hadoop1
./bin/storm ui &
# Package the Eclipse project as kafka-strom.jar, copy it to the Storm root directory, and run it with storm
./bin/storm jar kafka-strom.jar kafka.with.storm.KafkaTopology
# Send messages through Flume and check whether Storm receives them
Create a file under /home/hadoop/flume/temp/log and add some content:
vi test3.txt
123
456
If the content shows up in the Storm console output, the whole pipeline is configured correctly.