- Kafka internals
(To be expanded…)
1. Write flow
1) The producer first looks up the partition's leader from the "/brokers/…/state" node in ZooKeeper.
2) The producer sends the message to that leader.
3) The leader writes the message to its local log.
4) The followers pull the message from the leader, write it to their local logs, and send an ACK back to the leader.
5) Once the leader has received ACKs from all replicas in the ISR, it advances the HW (high watermark: the offset of the last committed message) and sends an ACK to the producer.
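A minimal sketch of how this flow surfaces in a client, using the plain kafka-clients library rather than the Spring setup shown later (the class name is made up; the broker address and topic name are taken from the examples further down). With acks=all, the broker's acknowledgment in step 5 is only sent after the full ISR has the record:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class AckDemoProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.29.131:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // Wait for the whole ISR (step 5 above) before the send counts as acknowledged.
        props.put(ProducerConfig.ACKS_CONFIG, "all");

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>("test", "hello"), (metadata, exception) -> {
                if (exception != null) {
                    exception.printStackTrace(); // the leader never committed the record
                } else {
                    System.out.printf("acked at partition=%d offset=%d%n",
                            metadata.partition(), metadata.offset());
                }
            });
        } // close() flushes any buffered records
    }
}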
2. Retention policy
Kafka retains all messages, whether or not they have been consumed. Two policies can be used to delete old data:
Time-based: log.retention.hours=168 (7 days)
Size-based: log.retention.bytes=1073741824 (1 GiB)
3. Consumption
After producers publish messages to the Kafka cluster, consumers consume them. There are two consumption models, push and pull; Kafka consumers pull messages from the brokers.
(1) Within a single consumer group, a message is consumed only once: at any given time a partition is consumed by at most one consumer in the group. To broadcast a message, set up multiple consumer groups. Producing messages across multiple partitions provides load balancing.
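A minimal consumer sketch illustrating this (again plain kafka-clients; the group id "group-a" and topic "test" are illustrative). Every consumer started with the same group.id splits the topic's partitions among themselves; consumers with a different group.id each get the full stream:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class GroupDemoConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.29.131:9092");
        // Consumers sharing this id divide the partitions between them.
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "group-a");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("test"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("partition=%d offset=%d value=%s%n",
                            record.partition(), record.offset(), record.value());
                }
            }
        }
    }
}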
Q&A:
A message queue deployment is never a single machine in practice (it must be distributed or clustered).
Kafka is distributed by design: writing data to a topic really means storing it across partitions on multiple brokers.
Data written to a message queue can be lost, so it must be persisted somewhere (disk? a database? Redis? a distributed file system?).
Kafka persists each partition as a message log on disk, and keeps writes fast through sequential I/O and caching (buffering until enough data or time has accumulated before actually flushing to disk).
How do you guarantee that messages (data) are ordered?
Kafka writes data to partitions, and writes within a single partition are ordered. To guarantee global ordering you can only write to a single partition, and for ordered consumption there can likewise be only one consumer.
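A common middle ground (an addition here, not stated above) is per-key ordering: records with the same key always hash to the same partition, so events for one entity stay ordered even on a multi-partition topic. Given the KafkaProducer<String, String> producer from the earlier sketch, and "order-42" as a made-up key:

// Both records carry the same key, so they land in the same partition, in order.
producer.send(new ProducerRecord<>("test", "order-42", "created"));
producer.send(new ProducerRecord<>("test", "order-42", "paid")); // arrives after "created"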
Why can data be consumed more than once from a message queue?
Anything distributed cannot escape network jitter, machine crashes, and so on. Consumer A may well read a record and crash before it finishes processing it; ZooKeeper notices that A is down and hands A's partition to consumer B, and when A reconnects it finds the same record has been consumed twice. (All sorts of situations can trigger this: consumer timeouts and so on…)
If the business cannot tolerate duplicate consumption, the best guard is a business-level check on the consumer side (if a record has already been processed, skip it).
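A minimal sketch of such a check (the class and method names are made up; the in-memory set is only for illustration, a real deployment would persist the seen-ids in a database or Redis so they survive restarts):

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class IdempotentHandler {
    // Dedupe store: ids of records that have already been processed.
    private final Set<String> processed = ConcurrentHashMap.newKeySet();

    public void handle(String topic, int partition, long offset, String value) {
        // topic-partition-offset uniquely identifies a record.
        String id = topic + "-" + partition + "-" + offset;
        if (!processed.add(id)) {
            return; // already seen: skip instead of reprocessing
        }
        // ... business logic goes here ...
    }
}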
- Kafka installation
Environment: Docker and docker-compose. The docker-compose.yml:
version: '2'
services:
  zookeeper:
    image: zookeeper
    ports:
      - "2181:2181"
  kafka:
    image: wurstmeister/kafka
    ports:
      - "9092:9092"
    environment:
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://192.168.29.131:9092
      KAFKA_ADVERTISED_HOST_NAME: 192.168.29.131
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
  kafka-manager:
    image: sheepkiller/kafka-manager:latest
    restart: always
    container_name: kafka-manager
    hostname: kafka-manager
    ports:
      - "9002:9000"
    links: # link to the containers created by this compose file
      - kafka
      - zookeeper
    environment:
      ZK_HOSTS: zookeeper:2181
      KAFKA_BROKERS: kafka:9092
      APPLICATION_SECRET: letmein
      KM_ARGS: -Djava.net.preferIPv4Stack=true
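Save the file as docker-compose.yml and start the stack with docker-compose up -d. Kafka is then reachable on port 9092 and the kafka-manager UI on port 9002 (mapped from the container's 9000).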
- Kafka development
Configuration file: application.yml
server:
  port: 8080
  servlet:
    context-path: /
spring:
  kafka:
    bootstrap-servers: 192.168.29.131:9092
    producer:
      retries: 0
      batch-size: 16384
      buffer-memory: 33554432
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
    consumer:
      group-id: service-kafka
      # manual offset commit
      enable-auto-commit: false
      auto-offset-reset: latest
      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      properties:
        session.timeout.ms: 60000
    listener:
      log-container-config: false
      concurrency: 5
      # manual offset commit
      ack-mode: manual_immediate
pom.xml dependencies:
<!-- kafka -->
<dependency>
    <groupId>org.springframework.kafka</groupId>
    <artifactId>spring-kafka</artifactId>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
</dependency>
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
</dependency>
Java code
Producer (and the shared Kafka configuration)
package com.wqq.service.config;

import com.wqq.service.constants.KafkaConsts;
import lombok.AllArgsConstructor;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.*;
import org.springframework.kafka.listener.ContainerProperties;

@Configuration
@EnableConfigurationProperties({KafkaProperties.class})
@EnableKafka
@AllArgsConstructor
public class KafkaConfig {
    private final KafkaProperties kafkaProperties;

    @Bean
    public KafkaTemplate<String, String> kafkaTemplate() {
        return new KafkaTemplate<>(producerFactory());
    }

    @Bean
    public ProducerFactory<String, String> producerFactory() {
        return new DefaultKafkaProducerFactory<>(kafkaProperties.buildProducerProperties());
    }

    // Default container factory: batch listener, one thread per partition.
    @Bean
    public ConcurrentKafkaListenerContainerFactory<String, String> kafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        factory.setConcurrency(KafkaConsts.DEFAULT_PARTITION_NUM);
        factory.setBatchListener(true);
        factory.getContainerProperties().setPollTimeout(3000);
        return factory;
    }

    @Bean
    public ConsumerFactory<String, String> consumerFactory() {
        return new DefaultKafkaConsumerFactory<>(kafkaProperties.buildConsumerProperties());
    }

    // Container factory for listeners that commit offsets manually.
    @Bean("ackContainerFactory")
    public ConcurrentKafkaListenerContainerFactory<String, String> ackContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
        factory.setConcurrency(KafkaConsts.DEFAULT_PARTITION_NUM);
        return factory;
    }
}
package com.wqq.service.constants;

public interface KafkaConsts {
    /**
     * Default partition count
     */
    Integer DEFAULT_PARTITION_NUM = 3;

    /**
     * Topic name
     */
    String TOPIC_TEST = "test";
}
package com.wqq.service;

import com.wqq.service.constants.KafkaConsts;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.kafka.core.KafkaTemplate;

@SpringBootTest
class ServiceKafkaApplicationTests {
    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    /**
     * Send a test message
     */
    @Test
    public void testSend() throws Exception {
        kafkaTemplate.send(KafkaConsts.TOPIC_TEST, "hello,kafka...");
    }
}
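Note that send() returns a future, so the test above can pass even if the broker never acknowledges the record. A variant that fails fast (a sketch; the exact future type differs across spring-kafka versions, but get() with a timeout is available on all of them, and it needs an import of java.util.concurrent.TimeUnit):

// Block up to 10 seconds for the broker's acknowledgment so a
// misconfigured bootstrap server fails the test instead of passing silently.
kafkaTemplate.send(KafkaConsts.TOPIC_TEST, "hello,kafka...").get(10, TimeUnit.SECONDS);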
Consumer
package com.wqq.service.handler;

import com.wqq.service.constants.KafkaConsts;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.support.Acknowledgment;
import org.springframework.stereotype.Component;

@Component
@Slf4j
public class MessageHandler {

    @KafkaListener(topics = KafkaConsts.TOPIC_TEST, containerFactory = "ackContainerFactory")
    public void handleMessage(ConsumerRecord<String, String> record, Acknowledgment acknowledgment) {
        try {
            String message = record.value();
            log.info("Received message: {}", message);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            // Manually commit the offset
            acknowledgment.acknowledge();
        }
    }
}
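One design consequence worth noting: because acknowledge() sits in the finally block, the offset is committed even when processing throws, so a failed record is never redelivered. If redelivery on failure is wanted, call acknowledge() only as the last statement of the try block instead.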