vivo 基於原生 RabbitMQ 的高可用架構實踐

{"type":"doc","content":[{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"一、背景說明"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"vivo 在 2016 年引入 RabbitMQ,基於開源 RabbitMQ 進行擴展,向業務提供消息中間件服務。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"2016~2018年,所有業務均使用一個集羣,隨着業務規模的增長,集羣負載越來越重,集羣故障頻發。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"2019年,RabbitMQ 進入高可用建設階段,完成了高可用組件 MQ 名字服務以及 RabbitMQ 集羣的同城雙活建設。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"同時進行業務使用集羣的物理拆分,嚴格按照集羣負載情況和業務流量進行業務使用集羣的分配以及動態調整。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在 2019 年高可用建設後至今,業務流量增加了十倍,集羣未出現過嚴重故障。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ 是實現了 AMQP 
協議的開源消息代理軟件,起源於金融系統。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"具有豐富的特性:"}]},{"type":"blockquote","content":[{"type":"numberedlist","attrs":{"start":1,"normalizeStart":1},"content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":1,"align":null,"origin":null},"content":[{"type":"text","text":"消息可靠性保證,RabbitMQ 通過發送確認保證消息發送可靠;通過集羣化、消息持久化、鏡像隊列的方式保證消息在集羣的可靠;通過消費確認保證消息消費的可靠性。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":2,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ 提供了多種語言的客戶端。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":3,"align":null,"origin":null},"content":[{"type":"text","text":"提供了多種類型的 exchange,消息發送到集羣後通過exchange路由到具體的queue中。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":4,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ 提供了完善的管理後臺和管理 API,通過管理API可以快速與自建監控系統整合。"}]}]}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ 
在具體實踐中發現的問題:"}]},{"type":"blockquote","content":[{"type":"numberedlist","attrs":{"start":1,"normalizeStart":1},"content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":1,"align":null,"origin":null},"content":[{"type":"text","text":"爲保障業務高可用使用多套集羣進行物理隔離,多套集羣無統一平臺進行管理。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":2,"align":null,"origin":null},"content":[{"type":"text","text":"原生RabbitMQ客戶端使用集羣地址連接,使用多套集羣時業務需要關心集羣地址,使用混亂。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":3,"align":null,"origin":null},"content":[{"type":"text","text":"原生RabbitMQ僅有簡單的用戶名/密碼驗證,不對使用的業務應用方進行鑑權,不同業務容易混用exchange/queue信息,造成業務應用使用異常。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":4,"align":null,"origin":null},"content":[{"type":"text","text":"使用的業務應用方較多,無平臺維護消息發送方、消費方的關聯信息,多個版本迭代後無法確定對接方。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":5,"align":null,"origin":null},"content":[{"type":"text","text":"客戶端無限流,業務突發異常流量衝擊甚至擊垮集羣。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":6,"align":null,"origin":null},"content":[{"type":"text","text":"客戶端無異常消息重發策略,需要使用方實現。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":7,"align":null,"origin":null},"content":[{"type":"text","text":"集羣出現內存溢出等造成集羣阻塞時無法快速自動轉移到其它可用集羣。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":8,"align":null,"origin":null},"content":[{"type":"text","text":"使用鏡像隊列,隊列的master節點會落在具體某個節點上,在集羣隊列數較多時,容易出現節點負載不均衡的情況。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":9,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ無隊列自動平衡能力,在隊列較多時容易出現集羣節點負載不均問題。"}]}]}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"te
xt","text":"二、整體架構"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/1a/1a2ec8b9d5217ab0496c0ac1f97e8208.png","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"boxShadow"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"text","text":"1、MQ-Portal--支持應用使用申請"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"過往業務團隊使用RabbitMQ時,應用申請的流量以及對接的應用等信息都在線下表格記錄,較爲零散,更新不及時,無法準確瞭解業務當前真實的使用情況,因此通過一個接入申請的流程可視化、平臺化建立應用使用的元數據信息。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/18/1877f9b6c2c9f6f7eca7219fc35d8d76.png","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"boxShadow"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"通過MQ-Portal的申請流程(如上圖),確定了消息發送應用、消費應用、使用exchange/queue、發送流量等信息。使用申請提交後將進入vivo內部工單流程進行審批。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/ed/eda3dfe28a6fbb334153bda66e985c3e.png","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"boxShadow"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"
type":"text","text":"工單流程審批通過後,通過工單的接口回調,分配應用具體使用的集羣,並在集羣上創建exchange/queue以及綁定關係。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"由於採用多集羣物理隔離的方式保證業務在正式環境的高可用,無法簡單通過一個exchange/queue的名稱定位到使用的集羣。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"每一個exchange/queue與集羣之間通過唯一的一對rmq.topic.key與rmq.secret.key進行關聯,這樣SDK啓動過程中即可定位到具體使用的集羣。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"rmq.topic.key與rmq.secret.key將在工單的回調接口中進行分配。"}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/f9/f982a8b7cd4e8b6473b99dc5fcc1ea81.png","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"boxShadow"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"text","text":"2、客戶端SDK能力概述"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"客戶端SDK基於spring-message和spring-rabbit進行封裝,並在此基礎上提供了應用使用鑑權、集羣尋址、客戶端限流、生產消費重置、阻塞轉移等能力。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.1、應用使用鑑權"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"開源RabbitMQ僅通過用戶名密碼的方式判斷是否允許連接集羣,但是應用是否允許使用exchange/queue是未進行校驗的。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"par
agraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"爲了避免不同業務混用exchange/queue,需要對應用進行使用鑑權。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"應用鑑權由SDK和MQ-NameServer協同完成。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"應用啓動時首先會上報應用配置的rmq.topic.key信息到MQ-NameServer,由MQ-NameServer判斷使用應用與申請應用是否一致,並且在SDK發送消息過程中還會進行二次校驗。"}]},{"type":"codeblock","attrs":{"lang":"java"},"content":[{"type":"text","text":"\n/**\n * 發送前校驗,並且獲取真正的發送factory,這樣業務可以聲明多個,\n * 但是用其中一個bean就可以發送所有的消息,並且不會導致任何異常\n * @param exchange 校驗參數\n * @return 發送工廠\n*/\npublic AbstractMessageProducerFactory beforeSend(String exchange) {\n if(closed || stopped){\n //上下文已經關閉拋出異常,阻止繼續發送,減少發送臨界狀態數據\n throw new RmqRuntimeException(String.format(\"producer sending message to exchange %s has closed, can't send message\", this.getExchange()));\n }\n if (exchange.equals(this.exchange)){\n return this;\n }\n if (!VIVO_RMQ_AUTH.isAuth(exchange)){\n throw new VivoRmqUnAuthException(String.format(\"發送topic校驗異常,請勿向無權限exchange %s 發送數據,發送失敗\", exchange));\n }\n //獲取真正的發送的bean,避免發送錯誤\n return 
PRODUCERS.get(exchange);\n}"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.2、集羣尋址"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"前文說過,應用使用RabbitMQ嚴格按照集羣的負載情況和業務流量進行集羣的分配,因此具體某個應用使用的不同exchange/queue可能是分配在不同的集羣上的。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"爲了提升業務的開發效率, 需要屏蔽多集羣對業務的影響,因此按照應用配置的rmq.topic.key信息進行集羣的自動尋址。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.3、客戶端限流"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"原生SDK客戶端不進行發送流量限流,在部分應用存在異常持續向MQ發送消息時,可能會沖垮MQ集羣。並且一個集羣爲多應用共同使用,單一應用造成的集羣異常將會影響使用該集羣的所有應用。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"因此需要在SDK中提供客戶端限流的能力,必要時可以限制應用向集羣發送消息,保障集羣的穩定。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.4、生產消費重置"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"(1)隨着業務規模增長,集羣負載持續增加,此時需要進行集羣的業務拆分。爲了在拆分過程中避免業務重啓,需要有生產消費重置功能。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"typ
e":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"(2)集羣出現異常,可能會造成消費者掉線,此時通過生產消費重置可以快速拉起業務消費。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"爲了實現生產消費重置,需要實現以下流程:"}]},{"type":"blockquote","content":[{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"重置連接工廠連接參數"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"重置連接"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"建立新的連接"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"重新啓動生產消費"}]}]}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"java"},"content":[{"type":"text","text":"CachingConnectionFactory connectionFactory = new CachingConnectionFactory();\nconnectionFactory.setAddresses(address);\nconnectionFactory.resetConnection();\nrabbitAdmin = new RabbitAdmin(connectionFactory);\nrabbitTemplate = new 
RabbitTemplate(connectionFactory);"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"同時MQ-SDK中有異常消息重發策略,可以避免在生產重置過程中導致的消息發送異常。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.5、阻塞轉移"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ在內存使用超過40%,或是磁盤使用超限制時會阻塞消息發送。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"由於vivo中間件團隊已經完成了RabbitMQ同城雙活的建設,因此在出現一個集羣發送阻塞時可以通過生產消費重置到雙活集羣完成阻塞的快速轉移。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"2.6、多集羣調度"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"隨着應用的發展,單集羣將無法滿足應用的流量需求,並且集羣隊列均爲鏡像隊列,無法簡單地通過增加集羣節點的方式實現單集羣業務支撐流量的水平擴容。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"因此需要SDK支持多集羣調度能力,通過將流量分散到多個集羣上滿足業務大流量需求。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"3、MQ-NameServer--支持MQ-SDK實現故障快速切換"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"MQ-NameServer爲無狀態服務,通過集羣部署即可保障自身高可用,主要用於解決以下問題:"}]},{"type":"blockquote","content":[{"type":"bulletedlist","content":[{"type":"listitem",
"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"MQ-SDK啓動鑑權以及應用使用集羣定位。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"處理MQ-SDK的定時指標上報(消息發送數量、消息消費數量),並且返回當前可用集羣地址,確保SDK在集羣異常時按照正確地址進行重連。"}]}]},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"控制MQ-SDK進行生產消費重置。"}]}]}]}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"4、MQ-Server高可用部署實踐"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/20/2081138915bc3a4283103b0e568bd3e5.png","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"boxShadow"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ 
集羣均採用同城雙活部署架構,依靠MQ-SDK和MQ-NameServer提供的集羣尋址、故障快速切換等能力保障集羣的可用性。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"4.1、集羣腦裂問題處理"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ官方提供了三種集羣腦裂恢復策略。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong"}],"text":"(1)ignore"}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"忽略腦裂問題不處理,在出現腦裂時需要進行人爲干預纔可恢復。由於需要人爲干預,可能會造成部分消息丟失,在網絡非常可靠的情況可以使用。"}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong"}],"text":"(2)pause_minority"}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"節點在與超過半數集羣節點失聯時將會自動暫停,直到檢測到與集羣超半數節點的通信恢復。極端情況下集羣內所有節點均暫停,造成集羣不可用。"}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong"}],"text":"(3)autoheal"}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"少數派節點將自動重啓,此策略主要用於優先保證服務的可用性,而不是數據的可靠性,因爲重啓節點上的消息會丟失。"}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,
"origin":null},"content":[{"type":"text","text":"由於RabbitMQ集羣均爲同城雙活部署,即使單集羣異常業務流量也可自動遷移到雙活機房集羣,因此選擇使用了pause_minority策略避免腦裂問題。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"2018年多次因網絡抖動造成集羣腦裂,在修改集羣腦裂恢復策略後,已未再出現腦裂問題。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"4.2、集羣高可用方案"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"RabbitMQ採用集羣化部署,並且因爲集羣腦裂恢復策略採用pause_minority模式,每個集羣要求至少3個節點。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"推薦使用5或7節點部署高可用集羣,並且控制集羣隊列數量."}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"集羣隊列均爲鏡像隊列,確保消息存在備份,避免節點異常導致消息丟失。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"exchange、queue、消息均設置爲持久化,避免節點異常重啓消息丟失。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"隊列均設置爲lazy 
queues,減少節點內存使用的波動。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","marks":[{"type":"underline"}],"text":"4.3、同城雙活建設"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"雙機房部署等價集羣,並且通過Federation插件將雙集羣組成聯盟集羣。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"本機房應用機器優先連接本機房MQ集羣,避免因專線抖動造成應用使用異常。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"通過MQ-NameServer心跳獲取最新的可用集羣信息,異常時重連到雙活集羣中,實現應用功能的快速恢復。"}]},{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"三、未來挑戰與展望"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"目前對RabbitMQ的使用增強主要在MQ-SDK和MQ-NameServer側,SDK實現較爲複雜,後期希望可以構建消息中間件的代理層,可以簡化SDK並且對業務流量做更加細緻化的管理。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"horizontalrule"},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"作者:derek"}]}]}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章