ignite partition rebalance調試過程

集羣初始化過程中在GridCachePartitionExchangeManager中啓動exchange-worker線程
new IgniteThread(cctx.igniteInstanceName(), "exchange-worker", exchWorker).start();
新加入節點激活集羣時(例如ignite.cluster().active(true))會調用GridClusterStateProcessor.changeGlobalState方法
GridDiscoveryManager.sendCustomEvent(msg),msg是封裝的ChangeGlobalStateMessage
在MessageWorker中將msg加入自己的阻塞隊列queue中
MessageWorker通過ServerImpl.sendMessageAcrossRing(msg)將信息發送到集羣構成的環中
DiscoveryWorker("disco-event-worker")監聽到發現事件將GridDhtPartitionsExchangeFuture加入自己的阻塞隊列futQ
ExchangeWorker從futQ中取出元素作爲CachePartitionExchangeWorkerTask task

ExchangeWorker{
    exchFut.init(newCrd);//exchFut即爲上面的task,如果當前節點在exchange後變爲coordinator時newCrd設置爲true
    if(firstDiscoEvt.type()== EVT_DISCOVERY_CUSTOM_EVT){
        GridDhtPartitionsExchangeFuture.onClusterStateChangeRequest(crdNode);//crdNode爲false表示當前節點不是協調器節點----------p13
        
        //在當前節點上啓動緩存 Cache actions
        List<ExchangeActions.CacheActionData> startReqList =  exchActions.cacheStartRequests();//獲得緩存啓動數據
        List<DynamicCacheDescriptor> startDescs = startReqList.DynamicCacheDescriptor()//僞代碼,獲得緩存啓動描述器
        GridCacheDatabaseSharedManager.readCheckpointAndRestoreMemory(startDescs);//獲得檢查點讀鎖,並恢復緩存
        registerCachesFuture = CacheAffinitySharedManager.onCacheChangeRequest(this, crd, exchActions);//this爲GridDhtPartitionsExchangeFuture對象,crd是否爲協調器,exchActions爲緩存行爲對象
        CachesRegistry.registerAllCachesAndGroups(grpDescs, cacheDescs);//註冊緩存組描述器和動態緩存描述器
        CacheAffinitySharedManager.processCacheStartRequests(fut,crd,exchActions);//處理緩存啓動請求 p20
        GridCacheProcessor.startCacheGroup(req.startCacheConfiguration(),cacheDesc,nearCfg,evts.topologyVersion(),req.disabledAfterStart());//啓動緩存組
        GridCacheContext cacheCtx = createCache(ccfg,grp,null,desc,exchTopVer,cacheObjCtx,affNode,true,disabledAfterStart);//創建緩存
        GridCacheProcessor.startCache(cache,desc.schema()!= null?desc.schema():new QuerySchema());//啓動創建的緩存
        
        //Affinity actions
        CacheAffinitySharedManager.initAffinity(cachesRegistry.group(grp.groupId()),grp.affinity(),fut);//初始化affinity
        List<List<ClusterNode>> assignment = GridAffinityAssignmentCache.calculate(topVer, evts, evts.discoveryCache());//獲得緩存分區到節點的分配
        List<List<ClusterNode>> RendezvousAffinityFunction.assignPartitions(AffinityFunctionContext affCtx);//默認用RendezvousAffinityFunction進行分配
        List<ClusterNode> RendezvousAffinityFunction.assignPartition(int part,List<ClusterNode> nodes,int backups,@Nullable Map<UUID, Collection<ClusterNode>> neighborhoodCache);//對每個分區進行分配
        GridAffinityAssignmentCache.initialize(topVer, assignment);//初始化affinity根據給定拓撲版本和分配
        
        //接p20 處理緩存結束請求
        CacheAffinitySharedManager.processCacheStopRequests(GridDhtPartitionsExchangeFuture fut,boolean crd,final ExchangeActions exchActions,boolean forceClose);
    }else if(firstDiscoEvt.type()== EVT_NODE_JOINED){
        registerCachesFuture = GridDhtPartitionsExchangeFuture.initCachesOnLocalJoin();
        CacheAffinitySharedManager.initCachesOnLocalJoin(locJoinCtx.cacheGroupDescriptors(),locJoinCtx.cacheDescriptors());//使用緩存組描述器s和緩存描述器s初始化緩存
        CachesRegistry.registerAllCachesAndGroups(grpDescs, cacheDescs);//註冊緩存組描述器和動態緩存描述器
        GridCacheProcessor.prepareCacheStart(desc.cacheConfiguration(),desc,t.get2(),exchTopVer,false);//開始啓動緩存
        GridCacheProcessor.startCacheGroup(desc.groupDescriptor(),desc.cacheType(),affNode,cacheObjCtx,exchTopVer);//啓動緩存組
        GridCacheContext cacheCtx = createCache(ccfg,grp,null,desc,exchTopVer,cacheObjCtx,affNode,true,disabledAfterStart);//創建緩存
        GridCacheProcessor.startCache(cache,desc.schema()!= null?desc.schema():new QuerySchema());//啓動創建的緩存
    }
    
    //接p13
    GridDhtPartitionsExchangeFuture.distributedExchange();
    CacheGroupContext.preloader().onTopologyChanged(this);
    IgniteCacheDatabaseSharedManager.reserveHistoryForExchange();//要在啓用持久性時正確地重新平衡,有必要在交換中保留歷史記錄
    GridDhtPartitionsExchangeFuture.waitPartitionRelease(distributed,true);//我們等待所有節點完成本地事務更新,原子更新和鎖釋放
    GridDhtPartitionsExchangeFuture.waitPartitionRelease(false, false);//等待完成剩餘的從主節點到備份節點的所有事務更新
    IgniteCacheDatabaseSharedManager.beforeExchange(this);//必須在所有拓撲回調之前運行數據庫回調。如果啓用了持久存儲,則首先還原磁盤上顯示的分區
    GridDhtPartitionTopologyImpl.beforeExchange( GridDhtPartitionsExchangeFuture exchFut,boolean affReady,boolean updateMoving);
    GridDhtPartitionTopologyImpl.initPartitions(affVer, affAssignment, exchFut, updateSeq);//用給定的拓撲版本和aff分配創建和初始化分區
    IgniteCacheDatabaseSharedManager.onStateRestored();//當所有分區都已完全還原並在節點啓動預先創建時調用
    
    //發送Single message
    GridDhtPartitionsExchangeFuture.sendPartitions(crd);//crd爲協調器節點,向協調器發送本地分區信息
    GridCacheIoManager.send(node, msg, SYSTEM_POOL);//msg爲創建的GridDhtPartitionsSingleMessage,包含了單個節點分區信息
    GridNioServer.AbstractNioClientWorker.offer((SessionChangeRequest)req);//底層使用NIO進行封裝發送到協調器節點

}

sys-#43{
//當前節點收到協調器節點發送的GridDhtPartitionsFullMessage
    GridIoManager.processRegularMessage0(msg,nodeId);//msg是收到的GridDhtPartitionsFullMessage,nodeId爲協調器節點UUID
    GridCacheIoManager.processMessage(UUID nodeId,GridCacheMessage msg,IgniteBiInClosure <UUID,GridCacheMessage> c);
    GridCachePartitionExchangeManager.processFullPartitionUpdate(node,msg);//處理全局的分區更新
    GridDhtPartitionsExchangeFuture.onReceiveFullMessage(final ClusterNode node, final GridDhtPartitionsFullMessage msg);//監聽到全局分區改變消息
    GridDhtPartitionsExchangeFuture.processFullMessage(true, node, msg);
    
    if(如果是本地節點加入導致分區變化執行該操作){
        CacheAffinitySharedManager.onLocalJoin(this, msg, resTopVer);
        CacheAffinitySharedManager.forAllCacheGroups(boolean crd, IgniteInClosureX<GridAffinityAssignmentCache> c);//對於所有的緩存組執行指定的閉包
        CacheGroupAffinityMessage affMsg = receivedAff.get(aff.groupId());//獲得緩存組的分區分配
        List<List<ClusterNode>> assignments = affMsg.createAssignments(nodesByOrder, evts.discoveryCache());//根據初始的idealAssigns和收到的discoCache計算理想的分區分配
        aff.calculate(evts.topologyVersion(), evts, evts.discoveryCache());//assignments爲null,需要重新計算理想的分區分配根據交換事件中的信息
        List<List<ClusterNode>> RendezvousAffinityFunction.assignPartitions(AffinityFunctionContext affCtx);//默認用RendezvousAffinityFunction進行分配
        List<ClusterNode> RendezvousAffinityFunction.assignPartition(int part,List<ClusterNode> nodes,int backups,@Nullable Map<UUID, Collection<ClusterNode>> neighborhoodCache);//對每個分區進行分配
        CacheAffinitySharedManager.initialize(evts.topologyVersion(), assignments);//初始化affinity根據給定拓撲版本和分配
        GridDhtPartitionTopologyImpl.initPartitions(affVer, affAssignment, exchFut, updateSeq);//用給定的拓撲版本和aff分配創建和初始化分區
    }else if(根據實際的分區分佈強制執行親和力重新分配){
        CacheAffinitySharedManager.applyAffinityFromFullMessage(this, msg);//從收到的全局消息中應用親和力差異
        forAllCacheGroups(false, new IgniteInClosureX<GridAffinityAssignmentCache>());
        Map<Integer, CacheGroupAffinityMessage> idealAffDiff = msg.idealAffinityDiff();//獲得與理想分區的差異
        List<List<ClusterNode>> idealAssignment = aff.calculate(evts.topologyVersion(), evts, evts.discoveryCache());//根據全局消息中的事件信息計算得到理想分配
        CacheGroupAffinityMessage affMsg = idealAffDiff != null ? idealAffDiff.get(aff.groupId()) : null;//得到只有分區差異信息的分區消息
        newAssignment.set(e.getKey(), CacheGroupAffinityMessage.toNodes(assign,nodesByOrder,evts.discoveryCache()));//根據差異信息調整分區分配
        CacheAffinitySharedManager.initialize(evts.topologyVersion(), assignments);//初始化affinity根據給定拓撲版本和分配
    }
    GridDhtPartitionsExchangeFuture.updatePartitionFullMap(resTopVer, msg);//更新所有緩存的分區映射
    partHistSuppliers.putAll(msg.partitionHistorySuppliers());//根據GridDhtPartitionsFullMessage給partHistSuppliers賦值
    CachePartitionFullCountersMap cntrMap = msg.partitionUpdateCounters(grpId,grp.topology().partitions());//獲得更新的分區數量
    grp.topology().update(resTopVer,entry.getValue(),cntrMap,msg.partsToReload(cctx.localNodeId(), grpId),msg.partitionSizes(grpId),null);//更新拓撲
    cntrMap.updateCounter(i, incomeCntrMap.updateCounter(i));//更新分區計數器
    AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);//獲得目前的緩存全局分區信息
    updateRebalanceVersion(aff.topologyVersion(), aff.assignment());//更新rebalance版本
    GridDhtPartitionsExchangeFuture.onDone(resTopVer,err);//完成全局變化信息處理
    detectLostPartitions(res);//檢查丟失的分區,res爲目前的AffinityTopologyVersion
    processCacheStopRequestOnExchangeDone(exchActions);//如果沒有丟失分區,處理緩存結束請求
    ExchangeActions.cacheStopRequests();//停止緩存的請求
}

 

協調器節點視角 (當有節點加入時)
sys-#**{
    ...
    GridCacheIoManager.processMessage(nodeId, cacheMsg, c);//處理收到的GridDhtPartitionsSingleMessage消息
    GridCachePartitionExchangeManager.processSinglePartitionUpdate(node, msg);
    exchFut.onReceiveSingleMessage(node, msg);//處理單個節點的分區更新
    processSingleMessage(node.id(), msg);
    GridDhtPartitionsExchangeFuture.updatePartitionSingleMap(UUID nodeId,GridDhtPartitionsSingleMessage msg);//更新分區拓撲
    onAllReceived(null);
    GridCachePartitionExchangeManager.mergeExchangesOnCoordinator(this);//合併分區更改事件
    finishExchangeOnCoordinator(sndResNodes);//p115
    WaitRebalanceInfo waitRebalanceInfo = initAffinityOnNodeJoin(fut, crd);//初始化affinity
    initAffinityOnNodeJoin(evts,grpAdded,cache.affinity(),waitRebalanceInfo,latePrimary,affCache);
    List<List<ClusterNode>> idealAssignment = aff.calculate(evts.topologyVersion(), evts, evts.discoveryCache());//根據節點消息中的事件信息計算得到理想分配
    aff.initialize(evts.topologyVersion(), cachedAssignment(aff, newAssignment, affCache));//使用給定的拓撲版本和分配初始化親和力
    top.beforeExchange(this, true, true);//預初始化拓撲
    GridDhtPartitionTopologyImpl.initPartitions(affVer, affAssignment, exchFut, updateSeq);//用給定的拓撲版本和aff分配創建和初始化分區
    createPartitions(affVer, affAssignment, updateSeq);//根據affinity創建不存在的分區
    updateRebalanceVersion(affVer,affAssignment);//更新rebalance的版本rebalancedTopVer
    createMovingPartitions(grp.affinity().readyAffinity(evts.topologyVersion()));//創建移動的分區
    CachePartitionPartialCountersMap cntrs = msg.partitionUpdateCounters(grpId,top.partitions());//根據收到的single msg 更新分區計數器
    GridDhtPartitionsExchangeFuture.assignPartitionsStates();//分配分區的狀態
    GridDhtPartitionsFullMessage msg = createPartitionsMessage(true,minVer.compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);//創建full msg
    m.addPartitionUpdateCounters(grp.groupId(), cntrsMap);//給full msg增加分區計數器 partCntrs2
    addFullPartitionsMap(m,dupData,compress,grp.groupId(),locMap,affCache.similarAffinityKey());//爲msg添加全局分區映射 parts和dupPartsData
    m.addPartitionSizes(grp.groupId(), grp.topology().globalPartSizes());//msg添加分區大小信息partsSizes
    msg.resultTopologyVersion(resTopVer);//更新msg的拓撲版本
    msg.prepareMarshal(cctx);//對msg進行序列化
    sendAllPartitions(msg, nodes, mergedJoinExchMsgs0, joinedNodeAff);//mergedJoinExchMsgs0爲多個節點加入產生single msg的merge,將full msg發送給非協調器節點
    cctx.io().send(node, fullMsgToSend, SYSTEM_POOL);
    onDone(exchCtx.events().topologyVersion(), null);
    
}
ExchangeWorker{
    exchFut.init(newCrd);
    if (firstDiscoEvt.type() == EVT_NODE_JOINED) {
        if(!firstDiscoEvt.eventNode().isLocal()){
            GridCacheProcessor.startReceivedCaches(firstDiscoEvt.eventNode().id(),topVer);//啓動交換期間從遠程節點接收的靜態配置的緩存
            CacheAffinitySharedManager.initStartedCaches(crdNode, this, receivedCaches);//初始化親和力
            updateTopologies(crdNode, cctx.coordinators().currentCoordinator());//更新所有拓撲上的拓撲版本和發現緩存
        }else{
            registerCachesFuture = GridDhtPartitionsExchangeFuture.initCachesOnLocalJoin();
            CacheAffinitySharedManager.initCachesOnLocalJoin(locJoinCtx.cacheGroupDescriptors(),locJoinCtx.cacheDescriptors());//使用緩存組描述器s和緩存描述器s初始化緩存
            CachesRegistry.registerAllCachesAndGroups(grpDescs, cacheDescs);//註冊緩存組描述器和動態緩存描述器
            GridCacheProcessor.prepareCacheStart(desc.cacheConfiguration(),desc,t.get2(),exchTopVer,false);//開始啓動緩存
        }
        
    }else if(firstDiscoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT){
        cctx.affinity().onCentralizedAffinityChange(this, crdNode);//由服務器節點離開或自定義事件(具有集中的相似性分配)發起的交換調用
    forAllRegisteredCacheGroups(new IgniteInClosureX<CacheGroupDescriptor>() c);//對所有已經註冊的緩存組執行閉包中的操作
        cache.aff.calculate(fut.initialVersion(), fut.events(), fut.events().discoveryCache());//計算affinity
        //discoCache.state().baselineTopology().equals(baselineTopology)爲false時重新分配分區,利用Wang/Jenkins hash對每個分區進行節點排序,前(1+backups)被記錄下來
        aff.assignPartitions(new GridAffinityFunctionContextImpl(discoCache.state().baselineTopology().createBaselineView(sorted, nodeFilter),
        prevAssignment, events.lastEvent(), topVer, backups));
    updateTopologies(crdNode, cctx.coordinators().currentCoordinator());//更新所有拓撲上的拓撲版本和發現緩存
    }
    GridDhtPartitionsExchangeFuture.distributedExchange();
    grp.preloader().onTopologyChanged(this);
    IgniteCacheDatabaseSharedManager.reserveHistoryForExchange();//要在啓用持久性時正確地重新平衡,有必要在交換中保留歷史記錄
    GridDhtPartitionsExchangeFuture.waitPartitionRelease(distributed,true);//我們等待所有節點完成本地事務更新,原子更新和鎖釋放
    GridDhtPartitionsExchangeFuture.waitPartitionRelease(false, false);//等待完成剩餘的從主節點到備份節點的所有事務更新
    IgniteCacheDatabaseSharedManager.beforeExchange(this);//必須在所有拓撲回調之前運行數據庫回調。如果啓用了持久存儲,則首先還原磁盤上顯示的分區
    GridDhtPartitionTopologyImpl.beforeExchange( GridDhtPartitionsExchangeFuture exchFut,boolean affReady,boolean updateMoving);
    GridDhtPartitionTopologyImpl.initPartitions(affVer, affAssignment, exchFut, updateSeq);//用給定的拓撲版本和aff分配創建和初始化分區
    GridDhtPartitionTopologyImpl.createPartitions(affVer, affAssignment, updateSeq);//根據affinity創建不存在的分區
    updateSeq = GridDhtPartitionTopologyImpl.updateLocal(p, locPart.state(), updateSeq, affVer);//更新本地node2part映射中的分區狀態,並重新計算diffFromAffinity
    updateRebalanceVersion(affVer,affAssignment);//更新rebalance的版本rebalancedTopVer
    if(crd.isLocal()){//當前節點是協調器節點,不發送single message
    // Single message
    }
    GridDhtPartitionsExchangeFuture.initDone();//回頭通知future已經完成
    GridDhtPartitionsExchangeFuture.processSingleMessage(node.id(),msg);//處理單個分區消息
    GridDhtPartitionsExchangeFuture.onAllReceived(Collection <ClusterNode> sndResNodes);
    GridDhtPartitionsExchangeFuture.finishExchangeOnCoordinator(Collection <ClusterNode> sndResNodes);//在協調器節點完成分區變更
    cctx.affinity().onServerJoinWithExchangeMergeProtocol(this, true);
    forAllRegisteredCacheGroups(new IgniteInClosureX<CacheGroupDescriptor>() c);//對每個註冊的緩存組執行閉包
    initAffinityOnNodeJoin(evts,grpAdded,cache.affinity(),waitRebalanceInfo,latePrimary,affCache);//初始化affinity
    aff.initialize(evts.topologyVersion(), cachedAssignment(aff, newAssignment, affCache));//使用給定的拓撲版本和分配初始化親和力
    GridDhtPartitionsFullMessage msg = createPartitionsMessage(true,minVer.compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);//生成full message
    m.addPartitionUpdateCounters(grp.groupId(), cntrsMap);//對msg添加分區計數器partCntrs2
    m.addPartitionSizes(grp.groupId(), grp.topology().globalPartSizes());//對msg添加分區大小消息partsSizes
    msg.prepareMarshal(cctx);//對full message進行序列化
    sendAllPartitions(msg, nodes, mergedJoinExchMsgs0, joinedNodeAff);//發送full message 底層使用NIO封裝發送出去,需要藉助於GridNioServer.processWrite(key)
    ConcurrentLinkedQueue<SessionChangeRequest> changeReqs.offer(SessionChangeRequest req);//將full msg封裝到SessionChangeRequest對象中
}
grid-nio-work-tcp-comm-0-*{
    processSelectedKeysOptimized(selectedKeys.flip());//處理選擇器選擇的key
    GridNioServer.processWrite(key);//在key上進行寫就緒處理
    finished = msg.writeTo(buf, writer);//根據key獲得session,進而獲得buffer和writer
    writer.writeMessage("msg", msg);//寫對應的嵌套消息
    while ((req0 = changeReqs.poll()) != null){}//從changeReqs隊列中取請求
}
tcp-disco-msg-worker-#*{
    processMessage(msg);//msg爲ChangeGlobalStateFinishMessage消息
}

發佈了23 篇原創文章 · 獲贊 14 · 訪問量 1萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章