NSQ Series (2): nsqd Principles and Implementation

In the previous article we got a rough picture of NSQ's cluster architecture and interaction flow. Here we walk through the code to see how some of nsqd's features are implemented, mainly:

  • nsqd and Producers: how nsqd handles messages published by a Producer, and what happens when a message is a deferred (delayed) publish.
  • nsqd and Consumers: how nsqd handles Consumer client connections, how it delivers messages to Consumers, and what happens when a message times out without being acknowledged.
  • nsqd and nsqlookupd: how nsqd periodically pings nsqlookupd and how Topic and Channel changes are propagated.

Data Structures

Let's start with the design of the data structures. There are three main parts: NSQD, Topic, and Channel.

NSQD

The NSQD struct is defined as follows:

type NSQD struct {
	// 64bit atomic vars need to be first for proper alignment on 32bit platforms
	clientIDSequence int64

	sync.RWMutex

	opts atomic.Value

	dl        *dirlock.DirLock
	isLoading int32
	errValue  atomic.Value
	startTime time.Time

	topicMap map[string]*Topic

	clientLock sync.RWMutex
	clients    map[int64]Client

	lookupPeers atomic.Value

	tcpListener   net.Listener
	httpListener  net.Listener
	httpsListener net.Listener
	tlsConfig     *tls.Config

	poolSize int

	notifyChan           chan interface{}
	optsNotificationChan chan struct{}
	exitChan             chan int
	waitGroup            util.WaitGroupWrapper

	ci *clusterinfo.ClusterInfo
}

The main fields of NSQD:

topicMap map[string]*Topic

stores all Topics, keyed by topic name.

clients    map[int64]Client

stores all clients currently connected to nsqd, keyed by ClientID (nsqd assigns an ID to each client when the connection is established).

Topic

type Topic struct {
	// 64bit atomic vars need to be first for proper alignment on 32bit platforms
	messageCount uint64
	messageBytes uint64

	sync.RWMutex

	name              string
	// Channels belonging to this Topic
	channelMap        map[string]*Channel
	// backend message store (currently disk-based)
	backend           BackendQueue
	// in-memory message store
	memoryMsgChan     chan *Message
	startChan         chan int
	exitChan          chan int
	// notified when the Topic's set of Channels changes
	channelUpdateChan chan int
	waitGroup         util.WaitGroupWrapper
	exitFlag          int32
	idFactory         *guidFactory
	
	// whether this is an ephemeral Topic
	ephemeral      bool
	deleteCallback func(*Topic)
	deleter        sync.Once

	paused    int32
	pauseChan chan int

	ctx *context
}

Channel

In NSQ, messages flow from a Producer to a Topic, and from the Topic to its Channels. Like a Topic, a Channel stores messages both in memory and in a backend, and that backend is also currently disk-based.

type Channel struct {
	// 64bit atomic vars need to be first for proper alignment on 32bit platforms
	requeueCount uint64
	messageCount uint64
	timeoutCount uint64

	sync.RWMutex

	topicName string
	name      string
	ctx       *context
	
	// backend store
	backend BackendQueue
	// in-memory store
	memoryMsgChan chan *Message
	exitFlag      int32
	exitMutex     sync.RWMutex

	// state tracking
	clients        map[int64]Consumer
	paused         int32
	// whether this is an ephemeral Channel
	ephemeral      bool
	deleteCallback func(*Channel)
	deleter        sync.Once

	// Stats tracking
	e2eProcessingLatencyStream *quantile.Quantile

	// TODO: these can be DRYd up
	// NSQ supports deferred (delayed) delivery; the Channel tracks that state here
	// all messages pending deferred delivery, keyed by message ID
	deferredMessages map[MessageID]*pqueue.Item
	// deferred delivery queue, a heap ordered by delivery time
	deferredPQ       pqueue.PriorityQueue
	deferredMutex    sync.Mutex
	// all in-flight (currently being delivered) messages, keyed by message ID
	inFlightMessages map[MessageID]*Message
	// likewise, a heap ordered by each message's timeout
	inFlightPQ       inFlightPqueue
	inFlightMutex    sync.Mutex
}
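
To make the deferred/in-flight fields concrete: when a message is deferred, its heap priority is simply the absolute timestamp at which it becomes deliverable. A minimal, simplified sketch of that idea (types and names here are illustrative; the real code uses pqueue.Item and Channel.StartDeferredTimeout):

package main

import (
	"fmt"
	"time"
)

// deferredItem is a simplified stand-in for pqueue.Item: the priority of a
// deferred message is the absolute time at which it should be delivered.
type deferredItem struct {
	msgID    string
	priority int64 // UnixNano at which the message becomes deliverable
}

func main() {
	delay := 5 * time.Second
	item := deferredItem{
		msgID:    "msg-1", // illustrative ID
		priority: time.Now().Add(delay).UnixNano(),
	}
	// the item would be pushed onto deferredPQ (a min-heap keyed by priority);
	// queueScanWorker later pops items whose priority <= now (see queueScanLoop below)
	fmt.Printf("deliver %s at %d\n", item.msgID, item.priority)
}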

Implementation

nsqd Startup

Let's first look at nsqd's startup process:

func (p *program) Start() error {
	// load and validate startup options
	opts := nsqd.NewOptions()
	...
	
	// load metadata
	err = p.nsqd.LoadMetadata()
	// persist metadata
	err = p.nsqd.PersistMetadata()
	// start the nsqd instance asynchronously
	go func() {
		err := p.nsqd.Main()
		if err != nil {
			p.Stop()
			os.Exit(1)
		}
	}()
	return nil
}

The code above loads and persists metadata. The metadata mainly records Topic and Channel information, so that after a crash and restart nsqd can carry on serving just as it did before.
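
As a rough mental model, the persisted metadata boils down to the set of topics and their channels plus their pause state. A minimal sketch of that shape (the real file is JSON and its exact field set may vary between versions):

package main

import (
	"encoding/json"
	"fmt"
)

// a simplified sketch of the persisted metadata's shape; field names are
// illustrative and may not match nsqd's on-disk format exactly
type persistedChannel struct {
	Name   string `json:"name"`
	Paused bool   `json:"paused"`
}

type persistedTopic struct {
	Name     string             `json:"name"`
	Paused   bool               `json:"paused"`
	Channels []persistedChannel `json:"channels"`
}

type persistedMetadata struct {
	Topics []persistedTopic `json:"topics"`
}

func main() {
	m := persistedMetadata{Topics: []persistedTopic{{
		Name:     "demo_topic", // placeholder names
		Channels: []persistedChannel{{Name: "demo_channel"}},
	}}}
	b, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(b))
}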

func (n *NSQD) Main() error {
	ctx := &context{n}

	// exit on the first fatal error from any listener or loop
	exitCh := make(chan error)
	var once sync.Once
	exitFunc := func(err error) {
		once.Do(func() {
			if err != nil {
				n.logf(LOG_FATAL, "%s", err)
			}
			exitCh <- err
		})
	}

	tcpServer := &tcpServer{ctx: ctx}
	n.waitGroup.Wrap(func() {
		exitFunc(protocol.TCPServer(n.tcpListener, tcpServer, n.logf))
	})
	httpServer := newHTTPServer(ctx, false, n.getOpts().TLSRequired == TLSRequired)
	n.waitGroup.Wrap(func() {
		exitFunc(http_api.Serve(n.httpListener, httpServer, "HTTP", n.logf))
	})
	if n.tlsConfig != nil && n.getOpts().HTTPSAddress != "" {
		httpsServer := newHTTPServer(ctx, true, true)
		n.waitGroup.Wrap(func() {
			exitFunc(http_api.Serve(n.httpsListener, httpsServer, "HTTPS", n.logf))
		})
	}

	n.waitGroup.Wrap(n.queueScanLoop)
	n.waitGroup.Wrap(n.lookupLoop)
	if n.getOpts().StatsdAddress != "" {
		n.waitGroup.Wrap(n.statsdLoop)
	}

	err := <-exitCh
	return err
}

Besides starting the TCP server and the HTTP server (NSQ supports publishing over both TCP and HTTP, but consuming only over TCP), nsqd's startup also launches three goroutine loops: queueScanLoop, lookupLoop, and statsdLoop.
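
For example, publishing over HTTP is just a POST to nsqd's /pub endpoint. A minimal sketch (the address and topic name are placeholders):

package main

import (
	"bytes"
	"log"
	"net/http"
)

func main() {
	// 127.0.0.1:4151 is nsqd's default HTTP address; adjust as needed
	resp, err := http.Post(
		"http://127.0.0.1:4151/pub?topic=demo_topic",
		"application/octet-stream",
		bytes.NewReader([]byte("hello nsq")),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	log.Println("publish status:", resp.Status)
}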

Serving Requests

In the startup section we saw that nsqd starts a TCP server, an HTTP server, and three goroutines. Here we look at what each of these servers and loops does.

TCP Server

Over a TCP connection, NSQ supports publishing and consuming messages, creating and deleting Topics and Channels, and so on. The TCP server's Handle method handles each TCP connection established with nsqd. NSQ defines a simple wire protocol (see https://nsq.io/clients/tcp_protocol_spec.html for details); in the code it is implemented by protocolV2's IOLoop method.
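
To get a feel for the protocol: a client first sends a 4-byte magic ("  V2") and then newline-terminated commands, while server frames come back as [4-byte size][4-byte frame type][data]. A minimal sketch of subscribing over a raw TCP connection (the address, topic, and channel names are placeholders, and a real client would parse frames properly):

package main

import (
	"fmt"
	"log"
	"net"
)

func main() {
	// 127.0.0.1:4150 is nsqd's default TCP address; adjust as needed
	conn, err := net.Dial("tcp", "127.0.0.1:4150")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// protocol magic: two spaces followed by "V2"
	conn.Write([]byte("  V2"))
	// subscribe to a placeholder topic/channel and signal readiness for one message
	fmt.Fprintf(conn, "SUB demo_topic demo_channel\n")
	fmt.Fprintf(conn, "RDY 1\n")

	// read whatever frames come back (a real client decodes size + frame type)
	buf := make([]byte, 4096)
	n, _ := conn.Read(buf)
	log.Printf("raw response: %q", buf[:n])
}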

IOLoop

func (p *protocolV2) IOLoop(conn net.Conn) error {
	// register the client; every client that connects to nsqd is assigned an ID
	clientID := atomic.AddInt64(&p.ctx.nsqd.clientIDSequence, 1)
	client := newClientV2(clientID, conn, p.ctx)
	p.ctx.nsqd.AddClient(client.ID, client)

	// start the messagePump goroutine, which delivers messages to the client (explained in detail below)
	// messagePumpStartedChan makes sure messagePump has fully started before we continue
	messagePumpStartedChan := make(chan bool)
	go p.messagePump(client, messagePumpStartedChan)
	<-messagePumpStartedChan

	// keep reading and handling commands sent by the client
	for {
		// set up the client's heartbeat/read deadline
		...

		// read a line
		line, err = client.Reader.ReadSlice('\n')
		...
		params := bytes.Split(line, separatorBytes)

		var response []byte
		// Exec implements the command handling logic (covered below)
		response, err = p.Exec(client, params)
		if err != nil {
			// on error, send the error back to the client
			...
			// if sending the error fails, or the error is a FatalClientErr, close the connection
			sendErr := p.Send(client, frameTypeError, []byte(err.Error()))
			if sendErr != nil {
				break
			}
			if _, ok := err.(*protocol.FatalClientErr); ok {
				break
			}
			continue
		}
		// send the result back to the client
		if response != nil {
			err = p.Send(client, frameTypeResponse, response)
			if err != nil {
				err = fmt.Errorf("failed to send response - %s", err)
				break
			}
		}
	}
	
	// close the connection, close ExitChan, and remove the client registered with nsqd
	conn.Close()
	close(client.ExitChan)
	if client.Channel != nil {
		client.Channel.RemoveClient(client.ID)
	}

	p.ctx.nsqd.RemoveClient(client.ID)
	return err
}

IOLoop simply loops: read a command from the client, execute it, and return the result. Two parts of this flow deserve a closer look (messagePump and Exec, each covered separately below); the rest of the details are in the code comments.

messagePump

Before IOLoop enters its for loop, it starts a messagePump goroutine that delivers messages to the client. The flow is straightforward if you follow the code:

func (p *protocolV2) messagePump(client *clientV2, startedChan chan bool) {
	// a pile of variables...
	var err error
	// the subscribed Channel's in-memory message chan
	var memoryMsgChan chan *Message
	// the subscribed Channel's backend (disk) message chan
	var backendMsgChan chan []byte
	// the Channel this client subscribed to
	var subChannel *Channel
	// NOTE: `flusherChan` is used to bound message latency for
	// the pathological case of a channel on a low volume topic
	// with >1 clients having >1 RDY counts
	var flusherChan <-chan time.Time
	// message delivery sample rate
	var sampleRate int32

	subEventChan := client.SubEventChan
	identifyEventChan := client.IdentifyEventChan
	outputBufferTicker := time.NewTicker(client.OutputBufferTimeout)
	heartbeatTicker := time.NewTicker(client.HeartbeatInterval)
	heartbeatChan := heartbeatTicker.C
	msgTimeout := client.MsgTimeout

	// v2 opportunistically buffers data to clients to reduce write system calls
	// we force flush in two cases:
	//    1. when the client is not ready to receive messages
	//    2. we're buffered and the channel has nothing left to send us
	//       (ie. we would block in this loop anyway)
	//
	flushed := true

	// signal to the goroutine that started the messagePump
	// that we've started up
	close(startedChan)

	for {
		// if the client has not subscribed to a Channel yet,
		// or is not ready to receive messages (the Channel is paused, or too many messages are already in flight)
		if subChannel == nil || !client.IsReadyForMessages() {
			// the client is not ready to receive messages...
			memoryMsgChan = nil
			backendMsgChan = nil
			flusherChan = nil
			// force flush
			client.writeLock.Lock()
			err = client.Flush()
			client.writeLock.Unlock()
			if err != nil {
				goto exit
			}
			flushed = true
		} else if flushed {
			// last iteration we flushed...
			// do not select on the flusher ticker channel
			memoryMsgChan = subChannel.memoryMsgChan
			backendMsgChan = subChannel.backend.ReadChan()
			flusherChan = nil
		} else {
			// we're buffered (if there isn't any more data we should flush)...
			// select on the flusher ticker channel, too
			memoryMsgChan = subChannel.memoryMsgChan
			backendMsgChan = subChannel.backend.ReadChan()
			flusherChan = outputBufferTicker.C
		}

		select {
		case <-flusherChan:
			// if this case wins, we're either starved
			// or we won the race between other channels...
			// in either case, force flush
			client.writeLock.Lock()
			err = client.Flush()
			client.writeLock.Unlock()
			if err != nil {
				goto exit
			}
			flushed = true
		case <-client.ReadyStateChan:
		case subChannel = <-subEventChan:
			// you can't SUB anymore
			subEventChan = nil
		case identifyData := <-identifyEventChan:
			// you can't IDENTIFY anymore
			identifyEventChan = nil

			outputBufferTicker.Stop()
			if identifyData.OutputBufferTimeout > 0 {
				outputBufferTicker = time.NewTicker(identifyData.OutputBufferTimeout)
			}

			heartbeatTicker.Stop()
			heartbeatChan = nil
			if identifyData.HeartbeatInterval > 0 {
				heartbeatTicker = time.NewTicker(identifyData.HeartbeatInterval)
				heartbeatChan = heartbeatTicker.C
			}

			if identifyData.SampleRate > 0 {
				sampleRate = identifyData.SampleRate
			}

			msgTimeout = identifyData.MsgTimeout
		case <-heartbeatChan:
			err = p.Send(client, frameTypeResponse, heartbeatBytes)
			if err != nil {
				goto exit
			}
		case b := <-backendMsgChan:
			// read a message from the backend (disk) chan and deliver it to the client
			// if sample_rate is set, only a sampled fraction of messages is delivered
			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				continue
			}

			msg, err := decodeMessage(b)
			if err != nil {
				continue
			}
			// record the delivery attempt
			msg.Attempts++
			// deliver the message
			subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
			client.SendingMessage()
			err = p.SendMessage(client, msg)
			if err != nil {
				goto exit
			}
			flushed = false
		case msg := <-memoryMsgChan:
			// same logic as the backend chan case above, not repeated here
			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				continue
			}
			msg.Attempts++

			subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
			client.SendingMessage()
			err = p.SendMessage(client, msg)
			if err != nil {
				goto exit
			}
			flushed = false
		case <-client.ExitChan:
			goto exit
		}
	}

exit:
	// cleanup elided (stop tickers, log, surface err)
	...
}

Note: in the code above that pulls messages from the Channel and delivers them to the Consumer, there is no priority between the in-memory chan and the backend store; when both have messages ready, one is picked at random.
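
This follows directly from how Go's select works: when multiple cases are ready, it picks one uniformly at random. A standalone sketch that illustrates the point:

package main

import "fmt"

func main() {
	memory := make(chan string, 1)
	backend := make(chan string, 1)

	counts := map[string]int{}
	for i := 0; i < 1000; i++ {
		// make both channels ready on every iteration
		memory <- "memory"
		backend <- "backend"

		// select chooses a ready case pseudo-randomly, so neither source is prioritized
		select {
		case src := <-memory:
			counts[src]++
		case src := <-backend:
			counts[src]++
		}
		// drain whichever channel was not picked so the next iteration starts clean
		select {
		case <-memory:
		case <-backend:
		}
	}
	fmt.Println(counts) // roughly 50/50
}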

Exec

Exec is essentially a dispatcher that wraps the handler for each command. Let's pick a few common commands and look at how they are implemented.

func (p *protocolV2) Exec(client *clientV2, params [][]byte) ([]byte, error) {
	if bytes.Equal(params[0], []byte("IDENTIFY")) {
		return p.IDENTIFY(client, params)
	}
	err := enforceTLSPolicy(client, p, params[0])
	if err != nil {
		return nil, err
	}
	switch {
	case bytes.Equal(params[0], []byte("FIN")):
		return p.FIN(client, params)
	case bytes.Equal(params[0], []byte("RDY")):
		return p.RDY(client, params)
	case bytes.Equal(params[0], []byte("REQ")):
		return p.REQ(client, params)
	case bytes.Equal(params[0], []byte("PUB")):
		return p.PUB(client, params)
	case bytes.Equal(params[0], []byte("MPUB")):
		return p.MPUB(client, params)
	case bytes.Equal(params[0], []byte("DPUB")):
		return p.DPUB(client, params)
	case bytes.Equal(params[0], []byte("NOP")):
		return p.NOP(client, params)
	case bytes.Equal(params[0], []byte("TOUCH")):
		return p.TOUCH(client, params)
	case bytes.Equal(params[0], []byte("SUB")):
		return p.SUB(client, params)
	case bytes.Equal(params[0], []byte("CLS")):
		return p.CLS(client, params)
	case bytes.Equal(params[0], []byte("AUTH")):
		return p.AUTH(client, params)
	}
	return nil, protocol.NewFatalClientErr(nil, "E_INVALID", fmt.Sprintf("invalid command %s", params[0]))
}

PUB

PUB is how a Producer publishes a message. The related commands MPUB (batch publish) and DPUB (deferred publish) follow roughly the same flow, so we use PUB to walk through the whole publishing path.

func (p *protocolV2) PUB(client *clientV2, params [][]byte) ([]byte, error) {
	// validate parameters, read the message body, check authorization
	...
	messageBody := make([]byte, bodyLen)
	...
	// get the Topic the message belongs to
	// if this nsqd does not know the Topic yet, it creates it and fetches the Topic's Channel list from nsqlookupd
	topic := p.ctx.nsqd.GetTopic(topicName)
	msg := NewMessage(topic.GenerateID(), messageBody)
	// deliver the message to the Topic
	err = topic.PutMessage(msg)
	if err != nil {
		return nil, protocol.NewFatalClientErr(err, "E_PUB_FAILED", "PUB failed "+err.Error())
	}

	client.PublishedMessage(topicName, 1)

	return okBytes, nil
}
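
From the client side, the official go-nsq library wraps these commands. A minimal sketch (the address and topic name are placeholders):

package main

import (
	"log"
	"time"

	nsq "github.com/nsqio/go-nsq"
)

func main() {
	cfg := nsq.NewConfig()
	// 127.0.0.1:4150 is nsqd's default TCP address; adjust as needed
	producer, err := nsq.NewProducer("127.0.0.1:4150", cfg)
	if err != nil {
		log.Fatal(err)
	}
	defer producer.Stop()

	// PUB: publish immediately
	if err := producer.Publish("demo_topic", []byte("hello")); err != nil {
		log.Fatal(err)
	}
	// DPUB: ask nsqd to defer delivery of the message
	if err := producer.DeferredPublish("demo_topic", 5*time.Second, []byte("later")); err != nil {
		log.Fatal(err)
	}
}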

Delivering a message to a Topic means storing it either in memory or in the backend:

func (t *Topic) put(m *Message) error {
	select {
	case t.memoryMsgChan <- m:
	default:
		b := bufferPoolGet()
		err := writeMessageToBackend(b, m, t.backend)
		bufferPoolPut(b)
		t.ctx.nsqd.SetHealth(err)
		if err != nil {
			return err
		}
	}
	return nil
}

As the put above shows, a message preferentially goes into the in-memory chan; only when that chan is full is it written to the backend store.
You may wonder where messages get forwarded from here. When nsqd creates a Topic it also starts a messagePump goroutine for that Topic, which pulls messages out of the in-memory chan and the backend store and delivers them to each of the Topic's Channels:

// the Topic's messagePump pulls messages from the in-memory chan and the backend store and delivers them to every Channel
func (t *Topic) messagePump() {
	var msg *Message
	var buf []byte
	var err error
	var chans []*Channel
	var memoryMsgChan chan *Message
	var backendChan chan []byte

	// do not pass messages before Start(), but avoid blocking Pause() or GetChannel()
	for {
		select {
		case <-t.channelUpdateChan:
			continue
		case <-t.pauseChan:
			continue
		case <-t.exitChan:
			goto exit
		case <-t.startChan:
		}
		break
	}
	t.RLock()
	// collect all of the Topic's Channels
	for _, c := range t.channelMap {
		chans = append(chans, c)
	}
	t.RUnlock()
	if len(chans) > 0 && !t.IsPaused() {
		memoryMsgChan = t.memoryMsgChan
		backendChan = t.backend.ReadChan()
	}

	// main message loop
	for {
		select {
		case msg = <-memoryMsgChan:
		case buf = <-backendChan:
			msg, err = decodeMessage(buf)
			if err != nil {
				continue
			}
		case <-t.channelUpdateChan:
			// refresh the Channel list
			chans = chans[:0]
			t.RLock()
			for _, c := range t.channelMap {
				chans = append(chans, c)
			}
			t.RUnlock()
			...
			continue
		case <-t.pauseChan:
			...
			continue
		case <-t.exitChan:
			goto exit
		}

		for i, channel := range chans {
			chanMsg := msg
			// every Channel needs its own copy of the message,
			// but the Topic already allocated one, so the first Channel reuses it
			if i > 0 {
				chanMsg = NewMessage(msg.ID, msg.Body)
				chanMsg.Timestamp = msg.Timestamp
				chanMsg.deferred = msg.deferred
			}
			// if the message is deferred, put it into the Channel's deferred queue
			if chanMsg.deferred != 0 {
				channel.PutMessageDeferred(chanMsg, chanMsg.deferred)
				continue
			}
			err := channel.PutMessage(chanMsg)
		}
	}

exit:
	...
}

Once the message has been delivered to the Channels, the PUB flow is complete; the message now sits in the Channels waiting for a Consumer.

SUB

SUB is the consume command a Consumer sends after connecting to nsqd. Its logic essentially registers the Consumer client with the corresponding Channel; the actual delivery of messages from the Channel to the Consumer happens in the TCP server's messagePump described earlier.

func (p *protocolV2) SUB(client *clientV2, params [][]byte) ([]byte, error) {
	// validate parameters; resolve the Topic and Channel to consume; check authorization
	...
	// This retry-loop is a work-around for a race condition, where the
	// last client can leave the channel between GetChannel() and AddClient().
	// Avoid adding a client to an ephemeral channel / topic which has started exiting.
	var channel *Channel
	for {
		topic := p.ctx.nsqd.GetTopic(topicName)
		channel = topic.GetChannel(channelName)
		if err := channel.AddClient(client.ID, client); err != nil {
			return nil, protocol.NewFatalClientErr(nil, "E_TOO_MANY_CHANNEL_CONSUMERS",
				fmt.Sprintf("channel consumers for %s:%s exceeds limit of %d",
					topicName, channelName, p.ctx.nsqd.getOpts().MaxChannelConsumers))
		}

		if (channel.ephemeral && channel.Exiting()) || (topic.ephemeral && topic.Exiting()) {
			channel.RemoveClient(client.ID)
			time.Sleep(1 * time.Millisecond)
			continue
		}
		break
	}
	atomic.StoreInt32(&client.State, stateSubscribed)
	client.Channel = channel
	// update message pump
	client.SubEventChan <- channel

	return okBytes, nil
}
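
On the client side this corresponds to subscribing with the official go-nsq library. A minimal sketch (the address, topic, and channel names are placeholders):

package main

import (
	"log"

	nsq "github.com/nsqio/go-nsq"
)

func main() {
	cfg := nsq.NewConfig()
	consumer, err := nsq.NewConsumer("demo_topic", "demo_channel", cfg)
	if err != nil {
		log.Fatal(err)
	}

	// the handler runs for every delivered message;
	// returning nil acknowledges (FIN), returning an error triggers a requeue
	consumer.AddHandler(nsq.HandlerFunc(func(m *nsq.Message) error {
		log.Printf("got message: %s", m.Body)
		return nil
	}))

	// connect directly to an nsqd (ConnectToNSQLookupd is the other option)
	if err := consumer.ConnectToNSQD("127.0.0.1:4150"); err != nil {
		log.Fatal(err)
	}
	<-consumer.StopChan
}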

queueScanLoop

queueScanLoop is responsible for two message queues: the in-flight queue and the deferred queue. It maintains a worker pool to process channels concurrently, and the pool size is adjusted dynamically (the maximum defaults to 4).
On each tick, queueScanLoop randomly samples a number of channels (configurable via QueueScanSelectionCount) and processes them.

func (n *NSQD) queueScanLoop() {
	workCh := make(chan *Channel, n.getOpts().QueueScanSelectionCount)
	responseCh := make(chan bool, n.getOpts().QueueScanSelectionCount)
	closeCh := make(chan int)
	
	// tickers that drive the periodic scan and the periodic channel-list refresh
	workTicker := time.NewTicker(n.getOpts().QueueScanInterval)
	refreshTicker := time.NewTicker(n.getOpts().QueueScanRefreshInterval)

	channels := n.channels()
	// initialize (and later resize) the worker pool
	// the pool size is clamped to: 1 <= pool <= min(num * 0.25, QueueScanWorkerPoolMax)
	n.resizePool(len(channels), workCh, responseCh, closeCh)

	for {
		select {
		case <-workTicker.C:
			if len(channels) == 0 {
				continue
			}
		case <-refreshTicker.C:
			channels = n.channels()
			n.resizePool(len(channels), workCh, responseCh, closeCh)
			continue
		case <-n.exitChan:
			goto exit
		}
		
		// decide how many channels to sample this round
		num := n.getOpts().QueueScanSelectionCount
		if num > len(channels) {
			num = len(channels)
		}

	loop:
		for _, i := range util.UniqRands(num, len(channels)) {
			workCh <- channels[i]
		}

		numDirty := 0
		for i := 0; i < num; i++ {
			if <-responseCh {
				numDirty++
			}
		}
		// if the fraction of dirty channels exceeds the threshold, skip the wait and scan again immediately
		if float64(numDirty)/float64(num) > n.getOpts().QueueScanDirtyPercent {
			goto loop
		}
	}

exit:
	// cleanup elided (close closeCh, stop tickers)
	...
}
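
For reference, a simplified sketch of the pool-sizing rule noted in the comment above (the real resizePool in nsqd also starts or stops queueScanWorker goroutines via workCh/closeCh to reach this target):

package main

import "fmt"

// idealPoolSize clamps the worker count to 1 <= pool <= min(num*0.25, poolMax)
func idealPoolSize(numChannels, poolMax int) int {
	size := int(float64(numChannels) * 0.25)
	if size < 1 {
		size = 1
	} else if size > poolMax {
		size = poolMax
	}
	return size
}

func main() {
	for _, n := range []int{0, 3, 10, 100} {
		fmt.Printf("channels=%d -> workers=%d\n", n, idealPoolSize(n, 4))
	}
}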

The logic each worker runs to process a Channel is as follows:

func (n *NSQD) queueScanWorker(workCh chan *Channel, responseCh chan bool, closeCh chan int) {
	for {
		select {
		case c := <-workCh:
			now := time.Now().UnixNano()
			dirty := false
			if c.processInFlightQueue(now) {
				dirty = true
			}
			if c.processDeferredQueue(now) {
				dirty = true
			}
			responseCh <- dirty
		case <-closeCh:
			return
		}
	}
}

When a worker receives a Channel to process, it first handles the in-flight (awaiting ACK) queue via processInFlightQueue, and then the deferred queue via processDeferredQueue.

  • processInFlightQueue. The Channel keeps in-flight messages in a heap ordered by their timeout timestamp. Given the current time, the worker pops every message whose deadline has already passed (meaning it still has not been ACKed), removes it from the in-flight state, and puts it back into the channel for redelivery (see the sketch after this list).
  • processDeferredQueue. Likewise, nsqd keeps deferred messages in a heap ordered by their scheduled delivery time, so the worker knows which messages are due and puts them back into the channel. Deferred delivery is one of NSQ's features; note that deferred messages are held in memory, so they can be lost if the process crashes, and a large backlog of them consumes a lot of memory.
    As for dirty: a channel is marked dirty if it had timed-out messages to redeliver or deferred messages that came due. If the fraction of dirty channels exceeds the configured threshold (QueueScanDirtyPercent), another round of processing starts immediately instead of sleeping until the next tick, which prevents message processing from falling behind.
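
A minimal sketch of the processInFlightQueue idea, assuming simplified types (the real method lives on Channel, pops from a min-heap, and also updates inFlightMessages and the owning client):

package main

import "fmt"

// inFlightItem is a simplified stand-in for nsqd's in-flight bookkeeping;
// deadline mirrors the heap priority (absolute timeout in nanoseconds).
type inFlightItem struct {
	msgID    string
	deadline int64
}

// processInFlightQueue requeues every message whose deadline has passed and
// reports whether any work was done ("dirty"); pq is assumed sorted by deadline.
func processInFlightQueue(pq []inFlightItem, now int64, requeue func(string)) ([]inFlightItem, bool) {
	dirty := false
	for len(pq) > 0 && pq[0].deadline <= now {
		requeue(pq[0].msgID) // deadline passed without an ACK: redeliver
		pq = pq[1:]
		dirty = true
	}
	return pq, dirty
}

func main() {
	pq := []inFlightItem{{"m1", 100}, {"m2", 200}, {"m3", 300}}
	pq, dirty := processInFlightQueue(pq, 250, func(id string) {
		fmt.Println("requeue", id) // m1 and m2 timed out
	})
	fmt.Println("dirty:", dirty, "remaining:", len(pq))
}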

lookupLoop

lookupLoop handles the interaction with nsqlookupd. Its main jobs are:

  • periodically pinging every nsqlookupd
  • picking up nsqlookupd configuration changes and (re)establishing connections to all nsqlookupd instances
  • propagating Topic and Channel changes on this nsqd (creation and removal) to every nsqlookupd

func (n *NSQD) lookupLoop() {
	var lookupPeers []*lookupPeer
	var lookupAddrs []string
	connect := true

	hostname, err := os.Hostname()
	...

	// for announcements, lookupd determines the host automatically
	ticker := time.Tick(15 * time.Second)
	for {
		// connect marks whether connections still need to be established;
		// on first start, or when new nsqlookupd addresses are added, nsqd connects to any nsqlookupd it is not yet connected to
		if connect {
			for _, host := range n.getOpts().NSQLookupdTCPAddresses {
				if in(host, lookupAddrs) {
					continue
				}
				lookupPeer := newLookupPeer(host, n.getOpts().MaxBodySize, n.logf, connectCallback(n, hostname))
				lookupPeer.Command(nil) // start the connection
				lookupPeers = append(lookupPeers, lookupPeer)
				lookupAddrs = append(lookupAddrs, host)
			}
			n.lookupPeers.Store(lookupPeers)
			connect = false
		}

		select {
		case <-ticker:
			// periodically ping every nsqlookupd
			for _, lookupPeer := range lookupPeers {
				cmd := nsq.Ping()
				_, err := lookupPeer.Command(cmd)
			}
		case val := <-n.notifyChan:
			// a Topic or Channel on this nsqd was created or removed
			// propagate the change to every nsqlookupd
			var cmd *nsq.Command
			var branch string

			switch val.(type) {
			case *Channel:
				// notify all nsqlookupds that a new channel exists, or that it's removed
				branch = "channel"
				channel := val.(*Channel)
				if channel.Exiting() == true {
					cmd = nsq.UnRegister(channel.topicName, channel.name)
				} else {
					cmd = nsq.Register(channel.topicName, channel.name)
				}
			case *Topic:
				// notify all nsqlookupds that a new topic exists, or that it's removed
				branch = "topic"
				topic := val.(*Topic)
				if topic.Exiting() == true {
					cmd = nsq.UnRegister(topic.name, "")
				} else {
					cmd = nsq.Register(topic.name, "")
				}
			}

			for _, lookupPeer := range lookupPeers {
				_, err := lookupPeer.Command(cmd)
			}
		case <-n.optsNotificationChan:
			// the nsqlookupd list may have changed (it can be updated dynamically via the API)
			// refresh the list of nsqlookupd peers to match the latest configuration
			var tmpPeers []*lookupPeer
			var tmpAddrs []string
			for _, lp := range lookupPeers {
				if in(lp.addr, n.getOpts().NSQLookupdTCPAddresses) {
					tmpPeers = append(tmpPeers, lp)
					tmpAddrs = append(tmpAddrs, lp.addr)
					continue
				}
				lp.Close()
			}
			lookupPeers = tmpPeers
			lookupAddrs = tmpAddrs
			connect = true
		case <-n.exitChan:
			goto exit
		}
	}

exit:
	...
}

statsdLoop

statsdLoop is optional. When enabled, it periodically pushes nsqd's current metrics over UDP to another server, much like metrics reporting, making it easy to observe an nsqd instance's running state from a dashboard. The main metrics include message depth (backlog), the number of client connections per Channel, the number of in-flight (un-ACKed) messages per Channel, the number of deferred messages, GC pause times, memory usage, and so on.
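
As an illustration of the mechanism (the metric name below is made up; the real loop derives names from topic and channel names and emits gauges, counters, and timers), pushing a statsd-style gauge over UDP looks roughly like this:

package main

import (
	"fmt"
	"log"
	"net"
)

func main() {
	// 127.0.0.1:8125 is a placeholder statsd address
	conn, err := net.Dial("udp", "127.0.0.1:8125")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// statsd line format: <name>:<value>|<type>, where "g" means gauge;
	// "nsq.demo_topic.demo_channel.depth" is an illustrative metric name
	fmt.Fprintf(conn, "nsq.demo_topic.demo_channel.depth:%d|g", 42)
}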
