kube-proxy ipvs踩坑(二)

環境

  • Kubernetes version:
    v1.9.11

  • OS:
    NAME="CentOS Linux"
    VERSION="7 (Core)"
    ID="centos"
    ID_LIKE="rhel fedora"
    VERSION_ID="7"
    PRETTY_NAME="CentOS Linux 7 (Core)"
    ANSI_COLOR="0;31"
    CPE_NAME="cpe:/o:centos:centos:7"
    HOME_URL="https://www.centos.org/"
    BUG_REPORT_URL="https://bugs.centos.org/"

CENTOS_MANTISBT_PROJECT="CentOS-7"
CENTOS_MANTISBT_PROJECT_VERSION="7"
REDHAT_SUPPORT_PRODUCT="centos"
REDHAT_SUPPORT_PRODUCT_VERSION="7"

  • 部署方式
    二進制

問題描述:在部署完k8s集羣之後,部署的一些服務(服務類型爲ClusterIP)都能正常使用,但其中一些NodePort類型的服務不能正常訪問。
然後查看ipvs中的規則,發現沒有對應的規則。將kube-proxy啓動日誌級別調爲4,再查看對應的日誌,發現其他類型的service都有添加ipvs virtual server的日誌,
只有NodePort類型的service沒有對應的添加ipvs virtual server的日誌。通過查看對應處理NodePort service的代碼,找出了問題所在的原因。

代碼具體位置:
https://github.com/kubernetes/kubernetes/blob/release-1.9/pkg/proxy/ipvs/proxier.go#L1312
https://github.com/kubernetes/kubernetes/blob/release-1.9/pkg/proxy/ipvs/proxier.go#L176

下面截取代碼片段:

/ This is where all of the ipvs calls happen.
// assumes proxier.mu is held
func (proxier *Proxier) syncProxyRules() {
        ......
		if svcInfo.nodePort != 0 {
			lp := utilproxy.LocalPort{
				Description: "nodePort for " + svcNameString,
				IP:          "",
				Port:        svcInfo.nodePort,
				Protocol:    protocol,
			}
			if proxier.portsMap[lp] != nil {
				glog.V(4).Infof("Port %s was open before and is still needed", lp.String())
				replacementPortsMap[lp] = proxier.portsMap[lp]
			} else {
				socket, err := proxier.portMapper.OpenLocalPort(&lp)
				if err != nil {
					glog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err)
					continue
				}
				if lp.Protocol == "udp" {
					isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP)
					utilproxy.ClearUDPConntrackForPort(proxier.exec, lp.Port, isIPv6)
				}
				replacementPortsMap[lp] = socket
			} // We're holding the port, so it's OK to install ipvs rules.

			// Nodeports need SNAT, unless they're local.
			// ipset call
			if !svcInfo.onlyNodeLocalEndpoints {
				entry = &utilipset.Entry{
					// No need to provide ip info
					Port:     svcInfo.nodePort,
					Protocol: protocol,
					SetType:  utilipset.BitmapPort,
				}
				switch protocol {
				case "tcp":
					proxier.nodePortSetTCP.activeEntries.Insert(entry.String())
				case "udp":
					proxier.nodePortSetUDP.activeEntries.Insert(entry.String())
				default:
					// It should never hit
					glog.Errorf("Unsupported protocol type: %s", protocol)
				}
			}

			// Build ipvs kernel routes for each node ip address
			nodeIPs, err := proxier.ipGetter.NodeIPs()
			if err != nil {
				glog.Errorf("Failed to get node IP, err: %v", err)
			} else {
				for _, nodeIP := range nodeIPs {
					// ipvs call
					serv := &utilipvs.VirtualServer{
						Address:   nodeIP,
						Port:      uint16(svcInfo.nodePort),
						Protocol:  string(svcInfo.protocol),
						Scheduler: proxier.ipvsScheduler,
					}
					if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
						serv.Flags |= utilipvs.FlagPersistent
						serv.Timeout = uint32(svcInfo.stickyMaxAgeSeconds)
					}
					// There is no need to bind Node IP to dummy interface, so set parameter `bindAddr` to `false`.
					if err := proxier.syncService(svcNameString, serv, false); err == nil {
						activeIPVSServices[serv.String()] = true
						if err := proxier.syncEndpoint(svcName, svcInfo.onlyNodeLocalEndpoints, serv); err != nil {
							glog.Errorf("Failed to sync endpoint for service: %v, err: %v", serv, err)
						}
					} else {
						glog.Errorf("Failed to sync service: %v, err: %v", serv, err)
					}
				}
			}
		}
	}
     ......
}

proxier.ipGetter.NodeIPs()的真實實現如下

func (r *realIPGetter) NodeIPs() (ips []net.IP, err error) {
 	interfaces, err := net.Interfaces()
 	if err != nil {
 		return nil, err
 	}
 	for i := range interfaces {
 		name := interfaces[i].Name
 		// We assume node ip bind to eth{x}
 		if !strings.HasPrefix(name, "eth") {
 			continue
 		}
 		intf, err := net.InterfaceByName(name)
 		if err != nil {
 			utilruntime.HandleError(fmt.Errorf("Failed to get interface by name: %s, error: %v", name, err))
 			continue
 		}
 		addrs, err := intf.Addrs()
 		if err != nil {
 			utilruntime.HandleError(fmt.Errorf("Failed to get addresses from interface: %s, error: %v", name, err))
 			continue
 		}
 		for _, a := range addrs {
 			if ipnet, ok := a.(*net.IPNet); ok {
 				ips = append(ips, ipnet.IP)
 			}
 		}
 	}
 	return
}

syncProxyRules方法處理NodePort類型service的核心代碼主要如下:

nodeIPs, err := proxier.ipGetter.NodeIPs()
for _, nodeIP := range nodeIPs {
    // ipvs call
    serv := &utilipvs.VirtualServer{
        Address:   nodeIP,
        Port:      uint16(svcInfo.nodePort),
        Protocol:  string(svcInfo.protocol),
        Scheduler: proxier.ipvsScheduler,
    }
    if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
        serv.Flags |= utilipvs.FlagPersistent
        serv.Timeout = uint32(svcInfo.stickyMaxAgeSeconds)
    }
    // There is no need to bind Node IP to dummy interface, so set parameter `bindAddr` to `false`.
    if err := proxier.syncService(svcNameString, serv, false); err == nil {
        activeIPVSServices[serv.String()] = true
        if err := proxier.syncEndpoint(svcName, svcInfo.onlyNodeLocalEndpoints, serv); err != nil {
            glog.Errorf("Failed to sync endpoint for service: %v, err: %v", serv, err)
        }
    } else {
        glog.Errorf("Failed to sync service: %v, err: %v", serv, err)
    }
}
func (proxier *Proxier) syncService(svcName string, vs *utilipvs.VirtualServer, bindAddr bool) error {
	appliedVirtualServer, _ := proxier.ipvs.GetVirtualServer(vs)
	if appliedVirtualServer == nil || !appliedVirtualServer.Equal(vs) {
		if appliedVirtualServer == nil {
			// IPVS service is not found, create a new service
			glog.V(3).Infof("Adding new service %q %s:%d/%s", svcName, vs.Address, vs.Port, vs.Protocol)
			if err := proxier.ipvs.AddVirtualServer(vs); err != nil {
				glog.Errorf("Failed to add IPVS service %q: %v", svcName, err)
				return err
			}
		} else {
			// IPVS service was changed, update the existing one
			// During updates, service VIP will not go down
			glog.V(3).Infof("IPVS service %s was changed", svcName)
			if err := proxier.ipvs.UpdateVirtualServer(appliedVirtualServer); err != nil {
				glog.Errorf("Failed to update IPVS service, err:%v", err)
				return err
			}
		}
	}

	// bind service address to dummy interface even if service not changed,
	// in case that service IP was removed by other processes
	if bindAddr {
		_, err := proxier.netlinkHandle.EnsureAddressBind(vs.Address.String(), DefaultDummyDevice)
		if err != nil {
			glog.Errorf("Failed to bind service address to dummy device %q: %v", svcName, err)
			return err
		}
	}
	return nil
}

其中proxier.syncService是調用ipvs的api添加ipvs的virtual server。我們文章開始提到的問題是壓根就沒有添加NodePort類型service的日誌,所以
我就通過代碼上下文斷言nodeIPs爲空——只有nodeIPs爲空,循環體纔不會執行,也就不會添加NodePort service的ipvs virtual server。然後再通過查看
func (r *realIPGetter) NodeIPs() (ips []net.IP, err error),發現其中有一行代碼可能是導致問題的原因:

if !strings.HasPrefix(name, "eth") {
    continue
}

這個方法取節點的ip時,只取以eth開頭的網卡的ip。通過與運維同事確認,該主機的網卡不是以eth開頭的。最後更改了這個獲取ip的方法並重新編譯代碼,NodePort的服務即可正常訪問。

最後查閱相關文檔,確認該問題已經在v1.10.0版本中被修復,具體請查看修復該問題的PR。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章