說明
etcd用k8s的statefulset實現, 一個statefulset的yaml就能實現etcd的集羣的動態伸縮, 當你減少statefulset的replicas的時候(或者節點出故障down掉), 自動的把節點從etcd集羣中移除,當時增加statefulset的replicas的時候(或者故障節點恢復),自動的把節點添加到etcd集羣中. 但是當你減少的集羣節點數目小於初始化集羣的時候設置的最小集羣節點個數的時候, 減少集羣節點(或者節點出故障down掉)不會將節點從集羣中移除, 新增節點(或者故障節點恢復),自動的把新增節點(或者故障節點恢復)更新到etcd集羣中. 聲明下該statefulset是基於k8s官方提供的原版yaml修改測試後的. 文件位置,最新源碼中(老版本源碼中不一定會有): https://github.com/kubernetes/kubernetes/test/e2e/testing-manifests/statefulset/etcd 中的yaml. 但是該yaml無法正常的創建etcd集羣, 存在很多問題. 可以自己去測試.
修改後的yaml如下. 如果自己的k8s中不提供存儲支持, 請自行將yaml的的pvc改成emptyDir的volume即可. 另外鏡像可以自己換成etcd官方鏡像.
yaml如下:
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
volumeClaimTemplates:
- metadata:
name: datadir
spec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
# upstream recommended max is 700M
storage: 1Gi
emptyDir的yaml如下:
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
volumes:
- name: datadir
emptyDir: {}
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
# volumeClaimTemplates:
# - metadata:
# name: datadir
# spec:
# accessModes:
# - "ReadWriteOnce"
# resources:
# requests:
# # upstream recommended max is 700M
# storage: 1Gi