01 Environment
OS: Rocky Linux release 9.3
containerd version: 1.6.26
Kubernetes version: v1.28.2
K8s master node IP: 192.168.2.175
K8s master node IP: 192.168.2.176
K8s master node IP: 192.168.2.177
K8s worker node IP: 192.168.2.185
K8s worker node IP: 192.168.2.187
K8s worker node IP: 192.168.3.62
K8s worker node IP: 192.168.3.70
VIP: 192.168.3.251
CNI plugin: flannel
kube-proxy forwarding mode: ipvs
Kubernetes package repo: Aliyun mirror
service-cidr: 10.96.0.0/16
pod-network-cidr: 10.244.0.0/16
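The kubeadm output later in this guide shows hostname-lookup warnings ("could not be reached ... no such host"). Adding all nodes to /etc/hosts on every machine avoids that; a sketch using the hostnames and IPs from this guide (adjust to your own naming):
cat <<EOF >> /etc/hosts
192.168.2.175 k8s-master-1
192.168.2.176 k8s-master-2
192.168.2.177 k8s-master-3
192.168.2.185 k8s-node-1
192.168.2.187 k8s-node-2
192.168.3.62 k8s-node-3
192.168.3.70 k8s-node-4
EOF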
02 Deployment preparation
Note: perform these steps on all nodes.
1. Adjust kernel parameters
vim /etc/sysctl.conf
vm.swappiness=0
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables=1
sysctl -p
2. Disable swap
swapoff -a && sysctl -w vm.swappiness=0
Edit fstab so swap is no longer mounted at boot, i.e. comment out (or remove) the swap entry:
vi /etc/fstab
#/dev/mapper/centos-swap swap swap defaults 0 0
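If you prefer not to edit the file by hand, the swap entry can also be commented out with a one-liner (a convenience sketch; check the resulting /etc/fstab afterwards):
sed -ri 's/^[^#].*[[:space:]]swap[[:space:]].*/#&/' /etc/fstab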
3. Set SELinux to disabled mode
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
4. Enable cgroup v2 (Rocky 9 already boots with cgroup v2 by default, so this is usually unnecessary; if you upgrade kernels frequently it is better to skip this step, otherwise a newly installed kernel may fail to boot and you will have to fall back to the old one)
grubby \
--update-kernel=ALL \
--args="systemd.unified_cgroup_hierarchy=1"
5. Load the required kernel modules (the heredoc in the source is truncated; a typical module list is sketched below)
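A minimal sketch of what this step typically contains for an ipvs-based cluster; the file path and the exact module list are assumptions, not the author's original content:
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
# load the modules right away so the bridge sysctls above can take effect before the reboot
systemctl restart systemd-modules-load.service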
6. Reboot so the settings above take effect
reboot
7. Install dependencies
$ dnf install -y dnf-utils ipvsadm telnet wget net-tools conntrack ipset jq iptables curl sysstat libseccomp socat nfs-utils fuse fuse-devel
03 Install containerd
1. Add the containerd (docker-ce) repo
$ yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
2. Install containerd
$ yum install containerd.io
04 Configure containerd
1. Edit the containerd configuration
# generate the default config
containerd config default > /etc/containerd/config.toml
# edit the config: change the sandbox image to a domestic mirror, i.e. replace
sandbox_image = "registry.k8s.io/pause:3.6"
# with
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
# and switch the runc runtime to the systemd cgroup driver:
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
...
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
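Both edits can also be applied non-interactively; a sketch assuming a freshly generated config.toml with the default values shown above:
sed -i 's#registry.k8s.io/pause:3.6#registry.aliyuncs.com/google_containers/pause:3.9#' /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml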
2. Create the data directory and bind-mount the storage
mkdir -p /var/lib/containerd/
mkdir -p /apps/containerd/ # change this to a path on your large data disk
# persist the bind mount in /etc/fstab
echo "/apps/containerd /var/lib/containerd none defaults,bind,nofail 0 0" >>/etc/fstab
systemctl daemon-reload
# mount it
mount -a
# verify the mount
[root@k8s-master-1 containerd]# mount | grep containerd
/dev/vda3 on /var/lib/containerd type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)
3. Enable containerd at boot and start it now
systemctl enable containerd.service --now
4. Check that the service is running
systemctl status containerd.service
5. Check that files appear under the data directory, which confirms the bind mount is working
ll /apps/containerd/
6. Create the crictl configuration (the heredoc in the source is truncated; a typical version is sketched below)
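A typical /etc/crictl.yaml pointing crictl at containerd looks like the following (a sketch, not the author's original file; the socket path matches the one used with kubeadm later on):
cat <<EOF | tee /etc/crictl.yaml
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
timeout: 10
debug: false
EOF
# the config.toml changes (sandbox_image, SystemdCgroup) only take effect after a restart
systemctl restart containerd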
8. Verify that the configuration took effect
[root@k8s-master-1 containerd]# crictl info| grep sandboxImage
"sandboxImage": "registry.aliyuncs.com/google_containers/pause:3.6",
[root@k8s-master-1 containerd]# crictl info| grep SystemdCgroup
"SystemdCgroup": true
05 Install kubelet, kubeadm and kubectl
1. Add the repo
# Note: the el7 repo is used here; Google never published separate packages for rhel8/rhel9
# (the heredoc in the source is truncated; a sketch of the repo file and install command follows)
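A sketch of the Aliyun el7 repo definition together with a version-pinned install; the baseurl and gpgcheck setting are assumptions, so verify them against the Aliyun mirror documentation:
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
EOF
# install the components pinned to the cluster version
yum install -y kubelet-1.28.2 kubeadm-1.28.2 kubectl-1.28.2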
06 Configure kubelet
1. Create the kubelet data directory and bind mount
mkdir /var/lib/kubelet
mkdir /apps/kubelet
# persist the bind mount in /etc/fstab
echo "/apps/kubelet /var/lib/kubelet none defaults,bind,nofail 0 0" >>/etc/fstab
systemctl daemon-reload
# mount it
mount -a
2. Verify the mount
[root@k8s-master-1]# mount | grep kubelet
/dev/vda3 on /var/lib/kubelet type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)
6. Reload the systemd units
systemctl daemon-reload
# enable kubelet at boot
systemctl enable kubelet.service
# check its status
systemctl status kubelet.service
7. Create the LB (kube-vip) on the master1 node
# Official docs: https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#kube-vip
# the image juestnow/kube-vip:v0.6.4 can be used
# KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")
export KVVERSION='v0.6.4'
export VIP=192.168.3.251
export INTERFACE='eth0'
# define an alias to simplify the command
alias kube-vip="ctr run --rm --net-host docker.io/juestnow/kube-vip:$KVVERSION vip /kube-vip"
# pull the image
ctr images pull docker.io/juestnow/kube-vip:$KVVERSION
# run the command to generate the static Pod manifest
kube-vip manifest pod \
--interface $INTERFACE \
--vip $VIP \
--controlplane \
--arp \
--leaderElection | tee /etc/kubernetes/manifests/kube-vip.yaml
# change the image pull policy
sed -i 's/Always/IfNotPresent/g' /etc/kubernetes/manifests/kube-vip.yaml
sed -i "s#ghcr.io/kube-vip/kube-vip:v0.6.4#docker.io/juestnow/kube-vip:$KVVERSION#g" /etc/kubernetes/manifests/kube-vip.yaml
# pull the image into the CRI image store as well (so kubelet can use it)
crictl pull docker.io/juestnow/kube-vip:$KVVERSION
# resulting manifest after the changes
cat /etc/kubernetes/manifests/kube-vip.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
name: kube-vip
namespace: kube-system
spec:
containers:
- args:
- manager
env:
- name: vip_arp
value: "true"
- name: port
value: "6443"
- name: vip_interface
value: eth0
- name: vip_cidr
value: "32"
- name: cp_enable
value: "true"
- name: cp_namespace
value: kube-system
- name: vip_ddns
value: "false"
- name: vip_leaderelection
value: "true"
- name: vip_leasename
value: plndr-cp-lock
- name: vip_leaseduration
value: "5"
- name: vip_renewdeadline
value: "3"
- name: vip_retryperiod
value: "1"
- name: vip_address
value: 192.168.3.251
- name: prometheus_server
value: :2112
image: docker.io/juestnow/kube-vip:v0.6.4
imagePullPolicy: IfNotPresent
name: kube-vip
resources: {}
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_RAW
volumeMounts:
- mountPath: /etc/kubernetes/admin.conf
name: kubeconfig
hostAliases:
- hostnames:
- kubernetes
ip: 127.0.0.1
hostNetwork: true
volumes:
- hostPath:
path: /etc/kubernetes/admin.conf
name: kubeconfig
status: {}
# finally, copy this manifest to /etc/kubernetes/manifests on every control-plane node
scp -rp /etc/kubernetes/manifests/kube-vip.yaml root@192.168.2.176:/etc/kubernetes/manifests/
scp -rp /etc/kubernetes/manifests/kube-vip.yaml root@192.168.2.177:/etc/kubernetes/manifests/
07 Initialize Kubernetes (run on master1)
kubeadm init --apiserver-advertise-address=0.0.0.0 \
--apiserver-cert-extra-sans=127.0.0.1 \
--kubernetes-version 1.28.2 \
--image-repository=registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--upload-certs \
--control-plane-endpoint=192.168.3.251 \
--cri-socket=unix:///var/run/containerd/containerd.sock
# initialization output
[root@k8s-master-1 tmp]# kubeadm init --apiserver-advertise-address=0.0.0.0 \
--apiserver-cert-extra-sans=127.0.0.1 \
--kubernetes-version 1.28.2 \
--image-repository=registry.aliyuncs.com/google_containers \
--ignore-preflight-errors=all \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--upload-certs \
--control-plane-endpoint=192.168.3.251 \
--cri-socket=unix:///var/run/containerd/containerd.sock
[init] Using Kubernetes version: v1.28.2
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-master-1" could not be reached
[WARNING Hostname]: hostname "k8s-master-1": lookup k8s-master-1 on 192.168.2.84:53: no such host
[WARNING FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables does not exist
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
W0111 19:59:33.239720 811848 checks.go:835] detected that the sandbox image "registry.aliyuncs.com/google_containers/pause:3.6" of the container runtime is inconsistent with that used by kubeadm. It is recommended that using "registry.aliyuncs.com/google_containers/pause:3.9" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local rocky] and IPs [10.96.0.1 192.168.2.175 192.168.3.251 127.0.0.1]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost k8s-master-1] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost k8s-master-1] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 7.563962 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
c6a80e1929786899137bb0a765323fa0cb7c14fb8c0bedb61a0eaf1583a13abd
[mark-control-plane] Marking the node rocky as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node rocky as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: u9ryln.7f9t2ih8v1es5d79
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a \
--control-plane --certificate-key 5da3036c3748773980d0cc9ee4352ace20f6b3a5fbee5a5aad2a9ff0bba3ccd2
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a
# troubleshooting
journalctl -u kubelet
# check cluster status
[root@k8s-master-1 apps]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy ok
# list all cluster pods
[root@k8s-master-1 apps]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-66f779496c-dk8sr 0/1 Pending 0 4m40s
kube-system coredns-66f779496c-vmqcl 0/1 Pending 0 4m40s
kube-system etcd-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-apiserver-k8s-master-1 1/1 Running 1 4m57s
kube-system kube-controller-manager-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-proxy-rmc6j 1/1 Running 0 4m40s
kube-system kube-scheduler-k8s-master-1 1/1 Running 1 4m53s
# switch kube-proxy to ipvs mode
kubectl -n kube-system edit cm kube-proxy
logging:
flushFrequency: 0
options:
json:
infoBufferSize: "0"
verbosity: 0
metricsBindAddress: ""
mode: "ipvs" # 添加 ipvs
nodePortAddresses: null
# apply the change by deleting the pod so it restarts with the new config
kubectl -n kube-system delete pod kube-proxy-rmc6j
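Equivalently, all kube-proxy pods can be restarted in one go (the same approach the load-balancing section uses later):
kubectl -n kube-system rollout restart daemonset kube-proxy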
# check that the kube-ipvs0 interface was created
[root@k8s-master-1 apps]# ip a | grep kube-ipvs0
3: kube-ipvs0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
inet 10.96.0.1/32 scope global kube-ipvs0
inet 10.96.0.10/32 scope global kube-ipvs0
# inspect the ipvs rules
[root@k8s-master-1 apps]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 rr
-> 192.168.2.175:6443 Masq 1 0 0
TCP 10.96.0.10:53 rr
TCP 10.96.0.10:9153 rr
UDP 10.96.0.10:53 rr
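Before joining the other masters, it is worth confirming that kube-vip is holding the VIP on this node (a quick check, assuming the eth0 interface used in the manifest above):
ip a s eth0 | grep 192.168.3.251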
08 Join master2 and master3 (run on each of them)
# kubeadm join 192.168.3.251:6443 --token u9ryln.7f9t2ih8v1es5d79 \
--discovery-token-ca-cert-hash sha256:2c8298c1e572f37919d6df24cb80984b421a25ffd06bcc8ba522afb0ce9a5f83 \
--control-plane --certificate-key c6a80e1929786899137bb0a765323fa0cb7c14fb8c0bedb61a0eaf1583a13abd
[root@k8s-master-2 tmp]# kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
> --discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a \
> --control-plane --certificate-key 5da3036c3748773980d0cc9ee4352ace20f6b3a5fbee5a5aad2a9ff0bba3ccd2
W0111 20:26:07.335263 8470 initconfiguration.go:120] Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-docker/cri-docker.sock". Please update your configuration!
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-master-2" could not be reached
[WARNING Hostname]: hostname "k8s-master-2": lookup k8s-master-2 on 192.168.2.84:53: no such host
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-master-2 localhost] and IPs [192.168.2.176 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-master-2 localhost] and IPs [192.168.2.176 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-master-2 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.2.176 192.168.3.251 127.0.0.1]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Creating static Pod manifest for "etcd"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[kubelet-check] Initial timeout of 40s passed.
The 'update-status' phase is deprecated and will be removed in a future release. Currently it performs no operation
[mark-control-plane] Marking the node k8s-master-2 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node k8s-master-2 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
# enable kubelet at boot
systemctl enable kubelet.service
# check its status
systemctl status kubelet.service
# troubleshooting
journalctl -u kubelet
# on a master node, list the nodes
[root@k8s-master-1 tmp]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master-1 NotReady control-plane 5m56s v1.28.2
k8s-master-2 NotReady control-plane 10m v1.28.2
k8s-master-3 NotReady control-plane 10m v1.28.2
09 Join the worker nodes (run on every worker)
# kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a
[root@k8s-node-1 ~]# kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
> --discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a
W0112 09:18:43.791610 356308 initconfiguration.go:120] Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-docker/cri-docker.sock". Please update your configuration!
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-node-1" could not be reached
[WARNING Hostname]: hostname "k8s-node-1": lookup k8s-node-1 on 192.168.2.84:53: no such host
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
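Freshly joined workers show ROLES as <none> in kubectl get nodes. If you want that column populated, the nodes can optionally be labeled (purely cosmetic; node names taken from this cluster):
for n in k8s-node-1 k8s-node-2 k8s-node-3 k8s-node-4; do kubectl label node $n node-role.kubernetes.io/worker=; done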
10 Deploy the flannel CNI
cat <<EOF | kubectl create -f -
---
kind: Namespace
apiVersion: v1
metadata:
name: kube-flannel
labels:
k8s-app: flannel
pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: flannel
name: flannel
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- apiGroups:
- networking.k8s.io
resources:
- clustercidrs
verbs:
- list
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: flannel
name: flannel
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: flannel
subjects:
- kind: ServiceAccount
name: flannel
namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: flannel
name: flannel
namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
name: kube-flannel-cfg
namespace: kube-flannel
labels:
tier: node
k8s-app: flannel
app: flannel
data:
cni-conf.json: |
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
net-conf.json: |
{
"Network": "10.244.0.0/16",
"Backend": {
"Type": "vxlan"
}
}
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-flannel-ds
namespace: kube-flannel
labels:
tier: node
app: flannel
k8s-app: flannel
spec:
selector:
matchLabels:
app: flannel
template:
metadata:
labels:
tier: node
app: flannel
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
hostNetwork: true
priorityClassName: system-node-critical
tolerations:
- operator: Exists
effect: NoSchedule
serviceAccountName: flannel
initContainers:
- name: install-cni-plugin
image: docker.io/flannel/flannel-cni-plugin:v1.2.0
command:
- cp
args:
- -f
- /flannel
- /opt/cni/bin/flannel
volumeMounts:
- name: cni-plugin
mountPath: /opt/cni/bin
- name: install-cni
image: docker.io/flannel/flannel:v0.22.3
command:
- cp
args:
- -f
- /etc/kube-flannel/cni-conf.json
- /etc/cni/net.d/10-flannel.conflist
volumeMounts:
- name: cni
mountPath: /etc/cni/net.d
- name: flannel-cfg
mountPath: /etc/kube-flannel/
containers:
- name: kube-flannel
image: docker.io/flannel/flannel:v0.22.3
command:
- /opt/bin/flanneld
args:
- --ip-masq
- --kube-subnet-mgr
resources:
requests:
cpu: "100m"
memory: "50Mi"
securityContext:
privileged: false
capabilities:
add: ["NET_ADMIN", "NET_RAW"]
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: EVENT_QUEUE_DEPTH
value: "5000"
volumeMounts:
- name: run
mountPath: /run/flannel
- name: flannel-cfg
mountPath: /etc/kube-flannel/
- name: xtables-lock
mountPath: /run/xtables.lock
volumes:
- name: run
hostPath:
path: /run/flannel
- name: cni-plugin
hostPath:
path: /opt/cni/bin
- name: cni
hostPath:
path: /etc/cni/net.d
- name: flannel-cfg
configMap:
name: kube-flannel-cfg
- name: xtables-lock
hostPath:
path: /run/xtables.lock
type: FileOrCreate
EOF
11 Cluster tests
[root@k8s-master-1 tmp]# kubectl get pod -A
kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-775sk 1/1 Running 0 3m26s
kube-flannel kube-flannel-ds-px8vn 1/1 Running 0 3m26s
kube-system coredns-66f779496c-86psn 1/1 Running 0 13h
kube-system coredns-66f779496c-ptkdz 1/1 Running 0 13h
kube-system etcd-rocky 1/1 Running 3 13h
kube-system kube-apiserver-rocky 1/1 Running 14 (13h ago) 13h
kube-system kube-controller-manager-rocky 1/1 Running 6 (13h ago) 13h
kube-system kube-proxy-5rld2 1/1 Running 0 13h
kube-system kube-proxy-hkzts 1/1 Running 0 14m
kube-system kube-scheduler-rocky 1/1 Running 5 (13h ago) 13h
kube-system kube-vip-rocky 1/1 Running 9 (32m ago) 13h
# DNS test
dig @10.96.0.10 www.qq.com
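Cluster-internal resolution can be checked the same way; the query below should return 10.96.0.1, the ClusterIP of the kubernetes service (standard in-cluster service name assumed):
dig @10.96.0.10 kubernetes.default.svc.cluster.local +short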
cat <<EOF | kubectl create -f -
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: net-tools
labels:
k8s-app: net-tools
spec:
selector:
matchLabels:
k8s-app: net-tools
template:
metadata:
labels:
k8s-app: net-tools
spec:
tolerations:
- effect: NoSchedule
operator: Exists
- key: CriticalAddonsOnly
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- name: net-tools
image: juestnow/net-tools
command:
- /bin/sh
- "-c"
- set -e -x; tail -f /dev/null
resources:
limits:
memory: 30Mi
requests:
cpu: 50m
memory: 20Mi
dnsConfig:
options:
- name: single-request-reopen
EOF
[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 0/1 ContainerCreating 0 18s
net-tools-bxdns 0/1 ContainerCreating 0 18s
[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 1/1 Running 0 105s
net-tools-bxdns 1/1 Running 0 105s
[root@k8s-master-1 tmp]# kubectl exec -ti net-tools-8wxnf /bin/sh
/ # ping www.qq.com
PING www.qq.com (121.14.77.221): 56 data bytes
64 bytes from 121.14.77.221: seq=0 ttl=51 time=7.157 ms
^C
--- www.qq.com ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 7.157/7.157/7.157 ms
# in-cluster network
/ # nc -vz kubernetes 443
kubernetes (10.96.0.1:443) open
/ # curl -k https://kubernetes
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "forbidden: User \"system:anonymous\" cannot get path \"/\"",
"reason": "Forbidden",
"details": {},
"code": 403
}/ #
# internal name resolution works
# which confirms the cluster network is healthy
Check the node details:
kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master-1 Ready control-plane 13h v1.28.2 192.168.2.175 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-master-2 Ready control-plane 13h v1.28.2 192.168.2.176 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-master-3 Ready control-plane 13h v1.28.2 192.168.2.177 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-1 Ready <none> 13h v1.28.2 192.168.2.185 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-2 Ready <none> 13h v1.28.2 192.168.2.187 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-3 Ready <none> 13h v1.28.2 192.168.3.62 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-4 Ready <none> 13h v1.28.2 192.168.3.70 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
12 Load balancing for very large clusters
With the kube-vip approach only one master serves API traffic at a time, so there is no load balancing across the masters. To spread the load, every node runs a local proxy on 127.0.0.1 (nginx or haproxy both work) that forwards to all masters. The following images are used:
Project: https://github.com/qist/k8s/tree/master/dockerfile/k8s-ha-master
nginx image: docker.io/juestnow/nginx-proxy:1.21.6
haproxy image: docker.io/juestnow/haproxy-proxy:2.5.4
Prometheus metrics port: 8404
CP_HOSTS: backend master IPs, 192.168.2.175,192.168.2.176,192.168.2.177
CPU_NUM: number of CPUs used by the proxy processes, 4
BACKEND_PORT: backend apiserver port, 6443
HOST_PORT: local listen port of the proxy, 8443
Run the following on all nodes.
The kube-vip VIP remains in place for kubectl and CI/CD tools.
cat <<EOF | tee /etc/kubernetes/manifests/kube-lb.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-lb
tier: control-plane
annotations:
prometheus.io/port: "8404"
prometheus.io/scrape: "true"
name: kube-lb
namespace: kube-system
spec:
containers:
- args:
- "CP_HOSTS=192.168.2.175,192.168.2.176,192.168.2.177"
image: docker.io/juestnow/haproxy-proxy:2.5.4
imagePullPolicy: IfNotPresent
name: kube-lb
env:
- name: CPU_NUM
value: "4"
- name: BACKEND_PORT
value: "6443"
- name: HOST_PORT
value: "8443"
- name: CP_HOSTS
value: "192.168.2.175,192.168.2.176,192.168.2.177"
hostNetwork: true
priorityClassName: system-cluster-critical
status: {}
EOF
# check that the deployment is complete
[root@k8s-master-1 ~]# kubectl -n kube-system get pod| grep kube-lb
kube-lb-k8s-master-1 1/1 Running 0 77s
kube-lb-k8s-master-2 1/1 Running 0 84s
# on each node, check that the port is listening
[root@k8s-master-1~]# ss -tnlp | grep 8443
LISTEN 0 4096 *:8443 *:* users:(("haproxy",pid=829813,fd=7))
# replace the server address in the kubeconfig files: master1 node
sed -i 's/192.168.2.175:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.175:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf
# note: in controller-manager.conf and scheduler.conf the server address is the node's own IP
# master2 node
sed -i 's/192.168.2.176:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.176:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf
# master3 node
sed -i 's/192.168.2.177:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.177:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf
# all worker nodes
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf
# restart kubelet
systemctl restart kubelet
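To roll the same change out to every worker from one place, a small ssh loop can be used (worker IPs taken from the environment list; assumes root ssh access is set up):
for ip in 192.168.2.185 192.168.2.187 192.168.3.62 192.168.3.70; do
ssh root@$ip "sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf && systemctl restart kubelet"
done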
# taking master1 as the example
[root@k8s-master-1 ~]# netstat -tnp| grep kubelet
tcp 0 0 127.0.0.1:33892 127.0.0.1:8443 ESTABLISHED 832614/kubelet
# find the controller-manager and scheduler CONTAINER IDs
[root@master-1 ~]# crictl ps
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID POD
ea2189e1a86da bd4be6845ffba 12 minutes ago Running kube-lb 0 c11bebef002c1 kube-lb-master-1
62f1acd2683c4 ead0a4a53df89 38 minutes ago Running coredns 0 1c299fce6b19e coredns-66f779496c-86psn
619ccabe67ac7 ead0a4a53df89 38 minutes ago Running coredns 0 e8d5a7fc93544 coredns-66f779496c-ptkdz
d25e5a09017f4 e23f7ca36333c 38 minutes ago Running kube-flannel 0 7b27e08049458 kube-flannel-ds-775sk
983817065f5b3 35d002bc4cbfa About an hour ago Running kube-vip 9 e9d04ca9f5db9 kube-vip-master-1
1c2733db52682 cdcab12b2dd16 14 hours ago Running kube-apiserver 14 31488e8169f07 kube-apiserver-master-1
c16f1b235008f 55f13c92defb1 14 hours ago Running kube-controller-manager 6 9d14a61354bc4 kube-controller-manager-master-1
8b19001f00f0b 7a5d9d67a13f6 14 hours ago Running kube-scheduler 5 cc35049599e04 kube-scheduler-master-1
a1e3b1477ee15 c120fed2beb84 14 hours ago Running kube-proxy 0 c42361e6da312 kube-proxy-5rld2
24a99953e8dd5 73deb9a3f7025 14 hours ago Running etcd 3 933789ea5868d etcd-master-1
# remove the controller-manager and scheduler containers so they restart with the new kubeconfig
crictl rm -f 8b19001f00f0b c16f1b235008f
# verify the change took effect
[root@master-1 ~]# netstat -tnp| grep 8443
tcp 0 0 127.0.0.1:33892 127.0.0.1:8443 ESTABLISHED 832614/kubelet
tcp 0 0 127.0.0.1:36420 127.0.0.1:8443 ESTABLISHED 833501/kube-control
tcp 0 0 127.0.0.1:36446 127.0.0.1:8443 ESTABLISHED 833500/kube-schedul
tcp 0 0 127.0.0.1:36430 127.0.0.1:8443 ESTABLISHED 833500/kube-schedul
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36430 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36446 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36420 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:33892 ESTABLISHED 829813/haproxy
# update the server address in the kube-proxy kubeconfig
kubectl -n kube-system edit cm kube-proxy
# find:      server: https://192.168.3.251:6443
# change to: server: https://127.0.0.1:8443
# restart the kube-proxy pods
kubectl -n kube-system rollout restart daemonsets kube-proxy
# verify that kube-proxy reconnected through the local proxy
[root@master-1 ~]# netstat -tnp| grep kube-proxy
tcp 0 0 127.0.0.1:16228 127.0.0.1:8443 ESTABLISHED 836301/kube-proxy