Files
monok8s/clitools/pkg/node/prereqs.go
2026-03-30 18:41:18 +08:00

192 lines
5.0 KiB
Go

package node
import (
"context"
"errors"
"fmt"
"net"
"strings"
"time"
"k8s.io/klog/v2"
)
// ValidateNodeIPAndAPIServerReachability checks the network prerequisites
// that apply to the cluster role configured in nct.Config.Spec.
//
//   - "control-plane": the API server advertise address must be assigned to
//     one of the local network interfaces.
//   - "worker": the same local-address check applies, and the API server
//     endpoint (host:port) must additionally accept a TCP connection.
//
// Any other role is rejected with an error, as is a nil nct.
//
// The reachability probe dials up to 20 times with a one-second dial timeout
// and a one-second pause between attempts. If ctx is done after a failed
// attempt, ctx.Err() is returned instead of the dial error.
func ValidateNodeIPAndAPIServerReachability(ctx context.Context, nct *NodeContext) error {
	const (
		dialAttempts = 20
		dialTimeout  = 1 * time.Second
		retryDelay   = 1 * time.Second
	)

	if nct == nil {
		return fmt.Errorf("node context is required")
	}

	// requireLocalIP succeeds only when wantedIP parses as an IP address and
	// that address is assigned to one of this host's network interfaces.
	requireLocalIP := func(wantedIP string) error {
		wantedIP = strings.TrimSpace(wantedIP)
		if wantedIP == "" {
			return fmt.Errorf("API server advertise address is required")
		}
		ip := net.ParseIP(wantedIP)
		if ip == nil {
			return fmt.Errorf("invalid API server advertise address %q", wantedIP)
		}

		ifaces, err := net.Interfaces()
		if err != nil {
			return fmt.Errorf("list interfaces: %w", err)
		}
		for _, iface := range ifaces {
			addrs, err := iface.Addrs()
			if err != nil {
				// Best effort: an interface whose addresses cannot be
				// listed is skipped rather than failing the whole check.
				continue
			}
			for _, addr := range addrs {
				var got net.IP
				switch v := addr.(type) {
				case *net.IPNet:
					got = v.IP
				case *net.IPAddr:
					got = v.IP
				}
				if got != nil && got.Equal(ip) {
					return nil
				}
			}
		}
		return fmt.Errorf("required local IP is not present on any interface: %s", wantedIP)
	}

	// checkAPIServerReachable succeeds as soon as one TCP connection to
	// endpoint (host:port) can be established.
	checkAPIServerReachable := func(endpoint string) error {
		endpoint = strings.TrimSpace(endpoint)
		if endpoint == "" {
			return fmt.Errorf("API server endpoint is required")
		}
		host, port, err := net.SplitHostPort(endpoint)
		if err != nil {
			return fmt.Errorf("invalid API server endpoint %q: %w", endpoint, err)
		}
		if strings.TrimSpace(host) == "" || strings.TrimSpace(port) == "" {
			return fmt.Errorf("invalid API server endpoint %q", endpoint)
		}

		klog.Infof("checking API server reachability: %s:%s", host, port)

		dialer := net.Dialer{Timeout: dialTimeout}
		var lastErr error
		for attempt := 1; attempt <= dialAttempts; attempt++ {
			conn, err := dialer.DialContext(ctx, "tcp", endpoint)
			if err == nil {
				// The connection only proves reachability; a close error
				// on it is not actionable.
				_ = conn.Close()
				klog.Infof("API server is reachable")
				return nil
			}
			lastErr = err

			// Cancellation takes precedence over the dial error.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			// No further attempt follows the last one, so do not wait.
			if attempt == dialAttempts {
				break
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(retryDelay):
			}
		}
		return fmt.Errorf("cannot reach API server at %s: %w", endpoint, lastErr)
	}

	cfg := nct.Config.Spec
	role := strings.TrimSpace(cfg.ClusterRole)
	switch role {
	case "control-plane", "worker":
		// Known roles; validated below.
	default:
		return fmt.Errorf("Incorrect ClusterRole: %s", cfg.ClusterRole)
	}

	// Both roles require the advertise address to be local.
	if err := requireLocalIP(cfg.APIServerAdvertiseAddress); err != nil {
		return err
	}
	// Only workers additionally need the API server endpoint to be reachable.
	if role == "worker" {
		return checkAPIServerReachable(cfg.APIServerEndpoint)
	}
	return nil
}
// CheckForVersionSkew compares the Kubernetes version requested in
// nctx.Config.Spec with the version reported by the existing cluster and
// records the result on nctx.BootstrapState.
//
// It returns an error when:
//   - nctx.BootstrapState is nil,
//   - spec.kubernetesVersion normalizes to an empty string,
//   - the local cluster state is partial,
//   - no kubeconfig is available for version detection,
//   - the role is "control-plane" and the cluster version cannot be read or
//     the skew is unsupported,
//   - the role is neither "control-plane" nor "worker" (checked only after
//     the cluster version has been read successfully).
//
// Fresh nodes return nil immediately. For any role other than
// "control-plane", a failure to read the cluster version is tolerated: the
// skew is flagged on the bootstrap state and a "manage worker" action becomes
// "reconcile worker". With a detected version, a "manage" action for either
// role is refined to "reconcile" when the versions are equal and to "upgrade"
// otherwise.
func CheckForVersionSkew(ctx context.Context, nctx *NodeContext) error {
	state := nctx.BootstrapState
	if state == nil {
		return errors.New("BootstrapState is nil, call ClassifyBootstrapAction() first")
	}

	spec := nctx.Config.Spec
	role := strings.TrimSpace(spec.ClusterRole)
	wantVersion := normalizeKubeVersion(strings.TrimSpace(spec.KubernetesVersion))
	if wantVersion == "" {
		return errors.New("spec.kubernetesVersion is required")
	}

	membership := nctx.LocalClusterState.MembershipKind
	if membership == LocalMembershipFresh {
		// A fresh node has no existing cluster version to compare against.
		return nil
	}
	if membership == LocalMembershipPartial {
		return fmt.Errorf("cannot check version skew with partial local cluster state")
	}

	kubeconfig := chooseVersionKubeconfig(nctx.LocalClusterState)
	if kubeconfig == "" {
		return fmt.Errorf("no kubeconfig available for version detection")
	}

	clusterVersion, err := getServerVersion(ctx, kubeconfig)
	if err != nil {
		if role == "control-plane" {
			return fmt.Errorf("existing control-plane state found, but cluster version could not be determined: %w", err)
		}
		// Every other role is handled permissively: flag the skew and
		// continue instead of failing.
		state.UnsupportedWorkerVersionSkew = true
		state.VersionSkewReason = "cluster version could not be determined"
		if state.Action == BootstrapActionManageWorker {
			state.Action = BootstrapActionReconcileWorker
		}
		return nil
	}
	state.DetectedClusterVersion = clusterVersion

	if role == "control-plane" {
		if !isSupportedControlPlaneSkew(clusterVersion, wantVersion) {
			return fmt.Errorf(
				"unsupported control-plane version skew: cluster=%s node=%s",
				clusterVersion, wantVersion,
			)
		}
		if state.Action != BootstrapActionManageControlPlane {
			return nil
		}
		if versionEq(clusterVersion, wantVersion) {
			state.Action = BootstrapActionReconcileControlPlane
		} else {
			state.Action = BootstrapActionUpgradeControlPlane
		}
		return nil
	}

	if role != "worker" {
		return fmt.Errorf("unsupported cluster role %q", role)
	}

	// Worker: an unsupported skew is recorded on the state, not returned as
	// an error.
	if !isSupportedWorkerSkew(clusterVersion, wantVersion) {
		state.UnsupportedWorkerVersionSkew = true
		state.VersionSkewReason = fmt.Sprintf(
			"unsupported worker version skew: cluster=%s node=%s",
			clusterVersion, wantVersion,
		)
	}
	if state.Action == BootstrapActionManageWorker {
		if versionEq(clusterVersion, wantVersion) {
			state.Action = BootstrapActionReconcileWorker
		} else {
			state.Action = BootstrapActionUpgradeWorker
		}
	}
	return nil
}