Fixed reboot not working

This commit is contained in:
2026-03-26 20:44:04 +08:00
parent 7d20a2e920
commit bf85462e34
4 changed files with 327 additions and 26 deletions

View File

@@ -31,6 +31,8 @@ load_config() {
MGMT_IFACE="${MGMT_IFACE:-}"
MGMT_ADDRESS="${MGMT_ADDRESS:-}"
MGMT_GATEWAY="${MGMT_GATEWAY:-}"
DNS_NAMESERVERS="${DNS_NAMESERVERS:-}"
DNS_SEARCH_DOMAINS="${DNS_SEARCH_DOMAINS:-}"
}
validate_config() {
@@ -45,17 +47,23 @@ validate_config() {
fail "MGMT_ADDRESS must include a CIDR prefix, example: 10.0.0.13/24"
;;
esac
if [ -n "$DNS_NAMESERVERS" ]; then
for ns in $DNS_NAMESERVERS; do
case "$ns" in
10.96.0.10)
fail "DNS_NAMESERVERS must not contain cluster DNS service IP (10.96.0.10)"
;;
esac
done
fi
}
# Verify that every external tool this script depends on is available.
# Each name is handed to need_cmd (defined elsewhere in this file) in the
# order listed below.
#
# NOTE(review): the original comment reads "only the special one, coreutils
# should not be checked", yet cut/mkdir/printf/cat are still in the list.
# Confirm whether those four entries are meant to stay.
check_prereqs() {
  local required_tool

  for required_tool in ip hostname grep awk cut mkdir printf cat; do
    need_cmd "$required_tool"
  done
}
configure_mgmt_interface() {
@@ -123,16 +131,59 @@ net.ipv4.ip_forward = 1
EOF
}
# Write /etc/resolv.conf from DNS_NAMESERVERS and DNS_SEARCH_DOMAINS.
#
# Globals (read):
#   DNS_NAMESERVERS     whitespace-separated list of nameserver addresses;
#                       when empty, /etc/resolv.conf is left untouched.
#   DNS_SEARCH_DOMAINS  optional value written verbatim after "search ".
# Outputs:
#   Progress messages via log; aborts via fail when DNS_NAMESERVERS is
#   non-empty but yields no entries (e.g. it contains only whitespace).
#
# The file is assembled under a temporary name in /etc and then moved into
# place, so readers never see a half-written resolv.conf.
configure_dns() {
  local tmpfile
  local ns
  local ns_count=0

  if [ -z "$DNS_NAMESERVERS" ]; then
    log "DNS_NAMESERVERS not set; leaving /etc/resolv.conf unchanged"
    return
  fi

  mkdir -p /etc
  tmpfile="/etc/resolv.conf.monok8s.tmp"
  : > "$tmpfile"

  if [ -n "$DNS_SEARCH_DOMAINS" ]; then
    printf 'search %s\n' "$DNS_SEARCH_DOMAINS" >> "$tmpfile"
  fi

  # Intentionally unquoted: the list is split on whitespace.
  for ns in $DNS_NAMESERVERS; do
    printf 'nameserver %s\n' "$ns" >> "$tmpfile"
    ns_count=$((ns_count + 1))
  done

  if [ "$ns_count" -eq 0 ]; then
    # Do not leave the partially written temp file behind in /etc.
    rm -f "$tmpfile"
    fail "DNS_NAMESERVERS is set but no valid nameservers were parsed"
  fi

  printf 'options timeout:2 attempts:3\n' >> "$tmpfile"
  mv "$tmpfile" /etc/resolv.conf
  log "configured /etc/resolv.conf from DNS_NAMESERVERS"
}
# Log a summary of the applied node configuration.
#
# Globals (read): HOSTNAME, MGMT_IFACE, MGMT_ADDRESS, MGMT_GATEWAY,
#                 DNS_NAMESERVERS, DNS_SEARCH_DOMAINS
# The three optional settings fall back to a placeholder when unset or empty.
print_summary() {
  log "node configuration applied"
  log "hostname: $HOSTNAME"
  log "interface: $MGMT_IFACE"
  log "address: $MGMT_ADDRESS"
  log "gateway: ${MGMT_GATEWAY:-<not set>}"
  log "dns nameservers: ${DNS_NAMESERVERS:-<unchanged>}"
  log "dns search: ${DNS_SEARCH_DOMAINS:-<not set>}"
}
main() {
@@ -142,8 +193,9 @@ main() {
ensure_ip_forward
configure_mgmt_interface
configure_dns
set_hostname_if_needed
print_summary
}
# Entry point: run main exactly once, forwarding the script's arguments.
main "$@"

View File

@@ -4,6 +4,8 @@ set -eu
CONFIG_DIR="${CONFIG_DIR:-/opt/monok8s/config}"
CLUSTER_ENV="${CONFIG_DIR}/cluster.env"
KUBEADM_CONFIG_OUT="${KUBEADM_CONFIG_OUT:-/tmp/kubeadm-init.yaml}"
ADMIN_KUBECONFIG="/etc/kubernetes/admin.conf"
KUBELET_KUBECONFIG="/etc/kubernetes/kubelet.conf"
log() {
echo "[monok8s] $*"
@@ -40,7 +42,7 @@ load_config() {
SANS="${SANS:-}"
ALLOW_SCHEDULING_ON_CONTROL_PLANE="${ALLOW_SCHEDULING_ON_CONTROL_PLANE:-yes}"
SKIP_IMAGE_CHECK="${SKIP_IMAGE_CHECK:-no}"
KUBECONFIG_USER_HOME="${KUBECONFIG_USER_HOME:-/root}"
KUBE_PROXY_NODEPORT_ADDRESSES="${KUBE_PROXY_NODEPORT_ADDRESSES:-primary}"
BOOTSTRAP_MODE="${BOOTSTRAP_MODE:-init}"
JOIN_KIND="${JOIN_KIND:-worker}"
@@ -51,6 +53,162 @@ load_config() {
CNI_PLUGIN="${CNI_PLUGIN:-none}"
}
# Run kubectl with the kubeconfig at $ADMIN_KUBECONFIG.
# Arguments: forwarded to kubectl unchanged.
# Returns:   kubectl's exit status.
kubectl_admin() {
kubectl --kubeconfig "$ADMIN_KUBECONFIG" "$@"
}
# Run kubectl with the kubeconfig at $KUBELET_KUBECONFIG.
# Arguments: forwarded to kubectl unchanged.
# Returns:   kubectl's exit status.
kubectl_kubelet() {
kubectl --kubeconfig "$KUBELET_KUBECONFIG" "$@"
}
# Ask rc-service to start the kubelet service, best effort.
# The command's output is discarded and a non-zero status is ignored, so this
# function does not report whether kubelet actually started; the callers in
# this file follow it with check_kubelet_running.
start_kubelet() {
log "starting kubelet..."
# `|| true` keeps a failing start from aborting the script under `set -e`.
rc-service kubelet start >/dev/null 2>&1 || true
}
# Restart the kubelet service through rc-service.
# Unlike start_kubelet, the command's output is not suppressed and its exit
# status becomes this function's return value.
restart_kubelet() {
log "restarting kubelet..."
rc-service kubelet restart
}
# Poll `rc-service kubelet status` until it succeeds.
# Tries up to 30 times with a one-second pause after each failed attempt.
# Returns 0 as soon as the status check passes; otherwise calls fail with the
# last recorded status.
check_kubelet_running() {
  log "waiting for kubelet to become ready..."
  last_status="unknown"

  for _ in $(seq 1 30); do
    if ! rc-service kubelet status >/dev/null 2>&1; then
      # Not up yet: note why and try again after a short pause.
      last_status="service-not-running"
      sleep 1
      continue
    fi

    log "kubelet is up"
    return 0
  done

  fail "kubelet did not become ready in time (${last_status})"
}
# Succeed when /etc/kubernetes/manifests/kube-apiserver.yaml exists as a
# regular file; fail otherwise.
# NOTE(review): presumably this is the API server static-pod manifest, used as
# the marker for "this node runs a control plane" — confirm.
is_local_control_plane_node() {
[ -f /etc/kubernetes/manifests/kube-apiserver.yaml ]
}
# Wait until a TCP connection to ${APISERVER_ADVERTISE_ADDRESS}:6443 succeeds.
# Probes with `nc -z` up to 90 times, sleeping two seconds after each failed
# probe. Returns 0 on the first successful probe; calls fail when every
# attempt has been used up.
wait_for_local_apiserver() {
  need_cmd nc

  log "waiting for local API server on ${APISERVER_ADVERTISE_ADDRESS}:6443..."

  for _ in $(seq 1 90); do
    if ! nc -z "${APISERVER_ADVERTISE_ADDRESS}" 6443 >/dev/null 2>&1; then
      sleep 2
      continue
    fi

    log "local API server TCP port is reachable"
    return 0
  done

  fail "local API server did not become reachable on ${APISERVER_ADVERTISE_ADDRESS}:6443"
}
# Wait until `kubectl version -o yaml`, run through kubectl_admin, succeeds.
# Fails immediately when $ADMIN_KUBECONFIG is not a regular file. Otherwise
# retries up to 90 times, sleeping two seconds after each failed attempt, and
# calls fail once all attempts are exhausted.
wait_for_admin_api() {
  if [ ! -f "$ADMIN_KUBECONFIG" ]; then
    fail "missing admin kubeconfig: $ADMIN_KUBECONFIG"
  fi

  log "waiting for Kubernetes API to respond via admin.conf..."

  for _ in $(seq 1 90); do
    kubectl_admin version -o yaml >/dev/null 2>&1 || {
      sleep 2
      continue
    }

    log "Kubernetes API is responding"
    return 0
  done

  fail "Kubernetes API did not become ready in time"
}
# When this node already carries cluster state, bring kubelet up and wait for
# the pieces that the version checks later in the script talk to.
#
# BOOTSTRAP_MODE=init and $ADMIN_KUBECONFIG exists:
#   start kubelet, wait for it, wait for the local API server port (only when
#   is_local_control_plane_node succeeds), then wait for the API via admin.conf.
# BOOTSTRAP_MODE=join and $KUBELET_KUBECONFIG exists:
#   start kubelet and wait for it.
# Any other mode, or a missing kubeconfig: nothing happens.
wait_for_existing_cluster_if_needed() {
  if [ "$BOOTSTRAP_MODE" = "init" ]; then
    [ -f "$ADMIN_KUBECONFIG" ] || return 0

    start_kubelet
    check_kubelet_running
    if is_local_control_plane_node; then
      wait_for_local_apiserver
    fi
    wait_for_admin_api
  elif [ "$BOOTSTRAP_MODE" = "join" ]; then
    [ -f "$KUBELET_KUBECONFIG" ] || return 0

    start_kubelet
    check_kubelet_running
  fi
}
# Print the server's gitVersion as reported by `kubectl version -o yaml`,
# queried through kubectl_admin.
# Output: the value of the first "gitVersion:" line that follows the
#         "serverVersion:" line; nothing when no such line is produced
#         (kubectl's stderr is discarded).
get_cluster_server_version() {
  kubectl_admin version -o yaml 2>/dev/null |
    awk '
      seen_server && $1 == "gitVersion:" {
        print $2
        exit
      }
      $1 == "serverVersion:" { seen_server = 1 }
    '
}
# Print the server's gitVersion as reported by `kubectl version -o yaml`,
# queried through kubectl_kubelet (the kubelet's kubeconfig).
# Output: the value of the first "gitVersion:" line that follows the
#         "serverVersion:" line; nothing when no such line is produced
#         (kubectl's stderr is discarded).
get_api_server_version_from_kubelet_kubeconfig() {
  kubectl_kubelet version -o yaml 2>/dev/null |
    awk '
      seen_server && $1 == "gitVersion:" {
        print $2
        exit
      }
      $1 == "serverVersion:" { seen_server = 1 }
    '
}
# Validate a KUBE_PROXY_NODEPORT_ADDRESSES value.
#
# Arguments:
#   $1  the value to check
# Accepted values: the empty string, the literal "primary", or a
# comma-separated list of CIDRs. Spaces around each entry are trimmed. An
# entry must contain a "/" followed by an all-digit prefix, and its address
# part must either contain ":" (prefix 0..128) or look like a dotted quad
# (prefix 0..32). Anything else is reported through fail.
validate_cidr_list_or_primary() {
  value="$1"

  case "$value" in
    '' | primary) return 0 ;;
  esac

  # Split on commas only for the duration of the loop.
  old_ifs="$IFS"
  IFS=','
  for item in $value; do
    trimmed="$(printf '%s' "$item" | sed 's/^ *//;s/ *$//')"
    if [ -z "$trimmed" ]; then
      fail "KUBE_PROXY_NODEPORT_ADDRESSES contains an empty entry"
    fi

    # Stripping "up to the first slash" changes nothing when there is no slash.
    if [ "${trimmed#*/}" = "$trimmed" ]; then
      fail "KUBE_PROXY_NODEPORT_ADDRESSES must be 'primary' or a comma-separated list of CIDRs"
    fi

    ip_part="${trimmed%/*}"
    prefix_part="${trimmed#*/}"

    if ! printf '%s' "$prefix_part" | grep -Eq '^[0-9]+$'; then
      fail "invalid CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
    fi

    # The ":" test must come first so that addresses containing both ":" and
    # dots are range-checked against 128.
    case "$ip_part" in
      *:*)
        if ! printf '%s' "$prefix_part" | awk '{ exit !($1 >= 0 && $1 <= 128) }'; then
          fail "invalid IPv6 CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        fi
        ;;
      *.*.*.*)
        if ! printf '%s' "$prefix_part" | awk '{ exit !($1 >= 0 && $1 <= 32) }'; then
          fail "invalid IPv4 CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        fi
        ;;
      *)
        fail "invalid CIDR entry in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        ;;
    esac
  done
  IFS="$old_ifs"
}
validate_config() {
case "$BOOTSTRAP_MODE" in
init)
@@ -76,6 +234,8 @@ validate_config() {
fail "BOOTSTRAP_MODE must be 'init' or 'join'"
;;
esac
validate_cidr_list_or_primary "$KUBE_PROXY_NODEPORT_ADDRESSES"
}
normalize_version() {
@@ -141,19 +301,43 @@ validate_target_matches_local_binaries() {
}
decide_bootstrap_action() {
current_version=""
case "$BOOTSTRAP_MODE" in
init)
if [ -f /etc/kubernetes/admin.conf ]; then
BOOTSTRAP_ACTION="upgrade-control-plane"
else
if [ ! -f "$ADMIN_KUBECONFIG" ]; then
BOOTSTRAP_ACTION="init"
log "selected bootstrap action: $BOOTSTRAP_ACTION"
return 0
fi
current_version="$(get_cluster_server_version || true)"
[ -n "$current_version" ] || fail "existing control-plane config found, but cluster version could not be determined"
log "detected existing control-plane version: $current_version"
if version_eq "$current_version" "$KUBERNETES_VERSION"; then
BOOTSTRAP_ACTION="reconcile-control-plane"
else
BOOTSTRAP_ACTION="upgrade-control-plane"
fi
;;
join)
if [ -f /etc/kubernetes/kubelet.conf ]; then
BOOTSTRAP_ACTION="upgrade-node"
else
if [ ! -f "$KUBELET_KUBECONFIG" ]; then
BOOTSTRAP_ACTION="join"
log "selected bootstrap action: $BOOTSTRAP_ACTION"
return 0
fi
current_version="$(get_api_server_version_from_kubelet_kubeconfig || true)"
[ -n "$current_version" ] || fail "existing kubelet config found, but cluster version could not be determined"
log "detected cluster version visible from this node: $current_version"
if version_eq "$current_version" "$KUBERNETES_VERSION"; then
BOOTSTRAP_ACTION="reconcile-node"
else
BOOTSTRAP_ACTION="upgrade-node"
fi
;;
*)
@@ -164,6 +348,25 @@ decide_bootstrap_action() {
log "selected bootstrap action: $BOOTSTRAP_ACTION"
}
# Bring an already-initialised control-plane node back into service without
# running kubeadm.
# Steps, in order: start kubelet, wait for the kubelet service, wait for the
# local API server TCP port, wait for the API to answer via admin.conf, then
# run the two post-bootstrap helpers that the init branch of main also runs.
reconcile_control_plane() {
log "reconciling existing control-plane node"
start_kubelet
check_kubelet_running
# Unlike wait_for_existing_cluster_if_needed, the port wait here is not
# guarded by is_local_control_plane_node.
wait_for_local_apiserver
wait_for_admin_api
# NOTE(review): both helpers are defined outside the visible part of the
# file; their exact effects are not shown here.
apply_local_node_metadata_if_possible
allow_single_node_scheduling
}
# Bring an already-joined node back into service without running kubeadm:
# start kubelet, then wait until the kubelet service reports as running.
reconcile_node() {
log "reconciling existing joined node"
start_kubelet
check_kubelet_running
}
validate_upgrade_path() {
current="$1"
target="$2"
@@ -192,7 +395,9 @@ check_upgrade_prereqs() {
}
run_kubeadm_upgrade_apply() {
current_version="$(get_cluster_server_version)"
current_version="$(get_cluster_server_version || true)"
[ -n "$current_version" ] || fail "cannot determine current control-plane version; API server is not reachable"
log "current control-plane version: $current_version"
log "target control-plane version: $KUBERNETES_VERSION"
@@ -230,7 +435,7 @@ check_prereqs() {
need_cmd crictl
need_cmd rc-service
need_cmd awk
need_cmd ip
need_cmd ip
need_cmd grep
need_cmd sed
need_cmd hostname
@@ -256,7 +461,7 @@ check_apiserver_reachable() {
}
# Start the crio service through rc-service.
# Returns: rc-service's exit status.
start_crio() {
  # The start command was listed twice; one invocation is enough.
  rc-service crio start
}
check_crio_running() {
@@ -355,6 +560,36 @@ run_kubeadm_join() {
esac
}
# Print a KubeProxyConfiguration YAML document for nodePortAddresses.
#
# Globals (read): KUBE_PROXY_NODEPORT_ADDRESSES
#   unset/empty  -> prints nothing
#   "primary"    -> a single "primary" list entry
#   otherwise    -> one quoted list entry per comma-separated item, with
#                   surrounding spaces trimmed and empty items skipped
# Output goes to stdout; the caller redirects it where needed.
generate_kube_proxy_config_block() {
  [ -n "${KUBE_PROXY_NODEPORT_ADDRESSES:-}" ] || return 0

  # Document header shared by both output shapes.
  printf '%s\n' \
    "---" \
    "apiVersion: kubeproxy.config.k8s.io/v1alpha1" \
    "kind: KubeProxyConfiguration" \
    "nodePortAddresses:"

  if [ "$KUBE_PROXY_NODEPORT_ADDRESSES" = "primary" ]; then
    printf '%s\n' "- primary"
    return 0
  fi

  # Split on commas only for the duration of the loop.
  old_ifs="$IFS"
  IFS=','
  for item in $KUBE_PROXY_NODEPORT_ADDRESSES; do
    trimmed="$(printf '%s' "$item" | sed 's/^ *//;s/ *$//')"
    if [ -n "$trimmed" ]; then
      printf ' - "%s"\n' "$trimmed"
    fi
  done
  IFS="$old_ifs"
}
generate_kubeadm_config() {
log "generating kubeadm config at $KUBEADM_CONFIG_OUT..."
@@ -405,6 +640,8 @@ kind: KubeletConfiguration
cgroupDriver: cgroupfs
containerRuntimeEndpoint: ${CONTAINER_RUNTIME_ENDPOINT}
EOF
generate_kube_proxy_config_block >> "$KUBEADM_CONFIG_OUT"
}
run_kubeadm_init() {
@@ -503,15 +740,10 @@ print_next_steps() {
case "$BOOTSTRAP_MODE" in
init)
cat <<EOF
Try these now:
export KUBECONFIG=/root/.kube/config
kubectl get nodes -o wide
kubectl describe nodes
Notes:
- On a fresh cluster without a CNI, nodes may stay NotReady.
- If you want pods to run on this same node, keep ALLOW_SCHEDULING_ON_CONTROL_PLANE=yes.
- kube-proxy nodePortAddresses is set to: ${KUBE_PROXY_NODEPORT_ADDRESSES:-<unset>}
EOF
;;
join)
@@ -537,36 +769,45 @@ main() {
check_prereqs
validate_network_requirements
decide_bootstrap_action
install_cni_if_requested
start_crio
check_crio_running
wait_for_existing_cluster_if_needed
decide_bootstrap_action
case "$BOOTSTRAP_ACTION" in
init)
check_required_images
generate_kubeadm_config
run_kubeadm_init
rc-service kubelet restart
restart_kubelet
apply_local_node_metadata_if_possible
allow_single_node_scheduling
;;
reconcile-control-plane)
reconcile_control_plane
;;
upgrade-control-plane)
check_upgrade_prereqs
check_required_images
generate_kubeadm_config
run_kubeadm_upgrade_apply
rc-service kubelet restart
restart_kubelet
apply_local_node_metadata_if_possible
allow_single_node_scheduling
;;
join)
run_kubeadm_join
;;
reconcile-node)
reconcile_node
;;
upgrade-node)
check_upgrade_prereqs
run_kubeadm_upgrade_node
rc-service kubelet restart
restart_kubelet
;;
*)
fail "unsupported BOOTSTRAP_ACTION: $BOOTSTRAP_ACTION"