#!/bin/sh
# Helper functions for bringing up or upgrading a node with kubeadm.
# Settings are read from ${CONFIG_DIR}/cluster.env (see load_config).
set -eu

CONFIG_DIR="${CONFIG_DIR:-/opt/monok8s/config}"
CLUSTER_ENV="${CONFIG_DIR}/cluster.env"
KUBEADM_CONFIG_OUT="${KUBEADM_CONFIG_OUT:-/tmp/kubeadm-init.yaml}"

# Write an informational message, prefixed with "[monok8s]", to stdout.
log() {
  printf '%s\n' "[monok8s] $*"
}

# Write an error message to stderr and terminate the (sub)shell with status 1.
# When called inside a command substitution only that subshell exits; callers
# rely on `set -e` or an explicit `||` to propagate the failure.
fail() {
  printf '%s\n' "[monok8s] ERROR: $*" >&2
  exit 1
}

# Abort unless the command named by $1 can be resolved.
need_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1"
}

# Abort unless $1 is an existing regular file.
require_file() {
  [ -f "$1" ] || fail "required file not found: $1"
}

# Source $CLUSTER_ENV, enforce the mandatory settings and fill in defaults for
# every optional one.
# Globals written: all configuration variables assigned below.
load_config() {
  require_file "$CLUSTER_ENV"

  # shellcheck disable=SC1090
  . "$CLUSTER_ENV"

  : "${KUBERNETES_VERSION:?KUBERNETES_VERSION is required}"
  : "${NODE_NAME:?NODE_NAME is required}"
  : "${APISERVER_ADVERTISE_ADDRESS:?APISERVER_ADVERTISE_ADDRESS is required}"

  POD_SUBNET="${POD_SUBNET:-10.244.0.0/16}"
  SERVICE_SUBNET="${SERVICE_SUBNET:-10.96.0.0/12}"
  CLUSTER_NAME="${CLUSTER_NAME:-monok8s}"
  CLUSTER_DOMAIN="${CLUSTER_DOMAIN:-cluster.local}"
  CONTAINER_RUNTIME_ENDPOINT="${CONTAINER_RUNTIME_ENDPOINT:-unix:///var/run/crio/crio.sock}"
  SANS="${SANS:-}"
  ALLOW_SCHEDULING_ON_CONTROL_PLANE="${ALLOW_SCHEDULING_ON_CONTROL_PLANE:-yes}"
  SKIP_IMAGE_CHECK="${SKIP_IMAGE_CHECK:-no}"
  KUBECONFIG_USER_HOME="${KUBECONFIG_USER_HOME:-/root}"

  BOOTSTRAP_MODE="${BOOTSTRAP_MODE:-init}"
  JOIN_KIND="${JOIN_KIND:-worker}"
  API_SERVER_ENDPOINT="${API_SERVER_ENDPOINT:-}"
  BOOTSTRAP_TOKEN="${BOOTSTRAP_TOKEN:-}"
  DISCOVERY_TOKEN_CA_CERT_HASH="${DISCOVERY_TOKEN_CA_CERT_HASH:-}"
  CONTROL_PLANE_CERT_KEY="${CONTROL_PLANE_CERT_KEY:-}"
  CNI_PLUGIN="${CNI_PLUGIN:-none}"
}

# Check that the mode-specific settings are consistent:
#   init - nothing further is required
#   join - endpoint, token and CA hash are mandatory; JOIN_KIND must be
#          "worker" or "control-plane", the latter also needing a cert key
validate_config() {
  case "$BOOTSTRAP_MODE" in
    init)
      ;;
    join)
      : "${API_SERVER_ENDPOINT:?API_SERVER_ENDPOINT is required for join mode}"
      : "${BOOTSTRAP_TOKEN:?BOOTSTRAP_TOKEN is required for join mode}"
      : "${DISCOVERY_TOKEN_CA_CERT_HASH:?DISCOVERY_TOKEN_CA_CERT_HASH is required for join mode}"

      case "$JOIN_KIND" in
        worker|control-plane)
          ;;
        *)
          fail "JOIN_KIND must be 'worker' or 'control-plane'"
          ;;
      esac

      if [ "$JOIN_KIND" = "control-plane" ]; then
        : "${CONTROL_PLANE_CERT_KEY:?CONTROL_PLANE_CERT_KEY is required for JOIN_KIND=control-plane}"
      fi
      ;;
    *)
      fail "BOOTSTRAP_MODE must be 'init' or 'join'"
      ;;
  esac
}

# Print version $1 without a single leading "v".
normalize_version() {
  # strip leading "v"
  printf '%s\n' "${1#v}"
}

# Print the "MAJOR.MINOR" prefix of version $1.
version_major_minor() {
  normalize_version "$1" | awk -F. '{ print $1 "." $2 }'
}

# Succeed when $1 and $2 are the same version string, ignoring a leading "v".
version_eq() {
  [ "$(normalize_version "$1")" = "$(normalize_version "$2")" ]
}

# Succeed when $1 sorts strictly before $2 under `sort -V`.
version_lt() {
  [ "$(printf '%s\n%s\n' "$(normalize_version "$1")" "$(normalize_version "$2")" | sort -V | head -n1)" != "$(normalize_version "$2")" ]
}

# Succeed when $1 sorts strictly after $2 under `sort -V`.
# "a > b" is exactly "b < a", so reuse version_lt instead of a second sort.
version_gt() {
  version_lt "$2" "$1"
}

# Print (minor of $2) - (minor of $1). Aborts when the major versions differ.
minor_diff() {
  a="$(version_major_minor "$1")"
  b="$(version_major_minor "$2")"

  a_major="${a%.*}"
  a_minor="${a#*.}"
  b_major="${b%.*}"
  b_minor="${b#*.}"

  [ "$a_major" = "$b_major" ] || fail "major version change unsupported here: $1 -> $2"

  echo $((b_minor - a_minor))
}

# Print the version reported by the local kubeadm binary.
get_kubeadm_binary_version() {
  kubeadm version -o short
}

# Print serverVersion.gitVersion as reported by `kubectl version -o yaml`
# using the kubeconfig at $1. Aborts when kubectl fails or when no server
# version is present in its output, so callers never continue with an empty
# version string.
_server_version_from_kubeconfig() {
  version_yaml="$(kubectl --kubeconfig "$1" version -o yaml)" \
    || fail "unable to query API server version using $1"

  server_git_version="$(
    printf '%s\n' "$version_yaml" | awk '
      $1 == "serverVersion:" { in_server=1; next }
      in_server && $1 == "gitVersion:" { print $2; exit }
    '
  )"

  [ -n "$server_git_version" ] \
    || fail "API server version not found in kubectl output (kubeconfig: $1)"

  printf '%s\n' "$server_git_version"
}

# Print the API server version as seen through /etc/kubernetes/admin.conf.
get_cluster_server_version() {
  _server_version_from_kubeconfig /etc/kubernetes/admin.conf
}

# Print the API server version as seen through /etc/kubernetes/kubelet.conf.
get_api_server_version_from_kubelet_kubeconfig() {
  _server_version_from_kubeconfig /etc/kubernetes/kubelet.conf
}

# Abort unless the kubeadm binary version equals KUBERNETES_VERSION.
validate_target_matches_local_binaries() {
  kubeadm_ver="$(get_kubeadm_binary_version)"

  if ! version_eq "$kubeadm_ver" "$KUBERNETES_VERSION"; then
    fail "kubeadm binary version ($kubeadm_ver) does not match target KUBERNETES_VERSION ($KUBERNETES_VERSION)"
  fi
}

# Set BOOTSTRAP_ACTION from BOOTSTRAP_MODE and the kubeconfig files already
# on disk:
#   init + admin.conf present   -> upgrade-control-plane, otherwise init
#   join + kubelet.conf present -> upgrade-node, otherwise join
decide_bootstrap_action() {
  case "$BOOTSTRAP_MODE" in
    init)
      if [ -f /etc/kubernetes/admin.conf ]; then
        BOOTSTRAP_ACTION="upgrade-control-plane"
      else
        BOOTSTRAP_ACTION="init"
      fi
      ;;
    join)
      if [ -f /etc/kubernetes/kubelet.conf ]; then
        BOOTSTRAP_ACTION="upgrade-node"
      else
        BOOTSTRAP_ACTION="join"
      fi
      ;;
    *)
      fail "unsupported BOOTSTRAP_MODE: $BOOTSTRAP_MODE"
      ;;
  esac

  log "selected bootstrap action: $BOOTSTRAP_ACTION"
}

# Check that moving from version $1 (current) to $2 (target) is allowed:
# equal versions are accepted, downgrades are rejected, and the minor version
# may advance by at most one.
validate_upgrade_path() {
  current="$1"
  target="$2"

  if version_eq "$current" "$target"; then
    log "cluster is already at target version: $target"
    return 0
  fi

  if version_gt "$current" "$target"; then
    fail "downgrade is not supported: current=$current target=$target"
  fi

  diff="$(minor_diff "$current" "$target")"
  case "$diff" in
    0|1)
      ;;
    *)
      fail "unsupported upgrade path: current=$current target=$target (minor skip too large)"
      ;;
  esac
}

# Checks that must pass before any upgrade action runs.
check_upgrade_prereqs() {
  validate_target_matches_local_binaries
}

# Upgrade the control plane to KUBERNETES_VERSION with `kubeadm upgrade plan`
# followed by `kubeadm upgrade apply -y`, unless it is already at that version.
run_kubeadm_upgrade_apply() {
  current_version="$(get_cluster_server_version)"

  log "current control-plane version: $current_version"
  log "target control-plane version: $KUBERNETES_VERSION"

  validate_upgrade_path "$current_version" "$KUBERNETES_VERSION"

  if version_eq "$current_version" "$KUBERNETES_VERSION"; then
    log "control-plane already at target version; skipping kubeadm upgrade apply"
    return 0
  fi

  log "running kubeadm upgrade plan..."
  kubeadm upgrade plan "$KUBERNETES_VERSION"

  log "running kubeadm upgrade apply..."
  kubeadm upgrade apply -y "$KUBERNETES_VERSION"
}

# Run `kubeadm upgrade node`, but only when the API server version visible
# through kubelet.conf already equals KUBERNETES_VERSION.
run_kubeadm_upgrade_node() {
  cluster_version="$(get_api_server_version_from_kubelet_kubeconfig)"

  log "cluster/control-plane version visible from this node: $cluster_version"
  log "target node version: $KUBERNETES_VERSION"

  if ! version_eq "$cluster_version" "$KUBERNETES_VERSION"; then
    fail "control-plane version ($cluster_version) does not match target ($KUBERNETES_VERSION); upgrade control-plane first"
  fi

  log "running kubeadm upgrade node..."
  kubeadm upgrade node
}

# Abort unless every external command listed here is available.
check_prereqs() {
  need_cmd kubeadm
  need_cmd kubelet
  need_cmd kubectl
  need_cmd crictl
  need_cmd rc-service
  need_cmd awk
  need_cmd ip
  need_cmd grep
  need_cmd sed
  need_cmd hostname
}

# Probe API_SERVER_ENDPOINT ("host:port") with `nc -z`, retrying once per
# second for up to 20 attempts, and abort if it never answers.
check_apiserver_reachable() {
  host="${API_SERVER_ENDPOINT%:*}"   # everything before the last ':'
  port="${API_SERVER_ENDPOINT##*:}"  # everything after the last ':'

  need_cmd nc

  log "checking API server reachability: ${host}:${port}"

  for _ in $(seq 1 20); do
    if nc -z "$host" "$port" >/dev/null 2>&1; then
      log "API server is reachable"
      return 0
    fi
    sleep 1
  done

  fail "cannot reach API server at ${host}:${port}"
}

# Start the crio service through rc-service.
start_crio() {
  rc-service crio start
}

# Wait (up to 30 one-second attempts) until the crio service reports running
# and `crictl info` succeeds against CONTAINER_RUNTIME_ENDPOINT. On timeout the
# last observed state is included in the error message.
check_crio_running() {
  log "waiting for CRI-O to become ready..."

  last_status="unknown"

  for _ in $(seq 1 30); do
    if rc-service crio status >/dev/null 2>&1; then
      last_status="service-running"
      if crictl --runtime-endpoint "$CONTAINER_RUNTIME_ENDPOINT" info >/dev/null 2>&1; then
        log "CRI-O is up"
        return 0
      fi
      last_status="service-running-but-runtime-not-ready"
    else
      last_status="service-not-running"
    fi
    sleep 1
  done

  fail "CRI-O did not become ready in time (${last_status})"
}

# Succeed when image reference $1 ("repo:tag") appears in `crictl images`.
image_present() {
  wanted="$1"

  repo="${wanted%:*}"
  tag="${wanted##*:}"

  # Column 1 is the repository and column 2 the tag; NR>1 skips the header.
  crictl --runtime-endpoint "$CONTAINER_RUNTIME_ENDPOINT" images \
    | awk 'NR>1 { print $1 ":" $2 }' \
    | grep -Fx "$repo:$tag" >/dev/null 2>&1
}

# Verify that every image kubeadm needs for KUBERNETES_VERSION is already in
# the container runtime. Skipped when SKIP_IMAGE_CHECK is "yes". Aborts when
# the image list cannot be obtained or when any image is missing.
check_required_images() {
  if [ "$SKIP_IMAGE_CHECK" = "yes" ]; then
    log "skipping image check (SKIP_IMAGE_CHECK=yes)"
    return 0
  fi

  log "checking required Kubernetes images for $KUBERNETES_VERSION..."

  # Capture the list first: a command substitution used directly as the word
  # list of `for` hides a kubeadm failure and would report success on an
  # empty list.
  required_images="$(kubeadm config images list --kubernetes-version "$KUBERNETES_VERSION")" \
    || fail "unable to list required images for $KUBERNETES_VERSION"
  [ -n "$required_images" ] \
    || fail "kubeadm returned an empty image list for $KUBERNETES_VERSION"

  missing_any=0

  # Word splitting is intentional: one image reference per word.
  for img in $required_images; do
    if image_present "$img"; then
      log "found image: $img"
    else
      printf '%s\n' "[monok8s] MISSING image: $img" >&2
      missing_any=1
    fi
  done

  [ "$missing_any" -eq 0 ] || fail "preload the Kubernetes images before bootstrapping"

  log "all required images are present"
}

# Abort when the kubeconfig that marks a finished bootstrap for the current
# BOOTSTRAP_MODE already exists (admin.conf for init, kubelet.conf for join).
check_not_already_bootstrapped() {
  case "$BOOTSTRAP_MODE" in
    init)
      if [ -f /etc/kubernetes/admin.conf ]; then
        fail "cluster already appears initialized (/etc/kubernetes/admin.conf exists)"
      fi
      ;;
    join)
      if [ -f /etc/kubernetes/kubelet.conf ]; then
        fail "node already appears joined (/etc/kubernetes/kubelet.conf exists)"
      fi
      ;;
  esac
}

# Run `kubeadm join` against API_SERVER_ENDPOINT with the flags appropriate
# for JOIN_KIND (worker or control-plane).
run_kubeadm_join() {
  log "running kubeadm join..."

  case "$JOIN_KIND" in
    worker)
      kubeadm join "${API_SERVER_ENDPOINT}" \
        --token "${BOOTSTRAP_TOKEN}" \
        --discovery-token-ca-cert-hash "${DISCOVERY_TOKEN_CA_CERT_HASH}" \
        --node-name "${NODE_NAME}" \
        --cri-socket "${CONTAINER_RUNTIME_ENDPOINT}"
      ;;
    control-plane)
      kubeadm join "${API_SERVER_ENDPOINT}" \
        --token "${BOOTSTRAP_TOKEN}" \
        --discovery-token-ca-cert-hash "${DISCOVERY_TOKEN_CA_CERT_HASH}" \
        --control-plane \
        --certificate-key "${CONTROL_PLANE_CERT_KEY}" \
        --apiserver-advertise-address "${APISERVER_ADVERTISE_ADDRESS}" \
        --node-name "${NODE_NAME}" \
        --cri-socket "${CONTAINER_RUNTIME_ENDPOINT}"
      ;;
  esac
}

# Build SAN_LINES from the comma-separated SANS list (each entry trimmed of
# surrounding spaces, empty entries dropped) and write KUBEADM_CONFIG_OUT.
generate_kubeadm_config() {
  log "generating kubeadm config at $KUBEADM_CONFIG_OUT..."

  SAN_LINES=""
  if [ -n "${SANS:-}" ]; then
    old_ifs="$IFS"
    IFS=','
    for san in $SANS; do
      san_trimmed="$(echo "$san" | sed 's/^ *//;s/ *$//')"
      [ -n "$san_trimmed" ] && SAN_LINES="${SAN_LINES} - \"${san_trimmed}\" "
    done
    IFS="$old_ifs"
  fi

  # NOTE(review): the reviewed copy of this file looks truncated from here to
  # the end of the function. No here-document body follows the `<`
  # redirection, nothing visible assigns `wanted_ip`, and no definition of the
  # `require_local_ip` helper called by validate_network_requirements is
  # visible. Whitespace inside the SAN_LINES literal above may also have been
  # collapsed. The statements are kept exactly as found -- TODO confirm
  # against the canonical file before relying on this function.
  cat > "$KUBEADM_CONFIG_OUT" </dev/null 2>&1 \
    || fail "required local IP is not present on any interface: $wanted_ip"
}

# Network checks per BOOTSTRAP_MODE: the advertise address must be a local IP
# in both modes; join mode additionally needs the API server to be reachable.
validate_network_requirements() {
  case "$BOOTSTRAP_MODE" in
    init)
      require_local_ip "$APISERVER_ADVERTISE_ADDRESS"
      ;;
    join)
      require_local_ip "$APISERVER_ADVERTISE_ADDRESS"
      check_apiserver_reachable
      ;;
    *)
      fail "unsupported BOOTSTRAP_MODE: $BOOTSTRAP_MODE"
      ;;
  esac
}

# Wait (up to 60 one-second attempts) until `kubectl get node $NODE_NAME`
# succeeds with admin.conf; abort on timeout.
wait_for_node() {
  log "waiting for node registration: $NODE_NAME"

  for _ in $(seq 1 60); do
    if kubectl --kubeconfig /etc/kubernetes/admin.conf get node "$NODE_NAME" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done

  fail "node $NODE_NAME did not register in time"
}

# In init mode, wait for the node to register and then apply the optional
# comma-separated NODE_ANNOTATIONS and NODE_LABELS to it (with --overwrite).
# In any other mode nothing is applied.
apply_local_node_metadata_if_possible() {
  if [ "$BOOTSTRAP_MODE" != "init" ]; then
    log "skipping node labels/annotations from this node (not control-plane init mode)"
    return 0
  fi

  wait_for_node

  # The command substitutions below are left unquoted on purpose: commas are
  # turned into spaces so that each key=value pair becomes its own argument.
  if [ -n "${NODE_ANNOTATIONS:-}" ]; then
    kubectl --kubeconfig /etc/kubernetes/admin.conf annotate node "$NODE_NAME" $(printf '%s' "$NODE_ANNOTATIONS" | tr ',' ' ') --overwrite
  fi

  if [ -n "${NODE_LABELS:-}" ]; then
    kubectl --kubeconfig /etc/kubernetes/admin.conf label node "$NODE_NAME" $(printf '%s' "$NODE_LABELS" | tr ',' ' ') --overwrite
  fi
}

# Enable or disable the CRI-O bridge CNI config by renaming
# /etc/cni/net.d/10-crio-bridge.conflist to or from its ".disabled" name:
#   none   - disable it if present
#   bridge - re-enable it if it was disabled
# Any other CNI_PLUGIN value is rejected.
install_cni_if_requested() {
  case "${CNI_PLUGIN}" in
    none)
      if [ -f /etc/cni/net.d/10-crio-bridge.conflist ]; then
        mv /etc/cni/net.d/10-crio-bridge.conflist \
          /etc/cni/net.d/10-crio-bridge.conflist.disabled
      fi
      log "bootstrap bridge CNI disabled; install a cluster CNI (e.g., flannel) for pod networking"
      ;;
    bridge)
      if [ -f /etc/cni/net.d/10-crio-bridge.conflist.disabled ]; then
        mv /etc/cni/net.d/10-crio-bridge.conflist.disabled \
          /etc/cni/net.d/10-crio-bridge.conflist
      fi
      log "bootstrap bridge CNI enabled"
      ;;
    *)
      fail "unsupported CNI_PLUGIN: ${CNI_PLUGIN}"
      ;;
  esac
}
# When ALLOW_SCHEDULING_ON_CONTROL_PLANE is "yes", ask kubectl (using
# /etc/kubernetes/admin.conf) to remove the
# node-role.kubernetes.io/control-plane taint from $NODE_NAME; for any other
# value, log that the taint is left in place and return.
# The kubectl call is best-effort: its output is discarded and a non-zero
# exit status is ignored via `|| true`.
allow_single_node_scheduling() {
  if [ "$ALLOW_SCHEDULING_ON_CONTROL_PLANE" != "yes" ]; then
    log "leaving control-plane taint in place"
    return 0
  fi

  log "removing control-plane taint so this single node can schedule workloads..."
  kubectl --kubeconfig /etc/kubernetes/admin.conf taint nodes "$NODE_NAME" node-role.kubernetes.io/control-plane- >/dev/null 2>&1 || true
}

print_next_steps() {
  echo
  echo "[monok8s] bootstrap complete"
  echo

  case "$BOOTSTRAP_MODE" in
    init)
      cat <