#!/bin/sh
# monok8s node bootstrap helpers.
#
# This part of the script defines configuration loading/validation, kubelet
# and API-server wait helpers, version comparison utilities, and the logic
# that decides which bootstrap action (init / join / reconcile / upgrade)
# applies to this node.
#
# Configuration is sourced from ${CONFIG_DIR}/cluster.env (see load_config).
set -eu

CONFIG_DIR="${CONFIG_DIR:-/opt/monok8s/config}"
CLUSTER_ENV="${CONFIG_DIR}/cluster.env"
KUBEADM_CONFIG_OUT="${KUBEADM_CONFIG_OUT:-/tmp/kubeadm-init.yaml}"

ADMIN_KUBECONFIG="/etc/kubernetes/admin.conf"
KUBELET_KUBECONFIG="/etc/kubernetes/kubelet.conf"

# Print an informational message (stdout) with the script prefix.
log() {
  echo "[monok8s] $*"
}

# Print an error message to stderr and terminate with status 1.
# NOTE: when called inside a command substitution only the subshell exits;
# callers rely on `set -e` to propagate the failure.
fail() {
  echo "[monok8s] ERROR: $*" >&2
  exit 1
}

# Abort unless command $1 can be resolved by the shell.
need_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1"
}

# Abort unless $1 is an existing regular file.
require_file() {
  [ -f "$1" ] || fail "required file not found: $1"
}

# Source cluster.env, enforce the mandatory settings and fill in defaults
# for every optional one. All settings end up as global shell variables.
load_config() {
  require_file "$CLUSTER_ENV"

  # shellcheck disable=SC1090
  . "$CLUSTER_ENV"

  : "${KUBERNETES_VERSION:?KUBERNETES_VERSION is required}"
  : "${NODE_NAME:?NODE_NAME is required}"
  : "${APISERVER_ADVERTISE_ADDRESS:?APISERVER_ADVERTISE_ADDRESS is required}"

  POD_SUBNET="${POD_SUBNET:-10.244.0.0/16}"
  SERVICE_SUBNET="${SERVICE_SUBNET:-10.96.0.0/12}"
  CLUSTER_NAME="${CLUSTER_NAME:-monok8s}"
  CLUSTER_DOMAIN="${CLUSTER_DOMAIN:-cluster.local}"
  CONTAINER_RUNTIME_ENDPOINT="${CONTAINER_RUNTIME_ENDPOINT:-unix:///var/run/crio/crio.sock}"
  SANS="${SANS:-}"
  ALLOW_SCHEDULING_ON_CONTROL_PLANE="${ALLOW_SCHEDULING_ON_CONTROL_PLANE:-yes}"
  SKIP_IMAGE_CHECK="${SKIP_IMAGE_CHECK:-no}"
  KUBE_PROXY_NODEPORT_ADDRESSES="${KUBE_PROXY_NODEPORT_ADDRESSES:-primary}"

  BOOTSTRAP_MODE="${BOOTSTRAP_MODE:-init}"
  JOIN_KIND="${JOIN_KIND:-worker}"
  API_SERVER_ENDPOINT="${API_SERVER_ENDPOINT:-}"
  BOOTSTRAP_TOKEN="${BOOTSTRAP_TOKEN:-}"
  DISCOVERY_TOKEN_CA_CERT_HASH="${DISCOVERY_TOKEN_CA_CERT_HASH:-}"
  CONTROL_PLANE_CERT_KEY="${CONTROL_PLANE_CERT_KEY:-}"
  CNI_PLUGIN="${CNI_PLUGIN:-none}"
}

# Run kubectl with the cluster-admin kubeconfig; arguments are passed through.
kubectl_admin() {
  kubectl --kubeconfig "$ADMIN_KUBECONFIG" "$@"
}

# Run kubectl with the kubelet's kubeconfig; arguments are passed through.
kubectl_kubelet() {
  kubectl --kubeconfig "$KUBELET_KUBECONFIG" "$@"
}

# Ask OpenRC to start kubelet. Deliberately best-effort: a non-zero status is
# ignored here and readiness is verified separately by check_kubelet_running.
start_kubelet() {
  log "starting kubelet..."
  rc-service kubelet start >/dev/null 2>&1 || true
}

# Restart kubelet via OpenRC; a failure aborts the script (set -e).
restart_kubelet() {
  log "restarting kubelet..."
  rc-service kubelet restart
}

# Poll `rc-service kubelet status` once per second, up to 30 attempts.
# Returns 0 as soon as the service reports running; aborts otherwise.
check_kubelet_running() {
  log "waiting for kubelet to become ready..."

  last_status="unknown"
  attempt=0
  while [ "$attempt" -lt 30 ]; do
    attempt=$((attempt + 1))
    if rc-service kubelet status >/dev/null 2>&1; then
      log "kubelet is up"
      return 0
    fi
    last_status="service-not-running"
    sleep 1
  done

  fail "kubelet did not become ready in time (${last_status})"
}

# True when a kube-apiserver static pod manifest exists on this host.
is_local_control_plane_node() {
  [ -f /etc/kubernetes/manifests/kube-apiserver.yaml ]
}

# Wait until TCP port 6443 on APISERVER_ADVERTISE_ADDRESS accepts connections
# (90 attempts, 2 seconds apart). Aborts on timeout.
wait_for_local_apiserver() {
  need_cmd nc

  log "waiting for local API server on ${APISERVER_ADVERTISE_ADDRESS}:6443..."

  attempt=0
  while [ "$attempt" -lt 90 ]; do
    attempt=$((attempt + 1))
    if nc -z "${APISERVER_ADVERTISE_ADDRESS}" 6443 >/dev/null 2>&1; then
      log "local API server TCP port is reachable"
      return 0
    fi
    sleep 2
  done

  fail "local API server did not become reachable on ${APISERVER_ADVERTISE_ADDRESS}:6443"
}

# Wait until `kubectl version` succeeds through admin.conf
# (90 attempts, 2 seconds apart). Aborts on timeout or missing kubeconfig.
wait_for_admin_api() {
  [ -f "$ADMIN_KUBECONFIG" ] || fail "missing admin kubeconfig: $ADMIN_KUBECONFIG"

  log "waiting for Kubernetes API to respond via admin.conf..."

  attempt=0
  while [ "$attempt" -lt 90 ]; do
    attempt=$((attempt + 1))
    if kubectl_admin version -o yaml >/dev/null 2>&1; then
      log "Kubernetes API is responding"
      return 0
    fi
    sleep 2
  done

  fail "Kubernetes API did not become ready in time"
}

# If this node already carries bootstrap state (admin.conf in init mode,
# kubelet.conf in join mode), bring kubelet up and wait for the relevant
# services before anything tries to query the cluster. No-op otherwise.
wait_for_existing_cluster_if_needed() {
  case "$BOOTSTRAP_MODE" in
    init)
      if [ -f "$ADMIN_KUBECONFIG" ]; then
        start_kubelet
        check_kubelet_running

        if is_local_control_plane_node; then
          wait_for_local_apiserver
        fi

        wait_for_admin_api
      fi
      ;;
    join)
      if [ -f "$KUBELET_KUBECONFIG" ]; then
        start_kubelet
        check_kubelet_running
      fi
      ;;
  esac
}

# Read `kubectl version -o yaml` on stdin and print the gitVersion that
# appears under the serverVersion: key (nothing if it is absent).
extract_server_git_version() {
  awk '
    $1 == "serverVersion:" { in_server = 1; next }
    in_server && $1 == "gitVersion:" { print $2; exit }
  '
}

# Print the API server version as seen through admin.conf.
# Prints nothing when the server cannot be reached; kubectl's own stderr is
# left visible so the caller's "could not be determined" error has context.
# (This file previously defined this function twice; this is the single
# surviving definition, matching the behaviour that was in effect.)
get_cluster_server_version() {
  kubectl_admin version -o yaml | extract_server_git_version
}

# Print the API server version as seen through the kubelet kubeconfig.
# Same output contract as get_cluster_server_version.
get_api_server_version_from_kubelet_kubeconfig() {
  kubectl_kubelet version -o yaml | extract_server_git_version
}

# Validate a KUBE_PROXY_NODEPORT_ADDRESSES value.
#   $1 - empty, the literal "primary", or a comma-separated list of CIDRs
# Accepts IPv4 (prefix 0-32) and IPv6 (prefix 0-128) entries; surrounding
# spaces on each entry are ignored. Aborts via fail on the first bad entry.
validate_cidr_list_or_primary() {
  value="$1"

  [ -n "$value" ] || return 0
  [ "$value" != "primary" ] || return 0

  # Split on commas with globbing disabled, then restore IFS and the glob
  # setting right away so the loop body runs with normal shell settings.
  case "$-" in
    *f*) glob_was_off=1 ;;
    *) glob_was_off=0 ;;
  esac
  old_ifs="$IFS"
  set -f
  IFS=','
  # shellcheck disable=SC2086
  set -- $value
  IFS="$old_ifs"
  if [ "$glob_was_off" -eq 0 ]; then
    set +f
  fi

  for item in "$@"; do
    trimmed="$(printf '%s' "$item" | sed 's/^ *//;s/ *$//')"
    [ -n "$trimmed" ] || fail "KUBE_PROXY_NODEPORT_ADDRESSES contains an empty entry"

    case "$trimmed" in
      */*) ;;
      *) fail "KUBE_PROXY_NODEPORT_ADDRESSES must be 'primary' or a comma-separated list of CIDRs" ;;
    esac

    ip_part="${trimmed%/*}"
    prefix_part="${trimmed#*/}"

    case "$prefix_part" in
      '' | *[!0-9]*)
        fail "invalid CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        ;;
    esac

    case "$ip_part" in
      *:*)
        printf '%s' "$prefix_part" | awk '{ exit !($1 >= 0 && $1 <= 128) }' \
          || fail "invalid IPv6 CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        ;;
      *.*.*.*)
        printf '%s' "$prefix_part" | awk '{ exit !($1 >= 0 && $1 <= 32) }' \
          || fail "invalid IPv4 CIDR prefix in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        ;;
      *)
        fail "invalid CIDR entry in KUBE_PROXY_NODEPORT_ADDRESSES: $trimmed"
        ;;
    esac
  done

  return 0
}

# Check mode-specific required settings and the NodePort address list.
# Expects load_config to have populated the globals it reads.
validate_config() {
  case "$BOOTSTRAP_MODE" in
    init)
      ;;
    join)
      : "${API_SERVER_ENDPOINT:?API_SERVER_ENDPOINT is required for join mode}"
      : "${BOOTSTRAP_TOKEN:?BOOTSTRAP_TOKEN is required for join mode}"
      : "${DISCOVERY_TOKEN_CA_CERT_HASH:?DISCOVERY_TOKEN_CA_CERT_HASH is required for join mode}"

      case "$JOIN_KIND" in
        worker | control-plane) ;;
        *) fail "JOIN_KIND must be 'worker' or 'control-plane'" ;;
      esac

      if [ "$JOIN_KIND" = "control-plane" ]; then
        : "${CONTROL_PLANE_CERT_KEY:?CONTROL_PLANE_CERT_KEY is required for JOIN_KIND=control-plane}"
      fi
      ;;
    *)
      fail "BOOTSTRAP_MODE must be 'init' or 'join'"
      ;;
  esac

  validate_cidr_list_or_primary "$KUBE_PROXY_NODEPORT_ADDRESSES"
}

# Print version $1 with a single leading "v" removed.
normalize_version() {
  printf '%s\n' "${1#v}"
}

# Print the "MAJOR.MINOR" part of version $1.
version_major_minor() {
  normalize_version "$1" | awk -F. '{ print $1 "." $2 }'
}

# True when $1 and $2 are the same version, ignoring a leading "v".
version_eq() {
  [ "$(normalize_version "$1")" = "$(normalize_version "$2")" ]
}

# True when $1 sorts strictly before $2 (version order via `sort -V`).
version_lt() {
  lhs="$(normalize_version "$1")"
  rhs="$(normalize_version "$2")"
  [ "$lhs" != "$rhs" ] || return 1
  [ "$(printf '%s\n%s\n' "$lhs" "$rhs" | sort -V | head -n1)" = "$lhs" ]
}

# True when $1 sorts strictly after $2 (version order via `sort -V`).
version_gt() {
  lhs="$(normalize_version "$1")"
  rhs="$(normalize_version "$2")"
  [ "$lhs" != "$rhs" ] || return 1
  [ "$(printf '%s\n%s\n' "$lhs" "$rhs" | sort -V | tail -n1)" = "$lhs" ]
}

# Print (minor of $2) - (minor of $1). Aborts when the major versions differ
# or when either minor component is not a plain number.
minor_diff() {
  a="$(version_major_minor "$1")"
  b="$(version_major_minor "$2")"

  a_major="${a%.*}"
  a_minor="${a#*.}"
  b_major="${b%.*}"
  b_minor="${b#*.}"

  [ "$a_major" = "$b_major" ] || fail "major version change unsupported here: $1 -> $2"

  # Guard the arithmetic below against empty or non-numeric components.
  case "$a_minor" in
    '' | *[!0-9]*) fail "cannot parse minor version from: $1" ;;
  esac
  case "$b_minor" in
    '' | *[!0-9]*) fail "cannot parse minor version from: $2" ;;
  esac

  echo $((b_minor - a_minor))
}

# Print the version reported by the local kubeadm binary.
get_kubeadm_binary_version() {
  kubeadm version -o short
}

# Abort unless the local kubeadm binary is exactly KUBERNETES_VERSION.
validate_target_matches_local_binaries() {
  kubeadm_ver="$(get_kubeadm_binary_version)"

  if ! version_eq "$kubeadm_ver" "$KUBERNETES_VERSION"; then
    fail "kubeadm binary version ($kubeadm_ver) does not match target KUBERNETES_VERSION ($KUBERNETES_VERSION)"
  fi
}

# Set the global BOOTSTRAP_ACTION from BOOTSTRAP_MODE and on-disk state:
#   init mode: init | reconcile-control-plane | upgrade-control-plane
#   join mode: join | reconcile-node | upgrade-node
# A fresh node (no kubeconfig yet) gets init/join; otherwise the running
# cluster version is compared with KUBERNETES_VERSION.
decide_bootstrap_action() {
  current_version=""

  case "$BOOTSTRAP_MODE" in
    init)
      if [ ! -f "$ADMIN_KUBECONFIG" ]; then
        BOOTSTRAP_ACTION="init"
        log "selected bootstrap action: $BOOTSTRAP_ACTION"
        return 0
      fi

      # `|| true`: an unreachable API is reported by the explicit check below.
      current_version="$(get_cluster_server_version || true)"
      [ -n "$current_version" ] || fail "existing control-plane config found, but cluster version could not be determined"

      log "detected existing control-plane version: $current_version"

      if version_eq "$current_version" "$KUBERNETES_VERSION"; then
        BOOTSTRAP_ACTION="reconcile-control-plane"
      else
        BOOTSTRAP_ACTION="upgrade-control-plane"
      fi
      ;;

    join)
      if [ ! -f "$KUBELET_KUBECONFIG" ]; then
        BOOTSTRAP_ACTION="join"
        log "selected bootstrap action: $BOOTSTRAP_ACTION"
        return 0
      fi

      current_version="$(get_api_server_version_from_kubelet_kubeconfig || true)"
      [ -n "$current_version" ] || fail "existing kubelet config found, but cluster version could not be determined"

      log "detected cluster version visible from this node: $current_version"

      if version_eq "$current_version" "$KUBERNETES_VERSION"; then
        BOOTSTRAP_ACTION="reconcile-node"
      else
        BOOTSTRAP_ACTION="upgrade-node"
      fi
      ;;

    *)
      fail "unsupported BOOTSTRAP_MODE: $BOOTSTRAP_MODE"
      ;;
  esac

  log "selected bootstrap action: $BOOTSTRAP_ACTION"
}

# Bring an already-initialised control-plane node back to a working state
# and re-apply node metadata and scheduling policy.
reconcile_control_plane() {
  log "reconciling existing control-plane node"

  start_kubelet
  check_kubelet_running
  wait_for_local_apiserver
  wait_for_admin_api

  apply_local_node_metadata_if_possible
  allow_single_node_scheduling
}

# Bring an already-joined node back up (kubelet only).
reconcile_node() {
  log "reconciling existing joined node"

  start_kubelet
  check_kubelet_running
}

# Abort unless moving from version $1 to version $2 is an allowed upgrade:
# same version (logged, returns 0), or forward by at most one minor release
# within the same major version. Downgrades are rejected.
validate_upgrade_path() {
  current="$1"
  target="$2"

  if version_eq "$current" "$target"; then
    log "cluster is already at target version: $target"
    return 0
  fi

  if version_gt "$current" "$target"; then
    fail "downgrade is not supported: current=$current target=$target"
  fi

  # A failure inside minor_diff exits only the substitution subshell; the
  # non-zero assignment status then stops the script under `set -e`.
  diff="$(minor_diff "$current" "$target")"
  case "$diff" in
    0 | 1)
      ;;
    *)
      fail "unsupported upgrade path: current=$current target=$target (minor skip too large)"
      ;;
  esac
}

# Pre-flight checks that must pass before any kubeadm upgrade step.
check_upgrade_prereqs() {
  validate_target_matches_local_binaries
}
run_kubeadm_upgrade_apply() { current_version="$(get_cluster_server_version || true)" [ -n "$current_version" ] || fail "cannot determine current control-plane version; API server is not reachable" log "current control-plane version: $current_version" log "target control-plane version: $KUBERNETES_VERSION" validate_upgrade_path "$current_version" "$KUBERNETES_VERSION" if version_eq "$current_version" "$KUBERNETES_VERSION"; then log "control-plane already at target version; skipping kubeadm upgrade apply" return 0 fi log "running kubeadm upgrade plan..." kubeadm upgrade plan "$KUBERNETES_VERSION" log "running kubeadm upgrade apply..." kubeadm upgrade apply -y "$KUBERNETES_VERSION" } run_kubeadm_upgrade_node() { cluster_version="$(get_api_server_version_from_kubelet_kubeconfig)" log "cluster/control-plane version visible from this node: $cluster_version" log "target node version: $KUBERNETES_VERSION" if ! version_eq "$cluster_version" "$KUBERNETES_VERSION"; then fail "control-plane version ($cluster_version) does not match target ($KUBERNETES_VERSION); upgrade control-plane first" fi log "running kubeadm upgrade node..." kubeadm upgrade node } check_prereqs() { need_cmd kubeadm need_cmd kubelet need_cmd kubectl need_cmd crictl need_cmd rc-service need_cmd awk need_cmd ip need_cmd grep need_cmd sed need_cmd hostname } check_apiserver_reachable() { host="${API_SERVER_ENDPOINT%:*}" port="${API_SERVER_ENDPOINT##*:}" need_cmd nc log "checking API server reachability: ${host}:${port}" for _ in $(seq 1 20); do if nc -z "$host" "$port" >/dev/null 2>&1; then log "API server is reachable" return 0 fi sleep 1 done fail "cannot reach API server at ${host}:${port}" } start_crio() { rc-service crio start } check_crio_running() { log "waiting for CRI-O to become ready..." 
last_status="unknown" for _ in $(seq 1 30); do if rc-service crio status >/dev/null 2>&1; then last_status="service-running" if crictl --runtime-endpoint "$CONTAINER_RUNTIME_ENDPOINT" info >/dev/null 2>&1; then log "CRI-O is up" return 0 fi last_status="service-running-but-runtime-not-ready" else last_status="service-not-running" fi sleep 1 done fail "CRI-O did not become ready in time (${last_status})" } image_present() { wanted="$1" repo="${wanted%:*}" tag="${wanted##*:}" crictl --runtime-endpoint "$CONTAINER_RUNTIME_ENDPOINT" images \ | awk 'NR>1 { print $1 ":" $2 }' \ | grep -Fx "$repo:$tag" >/dev/null 2>&1 } check_required_images() { [ "$SKIP_IMAGE_CHECK" = "yes" ] && { log "skipping image check (SKIP_IMAGE_CHECK=yes)" return 0 } log "checking required Kubernetes images for $KUBERNETES_VERSION..." missing_any=0 for img in $(kubeadm config images list --kubernetes-version "$KUBERNETES_VERSION"); do if image_present "$img"; then log "found image: $img" else echo "[monok8s] MISSING image: $img" >&2 missing_any=1 fi done [ "$missing_any" -eq 0 ] || fail "preload the Kubernetes images before bootstrapping" log "all required images are present" } check_not_already_bootstrapped() { case "$BOOTSTRAP_MODE" in init) if [ -f /etc/kubernetes/admin.conf ]; then fail "cluster already appears initialized (/etc/kubernetes/admin.conf exists)" fi ;; join) if [ -f /etc/kubernetes/kubelet.conf ]; then fail "node already appears joined (/etc/kubernetes/kubelet.conf exists)" fi ;; esac } run_kubeadm_join() { log "running kubeadm join..." 
case "$JOIN_KIND" in worker) kubeadm join "${API_SERVER_ENDPOINT}" \ --token "${BOOTSTRAP_TOKEN}" \ --discovery-token-ca-cert-hash "${DISCOVERY_TOKEN_CA_CERT_HASH}" \ --node-name "${NODE_NAME}" \ --cri-socket "${CONTAINER_RUNTIME_ENDPOINT}" ;; control-plane) kubeadm join "${API_SERVER_ENDPOINT}" \ --token "${BOOTSTRAP_TOKEN}" \ --discovery-token-ca-cert-hash "${DISCOVERY_TOKEN_CA_CERT_HASH}" \ --control-plane \ --certificate-key "${CONTROL_PLANE_CERT_KEY}" \ --apiserver-advertise-address "${APISERVER_ADVERTISE_ADDRESS}" \ --node-name "${NODE_NAME}" \ --cri-socket "${CONTAINER_RUNTIME_ENDPOINT}" ;; esac } generate_kube_proxy_config_block() { if [ -z "${KUBE_PROXY_NODEPORT_ADDRESSES:-}" ]; then return 0 fi if [ "$KUBE_PROXY_NODEPORT_ADDRESSES" = "primary" ]; then cat < "$KUBEADM_CONFIG_OUT" <> "$KUBEADM_CONFIG_OUT" } run_kubeadm_init() { log "running kubeadm init..." kubeadm init --config "$KUBEADM_CONFIG_OUT" } require_local_ip() { wanted_ip="$1" ip -o addr show | awk '{print $4}' | cut -d/ -f1 | grep -Fx "$wanted_ip" >/dev/null 2>&1 \ || fail "required local IP is not present on any interface: $wanted_ip" } validate_network_requirements() { case "$BOOTSTRAP_MODE" in init) require_local_ip "$APISERVER_ADVERTISE_ADDRESS" ;; join) require_local_ip "$APISERVER_ADVERTISE_ADDRESS" check_apiserver_reachable ;; *) fail "unsupported BOOTSTRAP_MODE: $BOOTSTRAP_MODE" ;; esac } wait_for_node() { log "waiting for node registration: $NODE_NAME" for _ in $(seq 1 60); do if kubectl --kubeconfig /etc/kubernetes/admin.conf get node "$NODE_NAME" >/dev/null 2>&1; then return 0 fi sleep 1 done fail "node $NODE_NAME did not register in time" } apply_local_node_metadata_if_possible() { if [ "$BOOTSTRAP_MODE" != "init" ]; then log "skipping node labels/annotations from this node (not control-plane init mode)" return 0 fi wait_for_node if [ -n "${NODE_ANNOTATIONS:-}" ]; then kubectl --kubeconfig /etc/kubernetes/admin.conf annotate node "$NODE_NAME" $(printf '%s' "$NODE_ANNOTATIONS" | tr 
',' ' ') --overwrite fi if [ -n "${NODE_LABELS:-}" ]; then kubectl --kubeconfig /etc/kubernetes/admin.conf label node "$NODE_NAME" $(printf '%s' "$NODE_LABELS" | tr ',' ' ') --overwrite fi } install_cni_if_requested() { case "${CNI_PLUGIN}" in none) if [ -f /etc/cni/net.d/10-crio-bridge.conflist ]; then mv /etc/cni/net.d/10-crio-bridge.conflist \ /etc/cni/net.d/10-crio-bridge.conflist.disabled fi log "bootstrap bridge CNI disabled; install a cluster CNI (e.g., flannel) for pod networking" ;; bridge) if [ -f /etc/cni/net.d/10-crio-bridge.conflist.disabled ]; then mv /etc/cni/net.d/10-crio-bridge.conflist.disabled \ /etc/cni/net.d/10-crio-bridge.conflist fi log "bootstrap bridge CNI enabled" ;; *) fail "unsupported CNI_PLUGIN: ${CNI_PLUGIN}" ;; esac } allow_single_node_scheduling() { if [ "$ALLOW_SCHEDULING_ON_CONTROL_PLANE" != "yes" ]; then log "leaving control-plane taint in place" return 0 fi log "removing control-plane taint so this single node can schedule workloads..." kubectl --kubeconfig /etc/kubernetes/admin.conf taint nodes "$NODE_NAME" node-role.kubernetes.io/control-plane- >/dev/null 2>&1 || true } print_next_steps() { echo echo "[monok8s] bootstrap complete" echo case "$BOOTSTRAP_MODE" in init) cat <} EOF ;; join) cat <