diff --git a/README.md b/README.md index 13ed085..b27373b 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,11 @@ The currently tested upgrade chain is: - `1.33.10 -> 1.34.6` - `1.34.6 -> 1.35.3` +Tested worker node upgrade chain: + +- `1.33.3 -> 1.34.1` +- `1.33.1 -> 1.35.3` + --- ## Current status diff --git a/alpine/build-rootfs.sh b/alpine/build-rootfs.sh index a882d44..0acb40e 100755 --- a/alpine/build-rootfs.sh +++ b/alpine/build-rootfs.sh @@ -6,7 +6,7 @@ source /utils.sh /preload-k8s-images.sh || exit 1 -export CTL_BIN_LAYER=$( skopeo inspect docker-daemon:localhost/monok8s/node-control:dev | jq -r '.Layers[0] | sub("^sha256:"; "")' ) +export CTL_BIN_LAYER=$( skopeo inspect docker-daemon:localhost/monok8s/node-control:$TAG | jq -r '.Layers[0] | sub("^sha256:"; "")' ) mkdir -p \ "$ROOTFS/dev" \ diff --git a/clitools/docker/ctl-builder-local.Dockerfile b/clitools/docker/ctl-builder-local.Dockerfile deleted file mode 100644 index e0be383..0000000 --- a/clitools/docker/ctl-builder-local.Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM golang:1.26-alpine AS build - -ARG VERSION -ARG KUBE_VERSION -ARG GIT_REV=unknown - -WORKDIR /src - -RUN apk add --no-cache git build-base - -COPY go.mod go.sum ./ -RUN go mod download - -COPY . . - -RUN test -f pkg/buildinfo/buildinfo_gen.go - -RUN mkdir -p /out && \ - GOOS=darwin GOARCH=arm64 CGO_ENABLED=0 \ - go build -trimpath -ldflags="-s -w" \ - -o /out/ctl-${VERSION} ./cmd/ctl - -FROM scratch -COPY --from=build /out/ / diff --git a/clitools/makefile b/clitools/makefile index 58580c9..3cb6ba3 100644 --- a/clitools/makefile +++ b/clitools/makefile @@ -14,7 +14,6 @@ KUBE_VERSION ?= v1.33.3 GIT_REV := $(shell git rev-parse HEAD) PACKAGES_DIR := packages -BIN_DIR := bin OUT_DIR := out UBOOT_TOOLS_OUT := $(OUT_DIR)/uboot-tools @@ -37,9 +36,6 @@ DOWNLOAD_PACKAGES_STAMP := $(PACKAGES_DIR)/.download-packages.stamp $(PACKAGES_DIR): mkdir -p $@ -$(BIN_DIR): - mkdir -p $@ - $(OUT_DIR): mkdir -p $@ @@ -129,13 +125,14 @@ build-agent: .buildinfo build-crds uboot-tools -t $(CTL_IMAGE) \ --output type=image,push=true,registry.insecure=true . -build-local: build-crds .buildinfo | $(BIN_DIR) +build-local: .buildinfo build-crds uboot-tools docker buildx build \ - -f docker/ctl-builder-local.Dockerfile \ + --platform linux/arm64 \ + -f docker/ctl-agent.Dockerfile \ + --build-arg BASE_IMAGE=$(CTL_BUILD_BASE_IMAGE) \ --build-arg VERSION=$(VERSION) \ - --build-arg KUBE_VERSION=$(KUBE_VERSION) \ - --build-arg GIT_REV=$(GIT_REV) \ - --output type=local,dest=./$(BIN_DIR) . + --load \ + -t localhost/monok8s/node-control:$(VERSION) . push-agent: .buildinfo build-crds uboot-tools test -n "$(IMAGE_REPOSITORY)" @@ -156,7 +153,6 @@ run-agent: clean: -docker image rm localhost/monok8s/node-control:$(VERSION) >/dev/null 2>&1 || true rm -rf \ - $(BIN_DIR) \ $(OUT_DIR)/crds \ $(BUILDINFO_FILE) diff --git a/clitools/pkg/controller/osupgrade/watch.go b/clitools/pkg/controller/osupgrade/watch.go index 6f93b39..83518a1 100644 --- a/clitools/pkg/controller/osupgrade/watch.go +++ b/clitools/pkg/controller/osupgrade/watch.go @@ -272,11 +272,17 @@ func listTargetNodeNames( }) if osu.Spec.NodeSelector != nil { - sel, err := metav1.LabelSelectorAsSelector(osu.Spec.NodeSelector) + userSelector, err := metav1.LabelSelectorAsSelector(osu.Spec.NodeSelector) if err != nil { return nil, fmt.Errorf("invalid nodeSelector: %w", err) } - selector = sel + + reqs, selectable := userSelector.Requirements() + if !selectable { + selector = labels.Nothing() + } else { + selector = selector.Add(reqs...) + } } list, err := clients.Kubernetes.CoreV1(). diff --git a/clitools/pkg/node/kubeadm_compat.go b/clitools/pkg/node/kubeadm_compat.go new file mode 100644 index 0000000..2610bb3 --- /dev/null +++ b/clitools/pkg/node/kubeadm_compat.go @@ -0,0 +1,108 @@ +package node + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + + "k8s.io/klog/v2" + + "example.com/monok8s/pkg/system" +) + +const kubeadmUpgradeNodeHostnameBugFixedIn = "v1.35.0" + +// COMPAT(kubeadm-upgrade-node-hostname) +// Affects: Kubernetes/kubeadm < v1.35.0 +// Upstream: kubernetes/kubeadm#3244, kubernetes/kubernetes#134319 +// RemoveWhen: minimum supported Kubernetes version >= v1.35.0 +// +// Affected kubeadm versions can derive the target Node name for +// `kubeadm upgrade node` from the local OS hostname instead of the existing +// kubeadm NodeRegistration / kubelet --hostname-override state. +func needsKubeadmUpgradeNodeHostnameWorkaround(kubeadmVersion string) bool { + lt, err := versionLt(kubeadmVersion, kubeadmUpgradeNodeHostnameBugFixedIn) + if err != nil { + klog.Warningf( + "could not parse kubeadm version %q; enabling kubeadm upgrade node hostname workaround: %v", + kubeadmVersion, + err, + ) + return true + } + return lt +} + +// runWithTemporaryHostname works around kubernetes/kubeadm#3244, fixed by +// kubernetes/kubernetes#134319 in Kubernetes v1.35.0. +// +// Affected kubeadm versions can derive the target Node name for +// `kubeadm upgrade node` from the local OS hostname instead of the existing +// kubeadm NodeRegistration / kubelet --hostname-override state. That breaks +// valid setups where the machine hostname differs from the Kubernetes Node +// name: kubeadm may authenticate as one node but try to get/patch another Node, +// and the Node authorizer correctly rejects it. +// +// Keep this workaround scoped to affected kubeadm versions only. Set the +// temporary hostname to the Kubernetes Node name, run kubeadm, then restore the +// configured machine hostname immediately afterward. +func runWithTemporaryHostname(ctx context.Context, nctx *NodeContext, fn func(context.Context) error) error { + if nctx == nil { + return errors.New("node context is nil") + } + + temporaryHostname := strings.TrimSpace(nctx.Config.Spec.NodeName) + if temporaryHostname == "" { + return errors.New("temporary hostname is required") + } + + originalHostname, err := os.Hostname() + if err != nil { + return fmt.Errorf("get current hostname: %w", err) + } + + if originalHostname == temporaryHostname { + return fn(ctx) + } + + restoreHostname := strings.TrimSpace(nctx.Config.Spec.Network.Hostname) + if restoreHostname == "" { + restoreHostname = originalHostname + } + + klog.Warningf( + "temporarily changing hostname for kubeadm upgrade node: current=%q temporary=%q restore=%q", + originalHostname, + temporaryHostname, + restoreHostname, + ) + + if err := system.SetHostname(temporaryHostname); err != nil { + return fmt.Errorf("set temporary hostname to %q: %w", temporaryHostname, err) + } + + defer func() { + if err := system.SetHostname(restoreHostname); err != nil { + klog.Errorf("failed to restore hostname to %q: %v", restoreHostname, err) + } + }() + + return fn(ctx) +} + +// COMPAT(kubeadm-upgrade-node-hostname) +// RemoveWhen: minimum supported Kubernetes version >= v1.35.0 +func runKubeadmUpgradeNodeWithCompat( + ctx context.Context, + nctx *NodeContext, + kubeadmVersion string, + fn func(context.Context) error, +) error { + if needsKubeadmUpgradeNodeHostnameWorkaround(kubeadmVersion) { + return runWithTemporaryHostname(ctx, nctx, fn) + } + + return fn(ctx) +} diff --git a/clitools/pkg/node/kubeadm_upgrade.go b/clitools/pkg/node/kubeadm_upgrade.go index c00ad25..1fe91eb 100644 --- a/clitools/pkg/node/kubeadm_upgrade.go +++ b/clitools/pkg/node/kubeadm_upgrade.go @@ -329,21 +329,28 @@ func RunKubeadmUpgradeNode(ctx context.Context, nctx *NodeContext) error { kubeconfigPath, } - _, err := nctx.SystemRunner.RunWithOptions( - ctx, - "kubeadm", - args, - system.RunOptions{ - Timeout: 10 * time.Minute, - OnStdoutLine: func(line string) { - klog.Infof("[kubeadm] %s", line) + runKubeadm := func(ctx context.Context) error { + _, err := nctx.SystemRunner.RunWithOptions( + ctx, + "kubeadm", + args, + system.RunOptions{ + Timeout: 10 * time.Minute, + OnStdoutLine: func(line string) { + klog.Infof("[kubeadm] %s", line) + }, + OnStderrLine: func(line string) { + klog.Infof("[kubeadm] %s", line) + }, }, - OnStderrLine: func(line string) { - klog.Infof("[kubeadm] %s", line) - }, - }, - ) - if err != nil { + ) + return err + } + + // COMPAT(kubeadm-upgrade-node-hostname) + // RemoveWhen: minimum supported Kubernetes version >= v1.35.0 + // Replace this wrapper with direct runKubeadm(ctx). + if err := runKubeadmUpgradeNodeWithCompat(ctx, nctx, wantVersion, runKubeadm); err != nil { return fmt.Errorf("run kubeadm upgrade node: %w", err) } diff --git a/clitools/pkg/node/prereqs.go b/clitools/pkg/node/prereqs.go index 030c1a8..ee1031e 100644 --- a/clitools/pkg/node/prereqs.go +++ b/clitools/pkg/node/prereqs.go @@ -203,6 +203,26 @@ func versionEq(a, b string) bool { return normalizeKubeVersion(a) == normalizeKubeVersion(b) } +func versionLt(a, b string) (bool, error) { + av, err := parseKubeVersion(a) + if err != nil { + return false, err + } + + bv, err := parseKubeVersion(b) + if err != nil { + return false, err + } + + if av.Major != bv.Major { + return av.Major < bv.Major, nil + } + if av.Minor != bv.Minor { + return av.Minor < bv.Minor, nil + } + return av.Patch < bv.Patch, nil +} + func normalizeKubeVersion(v string) string { v = strings.TrimSpace(v) if v == "" { diff --git a/makefile b/makefile index e698e1a..7c7187e 100644 --- a/makefile +++ b/makefile @@ -181,7 +181,7 @@ $(INITRAMFS): $(INITRAMFS_DEPS) $(DOWNLOAD_PACKAGES_STAMP) | $(OUT_DIR) test -f $@ $(CLITOOLS_BIN): $(CLITOOLS_SRCS) - $(MAKE) -C clitools build-agent + $(MAKE) -C clitools build-local VERSION="$(TAG)" vpp: $(BUILD_BASE_STAMP) $(VPP_TAR) $(DPDK_TAR) $(FMLIB_TAR) $(FMC_TAR) $(NXP_TAR) @build_base_tag=$$(docker image inspect \