diff --git a/.gitignore b/.gitignore index 22b1c22..385d538 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.DS_Store clitools/bin packages/ out/ diff --git a/clitools/pkg/apis/monok8s/v1alpha1/groupversion_info.go b/clitools/pkg/apis/monok8s/v1alpha1/groupversion_info.go index ca03799..78b3204 100644 --- a/clitools/pkg/apis/monok8s/v1alpha1/groupversion_info.go +++ b/clitools/pkg/apis/monok8s/v1alpha1/groupversion_info.go @@ -44,3 +44,12 @@ func addKnownTypes(scheme *runtime.Scheme) error { metav1.AddToGroupVersion(scheme, SchemeGroupVersion) return nil } + +func NodeAgentLabels() map[string]string { + return map[string]string{ + "app.kubernetes.io/name": NodeAgentName, + "app.kubernetes.io/component": "agent", + "app.kubernetes.io/part-of": "monok8s", + "app.kubernetes.io/managed-by": NodeControlName, + } +} diff --git a/clitools/pkg/apis/monok8s/v1alpha1/osupgrade.go b/clitools/pkg/apis/monok8s/v1alpha1/osupgrade.go index dd66bf7..b6b4c98 100644 --- a/clitools/pkg/apis/monok8s/v1alpha1/osupgrade.go +++ b/clitools/pkg/apis/monok8s/v1alpha1/osupgrade.go @@ -133,6 +133,12 @@ type OSUpgradeProgressStatus struct { CurrentStep int32 `json:"currentStep,omitempty" yaml:"currentStep,omitempty"` CurrentFrom string `json:"currentFrom,omitempty" yaml:"currentFrom,omitempty"` CurrentTo string `json:"currentTo,omitempty" yaml:"currentTo,omitempty"` + + // ObservedRetryNonce records the last retryNonce value the agent accepted. + // When spec.retryNonce is changed by the user and differs from this value, + // the agent may retry a failed upgrade. + // +optional + ObservedRetryNonce string `json:"observedRetryNonce,omitempty"` } func (osu OSUpgrade) StatusPhase() string { @@ -142,3 +148,11 @@ func (osu OSUpgrade) StatusPhase() string { } return phase } + +func (osup OSUpgradeProgress) StatusPhase() string { + phase := "" + if osup.Status != nil { + phase = string(osup.Status.Phase) + } + return phase +} diff --git a/clitools/pkg/cmd/agent/agent.go b/clitools/pkg/cmd/agent/agent.go index 3273f93..ad28795 100644 --- a/clitools/pkg/cmd/agent/agent.go +++ b/clitools/pkg/cmd/agent/agent.go @@ -152,14 +152,11 @@ func watchOnce( if !targetsNode(item, nodeName) { continue } - if !shouldHandle(item) { - continue - } klog.InfoS("found existing osupgradeprogress", "name", item.Name, "node", nodeName, - "phase", progressPhase(item.Status), + "phase", item.StatusPhase(), "resourceVersion", item.ResourceVersion, ) @@ -227,20 +224,11 @@ func watchOnce( if !targetsNode(osup, nodeName) { continue } - if !shouldHandle(osup) { - klog.V(2).InfoS("skipping osupgradeprogress due to phase", - "name", osup.Name, - "node", nodeName, - "phase", progressPhase(osup.Status), - "eventType", evt.Type, - ) - continue - } - klog.InfoS("received osupgradeprogress event", + klog.V(4).InfoS("received osupgradeprogress event", "name", osup.Name, "node", nodeName, - "phase", progressPhase(osup.Status), + "phase", osup.StatusPhase(), "eventType", evt.Type, "resourceVersion", osup.ResourceVersion, ) @@ -262,28 +250,3 @@ func targetsNode(osup *monov1alpha1.OSUpgradeProgress, nodeName string) bool { } return osup.Spec.NodeName == nodeName } - -func shouldHandle(osup *monov1alpha1.OSUpgradeProgress) bool { - if osup == nil { - return false - } - - if osup.Status == nil { - return false - } - - switch osup.Status.Phase { - case "", - monov1alpha1.OSUpgradeProgressPhasePending: - return true - default: - return false - } -} - -func progressPhase(st *monov1alpha1.OSUpgradeProgressStatus) string { - if st == nil { - return "" - } - return 
string(st.Phase) -} diff --git a/clitools/pkg/cmd/controller/controller.go b/clitools/pkg/cmd/controller/controller.go index be93a8b..ba5659e 100644 --- a/clitools/pkg/cmd/controller/controller.go +++ b/clitools/pkg/cmd/controller/controller.go @@ -3,6 +3,7 @@ package controller import ( "context" "errors" + "fmt" "net" "net/http" "os" @@ -57,7 +58,7 @@ func NewCmdController(flags *genericclioptions.ConfigFlags) *cobra.Command { }() go func() { - httpErrCh <- httpListen(ctx, clients, conf) + httpErrCh <- listenAndServe(ctx, clients, conf) }() select { @@ -92,63 +93,125 @@ func NewCmdController(flags *genericclioptions.ConfigFlags) *cobra.Command { return cmd } -func httpListen(ctx context.Context, clients *kube.Clients, conf ServerConfig) error { - address, port := "", "8443" - addr := net.JoinHostPort(address, port) - +func listenAndServe(ctx context.Context, clients *kube.Clients, conf ServerConfig) error { nodeName := os.Getenv("NODE_NAME") - server := mkscontroller.NewServer(ctx, clients, conf.Namespace, nodeName) + controllerServer := mkscontroller.NewServer(ctx, clients, conf.Namespace, nodeName) - s := &http.Server{ - Addr: addr, - Handler: server, + healthMux := http.NewServeMux() + healthMux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok\n")) + }) + healthMux.HandleFunc("/readyz", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok\n")) + }) + + healthAddr := net.JoinHostPort("", "8080") + controllerAddr := net.JoinHostPort("", "8443") + + healthHTTPServer := &http.Server{ + Addr: healthAddr, + Handler: healthMux, + IdleTimeout: 90 * time.Second, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + MaxHeaderBytes: 1 << 20, + } + + controllerHTTPServer := &http.Server{ + Addr: controllerAddr, + Handler: controllerServer, IdleTimeout: 90 * time.Second, ReadTimeout: 4 * time.Minute, WriteTimeout: 4 * time.Minute, MaxHeaderBytes: 1 << 20, } - serverErrCh := make(chan error, 1) + serverErrCh := make(chan error, 2) go func() { - if conf.TLSCertFile != "" { - klog.InfoS("starting HTTPS server", - "addr", addr, - "certFile", conf.TLSCertFile, - "keyFile", conf.TLSPrivateKeyFile, - ) - serverErrCh <- s.ListenAndServeTLS(conf.TLSCertFile, conf.TLSPrivateKeyFile) + klog.InfoS("starting health HTTP server", "addr", healthAddr) + + err := healthHTTPServer.ListenAndServe() + if err != nil && !errors.Is(err, http.ErrServerClosed) { + serverErrCh <- fmt.Errorf("health HTTP server: %w", err) return } - klog.InfoS("starting HTTP server", "addr", addr) - serverErrCh <- s.ListenAndServe() + serverErrCh <- nil + }() + + go func() { + if conf.TLSCertFile != "" { + klog.InfoS("starting controller HTTPS server", + "addr", controllerAddr, + "certFile", conf.TLSCertFile, + "keyFile", conf.TLSPrivateKeyFile, + ) + + err := controllerHTTPServer.ListenAndServeTLS(conf.TLSCertFile, conf.TLSPrivateKeyFile) + if err != nil && !errors.Is(err, http.ErrServerClosed) { + serverErrCh <- fmt.Errorf("controller HTTPS server: %w", err) + return + } + + serverErrCh <- nil + return + } + + klog.InfoS("starting controller HTTP server", "addr", controllerAddr) + + err := controllerHTTPServer.ListenAndServe() + if err != nil && !errors.Is(err, http.ErrServerClosed) { + serverErrCh <- fmt.Errorf("controller HTTP server: %w", err) + return + } + + serverErrCh <- nil }() select { case <-ctx.Done(): - klog.InfoS("shutting down HTTP server", "addr", addr) + klog.InfoS("shutting 
down HTTP servers", + "healthAddr", healthAddr, + "controllerAddr", controllerAddr, + ) shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - err := s.Shutdown(shutdownCtx) - if err != nil { - return err + var errs []error + + if err := healthHTTPServer.Shutdown(shutdownCtx); err != nil { + errs = append(errs, fmt.Errorf("shutdown health HTTP server: %w", err)) } - err = <-serverErrCh - if err != nil && !errors.Is(err, http.ErrServerClosed) { - return err + if err := controllerHTTPServer.Shutdown(shutdownCtx); err != nil { + errs = append(errs, fmt.Errorf("shutdown controller HTTP server: %w", err)) } + + for i := 0; i < 2; i++ { + if err := <-serverErrCh; err != nil { + errs = append(errs, err) + } + } + + if len(errs) > 0 { + return errors.Join(errs...) + } + return context.Canceled case err := <-serverErrCh: - if err != nil && !errors.Is(err, http.ErrServerClosed) { + if err != nil { klog.ErrorS(err, "HTTP server failed") return err } - return nil + + // One server exited cleanly unexpectedly. Treat that as failure because + // the process should keep both servers alive until ctx is canceled. + return fmt.Errorf("HTTP server exited unexpectedly") } } diff --git a/clitools/pkg/cmd/create/create.go b/clitools/pkg/cmd/create/create.go index 3cb9ed9..b450c45 100644 --- a/clitools/pkg/cmd/create/create.go +++ b/clitools/pkg/cmd/create/create.go @@ -42,23 +42,6 @@ func NewCmdCreate(flags *genericclioptions.ConfigFlags) *cobra.Command { return err }, }, - &cobra.Command{ - Use: "controller", - Short: "Print controller deployment template", - RunE: func(cmd *cobra.Command, _ []string) error { - ns, _, err := flags.ToRawKubeConfigLoader().Namespace() - if err != nil { - return err - } - - out, err := render.RenderControllerDeployments(ns) - if err != nil { - return err - } - _, err = fmt.Fprint(cmd.OutOrStdout(), out) - return err - }, - }, ) var authorizedKeysPath string @@ -90,6 +73,38 @@ func NewCmdCreate(flags *genericclioptions.ConfigFlags) *cobra.Command { sshdcmd.Flags().StringVar(&authorizedKeysPath, "authkeys", "", "path to authorized_keys file") cmd.AddCommand(&sshdcmd) + + cconf := render.ControllerConf{} + controllercmd := cobra.Command{ + Use: "controller", + Short: "Print controller deployment template", + RunE: func(cmd *cobra.Command, _ []string) error { + ns, _, err := flags.ToRawKubeConfigLoader().Namespace() + if err != nil { + return err + } + + cconf.Namespace = ns + + out, err := render.RenderControllerDeployments(cconf) + if err != nil { + return err + } + + _, err = fmt.Fprint(cmd.OutOrStdout(), out) + return err + }, + } + + controllercmd.Flags().StringVar( + &cconf.Image, + "image", + "", + "Controller image, including optional registry and tag", + ) + + cmd.AddCommand(&controllercmd) + return cmd } diff --git a/clitools/pkg/cmd/root/root.go b/clitools/pkg/cmd/root/root.go index ce03222..7b87340 100644 --- a/clitools/pkg/cmd/root/root.go +++ b/clitools/pkg/cmd/root/root.go @@ -19,8 +19,10 @@ import ( func init() { klog.InitFlags(nil) + _ = flag.Set("logtostderr", "true") + if os.Getenv("DEBUG") != "" { - _ = flag.Set("v", "4") // debug level + _ = flag.Set("v", "4") } else { _ = flag.Set("v", "0") } @@ -39,7 +41,11 @@ func NewRootCmd() *cobra.Command { }, } + // Expose klog stdlib flags through Cobra/pflag. 
+ cmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) + flags.AddFlags(cmd.PersistentFlags()) + cmd.AddCommand( versioncmd.NewCmdVersion(), initcmd.NewCmdInit(flags), @@ -49,5 +55,6 @@ func NewRootCmd() *cobra.Command { controllercmd.NewCmdController(flags), internalcmd.NewCmdInternal(), ) + return cmd } diff --git a/clitools/pkg/controller/osupgrade/handler.go b/clitools/pkg/controller/osupgrade/handler.go index af60af6..96322df 100644 --- a/clitools/pkg/controller/osupgrade/handler.go +++ b/clitools/pkg/controller/osupgrade/handler.go @@ -65,13 +65,20 @@ func handleOSUpgradeProgressLocked( } if osup.Spec.NodeName != nodeName { + klog.V(4).InfoS("skipping osupgradeprogress due to nodeName mismatch", + "name", osup.Name, + "node", nodeName, + "target", osup.Spec.NodeName, + ) return nil } - if osup.Status.Phase != "" && - osup.Status.Phase != monov1alpha1.OSUpgradeProgressPhasePending && - osup.Status.Phase != monov1alpha1.OSUpgradeProgressPhaseDownloading { - // tune this logic however you want + if !shouldProcessProgress(osup) { + klog.V(2).InfoS("skipping osupgradeprogress due to phase", + "name", osup.Name, + "node", nodeName, + "phase", osup.StatusPhase(), + ) return nil } @@ -124,7 +131,9 @@ func handleOSUpgradeProgressLocked( now := metav1.Now() cur.Status.CurrentVersion = buildinfo.KubeVersion cur.Status.TargetVersion = plan.ResolvedTarget + cur.Status.PlannedPath = plannedPath(plan) cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseDownloading + cur.Status.ObservedRetryNonce = cur.Spec.RetryNonce cur.Status.Message = fmt.Sprintf("downloading image: %s", first.URL) cur.Status.LastUpdatedAt = &now }) @@ -238,6 +247,26 @@ func handleOSUpgradeProgressLocked( select {} } +func shouldProcessProgress(osup *monov1alpha1.OSUpgradeProgress) bool { + if osup == nil { + return false + } + + if osup.Status == nil { + return false + } + + switch osup.Status.Phase { + case "", + monov1alpha1.OSUpgradeProgressPhasePending: + return true + case monov1alpha1.OSUpgradeProgressPhaseFailed: + return osup.Spec.RetryNonce != osup.Status.ObservedRetryNonce + default: + return false + } +} + func triggerReboot() error { _ = os.WriteFile("/proc/sysrq-trigger", []byte("s\n"), 0) _ = os.WriteFile("/proc/sysrq-trigger", []byte("u\n"), 0) diff --git a/clitools/pkg/controller/osupgrade/planner.go b/clitools/pkg/controller/osupgrade/planner.go index 7111fd2..5d4ee20 100644 --- a/clitools/pkg/controller/osupgrade/planner.go +++ b/clitools/pkg/controller/osupgrade/planner.go @@ -335,6 +335,14 @@ func lowestPatchInMinor(versions []Version, major, minor int) (Version, bool) { return Version{}, false } +func plannedPath(plan *Plan) []string { + ppath := []string{} + for _, img := range plan.Path { + ppath = append(ppath, img.Version) + } + return ppath +} + func versionsToStrings(vs []Version) []string { out := make([]string, 0, len(vs)) for _, v := range vs { diff --git a/clitools/pkg/controller/osupgrade/progress.go b/clitools/pkg/controller/osupgrade/progress.go index 587d111..c2750fb 100644 --- a/clitools/pkg/controller/osupgrade/progress.go +++ b/clitools/pkg/controller/osupgrade/progress.go @@ -209,6 +209,7 @@ func failProgress( cur.Status = &monov1alpha1.OSUpgradeProgressStatus{} } + cur.Status.ObservedRetryNonce = cur.Spec.RetryNonce cur.Status.LastUpdatedAt = &now cur.Status.Message = fmt.Sprintf("%s: %v", action, cause) cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseFailed @@ -237,6 +238,7 @@ func markProgressCompleted( cur.Status = &monov1alpha1.OSUpgradeProgressStatus{} } + 
cur.Status.ObservedRetryNonce = cur.Spec.RetryNonce cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseCompleted cur.Status.Message = message cur.Status.CurrentVersion = osup.Status.CurrentVersion diff --git a/clitools/pkg/controller/server.go b/clitools/pkg/controller/server.go index 5711bd7..9feb297 100644 --- a/clitools/pkg/controller/server.go +++ b/clitools/pkg/controller/server.go @@ -73,17 +73,13 @@ func (s *Server) Initialize() { ws.Consumes(restful.MIME_JSON) ws.Produces(restful.MIME_JSON) - ws.Route(ws.GET("/healthz").To(s.queryHealthz). + ws.Route(ws.GET("/status").To(s.queryStatus). Doc("Return basic controller status")) - // Stub for now - ws.Route(ws.GET("/readyz").To(s.queryHealthz). - Doc("Stub for now")) - s.restfulCont.Add(ws) } -func (s *Server) queryHealthz(request *restful.Request, response *restful.Response) { +func (s *Server) queryStatus(request *restful.Request, response *restful.Response) { resp := StatusResponse{ OK: true, Service: "monok8s-controller", diff --git a/clitools/pkg/node/agent.go b/clitools/pkg/node/agent.go index 2eea1fb..7f5acc4 100644 --- a/clitools/pkg/node/agent.go +++ b/clitools/pkg/node/agent.go @@ -20,9 +20,8 @@ import ( ) const ( - controlAgentNodeSelectorValue = "true" - controlAgentImage = "localhost/monok8s/node-control:dev" - kubeconfig = "/etc/kubernetes/admin.conf" + controlAgentImage = "localhost/monok8s/node-control:dev" + kubeconfig = "/etc/kubernetes/admin.conf" ) func ApplyNodeControlDaemonSetResources(ctx context.Context, n *NodeContext) error { @@ -265,12 +264,7 @@ func applyNodeAgentClusterRoleBinding(ctx context.Context, kubeClient kubernetes func applyNodeAgentDaemonSet(ctx context.Context, kubeClient kubernetes.Interface, namespace string, labels map[string]string) error { privileged := true - dsLabels := map[string]string{ - "app.kubernetes.io/name": monov1alpha1.NodeAgentName, - "app.kubernetes.io/component": "agent", - "app.kubernetes.io/part-of": "monok8s", - "app.kubernetes.io/managed-by": monov1alpha1.NodeControlName, - } + dsLabels := monov1alpha1.NodeAgentLabels() want := &appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ @@ -294,7 +288,7 @@ func applyNodeAgentDaemonSet(ctx context.Context, kubeClient kubernetes.Interfac HostPID: true, DNSPolicy: corev1.DNSClusterFirstWithHostNet, NodeSelector: map[string]string{ - monov1alpha1.NodeControlKey: controlAgentNodeSelectorValue, + monov1alpha1.NodeControlKey: "true", }, Tolerations: []corev1.Toleration{ {Operator: corev1.TolerationOpExists}, diff --git a/clitools/pkg/node/metadata.go b/clitools/pkg/node/metadata.go index 5d3c64c..22939f0 100644 --- a/clitools/pkg/node/metadata.go +++ b/clitools/pkg/node/metadata.go @@ -61,7 +61,7 @@ func ApplyLocalNodeMetadataIfPossible(ctx context.Context, nctx *NodeContext) er // Additional Labels if spec.EnableNodeControl { - node.Labels[monov1alpah1.NodeControlKey] = controlAgentNodeSelectorValue + node.Labels[monov1alpah1.NodeControlKey] = "true" } _, err = client.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}) diff --git a/clitools/pkg/render/controller.go b/clitools/pkg/render/controller.go index 6ada0b1..c92e429 100644 --- a/clitools/pkg/render/controller.go +++ b/clitools/pkg/render/controller.go @@ -14,13 +14,20 @@ import ( monov1alpha1 "example.com/monok8s/pkg/apis/monok8s/v1alpha1" buildinfo "example.com/monok8s/pkg/buildinfo" - templates "example.com/monok8s/pkg/templates" ) -func RenderControllerDeployments(namespace string) (string, error) { - vals := templates.LoadTemplateValuesFromEnv() +type ControllerConf 
struct { + Namespace string + Image string + Labels map[string]string +} - labels := map[string]string{ +func RenderControllerDeployments(conf ControllerConf) (string, error) { + if conf.Namespace == "" { + return "", fmt.Errorf("namespace is required") + } + + conf.Labels = map[string]string{ "app.kubernetes.io/name": monov1alpha1.ControllerName, "app.kubernetes.io/component": "controller", "app.kubernetes.io/part-of": "monok8s", @@ -28,10 +35,10 @@ func RenderControllerDeployments(namespace string) (string, error) { } objs := []runtime.Object{ - buildControllerServiceAccount(namespace, labels), - buildControllerClusterRole(labels), - buildControllerClusterRoleBinding(namespace, labels), - buildControllerDeployment(vals, namespace, labels), + buildControllerServiceAccount(conf), + buildControllerClusterRole(conf), + buildControllerClusterRoleBinding(conf), + buildControllerDeployment(conf), } s := runtime.NewScheme() @@ -57,7 +64,7 @@ func RenderControllerDeployments(namespace string) (string, error) { return buf.String(), nil } -func buildControllerServiceAccount(namespace string, labels map[string]string) *corev1.ServiceAccount { +func buildControllerServiceAccount(conf ControllerConf) *corev1.ServiceAccount { automount := true @@ -68,14 +75,14 @@ func buildControllerServiceAccount(namespace string, labels map[string]string) * }, ObjectMeta: metav1.ObjectMeta{ Name: monov1alpha1.ControllerName, - Namespace: namespace, - Labels: labels, + Namespace: conf.Namespace, + Labels: conf.Labels, }, AutomountServiceAccountToken: &automount, } } -func buildControllerClusterRole(labels map[string]string) *rbacv1.ClusterRole { +func buildControllerClusterRole(conf ControllerConf) *rbacv1.ClusterRole { wantRules := []rbacv1.PolicyRule{ { APIGroups: []string{monov1alpha1.Group}, @@ -111,19 +118,19 @@ func buildControllerClusterRole(labels map[string]string) *rbacv1.ClusterRole { }, ObjectMeta: metav1.ObjectMeta{ Name: monov1alpha1.ControllerName, - Labels: labels, + Labels: conf.Labels, }, Rules: wantRules, } } -func buildControllerClusterRoleBinding(namespace string, labels map[string]string) *rbacv1.ClusterRoleBinding { +func buildControllerClusterRoleBinding(conf ControllerConf) *rbacv1.ClusterRoleBinding { wantSubjects := []rbacv1.Subject{ { Kind: "ServiceAccount", Name: monov1alpha1.ControllerName, - Namespace: namespace, + Namespace: conf.Namespace, }, } @@ -140,14 +147,14 @@ func buildControllerClusterRoleBinding(namespace string, labels map[string]strin }, ObjectMeta: metav1.ObjectMeta{ Name: monov1alpha1.ControllerName, - Labels: labels, + Labels: conf.Labels, }, Subjects: wantSubjects, RoleRef: wantRoleRef, } } -func buildControllerDeployment(tVals templates.TemplateValues, namespace string, labels map[string]string) *appsv1.Deployment { +func buildControllerDeployment(conf ControllerConf) *appsv1.Deployment { replicas := int32(1) selectorLabels := map[string]string{ @@ -155,10 +162,13 @@ func buildControllerDeployment(tVals templates.TemplateValues, namespace string, "app.kubernetes.io/component": "controller", } - podLabels := mergeStringMaps(labels, selectorLabels) + podLabels := mergeStringMaps(conf.Labels, selectorLabels) runAsNonRoot := true allowPrivilegeEscalation := false + userGroup := int64(65532) + + image, pullPolicy := controllerImage(conf) return &appsv1.Deployment{ TypeMeta: metav1.TypeMeta{ @@ -167,8 +177,8 @@ func buildControllerDeployment(tVals templates.TemplateValues, namespace string, }, ObjectMeta: metav1.ObjectMeta{ Name: monov1alpha1.ControllerName, - Namespace: 
namespace, - Labels: labels, + Namespace: conf.Namespace, + Labels: conf.Labels, }, Spec: appsv1.DeploymentSpec{ Replicas: &replicas, @@ -184,12 +194,12 @@ func buildControllerDeployment(tVals templates.TemplateValues, namespace string, Containers: []corev1.Container{ { Name: "controller", - Image: fmt.Sprintf("localhost/monok8s/node-control:%s", buildinfo.Version), - ImagePullPolicy: corev1.PullIfNotPresent, + Image: image, + ImagePullPolicy: pullPolicy, Args: []string{ "controller", "--namespace", - namespace, + conf.Namespace, }, Env: []corev1.EnvVar{ { @@ -239,6 +249,10 @@ func buildControllerDeployment(tVals templates.TemplateValues, namespace string, Port: intstr.FromString("http"), }, }, + InitialDelaySeconds: 5, + PeriodSeconds: 60, + TimeoutSeconds: 2, + FailureThreshold: 3, }, ReadinessProbe: &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ @@ -247,13 +261,64 @@ func buildControllerDeployment(tVals templates.TemplateValues, namespace string, Port: intstr.FromString("http"), }, }, + InitialDelaySeconds: 2, + PeriodSeconds: 5, + TimeoutSeconds: 2, + FailureThreshold: 3, }, SecurityContext: &corev1.SecurityContext{ RunAsNonRoot: &runAsNonRoot, + RunAsUser: &userGroup, + RunAsGroup: &userGroup, AllowPrivilegeEscalation: &allowPrivilegeEscalation, }, }, }, + NodeSelector: controllerNodeSelector(conf), + Affinity: controllerAffinity(conf), + }, + }, + }, + } +} + +func controllerImage(conf ControllerConf) (string, corev1.PullPolicy) { + if conf.Image != "" { + return conf.Image, corev1.PullIfNotPresent + } + + return fmt.Sprintf("localhost/monok8s/node-control:%s", buildinfo.Version), corev1.PullNever +} + +func controllerNodeSelector(conf ControllerConf) map[string]string { + if conf.Image != "" { + return nil + } + + // Local image exists on managed nodes only. + return map[string]string{ + monov1alpha1.NodeControlKey: "true", + } +} + +func controllerAffinity(conf ControllerConf) *corev1.Affinity { + // Local image exists only on managed nodes, so in that mode we already use + // NodeSelector and should not fight placement with anti-affinity. 
+ if conf.Image == "" { + return nil + } + + return &corev1.Affinity{ + PodAntiAffinity: &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: monov1alpha1.NodeAgentLabels(), + }, + }, }, }, }, diff --git a/clitools/pkg/render/sshd.go b/clitools/pkg/render/sshd.go index 58722dc..94b6799 100644 --- a/clitools/pkg/render/sshd.go +++ b/clitools/pkg/render/sshd.go @@ -160,6 +160,7 @@ func buildSSHDDeployment( Labels: podLabels, }, Spec: corev1.PodSpec{ + HostPID: true, NodeSelector: selectorLabels, Containers: []corev1.Container{ { @@ -215,60 +216,110 @@ exec /usr/sbin/sshd \ corev1.ResourceMemory: resource.MustParse("128Mi"), }, }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "authorized-keys", - MountPath: "/authorized-keys", - ReadOnly: true, - }, - { - Name: "host-etc", - MountPath: "/host/etc", - }, - { - Name: "host-var", - MountPath: "/host/var", - }, - }, - }, - }, - Volumes: []corev1.Volume{ - { - Name: "authorized-keys", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: sshdConfigName, + VolumeMounts: append( + []corev1.VolumeMount{ + { + Name: "authorized-keys", + MountPath: "/authorized-keys", + ReadOnly: true, }, - DefaultMode: ptrInt32(0600), }, - }, - }, - { - Name: "host-etc", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/etc", - Type: ptrHostPathType(corev1.HostPathDirectory), - }, - }, - }, - { - Name: "host-var", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/var", - Type: ptrHostPathType(corev1.HostPathDirectory), - }, - }, + buildHostRootVolumeMounts()..., + ), }, }, + Volumes: append( + []corev1.Volume{ + { + Name: "authorized-keys", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: sshdConfigName, + }, + DefaultMode: ptrInt32(0600), + }, + }, + }, + }, + buildHostRootVolumes()..., + ), }, }, }, } } +func buildHostRootVolumeMounts() []corev1.VolumeMount { + paths := []struct { + name string + mountPath string + readOnly bool + }{ + {"host-bin", "/host/bin", true}, + {"host-sbin", "/host/sbin", true}, + {"host-lib", "/host/lib", true}, + {"host-usr", "/host/usr", true}, + {"host-etc", "/host/etc", false}, + {"host-run", "/host/run", false}, + {"host-proc", "/host/proc", false}, + {"host-sys", "/host/sys", false}, + {"host-dev", "/host/dev", false}, + {"host-var", "/host/var", false}, + } + + mounts := make([]corev1.VolumeMount, 0, len(paths)) + + for _, p := range paths { + mounts = append(mounts, corev1.VolumeMount{ + Name: p.name, + MountPath: p.mountPath, + ReadOnly: p.readOnly, + }) + } + + return mounts +} + +func buildHostRootVolumes() []corev1.Volume { + hostPathDir := corev1.HostPathDirectory + + paths := []struct { + name string + path string + }{ + {"host-bin", "/bin"}, + {"host-sbin", "/sbin"}, + {"host-lib", "/lib"}, + {"host-usr", "/usr"}, + {"host-etc", "/etc"}, + {"host-run", "/run"}, + {"host-proc", "/proc"}, + {"host-sys", "/sys"}, + {"host-dev", "/dev"}, + + // /var is an rbind mount in monok8s and may be private. + // Mount the real backing path instead. 
+ {"host-var", "/data/var"}, + } + + volumes := make([]corev1.Volume, 0, len(paths)) + + for _, p := range paths { + volumes = append(volumes, corev1.Volume{ + Name: p.name, + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: p.path, + Type: &hostPathDir, + }, + }, + }) + } + + return volumes +} + func ptrInt32(v int32) *int32 { return &v } diff --git a/clitools/pkg/templates/values.go b/clitools/pkg/templates/values.go index 6e69313..05196b3 100644 --- a/clitools/pkg/templates/values.go +++ b/clitools/pkg/templates/values.go @@ -84,7 +84,7 @@ func LoadTemplateValuesFromEnv() TemplateValues { v := defaultTemplateValues() v.Hostname = getenvDefault("MKS_HOSTNAME", v.Hostname) - v.NodeName = getenvDefault("MKS_NODE_NAME", v.Hostname) + v.NodeName = getenvDefault("MKS_NODE_NAME", getenvDefault("NODE_NAME", v.Hostname)) v.KubernetesVersion = getenvDefault("MKS_KUBERNETES_VERSION", v.KubernetesVersion) v.ClusterName = getenvDefault("MKS_CLUSTER_NAME", v.ClusterName) diff --git a/devtools/serve-images.sh b/devtools/serve-images.sh new file mode 100755 index 0000000..58b5cd1 --- /dev/null +++ b/devtools/serve-images.sh @@ -0,0 +1,6 @@ +#/bin/bash + +SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" +OUT_DIR="$(realpath "$SCRIPT_DIR"/../out/)" + +python3 -m http.server 8000 --bind 0.0.0.0 --directory "$OUT_DIR" diff --git a/devtools/test-upgrade.sh b/devtools/test-upgrade.sh index b4b6452..445e9ef 100755 --- a/devtools/test-upgrade.sh +++ b/devtools/test-upgrade.sh @@ -1,15 +1,29 @@ #!/bin/bash SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" -OUT_DIR="$( realpath "$SCRIPT_DIR"/../out/ )" +OUT_DIR="$(realpath "$SCRIPT_DIR"/../out/)" set -e -BASE_URL="http://localhost:8000" -TARGET_VERSION="v$1" -STABLE_VERSION="v1.34.6" +DEFAULT_BASE_URL="http://localhost:8000" +DEFAULT_TARGET_VERSION="v1.34.1" +STABLE_VERSION="v1.34.1" NAME="my-upgrade-1" +if [ -r /dev/tty ]; then + printf "Enter the base url (%s): " "$DEFAULT_BASE_URL" > /dev/tty + read -r BASE_URL < /dev/tty + + printf "Enter the target version (%s): " "$DEFAULT_TARGET_VERSION" > /dev/tty + read -r TARGET_VERSION < /dev/tty +else + echo "No TTY available for interactive input" >&2 + exit 1 +fi + +BASE_URL="${BASE_URL:-$DEFAULT_BASE_URL}" +TARGET_VERSION="${TARGET_VERSION:-$DEFAULT_TARGET_VERSION}" + echo "apiVersion: monok8s.io/v1alpha1" echo "kind: OSUpgrade" echo "metadata:" diff --git a/docs/installing-ssh-pod.md b/docs/installing-ssh-pod.md new file mode 100644 index 0000000..5d3eac2 --- /dev/null +++ b/docs/installing-ssh-pod.md @@ -0,0 +1,204 @@ +# Installing the recovery SSHD pod + +This page explains how to install a temporary SSH server pod for break-glass recovery. + +Use this when normal Kubernetes access is degraded, for example after the API server certificate expires or rotates and you need to retrieve updated host-side credentials. + +The SSHD pod is intended for recovery and debugging only. Remove it when you are done. + +## What this does + +The recovery pod starts an SSH server on the selected node and authorizes your local SSH public key. + +The pod also mounts selected host paths under `/host`, so you can inspect the host filesystem and run some host-side recovery commands through `chroot`. + +For example: + +```sh +chroot /host /bin/sh -lc 'rc-status' +chroot /host /bin/sh -lc 'rc-service crio status' +chroot /host /bin/sh -lc 'rc-service kubelet status' +``` + +## Requirements + +You need: + +- A working `kubectl` connection to the cluster. 
+- Access to the `node-agent` DaemonSet in the `mono-system` namespace. +- A local SSH public key, usually `~/.ssh/id_rsa.pub` or `~/.ssh/id_ed25519.pub`. + +Use a public key file only. Do not pass your private key. + +## Generate the SSHD manifest + +To print the recovery SSHD manifest: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_rsa.pub +``` + +This reads your local public key and places it into the generated pod's `authorized_keys`. + +If you use Ed25519 keys, use: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_ed25519.pub +``` + +## Generate and apply the manifest + +To create the recovery SSHD resources in one step: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_rsa.pub \ + | kubectl apply -f - +``` + +For Ed25519: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_ed25519.pub \ + | kubectl apply -f - +``` + +## Why `-i` is used instead of `-it` + +Use `-i`, not `-it`, when piping the SSH public key. + +The `-t` option allocates a pseudo-TTY. A pseudo-TTY can modify piped input, which is not what you want when passing an SSH public key through stdin. + +Correct: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_rsa.pub +``` + +Avoid: + +```bash +kubectl exec -it -n mono-system ds/node-agent -- \ + ctl create sshd --authkeys /dev/stdin < ~/.ssh/id_rsa.pub +``` + +## Check that the pod is running + +After applying the manifest, check the pod: + +```bash +kubectl get pods -n mono-system -l app.kubernetes.io/name=sshd +``` + +Check the service: + +```bash +kubectl get svc -n mono-system -l app.kubernetes.io/name=sshd +``` + +If the pod does not start, inspect it: + +```bash +kubectl describe pod -n mono-system -l app.kubernetes.io/name=sshd +``` + +## Connect through SSH + +The exact SSH command depends on how the generated service exposes the pod. + +If the service uses a NodePort such as `30022`, connect with: + +```bash +ssh -p 30022 root@ +``` + +Replace `` with the node's reachable IP address. + +## Access the host environment + +Inside the SSH session, the host filesystem is available under `/host`. + +Useful checks: + +```sh +ls -la /host +chroot /host /bin/sh -lc 'rc-status' +chroot /host /bin/sh -lc 'rc-service crio status' +chroot /host /bin/sh -lc 'rc-service kubelet status' +``` + +Restart CRI-O: + +```sh +chroot /host /bin/sh -lc 'rc-service crio restart' +``` + +Restart kubelet: + +```sh +chroot /host /bin/sh -lc 'rc-service kubelet restart' +``` + +You can also inspect host processes from the pod because the recovery pod uses the host PID namespace: + +```sh +ps aux | grep -E 'kubelet|crio' +``` + +## Notes for monok8s host mounts + +The recovery pod does not mount host `/` directly. + +On monok8s, `/` and `/var` may be private mounts. Mounting them directly as host paths can fail with errors such as: + +```text +path "/" is mounted on "/" but it is not a shared or slave mount +``` + +or: + +```text +path "/var" is mounted on "/var" but it is not a shared or slave mount +``` + +Instead, the recovery pod assembles a minimal host root under `/host` from individual host paths. + +For `/var`, it uses the backing path: + +```text +/data/var -> /host/var +``` + +This avoids the private bind-mount issue. 
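If you want to confirm how `/` and `/var` are mounted on a particular node, you can read the host mount table from inside the recovery pod. This is only a quick check; it relies on the pod sharing the host PID namespace as described above:

```sh
# Because hostPID is enabled, PID 1 is the host init process, so its mountinfo
# describes the host mount table rather than the pod's. Field 5 is the mount point.
awk '$5 == "/" || $5 == "/var" || $5 == "/data/var" { print }' /proc/1/mountinfo
```

Lines without a `shared:` or `master:` tag in the optional fields are private mounts, which is exactly the case the individual host-path mounts work around.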
+ +## Remove the recovery pod + +When recovery is complete, remove the generated resources. + +If the resources use the default SSHD labels: + +```bash +kubectl delete deployment -n mono-system -l app.kubernetes.io/name=sshd +kubectl delete service -n mono-system -l app.kubernetes.io/name=sshd +kubectl delete configmap -n mono-system -l app.kubernetes.io/name=sshd +``` + +If your generated manifest uses a fixed resource name, you can also remove them by name: + +```bash +kubectl delete deployment -n mono-system sshd +kubectl delete service -n mono-system sshd +kubectl delete configmap -n mono-system sshd-authorized-keys +``` + +## Security warning + +This pod is powerful. + +It runs with root-level recovery access and can inspect or modify host files through `/host`. Treat it as a temporary break-glass tool, not a normal service. + +Do not leave it running after recovery. diff --git a/docs/ota.md b/docs/ota.md index bd7beff..2ec4354 100644 --- a/docs/ota.md +++ b/docs/ota.md @@ -1,19 +1,54 @@ -## Upgrade process +# OS OTA Upgrades -We use an agent to watch the OSUpgrade CRD to handle this. Our image versions follows upstream. +MonoK8s upgrades are driven through two custom resources: -To issue an upgrade. Simply use +- `OSUpgrade`: the user-facing upgrade request. +- `OSUpgradeProgress`: the per-node upgrade state watched and executed by the node agent. +The node agent does the actual upgrade work. It watches `OSUpgradeProgress` resources assigned to its node, downloads the selected image, writes it to the inactive rootfs partition, updates status, and reboots when ready. + +The controller is optional but strongly recommended. It watches `OSUpgrade` resources and creates the matching `OSUpgradeProgress` resources for the target nodes. + +## Install the controller + +By default, each managed node only runs the node agent. The node agent does **not** watch `OSUpgrade` directly; it only watches `OSUpgradeProgress`. + +You can create `OSUpgradeProgress` resources by hand, but normal users should not need to. Install the controller instead, then create `OSUpgrade` resources. + +Install the controller from the existing node-agent image: + +```bash +kubectl exec -i -n mono-system ds/node-agent -- \ + ctl create controller --image REPO/IMAGE:TAG | kubectl apply -f - +``` + +### `--image` + +`--image` is optional. + +If omitted, the generated Deployment uses the local controller image that is already shipped with managed nodes. In that mode, the controller Deployment is scheduled only onto managed nodes because the image is expected to exist locally. + +If provided, the generated Deployment uses that image directly. This is useful when you host the controller image in your own registry. + +There is no official public image repository yet, so external controller images must currently be managed by the operator. 
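As a concrete example, assuming the controller image is mirrored to `registry.example.com/monok8s/node-control:v1.35.1` (a placeholder reference), the install looks like this:

```bash
# The image reference is a placeholder; substitute your own registry and tag.
kubectl exec -i -n mono-system ds/node-agent -- \
  ctl create controller --image registry.example.com/monok8s/node-control:v1.35.1 \
  | kubectl apply -f -

# Confirm the controller Deployment came up. The namespace is assumed here;
# check the generated manifest if your install uses a different one.
kubectl get deployments -n mono-system
```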
+ +## Create an upgrade + +Create an `OSUpgrade` resource to request an upgrade: + +```bash kubectl apply -f upgrade.yaml +``` + +Example: -Example yaml ```yaml apiVersion: monok8s.io/v1alpha1 kind: OSUpgrade metadata: - name: "my-ugrade-2" + name: my-upgrade-2 spec: - version: "v1.35.3" + version: v1.35.3 nodeSelector: {} catalog: inline: | @@ -34,24 +69,61 @@ spec: - version: v1.35.1 url: http://localhost:8000/rootfs.ext4.zst checksum: sha256:99af82a263deca44ad91d21d684f0fa944d5d0456a1da540f1c644f8aa59b14b - size: 1858076672 # expanded image size in bytes, use "zstd -lv image.zst to check" + size: 1858076672 # expanded image size in bytes; check with: zstd -lv image.zst blocked: - v1.34.0 ``` -catalog also accepts URL or ConfigMap※ +### `spec.version` + +`spec.version` is the requested target version. + +It may be either: + +- an explicit version, such as `v1.35.3` +- `stable`, if the catalog defines a `stable` version + +### `spec.nodeSelector` + +`spec.nodeSelector` selects the nodes that should receive the upgrade. + +An empty selector means all eligible managed nodes. + +### `spec.catalog` + +The catalog tells the agent where to find available OS images. + +The catalog can be provided inline: + ```yaml catalog: - URL: https://example.com/images.yaml - -catalog: - ConfigMap: images-cm + inline: | + stable: v1.35.1 + images: + - version: v1.35.1 + url: https://example.invalid/images/monok8s-v1.35.1.img.zst + checksum: sha256:abc + size: 1858076672 ``` -※ ConfigMap requires additional RBAC permissions which is not enabled by default. You can edit -the node-agent's ClusterRole and add `configmaps: get` to allow this. +It can also be loaded from a URL: + +```yaml +catalog: + url: https://example.com/images.yaml +``` + +Or from a ConfigMap: + +```yaml +catalog: + configMap: images-cm +``` + +ConfigMap catalogs require extra RBAC. This permission is not enabled by default. To use a ConfigMap catalog, edit the relevant ClusterRole and allow `get` on `configmaps`. 
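A minimal sketch of the extra rule; which ClusterRole to edit depends on which component reads the catalog in your setup, so verify the name before patching:

```yaml
# Append to the existing rules of the relevant ClusterRole.
rules:
  - apiGroups: [""]           # core API group
    resources: ["configmaps"]
    verbs: ["get"]
```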
+ +Catalog content should look like this: -Contents should look like this ```yaml stable: v1.35.1 images: @@ -70,64 +142,114 @@ images: - version: v1.35.1 url: http://localhost:8000/rootfs.ext4.zst checksum: sha256:99af82a263deca44ad91d21d684f0fa944d5d0456a1da540f1c644f8aa59b14b - size: 1858076672 # expanded image size in bytes, use "zstd -lv image.zst to check" + size: 1858076672 # expanded image size in bytes; check with: zstd -lv image.zst blocked: - v1.34.0 ``` -### Monitoring the upgrades +## Monitor upgrades -kubectl get osugrades -``` -NAME DESIRED RESOLVED PHASE TARGETS OK FAIL AGE -my-upgrade-3 stable v1.35.4 RollingOut 3 1 0 1m -my-upgrade-2 v1.35.3 v1.35.3 Accepted 2 0 0 1m -my-downgrade-1 v1.33.2 v1.33.2 Rejected 2 0 2 1m +List upgrade requests: + +```bash +kubectl get osupgrades ``` -kubectl get osupgradeprogress +Example output: + +```text +NAME DESIRED RESOLVED PHASE +my-upgrade-3 stable v1.35.4 Pending +my-upgrade-2 v1.35.3 v1.35.3 Accepted +my-downgrade-1 v1.33.2 v1.33.2 Rejected ``` + +List per-node progress: + +```bash +kubectl get osupgradeprogresses +``` + +Example output: + +```text NAME NODE SOURCE CURRENT TARGET STATUS -osupgrade-abc123f node-1 my-upgrade-2 v1.34.1 v1.35.3 downloading -osupgrade-cde456g node-2 my-upgrade-2 v1.35.3 v1.35.3 completed +osupgrade-abc123f node-1 my-upgrade-2 v1.34.1 v1.35.3 Downloading +osupgrade-cde456g node-2 my-upgrade-2 v1.35.3 v1.35.3 Completed ``` +Inspect one node's progress: +```bash kubectl describe osupgradeprogress osupgrade-abc123f +``` + +Example resource: + ```yaml apiVersion: monok8s.io/v1alpha1 kind: OSUpgradeProgress metadata: - name: "osupgrade-abc123f" + name: osupgrade-abc123f spec: sourceRef: name: my-upgrade-2 nodeName: node-1 status: - currentVersion: "v1.34.1" - targetVersion: "v1.35.3" + currentVersion: v1.34.1 + targetVersion: v1.35.3 phase: Downloading startedAt: null completedAt: null lastUpdatedAt: null retryCount: 0 - inactivePartition: "B" + inactivePartition: B failureReason: "" message: "" ``` +## Retry a failed upgrade + +If an upgrade fails, for example because the image download failed, edit `spec.retryNonce` on the affected `OSUpgradeProgress` resource. + +Any changed value is enough. The field is only used to tell the node agent that the user intentionally requested a retry. + +Example: + +```bash +kubectl patch osupgradeprogress osupgrade-abc123f \ + --type merge \ + -p '{"spec":{"retryNonce":"retry-1"}}' +``` + +If the same node fails again and you want to retry again, change the nonce to a new value: + +```bash +kubectl patch osupgradeprogress osupgrade-abc123f \ + --type merge \ + -p '{"spec":{"retryNonce":"retry-2"}}' +``` + ## Development notes -### Flashing manually into partition B +### Flash an image manually into partition B -**Use nmap ncat**. Otherwise we'll have all kinds of fabulous issues sending it. +Use nmap's `ncat`. Other tools may work, but they are more likely to cause annoying stream or connection behavior. -Sending side -``` -pv "out/rootfs.ext4.zst" | ncat 10.0.0.10 1234 --send-only +On the sending machine: + +```bash +pv out/rootfs.ext4.zst | ncat 10.0.0.10 1234 --send-only ``` -Receiving side -``` -ncat -l 1234 --recv-only | zstd -d -c | dd of=/dev/sda3 bs=4M status=progress && sync && echo "SUCCESS" +On the receiving machine: + +```bash +ncat -l 1234 --recv-only | \ + zstd -d -c | \ + dd of=/dev/sda3 bs=4M status=progress && \ + sync && \ + echo "SUCCESS" ``` + +Be careful with the target partition. 
The example writes to `/dev/sda3`, which is assumed to be rootfs B in that setup. Verify the partition layout before running this on real hardware.
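One way to double-check the layout before writing, assuming standard util-linux tools are available on the receiving node:

```bash
# Show the partitions on the target disk and where each one is mounted.
lsblk -o NAME,SIZE,TYPE,MOUNTPOINT /dev/sda

# The device reported here is the ACTIVE rootfs; flash the other partition.
findmnt /
```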