Drafting ctl controller

This commit is contained in:
2026-04-20 02:50:04 +08:00
parent c6b399ba22
commit 6ddff7c433
52 changed files with 3093 additions and 347 deletions

View File

@@ -9,10 +9,7 @@ import (
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/klog/v2"
@@ -23,18 +20,13 @@ import (
"example.com/monok8s/pkg/templates"
)
const defaultPollInterval = 15 * time.Second
var runtimeDefaultUnstructuredConverter = runtime.DefaultUnstructuredConverter
func NewCmdAgent(flags *genericclioptions.ConfigFlags) *cobra.Command {
var namespace string
var envFile string
var pollInterval time.Duration
cmd := &cobra.Command{
Use: "agent --env-file path",
Short: "Watch OSUpgrade resources and process matching upgrades for this node",
Short: "Watch OSUpgradeProgress resources for this node and process upgrades",
RunE: func(cmd *cobra.Command, _ []string) error {
if envFile == "" {
return fmt.Errorf("--env-file is required")
@@ -61,7 +53,6 @@ func NewCmdAgent(flags *genericclioptions.ConfigFlags) *cobra.Command {
"node", cfg.Spec.NodeName,
"namespace", namespace,
"envFile", envFile,
"pollInterval", pollInterval,
)
clients, err := kube.NewClients(flags)
@@ -69,13 +60,12 @@ func NewCmdAgent(flags *genericclioptions.ConfigFlags) *cobra.Command {
return fmt.Errorf("create kube clients: %w", err)
}
return runPollLoop(ctx, clients, namespace, cfg.Spec.NodeName, pollInterval)
return runWatchLoop(ctx, clients, namespace, cfg.Spec.NodeName)
},
}
cmd.Flags().StringVar(&namespace, "namespace", "kube-system", "namespace to watch")
cmd.Flags().StringVar(&namespace, "namespace", templates.DefaultNamespace, "namespace to watch")
cmd.Flags().StringVar(&envFile, "env-file", "", "path to env file containing MKS_* variables")
cmd.Flags().DurationVar(&pollInterval, "poll-interval", defaultPollInterval, "poll interval for OSUpgrade resources")
return cmd
}
@@ -94,9 +84,9 @@ func waitForControlGate(ctx context.Context, envFile string, pollInterval time.D
for {
_, err := os.Stat(marker)
if err == nil {
klog.InfoS("Control gate is present; waiting before starting poll loop", "path", marker)
klog.InfoS("Control gate is present; waiting before starting watch loop", "path", marker)
} else if os.IsNotExist(err) {
klog.InfoS("Control gate not present; starting poll loop", "path", marker)
klog.InfoS("Control gate not present; starting watch loop", "path", marker)
return nil
} else {
return fmt.Errorf("stat upgrade marker %s: %w", marker, err)
@@ -110,126 +100,181 @@ func waitForControlGate(ctx context.Context, envFile string, pollInterval time.D
}
}
func runPollLoop(ctx context.Context, clients *kube.Clients, namespace, nodeName string, interval time.Duration) error {
gvr := schema.GroupVersionResource{
Group: monov1alpha1.Group,
Version: monov1alpha1.Version,
Resource: "osupgrades",
}
ticker := time.NewTicker(interval)
defer ticker.Stop()
func runWatchLoop(ctx context.Context, clients *kube.Clients, namespace, nodeName string) error {
var resourceVersion string
for {
if err := pollOnce(ctx, clients, gvr, namespace, nodeName); err != nil {
klog.ErrorS(err, "poll failed", "namespace", namespace, "node", nodeName)
if ctx.Err() != nil {
return ctx.Err()
}
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
err := watchOnce(ctx, clients, namespace, nodeName, &resourceVersion)
if err != nil {
if ctx.Err() != nil {
return ctx.Err()
}
klog.ErrorS(err, "watch failed; retrying",
"namespace", namespace,
"node", nodeName,
"resourceVersion", resourceVersion,
)
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(2 * time.Second):
}
continue
}
}
}
func pollOnce(
func watchOnce(
ctx context.Context,
clients *kube.Clients,
gvr schema.GroupVersionResource,
namespace string,
nodeName string,
resourceVersion *string,
) error {
list, err := clients.Dynamic.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{})
list, err := clients.MonoKS.
Monok8sV1alpha1().
OSUpgradeProgresses(namespace).
List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("list osupgrades: %w", err)
}
klog.InfoS("agent tick", "namespace", namespace, "items", len(list.Items), "node", nodeName)
nodeLabels := labels.Set{
"kubernetes.io/hostname": nodeName,
"monok8s.io/node-name": nodeName,
"monok8s.io/control-agent": "true",
return fmt.Errorf("list osupgradeprogresses: %w", err)
}
for i := range list.Items {
item := &list.Items[i]
osu, err := decodeOSUpgrade(item)
if err != nil {
klog.ErrorS(err, "failed to decode osupgrade",
"name", item.GetName(),
"resourceVersion", item.GetResourceVersion(),
)
if !targetsNode(item, nodeName) {
continue
}
if !shouldHandle(item) {
continue
}
if !matchesNode(osu, nodeName, nodeLabels) {
klog.V(2).InfoS("skipping osupgrade; not targeted to this node",
"name", osu.Name,
"node", nodeName,
)
continue
}
klog.InfoS("matched osupgrade",
"name", osu.Name,
klog.InfoS("found existing osupgradeprogress",
"name", item.Name,
"node", nodeName,
"desiredVersion", osu.Spec.DesiredVersion,
"phase", statusPhase(osu.Status),
"resourceVersion", osu.ResourceVersion,
"phase", progressPhase(item.Status),
"resourceVersion", item.ResourceVersion,
)
if err := osupgradeController.HandleOSUpgrade(ctx, clients, namespace, nodeName, osu); err != nil {
klog.ErrorS(err, "failed to handle osupgrade",
"name", osu.Name,
if err := osupgradeController.HandleOSUpgradeProgress(ctx, clients, namespace, nodeName, item); err != nil {
klog.ErrorS(err, "failed to handle existing osupgradeprogress",
"name", item.Name,
"node", nodeName,
)
continue
}
}
return nil
}
*resourceVersion = list.ResourceVersion
func decodeOSUpgrade(item *unstructured.Unstructured) (*monov1alpha1.OSUpgrade, error) {
var osu monov1alpha1.OSUpgrade
if err := runtimeDefaultUnstructuredConverter.FromUnstructured(item.Object, &osu); err != nil {
return nil, fmt.Errorf("convert unstructured to OSUpgrade: %w", err)
}
return &osu, nil
}
func matchesNode(osu *monov1alpha1.OSUpgrade, nodeName string, nodeLabels labels.Set) bool {
if osu == nil {
return false
}
sel := osu.Spec.NodeSelector
if sel == nil {
// No selector means "match all nodes".
return true
}
selector, err := metav1.LabelSelectorAsSelector(sel)
w, err := clients.MonoKS.
Monok8sV1alpha1().
OSUpgradeProgresses(namespace).
Watch(ctx, metav1.ListOptions{
ResourceVersion: *resourceVersion,
})
if err != nil {
klog.ErrorS(err, "invalid node selector on osupgrade", "name", osu.Name)
return fmt.Errorf("watch osupgradeprogresses: %w", err)
}
defer w.Stop()
klog.InfoS("watching osupgradeprogresses",
"namespace", namespace,
"node", nodeName,
"resourceVersion", *resourceVersion,
)
for {
select {
case <-ctx.Done():
return ctx.Err()
case evt, ok := <-w.ResultChan():
if !ok {
return fmt.Errorf("watch channel closed")
}
switch evt.Type {
case watch.Bookmark:
obj, ok := evt.Object.(*monov1alpha1.OSUpgradeProgress)
if ok && obj != nil && obj.ResourceVersion != "" {
*resourceVersion = obj.ResourceVersion
}
continue
case watch.Error:
return fmt.Errorf("watch returned error event")
}
osup, ok := evt.Object.(*monov1alpha1.OSUpgradeProgress)
if !ok {
klog.V(1).InfoS("skipping unexpected watch object type",
"type", fmt.Sprintf("%T", evt.Object),
)
continue
}
if osup.ResourceVersion != "" {
*resourceVersion = osup.ResourceVersion
}
if !targetsNode(osup, nodeName) {
continue
}
if !shouldHandle(osup) {
klog.V(2).InfoS("skipping osupgradeprogress due to phase",
"name", osup.Name,
"node", nodeName,
"phase", progressPhase(osup.Status),
"eventType", evt.Type,
)
continue
}
klog.InfoS("received osupgradeprogress event",
"name", osup.Name,
"node", nodeName,
"phase", progressPhase(osup.Status),
"eventType", evt.Type,
"resourceVersion", osup.ResourceVersion,
)
if err := osupgradeController.HandleOSUpgradeProgress(ctx, clients, namespace, nodeName, osup); err != nil {
klog.ErrorS(err, "failed to handle osupgradeprogress",
"name", osup.Name,
"node", nodeName,
"eventType", evt.Type,
)
}
}
}
}
func targetsNode(osup *monov1alpha1.OSUpgradeProgress, nodeName string) bool {
if osup == nil {
return false
}
return osup.Spec.NodeName == nodeName
}
func shouldHandle(osup *monov1alpha1.OSUpgradeProgress) bool {
if osup == nil {
return false
}
if selector.Empty() {
switch osup.Status.Phase {
case "",
monov1alpha1.OSUpgradeProgressPhasePending:
return true
default:
return false
}
if selector.Matches(nodeLabels) {
return true
}
return false
}
func statusPhase(st *monov1alpha1.OSUpgradeStatus) string {
func progressPhase(st *monov1alpha1.OSUpgradeProgressStatus) string {
if st == nil {
return ""
}