Fix race conditions in the OS upgrade runner: serialize concurrent HandleOSUpgrade calls and refuse new work once a reboot has been triggered
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/klog/v2"
|
||||
@@ -16,9 +17,42 @@ import (
|
||||
"example.com/monok8s/pkg/node/uboot"
|
||||
)
|
||||
|
||||
// UpgradeRunner serializes upgrade work: at most one callback executes at
// a time, and once a reboot has been initiated no further callbacks run.
type UpgradeRunner struct {
	running   atomic.Bool // true while a callback is executing
	rebooting atomic.Bool // latched true once a reboot has been triggered
}

// r is the package-wide runner shared by all upgrade handlers.
var r UpgradeRunner

// Run invokes fn unless a reboot is pending or another Run is already in
// flight; in either of those cases it returns nil without calling fn.
func (u *UpgradeRunner) Run(fn func() error) error {
	switch {
	case u.rebooting.Load():
		// A reboot has been requested; never start new work.
		return nil
	case !u.running.CompareAndSwap(false, true):
		// Another caller holds the slot; skip rather than queue.
		return nil
	}
	defer u.running.Store(false)

	// Re-check after winning the slot: a reboot may have been requested
	// between the first check and the CAS above.
	if u.rebooting.Load() {
		return nil
	}
	return fn()
}
|
||||
|
||||
func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
namespace string, nodeName string,
|
||||
osu *monov1alpha1.OSUpgrade,
|
||||
) error {
|
||||
return r.Run(func() error {
|
||||
return handleOSUpgradeLocked(ctx, clients, namespace, nodeName, osu)
|
||||
})
|
||||
}
|
||||
|
||||
func handleOSUpgradeLocked(ctx context.Context, clients *kube.Clients,
|
||||
namespace string, nodeName string,
|
||||
osu *monov1alpha1.OSUpgrade,
|
||||
) error {
|
||||
osup, err := ensureProgressHeartbeat(ctx, clients, namespace, nodeName, osu)
|
||||
if err != nil {
|
||||
@@ -57,14 +91,16 @@ func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
|
||||
first := plan.Path[0]
|
||||
|
||||
osup.Status.TargetVersion = plan.ResolvedTarget
|
||||
osup.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseDownloading
|
||||
osup.Status.Message = fmt.Sprintf("downloading image: %s", first.URL)
|
||||
|
||||
now := metav1.Now()
|
||||
osup.Status.LastUpdatedAt = &now
|
||||
osup, err = updateProgressStatus(ctx, clients, osup_gvr, osup)
|
||||
|
||||
updated, err := updateProgressRobust(ctx, clients, osup.Namespace, osup.Name, func(cur *monov1alpha1.OSUpgradeProgress) {
|
||||
now := metav1.Now()
|
||||
cur.Status.TargetVersion = plan.ResolvedTarget
|
||||
cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseDownloading
|
||||
cur.Status.Message = fmt.Sprintf("downloading image: %s", first.URL)
|
||||
cur.Status.LastUpdatedAt = &now
|
||||
})
|
||||
if updated != nil {
|
||||
osup = updated
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("update progress status: %w", err)
|
||||
}
|
||||
@@ -82,7 +118,6 @@ func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
|
||||
imageSHA, err := first.SHA256()
|
||||
if err != nil {
|
||||
now = metav1.Now()
|
||||
return failProgress(ctx, clients, osup, "apply image", err)
|
||||
}
|
||||
|
||||
@@ -91,31 +126,38 @@ func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
|
||||
imageOptions := osimage.ApplyOptions{
|
||||
URL: first.URL,
|
||||
TargetPath: "/dev/sda?",
|
||||
TargetPath: "/dev/mksaltpart",
|
||||
ExpectedRawSHA256: imageSHA,
|
||||
ExpectedRawSize: first.Size,
|
||||
BufferSize: 6 * 1024 * 1024,
|
||||
Progress: func(p osimage.Progress) {
|
||||
pLogger.Log(p)
|
||||
|
||||
if err := statusUpdater.Run(func() error {
|
||||
updated, err := updateProgressRobust(ctx, clients, osup.Namespace, osup.Name, func(cur *monov1alpha1.OSUpgradeProgress) {
|
||||
now := metav1.Now()
|
||||
|
||||
now := metav1.Now()
|
||||
switch p.Stage {
|
||||
case "flash":
|
||||
osup.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseWriting
|
||||
case "verify":
|
||||
osup.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseVerifying
|
||||
switch p.Stage {
|
||||
case "flash":
|
||||
cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseWriting
|
||||
case "verify":
|
||||
cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseVerifying
|
||||
}
|
||||
|
||||
cur.Status.TargetVersion = plan.ResolvedTarget
|
||||
cur.Status.LastUpdatedAt = &now
|
||||
cur.Status.Message = fmt.Sprintf(
|
||||
"%s: %d%%",
|
||||
p.Stage,
|
||||
osimage.PercentOf(p.BytesComplete, p.BytesTotal),
|
||||
)
|
||||
})
|
||||
if updated != nil {
|
||||
osup = updated
|
||||
}
|
||||
osup.Status.LastUpdatedAt = &now
|
||||
osup.Status.Message = fmt.Sprintf("%s: %d%%", p.Stage, osimage.PercentOf(p.BytesComplete, p.BytesTotal))
|
||||
|
||||
updated, err := updateProgressStatus(ctx, clients, osup_gvr, osup)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "update progress status")
|
||||
return err
|
||||
return fmt.Errorf("update progress status: %w", err)
|
||||
}
|
||||
|
||||
osup = updated
|
||||
return nil
|
||||
}); err != nil {
|
||||
klog.ErrorS(err, "throttled progress update failed")
|
||||
@@ -125,7 +167,6 @@ func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
|
||||
result, err := osimage.ApplyImageStreamed(ctx, imageOptions)
|
||||
if err != nil {
|
||||
now = metav1.Now()
|
||||
return failProgress(ctx, clients, osup, "apply image", err)
|
||||
}
|
||||
|
||||
@@ -136,18 +177,36 @@ func HandleOSUpgrade(ctx context.Context, clients *kube.Clients,
|
||||
return failProgress(ctx, clients, osup, "set boot env", err)
|
||||
}
|
||||
|
||||
now = metav1.Now()
|
||||
osup.Status.LastUpdatedAt = &now
|
||||
osup.Status.Message = "image applied, verified, and next boot environment updated"
|
||||
osup.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseRebooting
|
||||
|
||||
osup, err = updateProgressStatus(ctx, clients, osup_gvr, osup)
|
||||
updated, err = updateProgressRobust(ctx, clients, osup.Namespace, osup.Name, func(cur *monov1alpha1.OSUpgradeProgress) {
|
||||
now := metav1.Now()
|
||||
cur.Status.TargetVersion = plan.ResolvedTarget
|
||||
cur.Status.Message = "image applied, verified, and next boot environment updated"
|
||||
cur.Status.Phase = monov1alpha1.OSUpgradeProgressPhaseRebooting
|
||||
cur.Status.LastUpdatedAt = &now
|
||||
})
|
||||
if updated != nil {
|
||||
osup = updated
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("update progress status: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Drain the node here
|
||||
// TODO: Issue Reboot
|
||||
// Get all running pods outta here!
|
||||
// kubectl.Run()
|
||||
// Wait for the node to be drained
|
||||
// kubectl.Wait()
|
||||
|
||||
return nil
|
||||
r.rebooting.Store(true)
|
||||
if err := triggerReboot(); err != nil {
|
||||
r.rebooting.Store(false)
|
||||
return fmt.Errorf("trigger reboot: %w", err)
|
||||
}
|
||||
select {}
|
||||
}
|
||||
|
||||
// triggerReboot forces an immediate kernel-level reboot via the magic
// SysRq interface: sync dirty buffers ("s"), remount filesystems
// read-only ("u"), then reboot ("b"). The first two writes are best
// effort — the machine is going down either way, so only the final
// reboot write's error is propagated to the caller.
func triggerReboot() error {
	const sysrq = "/proc/sysrq-trigger"
	for _, cmd := range []string{"s\n", "u\n"} {
		_ = os.WriteFile(sysrq, []byte(cmd), 0) // best effort; see above
	}
	return os.WriteFile(sysrq, []byte("b\n"), 0)
}
|
||||
|
||||
Reference in New Issue
Block a user