Added supervised-init.sh to retry ctl init
This commit is contained in:
@@ -38,4 +38,4 @@ if [ -n "$K8S_MINOR" ]; then
|
|||||||
"$MIGRATION_STATE_DIR/k8s/$K8S_MINOR"
|
"$MIGRATION_STATE_DIR/k8s/$K8S_MINOR"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
/usr/local/bin/ctl init --env-file "$CONFIG_DIR/cluster.env" >>/var/log/monok8s/bootstrap.log 2>&1 &
|
/usr/lib/monok8s/lib/supervised-init.sh &
|
||||||
|
|||||||
57
alpine/rootfs-extra/usr/lib/monok8s/lib/supervised-init.sh
Executable file
57
alpine/rootfs-extra/usr/lib/monok8s/lib/supervised-init.sh
Executable file
@@ -0,0 +1,57 @@
|
|||||||
|
#!/bin/sh
# supervised-init.sh - keep running `ctl init` until it succeeds.
#
# Behaviour:
#   * Single instance: a lock directory under $STATE_DIR acts as the mutex
#     (mkdir fails when the directory already exists). A second invocation
#     logs a message and exits 0.
#   * Hold switch: while $HOLD_FILE exists no attempt is made; the file is
#     re-checked every 300 seconds.
#   * Back-off: every failed attempt increments a counter persisted in
#     $FAIL_COUNT_FILE; the delay before the next attempt is 10s, 30s, 60s,
#     120s and then 300s for every later failure.
#   * Success: the counter file is removed and the script exits 0.
#
# All diagnostics, including the output of `ctl init`, are appended to $LOG.
set -eu

CONFIG_DIR=/opt/monok8s/config
LOG=/var/log/monok8s/bootstrap.log
STATE_DIR=/run/monok8s
FAIL_COUNT_FILE="$STATE_DIR/bootstrap-fail-count"
LOCK_DIR="$STATE_DIR/supervised-init.lock"

# For debugging
HOLD_FILE="$CONFIG_DIR/bootstrap.hold"

# Append one timestamped line to $LOG.
# Best-effort on purpose: an unwritable log must not make `set -e` kill the
# supervisor, whose only job is to keep retrying.
log() {
  printf '[%s] %s\n' "$(date -Is)" "$*" >> "$LOG" || :
}

# Drop the single-instance lock. Runs from the EXIT trap only, so it executes
# exactly once per process.
release_lock() {
  rmdir "$LOCK_DIR" || :
}

mkdir -p "$STATE_DIR" /var/log/monok8s

if ! mkdir "$LOCK_DIR" 2>/dev/null; then
  log "supervised-init already running"
  exit 0
fi

# The lock is released on EXIT only. The signal traps merely turn the signal
# into an exit (128 + signal number) so that the EXIT trap fires. Without the
# explicit `exit`, sh would run the trap action and then carry on with the
# retry loop, i.e. keep running after having given up its lock.
trap release_lock EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Resume the failure counter left behind by an earlier run; anything that is
# not a plain non-negative integer is treated as 0.
fail_count=0
if [ -f "$FAIL_COUNT_FILE" ]; then
  fail_count="$(cat "$FAIL_COUNT_FILE" 2>/dev/null || echo 0)"
  case "$fail_count" in
    ''|*[!0-9]*) fail_count=0 ;;
  esac
fi

while true; do
  if [ -f "$HOLD_FILE" ]; then
    log "bootstrap held by $HOLD_FILE"
    sleep 300
    continue
  fi

  log "starting ctl init"

  if /usr/local/bin/ctl init --env-file "$CONFIG_DIR/cluster.env" >> "$LOG" 2>&1; then
    log "ctl init succeeded"
    rm -f "$FAIL_COUNT_FILE"
    exit 0
  fi

  fail_count=$((fail_count + 1))
  # Persisting the counter is best-effort: losing it only resets the back-off
  # for a later run, whereas aborting here would stop the retries altogether.
  echo "$fail_count" > "$FAIL_COUNT_FILE" \
    || log "could not persist fail count to $FAIL_COUNT_FILE"

  log "ctl init failed, count=$fail_count"

  # Back-off schedule, capped at five minutes.
  case "$fail_count" in
    1) sleep 10 ;;
    2) sleep 30 ;;
    3) sleep 60 ;;
    4) sleep 120 ;;
    *) sleep 300 ;;
  esac
done
|
||||||
@@ -70,7 +70,7 @@ func NewRunner(cfg *monov1alpha1.MonoKSConfig) *Runner {
|
|||||||
{
|
{
|
||||||
RegKey: "EngageControlGate",
|
RegKey: "EngageControlGate",
|
||||||
Name: "Engage the control gate",
|
Name: "Engage the control gate",
|
||||||
Desc: "Prevents agent polling resources prematurely",
|
Desc: "Prevents agent watching resources prematurely",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RegKey: "StartCRIO",
|
RegKey: "StartCRIO",
|
||||||
@@ -165,7 +165,7 @@ func NewRunner(cfg *monov1alpha1.MonoKSConfig) *Runner {
|
|||||||
{
|
{
|
||||||
RegKey: "ReleaseControlGate",
|
RegKey: "ReleaseControlGate",
|
||||||
Name: "Release the control gate",
|
Name: "Release the control gate",
|
||||||
Desc: "Allow agent to start polling resources",
|
Desc: "Allow agent to start watching resources",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user