Files
monok8s/initramfs/rootfs-extra/init

295 lines
6.8 KiB
Bash
Executable File

#!/bin/sh
# Initramfs /init script.
#
# Mounts the early pseudo-filesystems, locates the root and data partitions
# through sysfs (GPT PARTNAME / PARTUUID), mounts them under /newroot together
# with a bind-mounted /var and an overlay on /etc, then hands control to
# /sbin/init via switch_root.
#
# -e: abort on an unhandled command failure; -u: treat unset variables as errors.
set -eu
# Emit one diagnostic line, tagged "[init]", on stderr.
# All arguments are joined into a single message via "$*".
log() {
  echo "[init] $*"
} >&2
# Report a fatal error on stderr and replace the current process with `sh`.
# Does not return to the caller when the exec succeeds.
panic() {
  echo "initramfs panic: $*" 1>&2

  # exec (rather than a plain call) so the shell takes over this process.
  exec sh
}
# Run `mount` with exactly the given arguments; on failure, hand the
# argument list to panic() as part of the error message.
mount_or_panic() {
  if ! mount "$@"; then
    panic "mount failed: $*"
  fi
}
# Mount a device, retrying while the mount keeps failing.
#
# Arguments:
#   $1  device to mount
#   $2  mount point
#   $3  filesystem type (passed to mount -t)
#   $4  option string (passed to mount -o)
#
# Returns 0 as soon as a mount attempt succeeds. After the 50th failed
# attempt panic() is called; attempts are spaced 0.2 s apart.
mount_retry() {
  retry_dev="$1"
  retry_target="$2"
  retry_fstype="$3"
  retry_opts="$4"
  failed_attempts=0

  until mount -o "$retry_opts" -t "$retry_fstype" "$retry_dev" "$retry_target"; do
    failed_attempts=$((failed_attempts + 1))
    if [ "$failed_attempts" -ge 50 ]; then
      panic "Timed out mounting $retry_dev on $retry_target"
    fi
    sleep 0.2
  done

  return 0
}
# Block until the given path exists.
#
# Arguments:
#   $1  path to wait for
#
# Polls every 0.2 s. Once 50 polls have found the path missing, panic()
# is called with a timeout message.
wait_for_path() {
  awaited_path="$1"
  polls=0

  until [ -e "$awaited_path" ]; do
    polls=$((polls + 1))
    if [ "$polls" -ge 50 ]; then
      panic "Timed out waiting for $awaited_path"
    fi
    sleep 0.2
  done
}
# Print the value of the first KEY=VALUE token on the kernel command line.
#
# Arguments:
#   $1  key to look up (matched literally, must be followed by '=')
#   $2  optional file to read instead of /proc/cmdline
#
# Outputs: the text after "KEY=" on stdout.
# Returns: 0 if the key was found, 1 if it was not or the file is unreadable.
get_cmdline_arg() {
  key="$1"
  cmdline_file="${2:-/proc/cmdline}"
  [ -r "$cmdline_file" ] || return 1

  # The token list is deliberately word-split, but it must not be
  # pathname-expanded: a token such as "foo=*" would otherwise be replaced by
  # matching file names. Globbing is switched off inside a subshell so the
  # caller's shell options are left untouched.
  (
    set -f
    for arg in $(cat "$cmdline_file"); do
      case "$arg" in
        "$key"=*)
          echo "${arg#"$key"=}"
          exit 0
          ;;
      esac
    done
    exit 1
  )
}
# Look up one key in a KEY=VALUE file such as /sys/class/block/*/uevent,
# using only shell builtins (no grep/cut processes).
#
# Arguments:
#   $1  file to read
#   $2  key to look for
#
# Outputs: the value of the first matching line on stdout.
# Returns: 0 on a match, 1 if the file is not a regular file or has no such key.
get_uevent_value() {
  uevent_path="$1"
  lookup_key="$2"

  if [ ! -f "$uevent_path" ]; then
    return 1
  fi

  # IFS='=' splits at the first '='; anything after it (including further
  # '=' characters) ends up in the second variable.
  while IFS='=' read -r field value; do
    if [ "$field" = "$lookup_key" ]; then
      echo "$value"
      return 0
    fi
  done < "$uevent_path"

  return 1
}
# Print the /dev/<name> path of the first partition under /sys/class/block
# whose PARTNAME uevent value equals the requested name.
#
# Arguments:
#   $1  partition name to look for
#
# Returns: 0 and prints the device path on a match, 1 otherwise.
find_first_part_by_partname() {
  wanted_name="$1"

  for blk in /sys/class/block/*; do
    # Only entries carrying a "partition" attribute file are partitions.
    if [ -f "$blk/partition" ]; then
      blk_partname="$(get_uevent_value "$blk/uevent" PARTNAME || true)"
      if [ "$blk_partname" = "$wanted_name" ]; then
        echo "/dev/${blk##*/}"
        return 0
      fi
    fi
  done

  return 1
}
# Wait until every named partition can be found by PARTNAME.
#
# Arguments:
#   $1   timeout in seconds (default 3)
#   $2.. partition names that must all be present
#
# Returns: 0 once all names resolve, 1 if they still do not after the timeout.
#
# The partitions are checked once per second, including one final check after
# the last sleep, so a partition that shows up during the last second of the
# timeout is still detected.
wait_for_partnames() {
  timeout="${1:-3}"
  # A bare `shift` with no positional parameters is a fatal error in some
  # shells, so only shift when there is something to drop.
  if [ "$#" -gt 0 ]; then
    shift
  fi
  i=0

  while :; do
    all_found=1
    for name in "$@"; do
      if ! find_first_part_by_partname "$name" >/dev/null; then
        all_found=0
        break
      fi
    done

    if [ "$all_found" -eq 1 ]; then
      return 0
    fi
    if [ "$i" -ge "$timeout" ]; then
      return 1
    fi

    sleep 1
    i=$((i + 1))
    # "$*" (not "$@") so the names are joined into a single message argument.
    log "Still waiting for $* to populate($i)"
  done
}
# Print the /dev/<name> path of the first partition under /sys/class/block
# whose PARTUUID uevent value equals the requested one (exact string match).
#
# Arguments:
#   $1  PARTUUID to look for
#
# Returns: 0 and prints the device path on a match, 1 otherwise.
find_part_by_partuuid() {
  wanted_uuid="$1"

  for sys_entry in /sys/class/block/*; do
    # Skip anything that is not a partition.
    if [ -f "$sys_entry/partition" ]; then
      entry_uuid="$(get_uevent_value "$sys_entry/uevent" PARTUUID || true)"
      if [ "$entry_uuid" = "$wanted_uuid" ]; then
        echo "/dev/${sys_entry##*/}"
        return 0
      fi
    fi
  done

  return 1
}
# Print the name of the disk that owns a partition.
#
# The /sys/class/block/<part> entry is resolved to its real path and the name
# of the directory containing it is printed.
# Examples:
#   sda2      -> sda
#   mmcblk0p2 -> mmcblk0
#
# Arguments:
#   $1  partition device name (no /dev/ prefix)
#
# Returns: 0 and prints the parent name, 1 if the path cannot be resolved.
parent_disk_name_for_part() {
  sysfs_link="/sys/class/block/$1"

  if resolved="$(readlink -f "$sysfs_link")" &&
    disk="$(basename "$(dirname "$resolved")")"; then
    echo "$disk"
    return 0
  fi

  return 1
}
# Find a partition by PARTNAME on the same disk as a given partition.
#
# Arguments:
#   $1  path of a partition on the disk of interest (e.g. /dev/sda2)
#   $2  PARTNAME to look for
#   $3  optional sysfs block-class directory to scan (default /sys/class/block);
#       only the partition scan honors it, the parent-disk lookup is done by
#       parent_disk_name_for_part
#
# Outputs: /dev/<name> of the first matching partition on stdout.
# Returns: 0 on a match, 1 if the disk cannot be determined or nothing matches.
find_sibling_part_on_same_disk() {
  part_path="$1"
  want_label="$2"
  sysfs_block="${3:-/sys/class/block}"
  part_devname="$(basename "$part_path")"
  disk_devname="$(parent_disk_name_for_part "$part_devname")" || return 1

  # Partitions appear as child directories of their disk in sysfs. Walking
  # those children, rather than globbing "<disk>*" in the flat class
  # directory, avoids matching partitions of a different disk whose name
  # merely starts with the same characters (sda vs sdaa1, mmcblk1 vs
  # mmcblk10p1).
  for p in "$sysfs_block/$disk_devname"/*; do
    [ -f "$p/partition" ] || continue
    partname="$(get_uevent_value "$p/uevent" PARTNAME || true)"
    [ "$partname" = "$want_label" ] || continue
    echo "/dev/$(basename "$p")"
    return 0
  done

  return 1
}
# Resolve the preferred root device from sysfs.
# The given value is looked up as a partition PARTUUID only.
#
# Arguments:
#   $1  preferred PARTUUID (may be empty)
#
# Returns: 1 when the value is empty; otherwise the result (output and status)
# of find_part_by_partuuid.
resolve_preferred_root() {
  pref_root="$1"

  if [ -z "$pref_root" ]; then
    return 1
  fi

  find_part_by_partuuid "$pref_root"
}
# Print the root partition label that belongs to a boot slot.
#
# Arguments:
#   $1  slot identifier
#
# Outputs: "rootfsB" for slot "B" or "b"; "rootfsA" for every other value,
# including the empty string.
wanted_root_labels_for_slot() {
  slot="$1"

  if [ "$slot" = "B" ] || [ "$slot" = "b" ]; then
    echo "rootfsB"
  else
    echo "rootfsA"
  fi
}
# Locate a root partition for a boot slot by partition label.
#
# Arguments:
#   $1  slot identifier, passed to wanted_root_labels_for_slot
#
# Outputs: the device path of the first label that resolves.
# Returns: 0 when a device was found, 1 when no label resolves.
find_fallback_root_for_slot() {
  slot="$1"

  # Word splitting of the label list is intentional: one label per word.
  for label in $(wanted_root_labels_for_slot "$slot"); do
    dev="$(find_first_part_by_partname "$label" || true)"
    [ -n "$dev" ] || continue
    echo "$dev"
    return 0
  done

  return 1
}
# ---------------------------------------------------------------------------
# Main boot sequence
# ---------------------------------------------------------------------------

# Early pseudo-filesystems needed by everything below.
mkdir -p /dev /proc /sys /run
mount_or_panic -t devtmpfs devtmpfs /dev
mount_or_panic -t proc proc /proc
mount_or_panic -t sysfs sysfs /sys
mount_or_panic -t tmpfs tmpfs /run

# Lower the kernel console log level.
echo 1 > /proc/sys/kernel/printk

mkdir -p /dev/pts
mount_or_panic -t devpts devpts /dev/pts

# Optional early fan kick. Do not fail boot if this path is not ready yet.
if [ -w /sys/class/hwmon/hwmon0/pwm1 ]; then
  echo 100 > /sys/class/hwmon/hwmon0/pwm1 || true
fi

log "Booting kernel took $(cut -d' ' -f1 /proc/uptime) seconds."

# A failing `.` aborts a non-interactive POSIX shell before `|| panic` can
# run, so check readability first to get the panic shell instead of a dead
# init.
[ -r /etc/build-info ] || panic "failed to source /etc/build-info"
. /etc/build-info || panic "failed to source /etc/build-info"

wait_for_partnames 30 rootfsA rootfsB data || panic "failed to wait for fs"

# ROOT_CMD is parsed but not consulted by the device selection below.
ROOT_CMD="$(get_cmdline_arg root || true)"
BOOT_PART="$(get_cmdline_arg bootpart || true)"
PREFERRED_PARTUUID="$(get_cmdline_arg pref_root || true)"

# Root selection: preferred PARTUUID first, then the slot's label.
ROOT_DEV="$(resolve_preferred_root "$PREFERRED_PARTUUID" || true)"
if [ -n "$ROOT_DEV" ]; then
  log "Using preferred root device: $ROOT_DEV"
fi
if [ -z "$ROOT_DEV" ]; then
  ROOT_DEV="$(find_fallback_root_for_slot "$BOOT_PART" || true)"
  if [ -n "$ROOT_DEV" ]; then
    log "Preferred root not found. Falling back to first valid root device: $ROOT_DEV"
  fi
fi
[ -n "$ROOT_DEV" ] || panic "no usable root device found"

DATA_DEV="$(find_sibling_part_on_same_disk "$ROOT_DEV" data || true)"
[ -n "$DATA_DEV" ] || panic "no data partition found on same disk as $ROOT_DEV"

wait_for_path "$ROOT_DEV"
wait_for_path "$DATA_DEV"

# e2fsck's exit status is a bit mask: 1 and 2 mean "errors were corrected",
# 4 and above mean errors remain or the check itself failed. Only the latter
# is a failure; treating 1/2 as failure would panic after a successful repair.
fsck_rc=0
e2fsck -p "$DATA_DEV" || fsck_rc=$?
if [ "$fsck_rc" -ge 4 ]; then
  log "Auto fsck failed, forcing repair"
  fsck_rc=0
  e2fsck -y "$DATA_DEV" || fsck_rc=$?
  [ "$fsck_rc" -lt 4 ] || panic "fsck failed on $DATA_DEV"
fi

mkdir -p /newroot
mkdir -p /newroot/data
mkdir -p /newroot/var
mount_retry "$ROOT_DEV" /newroot ext4 ro
mount_retry "$DATA_DEV" /newroot/data ext4 rw

# Persistent state lives on the data partition.
mkdir -p /newroot/data/var
mkdir -p /newroot/data/etc-overlay/upper
mkdir -p /newroot/data/etc-overlay/work
mount_or_panic --bind /newroot/data/var /newroot/var

# BusyBox mount just needs a normal -o option string here.
# The important bit is that overlayfs itself requires lowerdir/upperdir/workdir,
# and workdir must live on the same filesystem as upperdir.
mount_or_panic -t overlay overlay \
  -o "lowerdir=/newroot/etc,upperdir=/newroot/data/etc-overlay/upper,workdir=/newroot/data/etc-overlay/work" \
  /newroot/etc

# Publish the inactive root slot as /dev/mksaltpart. The slot mapping mirrors
# wanted_root_labels_for_slot: "B"/"b" boots rootfsB, anything else boots
# rootfsA, so the alternate must be the opposite label in every case.
case "$BOOT_PART" in
  B|b)
    ALT_LABEL="rootfsA"
    ;;
  *)
    ALT_LABEL="rootfsB"
    ;;
esac
ALT_PART="$(find_sibling_part_on_same_disk "$ROOT_DEV" "$ALT_LABEL" || true)"
if [ -n "$ALT_PART" ]; then
  ln -sf "$ALT_PART" /dev/mksaltpart
fi

# Carry the already-mounted pseudo-filesystems over into the new root.
mount_or_panic --move /dev /newroot/dev
mount_or_panic --move /proc /newroot/proc
mount_or_panic --move /sys /newroot/sys
mount_or_panic --move /run /newroot/run

log "Switching root to $ROOT_DEV (data: $DATA_DEV, slot: $BOOT_PART)"
exec switch_root /newroot /sbin/init
# Only reached if the shell continues after a failed exec.
panic "switch_root returned unexpectedly"