Part 41: k3s Upgrades — The Easy Mode
"k3s upgrades are short. Backup, swap binary, restart, verify. Repeat per node."
Why
Part 40 walked the painful kubeadm flow. k3s is the easy alternative. There is no kubeadm upgrade plan. There is no required-stop sequence. The "upgrade" is essentially curl | sh again with a new INSTALL_K3S_VERSION. The k3s daemon stops, the new binary replaces the old, the daemon restarts, and the new version is live.
The thesis: K8s.Dsl ships a K3sUpgradeSaga that is structurally similar to KubeadmUpgradeSaga from Part 40 but with fewer steps. Plus an optional integration with system-upgrade-controller for users who want fully-automated rolling upgrades on a schedule.
The simple path
[Saga]
public sealed class K3sUpgradeSaga
{
[SagaStep(Order = 1, Compensation = nameof(NothingToCompensate))]
public async Task<Result> BackupBeforeUpgrade(K3sUpgradeContext ctx, CancellationToken ct)
{
    // Step 1: take a Velero backup of kube-system so a failed upgrade can be
    // compensated by RestoreFromBackup.
    var backup = await _velero.RunBackupAsync($"pre-upgrade-{ctx.TargetVersion}", new[] { "kube-system" }, ct);
    if (backup.IsSuccess)
    {
        // Stash the backup id on the context for the compensation step.
        ctx.PreUpgradeBackupId = backup.Value;
    }
    return backup.Map();
}
[SagaStep(Order = 2, Compensation = nameof(RestoreFromBackup))]
public async Task<Result> UpgradeServerNodes(K3sUpgradeContext ctx, CancellationToken ct)
{
    // Step 2: for HA k3s (--cluster-init), server nodes are upgraded strictly
    // one at a time so embedded etcd never loses quorum.
    foreach (var server in ctx.ServerNodes)
    {
        var upgrade = await UpgradeOneNode(server, ctx.TargetVersion, isServer: true, ct);
        if (upgrade.IsFailure)
        {
            return upgrade;
        }

        // Block until this node's apiserver answers again before touching the next one.
        var health = await WaitForApiHealthy(server, ct);
        if (health.IsFailure)
        {
            return health;
        }
    }
    return Result.Success();
}
[SagaStep(Order = 3, Compensation = nameof(NothingToCompensate))]
public async Task<Result> UpgradeAgentNodes(K3sUpgradeContext ctx, CancellationToken ct)
{
    // Step 3: agents (workers) are stateless, so they can be upgraded in parallel.
    var tasks = ctx.AgentNodes.Select(async node =>
    {
        await _kubectl.DrainAsync(node, ct: ct);
        try
        {
            var upgradeResult = await UpgradeOneNode(node, ctx.TargetVersion, isServer: false, ct);
            return (Node: node, Result: upgradeResult);
        }
        finally
        {
            // Always uncordon, even when UpgradeOneNode throws — previously an
            // exception skipped the uncordon and left the node unschedulable
            // until someone noticed.
            await _kubectl.UncordonAsync(node, ct);
        }
    });
    var results = await Task.WhenAll(tasks);

    // Name the failing nodes instead of reporting an anonymous failure.
    var failed = results.Where(r => r.Result.IsFailure).Select(r => r.Node).ToList();
    return failed.Count == 0
        ? Result.Success()
        : Result.Failure($"agent upgrade had failures on: {string.Join(", ", failed)}");
}
private async Task<Result> UpgradeOneNode(string nodeName, string targetVersion, bool isServer, CancellationToken ct)
{
    // Runs the official k3s installer over SSH to swap the binary in place.
    var role = isServer ? "server" : "agent";

    // INSTALL_K3S_SKIP_START=true tells the installer NOT to (re)start the
    // service. The original command therefore never restarted the daemon, so
    // the old binary kept running and the "upgrade" never took effect. Restart
    // the systemd unit explicitly (servers run `k3s`, agents run `k3s-agent`).
    var service = isServer ? "k3s" : "k3s-agent";
    var installCmd =
        $"curl -sfL https://get.k3s.io | " +
        $"INSTALL_K3S_VERSION={targetVersion} " +
        $"INSTALL_K3S_SKIP_START=true " +
        $"sh -s - {role} && " +
        $"sudo systemctl restart {service}";
    return await _vagrant.SshCommandAsync(nodeName, installCmd, ct).Map();
}
public async Task<Result> RestoreFromBackup(K3sUpgradeContext ctx, CancellationToken ct)
{
    // Compensation for step 2: restore the pre-upgrade Velero backup if one was taken.
    if (ctx.PreUpgradeBackupId is null)
    {
        return Result.Success();
    }
    return await _velero.RestoreAsync(ctx.PreUpgradeBackupId, ct).Map();
}
public Task<Result> NothingToCompensate(K3sUpgradeContext ctx, CancellationToken ct)
{
    // No-op compensation for steps that have nothing to undo.
    return Task.FromResult(Result.Success());
}
}
[Saga]
public sealed class K3sUpgradeSaga
{
// Step 1: Velero backup of kube-system so a failed upgrade can be rolled back.
[SagaStep(Order = 1, Compensation = nameof(NothingToCompensate))]
public async Task<Result> BackupBeforeUpgrade(K3sUpgradeContext ctx, CancellationToken ct)
{
var result = await _velero.RunBackupAsync($"pre-upgrade-{ctx.TargetVersion}", new[] { "kube-system" }, ct);
// Remember the backup id so RestoreFromBackup can find it during compensation.
if (result.IsSuccess) ctx.PreUpgradeBackupId = result.Value;
return result.Map();
}
// Step 2: upgrade server nodes strictly sequentially to keep etcd quorum.
[SagaStep(Order = 2, Compensation = nameof(RestoreFromBackup))]
public async Task<Result> UpgradeServerNodes(K3sUpgradeContext ctx, CancellationToken ct)
{
// For HA k3s (with --cluster-init), upgrade server nodes one at a time
foreach (var node in ctx.ServerNodes)
{
var result = await UpgradeOneNode(node, ctx.TargetVersion, isServer: true, ct);
// Fail fast: a broken server stops the rollout before it spreads.
if (result.IsFailure) return result;
// Wait for the API to come back
var waitResult = await WaitForApiHealthy(node, ct);
if (waitResult.IsFailure) return waitResult;
}
return Result.Success();
}
// Step 3: upgrade all agents concurrently (drain -> upgrade -> uncordon per node).
[SagaStep(Order = 3, Compensation = nameof(NothingToCompensate))]
public async Task<Result> UpgradeAgentNodes(K3sUpgradeContext ctx, CancellationToken ct)
{
// Agents (workers) can be upgraded in parallel because they are stateless
var tasks = ctx.AgentNodes.Select(async node =>
{
// NOTE(review): the drain result is discarded — a failed drain still proceeds to upgrade.
await _kubectl.DrainAsync(node, ct: ct);
var upgradeResult = await UpgradeOneNode(node, ctx.TargetVersion, isServer: false, ct);
// NOTE(review): if UpgradeOneNode throws, this uncordon is skipped and the node stays cordoned.
await _kubectl.UncordonAsync(node, ct);
return upgradeResult;
});
var results = await Task.WhenAll(tasks);
// Aggregate: any single agent failure fails the whole step.
return results.All(r => r.IsSuccess) ? Result.Success() : Result.Failure("agent upgrade had failures");
}
// Runs the official k3s installer over SSH to replace the binary on one node.
private async Task<Result> UpgradeOneNode(string nodeName, string targetVersion, bool isServer, CancellationToken ct)
{
var role = isServer ? "server" : "agent";
var installCmd =
$"curl -sfL https://get.k3s.io | " +
$"INSTALL_K3S_VERSION={targetVersion} " +
// NOTE(review): SKIP_START=true means the installer will not restart the service,
// and no restart is issued here — the running daemon stays on the old binary. Verify.
$"INSTALL_K3S_SKIP_START=true " +
$"sh -s - {role}";
return await _vagrant.SshCommandAsync(nodeName, installCmd, ct).Map();
// The k3s installer replaces the binary; the daemon picks up the new version on restart
}
// Compensation for step 2: restore the pre-upgrade backup if one was recorded.
public async Task<Result> RestoreFromBackup(K3sUpgradeContext ctx, CancellationToken ct)
=> ctx.PreUpgradeBackupId is null ? Result.Success() : await _velero.RestoreAsync(ctx.PreUpgradeBackupId, ct).Map();
// No-op compensation for steps that have nothing to undo.
public Task<Result> NothingToCompensate(K3sUpgradeContext ctx, CancellationToken ct)
=> Task.FromResult(Result.Success());
}
Three steps. The first is the same backup as the kubeadm version. The second upgrades server nodes sequentially. The third upgrades agents in parallel. Total time on a typical cluster: ~5 minutes (vs. ~30 minutes for kubeadm).
Optional: system-upgrade-controller
For users who want unattended automatic upgrades, K3s.Dsl can install system-upgrade-controller — a Kubernetes operator that watches Plan CRDs and rolls upgrades across the cluster on its own. The user creates a Plan like this:
# Plan for server (control-plane) nodes: concurrency 1 keeps upgrades
# sequential so embedded etcd does not lose quorum.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
name: k3s-server
namespace: system-upgrade
spec:
concurrency: 1
cordon: true
nodeSelector:
matchLabels:
node-role.kubernetes.io/control-plane: "true"
serviceAccountName: system-upgrade
upgrade:
image: rancher/k3s-upgrade
version: v1.32.0+k3s1
---
# Plan for agent (worker) nodes: two at a time, and the prepare step waits for
# the k3s-server plan so agents never upgrade ahead of the control plane.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
name: k3s-agent
namespace: system-upgrade
spec:
concurrency: 2
cordon: true
nodeSelector:
matchLabels:
node-role.kubernetes.io/control-plane: "false"
prepare:
args: ["prepare", "k3s-server"]
image: rancher/k3s-upgrade
serviceAccountName: system-upgrade
upgrade:
image: rancher/k3s-upgrade
version: v1.32.0+k3s1
apiVersion: upgrade.cattle.io/v1
# Server plan: sequential (concurrency 1) to protect etcd quorum.
kind: Plan
metadata:
name: k3s-server
namespace: system-upgrade
spec:
concurrency: 1
cordon: true
nodeSelector:
matchLabels:
node-role.kubernetes.io/control-plane: "true"
serviceAccountName: system-upgrade
upgrade:
image: rancher/k3s-upgrade
version: v1.32.0+k3s1
---
# Agent plan: two at a time; prepare gates on the server plan finishing first.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
name: k3s-agent
namespace: system-upgrade
spec:
concurrency: 2
cordon: true
nodeSelector:
matchLabels:
node-role.kubernetes.io/control-plane: "false"
prepare:
args: ["prepare", "k3s-server"]
image: rancher/k3s-upgrade
serviceAccountName: system-upgrade
upgrade:
image: rancher/k3s-upgrade
version: v1.32.0+k3s1
The controller watches for these Plans and rolls the upgrade in the background. K3s.Dsl provides a K3sUpgradePlanContributor that generates these Plan resources from the [Cluster] config:
[Injectable(ServiceLifetime.Singleton)]
public sealed class K3sUpgradePlanContributor : IK8sManifestContributor
{
// Contribute only when the cluster is k3s AND the user enabled
// system-upgrade-controller in the config.
public bool ShouldContribute() =>
_config.K8s?.Distribution == "k3s" && _config.K8s?.K3s?.SystemUpgradeController?.Enabled == true;
// Emits the server and agent Plan CRDs for system-upgrade-controller.
public void Contribute(KubernetesBundle bundle)
{
// Generate the Plan CRDs above
}
}
[Injectable(ServiceLifetime.Singleton)]
public sealed class K3sUpgradePlanContributor : IK8sManifestContributor
{
// Gate: k3s distribution with system-upgrade-controller explicitly enabled.
public bool ShouldContribute() =>
_config.K8s?.Distribution == "k3s" && _config.K8s?.K3s?.SystemUpgradeController?.Enabled == true;
// Emits the server and agent Plan CRDs for system-upgrade-controller.
public void Contribute(KubernetesBundle bundle)
{
// Generate the Plan CRDs above
}
}
Most users do not need this. The K3sUpgradeSaga from above is enough for manual upgrades, which is what homelab k8s upgrade --to v1.32.0+k3s1 runs.
What this gives you that kubeadm doesn't
A k3s upgrade is fast. The whole flow takes minutes instead of hours. The cost is that you do not exercise the kubeadm-specific upgrade logic — which is fine if your production also runs k3s, and a problem if your production runs kubeadm.
The k3s upgrade saga gives you, for the same surface area as kubeadm:
- One verb to upgrade
- Backup before, restore on failure
- Sequential server upgrades (to keep etcd quorum)
- Parallel agent upgrades (faster on 3+ workers)
- Optional system-upgrade-controller for unattended scheduled upgrades
- Same homelab k8s upgrade interface, so the choice between kubeadm and k3s does not change the user's commands
The bargain pays back the first time you upgrade k3s in 5 minutes and remember kubeadm took 45.