Part 29: Postgres with CloudNativePG
"Postgres-as-a-Deployment is a foot gun. Postgres-as-an-operator is what production looks like."
Why
Running Postgres in Kubernetes via a Deployment is the path most tutorials show. It works in dev. It does not work in prod, because:
- Replication is manual (no operator means no automatic failover)
- Backups are manual (you write a CronJob that runs `pg_dump`)
- Point-in-time recovery is impossible (no WAL archiving)
- Resize is destructive (the PVC can grow but the data has to be moved)
- Connection pooling is a separate add-on (PgBouncer)
The right answer is a Postgres operator. The current best in 2025 is CloudNativePG — a CNCF Sandbox project from EnterpriseDB that handles HA, backups, PITR, monitoring, and rolling upgrades through a Cluster CRD.
The thesis: K8s.Dsl ships CloudNativePgHelmReleaseContributor to install the operator and lets each workload contributor declare its own Cluster CRD instance via the RawManifest escape hatch. Backups go to the in-cluster MinIO via the operator's S3 backend.
The operator
/// <summary>
/// Installs the CloudNativePG operator cluster-wide via its Helm chart.
/// The operator watches Cluster CRDs and manages HA, backups, PITR, and upgrades.
/// </summary>
[Injectable(ServiceLifetime.Singleton)]
public sealed class CloudNativePgHelmReleaseContributor : IHelmReleaseContributor
{
    // NOTE(review): the original snippet read _config without declaring it anywhere,
    // so it could not compile. Injected here via the container. TODO confirm the
    // concrete config type name used elsewhere in this series.
    private readonly AppConfig _config;

    public CloudNativePgHelmReleaseContributor(AppConfig config) => _config = config;

    /// <summary>Contributes to every target cluster.</summary>
    public string TargetCluster => "*";

    /// <summary>Adds the cnpg operator Helm release to the bundle.</summary>
    public void Contribute(KubernetesBundle bundle)
    {
        bundle.HelmReleases.Add(new HelmReleaseSpec
        {
            Name = "cnpg",
            Namespace = "cnpg-system",
            Chart = "cnpg/cloudnative-pg",
            Version = "0.23.0",
            RepoUrl = "https://cloudnative-pg.github.io/charts",
            CreateNamespace = true,
            // Block until the operator is ready so its CRDs exist before any
            // workload contributor applies a Cluster instance.
            Wait = true,
            Values = new()
            {
                // Run 3 operator replicas only on the HA topology; 1 is enough otherwise.
                ["replicaCount"] = _config.K8s?.Topology == "k8s-ha" ? 3 : 1,
                ["monitoring"] = new Dictionary<string, object?>
                {
                    ["podMonitorEnabled"] = true,
                    ["grafanaDashboard"] = new Dictionary<string, object?>
                    {
                        ["create"] = true,
                        // Label the dashboard ConfigMap so the Grafana sidecar imports it.
                        ["labels"] = new Dictionary<string, object?>
                        {
                            ["grafana_dashboard"] = "1"
                        }
                    }
                }
            }
        });
    }
}

[Injectable(ServiceLifetime.Singleton)]
// NOTE(review): everything below is a verbatim duplicate of the
// CloudNativePgHelmReleaseContributor definition immediately above — almost
// certainly a rendering/extraction artifact of the article. As written it cannot
// compile next to the first copy (duplicate type name), and _config is read but
// never declared in either copy. Kept byte-for-byte pending confirmation that
// the duplicate can simply be dropped.
public sealed class CloudNativePgHelmReleaseContributor : IHelmReleaseContributor
{
public string TargetCluster => "*";
public void Contribute(KubernetesBundle bundle)
{
bundle.HelmReleases.Add(new HelmReleaseSpec
{
Name = "cnpg",
Namespace = "cnpg-system",
Chart = "cnpg/cloudnative-pg",
Version = "0.23.0",
RepoUrl = "https://cloudnative-pg.github.io/charts",
CreateNamespace = true,
// Wait for the operator so its CRDs exist before Cluster instances are applied.
Wait = true,
Values = new()
{
// 3 operator replicas on the HA topology, 1 otherwise.
["replicaCount"] = _config.K8s?.Topology == "k8s-ha" ? 3 : 1,
["monitoring"] = new Dictionary<string, object?>
{
["podMonitorEnabled"] = true,
["grafanaDashboard"] = new Dictionary<string, object?>
{
["create"] = true,
// Label so the Grafana sidecar picks up the dashboard ConfigMap.
["labels"] = new Dictionary<string, object?>
{
["grafana_dashboard"] = "1"
}
}
}
}
});
}
}A workload's Cluster instance (e.g., for GitLab)
/// <summary>
/// Declares GitLab's Postgres database as a CloudNativePG Cluster CRD instance
/// (HA-aware, Longhorn-backed, Barman backups to the in-cluster MinIO) plus a
/// daily ScheduledBackup, all in the gitlab-data namespace.
/// </summary>
[Injectable(ServiceLifetime.Singleton)]
public sealed class GitLabPostgresClusterContributor : IK8sManifestContributor
{
    // NOTE(review): the original snippet read _config without declaring it anywhere,
    // so it could not compile. Injected here via the container. TODO confirm the
    // concrete config type name used elsewhere in this series.
    private readonly AppConfig _config;

    public GitLabPostgresClusterContributor(AppConfig config) => _config = config;

    /// <summary>Contributes to every target cluster.</summary>
    public string TargetCluster => "*";

    /// <summary>Adds the gitlab-pg Cluster and its ScheduledBackup to the bundle.</summary>
    public void Contribute(KubernetesBundle bundle)
    {
        bundle.Namespaces["gitlab-data"] ??= new NamespaceManifest { Name = "gitlab-data" };

        bundle.CrdInstances.Add(new RawManifest
        {
            ApiVersion = "postgresql.cnpg.io/v1",
            Kind = "Cluster",
            Metadata = new() { Name = "gitlab-pg", Namespace = "gitlab-data" },
            Spec = new Dictionary<string, object?>
            {
                // 3 instances (1 primary + 2 replicas) on the HA topology, else standalone.
                ["instances"] = _config.K8s?.Topology == "k8s-ha" ? 3 : 1,
                // Let the operator roll/switch over the primary without manual approval.
                ["primaryUpdateStrategy"] = "unsupervised",
                ["postgresql"] = new Dictionary<string, object?>
                {
                    ["parameters"] = new Dictionary<string, object?>
                    {
                        ["max_connections"] = "200",
                        ["shared_buffers"] = "256MB",
                        ["effective_cache_size"] = "1GB"
                    }
                },
                ["bootstrap"] = new Dictionary<string, object?>
                {
                    ["initdb"] = new Dictionary<string, object?>
                    {
                        ["database"] = "gitlabhq_production",
                        ["owner"] = "gitlab",
                        // Secret holding the owner's credentials; must exist before bootstrap.
                        ["secret"] = new Dictionary<string, object?>
                        {
                            ["name"] = "gitlab-postgres-secret"
                        }
                    }
                },
                ["storage"] = new Dictionary<string, object?>
                {
                    ["size"] = "20Gi",
                    ["storageClass"] = "longhorn"
                },
                ["monitoring"] = new Dictionary<string, object?>
                {
                    ["enablePodMonitor"] = true
                },
                ["backup"] = new Dictionary<string, object?>
                {
                    ["barmanObjectStore"] = new Dictionary<string, object?>
                    {
                        ["destinationPath"] = "s3://gitlab-postgres-backups",
                        // Key references resolve against this Secret in the same namespace.
                        ["s3Credentials"] = new Dictionary<string, object?>
                        {
                            ["accessKeyId"] = new Dictionary<string, object?>
                            {
                                ["name"] = "gitlab-pg-backup-secret",
                                ["key"] = "ACCESS_KEY_ID"
                            },
                            ["secretAccessKey"] = new Dictionary<string, object?>
                            {
                                ["name"] = "gitlab-pg-backup-secret",
                                ["key"] = "SECRET_ACCESS_KEY"
                            }
                        },
                        // In-cluster MinIO acting as the S3-compatible backup target.
                        ["endpointURL"] = "https://minio.gitlab-data.svc.cluster.local",
                        ["wal"] = new Dictionary<string, object?>
                        {
                            ["compression"] = "gzip"
                        }
                    },
                    ["retentionPolicy"] = "30d"
                }
            }
        });

        // Daily full backup. CloudNativePG's schedule is a six-field cron
        // (seconds first), so "0 0 2 * * *" fires at 02:00:00 every day.
        bundle.CrdInstances.Add(new RawManifest
        {
            ApiVersion = "postgresql.cnpg.io/v1",
            Kind = "ScheduledBackup",
            Metadata = new() { Name = "gitlab-pg-daily", Namespace = "gitlab-data" },
            Spec = new Dictionary<string, object?>
            {
                ["schedule"] = "0 0 2 * * *",
                ["backupOwnerReference"] = "self",
                ["cluster"] = new Dictionary<string, object?>
                {
                    ["name"] = "gitlab-pg"
                }
            }
        });
    }
}

[Injectable(ServiceLifetime.Singleton)]
// NOTE(review): everything below is a verbatim duplicate of the
// GitLabPostgresClusterContributor definition immediately above — almost
// certainly a rendering/extraction artifact of the article. As written it cannot
// compile next to the first copy (duplicate type name), and _config is read but
// never declared in either copy. Kept byte-for-byte pending confirmation that
// the duplicate can simply be dropped.
public sealed class GitLabPostgresClusterContributor : IK8sManifestContributor
{
public string TargetCluster => "*";
public void Contribute(KubernetesBundle bundle)
{
bundle.Namespaces["gitlab-data"] ??= new NamespaceManifest { Name = "gitlab-data" };
bundle.CrdInstances.Add(new RawManifest
{
ApiVersion = "postgresql.cnpg.io/v1",
Kind = "Cluster",
Metadata = new() { Name = "gitlab-pg", Namespace = "gitlab-data" },
Spec = new Dictionary<string, object?>
{
// 1 primary + 2 replicas on the HA topology, else a single instance.
["instances"] = _config.K8s?.Topology == "k8s-ha" ? 3 : 1,
["primaryUpdateStrategy"] = "unsupervised",
["postgresql"] = new Dictionary<string, object?>
{
["parameters"] = new Dictionary<string, object?>
{
["max_connections"] = "200",
["shared_buffers"] = "256MB",
["effective_cache_size"] = "1GB"
}
},
["bootstrap"] = new Dictionary<string, object?>
{
["initdb"] = new Dictionary<string, object?>
{
["database"] = "gitlabhq_production",
["owner"] = "gitlab",
// Credentials Secret must exist before the cluster bootstraps.
["secret"] = new Dictionary<string, object?>
{
["name"] = "gitlab-postgres-secret"
}
}
},
["storage"] = new Dictionary<string, object?>
{
["size"] = "20Gi",
["storageClass"] = "longhorn"
},
["monitoring"] = new Dictionary<string, object?>
{
["enablePodMonitor"] = true
},
["backup"] = new Dictionary<string, object?>
{
["barmanObjectStore"] = new Dictionary<string, object?>
{
["destinationPath"] = "s3://gitlab-postgres-backups",
["s3Credentials"] = new Dictionary<string, object?>
{
["accessKeyId"] = new Dictionary<string, object?>
{
["name"] = "gitlab-pg-backup-secret",
["key"] = "ACCESS_KEY_ID"
},
["secretAccessKey"] = new Dictionary<string, object?>
{
["name"] = "gitlab-pg-backup-secret",
["key"] = "SECRET_ACCESS_KEY"
}
},
// In-cluster MinIO acting as the S3-compatible backup target.
["endpointURL"] = "https://minio.gitlab-data.svc.cluster.local",
["wal"] = new Dictionary<string, object?>
{
["compression"] = "gzip"
}
},
["retentionPolicy"] = "30d"
}
}
});
// Schedule a daily backup
bundle.CrdInstances.Add(new RawManifest
{
ApiVersion = "postgresql.cnpg.io/v1",
Kind = "ScheduledBackup",
Metadata = new() { Name = "gitlab-pg-daily", Namespace = "gitlab-data" },
Spec = new Dictionary<string, object?>
{
["schedule"] = "0 0 2 * * *", // 02:00 daily (cron+seconds)
["backupOwnerReference"] = "self",
["cluster"] = new Dictionary<string, object?>
{
["name"] = "gitlab-pg"
}
}
});
}
}The cluster gets:
- 3 instances in HA topology, 1 instance otherwise (CloudNativePG handles the leader election)
- A primary database named `gitlabhq_production`, owned by the `gitlab` user
- 20 GB of Longhorn-backed storage per instance
- Backup to MinIO via Barman (the open-source Postgres backup tool CloudNativePG bundles)
- Daily scheduled backups at 02:00 with 30-day retention
- Pod monitor for Prometheus scraping
The CloudNativePG operator handles the failover: if the primary's pod dies, a replica is promoted within ~10 seconds. From GitLab's perspective, the database connection (to gitlab-pg-rw.gitlab-data.svc.cluster.local) automatically routes to the new primary because the operator updates the Service.
Point-in-time recovery
CloudNativePG supports PITR via the WAL archive in MinIO. To restore to a specific point in time:
// Create a new Cluster from the backup. It replays WAL from the MinIO archive
// up to targetTime, then opens as an independent, writable primary.
bundle.CrdInstances.Add(new RawManifest
{
    ApiVersion = "postgresql.cnpg.io/v1",
    Kind = "Cluster",
    Metadata = new() { Name = "gitlab-pg-restored", Namespace = "gitlab-data" },
    Spec = new Dictionary<string, object?>
    {
        // Single instance for the PITR target; scale out after verifying the restore.
        ["instances"] = 1,
        ["bootstrap"] = new Dictionary<string, object?>
        {
            ["recovery"] = new Dictionary<string, object?>
            {
                // Must match an entry in externalClusters below.
                ["source"] = "gitlab-pg",
                ["recoveryTarget"] = new Dictionary<string, object?>
                {
                    ["targetTime"] = "2026-04-18 14:30:00+00"
                }
            }
        },
        ["externalClusters"] = new[]
        {
            new Dictionary<string, object?>
            {
                ["name"] = "gitlab-pg",
                ["barmanObjectStore"] = new Dictionary<string, object?>
                {
                    ["destinationPath"] = "s3://gitlab-postgres-backups",
                    // Same credentials Secret the live cluster's backup config uses.
                    // (The original had a bare /* same as above */ placeholder here,
                    // which does not compile — an expression is required.)
                    ["s3Credentials"] = new Dictionary<string, object?>
                    {
                        ["accessKeyId"] = new Dictionary<string, object?>
                        {
                            ["name"] = "gitlab-pg-backup-secret",
                            ["key"] = "ACCESS_KEY_ID"
                        },
                        ["secretAccessKey"] = new Dictionary<string, object?>
                        {
                            ["name"] = "gitlab-pg-backup-secret",
                            ["key"] = "SECRET_ACCESS_KEY"
                        }
                    },
                    ["endpointURL"] = "https://minio.gitlab-data.svc.cluster.local"
                }
            }
        },
        ["storage"] = new Dictionary<string, object?> { ["size"] = "20Gi", ["storageClass"] = "longhorn" }
    }
});

// Create a new Cluster from the backup
// NOTE(review): everything below is a verbatim duplicate of the PITR restore
// snippet immediately above — almost certainly a rendering/extraction artifact
// of the article. Kept byte-for-byte pending confirmation it can be dropped.
// Also note: the /* same as above */ placeholder below is not legal C# — an
// expression is required there, as shown in the first copy's backup config.
bundle.CrdInstances.Add(new RawManifest
{
ApiVersion = "postgresql.cnpg.io/v1",
Kind = "Cluster",
Metadata = new() { Name = "gitlab-pg-restored", Namespace = "gitlab-data" },
Spec = new Dictionary<string, object?>
{
["instances"] = 1,
["bootstrap"] = new Dictionary<string, object?>
{
["recovery"] = new Dictionary<string, object?>
{
// Must match an entry in externalClusters below.
["source"] = "gitlab-pg",
["recoveryTarget"] = new Dictionary<string, object?>
{
["targetTime"] = "2026-04-18 14:30:00+00"
}
}
},
["externalClusters"] = new[]
{
new Dictionary<string, object?>
{
["name"] = "gitlab-pg",
["barmanObjectStore"] = new Dictionary<string, object?>
{
["destinationPath"] = "s3://gitlab-postgres-backups",
["s3Credentials"] = /* same as above */,
["endpointURL"] = "https://minio.gitlab-data.svc.cluster.local"
}
}
},
["storage"] = new Dictionary<string, object?> { ["size"] = "20Gi", ["storageClass"] = "longhorn" }
}
});The new cluster boots, replays WAL from the backup until the target time, and is then a writable database. You can kubectl exec into a pod and run psql against it. This is the same flow production uses, exercised in dev.
What this gives you that Postgres-as-Deployment doesn't
A Deployment of postgres:16-alpine with a PVC works for "I need a Postgres for my dev workload". It does not survive a node failure (the pod can only reschedule elsewhere if the PVC lives on shared or replicated storage). It does not back itself up. It does not support PITR. It does not fail over.
CloudNativePG via the operator pattern gives you, for the same surface area:
- Real HA with leader election and automatic failover
- Real backups to S3-compatible storage (in our case, the in-cluster MinIO)
- Real PITR via WAL archive
- Real metrics via the bundled pod monitor
- The same operator that runs in production at large companies
The bargain pays back the first time your dev cluster's Postgres primary dies and CloudNativePG promotes a replica without you doing anything.