treewide: k8s 1.28 and lots of related updates
First, this contains a bunch of dependency updates. Important ones in no
particular order:
Kubernetes 1.24.2 -> 1.28.8
etcd 3.5.4 -> 3.5.13
Protobuf 1.32.0 -> 1.33.0
OpenTelemetry 0.20.0 -> 1.20.0
containerd 1.6.6 -> 1.7.15
CoreDNS 1.9.2 -> 1.11.1
PodSecurityPolicies were removed in Kubernetes 1.25, so this change
replaces them with a static PodSecurity admission configuration which
behaves the same or is slightly more permissive in most ways. The only
known exceptions are that NET_RAW is no longer an allowed capability and
non-standard SELinux labels are no longer permitted (but these never did
anything anyway).
The RBAC policies are intentionally not removed yet, as we do not yet
have the capability to actually update these. They will be removed when
that is available (#288); until then they will stay in place but do
nothing.
With the containerd upgrade the deprecated option for ignoring
preseeded/pinned images for garbage collection in Kubelet can be
removed.
This change also contains some drive-by fixes to the controller-manager,
like passing the Service IP net and disabling cloud-related control
loops which generate spurious warnings if enabled.
The containerd tracing patch is removed, as it is no longer necessary
now that we can use OTel v1.
An actual upgrade test will be part of a future CL as this one is
already quite large and it works stand-alone.
Co-authored-by: Tim Windelschmidt <tim@monogon.tech>
Change-Id: I8e5f51e6e6240a1b67590458b2f1c24d58c8e91e
Reviewed-on: https://review.monogon.dev/c/monogon/+/2315
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/node/kubernetes/BUILD.bazel b/metropolis/node/kubernetes/BUILD.bazel
index 8e68973..d775c50 100644
--- a/metropolis/node/kubernetes/BUILD.bazel
+++ b/metropolis/node/kubernetes/BUILD.bazel
@@ -45,6 +45,9 @@
"@io_k8s_api//storage/v1:storage",
"@io_k8s_apimachinery//pkg/api/errors",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
+ "@io_k8s_apimachinery//pkg/runtime",
+ "@io_k8s_apimachinery//pkg/runtime/schema",
+ "@io_k8s_apiserver//pkg/apis/apiserver",
"@io_k8s_client_go//informers",
"@io_k8s_client_go//informers/core/v1:core",
"@io_k8s_client_go//informers/storage/v1:storage",
@@ -58,6 +61,8 @@
"@io_k8s_client_go//util/workqueue",
"@io_k8s_kubelet//config/v1beta1",
"@io_k8s_kubelet//pkg/apis/pluginregistration/v1:pluginregistration",
+ "@io_k8s_kubernetes//plugin/pkg/admission/security/podsecurity",
+ "@io_k8s_pod_security_admission//admission/api/v1:api",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//status",
diff --git a/metropolis/node/kubernetes/apiserver.go b/metropolis/node/kubernetes/apiserver.go
index 9c4132d..7da4490 100644
--- a/metropolis/node/kubernetes/apiserver.go
+++ b/metropolis/node/kubernetes/apiserver.go
@@ -18,11 +18,19 @@
import (
"context"
+ "encoding/json"
"encoding/pem"
"fmt"
"net"
"os/exec"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "k8s.io/apiserver/pkg/apis/apiserver"
+ "k8s.io/kubernetes/plugin/pkg/admission/security/podsecurity"
+ podsecurityadmissionv1 "k8s.io/pod-security-admission/admission/api/v1"
+
common "source.monogon.dev/metropolis/node"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/kubernetes/pki"
@@ -48,6 +56,56 @@
serverKey []byte
}
+// mustWrapUnknownJSON serializes o to JSON and wraps the result in a
+// runtime.Unknown whose TypeMeta carries o's GroupVersionKind and whose
+// content type is marked as JSON. It panics if o cannot be marshaled.
+func mustWrapUnknownJSON(o schema.ObjectKind) *runtime.Unknown {
+ oRaw, err := json.Marshal(o)
+ if err != nil {
+ panic("While marshaling object into runtime.Unknown: " + err.Error())
+ }
+ // Copy the object's type information onto the wrapper's TypeMeta.
+ var typ runtime.TypeMeta
+ typ.SetGroupVersionKind(o.GroupVersionKind())
+ return &runtime.Unknown{
+ TypeMeta: typ,
+ Raw: oRaw,
+ ContentType: runtime.ContentTypeJSON,
+ }
+}
+
+// mustMarshalJSON returns the JSON encoding of o and panics if encoding
+// fails.
+func mustMarshalJSON(o any) []byte {
+ out, err := json.Marshal(o)
+ if err != nil {
+ panic("mustMarshalJSON failed: " + err.Error())
+ }
+ return out
+}
+
+// Static admission configuration handed to the API server. All values are
+// built at package initialization, so a marshaling failure panics at
+// startup.
+var (
+ // podsecurityadmission is the PodSecurity admission plugin
+ // configuration: the enforce, warn and audit defaults are all set to
+ // "baseline" and no exemptions are declared.
+ podsecurityadmission = &podsecurityadmissionv1.PodSecurityConfiguration{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: podsecurityadmissionv1.SchemeGroupVersion.String(),
+ Kind: "PodSecurityConfiguration",
+ },
+ Defaults: podsecurityadmissionv1.PodSecurityDefaults{
+ Enforce: "baseline",
+ Warn: "baseline",
+ Audit: "baseline",
+ },
+ Exemptions: podsecurityadmissionv1.PodSecurityExemptions{},
+ }
+
+ // admissionConfig contains a single plugin entry, named
+ // podsecurity.PluginName, whose configuration is podsecurityadmission
+ // wrapped as raw JSON.
+ admissionConfig = apiserver.AdmissionConfiguration{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: apiserver.SchemeGroupVersion.String(),
+ Kind: "AdmissionConfiguration",
+ },
+ Plugins: []apiserver.AdmissionPluginConfiguration{{
+ Name: podsecurity.PluginName,
+ Configuration: mustWrapUnknownJSON(podsecurityadmission),
+ }},
+ }
+
+ // admissionConfigRaw is admissionConfig encoded as JSON; it is the
+ // payload passed for --admission-control-config-file.
+ admissionConfigRaw = mustMarshalJSON(admissionConfig)
+)
+
func (s *apiserverService) loadPKI(ctx context.Context) error {
for _, el := range []struct {
targetCert *[]byte
@@ -95,7 +153,7 @@
"--authorization-mode=Node,RBAC",
args.FileOpt("--client-ca-file", "client-ca.pem",
pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: s.idCA})),
- "--enable-admission-plugins=NodeRestriction,PodSecurityPolicy",
+ "--enable-admission-plugins=NodeRestriction",
"--enable-aggregator-routing=true",
fmt.Sprintf("--secure-port=%d", common.KubernetesAPIPort),
fmt.Sprintf("--etcd-servers=unix:///%s:0", s.EphemeralConsensusDirectory.ClientSocket.FullPath()),
@@ -126,6 +184,7 @@
pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: s.serverCert})),
args.FileOpt("--tls-private-key-file", "server-key.pem",
pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: s.serverKey})),
+ args.FileOpt("--admission-control-config-file", "admission-control.json", admissionConfigRaw),
)
if args.Error() != nil {
return err
diff --git a/metropolis/node/kubernetes/controller-manager.go b/metropolis/node/kubernetes/controller-manager.go
index 363571d..252de53 100644
--- a/metropolis/node/kubernetes/controller-manager.go
+++ b/metropolis/node/kubernetes/controller-manager.go
@@ -30,6 +30,7 @@
type controllerManagerConfig struct {
clusterNet net.IPNet
+ serviceNet net.IPNet
// All PKI-related things are in DER
kubeConfig []byte
rootCA []byte
@@ -78,12 +79,18 @@
pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: config.rootCA})),
"--use-service-account-credentials=true", // Enables things like PSP enforcement
fmt.Sprintf("--cluster-cidr=%v", config.clusterNet.String()),
+ fmt.Sprintf("--service-cluster-ip-range=%v", config.serviceNet.String()),
args.FileOpt("--tls-cert-file", "server-cert.pem",
pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: config.serverCert})),
args.FileOpt("--tls-private-key-file", "server-key.pem",
pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: config.serverKey})),
"--allocate-node-cidrs",
- "--cluster-cidr="+config.clusterNet.String(),
+ // Disables unused cloud control loops and prevents warnings.
+ "--cloud-provider=external",
+ "--controllers=*,-certificatesigningrequest-signing-controller",
+ // This is intentionally empty, but if unset it tries to mkdir it
+ // in the usual place, generating an error.
+ "--flex-volume-plugin-dir=/kubernetes/conf/flexvolume-plugins",
)
if args.Error() != nil {
diff --git a/metropolis/node/kubernetes/csi.go b/metropolis/node/kubernetes/csi.go
index 58c381a..f150a13 100644
--- a/metropolis/node/kubernetes/csi.go
+++ b/metropolis/node/kubernetes/csi.go
@@ -30,7 +30,7 @@
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/wrapperspb"
- "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
+ pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/pkg/fsquota"
diff --git a/metropolis/node/kubernetes/kubelet.go b/metropolis/node/kubernetes/kubelet.go
index 136bc28..e8c7836 100644
--- a/metropolis/node/kubernetes/kubelet.go
+++ b/metropolis/node/kubernetes/kubelet.go
@@ -85,12 +85,13 @@
return &kubeletconfig.KubeletConfiguration{
TypeMeta: v1.TypeMeta{
Kind: "KubeletConfiguration",
- APIVersion: kubeletconfig.GroupName + "/v1beta1",
+ APIVersion: kubeletconfig.SchemeGroupVersion.String(),
},
- TLSCertFile: fargs.ArgPath("server.crt", s.serverCert),
- TLSPrivateKeyFile: s.KubeletDirectory.PKI.Key.FullPath(),
- TLSMinVersion: "VersionTLS13",
- ClusterDNS: clusterDNS,
+ ContainerRuntimeEndpoint: "unix://" + s.EphemeralDirectory.Containerd.ClientSocket.FullPath(),
+ TLSCertFile: fargs.ArgPath("server.crt", s.serverCert),
+ TLSPrivateKeyFile: s.KubeletDirectory.PKI.Key.FullPath(),
+ TLSMinVersion: "VersionTLS13",
+ ClusterDNS: clusterDNS,
Authentication: kubeletconfig.KubeletAuthentication{
X509: kubeletconfig.KubeletX509Authentication{
ClientCAFile: fargs.ArgPath("ca.crt", s.serverCACert),
@@ -135,9 +136,6 @@
cmd := exec.CommandContext(ctx, "/kubernetes/bin/kube", "kubelet",
fargs.FileOpt("--config", "config.json", configRaw),
- fmt.Sprintf("--container-runtime-endpoint=unix://%s", s.EphemeralDirectory.Containerd.ClientSocket.FullPath()),
- //TODO: Remove with k8s 1.29 (https://github.com/kubernetes/kubernetes/pull/118544)
- "--pod-infra-container-image", "preseed.metropolis.internal/node/kubernetes/pause:latest",
fargs.FileOpt("--kubeconfig", "kubeconfig", s.kubeconfig),
fmt.Sprintf("--root-dir=%s", s.KubeletDirectory.FullPath()),
)
diff --git a/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go b/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go
index a6d4657..b9b4fb4 100644
--- a/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go
+++ b/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go
@@ -36,7 +36,7 @@
"google.golang.org/grpc"
corev1 "k8s.io/api/core/v1"
deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
- "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
+ pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/pkg/logtree"
diff --git a/metropolis/node/kubernetes/reconciler/BUILD.bazel b/metropolis/node/kubernetes/reconciler/BUILD.bazel
index e4bc41d..306c273 100644
--- a/metropolis/node/kubernetes/reconciler/BUILD.bazel
+++ b/metropolis/node/kubernetes/reconciler/BUILD.bazel
@@ -5,7 +5,6 @@
srcs = [
"reconciler.go",
"resources_csi.go",
- "resources_podsecuritypolicy.go",
"resources_rbac.go",
"resources_runtimeclass.go",
"resources_storageclass.go",
@@ -15,8 +14,7 @@
deps = [
"//metropolis/pkg/supervisor",
"@io_k8s_api//core/v1:core",
- "@io_k8s_api//node/v1beta1",
- "@io_k8s_api//policy/v1beta1",
+ "@io_k8s_api//node/v1:node",
"@io_k8s_api//rbac/v1:rbac",
"@io_k8s_api//storage/v1:storage",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
diff --git a/metropolis/node/kubernetes/reconciler/reconciler.go b/metropolis/node/kubernetes/reconciler/reconciler.go
index 6c13df0..4ed4859 100644
--- a/metropolis/node/kubernetes/reconciler/reconciler.go
+++ b/metropolis/node/kubernetes/reconciler/reconciler.go
@@ -114,7 +114,6 @@
func allResources(clientSet kubernetes.Interface) map[string]resource {
return map[string]resource{
- "psps": resourcePodSecurityPolicies{clientSet},
"clusterroles": resourceClusterRoles{clientSet},
"clusterrolebindings": resourceClusterRoleBindings{clientSet},
"storageclasses": resourceStorageClasses{clientSet},
diff --git a/metropolis/node/kubernetes/reconciler/resources_podsecuritypolicy.go b/metropolis/node/kubernetes/reconciler/resources_podsecuritypolicy.go
deleted file mode 100644
index 97a38dd..0000000
--- a/metropolis/node/kubernetes/reconciler/resources_podsecuritypolicy.go
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright 2020 The Monogon Project Authors.
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package reconciler
-
-import (
- "context"
-
- core "k8s.io/api/core/v1"
- policy "k8s.io/api/policy/v1beta1"
- meta "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/client-go/kubernetes"
-)
-
-type resourcePodSecurityPolicies struct {
- kubernetes.Interface
-}
-
-func (r resourcePodSecurityPolicies) List(ctx context.Context) ([]meta.Object, error) {
- res, err := r.PolicyV1beta1().PodSecurityPolicies().List(ctx, listBuiltins)
- if err != nil {
- return nil, err
- }
- objs := make([]meta.Object, len(res.Items))
- for i := range res.Items {
- objs[i] = &res.Items[i]
- }
- return objs, nil
-}
-
-func (r resourcePodSecurityPolicies) Create(ctx context.Context, el meta.Object) error {
- _, err := r.PolicyV1beta1().PodSecurityPolicies().Create(ctx, el.(*policy.PodSecurityPolicy), meta.CreateOptions{})
- return err
-}
-
-func (r resourcePodSecurityPolicies) Delete(ctx context.Context, name string) error {
- return r.PolicyV1beta1().PodSecurityPolicies().Delete(ctx, name, meta.DeleteOptions{})
-}
-
-func (r resourcePodSecurityPolicies) Expected() []meta.Object {
- return []meta.Object{
- &policy.PodSecurityPolicy{
- ObjectMeta: meta.ObjectMeta{
- Name: "default",
- Labels: builtinLabels(nil),
- Annotations: map[string]string{
- "kubernetes.io/description": "This default PSP allows the creation of pods using features that are" +
- " generally considered safe against any sort of escape.",
- },
- },
- Spec: policy.PodSecurityPolicySpec{
- AllowPrivilegeEscalation: True(),
- AllowedCapabilities: []core.Capability{ // runc's default list of allowed capabilities
- "SETPCAP",
- "MKNOD",
- "AUDIT_WRITE",
- "CHOWN",
- "NET_RAW",
- "DAC_OVERRIDE",
- "FOWNER",
- "FSETID",
- "KILL",
- "SETGID",
- "SETUID",
- "NET_BIND_SERVICE",
- "SYS_CHROOT",
- "SETFCAP",
- },
- HostNetwork: false,
- HostIPC: false,
- HostPID: false,
- FSGroup: policy.FSGroupStrategyOptions{
- Rule: policy.FSGroupStrategyRunAsAny,
- },
- RunAsUser: policy.RunAsUserStrategyOptions{
- Rule: policy.RunAsUserStrategyRunAsAny,
- },
- SELinux: policy.SELinuxStrategyOptions{
- Rule: policy.SELinuxStrategyRunAsAny,
- },
- SupplementalGroups: policy.SupplementalGroupsStrategyOptions{
- Rule: policy.SupplementalGroupsStrategyRunAsAny,
- },
- Volumes: []policy.FSType{ // Volumes considered safe to use
- policy.ConfigMap,
- policy.EmptyDir,
- policy.Projected,
- policy.Secret,
- policy.DownwardAPI,
- policy.PersistentVolumeClaim,
- },
- },
- },
- }
-}
diff --git a/metropolis/node/kubernetes/reconciler/resources_runtimeclass.go b/metropolis/node/kubernetes/reconciler/resources_runtimeclass.go
index 11c2fa0..b41c2c9 100644
--- a/metropolis/node/kubernetes/reconciler/resources_runtimeclass.go
+++ b/metropolis/node/kubernetes/reconciler/resources_runtimeclass.go
@@ -19,7 +19,7 @@
import (
"context"
- node "k8s.io/api/node/v1beta1"
+ node "k8s.io/api/node/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)
@@ -29,7 +29,7 @@
}
func (r resourceRuntimeClasses) List(ctx context.Context) ([]meta.Object, error) {
- res, err := r.NodeV1beta1().RuntimeClasses().List(ctx, listBuiltins)
+ res, err := r.NodeV1().RuntimeClasses().List(ctx, listBuiltins)
if err != nil {
return nil, err
}
@@ -41,12 +41,12 @@
}
func (r resourceRuntimeClasses) Create(ctx context.Context, el meta.Object) error {
- _, err := r.NodeV1beta1().RuntimeClasses().Create(ctx, el.(*node.RuntimeClass), meta.CreateOptions{})
+ _, err := r.NodeV1().RuntimeClasses().Create(ctx, el.(*node.RuntimeClass), meta.CreateOptions{})
return err
}
func (r resourceRuntimeClasses) Delete(ctx context.Context, name string) error {
- return r.NodeV1beta1().RuntimeClasses().Delete(ctx, name, meta.DeleteOptions{})
+ return r.NodeV1().RuntimeClasses().Delete(ctx, name, meta.DeleteOptions{})
}
func (r resourceRuntimeClasses) Expected() []meta.Object {
diff --git a/metropolis/node/kubernetes/service_controller.go b/metropolis/node/kubernetes/service_controller.go
index d309009..ccb5b29 100644
--- a/metropolis/node/kubernetes/service_controller.go
+++ b/metropolis/node/kubernetes/service_controller.go
@@ -68,6 +68,7 @@
return fmt.Errorf("could not generate controller manager pki config: %w", err)
}
controllerManagerConfig.clusterNet = s.c.ClusterNet
+ controllerManagerConfig.serviceNet = s.c.ServiceIPRange
schedulerConfig, err := getPKISchedulerConfig(ctx, s.c.KPKI)
if err != nil {
return fmt.Errorf("could not generate scheduler pki config: %w", err)