m/n/kubernetes: implement Metropolis authenticating proxy

This implements an authenticating proxy for K8s which authenticates
Metropolis credentials and passes the extracted identity information
on to the Kubernetes API server. It currently only handles user
authentication; machine-to-machine authentication is still done by the
API server itself. It also adds a role binding granting the owner full
access, as we do not have an identity system yet.

Change-Id: I02043924bb7ce7a1acdb826dad2d27a4c2008136
Reviewed-on: https://review.monogon.dev/c/monogon/+/509
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/kubernetes/BUILD.bazel b/metropolis/node/kubernetes/BUILD.bazel
index a162bcc..2a35c4b 100644
--- a/metropolis/node/kubernetes/BUILD.bazel
+++ b/metropolis/node/kubernetes/BUILD.bazel
@@ -19,6 +19,7 @@
         "//metropolis/node/core/localstorage:go_default_library",
         "//metropolis/node/core/network:go_default_library",
         "//metropolis/node/core/network/dns:go_default_library",
+        "//metropolis/node/kubernetes/authproxy:go_default_library",
         "//metropolis/node/kubernetes/clusternet:go_default_library",
         "//metropolis/node/kubernetes/nfproxy:go_default_library",
         "//metropolis/node/kubernetes/pki:go_default_library",
diff --git a/metropolis/node/kubernetes/apiserver.go b/metropolis/node/kubernetes/apiserver.go
index 39105b2..cd4ff60 100644
--- a/metropolis/node/kubernetes/apiserver.go
+++ b/metropolis/node/kubernetes/apiserver.go
@@ -109,7 +109,7 @@
 			pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: s.aggregationClientCert})),
 		args.FileOpt("--proxy-client-key-file", "aggregation-client-key.pem",
 			pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: s.aggregationClientKey})),
-		"--requestheader-allowed-names=front-proxy-client",
+		"--requestheader-allowed-names=front-proxy-client,metropolis-auth-proxy-client",
 		args.FileOpt("--requestheader-client-ca-file", "aggregation-ca.pem",
 			pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: s.aggregationCA})),
 		"--requestheader-extra-headers-prefix=X-Remote-Extra-",
diff --git a/metropolis/node/kubernetes/authproxy/BUILD.bazel b/metropolis/node/kubernetes/authproxy/BUILD.bazel
new file mode 100644
index 0000000..965e8ad
--- /dev/null
+++ b/metropolis/node/kubernetes/authproxy/BUILD.bazel
@@ -0,0 +1,15 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["authproxy.go"],
+    importpath = "source.monogon.dev/metropolis/node/kubernetes/authproxy",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/node/core/identity:go_default_library",
+        "//metropolis/node/kubernetes/pki:go_default_library",
+        "//metropolis/pkg/supervisor:go_default_library",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+    ],
+)
diff --git a/metropolis/node/kubernetes/authproxy/authproxy.go b/metropolis/node/kubernetes/authproxy/authproxy.go
new file mode 100644
index 0000000..0477b88
--- /dev/null
+++ b/metropolis/node/kubernetes/authproxy/authproxy.go
@@ -0,0 +1,185 @@
+// Package authproxy implements an authenticating proxy in front of the K8s
+// API server converting Metropolis credentials into authentication headers.
+package authproxy
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"encoding/json"
+	"fmt"
+	"net"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"strings"
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"source.monogon.dev/metropolis/node"
+	"source.monogon.dev/metropolis/node/core/identity"
+	"source.monogon.dev/metropolis/node/kubernetes/pki"
+	"source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+// Service is the Kubernetes authenticating proxy. It accepts TLS connections
+// from clients presenting Metropolis credentials and forwards their requests
+// to the local Kubernetes API server, asserting the verified identity via
+// X-Remote-* request headers.
+type Service struct {
+	// KPKI is a reference to the Kubernetes PKI
+	KPKI *pki.PKI
+	// Node contains the node identity
+	Node *identity.Node
+}
+
+// getTLSCert loads the certificate and key identified by name from the
+// Kubernetes PKI and returns them as a tls.Certificate. The key must be in
+// PKCS#8 form.
+func (s *Service) getTLSCert(ctx context.Context, name pki.KubeCertificateName) (*tls.Certificate, error) {
+	cert, key, err := s.KPKI.Certificate(ctx, name)
+	if err != nil {
+		return nil, fmt.Errorf("could not load certificate %q from PKI: %w", name, err)
+	}
+	parsedKey, err := x509.ParsePKCS8PrivateKey(key)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse key for cert %q: %w", name, err)
+	}
+	return &tls.Certificate{
+		Certificate: [][]byte{cert},
+		PrivateKey:  parsedKey,
+	}, nil
+}
+
+// respondWithK8sStatus writes status to w as a JSON-encoded Kubernetes Status
+// object, using status.Code as the HTTP status code. It returns any error
+// encountered while encoding or writing the response body.
+func respondWithK8sStatus(w http.ResponseWriter, status *metav1.Status) error {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(int(status.Code))
+	return json.NewEncoder(w).Encode(status)
+}
+
+// Run serves the authenticating proxy on KubernetesAPIWrappedPort until ctx
+// is canceled or the HTTPS server fails. Clients must present a certificate
+// issued by the Metropolis cluster CA; requests are then forwarded to the API
+// server on localhost using the proxy's own client certificate.
+func (s *Service) Run(ctx context.Context) error {
+	logger := supervisor.Logger(ctx)
+
+	k8sCAs := x509.NewCertPool()
+	cert, _, err := s.KPKI.Certificate(ctx, pki.IdCA)
+	if err != nil {
+		return fmt.Errorf("could not load K8s CA certificate from PKI: %w", err)
+	}
+	parsedCert, err := x509.ParseCertificate(cert)
+	if err != nil {
+		return fmt.Errorf("failed to parse K8s CA certificate: %w", err)
+	}
+	k8sCAs.AddCert(parsedCert)
+
+	clientCert, err := s.getTLSCert(ctx, pki.MetropolisAuthProxyClient)
+	if err != nil {
+		return err
+	}
+
+	proxy := httputil.NewSingleHostReverseProxy(&url.URL{
+		Host:   net.JoinHostPort("localhost", node.KubernetesAPIPort.PortString()),
+		Scheme: "https",
+	})
+	proxy.Transport = &http.Transport{
+		DialContext: (&net.Dialer{
+			Timeout:   30 * time.Second,
+			KeepAlive: 30 * time.Second,
+		}).DialContext,
+		TLSClientConfig: &tls.Config{
+			RootCAs:      k8sCAs,
+			Certificates: []tls.Certificate{*clientCert},
+			NextProtos:   []string{"h2", "http/1.1"},
+		},
+		ForceAttemptHTTP2:     true,
+		MaxIdleConns:          100,
+		IdleConnTimeout:       90 * time.Second,
+		TLSHandshakeTimeout:   10 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+	}
+	proxy.ErrorHandler = func(w http.ResponseWriter, req *http.Request, err error) {
+		logger.Infof("Proxy error: %v", err)
+		status := &metav1.Status{
+			Status:  metav1.StatusFailure,
+			Code:    http.StatusBadGateway,
+			Reason:  metav1.StatusReasonServiceUnavailable,
+			Message: "authproxy could not reach apiserver",
+		}
+		if werr := respondWithK8sStatus(w, status); werr != nil {
+			logger.Infof("Failed to send proxy error response: %v", werr)
+		}
+	}
+
+	serverCert, err := s.getTLSCert(ctx, pki.APIServer)
+	if err != nil {
+		return err
+	}
+	clientCAs := x509.NewCertPool()
+	clientCAs.AddCert(s.Node.ClusterCA())
+	server := &http.Server{
+		Addr: ":" + node.KubernetesAPIWrappedPort.PortString(),
+		TLSConfig: &tls.Config{
+			MinVersion:   tls.VersionTLS12,
+			NextProtos:   []string{"h2", "http/1.1"},
+			ClientAuth:   tls.RequireAndVerifyClientCert,
+			ClientCAs:    clientCAs,
+			Certificates: []tls.Certificate{*serverCert},
+		},
+		// Limits match @io_k8s_apiserver/pkg/server:secure_serving.go Serve()
+		MaxHeaderBytes:    1 << 20,
+		IdleTimeout:       90 * time.Second,
+		ReadHeaderTimeout: 32 * time.Second,
+
+		Handler: http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+			// Guaranteed to exist because of RequireAndVerifyClientCert
+			clientCert := req.TLS.VerifiedChains[0][0]
+			clientIdentity, err := identity.VerifyUserInCluster(clientCert, s.Node.ClusterCA())
+			if err != nil {
+				status := &metav1.Status{
+					Status:  metav1.StatusFailure,
+					Code:    http.StatusUnauthorized,
+					Reason:  metav1.StatusReasonUnauthorized,
+					Message: fmt.Sprintf("Metropolis authentication failed: %v", err),
+				}
+				if werr := respondWithK8sStatus(rw, status); werr != nil {
+					logger.Infof("Failed to send authentication error response: %v", werr)
+				}
+				return
+			}
+			// Clone the request as otherwise modifying it is not allowed
+			newReq := req.Clone(req.Context())
+			// Drop any X-Remote headers to prevent injection
+			for k := range newReq.Header {
+				if strings.HasPrefix(http.CanonicalHeaderKey(k), http.CanonicalHeaderKey("X-Remote-")) {
+					newReq.Header.Del(k)
+				}
+			}
+			newReq.Header.Set("X-Remote-User", clientIdentity)
+			newReq.Header.Set("X-Remote-Group", "")
+
+			proxy.ServeHTTP(rw, newReq)
+		}),
+	}
+
+	// ListenAndServeTLS always returns a non-nil error. Surface it instead of
+	// discarding it so that a server which failed to start (or died) makes
+	// this runnable fail rather than idle until ctx is canceled.
+	serveErr := make(chan error, 1)
+	go func() {
+		serveErr <- server.ListenAndServeTLS("", "")
+	}()
+	logger.Info("K8s AuthProxy running")
+	select {
+	case err := <-serveErr:
+		return fmt.Errorf("authproxy server failed: %w", err)
+	case <-ctx.Done():
+		return server.Close()
+	}
+}
diff --git a/metropolis/node/kubernetes/pki/kubernetes.go b/metropolis/node/kubernetes/pki/kubernetes.go
index 0c795f2..542c614 100644
--- a/metropolis/node/kubernetes/pki/kubernetes.go
+++ b/metropolis/node/kubernetes/pki/kubernetes.go
@@ -76,6 +76,9 @@
 	//   https://kubernetes.io/docs/tasks/extend-kubernetes/configure-aggregation-layer/#ca-reusage-and-conflicts
 	AggregationCA    KubeCertificateName = "aggregation-ca"
 	FrontProxyClient KubeCertificateName = "front-proxy-client"
+	// The Metropolis authentication proxy needs to be able to proxy requests
+	// and assert the established identity to the Kubernetes API server.
+	MetropolisAuthProxyClient KubeCertificateName = "metropolis-auth-proxy-client"
 )
 
 const (
@@ -149,6 +152,7 @@
 		Mode:      opki.CertificateManaged,
 	}
 	make(AggregationCA, FrontProxyClient, opki.Client("front-proxy-client", nil))
+	make(AggregationCA, MetropolisAuthProxyClient, opki.Client("metropolis-auth-proxy-client", nil))
 
 	return &pki
 }
diff --git a/metropolis/node/kubernetes/reconciler/resources_rbac.go b/metropolis/node/kubernetes/reconciler/resources_rbac.go
index 15386a6..0976ba5 100644
--- a/metropolis/node/kubernetes/reconciler/resources_rbac.go
+++ b/metropolis/node/kubernetes/reconciler/resources_rbac.go
@@ -28,6 +28,7 @@
 	clusterRolePSPDefault                    = builtinRBACName("psp-default")
 	clusterRoleBindingDefaultPSP             = builtinRBACName("default-psp-for-sa")
 	clusterRoleBindingAPIServerKubeletClient = builtinRBACName("apiserver-kubelet-client")
+	clusterRoleBindingOwnerAdmin             = builtinRBACName("owner-admin")
 )
 
 type resourceClusterRoles struct {
@@ -150,5 +151,27 @@
 				},
 			},
 		},
+		clusterRoleBindingOwnerAdmin: &rbac.ClusterRoleBinding{
+			ObjectMeta: meta.ObjectMeta{
+				Name:   clusterRoleBindingOwnerAdmin,
+				Labels: builtinLabels(nil),
+				Annotations: map[string]string{
+					"kubernetes.io/description": "This binding grants the Metropolis Cluster owner access to the " +
+						"cluster-admin role on Kubernetes.",
+				},
+			},
+			RoleRef: rbac.RoleRef{
+				APIGroup: rbac.GroupName,
+				Kind:     "ClusterRole",
+				Name:     "cluster-admin",
+			},
+			Subjects: []rbac.Subject{
+				{
+					APIGroup: rbac.GroupName,
+					Kind:     "User",
+					Name:     "owner",
+				},
+			},
+		},
 	}
 }
diff --git a/metropolis/node/kubernetes/service.go b/metropolis/node/kubernetes/service.go
index 1af7607..03be33c 100644
--- a/metropolis/node/kubernetes/service.go
+++ b/metropolis/node/kubernetes/service.go
@@ -32,6 +32,7 @@
 	"source.monogon.dev/metropolis/node/core/localstorage"
 	"source.monogon.dev/metropolis/node/core/network"
 	"source.monogon.dev/metropolis/node/core/network/dns"
+	"source.monogon.dev/metropolis/node/kubernetes/authproxy"
 	"source.monogon.dev/metropolis/node/kubernetes/clusternet"
 	"source.monogon.dev/metropolis/node/kubernetes/nfproxy"
 	"source.monogon.dev/metropolis/node/kubernetes/pki"
@@ -173,6 +174,11 @@
 		KubeletDirectory: &s.c.Root.Data.Kubernetes.Kubelet,
 	}
 
+	authProxy := authproxy.Service{
+		KPKI: s.c.KPKI,
+		Node: s.c.Node,
+	}
+
 	for _, sub := range []struct {
 		name     string
 		runnable supervisor.Runnable
@@ -185,6 +191,7 @@
 		{"clusternet", clusternet.Run},
 		{"nfproxy", nfproxy.Run},
 		{"kvmdeviceplugin", kvmDevicePlugin.Run},
+		{"authproxy", authProxy.Run},
 	} {
 		err := supervisor.Run(ctx, sub.name, sub.runnable)
 		if err != nil {
diff --git a/metropolis/node/ports.go b/metropolis/node/ports.go
index e7adfb7..c90e7dc 100644
--- a/metropolis/node/ports.go
+++ b/metropolis/node/ports.go
@@ -37,6 +37,9 @@
 	// KubernetesAPIPort is the TCP port on which the Kubernetes API is
 	// exposed.
 	KubernetesAPIPort Port = 6443
+	// KubernetesAPIWrappedPort is the TCP port on which the Metropolis
+	// authenticating proxy for the Kubernetes API is exposed.
+	KubernetesAPIWrappedPort Port = 6444
 	// DebuggerPort is the port on which the delve debugger runs (on debug
 	// builds only). Not to be confused with DebugServicePort.
 	DebuggerPort Port = 2345
@@ -54,6 +57,8 @@
 		return "wireguard"
 	case KubernetesAPIPort:
 		return "kubernetes-api"
+	case KubernetesAPIWrappedPort:
+		return "kubernetes-api-wrapped"
 	case DebuggerPort:
 		return "delve"
 	}