m/node: enable user namespaces in K8s

This enables the two feature gates for user namespace support in K8s.
We did not previously have a passwd file, which caused Go's UserLookup
to fail with an unexpected error. Add a mostly-empty placeholder file
to placate it.

Change-Id: I71a7a6dc889a289512075a25b7e551f2cd65ffb6
Reviewed-on: https://review.monogon.dev/c/monogon/+/3665
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/BUILD.bazel b/metropolis/node/BUILD.bazel
index 6ae234a..c09ca2d 100644
--- a/metropolis/node/BUILD.bazel
+++ b/metropolis/node/BUILD.bazel
@@ -33,6 +33,10 @@
     },
 )
 
+exports_files([
+    "passwd",
+])
+
 erofs_image(
     name = "rootfs",
     files = {
@@ -42,6 +46,7 @@
         # These should not be explicitly used by Metropolis code and are only here for compatibility with
         # paths hardcoded by standard libraries (like Go's).
         "@cacerts//file": "/etc/ssl/cert.pem",
+        "//metropolis/node:passwd": "/etc/passwd",
         "//osbase/net/dns:resolv.conf": "/etc/resolv.conf",
         "//osbase/net/dns:hosts": "/etc/hosts",
         ":os-release-info": "/etc/os-release",
diff --git a/metropolis/node/kubernetes/BUILD.bazel b/metropolis/node/kubernetes/BUILD.bazel
index 9f51ba0..15eec36 100644
--- a/metropolis/node/kubernetes/BUILD.bazel
+++ b/metropolis/node/kubernetes/BUILD.bazel
@@ -66,6 +66,7 @@
         "@io_k8s_component_base//featuregate",
         "@io_k8s_kubelet//config/v1beta1",
         "@io_k8s_kubelet//pkg/apis/pluginregistration/v1:pluginregistration",
+        "@io_k8s_kubernetes//pkg/features",
         "@io_k8s_kubernetes//plugin/pkg/admission/security/podsecurity",
         "@io_k8s_pod_security_admission//admission/api/v1:api",
         "@org_golang_google_grpc//:grpc",
diff --git a/metropolis/node/kubernetes/feature_gates.go b/metropolis/node/kubernetes/feature_gates.go
index 06d970f..9be3b35 100644
--- a/metropolis/node/kubernetes/feature_gates.go
+++ b/metropolis/node/kubernetes/feature_gates.go
@@ -5,6 +5,7 @@
 	"strings"
 
 	"k8s.io/component-base/featuregate"
+	"k8s.io/kubernetes/pkg/features"
 )
 
 type featureGates map[featuregate.Feature]bool
@@ -32,4 +33,7 @@
 	return out
 }
 
-var extraFeatureGates = featureGates{}
+var extraFeatureGates = featureGates{
+	features.UserNamespacesSupport:              true,
+	features.UserNamespacesPodSecurityStandards: true,
+}
diff --git a/metropolis/node/passwd b/metropolis/node/passwd
new file mode 100644
index 0000000..e363ba6
--- /dev/null
+++ b/metropolis/node/passwd
@@ -0,0 +1 @@
+root:x:0:0:root:/nonexistent:/sbin/nologin
diff --git a/metropolis/test/e2e/preseedtest/main.go b/metropolis/test/e2e/preseedtest/main.go
index a44377f..77fcba6 100644
--- a/metropolis/test/e2e/preseedtest/main.go
+++ b/metropolis/test/e2e/preseedtest/main.go
@@ -19,6 +19,9 @@
 import (
 	"fmt"
 	"net/http"
+	"os"
+	"strconv"
+	"strings"
 )
 
 func main() {
@@ -26,6 +29,28 @@
 	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
 		fmt.Fprintf(w, "Hello world from preseeded image\n")
 	})
+	http.HandleFunc("/ready_userns", func(w http.ResponseWriter, r *http.Request) {
+		uidMapRaw, err := os.ReadFile("/proc/self/uid_map")
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		uidMapFields := strings.Fields(string(uidMapRaw))
+		if len(uidMapFields) != 3 {
+			http.Error(w, fmt.Sprintf("Bad uid_map contents, not 3 fields: %q", string(uidMapRaw)), http.StatusInternalServerError)
+			return
+		}
+		startId, err := strconv.ParseUint(uidMapFields[1], 10, 64)
+		if err != nil {
+			http.Error(w, fmt.Sprintf("while parsing start ID: %v", err), http.StatusInternalServerError)
+			return
+		}
+		if startId == 0 {
+			http.Error(w, "Not in a non-initial user namespace, UID space starts at 0", http.StatusInternalServerError)
+			return
+		}
+		fmt.Fprintf(w, "Hello world from a user namespace\n")
+	})
 	err := http.ListenAndServe(":80", nil)
 	if err != nil {
 		fmt.Printf("Serve failed: %v\n", err)
diff --git a/metropolis/test/e2e/suites/kubernetes/BUILD.bazel b/metropolis/test/e2e/suites/kubernetes/BUILD.bazel
index 318b7cf..9e30540 100644
--- a/metropolis/test/e2e/suites/kubernetes/BUILD.bazel
+++ b/metropolis/test/e2e/suites/kubernetes/BUILD.bazel
@@ -46,6 +46,7 @@
         "@io_k8s_apimachinery//pkg/api/resource",
         "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
         "@io_k8s_kubernetes//pkg/api/v1/pod",
+        "@io_k8s_utils//ptr",
         "@org_golang_google_protobuf//types/known/fieldmaskpb",
     ],
 )
diff --git a/metropolis/test/e2e/suites/kubernetes/run_test.go b/metropolis/test/e2e/suites/kubernetes/run_test.go
index baaa235..18239e0 100644
--- a/metropolis/test/e2e/suites/kubernetes/run_test.go
+++ b/metropolis/test/e2e/suites/kubernetes/run_test.go
@@ -23,6 +23,7 @@
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
+	"k8s.io/utils/ptr"
 
 	common "source.monogon.dev/metropolis/node"
 	apb "source.monogon.dev/metropolis/proto/api"
@@ -400,6 +401,32 @@
 			return fmt.Errorf("pod is not ready: %v, log:\n  %s", pod.Status.Phase, strings.Join(lines, "\n  "))
 		})
 	}
+	util.TestEventual(t, "Deployment in user namespace", ctx, largeTestTimeout, func(ctx context.Context) error {
+		deployment := makeTestDeploymentSpec("test-userns-1")
+		deployment.Spec.Template.Spec.HostUsers = ptr.To(false)
+		deployment.Spec.Template.Spec.Containers[0].ReadinessProbe.HTTPGet.Path = "/ready_userns"
+		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "Deployment in user namespace is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-userns-1"})
+		if err != nil {
+			return err
+		}
+		if len(res.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := res.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		} else {
+			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+		}
+	})
 	util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
 		_, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
 		return err