m/node: switch to cgroupv2

This switches us from legacy cgroup (v1) to cgroup v2 aka unified
cgroup. Our versions of Kubernetes, containerd and runc/gVisor all
support this by now.

cgroup_bpf needs to be enabled in the kernel for containerd with cgroup
v2. Also enable swap, as this now works with cgroup v2; this gets rid of
a warning for every pod being started.

We are not really using cgroups ourselves, but as the root cgroup in v2
is special, move our own process into a subgroup at startup.

Change-Id: I8d63b2ad672568c052c3fe1a2306182f033667fa
Reviewed-on: https://review.monogon.dev/c/monogon/+/3207
Tested-by: Jenkins CI
Reviewed-by: Jan Schär <jan@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index 938a7d7..9c80741 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -45,6 +45,7 @@
         "@com_github_cenkalti_backoff_v4//:backoff",
         "@com_github_containerd_containerd//:containerd",
         "@com_github_containerd_containerd//namespaces",
+        "@com_github_opencontainers_runc//libcontainer/cgroups",
         "@org_golang_google_grpc//:go_default_library",
         "@org_golang_google_grpc//codes",
         "@org_golang_google_grpc//status",
diff --git a/metropolis/node/core/mounts.go b/metropolis/node/core/mounts.go
index cb8351d..047552b 100644
--- a/metropolis/node/core/mounts.go
+++ b/metropolis/node/core/mounts.go
@@ -19,8 +19,8 @@
 import (
 	"fmt"
 	"os"
-	"strings"
 
+	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"golang.org/x/sys/unix"
 )
 
@@ -50,39 +50,15 @@
 		}
 	}
 
-	// Mount all available CGroups for v1 (v2 uses a single unified hierarchy
-	// and is not supported by our runtimes yet)
-	if err := unix.Mount("tmpfs", "/sys/fs/cgroup", "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
+	if err := unix.Mount("cgroup2", "/sys/fs/cgroup", "cgroup2", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "nsdelegate,memory_recursiveprot"); err != nil {
 		panic(err)
 	}
-	cgroupsRaw, err := os.ReadFile("/proc/cgroups")
-	if err != nil {
+	// Create main cgroup "everything" and move ourselves into it.
+	if err := os.Mkdir("/sys/fs/cgroup/everything", 0755); err != nil {
 		panic(err)
 	}
-
-	cgroupLines := strings.Split(string(cgroupsRaw), "\n")
-	for _, cgroupLine := range cgroupLines {
-		if cgroupLine == "" || strings.HasPrefix(cgroupLine, "#") {
-			continue
-		}
-		cgroupParts := strings.Split(cgroupLine, "\t")
-		cgroupName := cgroupParts[0]
-		if err := os.Mkdir("/sys/fs/cgroup/"+cgroupName, 0755); err != nil {
-			panic(err)
-		}
-		if err := unix.Mount("cgroup", "/sys/fs/cgroup/"+cgroupName, "cgroup", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, cgroupName); err != nil {
-			panic(err)
-		}
-	}
-
-	// Enable hierarchical memory accounting
-	useMemoryHierarchy, err := os.OpenFile("/sys/fs/cgroup/memory/memory.use_hierarchy", os.O_RDWR, 0)
-	if err != nil {
+	if err := cgroups.WriteCgroupProc("/sys/fs/cgroup/everything", os.Getpid()); err != nil {
 		panic(err)
 	}
-	if _, err := useMemoryHierarchy.WriteString("1"); err != nil {
-		panic(err)
-	}
-	useMemoryHierarchy.Close()
 	return nil
 }