Add containerd & gVisor support

This adds containerd, CNI, gVisor and all the necessary shims
and supporting infrastructure. It also enables all relevant features in
the Linux kernel. containerd is designed as a simple supervisor.Runnable.
It is not being started yet; this will happen in D497.

Split out from feature/kubelet.

Test Plan:
Has been tested in conjunction with the rest of D497 and will be
covered by a K8s E2E test there.

X-Origin-Diff: phab/D509
GitOrigin-RevId: 92523516b7e361a30da330eb187787e6045bfd17
diff --git a/core/BUILD b/core/BUILD
index 1628bd1..4df9777 100644
--- a/core/BUILD
+++ b/core/BUILD
@@ -6,14 +6,18 @@
         "//third_party/xfsprogs:mkfs.xfs",
         "@io_k8s_kubernetes//cmd/kubelet:_kubelet-pure",
         "@com_github_containerd_containerd//cmd/containerd",
-        "@com_github_containerd_containerd//cmd/containerd-shim",
-        "@com_github_containerd_containerd//cmd/containerd-shim-runc-v1",
         "@com_github_containerd_containerd//cmd/containerd-shim-runc-v2",
         "@com_github_containernetworking_plugins//plugins/main/loopback",
         "@com_github_containernetworking_plugins//plugins/main/ptp",
         "@com_github_containernetworking_plugins//plugins/ipam/host-local",
         "@com_github_opencontainers_runc//:runc",
         "@com_github_google_gvisor//runsc",
+        "@com_github_google_gvisor_containerd_shim//cmd/containerd-shim-runsc-v1",
+        "//core/internal/containerd:ptp.json",
+        "//core/internal/containerd:loopback.json",
+        "//core/internal/containerd:config.toml",
+        "//core/internal/containerd:runsc.toml",
+        "@cacerts//file",
     ],
     outs = [
         "initramfs.cpio.lz4",
@@ -23,22 +27,31 @@
 dir /dev 0755 0 0
 nod /dev/console 0600 0 0 c 5 1
 nod /dev/null 0644 0 0 c 1 3
+nod /dev/ptmx 0644 0 0 c 5 2
 file /init $(location //core/cmd/init) 0755 0 0
+dir /etc 0755 0 0
+dir /etc/ssl 0755 0 0
+file /etc/ssl/cert.pem $(location @cacerts//file) 0444 0 0
 dir /bin 0755 0 0
 file /bin/mkfs.xfs $(location //third_party/xfsprogs:mkfs.xfs) 0755 0 0
 file /bin/kube-controlplane $(location //core/cmd/kube-controlplane) 0755 0 0
 file /bin/kubelet $(location @io_k8s_kubernetes//cmd/kubelet:_kubelet-pure) 0755 0 0
 dir /containerd 0755 0 0
-file /containerd/containerd $(location @com_github_containerd_containerd//cmd/containerd) 0755 0 0
-file /containerd/containerd-shim $(location @com_github_containerd_containerd//cmd/containerd-shim) 0755 0 0
-file /containerd/containerd-shim-runc-v1 $(location @com_github_containerd_containerd//cmd/containerd-shim-runc-v1) 0755 0 0
-file /containerd/containerd-shim-runc-v2 $(location @com_github_containerd_containerd//cmd/containerd-shim-runc-v2) 0755 0 0
-file /containerd/runsc $(location @com_github_google_gvisor//runsc) 0755 0 0
-file /containerd/runc $(location @com_github_opencontainers_runc//:runc) 0755 0 0
-dir /containerd/cni-plugins 0755 0 0
-file /containerd/cni-plugins/loopback $(location @com_github_containernetworking_plugins//plugins/main/loopback) 0755 0 0
-file /containerd/cni-plugins/ptp $(location @com_github_containernetworking_plugins//plugins/main/ptp) 0755 0 0
-file /containerd/cni-plugins/host-local $(location @com_github_containernetworking_plugins//plugins/ipam/host-local) 0755 0 0
+dir /containerd/bin 0755 0 0
+file /containerd/bin/containerd $(location @com_github_containerd_containerd//cmd/containerd) 0755 0 0
+file /containerd/bin/containerd-shim-runsc-v1 $(location @com_github_google_gvisor_containerd_shim//cmd/containerd-shim-runsc-v1) 0755 0 0
+file /containerd/bin/runsc $(location @com_github_google_gvisor//runsc) 0755 0 0
+dir /containerd/bin/cni 0755 0 0
+file /containerd/bin/cni/loopback $(location @com_github_containernetworking_plugins//plugins/main/loopback) 0755 0 0
+file /containerd/bin/cni/ptp $(location @com_github_containernetworking_plugins//plugins/main/ptp) 0755 0 0
+file /containerd/bin/cni/host-local $(location @com_github_containernetworking_plugins//plugins/ipam/host-local) 0755 0 0
+dir /containerd/run 0755 0 0
+dir /containerd/conf 0755 0 0
+dir /containerd/conf/cni 0755 0 0
+file /containerd/conf/cni/10-ptp.conf $(location //core/internal/containerd:ptp.json) 0444 0 0
+file /containerd/conf/cni/99-loopback.conf $(location //core/internal/containerd:loopback.json) 0444 0 0
+file /containerd/conf/config.toml $(location //core/internal/containerd:config.toml) 0444 0 0
+file /containerd/conf/runsc.toml $(location //core/internal/containerd:runsc.toml) 0444 0 0
 EOF
     """,
     tools = [
diff --git a/core/cmd/init/switchroot.go b/core/cmd/init/switchroot.go
index 0e68b06..d51b1fa 100644
--- a/core/cmd/init/switchroot.go
+++ b/core/cmd/init/switchroot.go
@@ -19,6 +19,7 @@
 import (
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
@@ -154,6 +155,40 @@
 		}
 	}
 
+	// Mount all available CGroups for v1 (v2 uses a single unified hierarchy and is not supported by our runtimes yet)
+	if err := unix.Mount("tmpfs", "/mnt/sys/fs/cgroup", "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
+		panic(err)
+	}
+	cgroupsRaw, err := ioutil.ReadFile("/mnt/proc/cgroups")
+	if err != nil {
+		panic(err)
+	}
+
+	cgroupLines := strings.Split(string(cgroupsRaw), "\n")
+	for _, cgroupLine := range cgroupLines {
+		if cgroupLine == "" || strings.HasPrefix(cgroupLine, "#") {
+			continue
+		}
+		cgroupParts := strings.Split(cgroupLine, "\t")
+		cgroupName := cgroupParts[0]
+		if err := os.Mkdir("/mnt/sys/fs/cgroup/"+cgroupName, 0755); err != nil {
+			panic(err)
+		}
+		if err := unix.Mount("cgroup", "/mnt/sys/fs/cgroup/"+cgroupName, "cgroup", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, cgroupName); err != nil {
+			panic(err)
+		}
+	}
+
+	// Enable hierarchical memory accounting
+	useMemoryHierarchy, err := os.OpenFile("/mnt/sys/fs/cgroup/memory/memory.use_hierarchy", os.O_RDWR, 0)
+	if err != nil {
+		panic(err)
+	}
+	if _, err := useMemoryHierarchy.WriteString("1"); err != nil {
+		panic(err)
+	}
+	useMemoryHierarchy.Close()
+
 	// Chroot to new root.
 	// This is adapted from util-linux's switch_root.
 	err = os.Chdir("/mnt")
diff --git a/core/internal/containerd/BUILD.bazel b/core/internal/containerd/BUILD.bazel
new file mode 100644
index 0000000..dd7cf6d
--- /dev/null
+++ b/core/internal/containerd/BUILD.bazel
@@ -0,0 +1,19 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/core/internal/containerd",
+    visibility = ["//core:__subpackages__"],
+    deps = [
+        "//core/pkg/logbuffer:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+exports_files([
+    "config.toml",
+    "runsc.toml",
+    "loopback.json",
+    "ptp.json",
+])
diff --git a/core/internal/containerd/config.toml b/core/internal/containerd/config.toml
new file mode 100644
index 0000000..86ef097
--- /dev/null
+++ b/core/internal/containerd/config.toml
@@ -0,0 +1,118 @@
+version = 2
+root = "/data/containerd"
+state = "/containerd/run"
+plugin_dir = ""
+disabled_plugins = []
+required_plugins = []
+oom_score = 0
+
+[grpc]
+  address = "/containerd/run/containerd.sock"
+  tcp_address = ""
+  tcp_tls_cert = ""
+  tcp_tls_key = ""
+  uid = 0
+  gid = 0
+  max_recv_message_size = 16777216
+  max_send_message_size = 16777216
+
+[ttrpc]
+  address = ""
+  uid = 0
+  gid = 0
+
+[debug]
+  address = ""
+  uid = 0
+  gid = 0
+  level = ""
+
+[metrics]
+  address = ""
+  grpc_histogram = false
+
+[cgroup]
+  path = ""
+
+[timeouts]
+  "io.containerd.timeout.shim.cleanup" = "5s"
+  "io.containerd.timeout.shim.load" = "5s"
+  "io.containerd.timeout.shim.shutdown" = "3s"
+  "io.containerd.timeout.task.state" = "2s"
+
+[plugins]
+  [plugins."io.containerd.gc.v1.scheduler"]
+    pause_threshold = 0.02
+    deletion_threshold = 0
+    mutation_threshold = 100
+    schedule_delay = "0s"
+    startup_delay = "100ms"
+  [plugins."io.containerd.grpc.v1.cri"]
+    disable_tcp_service = true
+    stream_server_address = "127.0.0.1"
+    stream_server_port = "0"
+    stream_idle_timeout = "4h0m0s"
+    enable_selinux = false
+    sandbox_image = "k8s.gcr.io/pause:3.1"
+    stats_collect_period = 10
+    systemd_cgroup = false
+    enable_tls_streaming = false
+    max_container_log_line_size = 16384
+    disable_cgroup = false
+    disable_apparmor = true
+    restrict_oom_score_adj = false
+    max_concurrent_downloads = 3
+    disable_proc_mount = false
+    [plugins."io.containerd.grpc.v1.cri".containerd]
+      snapshotter = "overlayfs"
+      default_runtime_name = "runsc"
+      no_pivot = false
+      [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
+        runtime_type = ""
+        runtime_engine = ""
+        runtime_root = ""
+        privileged_without_host_devices = false
+      [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
+        runtime_type = ""
+        runtime_engine = ""
+        runtime_root = ""
+        privileged_without_host_devices = false
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
+          runtime_type = "io.containerd.runsc.v1"
+          runtime_engine = ""
+          runtime_root = ""
+          privileged_without_host_devices = false
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
+            TypeUrl = "io.containerd.runsc.v1.options"
+            ConfigPath = "/containerd/conf/runsc.toml"
+    [plugins."io.containerd.grpc.v1.cri".cni]
+      bin_dir = "/containerd/bin/cni"
+      conf_dir = "/containerd/conf/cni"
+      max_conf_num = 1
+      conf_template = ""
+    [plugins."io.containerd.grpc.v1.cri".registry]
+      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+          endpoint = ["https://registry-1.docker.io"]
+    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
+      tls_cert_file = ""
+      tls_key_file = ""
+  [plugins."io.containerd.internal.v1.opt"]
+    path = "/containerd/bin"
+  [plugins."io.containerd.internal.v1.restart"]
+    interval = "10s"
+  [plugins."io.containerd.metadata.v1.bolt"]
+    content_sharing_policy = "shared"
+  [plugins."io.containerd.monitor.v1.cgroups"]
+    no_prometheus = false
+  [plugins."io.containerd.runtime.v1.linux"]
+    shim = "containerd-shim"
+    runtime = "noop"
+    runtime_root = ""
+    no_shim = false
+    shim_debug = false
+  [plugins."io.containerd.runtime.v2.task"]
+    platforms = ["linux/amd64"]
+  [plugins."io.containerd.service.v1.diff-service"]
+    default = ["walking"]
\ No newline at end of file
diff --git a/core/internal/containerd/loopback.json b/core/internal/containerd/loopback.json
new file mode 100644
index 0000000..f375c5d
--- /dev/null
+++ b/core/internal/containerd/loopback.json
@@ -0,0 +1,4 @@
+{
+  "cniVersion": "0.3.0",
+  "type": "loopback"
+}
diff --git a/core/internal/containerd/main.go b/core/internal/containerd/main.go
new file mode 100644
index 0000000..f4952e4
--- /dev/null
+++ b/core/internal/containerd/main.go
@@ -0,0 +1,48 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package containerd
+
+import (
+	"context"
+	"os"
+	"os/exec"
+
+	"git.monogon.dev/source/nexantic.git/core/pkg/logbuffer"
+
+	"golang.org/x/sys/unix"
+)
+
+// RunContainerd mounts a tmpfs on /containerd/run, creates the temporary directory below it and then runs containerd
+// until it exits or ctx is canceled. It has the shape of a supervisor.Runnable for later integration.
+func RunContainerd(ctx context.Context) error {
+	containerdLog := logbuffer.New(1000, 16384)
+	cmd := exec.CommandContext(ctx, "/containerd/bin/containerd", "--config", "/containerd/conf/config.toml")
+	cmd.Stdout = containerdLog
+	cmd.Stderr = containerdLog
+	cmd.Env = []string{"PATH=/containerd/bin", "TMPDIR=/containerd/run/tmp"}
+
+	// Setup failures are returned rather than panicking so the caller decides how to handle them.
+	if err := unix.Mount("tmpfs", "/containerd/run", "tmpfs", 0, ""); err != nil {
+		return &os.PathError{Op: "mount", Path: "/containerd/run", Err: err}
+	}
+	if err := os.MkdirAll("/containerd/run/tmp", 0755); err != nil {
+		return err
+	}
+
+	// TODO(lorenz): Healthcheck against cri.Status() RPC
+	return cmd.Run()
+}
diff --git a/core/internal/containerd/ptp.json b/core/internal/containerd/ptp.json
new file mode 100644
index 0000000..d95da5d
--- /dev/null
+++ b/core/internal/containerd/ptp.json
@@ -0,0 +1,12 @@
+{
+  "name": "k8s-pod-network",
+  "cniVersion": "0.3.1",
+  "type": "ptp",
+  "mtu": 1420,
+  "ipam": {
+    "type": "host-local",
+    "subnet": "192.168.198.0/24",
+    "routes": [{ "dst": "0.0.0.0/0" }],
+    "dataDir": "/containerd/run/ipam"
+  }
+}
diff --git a/core/internal/containerd/runsc.toml b/core/internal/containerd/runsc.toml
new file mode 100644
index 0000000..52d846f
--- /dev/null
+++ b/core/internal/containerd/runsc.toml
@@ -0,0 +1,5 @@
+root = "/containerd/run/runsc"
+[runsc_config]
+debug = "false"
+# Setting intentionally left here in case anybody needs it since it is hard to find
+#debug-log = "/containerd/run/runsc-logs/"
\ No newline at end of file