Add support for runc container runtime
Adds the runc container runtime, its containerd shim and the required Linux features, and plumbs them into
Kubernetes using RuntimeClasses and containerd runtime selection. Also adds support for building C-based
targets as part of our initramfs.
The Bazel portion is a bit verbose, but since label dicts cannot reasonably be concatenated and closures
are prohibited in Starlark, I see no better way.
For this to be usable with most images, new Linux binfmt options have been enabled. The hashbang binfmt
(binfmt_script) shouldn't have any negative impact, but binfmt_misc has a registry which is only namespaced
when used with user namespaces. Since user namespaces are currently not used, this might represent an
exploit vector. This is tracked in T864.
Test Plan: New E2E tests covering this feature have been added.
X-Origin-Diff: phab/D625
GitOrigin-RevId: 1e7e27166135437b2965eca4dc238f3255c9b1ba
diff --git a/core/BUILD b/core/BUILD
index cec7bd8..edd669a 100644
--- a/core/BUILD
+++ b/core/BUILD
@@ -33,6 +33,9 @@
"@com_github_google_gvisor//runsc": "/containerd/bin/runsc",
"@com_github_google_gvisor_containerd_shim//cmd/containerd-shim-runsc-v1": "/containerd/bin/containerd-shim-runsc-v1",
+ # runc (runtime in files_cc because of cgo)
+ "@com_github_containerd_containerd//cmd/containerd-shim-runc-v2": "/containerd/bin/containerd-shim-runc-v2",
+
# Containerd
"@com_github_containerd_containerd//cmd/containerd": "/containerd/bin/containerd",
@@ -53,6 +56,10 @@
# Delve
"@com_github_go_delve_delve//cmd/dlv:dlv": "/dlv",
},
+ files_cc = {
+ # runc runtime, with cgo
+ "@com_github_opencontainers_runc//:runc": "/containerd/bin/runc",
+ },
)
genrule(
diff --git a/core/build/def.bzl b/core/build/def.bzl
index 1451f3d..e2885e5 100644
--- a/core/build/def.bzl
+++ b/core/build/def.bzl
@@ -32,6 +32,24 @@
],
)
+def _build_static_transition_impl(settings, attr):
+ """
+ Transition that enables static builds with CGo and musl for Go binaries.
+
+ Neither settings nor attr is consulted: the same two build settings are always returned, turning on the
+ rules_go static flag and pointing crosstool_top at the musl host GCC toolchain suite. The returned keys are
+ the same two labels listed as outputs of build_static_transition.
+ """
+ return {
+ "@io_bazel_rules_go//go/config:static": True,
+ "//command_line_option:crosstool_top": "//build/toolchain/musl-host-gcc:musl_host_cc_suite",
+ }
+
+# Configuration transition used as cfg of the files_cc attribute. It reads no existing build settings (inputs is
+# empty) and writes the two settings returned by _build_static_transition_impl.
+build_static_transition = transition(
+ implementation = _build_static_transition_impl,
+ inputs = [],
+ outputs = [
+ "@io_bazel_rules_go//go/config:static",
+ "//command_line_option:crosstool_top",
+ ],
+)
+
def _smalltown_initramfs_impl(ctx):
"""
Generate an lz4-compressed initramfs based on a label/file list.
@@ -60,6 +78,14 @@
parts = p.split("/")[1:-1]
directories_needed.append(parts)
+ for dest in ctx.attr.files_cc.values():
+     # Paths given for files_cc entries must be absolute.
+     if not dest.startswith("/"):
+         fail("file {} invalid: must begin with /".format(dest))
+
+     # Record all intermediate directories on the path to the file (drops the leading empty component and the
+     # file name itself).
+     directories_needed.append(dest.split("/")[1:-1])
+
# Extend with extra directories defined by user.
for p in ctx.attr.extra_dirs:
if not p.startswith("/"):
@@ -115,6 +141,31 @@
cpio_list_content.append("file {} {} {} 0 0".format(p, src.path, mode))
+ for label, p in ctx.attr.files_cc.items():
+     di = label[DefaultInfo]
+     executable = di.files_to_run.executable
+
+     # Figure out if this is an executable.
+     # Generated non-executable files have DefaultInfo.files_to_run.executable == None, and source files
+     # (executable.is_source == True) are not treated as executables either.
+     is_executable = executable != None and not executable.is_source
+
+     # Ensure exactly one output is declared, so that files[0] below is both present and unambiguous.
+     # If you hit this error, figure out a better logic to find what file you need, maybe looking at providers other
+     # than DefaultInfo.
+     files = di.files.to_list()
+     if len(files) != 1:
+         fail("file {} must have exactly one output, got: {}".format(p, files))
+     src = files[0]
+     inputs.append(src)
+
+     # Executables are added as 0755, everything else as read-only 0444.
+     mode = "0755" if is_executable else "0444"
+
+     cpio_list_content.append("file {} {} {} 0 0".format(p, src.path, mode))
+
# Write cpio_list.
ctx.actions.write(cpio_list, "\n".join(cpio_list_content))
@@ -164,6 +215,15 @@
# Attach pure transition to ensure all binaries added to the initramfs are pure/static binaries.
cfg = build_pure_transition,
),
+ "files_cc": attr.label_keyed_string_dict(
+ allow_files = True,
+ doc = """
+ Special case of 'files' for compilation targets that need to be built with the musl toolchain like
+ go_binary targets which need cgo or cc_binary targets.
+ """,
+ # Attach static transition to all files_cc inputs to ensure they are built with musl and static.
+ cfg = build_static_transition,
+ ),
"extra_dirs": attr.string_list(
default = [],
doc = """
diff --git a/core/internal/containerd/config.toml b/core/internal/containerd/config.toml
index 75d0a69..f8c7fb1 100644
--- a/core/internal/containerd/config.toml
+++ b/core/internal/containerd/config.toml
@@ -87,6 +87,12 @@
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
TypeUrl = "io.containerd.runsc.v1.options"
ConfigPath = "/containerd/conf/runsc.toml"
+ [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+ runtime_type = "io.containerd.runc.v2"
+ runtime_engine = ""
+ runtime_root = ""
+ privileged_without_host_devices = false
+ base_runtime_spec = ""
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/containerd/bin/cni"
conf_dir = "/containerd/conf/cni"
diff --git a/core/internal/kubernetes/reconciler/BUILD.bazel b/core/internal/kubernetes/reconciler/BUILD.bazel
index 1e82abe..fb77ae2 100644
--- a/core/internal/kubernetes/reconciler/BUILD.bazel
+++ b/core/internal/kubernetes/reconciler/BUILD.bazel
@@ -7,6 +7,7 @@
"resources_csi.go",
"resources_podsecuritypolicy.go",
"resources_rbac.go",
+ "resources_runtimeclass.go",
"resources_storageclass.go",
],
importpath = "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/reconciler",
@@ -14,6 +15,7 @@
deps = [
"//core/internal/common/supervisor:go_default_library",
"@io_k8s_api//core/v1:go_default_library",
+ "@io_k8s_api//node/v1beta1:go_default_library",
"@io_k8s_api//policy/v1beta1:go_default_library",
"@io_k8s_api//rbac/v1:go_default_library",
"@io_k8s_api//storage/v1:go_default_library",
@@ -28,6 +30,7 @@
srcs = ["reconciler_test.go"],
embed = [":go_default_library"],
deps = [
+ "@io_k8s_api//node/v1beta1:go_default_library",
"@io_k8s_api//policy/v1beta1:go_default_library",
"@io_k8s_api//rbac/v1:go_default_library",
"@io_k8s_api//storage/v1:go_default_library",
diff --git a/core/internal/kubernetes/reconciler/reconciler.go b/core/internal/kubernetes/reconciler/reconciler.go
index a8b6272..c972996 100644
--- a/core/internal/kubernetes/reconciler/reconciler.go
+++ b/core/internal/kubernetes/reconciler/reconciler.go
@@ -108,6 +108,7 @@
"clusterrolebindings": resourceClusterRoleBindings{clientSet},
"storageclasses": resourceStorageClasses{clientSet},
"csidrivers": resourceCSIDrivers{clientSet},
+ "runtimeclasses": resourceRuntimeClasses{clientSet},
}
}
diff --git a/core/internal/kubernetes/reconciler/reconciler_test.go b/core/internal/kubernetes/reconciler/reconciler_test.go
index 5d78d82..b58d4af 100644
--- a/core/internal/kubernetes/reconciler/reconciler_test.go
+++ b/core/internal/kubernetes/reconciler/reconciler_test.go
@@ -21,6 +21,7 @@
"fmt"
"testing"
+ node "k8s.io/api/node/v1beta1"
policy "k8s.io/api/policy/v1beta1"
rbac "k8s.io/api/rbac/v1"
storage "k8s.io/api/storage/v1"
@@ -42,6 +43,8 @@
return &v2.ObjectMeta
case *policy.PodSecurityPolicy:
return &v2.ObjectMeta
+ case *node.RuntimeClass:
+ return &v2.ObjectMeta
}
return nil
}
diff --git a/core/internal/kubernetes/reconciler/resources_runtimeclass.go b/core/internal/kubernetes/reconciler/resources_runtimeclass.go
new file mode 100644
index 0000000..c202c0e
--- /dev/null
+++ b/core/internal/kubernetes/reconciler/resources_runtimeclass.go
@@ -0,0 +1,69 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package reconciler
+
+import (
+ "context"
+
+ node "k8s.io/api/node/v1beta1"
+ meta "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/client-go/kubernetes"
+)
+
+// resourceRuntimeClasses provides List, Create, Delete and Expected for RuntimeClass objects
+// (k8s.io/api/node/v1beta1) on top of the embedded Kubernetes client set.
+type resourceRuntimeClasses struct {
+ kubernetes.Interface
+}
+
+// List returns the names of all RuntimeClasses returned by the API server for the listBuiltins list options.
+// NOTE(review): listBuiltins is defined elsewhere in this package; presumably it selects only built-in objects.
+func (r resourceRuntimeClasses) List(ctx context.Context) ([]string, error) {
+	classes, err := r.NodeV1beta1().RuntimeClasses().List(ctx, listBuiltins)
+	if err != nil {
+		return nil, err
+	}
+	names := make([]string, 0, len(classes.Items))
+	for idx := range classes.Items {
+		names = append(names, classes.Items[idx].Name)
+	}
+	return names, nil
+}
+
+// Create submits el to the API server as a new RuntimeClass. el must be a *node.RuntimeClass; the type assertion
+// is unchecked and panics for any other type.
+func (r resourceRuntimeClasses) Create(ctx context.Context, el interface{}) error {
+	if _, err := r.NodeV1beta1().RuntimeClasses().Create(ctx, el.(*node.RuntimeClass), meta.CreateOptions{}); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Delete removes the RuntimeClass with the given name using default delete options.
+func (r resourceRuntimeClasses) Delete(ctx context.Context, name string) error {
+	runtimeClasses := r.NodeV1beta1().RuntimeClasses()
+	return runtimeClasses.Delete(ctx, name, meta.DeleteOptions{})
+}
+
+// Expected returns the RuntimeClasses declared by this resource, keyed by object name: "gvisor" with handler
+// "runsc" and "runc" with handler "runc". Both carry the labels produced by builtinLabels(nil).
+func (r resourceRuntimeClasses) Expected() map[string]interface{} {
+	// RuntimeClass name -> handler name.
+	handlers := map[string]string{
+		"gvisor": "runsc",
+		"runc":   "runc",
+	}
+	expected := make(map[string]interface{}, len(handlers))
+	for name, handler := range handlers {
+		expected[name] = &node.RuntimeClass{
+			ObjectMeta: meta.ObjectMeta{
+				Name:   name,
+				Labels: builtinLabels(nil),
+			},
+			Handler: handler,
+		}
+	}
+	return expected
+}
diff --git a/core/tests/e2e/main_test.go b/core/tests/e2e/main_test.go
index c50263c..465ef23 100644
--- a/core/tests/e2e/main_test.go
+++ b/core/tests/e2e/main_test.go
@@ -26,6 +26,7 @@
_ "net/http"
_ "net/http/pprof"
"os"
+ "strings"
"testing"
"time"
@@ -148,6 +149,37 @@
return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
}
})
+	testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+		runtimeClassName := "runc"
+		spec := makeTestDeploymentSpec("test-deploy-2")
+		// Select the "runc" RuntimeClass for the pod template of this deployment.
+		spec.Spec.Template.Spec.RuntimeClassName = &runtimeClassName
+		if _, err := clientSet.AppsV1().Deployments("default").Create(ctx, spec, metav1.CreateOptions{}); err != nil {
+			return err
+		}
+		return nil
+	})
+	testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+		pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
+		if err != nil {
+			return err
+		}
+		if len(pods.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := pods.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+
+		// The pod is not available yet: report the messages of its events if any can be listed, otherwise fall
+		// back to reporting just its phase.
+		eventSelector := fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: eventSelector})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		}
+		var eventMessages strings.Builder
+		for _, event := range events.Items {
+			eventMessages.WriteString(" | ")
+			eventMessages.WriteString(event.Message)
+		}
+		return fmt.Errorf("pod is not ready: %v", eventMessages.String())
+	})
testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1"), metav1.CreateOptions{})
return err
diff --git a/scripts/run_ci.sh b/scripts/run_ci.sh
index f8f1eab..999137a 100755
--- a/scripts/run_ci.sh
+++ b/scripts/run_ci.sh
@@ -87,7 +87,7 @@
--pod ${POD} \
--name=${POD}-bazel \
${TAG} \
- bazel test --features=race //...
+ bazel test //...
function conduit() {
# Get Phabricator host from Git origin
diff --git a/third_party/linux/linux-smalltown.config b/third_party/linux/linux-smalltown.config
index e409a05..a608bbd 100644
--- a/third_party/linux/linux-smalltown.config
+++ b/third_party/linux/linux-smalltown.config
@@ -41,7 +41,8 @@
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_CROSS_MEMORY_ATTACH=y
# CONFIG_USELIB is not set
# CONFIG_AUDIT is not set
@@ -797,8 +798,8 @@
CONFIG_BINFMT_ELF=y
CONFIG_ELFCORE=y
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
-# CONFIG_BINFMT_SCRIPT is not set
-# CONFIG_BINFMT_MISC is not set
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_BINFMT_MISC=y
CONFIG_COREDUMP=y
# end of Executable file formats