Add KVM device plugin
This adds a KVM device plugin for Kubernetes. The plugin gives unprivileged pods access to KVM and allows that access
to be controlled granularly.
Test Plan: Tested in subsequent revision
X-Origin-Diff: phab/D739
GitOrigin-RevId: 5cd738a47d24e7bfdc29bbd1a31537209e1ebf46
diff --git a/metropolis/node/kubernetes/BUILD.bazel b/metropolis/node/kubernetes/BUILD.bazel
index ada30c9..1ab4c52 100644
--- a/metropolis/node/kubernetes/BUILD.bazel
+++ b/metropolis/node/kubernetes/BUILD.bazel
@@ -20,6 +20,7 @@
"//metropolis/node/kubernetes/clusternet:go_default_library",
"//metropolis/node/kubernetes/nfproxy:go_default_library",
"//metropolis/node/kubernetes/pki:go_default_library",
+ "//metropolis/node/kubernetes/plugins/kvmdevice:go_default_library",
"//metropolis/node/kubernetes/reconciler:go_default_library",
"//metropolis/pkg/fileargs:go_default_library",
"//metropolis/pkg/fsquota:go_default_library",
diff --git a/metropolis/node/kubernetes/csi.go b/metropolis/node/kubernetes/csi.go
index 03091c1..3f88d6f 100644
--- a/metropolis/node/kubernetes/csi.go
+++ b/metropolis/node/kubernetes/csi.go
@@ -33,8 +33,8 @@
pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
"source.monogon.dev/metropolis/node/core/localstorage"
- "source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/fsquota"
+ "source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
)
@@ -231,7 +231,7 @@
// Registration endpoints
func (s *csiPluginServer) GetInfo(ctx context.Context, req *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
return &pluginregistration.PluginInfo{
- Type: "CSIPlugin",
+ Type: pluginregistration.CSIPlugin,
Name: "dev.monogon.metropolis.vfs",
Endpoint: s.KubeletDirectory.Plugins.VFS.FullPath(),
SupportedVersions: []string{"1.2"}, // Keep in sync with container-storage-interface/spec package version
diff --git a/metropolis/node/kubernetes/plugins/kvmdevice/BUILD.bazel b/metropolis/node/kubernetes/plugins/kvmdevice/BUILD.bazel
new file mode 100644
index 0000000..d666f41
--- /dev/null
+++ b/metropolis/node/kubernetes/plugins/kvmdevice/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+# Library target for the kvmdevice package: built from the single source file kvmdevice.go and visible to all packages.
+go_library(
+ name = "go_default_library",
+ srcs = ["kvmdevice.go"],
+ importpath = "source.monogon.dev/metropolis/node/kubernetes/plugins/kvmdevice",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//metropolis/node/core/localstorage:go_default_library",
+ "//metropolis/pkg/logtree:go_default_library",
+ "//metropolis/pkg/supervisor:go_default_library",
+ "@io_k8s_kubelet//pkg/apis/deviceplugin/v1beta1:go_default_library",
+ "@io_k8s_kubelet//pkg/apis/pluginregistration/v1:go_default_library",
+ "@org_golang_google_grpc//:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go b/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go
new file mode 100644
index 0000000..e887d1d
--- /dev/null
+++ b/metropolis/node/kubernetes/plugins/kvmdevice/kvmdevice.go
@@ -0,0 +1,184 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package kvmdevice implements a Kubernetes device plugin for the virtual KVM device. Using the device plugin API
+// allows us to take advantage of the scheduler to locate pods on machines eligible for KVM and also allows granular
+// access control to KVM using quotas instead of needing privileged access.
+// Since KVM devices are virtual, this plugin emulates a huge number of them so that we never run out.
+package kvmdevice
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "os"
+ "strconv"
+ "strings"
+
+ "golang.org/x/sys/unix"
+ "google.golang.org/grpc"
+ deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+ "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
+
+ "source.monogon.dev/metropolis/node/core/localstorage"
+ "source.monogon.dev/metropolis/pkg/logtree"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+// Plugin is the KVM device plugin. It serves both the kubelet plugin registration endpoints (GetInfo,
+// NotifyRegistrationStatus) and the device plugin endpoints (GetDevicePluginOptions, ListAndWatch, Allocate), and is
+// started through Run.
+type Plugin struct {
+ // Embedded so that Plugin satisfies the device plugin server interface; presumably RPCs not defined on Plugin
+ // fall back to the generated "unimplemented" stubs (confirm against the generated code).
+ *deviceplugin.UnimplementedDevicePluginServer
+ // KubeletDirectory supplies the paths of the plugin socket (Plugins.KVM) and of the registration socket
+ // (PluginsRegistry.KVMReg). It must be set before Run is called.
+ KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
+
+ // logger is assigned at the start of Run from the supervisor context and is unset before that.
+ logger logtree.LeveledLogger
+}
+
+// GetInfo implements the kubelet plugin registration API. It describes this plugin as a device plugin named
+// "devices.monogon.dev/kvm" that supports version v1beta1 and is reachable on the KVM plugin socket.
+func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
+	info := new(pluginregistration.PluginInfo)
+	info.Type = pluginregistration.DevicePlugin
+	info.Name = "devices.monogon.dev/kvm"
+	// Same socket path that Run uses for the device plugin gRPC listener.
+	info.Endpoint = k.KubeletDirectory.Plugins.KVM.FullPath()
+	info.SupportedVersions = []string{"v1beta1"}
+	return info, nil
+}
+
+// NotifyRegistrationStatus implements the kubelet plugin registration API. A failed registration is only logged; the
+// call itself always succeeds with an empty response.
+func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
+	resp := new(pluginregistration.RegistrationStatusResponse)
+	if req.PluginRegistered {
+		return resp, nil
+	}
+	k.logger.Errorf("KVM plugin failed to register: %v", req.Error)
+	return resp, nil
+}
+
+func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) {
+ return &deviceplugin.DevicePluginOptions{
+ GetPreferredAllocationAvailable: false,
+ PreStartRequired: false,
+ }, nil
+}
+
+// ListAndWatch implements the device plugin API. It advertises a fixed set of 256 healthy virtual devices named kvm0
+// through kvm255 in a single response and then keeps the stream open until the stream's context is done. The device
+// list never changes, so no further responses are sent.
+//
+// An error is returned if the device list cannot be sent; otherwise nil is returned once the stream context ends.
+func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error {
+	// TODO(T963): Get this value from Kubelet configuration (or something higher-level?)
+	const deviceCount = 256
+
+	devs := make([]*deviceplugin.Device, 0, deviceCount)
+	for i := 0; i < deviceCount; i++ {
+		devs = append(devs, &deviceplugin.Device{
+			ID:     fmt.Sprintf("kvm%v", i),
+			Health: deviceplugin.Healthy,
+		})
+	}
+
+	// If the list could not be delivered there is nothing left to watch; report the failure to the caller instead
+	// of blocking on the stream.
+	if err := s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs}); err != nil {
+		return fmt.Errorf("failed to send device list: %w", err)
+	}
+
+	<-s.Context().Done()
+	return nil
+}
+
+// Allocate implements the device plugin API. For every container request it returns one device spec per requested
+// device ID; each spec maps the host's /dev/kvm to /dev/kvm inside the container with "rw" permissions. The IDs
+// themselves are not inspected, since every spec refers to the same device node.
+func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) {
+	response := new(deviceplugin.AllocateResponse)
+
+	for _, containerReq := range req.ContainerRequests {
+		var specs []*deviceplugin.DeviceSpec
+		for i := 0; i < len(containerReq.DevicesIDs); i++ {
+			specs = append(specs, &deviceplugin.DeviceSpec{
+				HostPath:      "/dev/kvm",
+				ContainerPath: "/dev/kvm",
+				Permissions:   "rw",
+			})
+		}
+		containerResp := &deviceplugin.ContainerAllocateResponse{Devices: specs}
+		response.ContainerResponses = append(response.ContainerResponses, containerResp)
+	}
+
+	return response, nil
+}
+
+// deviceNumberFromString parses a device specification of the form "major:minor" (both decimal, optionally surrounded
+// by whitespace) and returns the corresponding Linux device number. An error is returned if the string does not
+// consist of exactly two colon-separated fields, or if either field is not a decimal number that fits into 32 bits.
+func deviceNumberFromString(s string) (uint64, error) {
+	fields := strings.Split(s, ":")
+	if len(fields) != 2 {
+		return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s)
+	}
+
+	// parsePart converts one field into an unsigned 32 bit number, disregarding surrounding whitespace.
+	parsePart := func(field string) (uint64, error) {
+		return strconv.ParseUint(strings.TrimSpace(field), 10, 32)
+	}
+
+	major, err := parsePart(fields[0])
+	if err != nil {
+		return 0, fmt.Errorf("failed to convert major number to an integer: %w", err)
+	}
+	minor, err := parsePart(fields[1])
+	if err != nil {
+		return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err)
+	}
+
+	return unix.Mkdev(uint32(major), uint32(minor)), nil
+}
+
+// Run is the supervisor runnable of the KVM device plugin. It checks whether KVM may be exposed, makes sure the
+// /dev/kvm character device node exists, and then starts two gRPC servers as supervisor children: the device plugin
+// server on the KVM plugin socket and the kubelet registration server on the registration socket.
+//
+// If the CPU reports being vulnerable to L1TF, or if the KVM device number cannot be read from sysfs, a warning is
+// logged and the runnable finishes successfully without starting any server. An error is returned if the L1TF status
+// cannot be read (other than the file not existing), if the device number from sysfs is malformed, if the device
+// node cannot be created, or if a socket or child runnable cannot be set up.
+func (k *Plugin) Run(ctx context.Context) error {
+	k.logger = supervisor.Logger(ctx)
+
+	// finish signals the supervisor that this runnable is healthy and done, and yields the nil result for Run.
+	finish := func() error {
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		supervisor.Signal(ctx, supervisor.SignalDone)
+		return nil
+	}
+
+	// A missing l1tf file is tolerated: l1tfStatus is then empty and the check below does not trigger.
+	l1tfStatus, err := ioutil.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf")
+	if err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("failed to query for CPU vulnerabilities: %w", err)
+	}
+	if bytes.Contains(l1tfStatus, []byte("vulnerable")) {
+		k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.")
+		return finish()
+	}
+
+	kvmDevRaw, err := ioutil.ReadFile("/sys/devices/virtual/misc/kvm/dev")
+	if err != nil {
+		k.logger.Warning("KVM is not available. Check firmware settings and CPU.")
+		return finish()
+	}
+
+	// A malformed device number must not reach Mknod, which would otherwise be called with device number 0.
+	kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw))
+	if err != nil {
+		return fmt.Errorf("failed to parse KVM device number: %w", err)
+	}
+
+	// The file type bits are part of the mode: without S_IFCHR, mknod(2) creates a regular file and ignores the
+	// device number. An already existing node is fine.
+	err = unix.Mknod("/dev/kvm", unix.S_IFCHR|0660, int(kvmDevNode))
+	if err != nil && err != unix.EEXIST {
+		return fmt.Errorf("failed to create KVM device node: %w", err)
+	}
+
+	pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"})
+	if err != nil {
+		return fmt.Errorf("failed to listen on device plugin socket: %w", err)
+	}
+	pluginListener.SetUnlinkOnClose(true)
+
+	pluginServer := grpc.NewServer()
+	deviceplugin.RegisterDevicePluginServer(pluginServer, k)
+	if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil {
+		return err
+	}
+
+	// The registration socket is opened only after the device plugin server has been handed to the supervisor.
+	registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"})
+	if err != nil {
+		return fmt.Errorf("failed to listen on registration socket: %w", err)
+	}
+	registrationListener.SetUnlinkOnClose(true)
+
+	registrationServer := grpc.NewServer()
+	pluginregistration.RegisterRegistrationServer(registrationServer, k)
+	if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil {
+		return err
+	}
+
+	return finish()
+}
diff --git a/metropolis/node/kubernetes/service.go b/metropolis/node/kubernetes/service.go
index 8d0c795..bd0d211 100644
--- a/metropolis/node/kubernetes/service.go
+++ b/metropolis/node/kubernetes/service.go
@@ -34,6 +34,7 @@
"source.monogon.dev/metropolis/node/kubernetes/clusternet"
"source.monogon.dev/metropolis/node/kubernetes/nfproxy"
"source.monogon.dev/metropolis/node/kubernetes/pki"
+ "source.monogon.dev/metropolis/node/kubernetes/plugins/kvmdevice"
"source.monogon.dev/metropolis/node/kubernetes/reconciler"
"source.monogon.dev/metropolis/pkg/supervisor"
apb "source.monogon.dev/metropolis/proto/api"
@@ -136,6 +137,10 @@
ClientSet: clientSet,
}
+ kvmDevicePlugin := kvmdevice.Plugin{
+ KubeletDirectory: &s.c.Root.Data.Kubernetes.Kubelet,
+ }
+
for _, sub := range []struct {
name string
runnable supervisor.Runnable
@@ -149,6 +154,7 @@
{"csi-provisioner", csiProvisioner.Run},
{"clusternet", clusternet.Run},
{"nfproxy", nfproxy.Run},
+ {"kvmdeviceplugin", kvmDevicePlugin.Run},
} {
err := supervisor.Run(ctx, sub.name, sub.runnable)
if err != nil {