|  | // Copyright 2020 The Monogon Project Authors. | 
|  | // | 
|  | // SPDX-License-Identifier: Apache-2.0 | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | // Package kvmdevice implements a Kubernetes device plugin for the virtual KVM | 
|  | // device. Using the device plugin API allows us to take advantage of the | 
|  | // scheduler to locate pods on machines eligible for KVM and also allows | 
|  | // granular access control to KVM using quotas instead of needing privileged | 
|  | // access. | 
|  | // Since KVM devices are virtual, this plugin emulates a huge number of them so | 
|  | // that we never run out. | 
|  | package kvmdevice | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "context" | 
|  | "fmt" | 
|  | "net" | 
|  | "os" | 
|  | "strconv" | 
|  | "strings" | 
|  |  | 
|  | "golang.org/x/sys/unix" | 
|  | "google.golang.org/grpc" | 
|  | corev1 "k8s.io/api/core/v1" | 
|  | deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" | 
|  | "k8s.io/kubelet/pkg/apis/pluginregistration/v1" | 
|  |  | 
|  | "source.monogon.dev/metropolis/node/core/localstorage" | 
|  | "source.monogon.dev/metropolis/pkg/logtree" | 
|  | "source.monogon.dev/metropolis/pkg/supervisor" | 
|  | ) | 
|  |  | 
|  | // Name is the name of the KVM devices this plugin exposes | 
|  | var Name corev1.ResourceName = "devices.monogon.dev/kvm" | 
|  |  | 
|  | type Plugin struct { | 
|  | *deviceplugin.UnimplementedDevicePluginServer | 
|  | KubeletDirectory *localstorage.DataKubernetesKubeletDirectory | 
|  |  | 
|  | logger logtree.LeveledLogger | 
|  | } | 
|  |  | 
|  | func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) { | 
|  | return &pluginregistration.PluginInfo{ | 
|  | Type:              pluginregistration.DevicePlugin, | 
|  | Name:              string(Name), | 
|  | Endpoint:          k.KubeletDirectory.Plugins.KVM.FullPath(), | 
|  | SupportedVersions: []string{"v1beta1"}, | 
|  | }, nil | 
|  | } | 
|  |  | 
|  | func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) { | 
|  | if !req.PluginRegistered { | 
|  | k.logger.Errorf("KVM plugin failed to register: %v", req.Error) | 
|  | } | 
|  | return &pluginregistration.RegistrationStatusResponse{}, nil | 
|  | } | 
|  |  | 
|  | func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) { | 
|  | return &deviceplugin.DevicePluginOptions{ | 
|  | GetPreferredAllocationAvailable: false, | 
|  | PreStartRequired:                false, | 
|  | }, nil | 
|  | } | 
|  |  | 
|  | func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error { | 
|  | var devs []*deviceplugin.Device | 
|  |  | 
|  | // TODO(T963): Get this value from Kubelet configuration (or something higher-level?) | 
|  | for i := 0; i < 256; i++ { | 
|  | devs = append(devs, &deviceplugin.Device{ | 
|  | ID:     fmt.Sprintf("kvm%v", i), | 
|  | Health: deviceplugin.Healthy, | 
|  | }) | 
|  | } | 
|  |  | 
|  | s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs}) | 
|  |  | 
|  | <-s.Context().Done() | 
|  | return nil | 
|  | } | 
|  |  | 
|  | func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) { | 
|  | var response deviceplugin.AllocateResponse | 
|  |  | 
|  | for _, req := range req.ContainerRequests { | 
|  | var devices []*deviceplugin.DeviceSpec | 
|  | for range req.DevicesIDs { | 
|  | dev := new(deviceplugin.DeviceSpec) | 
|  | dev.HostPath = "/dev/kvm" | 
|  | dev.ContainerPath = "/dev/kvm" | 
|  | dev.Permissions = "rw" | 
|  | devices = append(devices, dev) | 
|  | } | 
|  | response.ContainerResponses = append(response.ContainerResponses, &deviceplugin.ContainerAllocateResponse{ | 
|  | Devices: devices}) | 
|  | } | 
|  |  | 
|  | return &response, nil | 
|  | } | 
|  |  | 
|  | // deviceNumberFromString gets a Linux device number from a string containing | 
|  | // two decimal numbers representing the major and minor device numbers | 
|  | // separated by a colon. Whitespace is ignored. | 
|  | func deviceNumberFromString(s string) (uint64, error) { | 
|  | kvmDevParts := strings.Split(s, ":") | 
|  | if len(kvmDevParts) != 2 { | 
|  | return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s) | 
|  | } | 
|  | major, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[0]), 10, 32) | 
|  | if err != nil { | 
|  | return 0, fmt.Errorf("failed to convert major number to an integer: %w", err) | 
|  | } | 
|  | minor, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[1]), 10, 32) | 
|  | if err != nil { | 
|  | return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err) | 
|  | } | 
|  |  | 
|  | return unix.Mkdev(uint32(major), uint32(minor)), nil | 
|  | } | 
|  |  | 
|  | func (k *Plugin) Run(ctx context.Context) error { | 
|  | k.logger = supervisor.Logger(ctx) | 
|  |  | 
|  | l1tfStatus, err := os.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf") | 
|  | if err != nil && !os.IsNotExist(err) { | 
|  | return fmt.Errorf("failed to query for CPU vulnerabilities: %v", err) | 
|  | } | 
|  |  | 
|  | if bytes.Contains(l1tfStatus, []byte("vulnerable")) { | 
|  | k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.") | 
|  | supervisor.Signal(ctx, supervisor.SignalHealthy) | 
|  | supervisor.Signal(ctx, supervisor.SignalDone) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | kvmDevRaw, err := os.ReadFile("/sys/devices/virtual/misc/kvm/dev") | 
|  | if err != nil { | 
|  | k.logger.Warning("KVM is not available. Check firmware settings and CPU.") | 
|  | supervisor.Signal(ctx, supervisor.SignalHealthy) | 
|  | supervisor.Signal(ctx, supervisor.SignalDone) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw)) | 
|  |  | 
|  | err = unix.Mknod("/dev/kvm", 0660, int(kvmDevNode)) | 
|  | if err != nil && err != unix.EEXIST { | 
|  | return fmt.Errorf("failed to create KVM device node: %v", err) | 
|  | } | 
|  |  | 
|  | pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"}) | 
|  | if err != nil { | 
|  | return fmt.Errorf("failed to listen on device plugin socket: %w", err) | 
|  | } | 
|  | pluginListener.SetUnlinkOnClose(true) | 
|  |  | 
|  | pluginServer := grpc.NewServer() | 
|  | deviceplugin.RegisterDevicePluginServer(pluginServer, k) | 
|  | if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil { | 
|  | return err | 
|  | } | 
|  |  | 
|  | registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"}) | 
|  | if err != nil { | 
|  | return fmt.Errorf("failed to listen on registration socket: %w", err) | 
|  | } | 
|  | registrationListener.SetUnlinkOnClose(true) | 
|  |  | 
|  | registrationServer := grpc.NewServer() | 
|  | pluginregistration.RegisterRegistrationServer(registrationServer, k) | 
|  | if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil { | 
|  | return err | 
|  | } | 
|  | supervisor.Signal(ctx, supervisor.SignalHealthy) | 
|  | supervisor.Signal(ctx, supervisor.SignalDone) | 
|  | return nil | 
|  | } |